Merge branch 'similarity'

This commit is contained in:
Julien Fastré 2018-11-13 12:58:19 +01:00
commit 5841016d50
11 changed files with 387 additions and 6 deletions

View File

@ -9,3 +9,13 @@ Version 1.5.1
- remove nonexistent `person.css` file
- fix bug in accompanying person validation
Branch `Similarity`
==================
- Add a column with the canonical fullname (lowercased and unaccented) to the person entity ;
- Add a trigram index on the canonical fullname ;
- Add a "similar person matcher", which allows detecting persons with similar names when adding a person ;
- Add a search of persons by fuzzy name, returning results with a similarity of at least 0.15 ;
Thanks to @matla :-)

View File

@ -113,7 +113,12 @@ class Person implements HasCenterInterface {
* @var \Doctrine\Common\Collections\Collection
*/
private $addresses;
/**
* @var string
*/
private $fullnameCanonical;
public function __construct(\DateTime $opening = null) {
$this->accompanyingPeriods = new ArrayCollection();
$this->spokenLanguages = new ArrayCollection();

View File

@ -51,6 +51,9 @@ Chill\PersonBundle\Entity\Person:
type: text
nullable: true
length: 40
fullnameCanonical:
type: string
length: 255
manyToOne:
countryOfBirth:
targetEntity: Chill\MainBundle\Entity\Country

View File

@ -11,6 +11,18 @@ services:
tags:
- { name: chill.search, alias: 'person_regular' }
# Search provider returning persons with a similar name; it is tagged as a
# `chill.search` provider under the alias `person_similarity`.
Chill\PersonBundle\Search\SimilarityPersonSearch:
# Constructor arguments, in order: entity manager, token storage,
# authorization helper, paginator factory and the regular person search.
arguments:
- "@doctrine.orm.entity_manager"
- "@security.token_storage"
- "@chill.main.security.authorization.helper"
- "@chill_main.paginator_factory"
- '@chill.person.search_person'
# The service container is injected through a setter call.
calls:
- ['setContainer', ["@service_container"]]
tags:
- { name: chill.search, alias: 'person_similarity' }
Chill\PersonBundle\Search\SimilarPersonMatcher:
arguments:
$em: '@Doctrine\ORM\EntityManagerInterface'

View File

@ -0,0 +1,79 @@
<?php declare(strict_types=1);
namespace Application\Migrations;
use Doctrine\DBAL\Schema\Schema;
use Doctrine\Migrations\AbstractMigration;
/**
 * Add fullnameCanonical column for trigram matching (fast searching).
 *
 * The column stores the lowercased, unaccented "firstname lastname" of each
 * person. It is filled once for the existing rows, indexed with a trigram GIN
 * index, and kept up to date by two triggers (one on insert, one on update).
 *
 * NOTE(review): the SQL relies on UNACCENT() and on the gin_trgm_ops operator
 * class; this migration assumes the `unaccent` and `pg_trgm` extensions are
 * already installed - TODO confirm.
 */
final class Version20181023101621 extends AbstractMigration
{
    /**
     * Create the column, backfill it, index it and install the triggers.
     *
     * @param Schema $schema
     */
    public function up(Schema $schema) : void
    {
        $this->abortIf($this->connection->getDatabasePlatform()->getName() !== 'postgresql', 'Migration can only be executed safely on \'postgresql\'.');

        // new column, backfilled from the existing names, then indexed
        $this->addSql("ALTER TABLE chill_person_person ADD fullnameCanonical VARCHAR(255) DEFAULT '' ");
        $this->addSql("UPDATE chill_person_person SET fullnameCanonical=LOWER(UNACCENT(CONCAT(firstname, ' ', lastname)))");
        $this->addSql("CREATE INDEX fullnameCanonical_trgm_idx ON chill_person_person USING GIN (fullnameCanonical gin_trgm_ops)");

        // Recompute the canonical name when firstname or lastname changes.
        // IS DISTINCT FROM is used instead of <> because <> yields NULL (and
        // the IF branch is skipped) as soon as one of the compared values is
        // NULL, which would leave a stale canonical name.
        $this->addSql(<<<'SQL'
CREATE OR REPLACE FUNCTION canonicalize_fullname_on_update() RETURNS TRIGGER AS
$BODY$
BEGIN
    IF NEW.firstname IS DISTINCT FROM OLD.firstname OR NEW.lastname IS DISTINCT FROM OLD.lastname
    THEN
        UPDATE chill_person_person
        SET fullnameCanonical=LOWER(UNACCENT(CONCAT(NEW.firstname, ' ', NEW.lastname)))
        WHERE id=NEW.id;
    END IF;
    RETURN NEW;
END;
$BODY$ LANGUAGE PLPGSQL;
SQL
        );

        // The trigger functions themselves UPDATE chill_person_person; the
        // WHEN clause restricts this trigger to statements which are not
        // issued from inside another trigger, which prevents recursion.
        $this->addSql(<<<SQL
CREATE TRIGGER canonicalize_fullname_on_update
AFTER UPDATE
ON chill_person_person
FOR EACH ROW
WHEN (pg_trigger_depth() = 0)
EXECUTE PROCEDURE canonicalize_fullname_on_update();
SQL
        );

        // Compute the canonical name of every freshly inserted person.
        $this->addSql(<<<'SQL'
CREATE OR REPLACE FUNCTION canonicalize_fullname_on_insert() RETURNS TRIGGER AS
$BODY$
BEGIN
    UPDATE chill_person_person
    SET fullnameCanonical=LOWER(UNACCENT(CONCAT(NEW.firstname, ' ', NEW.lastname)))
    WHERE id=NEW.id;
    RETURN NEW;
END;
$BODY$ LANGUAGE PLPGSQL;
SQL
        );

        $this->addSql(<<<SQL
CREATE TRIGGER canonicalize_fullname_on_insert
AFTER INSERT
ON chill_person_person
FOR EACH ROW
EXECUTE PROCEDURE canonicalize_fullname_on_insert();
SQL
        );
    }

    /**
     * Remove everything created by up(), in reverse order: the triggers and
     * their functions first, then the index and finally the column.
     *
     * @param Schema $schema
     */
    public function down(Schema $schema) : void
    {
        $this->abortIf($this->connection->getDatabasePlatform()->getName() !== 'postgresql', 'Migration can only be executed safely on \'postgresql\'.');

        $this->addSql('DROP TRIGGER canonicalize_fullname_on_insert ON chill_person_person');
        $this->addSql('DROP FUNCTION canonicalize_fullname_on_insert()');
        $this->addSql('DROP TRIGGER canonicalize_fullname_on_update ON chill_person_person');
        $this->addSql('DROP FUNCTION canonicalize_fullname_on_update()');
        $this->addSql('DROP INDEX fullnameCanonical_trgm_idx');
        $this->addSql('ALTER TABLE chill_person_person DROP fullnameCanonical');
    }
}

View File

@ -68,4 +68,5 @@ Reset: 'Remise à zéro'
'Person details': 'Détails de la personne'
Create an accompanying period: Create an accompanying period
'Create': Create
'Create': Create
Similar persons: Similar persons

View File

@ -201,3 +201,4 @@ Aggregate by age: Aggréger par âge
Calculate age in relation to this date: Calculer l'âge par rapport à cette date
Group people by country of birth: Aggréger les personnes par pays de naissance
Similar persons: Personnes similaires

View File

@ -14,7 +14,7 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
#}
<h2>{{ 'Person search results'|trans }}</h2>
<h2>{{ title|default('Person search results')|trans }}</h2>
<p>
{{ '%total% persons matching the search pattern:'|transchoice( total, { '%total%' : total}) }}

View File

@ -201,7 +201,7 @@ class PersonSearch extends AbstractSearch implements ContainerAwareInterface,
* @param array $terms
* @return \Doctrine\ORM\QueryBuilder
*/
protected function createQuery(array $terms)
public function createQuery(array $terms)
{
//get from cache
$cacheKey = md5(serialize($terms));
@ -283,7 +283,7 @@ class PersonSearch extends AbstractSearch implements ContainerAwareInterface,
foreach($grams as $key => $gram) {
$qb->andWhere($qb->expr()
->like('UNACCENT(LOWER(CONCAT(p.firstName, \' \', p.lastName)))', ':default_'.$key))
->like('p.fullnameCanonical', 'UNACCENT(LOWER(:default_'.$key.'))'))
->setParameter('default_'.$key, '%'.$gram.'%');
}
}

View File

@ -75,14 +75,18 @@ class SimilarPersonMatcher
. ' OR UNACCENT(LOWER(p.lastName)) LIKE UNACCENT(LOWER(:lastName)) '
. ' OR UNACCENT(LOWER(p.firstName)) LIKE UNACCENT(LOWER(:lastName)) '
. ' OR UNACCENT(LOWER(p.lastName)) LIKE UNACCENT(LOWER(:firstName)) '
. ' OR SIMILARITY(p.fullnameCanonical, UNACCENT(LOWER(:fullName))) >= 0.15 '
. ' ) '
. ' AND p.center IN (:centers)';
. ' AND p.center IN (:centers)'
. ' ORDER BY SIMILARITY(p.fullnameCanonical, UNACCENT(LOWER(:fullName))) DESC '
;
$query =
$this->em
->createQuery($dql)
->setParameter('firstName', $person->getFirstName())
->setParameter('lastName', $person->getLastName())
->setParameter('fullName', $person->getFirstName() . ' ' . $person->getLastName())
->setParameter('centers', $centers)
;

View File

@ -0,0 +1,266 @@
<?php
namespace Chill\PersonBundle\Search;
use Chill\MainBundle\Pagination\PaginatorFactory;
use Chill\MainBundle\Search\AbstractSearch;
use Chill\MainBundle\Search\SearchInterface;
use Chill\MainBundle\Security\Authorization\AuthorizationHelper;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Component\DependencyInjection\ContainerAwareInterface;
use Symfony\Component\DependencyInjection\ContainerAwareTrait;
use Symfony\Component\Form\FormBuilderInterface;
use Symfony\Component\Security\Core\Authentication\Token\Storage\TokenStorage;
use Symfony\Component\Security\Core\Role\Role;
/**
 * Class SimilarityPersonSearch
 *
 * Search the persons whose canonical fullname is similar (SIMILARITY() of at
 * least 0.15) to the default terms of the pattern, leaving out the persons
 * which are already returned by the regular PersonSearch.
 *
 * @package Chill\PersonBundle\Search
 */
class SimilarityPersonSearch extends AbstractSearch
{
    use ContainerAwareTrait;

    /**
     *
     * @var EntityManagerInterface
     */
    private $em;

    /**
     *
     * @var \Chill\MainBundle\Entity\User
     */
    private $user;

    /**
     *
     * @var AuthorizationHelper
     */
    private $helper;

    /**
     *
     * @var PaginatorFactory
     */
    protected $paginatorFactory;

    const NAME = "person_similarity";

    /**
     *
     * @var PersonSearch
     */
    private $personSearch;

    /**
     * Query builders already created, indexed by a hash of the terms.
     *
     * @var \Doctrine\ORM\QueryBuilder[]
     */
    private $_cacheQuery = array();

    /**
     * SimilarityPersonSearch constructor.
     *
     * @param EntityManagerInterface $em
     * @param TokenStorage $tokenStorage
     * @param AuthorizationHelper $helper
     * @param PaginatorFactory $paginatorFactory
     * @param PersonSearch $personSearch
     * @throws \LogicException if the current user is not a Chill user
     */
    public function __construct(
        EntityManagerInterface $em,
        TokenStorage $tokenStorage,
        AuthorizationHelper $helper,
        PaginatorFactory $paginatorFactory,
        PersonSearch $personSearch)
    {
        $this->em = $em;
        $this->helper = $helper;
        $this->paginatorFactory = $paginatorFactory;
        $this->personSearch = $personSearch;

        // the token is null when nobody is authenticated: treat it like an
        // invalid user instead of failing on a method call on null
        $token = $tokenStorage->getToken();
        $user = null !== $token ? $token->getUser() : null;

        // throw an error if user is not a valid user
        if (!$user instanceof \Chill\MainBundle\Entity\User) {
            throw new \LogicException('The user provided must be an instance'
                . ' of Chill\MainBundle\Entity\User');
        }

        $this->user = $user;
    }

    /*
     * (non-PHPdoc)
     * @see \Chill\MainBundle\Search\SearchInterface::getOrder()
     */
    public function getOrder()
    {
        return 200;
    }

    /*
     * (non-PHPdoc)
     * @see \Chill\MainBundle\Search\SearchInterface::isActiveByDefault()
     */
    public function isActiveByDefault()
    {
        return true;
    }

    /**
     * This search only handles the "person" domain, whatever the format.
     *
     * @param string $domain
     * @param string $format
     * @return bool
     */
    public function supports($domain, $format)
    {
        return 'person' === $domain;
    }

    /**
     * Render the similar persons, either as an html fragment or as an array
     * ready to be serialized to json.
     *
     * @param array $terms
     * @param int $start
     * @param int $limit
     * @param array $options
     * @param string $format
     * @return string|array|null the rendered list for `html` (null when there
     *    is no result), an array with the keys `results` and `pagination`
     *    for `json`, null for any other format
     */
    public function renderResult(array $terms, $start = 0, $limit = 50, array $options = array(), $format = 'html')
    {
        $total = $this->count($terms);
        $paginator = $this->paginatorFactory->create($total);

        if ($format === 'html') {
            // nothing is rendered when no similar person is found
            if ($total === 0) {
                return null;
            }

            return $this->container->get('templating')->render('ChillPersonBundle:Person:list.html.twig',
                array(
                    'persons' => $this->search($terms, $start, $limit, $options),
                    'pattern' => $this->recomposePattern($terms, array('nationality',
                        'firstname', 'lastname', 'birthdate', 'gender',
                        'birthdate-before','birthdate-after'), $terms['_domain']),
                    'total' => $total,
                    'start' => $start,
                    'search_name' => self::NAME,
                    // the preview option may be absent from the options
                    'preview' => $options[SearchInterface::SEARCH_PREVIEW_OPTION] ?? false,
                    'paginator' => $paginator,
                    'title' => "Similar persons"
                ));
        }

        if ($format === 'json') {
            return [
                'results' => $this->search($terms, $start, $limit, \array_merge($options, [ 'simplify' => true ])),
                'pagination' => [
                    'more' => $paginator->hasNextPage()
                ]
            ];
        }

        return null;
    }

    /**
     * Fetch one page of similar persons, ordered by firstname then lastname.
     *
     * @param array $terms
     * @param int $start
     * @param int $limit
     * @param array $options when the key `simplify` is true, each result is
     *    an array with the keys `id` and `text` instead of a Person
     * @return Person[]|array[]
     */
    protected function search(array $terms, $start, $limit, array $options = array())
    {
        $simplify = $options['simplify'] ?? false;
        $qb = $this->createQuery($terms);

        if ($simplify) {
            $qb->select(
                'sp.id',
                $qb->expr()->concat(
                    'sp.firstName',
                    $qb->expr()->literal(' '),
                    'sp.lastName'
                ).'AS text'
            );
        } else {
            $qb->select('sp');
        }

        $qb
            ->setMaxResults($limit)
            ->setFirstResult($start);

        //order by firstname, lastname
        $qb
            ->orderBy('sp.firstName')
            ->addOrderBy('sp.lastName');

        if ($simplify) {
            // fully qualified: Doctrine\ORM\Query is not imported by this file
            return $qb->getQuery()->getResult(\Doctrine\ORM\Query::HYDRATE_ARRAY);
        }

        return $qb->getQuery()->getResult();
    }

    /**
     * Count the similar persons matching the terms.
     *
     * @param array $terms
     * @return int
     */
    protected function count(array $terms)
    {
        $qb = $this->createQuery($terms);
        $qb->select('COUNT(sp.id)');

        // the scalar may be returned as a string, and renderResult compares
        // the total strictly with the integer 0
        return (int) $qb->getQuery()->getSingleScalarResult();
    }

    /**
     * Build the query builder shared by search() and count().
     *
     * The builders are cached by terms; a clone is always returned, which
     * lets the callers alter it freely.
     *
     * @param array $terms
     * @return \Doctrine\ORM\QueryBuilder
     */
    protected function createQuery(array $terms)
    {
        //get from cache
        $cacheKey = md5(serialize($terms));
        if (array_key_exists($cacheKey, $this->_cacheQuery)) {
            return clone $this->_cacheQuery[$cacheKey];
        }

        $qb = $this->em->createQueryBuilder();
        $qb ->select('sp')
            ->from('ChillPersonBundle:Person', 'sp');

        if ($terms['_default'] !== '') {
            // persons found by the regular search are excluded below
            $regularQb = $this->personSearch
                ->createQuery($terms)
                ->addSelect('p.id');

            // The DQL of the regular search is inlined as a sub-query: its
            // named parameters must also be bound on this builder.
            foreach ($regularQb->getParameters() as $parameter) {
                $qb->setParameter(
                    $parameter->getName(),
                    $parameter->getValue(),
                    $parameter->getType()
                );
            }

            $grams = explode(' ', $terms['_default']);

            foreach($grams as $key => $gram) {
                // NOTE(review): the '%' wildcards look useless for
                // SIMILARITY(), but the inlined sub-query appears to reuse
                // the same `default_N` placeholders inside a LIKE: keep
                // both values identical.
                $qb->andWhere('SIMILARITY(sp.fullnameCanonical, UNACCENT(LOWER(:default_'.$key.')) ) >= 0.15')
                    ->setParameter('default_'.$key, '%'.$gram.'%');
            }

            $qb->andWhere($qb->expr()
                ->notIn('sp.id', $regularQb->getDQL())
            );
        }

        //restraint center for security
        $reachableCenters = $this->helper->getReachableCenters($this->user,
            new Role('CHILL_PERSON_SEE'));
        $qb->andWhere($qb->expr()
            ->in('sp.center', ':centers'))
            ->setParameter('centers', $reachableCenters)
        ;

        $this->_cacheQuery[$cacheKey] = $qb;

        return clone $qb;
    }
}