diff --git a/CHANGELOG.md b/CHANGELOG.md index a1aecfe92..d29efd05d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,3 +9,13 @@ Version 1.5.1 - remove inexistant `person.css` file - fix bug in accompanying person validation +Branch `Similarity` +================== + +- Add a column with the canonical full name (lowercased and unaccented) to the person entity ; +- Add a trigram index on the canonical full name ; +- Add a "similar person matcher", which allows detecting persons with similar names when adding a person ; +- Add a search of persons by fuzzy name, returning results with a similarity of at least 0.15 ; + +Thanks to @matla :-) + diff --git a/Entity/Person.php b/Entity/Person.php index 7d19ccc3c..11bad306a 100644 --- a/Entity/Person.php +++ b/Entity/Person.php @@ -113,7 +113,12 @@ class Person implements HasCenterInterface { * @var \Doctrine\Common\Collections\Collection */ private $addresses; - + + /** + * @var string + */ + private $fullnameCanonical; + public function __construct(\DateTime $opening = null) { $this->accompanyingPeriods = new ArrayCollection(); $this->spokenLanguages = new ArrayCollection(); diff --git a/Resources/config/doctrine/Person.orm.yml b/Resources/config/doctrine/Person.orm.yml index df911068b..543757f9a 100644 --- a/Resources/config/doctrine/Person.orm.yml +++ b/Resources/config/doctrine/Person.orm.yml @@ -51,6 +51,9 @@ Chill\PersonBundle\Entity\Person: type: text nullable: true length: 40 + fullnameCanonical: + type: string + length: 255 manyToOne: countryOfBirth: targetEntity: Chill\MainBundle\Entity\Country diff --git a/Resources/config/services/search.yml b/Resources/config/services/search.yml index 2abffbea5..8b4db1373 100644 --- a/Resources/config/services/search.yml +++ b/Resources/config/services/search.yml @@ -11,6 +11,18 @@ services: tags: - { name: chill.search, alias: 'person_regular' } + Chill\PersonBundle\Search\SimilarityPersonSearch: + arguments: + - "@doctrine.orm.entity_manager" + - "@security.token_storage" + - 
"@chill.main.security.authorization.helper" + - "@chill_main.paginator_factory" + - '@chill.person.search_person' + calls: + - ['setContainer', ["@service_container"]] + tags: + - { name: chill.search, alias: 'person_similarity' } + Chill\PersonBundle\Search\SimilarPersonMatcher: arguments: $em: '@Doctrine\ORM\EntityManagerInterface' diff --git a/Resources/migrations/Version20181023101621.php b/Resources/migrations/Version20181023101621.php new file mode 100644 index 000000000..24632fc12 --- /dev/null +++ b/Resources/migrations/Version20181023101621.php @@ -0,0 +1,79 @@ +abortIf($this->connection->getDatabasePlatform()->getName() !== 'postgresql', 'Migration can only be executed safely on \'postgresql\'.'); + + $this->addSql("ALTER TABLE chill_person_person ADD fullnameCanonical VARCHAR(255) DEFAULT '' "); + $this->addSql("UPDATE chill_person_person SET fullnameCanonical=LOWER(UNACCENT(CONCAT(firstname, ' ', lastname)))"); + $this->addSql("CREATE INDEX fullnameCanonical_trgm_idx ON chill_person_person USING GIN (fullnameCanonical gin_trgm_ops)"); + + $this->addSql(<<<'SQL' + CREATE OR REPLACE FUNCTION canonicalize_fullname_on_update() RETURNS TRIGGER AS + $BODY$ + BEGIN + IF NEW.firstname <> OLD.firstname OR NEW.lastname <> OLD.lastname + THEN + UPDATE chill_person_person + SET fullnameCanonical=LOWER(UNACCENT(CONCAT(NEW.firstname, ' ', NEW.lastname))) + WHERE id=NEW.id; + END IF; + RETURN NEW; + END; + $BODY$ LANGUAGE PLPGSQL; +SQL + ); + $this->addSql(<<addSql(<<<'SQL' + CREATE OR REPLACE FUNCTION canonicalize_fullname_on_insert() RETURNS TRIGGER AS + $BODY$ + BEGIN + UPDATE chill_person_person + SET fullnameCanonical=LOWER(UNACCENT(CONCAT(NEW.firstname, ' ', NEW.lastname))) + WHERE id=NEW.id; + RETURN NEW; + END; + $BODY$ LANGUAGE PLPGSQL; +SQL + ); + $this->addSql(<<abortIf($this->connection->getDatabasePlatform()->getName() !== 'postgresql', 'Migration can only be executed safely on \'postgresql\'.'); + + $this->addSql('DROP INDEX fullnameCanonical_trgm_idx'); 
+ $this->addSql('ALTER TABLE chill_person_person DROP fullnameCanonical'); + $this->addSql('DROP TRIGGER canonicalize_fullname_on_update ON chill_person_person'); + $this->addSql('DROP FUNCTION canonicalize_fullname_on_update()'); + $this->addSql('DROP TRIGGER canonicalize_fullname_on_insert ON chill_person_person'); + $this->addSql('DROP FUNCTION canonicalize_fullname_on_insert()'); + } +} diff --git a/Resources/translations/messages.en.yml b/Resources/translations/messages.en.yml index a99af9e74..4c0c13d83 100644 --- a/Resources/translations/messages.en.yml +++ b/Resources/translations/messages.en.yml @@ -68,4 +68,5 @@ Reset: 'Remise à zéro' 'Person details': 'Détails de la personne' Create an accompanying period: Create an accompanying period -'Create': Create \ No newline at end of file +'Create': Create +Similar persons: Similar persons diff --git a/Resources/translations/messages.fr.yml b/Resources/translations/messages.fr.yml index 65b7cd632..ba6d770cf 100644 --- a/Resources/translations/messages.fr.yml +++ b/Resources/translations/messages.fr.yml @@ -201,3 +201,4 @@ Aggregate by age: Aggréger par âge Calculate age in relation to this date: Calculer l'âge par rapport à cette date Group people by country of birth: Aggréger les personnes par pays de naissance +Similar persons: Personnes similaires diff --git a/Resources/views/Person/list.html.twig b/Resources/views/Person/list.html.twig index 31b80f477..11252404c 100644 --- a/Resources/views/Person/list.html.twig +++ b/Resources/views/Person/list.html.twig @@ -14,7 +14,7 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . #} -

{{ 'Person search results'|trans }}

+

{{ title|default('Person search results')|trans }}

{{ '%total% persons matching the search pattern:'|transchoice( total, { '%total%' : total}) }} diff --git a/Search/PersonSearch.php b/Search/PersonSearch.php index 0c9d0eab4..6d562ec95 100644 --- a/Search/PersonSearch.php +++ b/Search/PersonSearch.php @@ -201,7 +201,7 @@ class PersonSearch extends AbstractSearch implements ContainerAwareInterface, * @param array $terms * @return \Doctrine\ORM\QueryBuilder */ - protected function createQuery(array $terms) + public function createQuery(array $terms) { //get from cache $cacheKey = md5(serialize($terms)); @@ -283,7 +283,7 @@ class PersonSearch extends AbstractSearch implements ContainerAwareInterface, foreach($grams as $key => $gram) { $qb->andWhere($qb->expr() - ->like('UNACCENT(LOWER(CONCAT(p.firstName, \' \', p.lastName)))', ':default_'.$key)) + ->like('p.fullnameCanonical', 'UNACCENT(LOWER(:default_'.$key.'))')) ->setParameter('default_'.$key, '%'.$gram.'%'); } } diff --git a/Search/SimilarPersonMatcher.php b/Search/SimilarPersonMatcher.php index 2667f061f..ab13e607a 100644 --- a/Search/SimilarPersonMatcher.php +++ b/Search/SimilarPersonMatcher.php @@ -75,14 +75,18 @@ class SimilarPersonMatcher . ' OR UNACCENT(LOWER(p.lastName)) LIKE UNACCENT(LOWER(:lastName)) ' . ' OR UNACCENT(LOWER(p.firstName)) LIKE UNACCENT(LOWER(:lastName)) ' . ' OR UNACCENT(LOWER(p.lastName)) LIKE UNACCENT(LOWER(:firstName)) ' + . ' OR SIMILARITY(p.fullnameCanonical, UNACCENT(LOWER(:fullName))) >= 0.15 ' . ' ) ' - . ' AND p.center IN (:centers)'; + . ' AND p.center IN (:centers)' + . ' ORDER BY SIMILARITY(p.fullnameCanonical, UNACCENT(LOWER(:fullName))) DESC ' + ; $query = $this->em ->createQuery($dql) ->setParameter('firstName', $person->getFirstName()) ->setParameter('lastName', $person->getLastName()) + ->setParameter('fullName', $person->getFirstName() . ' ' . 
$person->getLastName())
             ->setParameter('centers', $centers)
             ;
diff --git a/Search/SimilarityPersonSearch.php b/Search/SimilarityPersonSearch.php
new file mode 100644
index 000000000..bedc715f0
--- /dev/null
+++ b/Search/SimilarityPersonSearch.php
@@ -0,0 +1,266 @@
+em = $em;
+        $this->user = $tokenStorage->getToken()->getUser();
+        $this->helper = $helper;
+        $this->paginatorFactory = $paginatorFactory;
+        $this->personSearch = $personSearch;
+
+        // throw an error if user is not a valid user
+        if (!$this->user instanceof \Chill\MainBundle\Entity\User) {
+            throw new \LogicException('The user provided must be an instance'
+                    . ' of Chill\MainBundle\Entity\User');
+        }
+    }
+
+    /**
+     * Returns the fixed order value (200) of this search.
+     *
+     * @see \Chill\MainBundle\Search\SearchInterface::getOrder()
+     * @return int
+     */
+    public function getOrder()
+    {
+        return 200;
+    }
+
+    /**
+     * This search always reports itself as active by default.
+     *
+     * @see \Chill\MainBundle\Search\SearchInterface::isActiveByDefault()
+     * @return bool
+     */
+    public function isActiveByDefault()
+    {
+        return true;
+    }
+
+    /**
+     * Tells whether this search handles the given domain.
+     *
+     * Only the domain is checked; the format is not taken into account.
+     *
+     * @param string $domain
+     * @param string $format
+     * @return bool true when the domain is exactly 'person'
+     */
+    public function supports($domain, $format)
+    {
+        return 'person' === $domain;
+    }
+
+    /**
+     * Renders the persons whose canonical full name is similar to the terms.
+     *
+     * - 'html': returns the rendered person list template, or null when
+     *   nothing matches;
+     * - 'json': returns an array with the keys 'results' (simplified rows)
+     *   and 'pagination' ('more' tells whether a next page exists);
+     * - any other format: returns null.
+     *
+     * @param array $terms
+     * @param int $start
+     * @param int $limit
+     * @param array $options
+     * @param string $format
+     * @return string|array|null
+     */
+    public function renderResult(array $terms, $start = 0, $limit = 50, array $options = array(), $format = 'html')
+    {
+        $total = $this->count($terms);
+        $paginator = $this->paginatorFactory->create($total);
+
+        if ($format === 'html') {
+            if ($total === 0) {
+                return null;
+            }
+
+            return $this->container->get('templating')->render('ChillPersonBundle:Person:list.html.twig',
+                array(
+                    'persons' => $this->search($terms, $start, $limit, $options),
+                    'pattern' => $this->recomposePattern($terms, array('nationality',
+                        'firstname', 'lastname', 'birthdate', 'gender',
+                        'birthdate-before','birthdate-after'), $terms['_domain']),
+                    'total' => $total,
+                    'start' => $start,
+                    'search_name' => self::NAME,
+                    // the preview option may be absent: fall back to false
+                    // instead of triggering an undefined index notice
+                    'preview' => $options[SearchInterface::SEARCH_PREVIEW_OPTION] ?? false,
+                    'paginator' => $paginator,
+                    'title' => "Similar persons"
+                ));
+        }
+
+        if ($format === 'json') {
+            return [
+                'results' => $this->search($terms, $start, $limit, \array_merge($options, [ 'simplify' => true ])),
+                'pagination' => [
+                    'more' => $paginator->hasNextPage()
+                ]
+            ];
+        }
+
+        // unsupported format: nothing to render
+        return null;
+    }
+
+    /**
+     * Executes the similarity query for one page of results.
+     *
+     * When the option 'simplify' is truthy, each row is an array made of the
+     * person id and a "text" column (first name, a space, last name);
+     * otherwise the Person entities themselves are returned.
+     *
+     * @param array $terms
+     * @param int $start offset of the first result
+     * @param int $limit maximum number of results
+     * @param array $options
+     * @return Person[]|array[]
+     */
+    protected function search(array $terms, $start, $limit, array $options = array())
+    {
+        $simplify = $options['simplify'] ?? false;
+        $qb = $this->createQuery($terms);
+
+        if ($simplify) {
+            $qb->select(
+                'sp.id',
+                $qb->expr()->concat(
+                    'sp.firstName',
+                    $qb->expr()->literal(' '),
+                    'sp.lastName'
+                ).'AS text'
+            );
+        } else {
+            $qb->select('sp');
+        }
+
+        // paginate, ordered by first name then last name
+        $qb
+            ->setMaxResults($limit)
+            ->setFirstResult($start)
+            ->orderBy('sp.firstName')
+            ->addOrderBy('sp.lastName');
+
+        $query = $qb->getQuery();
+
+        if ($simplify) {
+            return $query->getResult(Query::HYDRATE_ARRAY);
+        }
+
+        return $query->getResult();
+    }
+
+    /**
+     * Counts the persons matched by the similarity query.
+     *
+     * @param array $terms
+     * @return int
+     */
+    protected function count(array $terms)
+    {
+        $qb = $this->createQuery($terms);
+        $qb->select('COUNT(sp.id)');
+
+        // The scalar may be hydrated as a string (e.g. "0"); cast it so that
+        // the strict comparison `$total !== 0` in renderResult() is reliable.
+        return (int) $qb->getQuery()->getSingleScalarResult();
+    }
+
+
+    private $_cacheQuery = array();
+
+    /**
+     * Builds the query selecting persons (alias "sp") by name similarity.
+     *
+     * When the default term is not empty, every space-separated gram must
+     * reach a SIMILARITY of 0.15 with the canonical full name, and the
+     * persons selected by the query of the injected person search are
+     * excluded. Results are always restricted to the centers reachable by
+     * the current user with the role CHILL_PERSON_SEE.
+     *
+     * Builders are cached per set of terms; a clone is returned so that
+     * callers can alter it without touching the cached instance.
+     *
+     * @param array $terms
+     * @return \Doctrine\ORM\QueryBuilder
+     */
+    protected function createQuery(array $terms)
+    {
+        //get from cache
+        $cacheKey = md5(serialize($terms));
+        if (array_key_exists($cacheKey, $this->_cacheQuery)) {
+            return clone $this->_cacheQuery[$cacheKey];
+        }
+
+        $qb = $this->em->createQueryBuilder();
+
+        $qb->select('sp')
+            ->from('ChillPersonBundle:Person', 'sp');
+
+        if ($terms['_default'] !== '') {
+            $grams = explode(' ', $terms['_default']);
+
+            // NOTE(review): getDQL() below exports only the DQL text, so the
+            // placeholders of the embedded person search query presumably
+            // rely on the parameters bound here under the same names
+            // (default_N, '%gram%') - confirm before renaming them or
+            // dropping the '%' wrapping.
+            foreach ($grams as $key => $gram) {
+                $qb->andWhere('SIMILARITY(sp.fullnameCanonical, UNACCENT(LOWER(:default_'.$key.')) ) >= 0.15')
+                    ->setParameter('default_'.$key, '%'.$gram.'%');
+            }
+
+            // leave out the persons already selected by the regular search
+            $regularSearchDql = $this->personSearch
+                ->createQuery($terms)
+                ->addSelect('p.id')
+                ->getDQL();
+
+            $qb->andWhere($qb->expr()->notIn('sp.id', $regularSearchDql));
+        }
+
+        //restraint center for security
+        $reachableCenters = $this->helper->getReachableCenters($this->user,
+            new Role('CHILL_PERSON_SEE'));
+        $qb->andWhere($qb->expr()
+                ->in('sp.center', ':centers'))
+            ->setParameter('centers', $reachableCenters)
+            ;
+
+        $this->_cacheQuery[$cacheKey] = $qb;
+
+        return clone $qb;
+    }
+
+}
\ No newline at end of file