From 1552b3c9d7c426950e356e8592505f24976d8551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julien=20Fastr=C3=A9?= Date: Wed, 12 Jul 2023 17:30:46 +0200 Subject: [PATCH] [Addresses] create a service to collate addresses with the address reference --- ...ollateAddressWithReferenceOrPostalCode.php | 147 ++++++++++++++++++ ...teAddressWithReferenceOrPostalCodeTest.php | 44 ++++++ 2 files changed, 191 insertions(+) create mode 100644 src/Bundle/ChillMainBundle/Service/AddressGeographicalUnit/CollateAddressWithReferenceOrPostalCode.php create mode 100644 src/Bundle/ChillMainBundle/Tests/Services/AddressGeographicalUnit/CollateAddressWithReferenceOrPostalCodeTest.php diff --git a/src/Bundle/ChillMainBundle/Service/AddressGeographicalUnit/CollateAddressWithReferenceOrPostalCode.php b/src/Bundle/ChillMainBundle/Service/AddressGeographicalUnit/CollateAddressWithReferenceOrPostalCode.php new file mode 100644 index 000000000..656751eeb --- /dev/null +++ b/src/Bundle/ChillMainBundle/Service/AddressGeographicalUnit/CollateAddressWithReferenceOrPostalCode.php @@ -0,0 +1,147 @@ + :since_id -- to set the first id + ) sq + WHERE ranked = 1) + UPDATE chill_main_address SET postcode_id = cmpc_reference_id FROM recollate WHERE recollate.address_id = chill_main_address.id; + SQL; + + /** + * Associate the address with the most similar address reference. + * + * This query intentionally ignores the existing addressreference_id, so that the address can be + * fixed to match the most similar address reference. 
+     */
+    private const FORCE_MOST_SIMILAR_ADDRESS_REFERENCE = <<<'SQL'
+        WITH recollate AS (
+            SELECT * FROM (
+                SELECT cma.id AS address_id, cma.streetnumber, cma.street, cmpc.code, cmpc.label, cmar.id AS address_reference_id, cmar.streetnumber, cmar.street, cmpc_reference.code, cmpc_reference.label,
+                    similarity(cma.street, cmar.street),
+                    RANK() OVER (PARTITION BY cma.id ORDER BY SIMILARITY (cma.street, cmar.street) DESC, SIMILARITY (cma.streetnumber, cmar.streetnumber), cmar.id ASC) AS ranked
+                FROM
+                    chill_main_address cma
+                    JOIN chill_main_postal_code cmpc on cma.postcode_id = cmpc.id,
+                    chill_main_address_reference cmar JOIN chill_main_postal_code cmpc_reference ON cmar.postcode_id = cmpc_reference.id
+                WHERE
+                    -- only where the reference is null
+                    -- cma.addressreference_id IS NULL
+                    cma.addressreference_id != cmar.id
+                    -- only if cmpc is a reference (must be matched before executing this query)
+                    AND cma.postcode_id = cmar.postcode_id
+                    -- join cmpc to cma
+                    AND SIMILARITY(LOWER(cma.street), LOWER(cmar.street)) > 0.6 AND LOWER(cma.streetnumber) = LOWER(cmar.streetnumber)
+                    -- only addresses which match the address reference - let the user decide if the reference has changed
+                    AND cma.refstatus = 'match'
+                    -- only the most recent
+                    AND cma.id > :since_id
+            ) AS sq
+            WHERE ranked = 1
+        )
+        UPDATE chill_main_address SET addressreference_id = recollate.address_reference_id FROM recollate WHERE chill_main_address.id = recollate.address_id;
+    SQL;
+
+    /**
+     * Recompute the point of every address whose id is greater than :since_id.
+     *
+     * The point is taken from the linked address reference when there is one, and
+     * falls back to the center of the address's postal code otherwise. Both tables are
+     * LEFT JOINed, so an address with neither value gets a NULL point.
+     */
+    private const UPDATE_POINT = <<<'SQL'
+        WITH address_geom AS (
+            SELECT cma.id AS address_id, COALESCE(cmar.point, cmpc.center) AS point
+            FROM chill_main_address cma
+            LEFT JOIN chill_main_address_reference cmar ON cma.addressreference_id = cmar.id
+            LEFT JOIN chill_main_postal_code cmpc ON cma.postcode_id = cmpc.id
+            WHERE cma.id > :since_id
+        )
+        UPDATE chill_main_address SET point = address_geom.point FROM address_geom WHERE address_geom.address_id = chill_main_address.id
+    SQL;
+
+    /**
+     * Read the greatest id currently present in chill_main_address.
+     */
+    private const MAX_ADDRESS_ID = 
<<<'SQL'
+        SELECT MAX(id) AS max_id FROM chill_main_address;
+    SQL;
+
+
+    public function __construct(
+        private Connection $connection,
+        private LoggerInterface $logger,
+    ) {
+    }
+
+    /**
+     * Re-collate every address whose id is strictly greater than $sinceId.
+     *
+     * Within one call to Connection::transactional(), this executes, in order,
+     * FORCE_ORIGINAL_POSTAL_CODE, FORCE_MOST_SIMILAR_ADDRESS_REFERENCE and UPDATE_POINT,
+     * then reads the greatest address id. The number of rows affected by each statement
+     * is written to the log together with $sinceId and the returned id.
+     *
+     * @param int $sinceId lower bound (exclusive) of the address ids to process
+     *
+     * @return int the greatest id found in chill_main_address, or 0 when the table is empty
+     *
+     * @throws \Throwable any error raised while running the statements; it is logged, then re-thrown
+     */
+    public function __invoke(int $sinceId = 0): int
+    {
+        try {
+            [
+                $postCodeSetReferenceFromMostSimilar,
+                $addressReferenceMatch,
+                $pointUpdates,
+                $lastId,
+            ] = $this->connection->transactional(function () use ($sinceId): array {
+                // executeStatement() returns the number of affected rows; executeQuery() would
+                // hand back a Result object, which is meaningless in the log context below.
+                $postCodeSetReferenceFromMostSimilar = $this->connection->executeStatement(self::FORCE_ORIGINAL_POSTAL_CODE, ['since_id' => $sinceId]);
+                $addressReferenceMatch = $this->connection->executeStatement(self::FORCE_MOST_SIMILAR_ADDRESS_REFERENCE, ['since_id' => $sinceId]);
+                $pointUpdates = $this->connection->executeStatement(self::UPDATE_POINT, ['since_id' => $sinceId]);
+                // MAX(id) is NULL on an empty table and drivers may return a numeric string:
+                // cast so that the declared int return type always holds.
+                $lastId = (int) $this->connection->fetchOne(self::MAX_ADDRESS_ID);
+
+                return [
+                    $postCodeSetReferenceFromMostSimilar,
+                    $addressReferenceMatch,
+                    $pointUpdates,
+                    $lastId,
+                ];
+            });
+        } catch (\Throwable $e) {
+            $this->logger->error(self::LOG_PREFIX . "error while re-collating addresses", [
+                'message' => $e->getMessage(),
+                'trace' => $e->getTraceAsString(),
+            ]);
+
+            throw $e;
+        }
+
+        $this->logger->info(self::LOG_PREFIX . "Collate the addresses with reference", [
+            'set_postcode_from_most_similar' => $postCodeSetReferenceFromMostSimilar,
+            'address_reference_match' => $addressReferenceMatch,
+            'point_update' => $pointUpdates,
+            'since_id' => $sinceId,
+            'last_id' => $lastId,
+        ]);
+
+        return $lastId;
+    }
+}
diff --git a/src/Bundle/ChillMainBundle/Tests/Services/AddressGeographicalUnit/CollateAddressWithReferenceOrPostalCodeTest.php b/src/Bundle/ChillMainBundle/Tests/Services/AddressGeographicalUnit/CollateAddressWithReferenceOrPostalCodeTest.php
new file mode 100644
index 000000000..61b36e669
--- /dev/null
+++ b/src/Bundle/ChillMainBundle/Tests/Services/AddressGeographicalUnit/CollateAddressWithReferenceOrPostalCodeTest.php
@@ -0,0 +1,44 @@
+connection = self::$container->get(Connection::class);
+    }
+
+    public function testRun(): void
+    {
+        $collator = new CollateAddressWithReferenceOrPostalCode(
+            $this->connection,
+            new NullLogger()
+        );
+
+        $result = $collator(0);
+
+        self::assertGreaterThan(0, $result);
+    }
+}