From 19accc4d0069e1d7af6d0192e6c7dfa40605d847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julien=20Fastr=C3=A9?= Date: Fri, 19 Jul 2024 13:41:17 +0200 Subject: [PATCH] Handle duplicate addresses in AddressReferenceBaseImporter Refactored the AddressReferenceBaseImporter to optimize address import and reconciliation. The code now identifies duplicate addresses in the temporary table and handles them according to the 'allowRemoveDoubleRefId' flag. This enhances data consistency during import operations. --- .../unreleased/Feature-20240719-134059.yaml | 5 ++ .../Import/AddressReferenceBaseImporter.php | 74 ++++++++++++++++--- 2 files changed, 68 insertions(+), 11 deletions(-) create mode 100644 .changes/unreleased/Feature-20240719-134059.yaml diff --git a/.changes/unreleased/Feature-20240719-134059.yaml b/.changes/unreleased/Feature-20240719-134059.yaml new file mode 100644 index 000000000..888219d08 --- /dev/null +++ b/.changes/unreleased/Feature-20240719-134059.yaml @@ -0,0 +1,5 @@ +kind: Feature +body: Handle duplicate reference id in the import of reference addresses +time: 2024-07-19T13:40:59.418509766+02:00 +custom: + Issue: "" diff --git a/src/Bundle/ChillMainBundle/Service/Import/AddressReferenceBaseImporter.php b/src/Bundle/ChillMainBundle/Service/Import/AddressReferenceBaseImporter.php index 9407bd600..a11bd0c12 100644 --- a/src/Bundle/ChillMainBundle/Service/Import/AddressReferenceBaseImporter.php +++ b/src/Bundle/ChillMainBundle/Service/Import/AddressReferenceBaseImporter.php @@ -15,6 +15,12 @@ use Doctrine\DBAL\Connection; use Doctrine\DBAL\Statement; use Psr\Log\LoggerInterface; +/** + * Import addresses into the database. + * + * This importer do some optimization about the import, ensuring that adresses are inserted and reconciled with + * the existing one on a optimized way. + */ final class AddressReferenceBaseImporter { private const INSERT = <<<'SQL' @@ -49,11 +55,18 @@ final class AddressReferenceBaseImporter { } - public function finalize(): void + /** + * Finalize the import process and make reconciliation with addresses. + * + * @param bool $allowRemoveDoubleRefId if true, allow the importer to remove automatically addresses with same refid + * + * @throws \Exception + */ + public function finalize(bool $allowRemoveDoubleRefId = false): void { $this->doInsertPending(); - $this->updateAddressReferenceTable(); + $this->updateAddressReferenceTable($allowRemoveDoubleRefId); $this->deleteTemporaryTable(); @@ -61,6 +74,11 @@ final class AddressReferenceBaseImporter $this->isInitialized = false; } + /** + * Do import a single address. + * + * @throws \Exception + */ public function importAddress( string $refAddress, ?string $refPostalCode, @@ -169,15 +187,48 @@ final class AddressReferenceBaseImporter $this->isInitialized = true; } - private function updateAddressReferenceTable(): void + private function updateAddressReferenceTable(bool $allowRemoveDoubleRefId): void { $this->defaultConnection->executeStatement( 'CREATE INDEX idx_ref_add_temp ON reference_address_temp (refid)' ); - // 1) Add new addresses - $this->logger->info(self::LOG_PREFIX.'upsert new addresses'); - $affected = $this->defaultConnection->executeStatement("INSERT INTO chill_main_address_reference + // 0) detect for doublon in current temporary table + $results = $this->defaultConnection->executeQuery( + 'SELECT COUNT(*) AS nb_appearance, refid FROM reference_address_temp GROUP BY refid HAVING count(*) > 1' + ); + + $hasDouble = false; + foreach ($results->iterateAssociative() as $result) { + $this->logger->error(self::LOG_PREFIX.'Some reference id are present more than one time', ['nb_apparearance' => $result['nb_appearance'], 'refid' => $result['refid']]); + $hasDouble = true; + } + + if ($hasDouble) { + if ($allowRemoveDoubleRefId) { + $this->logger->alert(self::LOG_PREFIX.'We are going to remove the addresses which are present more than once in the table'); + $this->defaultConnection->executeStatement('ALTER TABLE reference_address_temp ADD COLUMN gid SERIAL'); + $removed = $this->defaultConnection->executeStatement(<<<'SQL' + WITH ordering AS ( + SELECT gid, rank() over (PARTITION BY refid ORDER BY gid DESC) AS ranking + FROM reference_address_temp + WHERE refid IN (SELECT refid FROM reference_address_temp group by refid having count(*) > 1) + ), + keep_last AS ( + SELECT gid, ranking FROM ordering where ranking > 1 + ) + DELETE FROM reference_address_temp WHERE gid IN (SELECT gid FROM keep_last); + SQL); + $this->logger->alert(self::LOG_PREFIX.'addresses with same refid present twice, we removed some double', ['nb_removed', $removed]); + } else { + throw new \RuntimeException('Some addresses are present twice in the database, we cannot process them'); + } + } + + $this->defaultConnection->transactional(function ($connection): void { + // 1) Add new addresses + $this->logger->info(self::LOG_PREFIX.'upsert new addresses'); + $affected = $connection->executeStatement("INSERT INTO chill_main_address_reference (id, postcode_id, refid, street, streetnumber, municipalitycode, source, point, createdat, deletedat, updatedat) SELECT nextval('chill_main_address_reference_id_seq'), @@ -195,16 +246,17 @@ final class AddressReferenceBaseImporter ON CONFLICT (refid, source) DO UPDATE SET postcode_id = excluded.postcode_id, refid = excluded.refid, street = excluded.street, streetnumber = excluded.streetnumber, municipalitycode = excluded.municipalitycode, source = excluded.source, point = excluded.point, updatedat = NOW(), deletedAt = NULL "); - $this->logger->info(self::LOG_PREFIX.'addresses upserted', ['upserted' => $affected]); + $this->logger->info(self::LOG_PREFIX.'addresses upserted', ['upserted' => $affected]); - // 3) Delete addresses - $this->logger->info(self::LOG_PREFIX.'soft delete adresses'); - $affected = $this->defaultConnection->executeStatement('UPDATE chill_main_address_reference + // 3) Delete addresses + $this->logger->info(self::LOG_PREFIX.'soft delete adresses'); + $affected = $connection->executeStatement('UPDATE chill_main_address_reference SET deletedat = NOW() WHERE chill_main_address_reference.refid NOT IN (SELECT refid FROM reference_address_temp WHERE source LIKE ?) AND chill_main_address_reference.source LIKE ? ', [$this->currentSource, $this->currentSource]); - $this->logger->info(self::LOG_PREFIX.'addresses deleted', ['deleted' => $affected]); + $this->logger->info(self::LOG_PREFIX.'addresses deleted', ['deleted' => $affected]); + }); } }