diff --git a/.changes/unreleased/Feature-20240719-134059.yaml b/.changes/unreleased/Feature-20240719-134059.yaml new file mode 100644 index 000000000..888219d08 --- /dev/null +++ b/.changes/unreleased/Feature-20240719-134059.yaml @@ -0,0 +1,5 @@ +kind: Feature +body: Handle duplicate reference id in the import of reference addresses +time: 2024-07-19T13:40:59.418509766+02:00 +custom: + Issue: "" diff --git a/src/Bundle/ChillMainBundle/Service/Import/AddressReferenceBaseImporter.php b/src/Bundle/ChillMainBundle/Service/Import/AddressReferenceBaseImporter.php index 9407bd600..a11bd0c12 100644 --- a/src/Bundle/ChillMainBundle/Service/Import/AddressReferenceBaseImporter.php +++ b/src/Bundle/ChillMainBundle/Service/Import/AddressReferenceBaseImporter.php @@ -15,6 +15,12 @@ use Doctrine\DBAL\Connection; use Doctrine\DBAL\Statement; use Psr\Log\LoggerInterface; +/** + * Import addresses into the database. + * + * This importer do some optimization about the import, ensuring that adresses are inserted and reconciled with + * the existing one on a optimized way. + */ final class AddressReferenceBaseImporter { private const INSERT = <<<'SQL' @@ -49,11 +55,18 @@ final class AddressReferenceBaseImporter { } - public function finalize(): void + /** + * Finalize the import process and make reconciliation with addresses. + * + * @param bool $allowRemoveDoubleRefId if true, allow the importer to remove automatically addresses with same refid + * + * @throws \Exception + */ + public function finalize(bool $allowRemoveDoubleRefId = false): void { $this->doInsertPending(); - $this->updateAddressReferenceTable(); + $this->updateAddressReferenceTable($allowRemoveDoubleRefId); $this->deleteTemporaryTable(); @@ -61,6 +74,11 @@ final class AddressReferenceBaseImporter $this->isInitialized = false; } + /** + * Do import a single address. + * + * @throws \Exception + */ public function importAddress( string $refAddress, ?string $refPostalCode, @@ -169,15 +187,48 @@ final class AddressReferenceBaseImporter $this->isInitialized = true; } - private function updateAddressReferenceTable(): void + private function updateAddressReferenceTable(bool $allowRemoveDoubleRefId): void { $this->defaultConnection->executeStatement( 'CREATE INDEX idx_ref_add_temp ON reference_address_temp (refid)' ); - // 1) Add new addresses - $this->logger->info(self::LOG_PREFIX.'upsert new addresses'); - $affected = $this->defaultConnection->executeStatement("INSERT INTO chill_main_address_reference + // 0) detect for doublon in current temporary table + $results = $this->defaultConnection->executeQuery( + 'SELECT COUNT(*) AS nb_appearance, refid FROM reference_address_temp GROUP BY refid HAVING count(*) > 1' + ); + + $hasDouble = false; + foreach ($results->iterateAssociative() as $result) { + $this->logger->error(self::LOG_PREFIX.'Some reference id are present more than one time', ['nb_apparearance' => $result['nb_appearance'], 'refid' => $result['refid']]); + $hasDouble = true; + } + + if ($hasDouble) { + if ($allowRemoveDoubleRefId) { + $this->logger->alert(self::LOG_PREFIX.'We are going to remove the addresses which are present more than once in the table'); + $this->defaultConnection->executeStatement('ALTER TABLE reference_address_temp ADD COLUMN gid SERIAL'); + $removed = $this->defaultConnection->executeStatement(<<<'SQL' + WITH ordering AS ( + SELECT gid, rank() over (PARTITION BY refid ORDER BY gid DESC) AS ranking + FROM reference_address_temp + WHERE refid IN (SELECT refid FROM reference_address_temp group by refid having count(*) > 1) + ), + keep_last AS ( + SELECT gid, ranking FROM ordering where ranking > 1 + ) + DELETE FROM reference_address_temp WHERE gid IN (SELECT gid FROM keep_last); + SQL); + $this->logger->alert(self::LOG_PREFIX.'addresses with same refid present twice, we removed some double', ['nb_removed', $removed]); + } else { + throw new \RuntimeException('Some addresses are present twice in the database, we cannot process them'); + } + } + + $this->defaultConnection->transactional(function ($connection): void { + // 1) Add new addresses + $this->logger->info(self::LOG_PREFIX.'upsert new addresses'); + $affected = $connection->executeStatement("INSERT INTO chill_main_address_reference (id, postcode_id, refid, street, streetnumber, municipalitycode, source, point, createdat, deletedat, updatedat) SELECT nextval('chill_main_address_reference_id_seq'), @@ -195,16 +246,17 @@ final class AddressReferenceBaseImporter ON CONFLICT (refid, source) DO UPDATE SET postcode_id = excluded.postcode_id, refid = excluded.refid, street = excluded.street, streetnumber = excluded.streetnumber, municipalitycode = excluded.municipalitycode, source = excluded.source, point = excluded.point, updatedat = NOW(), deletedAt = NULL "); - $this->logger->info(self::LOG_PREFIX.'addresses upserted', ['upserted' => $affected]); + $this->logger->info(self::LOG_PREFIX.'addresses upserted', ['upserted' => $affected]); - // 3) Delete addresses - $this->logger->info(self::LOG_PREFIX.'soft delete adresses'); - $affected = $this->defaultConnection->executeStatement('UPDATE chill_main_address_reference + // 3) Delete addresses + $this->logger->info(self::LOG_PREFIX.'soft delete adresses'); + $affected = $connection->executeStatement('UPDATE chill_main_address_reference SET deletedat = NOW() WHERE chill_main_address_reference.refid NOT IN (SELECT refid FROM reference_address_temp WHERE source LIKE ?) AND chill_main_address_reference.source LIKE ? ', [$this->currentSource, $this->currentSource]); - $this->logger->info(self::LOG_PREFIX.'addresses deleted', ['deleted' => $affected]); + $this->logger->info(self::LOG_PREFIX.'addresses deleted', ['deleted' => $affected]); + }); } }