Handle duplicate addresses in AddressReferenceBaseImporter

Refactored the AddressReferenceBaseImporter to optimize address import and reconciliation. The code now identifies duplicate addresses in the temporary table and handles them according to the 'allowRemoveDoubleRefId' flag. This enhances data consistency during import operations.
This commit is contained in:
Julien Fastré 2024-07-19 13:41:17 +02:00
parent 6cb085f5f7
commit 19accc4d00
Signed by: julienfastre
GPG Key ID: BDE2190974723FCB
2 changed files with 68 additions and 11 deletions

View File

@ -0,0 +1,5 @@
kind: Feature
body: Handle duplicate reference id in the import of reference addresses
time: 2024-07-19T13:40:59.418509766+02:00
custom:
Issue: ""

View File

@ -15,6 +15,12 @@ use Doctrine\DBAL\Connection;
use Doctrine\DBAL\Statement;
use Psr\Log\LoggerInterface;
/**
* Import addresses into the database.
*
* This importer do some optimization about the import, ensuring that adresses are inserted and reconciled with
* the existing one on a optimized way.
*/
final class AddressReferenceBaseImporter
{
private const INSERT = <<<'SQL'
@ -49,11 +55,18 @@ final class AddressReferenceBaseImporter
{
}
public function finalize(): void
/**
* Finalize the import process and make reconciliation with addresses.
*
* @param bool $allowRemoveDoubleRefId if true, allow the importer to remove automatically addresses with same refid
*
* @throws \Exception
*/
public function finalize(bool $allowRemoveDoubleRefId = false): void
{
$this->doInsertPending();
$this->updateAddressReferenceTable();
$this->updateAddressReferenceTable($allowRemoveDoubleRefId);
$this->deleteTemporaryTable();
@ -61,6 +74,11 @@ final class AddressReferenceBaseImporter
$this->isInitialized = false;
}
/**
* Do import a single address.
*
* @throws \Exception
*/
public function importAddress(
string $refAddress,
?string $refPostalCode,
@ -169,15 +187,48 @@ final class AddressReferenceBaseImporter
$this->isInitialized = true;
}
private function updateAddressReferenceTable(): void
private function updateAddressReferenceTable(bool $allowRemoveDoubleRefId): void
{
$this->defaultConnection->executeStatement(
'CREATE INDEX idx_ref_add_temp ON reference_address_temp (refid)'
);
// 1) Add new addresses
$this->logger->info(self::LOG_PREFIX.'upsert new addresses');
$affected = $this->defaultConnection->executeStatement("INSERT INTO chill_main_address_reference
// 0) detect for doublon in current temporary table
$results = $this->defaultConnection->executeQuery(
'SELECT COUNT(*) AS nb_appearance, refid FROM reference_address_temp GROUP BY refid HAVING count(*) > 1'
);
$hasDouble = false;
foreach ($results->iterateAssociative() as $result) {
$this->logger->error(self::LOG_PREFIX.'Some reference id are present more than one time', ['nb_apparearance' => $result['nb_appearance'], 'refid' => $result['refid']]);
$hasDouble = true;
}
if ($hasDouble) {
if ($allowRemoveDoubleRefId) {
$this->logger->alert(self::LOG_PREFIX.'We are going to remove the addresses which are present more than once in the table');
$this->defaultConnection->executeStatement('ALTER TABLE reference_address_temp ADD COLUMN gid SERIAL');
$removed = $this->defaultConnection->executeStatement(<<<'SQL'
WITH ordering AS (
SELECT gid, rank() over (PARTITION BY refid ORDER BY gid DESC) AS ranking
FROM reference_address_temp
WHERE refid IN (SELECT refid FROM reference_address_temp group by refid having count(*) > 1)
),
keep_last AS (
SELECT gid, ranking FROM ordering where ranking > 1
)
DELETE FROM reference_address_temp WHERE gid IN (SELECT gid FROM keep_last);
SQL);
$this->logger->alert(self::LOG_PREFIX.'addresses with same refid present twice, we removed some double', ['nb_removed', $removed]);
} else {
throw new \RuntimeException('Some addresses are present twice in the database, we cannot process them');
}
}
$this->defaultConnection->transactional(function ($connection): void {
// 1) Add new addresses
$this->logger->info(self::LOG_PREFIX.'upsert new addresses');
$affected = $connection->executeStatement("INSERT INTO chill_main_address_reference
(id, postcode_id, refid, street, streetnumber, municipalitycode, source, point, createdat, deletedat, updatedat)
SELECT
nextval('chill_main_address_reference_id_seq'),
@ -195,16 +246,17 @@ final class AddressReferenceBaseImporter
ON CONFLICT (refid, source) DO UPDATE
SET postcode_id = excluded.postcode_id, refid = excluded.refid, street = excluded.street, streetnumber = excluded.streetnumber, municipalitycode = excluded.municipalitycode, source = excluded.source, point = excluded.point, updatedat = NOW(), deletedAt = NULL
");
$this->logger->info(self::LOG_PREFIX.'addresses upserted', ['upserted' => $affected]);
$this->logger->info(self::LOG_PREFIX.'addresses upserted', ['upserted' => $affected]);
// 3) Delete addresses
$this->logger->info(self::LOG_PREFIX.'soft delete adresses');
$affected = $this->defaultConnection->executeStatement('UPDATE chill_main_address_reference
// 3) Delete addresses
$this->logger->info(self::LOG_PREFIX.'soft delete adresses');
$affected = $connection->executeStatement('UPDATE chill_main_address_reference
SET deletedat = NOW()
WHERE
chill_main_address_reference.refid NOT IN (SELECT refid FROM reference_address_temp WHERE source LIKE ?)
AND chill_main_address_reference.source LIKE ?
', [$this->currentSource, $this->currentSource]);
$this->logger->info(self::LOG_PREFIX.'addresses deleted', ['deleted' => $affected]);
$this->logger->info(self::LOG_PREFIX.'addresses deleted', ['deleted' => $affected]);
});
}
}