[Addresses] create a service to collate addresses with the address reference

This commit is contained in:
Julien Fastré 2023-07-12 17:30:46 +02:00
parent 3f66e1a862
commit 1552b3c9d7
Signed by: julienfastre
GPG Key ID: BDE2190974723FCB
2 changed files with 191 additions and 0 deletions

View File

@ -0,0 +1,147 @@
<?php
declare(strict_types=1);
/*
* Chill is a software for social workers
*
* For the full copyright and license information, please view
* the LICENSE file that was distributed with this source code.
*/
namespace Chill\MainBundle\Service\AddressGeographicalUnit;
use Doctrine\DBAL\Connection;
use Psr\Log\LoggerInterface;
final readonly class CollateAddressWithReferenceOrPostalCode
{
private const LOG_PREFIX = '[collate addresses] ';
/**
* For the address having an "invented" postal code, find the postal code "reference" with the same code,
* and the most similar name. When two reference code match, we add
*
* This query intentionally includes also address with reference, as the reference may be wrong.
*/
private const FORCE_ORIGINAL_POSTAL_CODE = <<<'SQL'
WITH recollate AS (
SELECT * FROM (
SELECT cma.id AS address_id, cmpc.id, cmpc.label, cmpc.code, cmpc_reference.id AS cmpc_reference_id, cmpc_reference.label, cmpc_reference.code,
RANK() OVER (PARTITION BY cma.id ORDER BY SIMILARITY(cmpc.label, cmpc_reference.label) DESC, cmpc_reference.id ASC) AS ranked
FROM
chill_main_address cma JOIN chill_main_postal_code cmpc on cma.postcode_id = cmpc.id,
chill_main_postal_code cmpc_reference
WHERE
-- use only postal code which are reference
cmpc_reference.id != cmpc.id AND cmpc_reference.origin = 0
-- only where the reference is null or the cmpc is created manually
--AND cma.addressreference_id IS NULL
AND cmpc.origin != 0
-- only when postal code match
AND TRIM(REPLACE(LOWER(cmpc.code), ' ', '')) = LOWER(cmpc_reference.code)
AND cma.id > :since_id -- to set the first id
) sq
WHERE ranked = 1)
UPDATE chill_main_address SET postcode_id = cmpc_reference_id FROM recollate WHERE recollate.address_id = chill_main_address.id;
SQL;
/**
* associate the address with the most similar address reference.
*
* This query intentionally ignores the existing addressreference_id, to let fixing the address match the
* most similar address reference.
*/
private const FORCE_MOST_SIMILAR_ADDRESS_REFERENCE = <<<'SQL'
WITH recollate AS (
SELECT * FROM (
SELECT cma.id AS address_id, cma.streetnumber, cma.street, cmpc.code, cmpc.label, cmar.id AS address_reference_id, cmar.streetnumber, cmar.street, cmpc_reference.code, cmpc_reference.label,
similarity(cma.street, cmar.street),
RANK() OVER (PARTITION BY cma.id ORDER BY SIMILARITY (cma.street, cmar.street) DESC, SIMILARITY (cma.streetnumber, cmar.streetnumber), cmar.id ASC) AS ranked
FROM
chill_main_address cma
JOIN chill_main_postal_code cmpc on cma.postcode_id = cmpc.id,
chill_main_address_reference cmar JOIN chill_main_postal_code cmpc_reference ON cmar.postcode_id = cmpc_reference.id
WHERE
-- only where the reference is null
-- cma.addressreference_id IS NULL
cma.addressreference_id != cmar.id
-- only if cmpc is a reference (must be matched before executing this query)
AND cma.postcode_id = cmar.postcode_id
-- join cmpc to cma
AND SIMILARITY(LOWER(cma.street), LOWER(cmar.street)) > 0.6 AND LOWER(cma.streetnumber) = LOWER(cmar.streetnumber)
-- only addresses which match the address reference - let the user decide if the reference has changed
AND cma.refstatus = 'match'
-- only the most recent
AND cma.id > :since_id
) AS sq
WHERE ranked = 1
)
UPDATE chill_main_address SET addressreference_id = recollate.address_reference_id FROM recollate WHERE chill_main_address.id = recollate.address_id;
SQL;
private const UPDATE_POINT = <<<'SQL'
WITH address_geom AS (
SELECT cma.id AS address_id, COALESCE(cmar.point, cmpc.center) AS point
FROM chill_main_address cma
LEFT JOIN chill_main_address_reference cmar ON cma.addressreference_id = cmar.id
LEFT JOIN chill_main_postal_code cmpc ON cma.postcode_id = cmpc.id
WHERE cma.id > :since_id
)
UPDATE chill_main_address SET point = address_geom.point FROM address_geom WHERE address_geom.address_id = chill_main_address.id
SQL;
private const MAX_ADDRESS_ID = <<<'SQL'
SELECT MAX(id) AS max_id FROM chill_main_address;
SQL;
public function __construct(
private Connection $connection,
private LoggerInterface $logger,
) {
}
/**
* @throws \Throwable
*/
public function __invoke(int $sinceId = 0): int
{
try {
[
$postCodeSetReferenceFromMostSimilar,
$addressReferenceMatch,
$pointUpdates,
$lastId,
] = $this->connection->transactional(function () use ($sinceId) {
$postCodeSetReferenceFromMostSimilar = $this->connection->executeQuery(self::FORCE_ORIGINAL_POSTAL_CODE, ['since_id' => $sinceId]);
$addressReferenceMatch = $this->connection->executeQuery(self::FORCE_MOST_SIMILAR_ADDRESS_REFERENCE, ['since_id' => $sinceId]);
$pointUpdates = $this->connection->executeQuery(self::UPDATE_POINT, ['since_id' => $sinceId]);
$lastId = $this->connection->fetchOne(self::MAX_ADDRESS_ID);
return [
$postCodeSetReferenceFromMostSimilar,
$addressReferenceMatch,
$pointUpdates,
$lastId,
];
});
} catch (\Throwable $e) {
$this->logger->error(self::LOG_PREFIX . "error while re-collating addresses", [
'message' => $e->getMessage(),
'trace' => $e->getTraceAsString()
]);
throw $e;
}
$this->logger->info(self::LOG_PREFIX . "Collate the addresses with reference", [
'set_postcode_from_most_similar' => $postCodeSetReferenceFromMostSimilar,
'address_reference_match' => $addressReferenceMatch,
'point_update' => $pointUpdates,
'since_id' => $sinceId,
'last_id' => $lastId,
]);
return $lastId;
}
}

View File

@ -0,0 +1,44 @@
<?php
declare(strict_types=1);
/*
* Chill is a software for social workers
*
* For the full copyright and license information, please view
* the LICENSE file that was distributed with this source code.
*/
namespace Services\AddressGeographicalUnit;
use Chill\MainBundle\Service\AddressGeographicalUnit\CollateAddressWithReferenceOrPostalCode;
use Doctrine\DBAL\Connection;
use Psr\Log\NullLogger;
use Symfony\Bundle\FrameworkBundle\Test\KernelTestCase;
/**
* @internal
* @coversNothing
*/
class CollateAddressWithReferenceOrPostalCodeTest extends KernelTestCase
{
private Connection $connection;
protected function setUp(): void
{
self::bootKernel();
$this->connection = self::$container->get(Connection::class);
}
public function testRun(): void
{
$collator = new CollateAddressWithReferenceOrPostalCode(
$this->connection,
new NullLogger()
);
$result = $collator(0);
self::assertGreaterThan(0, $result);
}
}