chill-bundles/src/Bundle/ChillMainBundle/Service/AddressGeographicalUnit/CollateAddressWithReferenceOrPostalCode.php

151 lines
7.0 KiB
PHP

<?php
declare(strict_types=1);
/*
* Chill is a software for social workers
*
* For the full copyright and license information, please view
* the LICENSE file that was distributed with this source code.
*/
namespace Chill\MainBundle\Service\AddressGeographicalUnit;
use Doctrine\DBAL\Connection;
use Psr\Log\LoggerInterface;
final readonly class CollateAddressWithReferenceOrPostalCode implements CollateAddressWithReferenceOrPostalCodeInterface
{
private const LOG_PREFIX = '[collate addresses] ';
/**
* For the address having an "invented" postal code, find the postal code "reference" with the same code,
* and the most similar name. When two reference code match, we add
*
* This query intentionally includes also address with reference, as the reference may be wrong.
*/
private const FORCE_ORIGINAL_POSTAL_CODE = <<<'SQL'
WITH recollate AS (
SELECT * FROM (
SELECT cma.id AS address_id, cmpc.id, cmpc.label, cmpc.code, cmpc_reference.id AS cmpc_reference_id, cmpc_reference.label, cmpc_reference.code,
RANK() OVER (PARTITION BY cma.id ORDER BY SIMILARITY(cmpc.label, cmpc_reference.label) DESC, cmpc_reference.id ASC) AS ranked
FROM
chill_main_address cma JOIN chill_main_postal_code cmpc on cma.postcode_id = cmpc.id,
chill_main_postal_code cmpc_reference
WHERE
-- use only postal code which are reference
cmpc_reference.id != cmpc.id AND cmpc_reference.origin = 0
-- only where cmpc is created manually
AND cmpc.origin != 0
-- only when postal code match
AND TRIM(REPLACE(LOWER(cmpc.code), ' ', '')) = LOWER(cmpc_reference.code)
AND cmpc.country_id = cmpc_reference.country_id
AND cma.id > :since_id -- to set the first id
) sq
WHERE ranked = 1)
UPDATE chill_main_address SET postcode_id = cmpc_reference_id FROM recollate WHERE recollate.address_id = chill_main_address.id;
SQL;
/**
* associate the address with the most similar address reference.
*
* This query intentionally ignores the existing addressreference_id, to let fixing the address match the
* most similar address reference.
*/
private const FORCE_MOST_SIMILAR_ADDRESS_REFERENCE = <<<'SQL'
WITH recollate AS (
SELECT * FROM (
SELECT cma.id AS address_id, cma.streetnumber, cma.street, cmpc.code, cmpc.label, cmar.id AS address_reference_id, cmar.streetnumber, cmar.street, cmpc_reference.code, cmpc_reference.label,
similarity(cma.street, cmar.street),
RANK() OVER (PARTITION BY cma.id ORDER BY SIMILARITY (cma.street, cmar.street) DESC, SIMILARITY (cma.streetnumber, cmar.streetnumber), cmar.id ASC) AS ranked
FROM
chill_main_address cma
JOIN chill_main_postal_code cmpc on cma.postcode_id = cmpc.id,
chill_main_address_reference cmar JOIN chill_main_postal_code cmpc_reference ON cmar.postcode_id = cmpc_reference.id
WHERE
-- only if cmpc is a reference (must be matched before executing this query)
cma.postcode_id = cmar.postcode_id
-- join cmpc to cma
AND SIMILARITY(LOWER(cma.street), LOWER(cmar.street)) > 0.6 AND LOWER(cma.streetnumber) = LOWER(cmar.streetnumber)
-- only addresses which match the address reference - let the user decide if the reference has changed
AND cma.refstatus = 'match'
-- only the most recent
AND cma.id > :since_id
) AS sq
WHERE ranked = 1
)
UPDATE chill_main_address SET addressreference_id = recollate.address_reference_id FROM recollate WHERE chill_main_address.id = recollate.address_id;
SQL;
/**
* Update the point's address with the:
*
* - address reference point, if the address match the reference with sufficient similarity
* - or the postcal code center
*/
private const UPDATE_POINT = <<<'SQL'
WITH address_geom AS (
SELECT cma.id AS address_id, COALESCE(cmar.point, cmpc.center) AS point
FROM chill_main_address cma
LEFT JOIN chill_main_address_reference cmar ON cma.addressreference_id = cmar.id AND similarity(cma.street, cmar.street) > 0.6 AND LOWER(cma.streetnumber) = LOWER(cmar.streetnumber)
LEFT JOIN chill_main_postal_code cmpc ON cma.postcode_id = cmpc.id
WHERE cma.id > :since_id
)
UPDATE chill_main_address SET point = address_geom.point FROM address_geom WHERE address_geom.address_id = chill_main_address.id
SQL;
private const MAX_ADDRESS_ID = <<<'SQL'
SELECT MAX(id) AS max_id FROM chill_main_address;
SQL;
public function __construct(
private Connection $connection,
private LoggerInterface $logger,
) {
}
/**
* @throws \Throwable
*/
public function __invoke(int $sinceId = 0): int
{
try {
[
$postCodeSetReferenceFromMostSimilar,
$addressReferenceMatch,
$pointUpdates,
$lastId,
] = $this->connection->transactional(function () use ($sinceId) {
$postCodeSetReferenceFromMostSimilar = $this->connection->executeStatement(self::FORCE_ORIGINAL_POSTAL_CODE, ['since_id' => $sinceId]);
$addressReferenceMatch = $this->connection->executeStatement(self::FORCE_MOST_SIMILAR_ADDRESS_REFERENCE, ['since_id' => $sinceId]);
$pointUpdates = $this->connection->executeStatement(self::UPDATE_POINT, ['since_id' => $sinceId]);
$lastId = $this->connection->fetchOne(self::MAX_ADDRESS_ID);
return [
$postCodeSetReferenceFromMostSimilar,
$addressReferenceMatch,
$pointUpdates,
$lastId,
];
});
} catch (\Throwable $e) {
$this->logger->error(self::LOG_PREFIX . "error while re-collating addresses", [
'message' => $e->getMessage(),
'trace' => $e->getTraceAsString()
]);
throw $e;
}
$this->logger->info(self::LOG_PREFIX . "Collate the addresses with reference", [
'set_postcode_from_most_similar' => $postCodeSetReferenceFromMostSimilar,
'address_reference_match' => $addressReferenceMatch,
'point_update' => $pointUpdates,
'since_id' => $sinceId,
'last_id' => $lastId,
]);
return $lastId;
}
}