:since_id -- to set the first id ) sq WHERE ranked = 1) UPDATE chill_main_address SET postcode_id = cmpc_reference_id FROM recollate WHERE recollate.address_id = chill_main_address.id; SQL; /** * associate the address with the most similar address reference. * * This query intentionally ignores the existing addressreference_id, to let fixing the address match the * most similar address reference. */ private const FORCE_MOST_SIMILAR_ADDRESS_REFERENCE = <<<'SQL' WITH recollate AS ( SELECT * FROM ( SELECT cma.id AS address_id, cma.streetnumber, cma.street, cmpc.code, cmpc.label, cmar.id AS address_reference_id, cmar.streetnumber, cmar.street, cmpc_reference.code, cmpc_reference.label, similarity(cma.street, cmar.street), RANK() OVER (PARTITION BY cma.id ORDER BY SIMILARITY (cma.street, cmar.street) DESC, SIMILARITY (cma.streetnumber, cmar.streetnumber), cmar.id ASC) AS ranked FROM chill_main_address cma JOIN chill_main_postal_code cmpc on cma.postcode_id = cmpc.id, chill_main_address_reference cmar JOIN chill_main_postal_code cmpc_reference ON cmar.postcode_id = cmpc_reference.id WHERE -- only if cmpc is a reference (must be matched before executing this query) cma.postcode_id = cmar.postcode_id -- join cmpc to cma AND SIMILARITY(LOWER(cma.street), LOWER(cmar.street)) > 0.6 AND LOWER(cma.streetnumber) = LOWER(cmar.streetnumber) -- only addresses which match the address reference - let the user decide if the reference has changed AND cma.refstatus = 'match' -- only the most recent AND cma.id > :since_id ) AS sq WHERE ranked = 1 ) UPDATE chill_main_address SET addressreference_id = recollate.address_reference_id FROM recollate WHERE chill_main_address.id = recollate.address_id; SQL; /** * Update the point's address with the: * * - address reference point, if the address match the reference with sufficient similarity * - or the postcal code center */ private const UPDATE_POINT = <<<'SQL' WITH address_geom AS ( SELECT cma.id AS address_id, COALESCE(cmar.point, cmpc.center) AS point FROM chill_main_address cma LEFT JOIN chill_main_address_reference cmar ON cma.addressreference_id = cmar.id AND similarity(cma.street, cmar.street) > 0.6 AND LOWER(cma.streetnumber) = LOWER(cmar.streetnumber) LEFT JOIN chill_main_postal_code cmpc ON cma.postcode_id = cmpc.id WHERE cma.id > :since_id ) UPDATE chill_main_address SET point = address_geom.point FROM address_geom WHERE address_geom.address_id = chill_main_address.id SQL; private const MAX_ADDRESS_ID = <<<'SQL' SELECT MAX(id) AS max_id FROM chill_main_address; SQL; public function __construct( private Connection $connection, private LoggerInterface $logger, ) { } /** * @throws \Throwable */ public function __invoke(int $sinceId = 0): int { try { [ $postCodeSetReferenceFromMostSimilar, $addressReferenceMatch, $pointUpdates, $lastId, ] = $this->connection->transactional(function () use ($sinceId) { $postCodeSetReferenceFromMostSimilar = $this->connection->executeStatement(self::FORCE_ORIGINAL_POSTAL_CODE, ['since_id' => $sinceId]); $addressReferenceMatch = $this->connection->executeStatement(self::FORCE_MOST_SIMILAR_ADDRESS_REFERENCE, ['since_id' => $sinceId]); $pointUpdates = $this->connection->executeStatement(self::UPDATE_POINT, ['since_id' => $sinceId]); $lastId = $this->connection->fetchOne(self::MAX_ADDRESS_ID); return [ $postCodeSetReferenceFromMostSimilar, $addressReferenceMatch, $pointUpdates, $lastId, ]; }); } catch (\Throwable $e) { $this->logger->error(self::LOG_PREFIX . "error while re-collating addresses", [ 'message' => $e->getMessage(), 'trace' => $e->getTraceAsString() ]); throw $e; } $this->logger->info(self::LOG_PREFIX . "Collate the addresses with reference", [ 'set_postcode_from_most_similar' => $postCodeSetReferenceFromMostSimilar, 'address_reference_match' => $addressReferenceMatch, 'point_update' => $pointUpdates, 'since_id' => $sinceId, 'last_id' => $lastId, ]); return $lastId; } }