feat: Enhance CSV import process and data cleanup

- Added support for importing 'periodes' data and adjusted related SQL scripts.
- Introduced a new SQL preparation script to set up the import schema.
- Updated the import_all_csv.sh script to handle new CSV files and tables.
- Implemented checks for required SQL files and improved error handling.
- Modified data types and structures in the import process for better consistency.
- Created a new remove-import-data.sql script to clean up imported data effectively.
- Enhanced the handling of postal codes and addresses in the import logic.
- Added comments and structured the SQL scripts for better readability and maintainability.
This commit is contained in:
Boris Waaub
2026-03-16 13:48:12 +01:00
parent 8481acfe5d
commit c388967ebd
9 changed files with 914 additions and 66 deletions

View File

@@ -12,6 +12,13 @@
--
-- vérifier les étiquettes de colonnes: user-> referrer, acp_social_issues-> acp_socialissues
-- Run the DOWN block only when explicitly requested:
-- psql -v run_down=1 -f sql/import.sql
-- If the caller did not define run_down on the command line, default it to 0
-- so that the `\if :run_down` guard around the DOWN section has a value to test.
\if :{?run_down}
\else
\set run_down 0
\endif
-- 1. Adjust Type cast for id columns
ALTER TABLE import.personnes ALTER COLUMN id TYPE INTEGER USING (id::integer);
@@ -355,26 +362,28 @@ INSERT INTO chill_person_person_center_history (id, person_id, center_id, startd
-- a) add new columns
ALTER TABLE import.personnes ADD column address_id BIGINT;
ALTER TABLE import.personnes ADD column address_ref_id BIGINT;
ALTER TABLE import.personnes ADD column postcode_id BIGINT[];
ALTER TABLE import.personnes ADD column postcode_arr BIGINT[];
-- special command to extract postcode values in text
ALTER TABLE import.personnes ADD COLUMN postcode1 TEXT;
UPDATE import.personnes SET postcode1=SUBSTRING(postcode FROM '\y\d{5}\y') WHERE postcode!='';
-- Fill postcode1 with the raw postcode stripped of every non-digit character;
-- NULLIF turns a value that contains no digit at all into NULL.
-- Rows whose postcode is NULL or empty are not touched.
-- NOTE(review): all digits found in the field are concatenated, so a value that
-- holds more than the postal code itself yields a longer string that will not
-- match chill_main_postal_code.code; confirm the input format.
UPDATE import.personnes
SET postcode1 = NULLIF(regexp_replace(coalesce(postcode, ''), '\D', '', 'g'), '')
WHERE coalesce(postcode, '') != '';
-- b) find and add postal code references. Check missing correspondences (if you can)
UPDATE import.personnes
SET postcode_id = (
SELECT pc.id
SET postcode_arr = (
SELECT array_agg(pc.id)
FROM chill_main_postal_code AS pc
WHERE pc.canonical ILIKE import.personnes.postcode
AND pc.origin = 0
WHERE pc.code = import.personnes.postcode1
AND pc.origin = 0
);
--SELECT * FROM import.personnes as ip WHERE ip.postcode_id IS NULL -- missing correspondances
--SELECT * FROM import.personnes as ip WHERE ip.postcode_arr IS NULL -- missing correspondances
-- c) find and add reference addresses. Check missing correspondences (if you can)
UPDATE import.personnes AS ip SET address_ref_id = cmar.id FROM chill_main_address_reference AS cmar
WHERE
cmar.postcode_id = ip.postcode_id
cmar.postcode_id = ANY(ip.postcode_arr)
AND similarity(trim(ip.street), trim(cmar.street)) > 0.6
AND trim(ip.streetnumber) = trim(cmar.streetnumber);
--SELECT * FROM import.personnes as ip WHERE ip.address_ref_id IS NULL -- missing correspondances
@@ -384,7 +393,7 @@ UPDATE import.personnes SET address_id = nextval('chill_main_address_id_seq');
-- e) insert reference address into chill_main_addresses
INSERT INTO chill_main_address (id, postcode_id, street, streetnumber, extra, validFrom, addressreference_id, refstatus, point, createdat, updatedat, createdby_id, updatedby_id)
SELECT address_id, postcode_id, street, coalesce(streetnumber, ''), coalesce(extra,''),
SELECT address_id, postcode_arr[1], street, coalesce(streetnumber, ''), coalesce(extra,''),
CURRENT_DATE,
address_ref_id, 'match',
(SELECT point FROM chill_main_address_reference WHERE id = address_ref_id),
@@ -396,14 +405,14 @@ INSERT INTO chill_main_address (id, postcode_id, street, streetnumber, extra, va
-- f) insert created addresses in chill_main_addresses
INSERT INTO chill_main_address (id, postcode_id, street, streetnumber, extra, validFrom, point, createdat, updatedat, createdby_id, updatedby_id)
SELECT address_id, postcode_id, coalesce(street, ''), coalesce(streetnumber, ''), coalesce(extra,''),
SELECT address_id, postcode_arr[1], coalesce(street, ''), coalesce(streetnumber, ''), coalesce(extra,''),
CURRENT_DATE,
(SELECT center FROM chill_main_postal_code WHERE id = postcode_id), -- geolocation is given by the postcode
(SELECT center FROM chill_main_postal_code WHERE id = postcode_arr[1]), -- geolocation is given by the postcode
CURRENT_DATE,
CURRENT_DATE,
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM users),
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM users)
FROM import.personnes WHERE address_ref_id IS NULL AND postcode_id IS NOT NULL;
FROM import.personnes WHERE address_ref_id IS NULL AND postcode_arr IS NOT NULL;
-- Resynchronise the address id sequence with the highest id present in the table.
-- max(id) is NULL on an empty table, so COALESCE needs an explicit fallback:
-- without it NULL reaches setval and the sequence is left unchanged.
SELECT setval('chill_main_address_id_seq', (SELECT COALESCE(max(id), 1) FROM chill_main_address));
@@ -440,7 +449,7 @@ UPDATE import.periodes SET period_id = periodid
INSERT INTO chill_person_accompanying_period (id, openingdate, closingdate, step, remark, intensity, createdby_id, createdat, updatedby_id, updatedat) SELECT
period_id,
COALESCE(openingdate1, date(date_trunc('year', CURRENT_DATE))), closingdate1,
'CONFIRMED', COALESCE(TRIM(remark), ''), intensity1,
'CONFIRMED', COALESCE(TRIM(remark::text), ''), intensity1,
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM users), CURRENT_DATE,
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM users), CURRENT_DATE
FROM import.periodes;
@@ -482,11 +491,7 @@ INSERT INTO chill_person_accompanying_period_location_history (id, period_id, st
INSERT INTO chill_person_accompanying_period_social_issues (accompanyingperiod_id, socialissue_id)
SELECT
DISTINCT ON (t.period_id) t.period_id,
COALESCE(
t.enfant_id,
t.parent_id,
1 -- default value ?
) AS socialissue_id
COALESCE(t.enfant_id, t.parent_id) AS socialissue_id
FROM (
SELECT p.period_id,
(SELECT id FROM chill_person_social_issue WHERE title::jsonb->>'fr' = icp.parent1::jsonb->>'fr' AND parent_id IS NULL) AS parent_id, icp.parent1,
@@ -494,13 +499,14 @@ FROM (
(SELECT id FROM chill_person_social_issue WHERE title::jsonb->>'fr' = icp.parent1::jsonb->>'fr' AND parent_id IS NULL)) AS enfant_id, icp.enfant1
FROM import.periodes p
JOIN import.choix_periodes icp ON p.acp_socialissues = icp.acp_social_issues
ORDER BY id) AS t;
ORDER BY id) AS t
WHERE COALESCE(t.enfant_id, t.parent_id) IS NOT NULL;
-- 57. Link referrer to periods
UPDATE chill_person_accompanying_period acp
SET user_id = COALESCE(
(SELECT id FROM users WHERE users.username = ip.referrer),
1 -- default value ?
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM users)
)
FROM import.periodes ip WHERE acp.id = ip.period_id;
--SELECT ip.id, (SELECT id FROM users WHERE users.username = ip.referrer) AS referrer_id, ip.referrer, acp.id as period_id, acp.user_id FROM chill_person_accompanying_period acp JOIN import.periodes ip ON ip.period_id = acp.id ORDER BY ip.id;
@@ -557,11 +563,16 @@ SELECT setval('chill_person_accompanying_period_work_id_seq', (SELECT COALESCE(m
-- 58. Link scopes to periods
INSERT INTO accompanying_periods_scopes (accompanying_period_id, scope_id)
SELECT ip.period_id, COALESCE(
(SELECT id FROM scopes s WHERE ip.acp_scopes1::jsonb->>'fr' = s.name::jsonb->>'fr'),
(SELECT id from scopes s WHERE s.name::jsonb->>'fr' = 'Principal') -- default value 'Principal'
)
FROM import.periodes ip;
SELECT t.period_id, t.scope_id
FROM (
SELECT ip.period_id, COALESCE(
(SELECT id FROM scopes s WHERE ip.acp_scopes1::jsonb->>'fr' = s.name::jsonb->>'fr'),
(SELECT id from scopes s WHERE s.name::jsonb->>'fr' = 'Principal'),
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM scopes)
) AS scope_id
FROM import.periodes ip
) AS t
WHERE t.scope_id IS NOT NULL;
-- 59. Link origin to periods
UPDATE chill_person_accompanying_period acp SET origin_id =
@@ -937,6 +948,8 @@ AND NOT EXISTS ( SELECT 1 FROM chill_3party.third_party WHERE "name" = trim(t.pe
-- DOWN
--
\if :run_down
-- Undo 68.
--TODO
@@ -1026,7 +1039,7 @@ SELECT setval('chill_person_household_id_seq', (SELECT COALESCE(max(id),1) FROM
-- Undo 43.
DELETE FROM chill_main_address addr USING import.personnes ip WHERE addr.id = ip.address_id;
SELECT setval('chill_main_address_id_seq', (SELECT COALESCE(max(id),1) FROM chill_main_address));
ALTER TABLE import.personnes DROP column postcode_id;
ALTER TABLE import.personnes DROP column postcode_arr;
ALTER TABLE import.personnes DROP column address_id;
-- Undo 42.
@@ -1179,6 +1192,7 @@ ALTER TABLE import.periodes DROP COLUMN closingdate1;
-- =============
\endif
-- QUESTIONS
--
-- définir par défaut: quel user, quel centre ?