feat: Enhance CSV import process and data cleanup
- Added support for importing 'periodes' data and adjusted related SQL scripts.
- Introduced a new SQL preparation script to set up the import schema.
- Updated the import_all_csv.sh script to handle new CSV files and tables.
- Implemented checks for required SQL files and improved error handling.
- Modified data types and structures in the import process for better consistency.
- Created a new remove-import-data.sql script to clean up imported data effectively.
- Enhanced the handling of postal codes and addresses in the import logic.
- Added comments and structured the SQL scripts for better readability and maintainability.
This commit is contained in:
@@ -12,6 +12,13 @@
|
||||
--
|
||||
-- vérifier les étiquettes de colonnes: user-> referrer, acp_social_issues-> acp_socialissues
|
||||
|
||||
-- Run the DOWN block only when explicitly requested:
|
||||
--     psql -v run_down=1 -f sql/import.sql
|
||||
-- `:{?run_down}` is true when the psql variable run_down is already defined
-- (e.g. via -v on the command line); in that case its value is kept as-is.
\if :{?run_down}
|
||||
\else
|
||||
-- Not supplied by the caller: default to 0 so the later `\if :run_down`
-- guard around the DOWN section evaluates to false.
\set run_down 0
|
||||
\endif
|
||||
|
||||
|
||||
-- 1. Adjust Type cast for id columns
|
||||
ALTER TABLE import.personnes ALTER COLUMN id TYPE INTEGER USING (id::integer);
|
||||
@@ -355,26 +362,28 @@ INSERT INTO chill_person_person_center_history (id, person_id, center_id, startd
|
||||
-- a) add new columns
|
||||
ALTER TABLE import.personnes ADD column address_id BIGINT;
|
||||
ALTER TABLE import.personnes ADD column address_ref_id BIGINT;
|
||||
ALTER TABLE import.personnes ADD column postcode_id BIGINT[];
|
||||
ALTER TABLE import.personnes ADD column postcode_arr BIGINT[];
|
||||
|
||||
-- special command to extract postcode values in text
|
||||
ALTER TABLE import.personnes ADD COLUMN postcode1 TEXT;
|
||||
UPDATE import.personnes SET postcode1=SUBSTRING(postcode FROM '\y\d{5}\y') WHERE postcode!='';
|
||||
UPDATE import.personnes
|
||||
SET postcode1 = NULLIF(regexp_replace(coalesce(postcode, ''), '\D', '', 'g'), '')
|
||||
WHERE coalesce(postcode, '') != '';
|
||||
|
||||
-- b) find and add postal code references. Check missing correspondences (if you can)
|
||||
UPDATE import.personnes
|
||||
SET postcode_id = (
|
||||
SELECT pc.id
|
||||
SET postcode_arr = (
|
||||
SELECT array_agg(pc.id)
|
||||
FROM chill_main_postal_code AS pc
|
||||
WHERE pc.canonical ILIKE import.personnes.postcode
|
||||
AND pc.origin = 0
|
||||
WHERE pc.code = import.personnes.postcode1
|
||||
AND pc.origin = 0
|
||||
);
|
||||
--SELECT * FROM import.personnes as ip WHERE ip.postcode_id IS NULL -- missing correspondences
|
||||
--SELECT * FROM import.personnes as ip WHERE ip.postcode_arr IS NULL -- missing correspondences
|
||||
|
||||
-- c) find and add reference addresses. Check missing correspondences (if you can)
|
||||
UPDATE import.personnes AS ip SET address_ref_id = cmar.id FROM chill_main_address_reference AS cmar
|
||||
WHERE
|
||||
cmar.postcode_id = ip.postcode_id
|
||||
cmar.postcode_id = ANY(ip.postcode_arr)
|
||||
AND similarity(trim(ip.street), trim(cmar.street)) > 0.6
|
||||
AND trim(ip.streetnumber) = trim(cmar.streetnumber);
|
||||
--SELECT * FROM import.personnes as ip WHERE ip.address_ref_id IS NULL -- missing correspondences
|
||||
@@ -384,7 +393,7 @@ UPDATE import.personnes SET address_id = nextval('chill_main_address_id_seq');
|
||||
|
||||
-- e) insert reference address into chill_main_addresses
|
||||
INSERT INTO chill_main_address (id, postcode_id, street, streetnumber, extra, validFrom, addressreference_id, refstatus, point, createdat, updatedat, createdby_id, updatedby_id)
|
||||
SELECT address_id, postcode_id, street, coalesce(streetnumber, ''), coalesce(extra,''),
|
||||
SELECT address_id, postcode_arr[1], street, coalesce(streetnumber, ''), coalesce(extra,''),
|
||||
CURRENT_DATE,
|
||||
address_ref_id, 'match',
|
||||
(SELECT point FROM chill_main_address_reference WHERE id = address_ref_id),
|
||||
@@ -396,14 +405,14 @@ INSERT INTO chill_main_address (id, postcode_id, street, streetnumber, extra, va
|
||||
|
||||
-- f) insert created addresses in chill_main_addresses
|
||||
INSERT INTO chill_main_address (id, postcode_id, street, streetnumber, extra, validFrom, point, createdat, updatedat, createdby_id, updatedby_id)
|
||||
SELECT address_id, postcode_id, coalesce(street, ''), coalesce(streetnumber, ''), coalesce(extra,''),
|
||||
SELECT address_id, postcode_arr[1], coalesce(street, ''), coalesce(streetnumber, ''), coalesce(extra,''),
|
||||
CURRENT_DATE,
|
||||
(SELECT center FROM chill_main_postal_code WHERE id = postcode_id), -- geolocation is given by the postcode
|
||||
(SELECT center FROM chill_main_postal_code WHERE id = postcode_arr[1]), -- geolocation is given by the postcode
|
||||
CURRENT_DATE,
|
||||
CURRENT_DATE,
|
||||
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM users),
|
||||
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM users)
|
||||
FROM import.personnes WHERE address_ref_id IS NULL AND postcode_id IS NOT NULL;
|
||||
FROM import.personnes WHERE address_ref_id IS NULL AND postcode_arr IS NOT NULL;
|
||||
|
||||
-- Resynchronise the address id sequence with the highest id now present in
-- chill_main_address. COALESCE falls back to 1 when the table is empty, so
-- setval never receives NULL (a single-argument COALESCE provides no fallback).
-- This matches the form of the setval call in the "Undo 43." step.
SELECT setval('chill_main_address_id_seq', (SELECT COALESCE(max(id), 1) FROM chill_main_address));
|
||||
|
||||
@@ -440,7 +449,7 @@ UPDATE import.periodes SET period_id = periodid
|
||||
INSERT INTO chill_person_accompanying_period (id, openingdate, closingdate, step, remark, intensity, createdby_id, createdat, updatedby_id, updatedat) SELECT
|
||||
period_id,
|
||||
COALESCE(openingdate1, date(date_trunc('year', CURRENT_DATE))), closingdate1,
|
||||
'CONFIRMED', COALESCE(TRIM(remark), ''), intensity1,
|
||||
'CONFIRMED', COALESCE(TRIM(remark::text), ''), intensity1,
|
||||
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM users), CURRENT_DATE,
|
||||
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM users), CURRENT_DATE
|
||||
FROM import.periodes;
|
||||
@@ -482,11 +491,7 @@ INSERT INTO chill_person_accompanying_period_location_history (id, period_id, st
|
||||
INSERT INTO chill_person_accompanying_period_social_issues (accompanyingperiod_id, socialissue_id)
|
||||
SELECT
|
||||
DISTINCT ON (t.period_id) t.period_id,
|
||||
COALESCE(
|
||||
t.enfant_id,
|
||||
t.parent_id,
|
||||
1 -- default value ?
|
||||
) AS socialissue_id
|
||||
COALESCE(t.enfant_id, t.parent_id) AS socialissue_id
|
||||
FROM (
|
||||
SELECT p.period_id,
|
||||
(SELECT id FROM chill_person_social_issue WHERE title::jsonb->>'fr' = icp.parent1::jsonb->>'fr' AND parent_id IS NULL) AS parent_id, icp.parent1,
|
||||
@@ -494,13 +499,14 @@ FROM (
|
||||
(SELECT id FROM chill_person_social_issue WHERE title::jsonb->>'fr' = icp.parent1::jsonb->>'fr' AND parent_id IS NULL)) AS enfant_id, icp.enfant1
|
||||
FROM import.periodes p
|
||||
JOIN import.choix_periodes icp ON p.acp_socialissues = icp.acp_social_issues
|
||||
ORDER BY id) AS t;
|
||||
ORDER BY id) AS t
|
||||
WHERE COALESCE(t.enfant_id, t.parent_id) IS NOT NULL;
|
||||
|
||||
-- 57. Link referrer to periods
|
||||
UPDATE chill_person_accompanying_period acp
|
||||
SET user_id = COALESCE(
|
||||
(SELECT id FROM users WHERE users.username = ip.referrer),
|
||||
1 -- default value ?
|
||||
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM users)
|
||||
)
|
||||
FROM import.periodes ip WHERE acp.id = ip.period_id;
|
||||
--SELECT ip.id, (SELECT id FROM users WHERE users.username = ip.referrer) AS referrer_id, ip.referrer, acp.id as period_id, acp.user_id FROM chill_person_accompanying_period acp JOIN import.periodes ip ON ip.period_id = acp.id ORDER BY ip.id;
|
||||
@@ -557,11 +563,16 @@ SELECT setval('chill_person_accompanying_period_work_id_seq', (SELECT COALESCE(m
|
||||
|
||||
-- 58. Link scopes to periods
|
||||
INSERT INTO accompanying_periods_scopes (accompanying_period_id, scope_id)
|
||||
SELECT ip.period_id, COALESCE(
|
||||
(SELECT id FROM scopes s WHERE ip.acp_scopes1::jsonb->>'fr' = s.name::jsonb->>'fr'),
|
||||
(SELECT id from scopes s WHERE s.name::jsonb->>'fr' = 'Principal') -- default value 'Principal'
|
||||
)
|
||||
FROM import.periodes ip;
|
||||
SELECT t.period_id, t.scope_id
|
||||
FROM (
|
||||
SELECT ip.period_id, COALESCE(
|
||||
(SELECT id FROM scopes s WHERE ip.acp_scopes1::jsonb->>'fr' = s.name::jsonb->>'fr'),
|
||||
(SELECT id from scopes s WHERE s.name::jsonb->>'fr' = 'Principal'),
|
||||
(SELECT distinct(first_value(id) OVER(ORDER BY id)) FROM scopes)
|
||||
) AS scope_id
|
||||
FROM import.periodes ip
|
||||
) AS t
|
||||
WHERE t.scope_id IS NOT NULL;
|
||||
|
||||
-- 59. Link origin to periods
|
||||
UPDATE chill_person_accompanying_period acp SET origin_id =
|
||||
@@ -937,6 +948,8 @@ AND NOT EXISTS ( SELECT 1 FROM chill_3party.third_party WHERE "name" = trim(t.pe
|
||||
-- DOWN
|
||||
--
|
||||
|
||||
\if :run_down
|
||||
|
||||
-- Undo 68.
|
||||
--TODO
|
||||
|
||||
@@ -1026,7 +1039,7 @@ SELECT setval('chill_person_household_id_seq', (SELECT COALESCE(max(id),1) FROM
|
||||
-- Undo 43.
|
||||
DELETE FROM chill_main_address addr USING import.personnes ip WHERE addr.id = ip.address_id;
|
||||
SELECT setval('chill_main_address_id_seq', (SELECT COALESCE(max(id),1) FROM chill_main_address));
|
||||
ALTER TABLE import.personnes DROP column postcode_id;
|
||||
ALTER TABLE import.personnes DROP column postcode_arr;
|
||||
ALTER TABLE import.personnes DROP column address_id;
|
||||
|
||||
-- Undo 42.
|
||||
@@ -1179,6 +1192,7 @@ ALTER TABLE import.periodes DROP COLUMN closingdate1;
|
||||
|
||||
|
||||
-- =============
|
||||
\endif
|
||||
-- QUESTIONS
|
||||
--
|
||||
-- définir par défaut: quel user, quel centre ?
|
||||
|
||||
Reference in New Issue
Block a user