feat: Enhance CSV import process and data cleanup

- Added support for importing 'periodes' data and adjusted related SQL scripts.
- Introduced a new SQL preparation script to set up the import schema.
- Updated the import_all_csv.sh script to handle new CSV files and tables.
- Implemented checks for required SQL files and improved error handling.
- Modified data types and structures in the import process for better consistency.
- Created a new remove-import-data.sql script to clean up imported data effectively.
- Enhanced the handling of postal codes and addresses in the import logic.
- Added comments and structured the SQL scripts for better readability and maintainability.
This commit is contained in:
Boris Waaub
2026-03-16 13:48:12 +01:00
parent 8481acfe5d
commit c388967ebd
9 changed files with 914 additions and 66 deletions
+34 -21
View File
@@ -9,9 +9,11 @@ DB_USER="${PGUSER:-postgres}"
DB_NAME="${PGDATABASE:-chill-import}"
PSQL=(psql -v ON_ERROR_STOP=1 -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME")
PREPARE_SQL="$ROOT_DIR/sql/prepare-import.sql"
required_tables=(
"import.personnes"
"import.periodes"
"import.choix_periodes"
"import.choix_localisations"
"import.tiers"
@@ -19,11 +21,21 @@ required_tables=(
# CSV inputs expected under $ROOT_DIR/csv; each one is loaded into an import.* table further down.
required_files=(
"$ROOT_DIR/csv/choix_usagers.csv"
"$ROOT_DIR/csv/periodes.csv"
"$ROOT_DIR/csv/choix_periodes.csv"
"$ROOT_DIR/csv/choix_localisations.csv"
"$ROOT_DIR/csv/choix_tiers.csv"
)
# Abort before touching the database when the schema-preparation script is
# missing, so the import schema is never dropped without a way to rebuild it.
if [[ ! -f "$PREPARE_SQL" ]]; then
  # ROOT_DIR is quoted inside the expansion so it is stripped as a literal
  # prefix instead of being interpreted as a glob pattern (ShellCheck SC2295).
  echo "Fichier SQL manquant: ${PREPARE_SQL#"$ROOT_DIR"/}" >&2
  exit 1
fi

echo "[-1/3] Recreation du schema import..."
# Drop any previous import schema (CASCADE removes the objects it contains),
# then run the preparation script. PSQL carries ON_ERROR_STOP=1, so a failing
# statement makes psql exit non-zero.
"${PSQL[@]}" -c "DROP SCHEMA IF EXISTS import CASCADE;"
"${PSQL[@]}" -f "$PREPARE_SQL"
for table in "${required_tables[@]}"; do
exists="$("${PSQL[@]}" -tAc "SELECT to_regclass('${table}') IS NOT NULL;")"
if [[ "$exists" != "t" ]]; then
@@ -40,7 +52,7 @@ for csv_file in "${required_files[@]}"; do
fi
done
echo "[0/4] Assouplissement des types texte des tables import..."
echo "[0/3] Assouplissement des types texte des tables import..."
"${PSQL[@]}" <<'SQL'
DO $$
DECLARE r record;
@@ -49,7 +61,7 @@ BEGIN
SELECT table_schema, table_name, column_name
FROM information_schema.columns
WHERE table_schema = 'import'
AND table_name IN ('personnes', 'choix_periodes', 'choix_localisations', 'tiers')
AND table_name IN ('personnes', 'periodes', 'choix_periodes', 'choix_localisations', 'tiers')
AND data_type IN ('character varying', 'character')
LOOP
EXECUTE format(
@@ -62,17 +74,18 @@ BEGIN
END $$;
SQL
echo "[1/4] Vidage des tables d'import..."
echo "[1/3] Vidage des tables d'import..."
"${PSQL[@]}" <<'SQL'
TRUNCATE TABLE
import.personnes,
import.periodes,
import.choix_periodes,
import.choix_localisations,
import.tiers
RESTART IDENTITY;
SQL
echo "[2/4] Import des CSV du dossier csv/..."
echo "[2/3] Import des CSV du dossier csv/..."
copy_table_from_file() {
local table="$1"
@@ -87,26 +100,26 @@ copy_table_from_file() {
}
copy_table_from_file "import.personnes" "$ROOT_DIR/csv/choix_usagers.csv" "civility,lastname,firstname,gender,gendercomment,nationality,memo,birthdate,place_of_birth,countryofbirth,deathdate,email,phonenumber,mobilenumber,contactinfo,street,extra,streetnumber,postcode,country"
copy_table_from_file "import.choix_periodes" "$ROOT_DIR/csv/choix_periodes.csv" "closingmotive,origin,acp_scopes,job,referrer,parent,enfant,acp_social_issues,work_social_action,street,extra,streetnumber,postcode,country"
copy_table_from_file "import.choix_localisations" "$ROOT_DIR/csv/choix_localisations.csv" "title,addressRequired,availableForUsers,contactData,defaultFor,editableByUsers"
echo "[3/4] Import de tiers via table staging..."
"${PSQL[@]}" -c "\copy import.tiers_stage(coordonnees,nom,categorie) FROM STDIN WITH (FORMAT csv, HEADER true, DELIMITER ',')" < "$ROOT_DIR/csv/choix_tiers.csv"
echo " - generation des ids manquants dans import.personnes"
"${PSQL[@]}" <<'SQL'
INSERT INTO import.tiers (id, coordonnees, nom, categorie)
SELECT
row_number() OVER (ORDER BY nom) AS id,
NULLIF(trim(coordonnees), ''),
NULLIF(trim(nom), ''),
NULLIF(trim(categorie), '')
FROM import.tiers_stage
WHERE COALESCE(trim(nom), '') <> '';
DROP TABLE import.tiers_stage;
WITH numbered AS (
SELECT ctid, row_number() OVER (ORDER BY ctid) AS new_id
FROM import.personnes
WHERE id IS NULL OR NULLIF(id::text, '') IS NULL
)
UPDATE import.personnes p
SET id = numbered.new_id
FROM numbered
WHERE p.ctid = numbered.ctid;
SQL
echo "[4/4] Verification des volumes importes..."
"${PSQL[@]}" -c "SELECT 'personnes' AS table_name, count(*) AS rows FROM import.personnes UNION ALL SELECT 'choix_periodes', count(*) FROM import.choix_periodes UNION ALL SELECT 'choix_localisations', count(*) FROM import.choix_localisations UNION ALL SELECT 'tiers', count(*) FROM import.tiers;"
# Load the remaining CSV files into their import tables.
# NOTE(review): the third argument looks like the target column list handed to
# COPY — confirm against the body of copy_table_from_file.
copy_table_from_file "import.periodes" "$ROOT_DIR/csv/periodes.csv" "id,nom,openingdate,closingdate,closingmotive,origin,remark,intensity,referrer,job,acp_scopes,address,personlocation,addresslocation,acp_socialissues,work_socialaction,comment1_content,comment2_content,comment3_content,comment4_content,comment5_content"
copy_table_from_file "import.choix_periodes" "$ROOT_DIR/csv/choix_periodes.csv" "closingmotive,origin,acp_scopes,job,referrer,parent,enfant,acp_social_issues,work_social_action,street,extra,streetnumber,postcode,country"
# The camelCase column names are wrapped in escaped double quotes because
# PostgreSQL folds unquoted identifiers to lower case.
copy_table_from_file "import.choix_localisations" "$ROOT_DIR/csv/choix_localisations.csv" "title,\"addressRequired\",\"availableForUsers\",\"contactData\",\"defaultFor\",\"editableByUsers\""
copy_table_from_file "import.tiers" "$ROOT_DIR/csv/choix_tiers.csv" "acronym,nom,categorie"
echo "[3/3] Verification des volumes importes..."
# Print one row per import table with its row count.
"${PSQL[@]}" -c "SELECT 'personnes' AS table_name, count(*) AS rows FROM import.personnes UNION ALL SELECT 'periodes', count(*) FROM import.periodes UNION ALL SELECT 'choix_periodes', count(*) FROM import.choix_periodes UNION ALL SELECT 'choix_localisations', count(*) FROM import.choix_localisations UNION ALL SELECT 'tiers', count(*) FROM import.tiers;"
echo "Import termine sans doublons (tables videes au debut)."