feat: Enhance CSV import process and data cleanup

- Added support for importing 'periodes' data and adjusted related SQL scripts.
- Introduced a new SQL preparation script to set up the import schema.
- Updated the import_all_csv.sh script to handle new CSV files and tables.
- Implemented checks for required SQL files and improved error handling.
- Modified data types and structures in the import process for better consistency.
- Created a new remove-import-data.sql script to clean up imported data effectively.
- Enhanced the handling of postal codes and addresses in the import logic.
- Added comments and structured the SQL scripts for better readability and maintainability.
2026-03-16 13:48:12 +01:00
parent 8481acfe5d
commit c388967ebd
9 changed files with 914 additions and 66 deletions
@@ -9,9 +9,11 @@ DB_USER="${PGUSER:-postgres}"
 DB_NAME="${PGDATABASE:-chill-import}"

 PSQL=(psql -v ON_ERROR_STOP=1 -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME")
+PREPARE_SQL="$ROOT_DIR/sql/prepare-import.sql"

 required_tables=(
  "import.personnes"
+  "import.periodes"
  "import.choix_periodes"
  "import.choix_localisations"
  "import.tiers"
@@ -19,11 +21,21 @@ required_tables=(

 required_files=(
  "$ROOT_DIR/csv/choix_usagers.csv"
+  "$ROOT_DIR/csv/periodes.csv"
  "$ROOT_DIR/csv/choix_periodes.csv"
  "$ROOT_DIR/csv/choix_localisations.csv"
  "$ROOT_DIR/csv/choix_tiers.csv"
 )

+if [[ ! -f "$PREPARE_SQL" ]]; then
+  echo "Fichier SQL manquant: ${PREPARE_SQL#$ROOT_DIR/}" >&2
+  exit 1
+fi
+
+echo "[-1/3] Recreation du schema import..."
+"${PSQL[@]}" -c "DROP SCHEMA IF EXISTS import CASCADE;"
+"${PSQL[@]}" -f "$PREPARE_SQL"
+
 for table in "${required_tables[@]}"; do
  exists="$("${PSQL[@]}" -tAc "SELECT to_regclass('${table}') IS NOT NULL;")"
  if [[ "$exists" != "t" ]]; then
@@ -40,7 +52,7 @@ for csv_file in "${required_files[@]}"; do
  fi
 done

-echo "[0/4] Assouplissement des types texte des tables import..."
+echo "[0/3] Assouplissement des types texte des tables import..."
 "${PSQL[@]}" <<'SQL'
 DO $$
 DECLARE r record;
@@ -49,7 +61,7 @@ BEGIN
    SELECT table_schema, table_name, column_name
    FROM information_schema.columns
    WHERE table_schema = 'import'
-      AND table_name IN ('personnes', 'choix_periodes', 'choix_localisations', 'tiers')
+      AND table_name IN ('personnes', 'periodes', 'choix_periodes', 'choix_localisations', 'tiers')
      AND data_type IN ('character varying', 'character')
  LOOP
    EXECUTE format(
@@ -62,17 +74,18 @@ BEGIN
 END $$;
 SQL

-echo "[1/4] Vidage des tables d'import..."
+echo "[1/3] Vidage des tables d'import..."
 "${PSQL[@]}" <<'SQL'
 TRUNCATE TABLE
  import.personnes,
+  import.periodes,
  import.choix_periodes,
  import.choix_localisations,
  import.tiers
 RESTART IDENTITY;
 SQL

-echo "[2/4] Import des CSV du dossier csv/..."
+echo "[2/3] Import des CSV du dossier csv/..."

 copy_table_from_file() {
  local table="$1"
@@ -87,26 +100,26 @@ copy_table_from_file() {
 }

 copy_table_from_file "import.personnes" "$ROOT_DIR/csv/choix_usagers.csv" "civility,lastname,firstname,gender,gendercomment,nationality,memo,birthdate,place_of_birth,countryofbirth,deathdate,email,phonenumber,mobilenumber,contactinfo,street,extra,streetnumber,postcode,country"
-copy_table_from_file "import.choix_periodes" "$ROOT_DIR/csv/choix_periodes.csv" "closingmotive,origin,acp_scopes,job,referrer,parent,enfant,acp_social_issues,work_social_action,street,extra,streetnumber,postcode,country"
-copy_table_from_file "import.choix_localisations" "$ROOT_DIR/csv/choix_localisations.csv" "title,addressRequired,availableForUsers,contactData,defaultFor,editableByUsers"
-
-echo "[3/4] Import de tiers via table staging..."
-"${PSQL[@]}" -c "\copy import.tiers_stage(coordonnees,nom,categorie) FROM STDIN WITH (FORMAT csv, HEADER true, DELIMITER ',')" < "$ROOT_DIR/csv/choix_tiers.csv"

+echo "  - generation des ids manquants dans import.personnes"
 "${PSQL[@]}" <<'SQL'
-INSERT INTO import.tiers (id, coordonnees, nom, categorie)
-SELECT
-  row_number() OVER (ORDER BY nom) AS id,
-  NULLIF(trim(coordonnees), ''),
-  NULLIF(trim(nom), ''),
-  NULLIF(trim(categorie), '')
-FROM import.tiers_stage
-WHERE COALESCE(trim(nom), '') <> '';
-
-DROP TABLE import.tiers_stage;
+WITH numbered AS (
+  SELECT ctid, row_number() OVER (ORDER BY ctid) AS new_id
+  FROM import.personnes
+  WHERE id IS NULL OR NULLIF(id::text, '') IS NULL
+)
+UPDATE import.personnes p
+SET id = numbered.new_id
+FROM numbered
+WHERE p.ctid = numbered.ctid;
 SQL

-echo "[4/4] Verification des volumes importes..."
-"${PSQL[@]}" -c "SELECT 'personnes' AS table_name, count(*) AS rows FROM import.personnes UNION ALL SELECT 'choix_periodes', count(*) FROM import.choix_periodes UNION ALL SELECT 'choix_localisations', count(*) FROM import.choix_localisations UNION ALL SELECT 'tiers', count(*) FROM import.tiers;"
+copy_table_from_file "import.periodes" "$ROOT_DIR/csv/periodes.csv" "id,nom,openingdate,closingdate,closingmotive,origin,remark,intensity,referrer,job,acp_scopes,address,personlocation,addresslocation,acp_socialissues,work_socialaction,comment1_content,comment2_content,comment3_content,comment4_content,comment5_content"
+copy_table_from_file "import.choix_periodes" "$ROOT_DIR/csv/choix_periodes.csv" "closingmotive,origin,acp_scopes,job,referrer,parent,enfant,acp_social_issues,work_social_action,street,extra,streetnumber,postcode,country"
+copy_table_from_file "import.choix_localisations" "$ROOT_DIR/csv/choix_localisations.csv" "title,\"addressRequired\",\"availableForUsers\",\"contactData\",\"defaultFor\",\"editableByUsers\""
+copy_table_from_file "import.tiers" "$ROOT_DIR/csv/choix_tiers.csv" "acronym,nom,categorie"
+
+echo "[3/3] Verification des volumes importes..."
+"${PSQL[@]}" -c "SELECT 'personnes' AS table_name, count(*) AS rows FROM import.personnes UNION ALL SELECT 'periodes', count(*) FROM import.periodes UNION ALL SELECT 'choix_periodes', count(*) FROM import.choix_periodes UNION ALL SELECT 'choix_localisations', count(*) FROM import.choix_localisations UNION ALL SELECT 'tiers', count(*) FROM import.tiers;"

 echo "Import termine sans doublons (tables videes au debut)."