From 597e20292aba31ef22687584029c62817c1ac280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julien=20Fastr=C3=A9?= Date: Fri, 2 Sep 2022 13:11:55 +0200 Subject: [PATCH] improve performance, remove retired addresses and add center on postal codes --- .drone.yml | 46 ++++++++++++++++++++++++---------------------- .gitignore | 4 ++++ README.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 22 deletions(-) create mode 100644 .gitignore diff --git a/.drone.yml b/.drone.yml index bec7034..8268b43 100644 --- a/.drone.yml +++ b/.drone.yml @@ -7,28 +7,29 @@ steps: - name: download image: plugins/download settings: - source: https://ac.ngi.be/remoteclient-open/GeoBePartners-open/BOSA/ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.zip - destination: bestaddresses.zip - + source: https://ac.ngi.be/remoteclient-open/GeoBePartners-open/BOSA/ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.zip + destination: bestaddresses.zip + - name: convert - image: osgeo/gdal:alpine-small-latest + image: osgeo/gdal:alpine-normal-latest + environment: + SELECT: SELECT shape AS geom, id, best_id, coalesce(streetname_fr, streetname_nl, streetname_de) AS streetname, postal_info_objectid, municipality_objectid, COALESCE(municipality_fr, municipality_nl, municipality_de) AS municipality commands: - unzip -o bestaddresses.zip - - ogr2ogr -sql "SELECT DISTINCT COALESCE(municipality_fr, municipality_nl, municipality_de) AS municipality_name, postal_info_objectid, municipality_objectid FROM addresses_complete" postals.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - - ogr2ogr -sql "SELECT * FROM addresses_complete" -lco GEOMETRY=AS_XY addresses-full.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - - ogr2ogr -sql "SELECT * FROM addresses_complete WHERE postal_info_objectid LIKE '1%'" -lco GEOMETRY=AS_XY addresses-1xxx.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - - ogr2ogr 
-sql "SELECT * FROM addresses_complete WHERE postal_info_objectid LIKE '2%'" -lco GEOMETRY=AS_XY addresses-2xxx.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - - ogr2ogr -sql "SELECT * FROM addresses_complete WHERE postal_info_objectid LIKE '3%'" -lco GEOMETRY=AS_XY addresses-3xxx.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - - ogr2ogr -sql "SELECT * FROM addresses_complete WHERE postal_info_objectid LIKE '4%'" -lco GEOMETRY=AS_XY addresses-4xxx.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - - ogr2ogr -sql "SELECT * FROM addresses_complete WHERE postal_info_objectid LIKE '5%'" -lco GEOMETRY=AS_XY addresses-5xxx.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - - ogr2ogr -sql "SELECT * FROM addresses_complete WHERE postal_info_objectid LIKE '6%'" -lco GEOMETRY=AS_XY addresses-6xxx.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - - ogr2ogr -sql "SELECT * FROM addresses_complete WHERE postal_info_objectid LIKE '7%'" -lco GEOMETRY=AS_XY addresses-7xxx.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - - ogr2ogr -sql "SELECT * FROM addresses_complete WHERE postal_info_objectid LIKE '8%'" -lco GEOMETRY=AS_XY addresses-8xxx.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - - ogr2ogr -sql "SELECT * FROM addresses_complete WHERE postal_info_objectid LIKE '9%'" -lco GEOMETRY=AS_XY addresses-9xxx.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogrinfo ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg -sql "CREATE INDEX idx_postal_code ON addresses_complete (postal_info_objectid) WHERE status NOT LIKE 'retired'" + - ogr2ogr -sql "select St_Centroid(St_Envelope(St_Collect(shape))) AS center, COALESCE(municipality_fr, municipality_nl, municipality_de) AS municipality_name, postal_info_objectid, municipality_objectid FROM addresses_complete where fid % 10 = 1 AND status NOT LIKE 
'retired' GROUP BY COALESCE(municipality_fr, municipality_nl, municipality_de), postal_info_objectid, municipality_objectid" -lco GEOMETRY=AS_XY postals.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE status NOT LIKE 'retired' LIMIT 10000" -lco GEOMETRY=AS_XY addresses-extract.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE status NOT LIKE 'retired'" -lco GEOMETRY=AS_XY addresses-full.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE postal_info_objectid LIKE '1%' AND status NOT LIKE 'retired'" -lco GEOMETRY=AS_XY addresses-1xxx.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE postal_info_objectid LIKE '2%' AND status NOT LIKE 'retired'" -lco GEOMETRY=AS_XY addresses-2xxx.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE postal_info_objectid LIKE '3%' AND status NOT LIKE 'retired'" -lco GEOMETRY=AS_XY addresses-3xxx.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE postal_info_objectid LIKE '4%' AND status NOT LIKE 'retired'" -lco GEOMETRY=AS_XY addresses-4xxx.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE postal_info_objectid LIKE '5%' AND status NOT LIKE 'retired'" -lco GEOMETRY=AS_XY addresses-5xxx.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE postal_info_objectid LIKE '6%' AND status NOT LIKE 'retired'" -lco GEOMETRY=AS_XY addresses-6xxx.fr.csv 
ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE postal_info_objectid LIKE '7%' AND status NOT LIKE 'retired'" -lco GEOMETRY=AS_XY addresses-7xxx.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE postal_info_objectid LIKE '8%' AND status NOT LIKE 'retired'" -lco GEOMETRY=AS_XY addresses-8xxx.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg + - ogr2ogr -sql "$${SELECT} FROM addresses_complete WHERE postal_info_objectid LIKE '9%' AND status NOT LIKE 'retired'" -lco GEOMETRY=AS_XY addresses-9xxx.fr.csv ca0fd5c0-8146-11e9-9012-482ae30f98d9_geopackage+sqlite3_3812.gpkg - gzip -9 *.csv - volumes: - - name: mworkdir - path: /mworkdir - name: gitea_release image: plugins/gitea-release @@ -37,9 +38,10 @@ steps: from_secret: gitea_key title: "v1.0.0" files: - - "*.csv.gz" + - "*.fr.csv.gz" base_url: https://gitea.champs-libres.be - volumes: - - name: mworkdir - path: /mworkdir + trigger: + event: + include: + - tag diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3bd9fcd --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.zip +*.gpkg +*.csv +*.qgz diff --git a/README.md b/README.md index 42f2527..73cb5b4 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,47 @@ Ce dépôt convertit les données d'adresses mises à disposition dans le cadre de [Best Address](https://www.geo.be/catalog/details/ca0fd5c0-8146-11e9-9012-482ae30f98d9?l=fr) en un format csv, utilisable par le logiciel Chill. + +## Liste des codes postaux + +Une liste des codes postaux est générée à partir de la liste des adresses. + +### Point pour chaque code postal + +Un centre est calculé et fixé au centroïde de l'enveloppe (rectangle englobant) de la liste des adresses. Pour plus de rapidité, seule une adresse sur 10 est prise en compte, de manière pseudo-aléatoire sur base de l'identifiant de la ligne.
+ +Justifications de ce filtre pour un code postal (ici, le 4610): + +``` +# Calcul du centre sans filtre sur le fid: +POINT(741703.97364 647537.227319) 4610 + +# Avec un filtre fid % 5 = 1 +POINT(741669.696698 647591.04102) 4610 + +# Avec un filtre fid % 10 = 1 +POINT(741682.686228 647591.04102) 4610 +``` + +## Liste des adresses + +Les adresses retenues excluent systématiquement celles marquées comme 'retired'. + +Les fichiers sont les suivants: + +* `addresses-extract`: contient les 10000 premières adresses. À utiliser comme extrait; +* `addresses-full`: contient l'entièreté des adresses; +* `addresses-1xxx`: contient les adresses dont le code postal est compris entre 1000 et 1999; +* `addresses-2xxx`: contient les adresses dont le code postal est compris entre 2000 et 2999; +* `addresses-3xxx`: contient les adresses dont le code postal est compris entre 3000 et 3999; +* `addresses-4xxx`: contient les adresses dont le code postal est compris entre 4000 et 4999; +* `addresses-5xxx`: contient les adresses dont le code postal est compris entre 5000 et 5999; +* `addresses-6xxx`: contient les adresses dont le code postal est compris entre 6000 et 6999; +* `addresses-7xxx`: contient les adresses dont le code postal est compris entre 7000 et 7999; +* `addresses-8xxx`: contient les adresses dont le code postal est compris entre 8000 et 8999; +* `addresses-9xxx`: contient les adresses dont le code postal est compris entre 9000 et 9999; + +## Langue + +* Dans les fichiers dont le nom comprend `fr.csv`, les noms de rues et de municipalités sont choisis d'abord en français s'ils sont présents, ensuite en néerlandais puis en allemand. +