import external data and qualification (#17): improvements

This commit is contained in:
nobohan 2021-09-15 10:57:31 +02:00
parent c7f7931af6
commit 3ed4872b60
4 changed files with 63 additions and 49 deletions

2
utils/.gitignore vendored
View File

@ -5,3 +5,5 @@ lib/
lib64/
share/
pyvenv.cfg
sql/
!.gitkeep

View File

@ -26,19 +26,29 @@ $ source bin/activate
### Some notes about this import
The import script creates database entries by filling not all the field of the table. Only mandatory (non-nullable) fields and informative fields are filled.
The import script creates database entries without filling every field of the table. Only mandatory (non-nullable) fields and informative fields are filled.
Here is the column list and the corresponding values list:
Here is the column list and the corresponding values list for the table of sites
```
column_list = ['uuid_sinp', 'id_program', 'name', 'geom', 'timestamp_create', 'id_type', 'obs_txt']
values_list = ['UUID', '2', 'import arbres remarquables', 'POINT', 'NOW', '1', 'import' ]
column_list = ['id_site', 'uuid_sinp', 'id_program', 'name', 'geom', 'timestamp_create', 'id_type', 'obs_txt']
values_list = ['__ID__', '__UUID__', '2', 'import arbres remarquables', '__POINT__', '__NOW__', '1', 'import' ]
```
Some values in these list MUST follow some conventions:
Some values in these lists MUST follow special conventions:
- the "UUID" value will be converted to a a formula that creates uuid value
- the "POINT" value will be converted to a PostGIS ST_GeomFromText formula based on the coordinates of the geojson file
- the "NOW" value will be converted to a formula that creates a timestamp
- the "__ID__" value will be converted to a unique identifier for the site, created by adding 1000000 to the index of the feature in the geojson file.
- the "__UUID__" value will be converted to a formula that creates a UUID value
- the "__POINT__" value will be converted to a PostGIS ST_GeomFromText formula based on the coordinates of the geojson file
- the "__NOW__" value will be converted to a formula that creates a timestamp
Lastly, if a value starts with `properties.`, it will be mapped to the corresponding property of the geojson feature, e.g. `properties.COMMENTAIR`.
For populating the table of visits, the same function is applied. But there is another special convention that applies for building the json details of the visit: the "__JSON__" value will be converted to a dedicated formula that will build the json object in the database.
Here is the column list and the corresponding values list for the table of visits
```
column_list = ['id_site', 'json_data', 'timestamp_create', 'obs_txt', 'date']
values_list = ['__ID__', '__JSON__', '__NOW__', 'import', '__NOW__']
```

View File

@ -1,25 +0,0 @@
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.02557 50.626467)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.025652 50.626434)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.032234 50.627176)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.039723 50.635981)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.010499 50.623075)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.010748 50.624248)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.019938 50.609787)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.023591 50.610221)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.046384 50.59244)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.049593 50.597045)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.049672 50.597144)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.050006 50.597396)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.01025 50.605798)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.018572 50.608674)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.015283 50.609051)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.015149 50.609143)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.003621 50.618948)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.004804 50.619465)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.005737 50.620252)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.010218 50.623831)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(5.010279 50.624028)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(4.999617 50.624649)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(4.999643 50.624726)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(4.989328 50.62538)'),now()::timestamptz,'1','import');
INSERT INTO "gnc_sites"."t_sites" ("uuid_sinp","id_program","name","geom","timestamp_create","id_type","obs_txt") VALUES (uuid_generate_v4(),'2','import arbres remarquables',ST_GeomFromText('POINT(4.989827 50.626227)'),now()::timestamptz,'1','import');

View File

@ -1,13 +1,27 @@
import geojson
import json
def convert_coordinates_to_geom(coordinates, geometry_type):
    """Return a PostGIS expression that builds a geometry from coordinates.

    The returned string ends with a comma because the caller concatenates it
    directly into the VALUES list of an INSERT statement.

    Args:
        coordinates: Sequence whose first two items are the x and y of the
            point (as found in ``feature.geometry.coordinates``).
        geometry_type: Geometry marker taken from the values list. Both the
            current ``'__POINT__'`` marker and the former ``'POINT'`` marker
            are accepted.

    Returns:
        A string of the form ``ST_GeomFromText('POINT(x y)'),``.

    Raises:
        ValueError: If ``geometry_type`` is not a supported marker.
    """
    # TODO other cases (lines, polygons, ...)
    if geometry_type in ('__POINT__', 'POINT'):
        return f'ST_GeomFromText(\'POINT({coordinates[0]} {coordinates[1]})\'),'
    # Fail with a clear message instead of hitting an unbound local variable.
    raise ValueError(f'Unsupported geometry type: {geometry_type!r}')
def safe_json(string):
    """Encode a string as JSON, doubling single quotes for PostgreSQL.

    The single quotes are doubled first so that the encoded value can be
    embedded inside a single-quoted SQL string literal.
    """
    sql_escaped = string.replace("'", "''")
    return json.dumps(sql_escaped)
def convert_feature_to_json(feature):
    """Return the visit details of a feature as a quoted SQL JSON literal.

    The selected geojson properties are copied into a JSON object under
    their database key names. Properties that are missing or falsy (``None``,
    ``''``, ``0``) are left out. Values keep their JSON type: numbers stay
    numbers and strings are emitted as properly escaped JSON strings.

    Args:
        feature: Object exposing a ``properties`` mapping (a geojson feature).

    Returns:
        A string of the form ``'{...}',``: the JSON object wrapped in single
        quotes, with inner single quotes doubled for PostgreSQL, followed by
        the comma expected by the caller when building the VALUES list.
    """
    # (geojson property, JSON key) pairs, in output order.
    field_map = (
        ('H', 'hauteur'),
        ('CIRC', 'circonference'),
        ('SANIT', 'etatsanitaire'),
        ('SPFR', 'espece'),
        ('COMMENTAIR', 'remarques'),
    )
    # A feature may carry no properties at all.
    properties = feature.properties or {}
    details = {}
    for prop, key in field_map:
        # .get() tolerates features that lack one of the properties.
        value = properties.get(prop)
        if value:
            details[key] = value
    # Let the json module do the serialisation so the output is always valid
    # JSON; the separators reproduce the historical '"key": value,' layout.
    payload = json.dumps(details, separators=(',', ': '))
    # Double single quotes so the JSON fits in a single-quoted SQL literal.
    return "'" + payload.replace("'", "''") + "',"
def import_geosjon_as_sql(filename, sql_filename, schema_name, table_name, column_list, values_list):
""" Import a geojson """
@ -16,30 +30,36 @@ def import_geosjon_as_sql(filename, sql_filename, schema_name, table_name, colum
sql_f = open(sql_filename,'w')
for f in g['features']:
for i,f in enumerate(g['features']):
#print(f)
sql = f'INSERT INTO "{schema_name}"."{table_name}" ('
col_name = ''
col_value = ''
for i,c in enumerate(column_list):
for j,c in enumerate(column_list):
if values_list[i] == 'UUID':
if values_list[j] == '__ID__':
col_name += f'"{c}",'
col_value += f'\'{1000000 + i}\','
elif values_list[j] == '__UUID__':
col_name += f'"{c}",'
col_value += 'uuid_generate_v4(),'
elif values_list[i] == 'NOW':
elif values_list[j] == '__NOW__':
col_name += f'"{c}",'
col_value += 'now()::timestamptz,'
elif c == 'geom':
elif values_list[j] == '__POINT__':
col_name += f'"{c}",'
col_value += convert_coordinates_to_geom(f.geometry.coordinates, values_list[i])
elif values_list[i].startswith('properties.'):
col_value += convert_coordinates_to_geom(f.geometry.coordinates, values_list[j])
elif values_list[j] == '__JSON__':
col_name += f'"{c}",'
prop = values_list[i].split('.')[1]
col_value += convert_feature_to_json(f)
elif values_list[j].startswith('properties.'):
col_name += f'"{c}",'
prop = values_list[j].split('.')[1]
col_value += f'\'{f.properties[prop]}\','
else:
col_name += f'"{c}",'
col_value += f'\'{values_list[i]}\','
col_value += f'\'{values_list[j]}\','
sql += f'{col_name[:-1]}) VALUES ({col_value[:-1]});\n'
@ -49,13 +69,20 @@ def import_geosjon_as_sql(filename, sql_filename, schema_name, table_name, colum
if __name__ == '__main__':
    # Both exports read the same GeoJSON file and target the same schema.
    filename = './gis/arbres.geojson'
    schema_name = 'gnc_sites'

    # import arbres.geojson into gnc_sites.t_sites
    sql_filename = './sql/arbres-sites.sql'
    table_name = 't_sites'
    column_list = ['id_site', 'uuid_sinp', 'id_program', 'name', 'geom', 'timestamp_create', 'id_type', 'obs_txt']
    values_list = ['__ID__', '__UUID__', '2', 'import arbres remarquables', '__POINT__', '__NOW__', '1', 'import']
    import_geosjon_as_sql(filename, sql_filename, schema_name, table_name, column_list, values_list)
    # add CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; ??
    # (the '__UUID__' marker is emitted as a call to uuid_generate_v4())

    # import arbres.geojson into gnc_sites.t_visit
    # '__ID__' is derived from the feature index, so each visit row refers to
    # the id_site generated for the same feature in the export above.
    sql_filename = './sql/arbres-visits.sql'
    table_name = 't_visit'
    column_list = ['id_site', 'json_data', 'timestamp_create', 'obs_txt', 'date']
    values_list = ['__ID__', '__JSON__', '__NOW__', 'import', '__NOW__']
    import_geosjon_as_sql(filename, sql_filename, schema_name, table_name, column_list, values_list)