Commit 41678a1

maudetes, geoffreyaldebert and abulte authored Sep 6, 2022

Perf modifications (#98)

* Check message structure to prevent errors
* Add pandas profiling analysis (optional) in api
* Update requirements (csv-detective)
* Update message structure format
* Add requirements
* Remove requirements, switch to poetry
* Add poetry lock file
* Lint code
* setuptools
* upgrade and clean deps
* lint test

Co-authored-by: Geoffrey Aldebert <geoffrey.aldebert@data.gouv.fr>
Co-authored-by: Alexandre Bulté <alexandre@bulte.net>

1 parent a83e645 · commit 41678a1

10 files changed: +762 -416 lines
 

.vscode/settings.json (+5)

@@ -0,0 +1,5 @@
+{
+    "python.pythonPath": "/Users/geoffrey/.pyenv/versions/3.9.7/envs/csvapi/bin/python",
+    "python.linting.enabled": true,
+    "python.linting.flake8Args": ["--ignore=E24,W504", "--verbose"]
+}

config.py (+1 -1)

@@ -14,4 +14,4 @@
 # It will also match subdomains
 # e.g. REFERRERS_FILTER = ['data.gouv.fr'] will match 'demo.data.gouv.fr'
 REFERRERS_FILTER = None
-PANDAS_PROFILING_CONFIG_MIN = False
+PANDAS_PROFILING_CONFIG_MIN = False
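The removed and re-added lines are textually identical, so this is most likely a whitespace-only change (e.g. a trailing newline). For context, `PANDAS_PROFILING_CONFIG_MIN` plausibly maps onto pandas-profiling's `minimal` mode, which skips the expensive parts of a report; a minimal sketch, assuming the flag is ultimately forwarded to `ProfileReport` (that wiring is not part of this diff):

```python
# Sketch only: what a "minimal" pandas-profiling run looks like.
# minimal=True disables costly steps such as correlations and interactions.
import pandas as pd
from pandas_profiling import ProfileReport

df = pd.DataFrame({"code_postal": ["75001", "33000"], "valeur": [1.5, 2.0]})
profile = ProfileReport(df, minimal=True)
description = profile.get_description()  # dict with 'table' and 'variables' keys
```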

csvapi/consumer.py (+89 -170)

@@ -1,20 +1,19 @@
 import asyncio
 import json
 import os
-
-from datetime import datetime
-
 import boto3
-import pandas as pd
-
 from botocore.client import Config, ClientError

 from csvapi.parseview import ParseView
 from csvapi.profileview import ProfileView
+from csvapi.utils import enrich_db_with_metadata
 from csvapi.setup_logger import logger
-from csvapi.utils import get_hash, create_connection
-
-from config import DB_ROOT_DIR
+from csvapi.utils import (
+    get_hash,
+    check_message_structure,
+    check_csv_detective_report_structure,
+    check_profile_report_structure
+)

 MINIO_URL = os.environ.get("MINIO_URL", "http://localhost:9000")
 MINIO_USER = os.environ.get("MINIO_USER", "minio")
@@ -26,167 +25,87 @@ def run_process_message(key: str, data: dict, topic: str) -> None:


 async def process_message(key: str, message: dict, topic: str) -> None:
-    # Get url
-    # Should think if we keep that
-    # r = requests.get('https://www.data.gouv.fr/api/1/datasets/{}/resources/{}'.format(message['meta']['dataset_id'], key))  # noqa
-    # url = r.json()['url']
-    if message is not None and message['service'] == 'csvdetective':
-        url = 'https://www.data.gouv.fr/fr/datasets/r/{}'.format(key)
-        urlhash = get_hash(url)
-        logger.info(urlhash)
-
-        # Connect to minio
-        s3_client = boto3.client(
-            "s3",
-            endpoint_url=MINIO_URL,
-            aws_access_key_id=MINIO_USER,
-            aws_secret_access_key=MINIO_PASSWORD,
-            config=Config(signature_version="s3v4"),
-        )
-
-        try:
-            s3_client.head_bucket(Bucket=message['value']['data_location']['bucket'])
-        except ClientError as e:
-            logger.error(e)
-            logger.error(
-                "Bucket {} does not exist or credentials are invalid".format(
-                    message['value']['location']['bucket']
+    if message['service'] == "csvdetective":
+        if check_message_structure(message):
+            url = 'https://www.data.gouv.fr/fr/datasets/r/{}'.format(key)
+            urlhash = get_hash(url)
+            logger.info(urlhash)
+
+            # Connect to minio
+            s3_client = boto3.client(
+                "s3",
+                endpoint_url=MINIO_URL,
+                aws_access_key_id=MINIO_USER,
+                aws_secret_access_key=MINIO_PASSWORD,
+                config=Config(signature_version="s3v4"),
+            )
+
+            try:
+                s3_client.head_bucket(Bucket=message['value']['report_location']['bucket'])
+            except ClientError as e:
+                logger.error(e)
+                logger.error(
+                    "Bucket {} does not exist or credentials are invalid".format(
+                        message['value']['report_location']['bucket']
+                    )
+                )
+                return
+
+            # Load csv-detective report
+            try:
+                response = s3_client.get_object(
+                    Bucket=message['value']['report_location']['bucket'],
+                    Key=message['value']['report_location']['key']
+                )
+                content = response['Body']
+                csv_detective_report = json.loads(content.read())
+            except ClientError as e:
+                logger.error(e)
+                logger.error(
+                    "Report does not exist in bucket or content is not valid json"
                 )
+                return
+
+            if not check_csv_detective_report_structure(csv_detective_report):
+                logger.error(
+                    "csvdetective report malformed"
+                )
+                return
+
+            # Parse file and store it to sqlite
+            parseViewInstance = ParseView()
+            await parseViewInstance.parse_from_consumer(
+                parseViewInstance,
+                url=url,
+                urlhash=urlhash,
+                csv_detective_report=csv_detective_report
             )
-            return
-
-        # Load csv-detective report
-        response = s3_client.get_object(
-            Bucket=message['value']['report_location']['bucket'],
-            Key=message['value']['report_location']['key']
-        )
-        content = response['Body']
-        csv_detective_report = json.loads(content.read())
-
-        # Parse file and store it to sqlite
-        parseViewInstance = ParseView()
-        await parseViewInstance.parse_from_consumer(
-            parseViewInstance,
-            url=url,
-            urlhash=urlhash,
-            csv_detective_report=csv_detective_report,
-        )
-
-        # Profile file
-        profileViewInstance = ProfileView()
-        profile_report = await profileViewInstance.get_minimal_profile(
-            profileViewInstance,
-            urlhash=urlhash,
-            csv_detective_report=csv_detective_report,
-        )
-
-        # Save to sql
-        conn = create_connection(DB_ROOT_DIR+'/'+urlhash+'.db')
-        # c = conn.cursor()
-
-        general_infos = [
-            {
-                'encoding': csv_detective_report['encoding'],
-                'separator': csv_detective_report['separator'],
-                'header_row_idx': csv_detective_report['header_row_idx'],
-                'total_lines': profile_report['table']['n'],
-                'nb_columns': profile_report['table']['n_var'],
-                'nb_cells_missing': profile_report['table']['n_cells_missing'],
-                'nb_vars_with_missing': profile_report['table']['n_vars_with_missing'],
-                'nb_vars_all_missing': profile_report['table']['n_vars_all_missing'],
-                'date_last_check': datetime.today().strftime('%Y-%m-%d'),
-                'dataset_id': message['meta']['dataset_id'],
-                'resource_id': key
-            }
-        ]
-        df = pd.DataFrame(general_infos)
-        df.to_sql('general_infos', con=conn, if_exists='replace', index=False)
-
-        columns_infos = []
-        categorical_infos = []
-        top_infos = []
-        numeric_infos = []
-        numeric_plot_infos = []
-        for col in profile_report['variables']:
-            column_info = {}
-            column_info['name'] = col
-            column_info['format'] = csv_detective_report['columns'][col]['format']
-            column_info['nb_distinct'] = profile_report['variables'][col]['n_distinct']
-            column_info['is_unique'] = profile_report['variables'][col]['is_unique']
-            column_info['nb_unique'] = profile_report['variables'][col]['n_unique']
-            column_info['type'] = profile_report['variables'][col]['type']
-            column_info['nb_missing'] = profile_report['variables'][col]['n_missing']
-            column_info['count'] = profile_report['variables'][col]['count']
-            columns_infos.append(column_info)
-
-            cat_cols = [
-                'siren', 'siret', 'code_postal', 'code_commune_insee',
-                'code_departement', 'code_region', 'tel_fr',
-            ]
-            if csv_detective_report['columns'][col]['format'] in cat_cols:
-                column_info['type'] = 'Categorical'
-
-            if column_info['type'] == 'Categorical' and \
-                    len(profile_report['variables'][col]['value_counts_without_nan']) < 10:
-                for cat in profile_report['variables'][col]['value_counts_without_nan']:
-                    categorical_info = {}
-                    categorical_info['column'] = col
-                    categorical_info['value'] = cat
-                    categorical_info['nb'] = profile_report['variables'][col]['value_counts_without_nan'][cat]
-                    categorical_infos.append(categorical_info)
-
-            if column_info['type'] == 'Numeric':
-                numeric_info = {}
-                numeric_info['column'] = col
-                numeric_info['mean'] = profile_report['variables'][col]['mean']
-                numeric_info['std'] = profile_report['variables'][col]['std']
-                numeric_info['min'] = profile_report['variables'][col]['min']
-                numeric_info['max'] = profile_report['variables'][col]['max']
-                numeric_infos.append(numeric_info)
-                for i in range(len(profile_report['variables'][col]['histogram']['bin_edges'])):
-                    numeric_plot_info = {}
-                    numeric_plot_info['column'] = col
-                    numeric_plot_info['value'] = profile_report['variables'][col]['histogram']['bin_edges'][i]
-                    numeric_plot_info['type'] = 'bin_edges'
-                    numeric_plot_infos.append(numeric_plot_info)
-
-                for i in range(len(profile_report['variables'][col]['histogram']['counts'])):
-                    numeric_plot_info = {}
-                    numeric_plot_info['column'] = col
-                    numeric_plot_info['value'] = profile_report['variables'][col]['histogram']['counts'][i]
-                    numeric_plot_info['type'] = 'counts'
-                    numeric_plot_infos.append(numeric_plot_info)
-
-            cpt = 0
-            for top in profile_report['variables'][col]['value_counts_without_nan']:
-                if (cpt < 10):
-                    top_info = {}
-                    top_info['column'] = col
-                    top_info['value'] = top
-                    top_info['nb'] = profile_report['variables'][col]['value_counts_without_nan'][top]
-                    top_infos.append(top_info)
-                cpt = cpt + 1
-
-        df = pd.DataFrame(columns_infos)
-        if df.shape[0] > 0:
-            df.to_sql('columns_infos', con=conn, if_exists='replace', index=False)
-
-        df = pd.DataFrame(categorical_infos)
-        if df.shape[0] > 0:
-            df.to_sql('categorical_infos', con=conn, if_exists='replace', index=False)
-
-        df = pd.DataFrame(top_infos)
-        if df.shape[0] > 0:
-            df.to_sql('top_infos', con=conn, if_exists='replace', index=False)
-
-        df = pd.DataFrame(numeric_infos)
-        if df.shape[0] > 0:
-            df.to_sql('numeric_infos', con=conn, if_exists='replace', index=False)
-
-        df = pd.DataFrame(numeric_plot_infos)
-        if df.shape[0] > 0:
-            df.to_sql('numeric_plot_infos', con=conn, if_exists='replace', index=False)
-
-        conn.commit()
-
-        print('ok')
+
+            # Profile file
+            profileViewInstance = ProfileView()
+            profile_report = await profileViewInstance.get_minimal_profile(
+                profileViewInstance,
+                urlhash=urlhash,
+                csv_detective_report=csv_detective_report
+            )
+
+            if not check_profile_report_structure(profile_report):
+                logger.error(
+                    "pandas profiling report malformed"
+                )
+                return
+
+            enrich_db_with_metadata(
+                urlhash,
+                csv_detective_report,
+                profile_report,
+                message['meta']['dataset_id'],
+                key
+            )
+
+            logger.info('Enrichment done!')

+        else:
+            logger.error('Problem with structure message')
+    else:
+        logger.info('Message received from {} service - do not process'.format(message['service']))
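The rewritten consumer now refuses to touch MinIO until `check_message_structure()` (defined in csvapi/utils.py below) has accepted the message. A hedged example of a message that passes the gate; every value here is invented, only the key layout is what the check requires:

```python
# Invented values; check_message_structure() only looks at the key layout.
message = {
    "service": "csvdetective",
    "value": {
        "report_location": {
            "bucket": "csvdetective-reports",   # bucket holding the analysis report
            "key": "reports/resource-123.json"  # object key of the JSON report
        }
    },
    "meta": {"dataset_id": "dataset-123"}
}
```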

csvapi/parser.py (+1)

@@ -55,6 +55,7 @@ def from_csv(filepath, encoding='utf-8', sniff_limit=SNIFF_LIMIT, agate_types=No
             'encoding': encoding,
             'column_types': agate_tester()
         }
+        logger.warning('Types from csv-detective provoke errors, use of agate type tester instead.')
         return agate.Table.from_csv(filepath, **kwargs)
     except Exception as e:
         logger.error('error casting %s', e)
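The new warning documents the fallback path: when the column types derived from the csv-detective report fail to cast, the table is re-parsed with agate's own type inference (`agate_tester()` in the diff is csvapi's helper; the sketch below uses agate directly and assumes a local `example.csv`):

```python
# Sketch of agate's type inference, the fallback the warning refers to.
import agate

tester = agate.TypeTester()  # tries Boolean, Number, Date, DateTime, ..., Text
table = agate.Table.from_csv("example.csv", column_types=tester)
print(table.column_types)
```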

csvapi/parseview.py (+58 -5)

@@ -7,14 +7,24 @@
 from quart import request, jsonify, current_app as app
 from quart.views import MethodView

+from csvapi.profileview import ProfileView
+
 from csvapi.errors import APIError
 from csvapi.parser import parse
-from csvapi.utils import already_exists, get_hash
+from csvapi.utils import (
+    already_exists,
+    get_hash,
+    check_csv_detective_report_structure,
+    check_profile_report_structure,
+    enrich_db_with_metadata
+)

 from csvapi.setup_logger import logger
 from csvapi.type_tester import convert_types
 from config import DB_ROOT_DIR, CSV_SNIFF_LIMIT, MAX_FILE_SIZE

+from csv_detective.explore_csv import routine
+

 class ParseView(MethodView):

@@ -38,7 +48,17 @@ async def parse_from_consumer(self, url: str, urlhash: str, csv_detective_report
         )

     @staticmethod
-    async def do_parse(url, urlhash, encoding, storage, logger, sniff_limit, max_file_size, agate_types=None):
+    async def do_parse(
+        url,
+        urlhash,
+        encoding,
+        storage,
+        logger,
+        sniff_limit,
+        max_file_size,
+        agate_types=None,
+        analysis=None
+    ):
         logger.debug('* do_parse %s (%s)', urlhash, url)
         tmp = tempfile.NamedTemporaryFile(delete=False)
         chunk_count = 0
@@ -56,9 +76,42 @@ async def do_parse(url, urlhash, encoding, storage, logger, sniff_limit, max_fil
                         tmp.write(chunk)
                         chunk_count += 1
             tmp.close()
+
             logger.debug('* Downloaded %s', urlhash)
             logger.debug('* Parsing %s...', urlhash)
             parse(tmp.name, urlhash, storage, encoding=encoding, sniff_limit=sniff_limit, agate_types=agate_types)
+
+            if analysis and analysis == 'yes':
+                csv_detective_report = routine(tmp.name)
+                logger.info(csv_detective_report)
+
+                if not check_csv_detective_report_structure(csv_detective_report):
+                    logger.error(
+                        "csvdetective report malformed"
+                    )
+                    return
+
+                profileViewInstance = ProfileView()
+                profile_report = await profileViewInstance.get_minimal_profile(
+                    profileViewInstance,
+                    urlhash=urlhash,
+                    csv_detective_report=csv_detective_report
+                )
+
+                if not check_profile_report_structure(profile_report):
+                    logger.error(
+                        "pandas profiling report malformed"
+                    )
+                    return
+
+                enrich_db_with_metadata(
+                    urlhash,
+                    csv_detective_report,
+                    profile_report,
+                    None,
+                    None
+                )
+
             logger.debug('* Parsed %s', urlhash)
         finally:
             logger.debug('Removing tmp file: %s', tmp.name)
@@ -73,7 +126,7 @@ async def get(self):
         if not validators.url(url):
             raise APIError('Malformed url parameter.', status=400)
         urlhash = get_hash(url)
-
+        analysis = request.args.get('analysis')
         if not already_exists(urlhash):
             try:
                 storage = app.config['DB_ROOT_DIR']
@@ -83,8 +136,8 @@ async def get(self):
                     storage=storage,
                     logger=app.logger,
                     sniff_limit=app.config.get('CSV_SNIFF_LIMIT'),
-                    max_file_size=app.config.get('MAX_FILE_SIZE')
-                )
+                    max_file_size=app.config.get('MAX_FILE_SIZE'),
+                    analysis=analysis)
             except Exception as e:
                 raise APIError('Error parsing CSV: %s' % e)
         else:
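End to end, the new `analysis` flag makes the parse endpoint optionally run csv-detective's `routine()` plus a minimal profile, and store the results alongside the parsed table. A hypothetical client call; the host, port and `/apify` route are assumptions, not shown in this diff:

```python
# Hypothetical call exercising the new optional analysis.
import requests

resp = requests.get(
    "http://localhost:8000/apify",
    params={"url": "https://example.com/data.csv", "analysis": "yes"},
)
print(resp.json())
```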

csvapi/utils.py (+172 -1)

@@ -7,6 +7,8 @@
 from config import PROFILES_ROOT_DIR, DB_ROOT_DIR

 import sqlite3
+from datetime import datetime
+import pandas as pd

 executor = None

@@ -47,4 +49,173 @@ def already_exists(urlhash):


 def create_connection(db_file):
-    return sqlite3.connect(db_file, uri=True)
+    conn = None
+    conn = sqlite3.connect(db_file)
+    return conn
+
+
+def keys_exists(element, *keys):
+    '''
+    Check if *keys (nested) exists in `element` (dict).
+    '''
+    if not isinstance(element, dict):
+        raise AttributeError('keys_exists() expects dict as first argument.')
+    if len(keys) == 0:
+        raise AttributeError('keys_exists() expects at least two arguments, one given.')
+    _element = element
+    for key in keys:
+        try:
+            _element = _element[key]
+        except KeyError:
+            return False
+    return True
+
+
+def check_message_structure(message):
+    if (message is not None) & \
+            (keys_exists(message, "service")) & \
+            (keys_exists(message, "value", "report_location", "bucket")) & \
+            (keys_exists(message, "value", "report_location", "key")) & \
+            (keys_exists(message, "meta", "dataset_id")):
+        return True
+    else:
+        return False
+
+
+def check_csv_detective_report_structure(report):
+    if (report is not None) and \
+            (keys_exists(report, "columns")) and \
+            (keys_exists(report, "encoding")) and \
+            (keys_exists(report, "separator")) and \
+            (keys_exists(report, "header_row_idx")):
+
+        for item in report['columns']:
+            if (not keys_exists(report, "columns", item, "python_type")) | \
+                    (not keys_exists(report, "columns", item, "format")):
+                return False
+        return True
+    else:
+        return False
+
+
+def check_profile_report_structure(report):
+    if (report is not None) and \
+            (keys_exists(report, "table", "n")) and \
+            (keys_exists(report, "table", "n_var")) and \
+            (keys_exists(report, "table", "n_cells_missing")) and \
+            (keys_exists(report, "table", "n_vars_with_missing")) and \
+            (keys_exists(report, "table", "n_vars_all_missing")) and \
+            (keys_exists(report, "table", "n_cells_missing")) and \
+            (keys_exists(report, "variables")):
+
+        for item in report['variables']:
+            if (not keys_exists(report, "variables", item, "n_distinct")) | \
+                    (not keys_exists(report, "variables", item, "is_unique")) | \
+                    (not keys_exists(report, "variables", item, "n_unique")) | \
+                    (not keys_exists(report, "variables", item, "type")) | \
+                    (not keys_exists(report, "variables", item, "n_missing")) | \
+                    (not keys_exists(report, "variables", item, "count")):
+                return False
+        return True
+    else:
+        return False
+
+
+def df_to_sql(obj, conn, name):
+    df = pd.DataFrame(obj)
+    if df.shape[0] > 0:
+        df.to_sql(name, con=conn, if_exists='replace', index=False)
+
+
+def enrich_db_with_metadata(urlhash, csv_detective_report, profile_report, dataset_id, key):
+    # Save to sql
+    conn = create_connection(DB_ROOT_DIR + '/' + urlhash + '.db')
+    # c = conn.cursor()
+
+    general_infos = [
+        {
+            'encoding': csv_detective_report['encoding'],
+            'separator': csv_detective_report['separator'],
+            'header_row_idx': csv_detective_report['header_row_idx'],
+            'total_lines': profile_report['table']['n'],
+            'nb_columns': profile_report['table']['n_var'],
+            'nb_cells_missing': profile_report['table']['n_cells_missing'],
+            'nb_vars_with_missing': profile_report['table']['n_vars_with_missing'],
+            'nb_vars_all_missing': profile_report['table']['n_vars_all_missing'],
+            'date_last_check': datetime.today().strftime('%Y-%m-%d'),
+            'dataset_id': dataset_id,
+            'resource_id': key
+        }
+    ]
+    df = pd.DataFrame(general_infos)
+    df.to_sql('general_infos', con=conn, if_exists='replace', index=False)
+
+    columns_infos = []
+    categorical_infos = []
+    top_infos = []
+    numeric_infos = []
+    numeric_plot_infos = []
+    for col in profile_report['variables']:
+        column_info = {}
+        column_info['name'] = col
+        column_info['format'] = csv_detective_report['columns'][col]['format']
+        column_info['nb_distinct'] = profile_report['variables'][col]['n_distinct']
+        column_info['is_unique'] = profile_report['variables'][col]['is_unique']
+        column_info['nb_unique'] = profile_report['variables'][col]['n_unique']
+        column_info['type'] = profile_report['variables'][col]['type']
+        column_info['nb_missing'] = profile_report['variables'][col]['n_missing']
+        column_info['count'] = profile_report['variables'][col]['count']
+        columns_infos.append(column_info)
+
+        if csv_detective_report['columns'][col]['format'] in \
+                ['siren', 'siret', 'code_postal', 'code_commune_insee', 'code_departement', 'code_region', 'tel_fr']:
+            column_info['type'] = 'Categorical'
+
+        if (column_info['type'] == 'Categorical') & \
+                (len(profile_report['variables'][col]['value_counts_without_nan']) < 10):
+            for cat in profile_report['variables'][col]['value_counts_without_nan']:
+                categorical_info = {}
+                categorical_info['column'] = col
+                categorical_info['value'] = cat
+                categorical_info['nb'] = profile_report['variables'][col]['value_counts_without_nan'][cat]
+                categorical_infos.append(categorical_info)
+
+        if column_info['type'] == 'Numeric':
+            numeric_info = {}
+            numeric_info['column'] = col
+            numeric_info['mean'] = profile_report['variables'][col]['mean']
+            numeric_info['std'] = profile_report['variables'][col]['std']
+            numeric_info['min'] = profile_report['variables'][col]['min']
+            numeric_info['max'] = profile_report['variables'][col]['max']
+            numeric_infos.append(numeric_info)
+            for i in range(len(profile_report['variables'][col]['histogram']['bin_edges'])):
+                numeric_plot_info = {}
+                numeric_plot_info['column'] = col
+                numeric_plot_info['value'] = profile_report['variables'][col]['histogram']['bin_edges'][i]
+                numeric_plot_info['type'] = 'bin_edges'
+                numeric_plot_infos.append(numeric_plot_info)
+
+            for i in range(len(profile_report['variables'][col]['histogram']['counts'])):
+                numeric_plot_info = {}
+                numeric_plot_info['column'] = col
+                numeric_plot_info['value'] = profile_report['variables'][col]['histogram']['counts'][i]
+                numeric_plot_info['type'] = 'counts'
+                numeric_plot_infos.append(numeric_plot_info)
+
+        cpt = 0
+        for top in profile_report['variables'][col]['value_counts_without_nan']:
+            if (cpt < 10):
+                top_info = {}
+                top_info['column'] = col
+                top_info['value'] = top
+                top_info['nb'] = profile_report['variables'][col]['value_counts_without_nan'][top]
+                top_infos.append(top_info)
+            cpt = cpt + 1
+
+    df_to_sql(columns_infos, conn, 'columns_infos')
+    df_to_sql(categorical_infos, conn, 'categorical_infos')
+    df_to_sql(top_infos, conn, 'top_infos')
+    df_to_sql(numeric_infos, conn, 'numeric_infos')
+    df_to_sql(numeric_plot_infos, conn, 'numeric_plot_infos')
+
+    conn.commit()
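A quick illustration of `keys_exists()`, the building block of the three `check_*` helpers (the report dict here is invented). One caveat worth noting: `check_message_structure()` combines its conditions with bitwise `&`, which does not short-circuit, so a `None` message still reaches `keys_exists()` and raises `AttributeError` rather than falling through to `False`:

```python
# Invented report; keys_exists() walks nested keys and returns False on the first miss.
report = {"columns": {"code_postal": {"python_type": "string", "format": "code_postal"}}}

assert keys_exists(report, "columns", "code_postal", "format")
assert not keys_exists(report, "columns", "code_insee")
```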

csvapi/webservice.py (+1 -1)

@@ -3,7 +3,7 @@

 from quart import Quart, jsonify
 from quart_cors import cors
-from quart.exceptions import NotFound
+from werkzeug.exceptions import NotFound

 from csvapi.errors import APIError
 from csvapi.tableview import TableView
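This one-line change follows the Quart upgrade in pyproject.toml below: Quart 0.18 dropped its own `quart.exceptions` module in favour of Werkzeug's HTTP exceptions. A sketch of the pattern under the new import (handler name and body are illustrative, not csvapi's actual handler):

```python
# Illustrative only: catching Werkzeug's NotFound in a Quart app.
from quart import Quart
from werkzeug.exceptions import NotFound

app = Quart(__name__)

@app.errorhandler(NotFound)
async def handle_not_found(e):
    return {"error": str(e)}, 404
```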

poetry.lock (+410 -216)

Some generated files are not rendered by default.

pyproject.toml (+17 -18)

@@ -9,31 +9,30 @@ license = "MIT"
 python = ">=3.9,<4"
 click_default_group = "~1.2.2"
 click = "~8.1.3"
-agate = "~1.6.1"
-agate-sql = "~0.5.5"
-aiohttp = "~3.7.3"
-validators = "~0.18.2"
-agate-excel = "~0.2.3"
-Quart = "~0.14.1"
-quart-cors = "~0.3.0"
-sentry-sdk = "~1.0.0"
+agate = "~1.6.3"
+agate-sql = "~0.5.8"
+aiohttp = "~3.8.1"
+validators = "~0.20.0"
+agate-excel = "~0.2.5"
+Quart = "~0.18.0"
+quart-cors = "~0.5.0"
+sentry-sdk = "~1.9.8"
 cchardet = "~2.1.7"
-python-stdnum = "~1.15"
-aiosqlite = "~0.16.1"
-pandas = "~1.4.3"
+python-stdnum = "~1.17"
+aiosqlite = "~0.17.0"
+pandas = "~1.4.4"
 pandas-profiling = "~3.2.0"
-requests = "~2.27.1"
+requests = "~2.28.1"
 udata-event-service = "~0.0.8"
-boto3 = "~1.21.21"
-Jinja2 = "~3.0.3"
-Werkzeug = "~1.0.0"
+boto3 = "~1.24.66"
+csv-detective = "~0.4.5"

 [tool.poetry.dev-dependencies]
-aioresponses = "~0.7.1"
-pytest = "~7.1.2"
+aioresponses = "~0.7.3"
+pytest = "~7.1.3"
 pytest-asyncio = "~0.19.0"
 flake8 = "~5.0.4"
-pytest-cov = "~2.5.1"
+pytest-cov = "~3.0.0"

 [tool.poetry.scripts]
 csvapi = "csvapi.cli:cli"

tests/test_api.py (+8 -4)

@@ -269,7 +269,8 @@ async def test_api_objects_shape(client, rmock, uploaded_csv):
     res = await client.get(f"/api/{MOCK_CSV_HASH}?_shape=objects")
     assert res.status_code == 200
     jsonres = await res.json
-    assert jsonres['rows'] == [{
+    assert jsonres['rows'] == [
+        {
         'rowid': 1,
         'col a': 'data à1',
         'col b': 'data b1',
@@ -279,22 +280,25 @@ async def test_api_objects_shape(client, rmock, uploaded_csv):
         'col a': 'data ª2',
         'col b': 'data b2',
         'col c': 'a',
-    }]
+        }
+    ]


 async def test_api_objects_norowid(client, rmock, uploaded_csv):
     res = await client.get(f"/api/{MOCK_CSV_HASH}?_shape=objects&_rowid=hide")
     assert res.status_code == 200
     jsonres = await res.json
-    assert jsonres['rows'] == [{
+    assert jsonres['rows'] == [
+        {
         'col a': 'data à1',
         'col b': 'data b1',
         'col c': 'z',
     }, {
         'col a': 'data ª2',
         'col b': 'data b2',
         'col c': 'a',
-    }]
+        }
+    ]


 async def test_api_objects_nototal(client, rmock, uploaded_csv):
