@@ -1,20 +1,19 @@
 import asyncio
 import json
 import os
-
-from datetime import datetime
-
 import boto3
-import pandas as pd
-
 from botocore.client import Config, ClientError
 
 from csvapi.parseview import ParseView
 from csvapi.profileview import ProfileView
+from csvapi.utils import enrich_db_with_metadata
 from csvapi.setup_logger import logger
-from csvapi.utils import get_hash, create_connection
-
-from config import DB_ROOT_DIR
+from csvapi.utils import (
+    get_hash,
+    check_message_structure,
+    check_csv_detective_report_structure,
+    check_profile_report_structure
+)
 
 MINIO_URL = os.environ.get("MINIO_URL", "http://localhost:9000")
 MINIO_USER = os.environ.get("MINIO_USER", "minio")
@@ -26,167 +25,87 @@ def run_process_message(key: str, data: dict, topic: str) -> None:
 
 
 async def process_message(key: str, message: dict, topic: str) -> None:
-    # Get url
-    # Should think if we keep that
-    # r = requests.get('https://www.data.gouv.fr/api/1/datasets/{}/resources/{}'.format(message['meta']['dataset_id'], key)) # noqa
-    # url = r.json()['url']
-    if message is not None and message['service'] == 'csvdetective':
-        url = 'https://www.data.gouv.fr/fr/datasets/r/{}'.format(key)
-        urlhash = get_hash(url)
-        logger.info(urlhash)
-
-        # Connect to minio
-        s3_client = boto3.client(
-            "s3",
-            endpoint_url=MINIO_URL,
-            aws_access_key_id=MINIO_USER,
-            aws_secret_access_key=MINIO_PASSWORD,
-            config=Config(signature_version="s3v4"),
-        )
-
-        try:
-            s3_client.head_bucket(Bucket=message['value']['data_location']['bucket'])
-        except ClientError as e:
-            logger.error(e)
-            logger.error(
-                "Bucket {} does not exist or credentials are invalid".format(
-                    message['value']['location']['bucket']
+    if message['service'] == "csvdetective":
+        if check_message_structure(message):
+            url = 'https://www.data.gouv.fr/fr/datasets/r/{}'.format(key)
+            urlhash = get_hash(url)
+            logger.info(urlhash)
+
+            # Connect to minio
+            s3_client = boto3.client(
+                "s3",
+                endpoint_url=MINIO_URL,
+                aws_access_key_id=MINIO_USER,
+                aws_secret_access_key=MINIO_PASSWORD,
+                config=Config(signature_version="s3v4"),
+            )
+
+            try:
+                s3_client.head_bucket(Bucket=message['value']['report_location']['bucket'])
+            except ClientError as e:
+                logger.error(e)
+                logger.error(
+                    "Bucket {} does not exist or credentials are invalid".format(
+                        message['value']['report_location']['bucket']
+                    )
+                )
+                return
+
+            # Load csv-detective report
+            try:
+                response = s3_client.get_object(
+                    Bucket=message['value']['report_location']['bucket'],
+                    Key=message['value']['report_location']['key']
+                )
+                content = response['Body']
+                csv_detective_report = json.loads(content.read())
+            except ClientError as e:
+                logger.error(e)
+                logger.error(
+                    "Report does not exist in bucket or content is not valid json"
                 )
+                return
+
+            if not check_csv_detective_report_structure(csv_detective_report):
+                logger.error(
+                    "csvdetective report malformed"
+                )
+                return
+
+            # Parse file and store it to sqlite
+            parseViewInstance = ParseView()
+            await parseViewInstance.parse_from_consumer(
+                parseViewInstance,
+                url=url,
+                urlhash=urlhash,
+                csv_detective_report=csv_detective_report
             )
-            return
-
-        # Load csv-detective report
-        response = s3_client.get_object(
-            Bucket=message['value']['report_location']['bucket'],
-            Key=message['value']['report_location']['key']
-        )
-        content = response['Body']
-        csv_detective_report = json.loads(content.read())
-
-        # Parse file and store it to sqlite
-        parseViewInstance = ParseView()
-        await parseViewInstance.parse_from_consumer(
-            parseViewInstance,
-            url=url,
-            urlhash=urlhash,
-            csv_detective_report=csv_detective_report,
-        )
-
-        # Profile file
-        profileViewInstance = ProfileView()
-        profile_report = await profileViewInstance.get_minimal_profile(
-            profileViewInstance,
-            urlhash=urlhash,
-            csv_detective_report=csv_detective_report,
-        )
-
-        # Save to sql
-        conn = create_connection(DB_ROOT_DIR + '/' + urlhash + '.db')
-        # c = conn.cursor()
-
-        general_infos = [
-            {
-                'encoding': csv_detective_report['encoding'],
-                'separator': csv_detective_report['separator'],
-                'header_row_idx': csv_detective_report['header_row_idx'],
-                'total_lines': profile_report['table']['n'],
-                'nb_columns': profile_report['table']['n_var'],
-                'nb_cells_missing': profile_report['table']['n_cells_missing'],
-                'nb_vars_with_missing': profile_report['table']['n_vars_with_missing'],
-                'nb_vars_all_missing': profile_report['table']['n_vars_all_missing'],
-                'date_last_check': datetime.today().strftime('%Y-%m-%d'),
-                'dataset_id': message['meta']['dataset_id'],
-                'resource_id': key
-            }
-        ]
-        df = pd.DataFrame(general_infos)
-        df.to_sql('general_infos', con=conn, if_exists='replace', index=False)
-
-        columns_infos = []
-        categorical_infos = []
-        top_infos = []
-        numeric_infos = []
-        numeric_plot_infos = []
-        for col in profile_report['variables']:
-            column_info = {}
-            column_info['name'] = col
-            column_info['format'] = csv_detective_report['columns'][col]['format']
-            column_info['nb_distinct'] = profile_report['variables'][col]['n_distinct']
-            column_info['is_unique'] = profile_report['variables'][col]['is_unique']
-            column_info['nb_unique'] = profile_report['variables'][col]['n_unique']
-            column_info['type'] = profile_report['variables'][col]['type']
-            column_info['nb_missing'] = profile_report['variables'][col]['n_missing']
-            column_info['count'] = profile_report['variables'][col]['count']
-            columns_infos.append(column_info)
-
-            cat_cols = [
-                'siren', 'siret', 'code_postal', 'code_commune_insee',
-                'code_departement', 'code_region', 'tel_fr',
-            ]
-            if csv_detective_report['columns'][col]['format'] in cat_cols:
-                column_info['type'] = 'Categorical'
-
-            if column_info['type'] == 'Categorical' and \
-                    len(profile_report['variables'][col]['value_counts_without_nan']) < 10:
-                for cat in profile_report['variables'][col]['value_counts_without_nan']:
-                    categorical_info = {}
-                    categorical_info['column'] = col
-                    categorical_info['value'] = cat
-                    categorical_info['nb'] = profile_report['variables'][col]['value_counts_without_nan'][cat]
-                    categorical_infos.append(categorical_info)
-
-            if column_info['type'] == 'Numeric':
-                numeric_info = {}
-                numeric_info['column'] = col
-                numeric_info['mean'] = profile_report['variables'][col]['mean']
-                numeric_info['std'] = profile_report['variables'][col]['std']
-                numeric_info['min'] = profile_report['variables'][col]['min']
-                numeric_info['max'] = profile_report['variables'][col]['max']
-                numeric_infos.append(numeric_info)
-                for i in range(len(profile_report['variables'][col]['histogram']['bin_edges'])):
-                    numeric_plot_info = {}
-                    numeric_plot_info['column'] = col
-                    numeric_plot_info['value'] = profile_report['variables'][col]['histogram']['bin_edges'][i]
-                    numeric_plot_info['type'] = 'bin_edges'
-                    numeric_plot_infos.append(numeric_plot_info)
-
-                for i in range(len(profile_report['variables'][col]['histogram']['counts'])):
-                    numeric_plot_info = {}
-                    numeric_plot_info['column'] = col
-                    numeric_plot_info['value'] = profile_report['variables'][col]['histogram']['counts'][i]
-                    numeric_plot_info['type'] = 'counts'
-                    numeric_plot_infos.append(numeric_plot_info)
-
-            cpt = 0
-            for top in profile_report['variables'][col]['value_counts_without_nan']:
-                if (cpt < 10):
-                    top_info = {}
-                    top_info['column'] = col
-                    top_info['value'] = top
-                    top_info['nb'] = profile_report['variables'][col]['value_counts_without_nan'][top]
-                    top_infos.append(top_info)
-                cpt = cpt + 1
-
-        df = pd.DataFrame(columns_infos)
-        if df.shape[0] > 0:
-            df.to_sql('columns_infos', con=conn, if_exists='replace', index=False)
-
-        df = pd.DataFrame(categorical_infos)
-        if df.shape[0] > 0:
-            df.to_sql('categorical_infos', con=conn, if_exists='replace', index=False)
-
-        df = pd.DataFrame(top_infos)
-        if df.shape[0] > 0:
-            df.to_sql('top_infos', con=conn, if_exists='replace', index=False)
-
-        df = pd.DataFrame(numeric_infos)
-        if df.shape[0] > 0:
-            df.to_sql('numeric_infos', con=conn, if_exists='replace', index=False)
-
-        df = pd.DataFrame(numeric_plot_infos)
-        if df.shape[0] > 0:
-            df.to_sql('numeric_plot_infos', con=conn, if_exists='replace', index=False)
-
-        conn.commit()
-
-        print('ok')
+
+            # Profile file
+            profileViewInstance = ProfileView()
+            profile_report = await profileViewInstance.get_minimal_profile(
+                profileViewInstance,
+                urlhash=urlhash,
+                csv_detective_report=csv_detective_report
+            )
+
+            if not check_profile_report_structure(profile_report):
+                logger.error(
+                    "pandas profiling report malformed"
+                )
+                return
+
+            enrich_db_with_metadata(
+                urlhash,
+                csv_detective_report,
+                profile_report,
+                message['meta']['dataset_id'],
+                key
+            )
+
+            logger.info('Enrichment done!')
+
+        else:
+            logger.error('Problem with structure message')
+    else:
+        logger.info('Message received from {} service - do not process'.format(message['service']))
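
Note: the new guard clauses depend on csvapi.utils helpers whose implementations are not part of this diff. As a rough illustration only, based on the keys that process_message dereferences (service, value.report_location.bucket, value.report_location.key, meta.dataset_id), a structure check could look like the sketch below; the actual check_message_structure in csvapi.utils may differ.

# Hypothetical sketch, not the csvapi.utils implementation.
# It only mirrors the keys that process_message reads above.
def check_message_structure_sketch(message: dict) -> bool:
    try:
        return all(
            isinstance(value, str)
            for value in (
                message['service'],
                message['value']['report_location']['bucket'],
                message['value']['report_location']['key'],
                message['meta']['dataset_id'],
            )
        )
    except (KeyError, TypeError):
        return False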