Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: etalab/csvapi
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v0.0.9
Choose a base ref
...
head repository: etalab/csvapi
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Loading
125 changes: 83 additions & 42 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,106 +1,147 @@
---
version: 2
version: 2.1

parameters:
docker-image:
type: string
default: "etalab/csvapi-circle"
python-module:
type: string
default: "csvapi"
publish-branch:
type: string
default: "master"
cache-prefix:
type: string
default: "py-cache-v2"

jobs:
python:
install:
docker:
- image: circleci/python:3.6.6-stretch-browsers
- image: << pipeline.parameters.docker-image >>
steps:
- checkout
- run:
name: Compute Python dependencies key
command: cat requirements/*.pip > python.deps
- run:
name: Get the base reference branch
command: export BASE_BRANCH=$(base_branch)
- restore_cache:
keys:
- py-cache-v2-{{ checksum "python.deps" }}
- py-cache-v2-{{ .Branch }}
- py-cache-v2-{{ .Environment.BASE_BRANCH }}
- << pipeline.parameters.cache-prefix >>-{{ arch }}-{{ checksum "poetry.lock" }}
- << pipeline.parameters.cache-prefix >>-{{ arch }}-{{ .Branch }}
- << pipeline.parameters.cache-prefix >>-{{ arch }}-{{ .Environment.BASE_BRANCH }}
- run:
name: Install python dependencies
command: |
python3 -m venv venv
source venv/bin/activate
pip install -r requirements/develop.pip || pip install -r requirements/develop.pip
poetry self update
poetry config virtualenvs.in-project true
poetry install
- save_cache:
key: py-cache-v2-{{ checksum "python.deps" }}
key: << pipeline.parameters.cache-prefix >>-{{ arch }}-{{ checksum "poetry.lock" }}
paths:
- venv
- .venv
- save_cache:
key: py-cache-v2-{{ .Branch }}
key: << pipeline.parameters.cache-prefix >>-{{ arch }}-{{ .Branch }}
paths:
- .venv
- persist_to_workspace:
root: .
paths:
- venv
- .

lint:
docker:
- image: << pipeline.parameters.docker-image >>
steps:
- attach_workspace:
at: .
- run:
name: Lint code
command: poetry run flake8 << pipeline.parameters.python-module >>

tests:
docker:
- image: << pipeline.parameters.docker-image >>
steps:
- attach_workspace:
at: .
- run:
name: Run tests
command: |
source venv/bin/activate
inv qa test --report
poetry run pytest --junitxml=reports/python/tests.xml -p no:sugar --color=yes
- store_test_results:
path: reports/python
- store_artifacts:
path: reports/
destination: reports

- persist_to_workspace:
root: .
paths:
- venv

dist:
build:
docker:
- image: circleci/python:3.6.6-stretch-browsers
- image: << pipeline.parameters.docker-image >>
steps:
- checkout
- attach_workspace:
at: .
- run:
name: Build a distributable package
command: |
source venv/bin/activate
if [[ $CIRCLE_TAG ]]; then inv dist; else inv dist -b $CIRCLE_BUILD_NUM; fi
# Build a wheel release
if [[ $CIRCLE_TAG ]]; then
# This is a tagged release, version has been handled upstream
poetry build
else
# Relies on a dev version like "1.2.1.dev" by default
poetry version $(poetry version -s)$CIRCLE_BUILD_NUM
poetry build
fi
- store_artifacts:
path: dist
- persist_to_workspace:
root: .
paths:
- dist
- .

publish:
docker:
- image: circleci/python:3.6.6-stretch-browsers
- image: << pipeline.parameters.docker-image >>
steps:
- attach_workspace:
at: .
- run:
name: Install Twine
command: sudo pip install twine
- deploy:
name: Publish on PyPI
command: twine upload --username "${PYPI_USERNAME}" --password "${PYPI_PASSWORD}" dist/*.whl
command: |
poetry publish --username "${PYPI_USERNAME}" --password "${PYPI_PASSWORD}" --no-interaction
workflows:
version: 2
build:
jobs:
- python:
- install:
filters:
tags:
only: /v[0-9]+(\.[0-9]+)*/
- lint:
requires:
- install
filters:
tags:
only: /v[0-9]+(\.[0-9]+)*/
- tests:
requires:
- install
filters:
tags:
only: /v[0-9]+(\.[0-9]+)*/
- dist:
- build:
requires:
- python
- tests
- lint
filters:
tags:
only: /v[0-9]+(\.[0-9]+)*/
- publish:
requires:
- dist
- build
filters:
branches:
only:
- /v[0-9]+(\.[0-9]+)*/
- << pipeline.parameters.publish-branch >>
- /[0-9]+(\.[0-9]+)+/
tags:
only: /v[0-9]+(\.[0-9]+)*/
context: org-global
3 changes: 3 additions & 0 deletions .circleci/images/csvapi-circle/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# CI image for csvapi: CircleCI's Python 3.9 convenience image plus the
# `file` command, which the README lists as a runtime requirement.
FROM cimg/python:3.9

# Install and clean up in a single layer so the apt package index
# is not baked into the published image.
RUN sudo apt-get update \
    && sudo apt-get install -y file \
    && sudo rm -rf /var/lib/apt/lists/*
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
dbs/*.db
profiles/*.html
*.egg-info/
*.pyc
build/
dist/
reports/
.vscode/
.pytest_cache/
74 changes: 74 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,79 @@
# Changelog

## Current (in progress)

- Nothing yet

## 2.2.0 (2022-11-04)

- Remove profile endpoint, disable thread pool for profiling [#135](https://github.com/etalab/csvapi/pull/135)
- Fix tests by using a custom docker image [#135](https://github.com/etalab/csvapi/pull/135)

## 2.1.1 (2022-10-25)

- Fix bugs with JSON files [#126](https://github.com/etalab/csvapi/pull/126)

## 2.1.0 (2022-10-13)

- Fix bugs [#110](https://github.com/etalab/csvapi/pull/110) and [#111](https://github.com/etalab/csvapi/pull/111)
- Add API filters to compare a column against an int or float value (greater_than or less_than) [#109](https://github.com/etalab/csvapi/pull/109)
- Update csv-detective version [#119](https://github.com/etalab/csvapi/pull/119)

## 2.0.0 (2022-09-15)

- [BREAKING] Migrate to python >= 3.9 [#104](https://github.com/etalab/csvapi/pull/104)
- Migrate to poetry [#104](https://github.com/etalab/csvapi/pull/104)
- Enrich sqlite dbs with metadata extracted from csv-detective and pandas profiling [#104](https://github.com/etalab/csvapi/pull/104)
- Enrich the apify API with the ability to analyse a resource [#104](https://github.com/etalab/csvapi/pull/104)

## 1.2.1 (2021-04-29)

- Upgrade raven to sentry-sdk (a bit dirty so far)

## 1.2.0 (2021-04-29)

- Add profiling support [#77](https://github.com/etalab/csvapi/pull/77)
- Fix bug in filters w/ blanks in column names [#77](https://github.com/etalab/csvapi/pull/77)

## 1.1.0 (2021-03-23)

- Use aiosqlite [#76](https://github.com/etalab/csvapi/pull/76)

## 1.0.6 (2020-12-14)

- Better parsing fallback [#71](https://github.com/etalab/csvapi/pull/71)

## 1.0.5 (2020-11-17)

- Parsing view now raises exception on http error response codes [#69](https://github.com/etalab/csvapi/pull/69)

## 1.0.4 (2020-10-26)

- Protect custom type testers against None values [#66](https://github.com/etalab/csvapi/pull/66)
- Fix xlsx file support [#67](https://github.com/etalab/csvapi/pull/67)

## 1.0.3 (2020-03-04)

- Fix packaging problem

## 1.0.2 (2020-03-04)

- Fix XLS parsing [#60](https://github.com/etalab/csvapi/pull/60)

## 1.0.1 (2020-01-03)

- Fix aiohttp import [#52](https://github.com/etalab/csvapi/pull/52)

## 1.0.0 (2020-01-03)

- Add filters support [#50](https://github.com/etalab/csvapi/pull/50)
- Replace requests by aiohttp for asynchronous http requests. Also replace every `format()` string with f-strings. [#46](https://github.com/etalab/csvapi/pull/46)

## 0.1.0 (2019-09-06)

- Upgrade to Quart-0.9.1 :warning: requires python-3.7 [#21](https://github.com/opendatateam/csvapi/pull/21)
- Parse hours, SIREN and SIRET as text [#42](https://github.com/opendatateam/csvapi/pull/42)

## 0.0.9 (2019-01-18)

- Upgrade to Quart-0.6.6 and hypercorn-0.4.6 [#16](https://github.com/opendatateam/csvapi/pull/16)
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2018 Alexandre Bulté
Copyright 2022 Etalab

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

21 changes: 15 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -2,9 +2,11 @@

"Instantly" publish an API for a CSV hosted anywhere on the internet. Also supports Excel files.

This tool is used by [data.gouv.fr](https://www.data.gouv.fr) to show a preview of hosted CSV and XLS files.

## Installation

Requires Python 3.6+ and a Unix OS with the `file` command available.
Requires Python 3.9+ and a Unix OS with the `file` command available.

```shell
python3 -m venv pyenv && . pyenv/bin/activate
@@ -14,26 +16,24 @@ pip install csvapi
For development:

```shell
pip install -r requirements/develop.pip
pip install -e .
poetry install
```

## Quickstart

```shell
csvapi serve -h 0.0.0.0 -p 8000
poetry run csvapi serve -h 0.0.0.0 -p 8000
```

## Command line options

```shell
$ csvapi serve --help
$ poetry run csvapi serve --help
Usage: csvapi serve [OPTIONS]

Options:
--ssl-key TEXT Path to SSL key
--ssl-cert TEXT Path to SSL certificate
-w, --max-workers INTEGER Max number of ThreadPoolExecutor workers
--cache / --no-cache Do not parse CSV again if DB already exists
--reload Automatically reload if code change detected
--debug Enable debug mode - useful for development
@@ -234,6 +234,15 @@ The `_total` argument is used to display or hide the total number of rows (indep
}
```
#### Column based filters
By adding `{column}__{comparator}={value}` to the query string, you can filter the results based on the following criteria:
- `{column}` must be a valid column in your CSV
- `{comparator}` is `exact` (SQL `= {value}`) or `contains` (SQL `LIKE %{value}%`)
- `{value}` is the value you're filtering the column against
You can add multiple filters, they will be joined with an `AND` at the SQL level.
## Credits
Inspired by the excellent [Datasette](https://github.com/simonw/datasette).
35 changes: 35 additions & 0 deletions benchmark/bench.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import http from 'k6/http';
import { sleep } from 'k6';
import crypto from 'k6/crypto';

// k6 run options: 10 virtual users (vus) share a total of 20 iterations
// of the default function exported below.
export const options = {
vus: 10,
iterations: 20,
};

/**
 * k6 iteration body.
 *
 * Per iteration it:
 *   1. asks csvapi to convert ("apify") a remote CSV,
 *   2. asks for the same conversion with analysis enabled,
 *   3. queries the resulting API endpoint 10 times, 100 ms apart.
 *
 * Bump `rdm` to change the target URL and thereby bypass any cached
 * conversion on the server.
 */
export default function () {
  const toParse = "https://www.data.gouv.fr/fr/datasets/r/e3d83ab3-dc52-4c99-abaf-8a38050cc68c";
  const base = "https://csvapi.data.gouv.fr";

  // change me to invalidate cache
  const rdm = "2";
  const toApify = `${toParse}?ts=${rdm}`;

  // NOTE(review): assumes the /api/<id> endpoint id is the MD5 hex digest
  // of the apified URL, which is what logging this hash suggests. The
  // previous code hard-coded "26bdf0d090dfbaecbe213c6f551a46ac" here, so
  // it kept hitting the old endpoint after `rdm` was changed — confirm.
  const hash = crypto.md5(toApify, 'hex');
  console.log(hash);

  // The target URL carries its own query string, so it must be encoded
  // before being embedded as the value of the `url` parameter.
  const encodedUrl = encodeURIComponent(toApify);

  // apify 1
  http.get(`${base}/apify?url=${encodedUrl}`);

  // analyze 1
  http.get(`${base}/apify?analysis=yes&url=${encodedUrl}`);

  // make 10 requests against the converted endpoint; the shared `name`
  // tag groups them under a single metric label
  for (let id = 1; id <= 10; id++) {
    http.get(`${base}/api/${hash}`, {
      tags: { name: 'request' },
    });
    sleep(0.1);
  }
}
Loading