From 83d56e89d3a40baaf66ac1f288f2a460b3038fb4 Mon Sep 17 00:00:00 2001
From: Test User
Date: Mon, 27 Jun 2022 10:45:17 +0000
Subject: [PATCH 01/14] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20add=20basis=20for?=
 =?UTF-8?q?=20the=20docs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/build_pr_documentation.yml |  18 +
 docs/.python-version                         |   1 +
 docs/Makefile                                |   9 +
 docs/README.md                               |  70 +++
 docs/poetry.lock                             | 500 +++++++++++++++++++
 docs/poetry.toml                             |   2 +
 docs/pyproject.toml                          |  14 +
 docs/source/_toctree.yml                     |   5 +
 docs/source/index.mdx                        |   3 +
 9 files changed, 622 insertions(+)
 create mode 100644 .github/workflows/build_pr_documentation.yml
 create mode 100644 docs/.python-version
 create mode 100644 docs/Makefile
 create mode 100644 docs/README.md
 create mode 100644 docs/poetry.lock
 create mode 100644 docs/poetry.toml
 create mode 100644 docs/pyproject.toml
 create mode 100644 docs/source/_toctree.yml
 create mode 100644 docs/source/index.mdx

diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml
new file mode 100644
index 0000000000..7489b96954
--- /dev/null
+++ b/.github/workflows/build_pr_documentation.yml
@@ -0,0 +1,18 @@
+name: Build PR Documentation
+
+on:
+  pull_request:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
+    with:
+      commit_sha: ${{ github.event.pull_request.head.sha }}
+      pr_number: ${{ github.event.number }}
+      package: datasets-server
+      path_to_docs: docs/source/
+      additional_args: --not_python_module
diff --git a/docs/.python-version b/docs/.python-version
new file mode 100644
index 0000000000..1635d0f5a1
--- /dev/null
+++ b/docs/.python-version
@@ -0,0 +1 @@
+3.9.6
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000000..8f591f2c6d
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,9 @@
+BUILD_DIR?=~/tmp/doc-datasets-server
+
+.PHONY: install
+install:
+	poetry install
+
+.PHONY: build
+build:
+	poetry run doc-builder build datasets-server source/ --build_dir $(BUILD_DIR) --not_python_module
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000000..bbd4131590
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,70 @@
+<!---
+Copyright 2022 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# Generating the documentation
+
+To generate the documentation, you first have to build it. Several packages are necessary to build the doc;
+you can install them with the following command, in this directory:
+
+```bash
+make install
+```
+
+---
+
+**NOTE**
+
+You only need to generate the documentation to inspect it locally (if you're planning changes and want to
+check how they look before committing, for instance). You don't have to commit the built documentation.
+
+---
+
+## Building the documentation
+
+Once you have set up the `doc-builder` and additional packages, you can generate the documentation by typing the
+following command:
+
+```bash
+BUILD_DIR=/tmp/doc-datasets-server/ make build
+```
+
+You can adapt the `BUILD_DIR` environment variable to set any temporary folder that you prefer. This command will create it and generate
+the MDX files that will be rendered as the documentation on the main website. You can inspect them in your favorite
+Markdown editor.
+
+---
+
+**NOTE**
+
+It's not possible for now to see locally how the final documentation will look. Once you have opened a PR, a bot
+will add a comment with a link to the documentation with your changes.
+
+---
+
+## Adding a new element to the navigation bar
+
+Accepted files are Markdown (.md or .mdx).
+
+Create a file with its extension and put it in the source directory. You can then link it to the toc-tree by putting
+the filename without the extension in the [`_toctree.yml`](https://github.com/huggingface/datasets-server/blob/main/docs/source/_toctree.yml) file.
+
+## Adding an image
+
+Due to the rapidly growing repository, it is important to make sure that no files that would significantly weigh down the repository are added. This includes images, videos and other non-text files. We prefer to leverage a hf.co hosted `dataset` like
+the ones hosted on [`hf-internal-testing`](https://huggingface.co/hf-internal-testing) in which to place these files and reference
+them by URL. We recommend putting them in the following dataset: [huggingface/documentation-images](https://huggingface.co/datasets/huggingface/documentation-images).
+If you are an external contributor, feel free to add the images to your PR and ask a Hugging Face member to migrate your images
+to this dataset.
diff --git a/docs/poetry.lock b/docs/poetry.lock
new file mode 100644
index 0000000000..90fc5ab0ad
--- /dev/null
+++ b/docs/poetry.lock
@@ -0,0 +1,500 @@
+[[package]]
+name = "attrs"
+version = "21.4.0"
+description = "Classes Without Boilerplate"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[package.extras]
+dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"]
+docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
+tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
+tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"]
+
+[[package]]
+name = "black"
+version = "22.3.0"
+description = "The uncompromising code formatter."
+category = "main"
+optional = false
+python-versions = ">=3.6.2"
+
+[package.dependencies]
+click = ">=8.0.0"
+mypy-extensions = ">=0.4.3"
+pathspec = ">=0.9.0"
+platformdirs = ">=2"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
+
+[package.extras]
+colorama = ["colorama (>=0.4.3)"]
+d = ["aiohttp (>=3.7.4)"]
+jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
+uvloop = ["uvloop (>=0.15.2)"]
+
+[[package]]
+name = "click"
+version = "8.1.3"
+description = "Composable command line interface toolkit"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[[package]]
+name = "colorama"
+version = "0.4.5"
+description = "Cross-platform colored terminal text."
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "fastjsonschema" +version = "2.15.3" +description = "Fastest Python implementation of JSON schema" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +devel = ["colorama", "jsonschema", "json-spec", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] + +[[package]] +name = "flake8" +version = "4.0.1" +description = "the modular source code checker: pep8 pyflakes and co" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +mccabe = ">=0.6.0,<0.7.0" +pycodestyle = ">=2.8.0,<2.9.0" +pyflakes = ">=2.4.0,<2.5.0" + +[[package]] +name = "hf-doc-builder" +version = "0.3.0" +description = "Doc building utility" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +black = {version = ">=22.0,<23.0", optional = true, markers = "extra == \"quality\""} +flake8 = {version = ">=3.8.3", optional = true, markers = "extra == \"quality\""} +isort = {version = ">=5.5.4", optional = true, markers = "extra == \"quality\""} +nbformat = "*" +packaging = "*" +pyyaml = "*" +tqdm = "*" + +[package.extras] +all = ["pytest", "pytest-xdist", "torch", "transformers", "tokenizers", "black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)"] +dev = ["pytest", "pytest-xdist", "torch", "transformers", "tokenizers", "black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)"] +quality = ["black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)"] +testing = ["pytest", "pytest-xdist", "torch", "transformers", "tokenizers"] +transformers = ["transformers"] + +[[package]] +name = "isort" +version = "5.10.1" +description = "A Python utility / library to sort Python imports." +category = "main" +optional = false +python-versions = ">=3.6.1,<4.0" + +[package.extras] +pipfile_deprecated_finder = ["pipreqs", "requirementslib"] +requirements_deprecated_finder = ["pipreqs", "pip-api"] +colors = ["colorama (>=0.4.3,<0.5.0)"] +plugins = ["setuptools"] + +[[package]] +name = "jsonschema" +version = "4.6.0" +description = "An implementation of JSON Schema validation for Python" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +attrs = ">=17.4.0" +pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2" + +[package.extras] +format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] + +[[package]] +name = "jupyter-core" +version = "4.10.0" +description = "Jupyter core package. A base package on which Jupyter projects rely." +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +pywin32 = {version = ">=1.0", markers = "sys_platform == \"win32\" and platform_python_implementation != \"PyPy\""} +traitlets = "*" + +[package.extras] +test = ["ipykernel", "pre-commit", "pytest", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "mccabe" +version = "0.6.1" +description = "McCabe checker, plugin for flake8" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "mypy-extensions" +version = "0.4.3" +description = "Experimental type system extensions for programs checked with the mypy typechecker." 
+category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "nbformat" +version = "5.4.0" +description = "The Jupyter Notebook format" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +fastjsonschema = "*" +jsonschema = ">=2.6" +jupyter-core = "*" +traitlets = ">=5.1" + +[package.extras] +test = ["check-manifest", "testpath", "pytest", "pre-commit"] + +[[package]] +name = "packaging" +version = "21.3" +description = "Core utilities for Python packages" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" + +[[package]] +name = "pathspec" +version = "0.9.0" +description = "Utility library for gitignore style pattern matching of file paths." +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[[package]] +name = "platformdirs" +version = "2.5.2" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)", "sphinx (>=4)"] +test = ["appdirs (==1.4.4)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)", "pytest (>=6)"] + +[[package]] +name = "pycodestyle" +version = "2.8.0" +description = "Python style guide checker" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "pyflakes" +version = "2.4.0" +description = "passive checker of Python programs" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "pyparsing" +version = "3.0.9" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +category = "main" +optional = false +python-versions = ">=3.6.8" + +[package.extras] +diagrams = ["railroad-diagrams", "jinja2"] + +[[package]] +name = "pyrsistent" +version = "0.18.1" +description = "Persistent/Functional/Immutable data structures" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "pywin32" +version = "304" +description = "Python for Window Extensions" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pyyaml" +version = "6.0" +description = "YAML parser and emitter for Python" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "tqdm" +version = "4.64.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "traitlets" +version = "5.3.0" +description = "" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pre-commit", "pytest"] + +[[package]] +name = "typing-extensions" +version = "4.2.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" + +[metadata] 
+lock-version = "1.1" +python-versions = "3.9.6" +content-hash = "dd1c84abd84085c6bc252330ff981c1724fefc33f49d52a8f6f1c8c3a75d29a1" + +[metadata.files] +attrs = [ + {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"}, + {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, +] +black = [ + {file = "black-22.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09"}, + {file = "black-22.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb"}, + {file = "black-22.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a"}, + {file = "black-22.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968"}, + {file = "black-22.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d"}, + {file = "black-22.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce"}, + {file = "black-22.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82"}, + {file = "black-22.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b"}, + {file = "black-22.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015"}, + {file = "black-22.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b"}, + {file = "black-22.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a"}, + {file = "black-22.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163"}, + {file = "black-22.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464"}, + {file = "black-22.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0"}, + {file = "black-22.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176"}, + {file = "black-22.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0"}, + {file = "black-22.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20"}, + {file = "black-22.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a"}, + {file = "black-22.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad"}, + {file = "black-22.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21"}, + {file = "black-22.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265"}, + {file = "black-22.3.0-py3-none-any.whl", hash = 
"sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72"}, + {file = "black-22.3.0.tar.gz", hash = "sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79"}, +] +click = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] +colorama = [ + {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, + {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, +] +fastjsonschema = [ + {file = "fastjsonschema-2.15.3-py3-none-any.whl", hash = "sha256:ddb0b1d8243e6e3abb822bd14e447a89f4ab7439342912d590444831fa00b6a0"}, + {file = "fastjsonschema-2.15.3.tar.gz", hash = "sha256:0a572f0836962d844c1fc435e200b2e4f4677e4e6611a2e3bdd01ba697c275ec"}, +] +flake8 = [ + {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"}, + {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"}, +] +hf-doc-builder = [ + {file = "hf-doc-builder-0.3.0.tar.gz", hash = "sha256:d9a632c8afd01debc24e47f215505a4ec5bbee86f8ec04781b32b3d54d2de266"}, + {file = "hf_doc_builder-0.3.0-py3-none-any.whl", hash = "sha256:d6a91d3e248d6b227a5314d146d8a4c421f1cd0d411216d13c7461703ca9263c"}, +] +isort = [ + {file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"}, + {file = "isort-5.10.1.tar.gz", hash = "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951"}, +] +jsonschema = [ + {file = "jsonschema-4.6.0-py3-none-any.whl", hash = "sha256:1c92d2db1900b668201f1797887d66453ab1fbfea51df8e4b46236689c427baf"}, + {file = "jsonschema-4.6.0.tar.gz", hash = "sha256:9d6397ba4a6c0bf0300736057f649e3e12ecbc07d3e81a0dacb72de4e9801957"}, +] +jupyter-core = [ + {file = "jupyter_core-4.10.0-py3-none-any.whl", hash = "sha256:e7f5212177af7ab34179690140f188aa9bf3d322d8155ed972cbded19f55b6f3"}, + {file = "jupyter_core-4.10.0.tar.gz", hash = "sha256:a6de44b16b7b31d7271130c71a6792c4040f077011961138afed5e5e73181aec"}, +] +mccabe = [ + {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, + {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, +] +mypy-extensions = [ + {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, + {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, +] +nbformat = [ + {file = "nbformat-5.4.0-py3-none-any.whl", hash = "sha256:0d6072aaec95dddc39735c144ee8bbc6589c383fb462e4058abc855348152dad"}, + {file = "nbformat-5.4.0.tar.gz", hash = "sha256:44ba5ca6acb80c5d5a500f1e5b83ede8cbe364d5a495c4c8cf60aaf1ba656501"}, +] +packaging = [ + {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, + {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, +] +pathspec = [ + {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = 
"sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, + {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, +] +platformdirs = [ + {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, + {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"}, +] +pycodestyle = [ + {file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"}, + {file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"}, +] +pyflakes = [ + {file = "pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"}, + {file = "pyflakes-2.4.0.tar.gz", hash = "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c"}, +] +pyparsing = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] +pyrsistent = [ + {file = "pyrsistent-0.18.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:df46c854f490f81210870e509818b729db4488e1f30f2a1ce1698b2295a878d1"}, + {file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d45866ececf4a5fff8742c25722da6d4c9e180daa7b405dc0a2a2790d668c26"}, + {file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4ed6784ceac462a7d6fcb7e9b663e93b9a6fb373b7f43594f9ff68875788e01e"}, + {file = "pyrsistent-0.18.1-cp310-cp310-win32.whl", hash = "sha256:e4f3149fd5eb9b285d6bfb54d2e5173f6a116fe19172686797c056672689daf6"}, + {file = "pyrsistent-0.18.1-cp310-cp310-win_amd64.whl", hash = "sha256:636ce2dc235046ccd3d8c56a7ad54e99d5c1cd0ef07d9ae847306c91d11b5fec"}, + {file = "pyrsistent-0.18.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e92a52c166426efbe0d1ec1332ee9119b6d32fc1f0bbfd55d5c1088070e7fc1b"}, + {file = "pyrsistent-0.18.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7a096646eab884bf8bed965bad63ea327e0d0c38989fc83c5ea7b8a87037bfc"}, + {file = "pyrsistent-0.18.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cdfd2c361b8a8e5d9499b9082b501c452ade8bbf42aef97ea04854f4a3f43b22"}, + {file = "pyrsistent-0.18.1-cp37-cp37m-win32.whl", hash = "sha256:7ec335fc998faa4febe75cc5268a9eac0478b3f681602c1f27befaf2a1abe1d8"}, + {file = "pyrsistent-0.18.1-cp37-cp37m-win_amd64.whl", hash = "sha256:6455fc599df93d1f60e1c5c4fe471499f08d190d57eca040c0ea182301321286"}, + {file = "pyrsistent-0.18.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fd8da6d0124efa2f67d86fa70c851022f87c98e205f0594e1fae044e7119a5a6"}, + {file = "pyrsistent-0.18.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bfe2388663fd18bd8ce7db2c91c7400bf3e1a9e8bd7d63bf7e77d39051b85ec"}, + {file = "pyrsistent-0.18.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e3e1fcc45199df76053026a51cc59ab2ea3fc7c094c6627e93b7b44cdae2c8c"}, + {file = "pyrsistent-0.18.1-cp38-cp38-win32.whl", hash = 
"sha256:b568f35ad53a7b07ed9b1b2bae09eb15cdd671a5ba5d2c66caee40dbf91c68ca"}, + {file = "pyrsistent-0.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1b96547410f76078eaf66d282ddca2e4baae8964364abb4f4dcdde855cd123a"}, + {file = "pyrsistent-0.18.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f87cc2863ef33c709e237d4b5f4502a62a00fab450c9e020892e8e2ede5847f5"}, + {file = "pyrsistent-0.18.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bc66318fb7ee012071b2792024564973ecc80e9522842eb4e17743604b5e045"}, + {file = "pyrsistent-0.18.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:914474c9f1d93080338ace89cb2acee74f4f666fb0424896fcfb8d86058bf17c"}, + {file = "pyrsistent-0.18.1-cp39-cp39-win32.whl", hash = "sha256:1b34eedd6812bf4d33814fca1b66005805d3640ce53140ab8bbb1e2651b0d9bc"}, + {file = "pyrsistent-0.18.1-cp39-cp39-win_amd64.whl", hash = "sha256:e24a828f57e0c337c8d8bb9f6b12f09dfdf0273da25fda9e314f0b684b415a07"}, + {file = "pyrsistent-0.18.1.tar.gz", hash = "sha256:d4d61f8b993a7255ba714df3aca52700f8125289f84f704cf80916517c46eb96"}, +] +pywin32 = [ + {file = "pywin32-304-cp310-cp310-win32.whl", hash = "sha256:3c7bacf5e24298c86314f03fa20e16558a4e4138fc34615d7de4070c23e65af3"}, + {file = "pywin32-304-cp310-cp310-win_amd64.whl", hash = "sha256:4f32145913a2447736dad62495199a8e280a77a0ca662daa2332acf849f0be48"}, + {file = "pywin32-304-cp310-cp310-win_arm64.whl", hash = "sha256:d3ee45adff48e0551d1aa60d2ec066fec006083b791f5c3527c40cd8aefac71f"}, + {file = "pywin32-304-cp311-cp311-win32.whl", hash = "sha256:30c53d6ce44c12a316a06c153ea74152d3b1342610f1b99d40ba2795e5af0269"}, + {file = "pywin32-304-cp311-cp311-win_amd64.whl", hash = "sha256:7ffa0c0fa4ae4077e8b8aa73800540ef8c24530057768c3ac57c609f99a14fd4"}, + {file = "pywin32-304-cp311-cp311-win_arm64.whl", hash = "sha256:cbbe34dad39bdbaa2889a424d28752f1b4971939b14b1bb48cbf0182a3bcfc43"}, + {file = "pywin32-304-cp36-cp36m-win32.whl", hash = "sha256:be253e7b14bc601718f014d2832e4c18a5b023cbe72db826da63df76b77507a1"}, + {file = "pywin32-304-cp36-cp36m-win_amd64.whl", hash = "sha256:de9827c23321dcf43d2f288f09f3b6d772fee11e809015bdae9e69fe13213988"}, + {file = "pywin32-304-cp37-cp37m-win32.whl", hash = "sha256:f64c0377cf01b61bd5e76c25e1480ca8ab3b73f0c4add50538d332afdf8f69c5"}, + {file = "pywin32-304-cp37-cp37m-win_amd64.whl", hash = "sha256:bb2ea2aa81e96eee6a6b79d87e1d1648d3f8b87f9a64499e0b92b30d141e76df"}, + {file = "pywin32-304-cp38-cp38-win32.whl", hash = "sha256:94037b5259701988954931333aafd39cf897e990852115656b014ce72e052e96"}, + {file = "pywin32-304-cp38-cp38-win_amd64.whl", hash = "sha256:ead865a2e179b30fb717831f73cf4373401fc62fbc3455a0889a7ddac848f83e"}, + {file = "pywin32-304-cp39-cp39-win32.whl", hash = "sha256:25746d841201fd9f96b648a248f731c1dec851c9a08b8e33da8b56148e4c65cc"}, + {file = "pywin32-304-cp39-cp39-win_amd64.whl", hash = "sha256:d24a3382f013b21aa24a5cfbfad5a2cd9926610c0affde3e8ab5b3d7dbcf4ac9"}, +] +pyyaml = [ + {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, + {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, + {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, + {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, + {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, + {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, + {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, + {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, + {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, + {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, + {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, + {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, + {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, + {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, + {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +tqdm = [ + {file = "tqdm-4.64.0-py2.py3-none-any.whl", hash = "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6"}, + {file = "tqdm-4.64.0.tar.gz", hash = "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d"}, +] +traitlets = [ + {file = "traitlets-5.3.0-py3-none-any.whl", hash = "sha256:65fa18961659635933100db8ca120ef6220555286949774b9cfc106f941d1c7a"}, + {file = "traitlets-5.3.0.tar.gz", hash = "sha256:0bb9f1f9f017aa8ec187d8b1b2a7a6626a2a1d877116baba52a129bfa124f8e2"}, +] +typing-extensions = [ + {file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"}, + {file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"}, +] diff --git a/docs/poetry.toml b/docs/poetry.toml new file mode 100644 index 0000000000..ab1033bd37 --- /dev/null +++ b/docs/poetry.toml @@ -0,0 +1,2 @@ +[virtualenvs] +in-project = true diff --git a/docs/pyproject.toml b/docs/pyproject.toml new file mode 100644 index 0000000000..fc6088ca02 --- /dev/null +++ b/docs/pyproject.toml @@ -0,0 +1,14 @@ +[tool.poetry] +authors = ["Sylvain Lesage "] +description = "Documentation for datasets-server" +name = "datasets-server-doc" +version = "0.1.0" + +[tool.poetry.dependencies] +#hf-doc-builder = { git = "https://github.com/huggingface/doc-builder.git", rev = "a3c770b97dc447e6b53cb43230ba56009e73b43a", extras = ["quality"] } +hf-doc-builder = { extras = ["quality"], version = "0.3.0" } +python = "3.9.6" + +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core>=1.0.0"] diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml new file mode 100644 index 0000000000..5018c48a73 --- /dev/null +++ b/docs/source/_toctree.yml @@ -0,0 +1,5 @@ + +- sections: + - local: index + title: ๐Ÿค— Datasets server + title: Get started diff --git a/docs/source/index.mdx b/docs/source/index.mdx new file mode 100644 index 0000000000..cbe429640d --- /dev/null +++ b/docs/source/index.mdx @@ -0,0 +1,3 @@ +# Datasets server + +๐Ÿค— Datasets server stores the hub datasets, and provides an API to query their contents, metadata and basic statistics. 
From b5655f883b38b463589b170a6ec0009db06ebe0b Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 27 Jun 2022 10:48:35 +0000 Subject: [PATCH 02/14] =?UTF-8?q?ci:=20=F0=9F=8E=A1=20fix=20the=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build_pr_documentation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index 7489b96954..cb4572d0bc 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -14,5 +14,5 @@ jobs: commit_sha: ${{ github.event.pull_request.head.sha }} pr_number: ${{ github.event.number }} package: datasets-server - path_to_docs: docs/source/ + path_to_docs: datasets-server/docs/source/ additional_args: --not_python_module From 5ef321b5bdec081364752d3e86bdd15b0dc4e57f Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 27 Jun 2022 13:26:53 +0000 Subject: [PATCH 03/14] =?UTF-8?q?ci:=20=F0=9F=8E=A1=20add=20delete=20doc?= =?UTF-8?q?=20comment=20workflow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build_pr_documentation.yml | 1 - .github/workflows/delete_doc_comment.yml | 13 +++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/delete_doc_comment.yml diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index cb4572d0bc..351abfe11e 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -14,5 +14,4 @@ jobs: commit_sha: ${{ github.event.pull_request.head.sha }} pr_number: ${{ github.event.number }} package: datasets-server - path_to_docs: datasets-server/docs/source/ additional_args: --not_python_module diff --git a/.github/workflows/delete_doc_comment.yml b/.github/workflows/delete_doc_comment.yml new file mode 100644 index 0000000000..e42b2ee069 --- /dev/null +++ b/.github/workflows/delete_doc_comment.yml @@ -0,0 +1,13 @@ +name: Delete dev documentation + +on: + pull_request: + types: [ closed ] + + +jobs: + delete: + uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main + with: + pr_number: ${{ github.event.number }} + package: datasets-server From 913f6328aaab0253086290585f6f5dd03bb670ef Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 27 Jun 2022 14:10:03 +0000 Subject: [PATCH 04/14] =?UTF-8?q?chore:=20=F0=9F=A4=96=20add=20make=20prev?= =?UTF-8?q?iew=20for=20the=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Makefile | 4 ++++ docs/poetry.lock | 40 +++++++++++++++++++++++++++++++++++++++- docs/pyproject.toml | 2 +- 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 8f591f2c6d..78667b4280 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -7,3 +7,7 @@ install: .PHONY: build build: poetry run doc-builder build datasets-server source/ --build_dir $(BUILD_DIR) --not_python_module + +.PHONY: preview +preview: + poetry run doc-builder preview datasets-server source/ --not_python_module diff --git a/docs/poetry.lock b/docs/poetry.lock index 90fc5ab0ad..0bb72faa35 100644 --- a/docs/poetry.lock +++ b/docs/poetry.lock @@ -305,10 +305,21 @@ category = "main" optional = false python-versions = ">=3.7" +[[package]] +name = "watchdog" +version = "2.1.9" +description = "Filesystem events monitoring" 
+category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + [metadata] lock-version = "1.1" python-versions = "3.9.6" -content-hash = "dd1c84abd84085c6bc252330ff981c1724fefc33f49d52a8f6f1c8c3a75d29a1" +content-hash = "c604ab0487eae9671f12cfba89dc09542868ff1302fc5c9e4c1be27a8e29b578" [metadata.files] attrs = [ @@ -498,3 +509,30 @@ typing-extensions = [ {file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"}, {file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"}, ] +watchdog = [ + {file = "watchdog-2.1.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a735a990a1095f75ca4f36ea2ef2752c99e6ee997c46b0de507ba40a09bf7330"}, + {file = "watchdog-2.1.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b17d302850c8d412784d9246cfe8d7e3af6bcd45f958abb2d08a6f8bedf695d"}, + {file = "watchdog-2.1.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ee3e38a6cc050a8830089f79cbec8a3878ec2fe5160cdb2dc8ccb6def8552658"}, + {file = "watchdog-2.1.9-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:64a27aed691408a6abd83394b38503e8176f69031ca25d64131d8d640a307591"}, + {file = "watchdog-2.1.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:195fc70c6e41237362ba720e9aaf394f8178bfc7fa68207f112d108edef1af33"}, + {file = "watchdog-2.1.9-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bfc4d351e6348d6ec51df007432e6fe80adb53fd41183716017026af03427846"}, + {file = "watchdog-2.1.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8250546a98388cbc00c3ee3cc5cf96799b5a595270dfcfa855491a64b86ef8c3"}, + {file = "watchdog-2.1.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:117ffc6ec261639a0209a3252546b12800670d4bf5f84fbd355957a0595fe654"}, + {file = "watchdog-2.1.9-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:97f9752208f5154e9e7b76acc8c4f5a58801b338de2af14e7e181ee3b28a5d39"}, + {file = "watchdog-2.1.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:247dcf1df956daa24828bfea5a138d0e7a7c98b1a47cf1fa5b0c3c16241fcbb7"}, + {file = "watchdog-2.1.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:226b3c6c468ce72051a4c15a4cc2ef317c32590d82ba0b330403cafd98a62cfd"}, + {file = "watchdog-2.1.9-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d9820fe47c20c13e3c9dd544d3706a2a26c02b2b43c993b62fcd8011bcc0adb3"}, + {file = "watchdog-2.1.9-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:70af927aa1613ded6a68089a9262a009fbdf819f46d09c1a908d4b36e1ba2b2d"}, + {file = "watchdog-2.1.9-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ed80a1628cee19f5cfc6bb74e173f1b4189eb532e705e2a13e3250312a62e0c9"}, + {file = "watchdog-2.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9f05a5f7c12452f6a27203f76779ae3f46fa30f1dd833037ea8cbc2887c60213"}, + {file = "watchdog-2.1.9-py3-none-manylinux2014_armv7l.whl", hash = "sha256:255bb5758f7e89b1a13c05a5bceccec2219f8995a3a4c4d6968fe1de6a3b2892"}, + {file = "watchdog-2.1.9-py3-none-manylinux2014_i686.whl", hash = "sha256:d3dda00aca282b26194bdd0adec21e4c21e916956d972369359ba63ade616153"}, + {file = "watchdog-2.1.9-py3-none-manylinux2014_ppc64.whl", hash = "sha256:186f6c55abc5e03872ae14c2f294a153ec7292f807af99f57611acc8caa75306"}, + {file = "watchdog-2.1.9-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:083171652584e1b8829581f965b9b7723ca5f9a2cd7e20271edf264cfd7c1412"}, + {file = "watchdog-2.1.9-py3-none-manylinux2014_s390x.whl", hash = 
"sha256:b530ae007a5f5d50b7fbba96634c7ee21abec70dc3e7f0233339c81943848dc1"}, + {file = "watchdog-2.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:4f4e1c4aa54fb86316a62a87b3378c025e228178d55481d30d857c6c438897d6"}, + {file = "watchdog-2.1.9-py3-none-win32.whl", hash = "sha256:5952135968519e2447a01875a6f5fc8c03190b24d14ee52b0f4b1682259520b1"}, + {file = "watchdog-2.1.9-py3-none-win_amd64.whl", hash = "sha256:7a833211f49143c3d336729b0020ffd1274078e94b0ae42e22f596999f50279c"}, + {file = "watchdog-2.1.9-py3-none-win_ia64.whl", hash = "sha256:ad576a565260d8f99d97f2e64b0f97a48228317095908568a9d5c786c829d428"}, + {file = "watchdog-2.1.9.tar.gz", hash = "sha256:43ce20ebb36a51f21fa376f76d1d4692452b2527ccd601950d69ed36b9e21609"}, +] diff --git a/docs/pyproject.toml b/docs/pyproject.toml index fc6088ca02..2038e72974 100644 --- a/docs/pyproject.toml +++ b/docs/pyproject.toml @@ -5,9 +5,9 @@ name = "datasets-server-doc" version = "0.1.0" [tool.poetry.dependencies] -#hf-doc-builder = { git = "https://github.com/huggingface/doc-builder.git", rev = "a3c770b97dc447e6b53cb43230ba56009e73b43a", extras = ["quality"] } hf-doc-builder = { extras = ["quality"], version = "0.3.0" } python = "3.9.6" +watchdog = "^2.1.9" [build-system] build-backend = "poetry.core.masonry.api" From 45c96e2cc0788adf0f67fce105edc9268cc27c09 Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 27 Jun 2022 14:12:29 +0000 Subject: [PATCH 05/14] =?UTF-8?q?docs:=20=E2=9C=8F=EF=B8=8F=20update=20the?= =?UTF-8?q?=20docs/=20README?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/README.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/README.md b/docs/README.md index bbd4131590..cc621e9cbf 100644 --- a/docs/README.md +++ b/docs/README.md @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. --> -# Generating the documentation +# Generate the documentation To generate the documentation, you first have to build it. Several packages are necessary to build the doc, you can install them with the following command, in this directory: @@ -32,11 +32,21 @@ check how they look like before committing for instance). You don't have to comm --- -## Building the documentation +## Preview the documentation -Once you have setup the `doc-builder` and additional packages, you can generate the documentation by typing the +Once you have setup the `doc-builder` and additional packages, you can preview the documentation by typing the following command: +```bash +make preview +``` + +The documentation is available at http://localhost:3000/. + +## Build the documentation + +To build the documentation, launch: + ```bash BUILD_DIR=/tmp/doc-datasets-server/ make build ``` From c9ae7d07cd522e7534a13988ba2e31da207db83c Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 27 Jun 2022 15:29:06 +0000 Subject: [PATCH 06/14] =?UTF-8?q?docs:=20=E2=9C=8F=EF=B8=8F=20list=20of=20?= =?UTF-8?q?endpoints?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/source/index.mdx | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/source/index.mdx b/docs/source/index.mdx index cbe429640d..0191f48adb 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -1,3 +1,15 @@ # Datasets server -๐Ÿค— Datasets server stores the hub datasets, and provides an API to query their contents, metadata and basic statistics. 
+The ๐Ÿค— datasets server gives access, via a REST API, to the contents, metadata and basic statistics of the [Hugging Face Hub datasets](https://huggingface.co/datasets). + +## REST API + +The base URL of the REST API is https://datasets-server.huggingface.co. + +For now, it provides the following endpoints for the [dataset viewer](https://huggingface.co/docs/hub/datasets-viewer) on the Hub: + +| Endpoint | Description | Query parameters | +| --- | --- | --- | +| /valid GET | Get the list of datasets hosted in the Hub and supported by the datasets server. | | +| /splits GET | Get the list of configurations and splits of a dataset. | - `dataset`: name of the dataset | +| /rows GET | Get the first rows of a dataset split. | - `dataset`: name of the dataset - `config`: name of the config - `split`: name of the split | From 30caa8f65c77665f53987e074f4d4f73b895a5b1 Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 27 Jun 2022 16:53:17 +0000 Subject: [PATCH 07/14] =?UTF-8?q?docs:=20=E2=9C=8F=EF=B8=8F=20write=20docu?= =?UTF-8?q?mentation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/source/_toctree.yml | 8 ++ docs/source/api_reference.mdx | 11 +++ docs/source/index.mdx | 18 ++--- docs/source/rows.mdx | 146 ++++++++++++++++++++++++++++++++++ docs/source/splits.mdx | 75 +++++++++++++++++ docs/source/valid.mdx | 23 ++++++ 6 files changed, 272 insertions(+), 9 deletions(-) create mode 100644 docs/source/api_reference.mdx create mode 100644 docs/source/rows.mdx create mode 100644 docs/source/splits.mdx create mode 100644 docs/source/valid.mdx diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 5018c48a73..1653de13ae 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -2,4 +2,12 @@ - sections: - local: index title: ๐Ÿค— Datasets server + - local: splits + title: Splits + - local: rows + title: First rows + - local: valid + title: Valid datasets + - local: api_reference + title: API reference title: Get started diff --git a/docs/source/api_reference.mdx b/docs/source/api_reference.mdx new file mode 100644 index 0000000000..7cca5077fa --- /dev/null +++ b/docs/source/api_reference.mdx @@ -0,0 +1,11 @@ +# API reference + +The base URL of the REST API is https://datasets-server.huggingface.co. + +It provides the following endpoints: + +| Endpoint | Description | Query parameters | +| --- | --- | --- | +| /splits GET | Get the list of configurations and splits of a dataset. | - `dataset`: name of the dataset | +| /rows GET | Get the first rows of a dataset split. | - `dataset`: name of the dataset - `config`: name of the config - `split`: name of the split | +| /valid GET | Get the list of datasets hosted in the Hub and supported by the datasets server. | | diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 0191f48adb..1aca4ce589 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -1,15 +1,15 @@ # Datasets server -The ๐Ÿค— datasets server gives access, via a REST API, to the contents, metadata and basic statistics of the [Hugging Face Hub datasets](https://huggingface.co/datasets). +The ๐Ÿค— datasets server gives access to the contents, metadata and basic statistics of the [Hugging Face Hub datasets](https://huggingface.co/datasets) via a REST API. -## REST API +## History -The base URL of the REST API is https://datasets-server.huggingface.co. +The API has originally been developed to provide data to the [dataset viewer](https://huggingface.co/docs/hub/datasets-viewer) on the Hub. 
Indeed, the information about the datasets cannot be extracted live, and must be preprocessed and stored beforehand in order to later be accessed quickly on the Hub. -For now, it provides the following endpoints for the [dataset viewer](https://huggingface.co/docs/hub/datasets-viewer) on the Hub: +We decided to make the API public to give access programmatically, through a REST API, to the contents, metadata and statistics of the Hub datasets. -| Endpoint | Description | Query parameters | -| --- | --- | --- | -| /valid GET | Get the list of datasets hosted in the Hub and supported by the datasets server. | | -| /splits GET | Get the list of configurations and splits of a dataset. | - `dataset`: name of the dataset | -| /rows GET | Get the first rows of a dataset split. | - `dataset`: name of the dataset - `config`: name of the config - `split`: name of the split | +## Evolution + +The API currently provides three endpoints. + +The plan is to add more of them in the near future. The two next features should be the basic statistics by column, and random access to the datasets rows. diff --git a/docs/source/rows.mdx b/docs/source/rows.mdx new file mode 100644 index 0000000000..b22941dc43 --- /dev/null +++ b/docs/source/rows.mdx @@ -0,0 +1,146 @@ +# First rows + +The endpoint `/rows` provides the columns and the first rows of a dataset [split](./splits): + + ``` + https://datasets-server.huggingface.co/rows?dataset={dataset_name}&config={config_name}&split={split_name} + ``` + +The first 100 rows, or all the rows if the split contains less than 100 rows, are returned. The list of columns contain the data type. + +For example, the columns and the first rows of the `duorc` / `SelfRC` train split are: https://datasets-server.huggingface.co/rows?dataset=duorc&config=SelfRC&split=train + +```json +{ + "columns": [ + { + "dataset": "duorc", + "config": "SelfRC", + "split": "train", + "column_idx": 0, + "column": { "name": "plot_id", "type": "STRING" } + }, + { + "dataset": "duorc", + "config": "SelfRC", + "split": "train", + "column_idx": 1, + "column": { "name": "plot", "type": "STRING" } + }, + { + "dataset": "duorc", + "config": "SelfRC", + "split": "train", + "column_idx": 2, + "column": { "name": "title", "type": "STRING" } + }, + { + "dataset": "duorc", + "config": "SelfRC", + "split": "train", + "column_idx": 3, + "column": { "name": "question_id", "type": "STRING" } + }, + { + "dataset": "duorc", + "config": "SelfRC", + "split": "train", + "column_idx": 4, + "column": { "name": "question", "type": "STRING" } + }, + { + "dataset": "duorc", + "config": "SelfRC", + "split": "train", + "column_idx": 5, + "column": { "name": "answers", "type": "JSON" } + }, + { + "dataset": "duorc", + "config": "SelfRC", + "split": "train", + "column_idx": 6, + "column": { "name": "no_answer", "type": "BOOL" } + } + ], + "rows": [ + { + "dataset": "duorc", + "config": "SelfRC", + "split": "train", + "row_idx": 0, + "row": { + "plot_id": "/m/03vyhn", + "plot": "200 years in the future, Mars has been colonized by a high-tech company.\nMelanie Ballard (Natasha Henstridge) arrives by train to a Mars mining camp which has cut all communication links with the company headquarters. She's not alone, as she is with a group of fellow police officers. They find the mining camp deserted except for a person in the prison, Desolation Williams (Ice Cube), who seems to laugh about them because they are all going to die. 
They were supposed to take Desolation to headquarters, but decide to explore first to find out what happened.They find a man inside an encapsulated mining car, who tells them not to open it. However, they do and he tries to kill them. One of the cops witnesses strange men with deep scarred and heavily tattooed faces killing the remaining survivors. The cops realise they need to leave the place fast.Desolation explains that the miners opened a kind of Martian construction in the soil which unleashed red dust. Those who breathed that dust became violent psychopaths who started to build weapons and kill the uninfected. They changed genetically, becoming distorted but much stronger.The cops and Desolation leave the prison with difficulty, and devise a plan to kill all the genetically modified ex-miners on the way out. However, the plan goes awry, and only Melanie and Desolation reach headquarters alive. Melanie realises that her bosses won't ever believe her. However, the red dust eventually arrives to headquarters, and Melanie and Desolation need to fight once again.", + "title": "Ghosts of Mars", + "question_id": "b440de7d-9c3f-841c-eaec-a14bdff950d1", + "question": "How did the police arrive at the Mars mining camp?", + "answers": ["They arrived by train."], + "no_answer": false + }, + "truncated_cells": [] + }, + { + "dataset": "duorc", + "config": "SelfRC", + "split": "train", + "row_idx": 1, + "row": { + "plot_id": "/m/03vyhn", + "plot": "200 years in the future, Mars has been colonized by a high-tech company.\nMelanie Ballard (Natasha Henstridge) arrives by train to a Mars mining camp which has cut all communication links with the company headquarters. She's not alone, as she is with a group of fellow police officers. They find the mining camp deserted except for a person in the prison, Desolation Williams (Ice Cube), who seems to laugh about them because they are all going to die. They were supposed to take Desolation to headquarters, but decide to explore first to find out what happened.They find a man inside an encapsulated mining car, who tells them not to open it. However, they do and he tries to kill them. One of the cops witnesses strange men with deep scarred and heavily tattooed faces killing the remaining survivors. The cops realise they need to leave the place fast.Desolation explains that the miners opened a kind of Martian construction in the soil which unleashed red dust. Those who breathed that dust became violent psychopaths who started to build weapons and kill the uninfected. They changed genetically, becoming distorted but much stronger.The cops and Desolation leave the prison with difficulty, and devise a plan to kill all the genetically modified ex-miners on the way out. However, the plan goes awry, and only Melanie and Desolation reach headquarters alive. Melanie realises that her bosses won't ever believe her. However, the red dust eventually arrives to headquarters, and Melanie and Desolation need to fight once again.", + "title": "Ghosts of Mars", + "question_id": "a9f95c0d-121f-3ca9-1595-d497dc8bc56c", + "question": "Who has colonized Mars 200 years in the future?", + "answers": [ + "A high-tech company has colonized Mars 200 years in the future." + ], + "no_answer": false + }, + "truncated_cells": [] + }, + ... +``` + +## Truncated responses + +When the size of the response for 100 rows would have been too big, the last rows are removed until reaching under the acceptable limit. 
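+
+In practice, a client can check how many rows actually came back. A minimal sketch with `curl` and `jq` (an assumption; any HTTP client and JSON parser would work as well):
+
+```bash
+# Count the rows returned for the duorc / SelfRC / train split (at most 100).
+curl -s 'https://datasets-server.huggingface.co/rows?dataset=duorc&config=SelfRC&split=train' | jq '.rows | length'
+```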
+
+If even the first rows generate a response that does not fit within the limit, the content of the cells themselves is truncated and converted to a string. In this case, the truncated cells are listed in the `truncated_cells` field.
+
+See, for example, the [`ett`](https://huggingface.co/datasets/ett/viewer/m2/test) dataset: https://datasets-server.huggingface.co/rows?dataset=ett&config=m2&split=test. Only 10 rows are returned, and the contents of two of the columns are truncated:
+
+```json
+  ...
+  "rows": [
+    {
+      "dataset": "ett",
+      "config": "m2",
+      "split": "test",
+      "row_idx": 0,
+      "row": {
+        "start": 1467331200.0,
+        "target": "[38.6619987487793,38.222999572753906,37.34400177001953,37.124000549316406,37.124000549316406,36.9039",
+        "feat_static_cat": [0],
+        "feat_dynamic_real": "[[41.130001068115234,39.62200164794922,38.86800003051758,35.518001556396484,37.52799987792969,37.611",
+        "item_id": "OT"
+      },
+      "truncated_cells": ["target", "feat_dynamic_real"]
+    },
+    {
+      "dataset": "ett",
+      "config": "m2",
+      "split": "test",
+      "row_idx": 1,
+      "row": {
+        "start": 1467331200.0,
+        "target": "[38.6619987487793,38.222999572753906,37.34400177001953,37.124000549316406,37.124000549316406,36.9039",
+        "feat_static_cat": [0],
+        "feat_dynamic_real": "[[41.130001068115234,39.62200164794922,38.86800003051758,35.518001556396484,37.52799987792969,37.611",
+        "item_id": "OT"
+      },
+      "truncated_cells": ["target", "feat_dynamic_real"]
+    }
+  ...
+```
+
+This limitation was introduced to ensure the response size is always under 1 MB.
diff --git a/docs/source/splits.mdx b/docs/source/splits.mdx
new file mode 100644
index 0000000000..47591def88
--- /dev/null
+++ b/docs/source/splits.mdx
@@ -0,0 +1,75 @@
+# Splits
+
+A dataset generally contains multiple [splits](https://huggingface.co/docs/datasets/load_hub#splits):
+
+> a split is a specific subset of a dataset like `train` and `test`
+
+and optionally various [configurations](https://huggingface.co/docs/datasets/load_hub#configurations):
+
+> Some datasets contain several sub-datasets. For example, the MInDS-14 dataset has several sub-datasets, each one containing audio data in a different language. These sub-datasets are known as configurations.
+
+See the [documentation](https://huggingface.co/docs/datasets) of the [๐Ÿค— datasets](https://github.com/huggingface/datasets) library to read about these concepts in more depth.
+
+To get the list of splits and configurations of a dataset:
+
+  ```
+  https://datasets-server.huggingface.co/splits?dataset={dataset_name}
+  ```
+
+<Tip>
+
+  Currently, the API only returns rows of the ["streamable" datasets](https://huggingface.co/docs/datasets/stream).
+
+  By loading the datasets in streaming mode, the first rows can be extracted without the need to download the whole dataset. In the future, we plan to also support non-streamable datasets.
+
+</Tip>
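+
+A minimal sketch of calling this endpoint with `curl` (assuming `jq` is available to format the output; any HTTP client works just as well); the raw response for the same request is shown below:
+
+```bash
+# List every config/split pair of the duorc dataset.
+curl -s 'https://datasets-server.huggingface.co/splits?dataset=duorc' | jq '.splits[] | "\(.config)/\(.split)"'
+```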
+ + + +For example, [`duorc`](https://huggingface.co/datasets/duorc) has six splits, within two configurations: https://datasets-server.huggingface.co/splits?dataset=duorc + +```json +{ + "splits": [ + { + "dataset": "duorc", + "config": "SelfRC", + "split": "train", + "num_bytes": 239852925, + "num_examples": 60721 + }, + { + "dataset": "duorc", + "config": "SelfRC", + "split": "validation", + "num_bytes": 51662575, + "num_examples": 12961 + }, + { + "dataset": "duorc", + "config": "SelfRC", + "split": "test", + "num_bytes": 49142766, + "num_examples": 12559 + }, + { + "dataset": "duorc", + "config": "ParaphraseRC", + "split": "train", + "num_bytes": 496683105, + "num_examples": 69524 + }, + { + "dataset": "duorc", + "config": "ParaphraseRC", + "split": "validation", + "num_bytes": 106510545, + "num_examples": 15591 + }, + { + "dataset": "duorc", + "config": "ParaphraseRC", + "split": "test", + "num_bytes": 115215816, + "num_examples": 15857 + } + ] +} +``` diff --git a/docs/source/valid.mdx b/docs/source/valid.mdx new file mode 100644 index 0000000000..85983bdd7d --- /dev/null +++ b/docs/source/valid.mdx @@ -0,0 +1,23 @@ +# Valid datasets + +An error can occur when extracting the [splits](./splits) or the first [rows](./rows) of some datasets. In this case, an error is returned. + +The `/valid` endpoints gives the list of the Hub datasets that work without an error: + + ``` + https://datasets-server.huggingface.co/valid + ``` + +The response takes the following form: https://datasets-server.huggingface.co/valid + +```json +{ + "valid": [ + "0n1xus/codexglue", + "0n1xus/pytorrent-standalone", + "0x7194633/rupile", + "51la5/keyword-extraction", + ... + ] +} +``` From e8f601e51c134d113453ae245832e2e8292cbe1b Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 27 Jun 2022 17:01:26 +0000 Subject: [PATCH 08/14] =?UTF-8?q?docs:=20=E2=9C=8F=EF=B8=8F=20fix=20the=20?= =?UTF-8?q?format=20inside=20a=20"Tip"=20tag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/source/splits.mdx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/splits.mdx b/docs/source/splits.mdx index 47591def88..fb6f427d71 100644 --- a/docs/source/splits.mdx +++ b/docs/source/splits.mdx @@ -17,9 +17,7 @@ To get the list of splits and configurations of a dataset: ``` - Currently, the API only returns rows of the ["streamable" datasets](https://huggingface.co/docs/datasets/stream). - - By loading the datasets in streaming mode, the first rows can be extracted without the need to download the whole dataset. In the future, we plan to also support non-streamable datasets. + Currently, the API only returns rows of the "streamable" datasets. By loading the datasets in streaming mode, the first rows can be extracted without the need to download the whole dataset. In the future, we plan to also support non-streamable datasets. 
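+
+To tie the pages added above together: before asking for splits, a client may first want to know whether a dataset is usable at all. Here is a small sketch against the `/valid` endpoint introduced earlier — the `is_valid` helper name is purely illustrative, not a published client function:
+
+```python
+# Minimal sketch: check whether a dataset currently works without errors.
+import requests
+
+def is_valid(dataset: str) -> bool:
+    response = requests.get("https://datasets-server.huggingface.co/valid")
+    response.raise_for_status()
+    # /valid returns the full list of error-free datasets.
+    return dataset in response.json()["valid"]
+
+print(is_valid("duorc"))
+```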
From ec34fefbe80b8efe2569044f261f1b7b5fb02994 Mon Sep 17 00:00:00 2001 From: Sylvain Lesage Date: Tue, 28 Jun 2022 10:09:26 +0200 Subject: [PATCH 09/14] Update docs/source/index.mdx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mario ล aลกko --- docs/source/index.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 1aca4ce589..2fa93eb015 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -1,6 +1,6 @@ # Datasets server -The ๐Ÿค— datasets server gives access to the contents, metadata and basic statistics of the [Hugging Face Hub datasets](https://huggingface.co/datasets) via a REST API. +The ๐Ÿค— Datasets server gives access to the contents, metadata and basic statistics of the [Hugging Face Hub datasets](https://huggingface.co/datasets) via a REST API. ## History From b389a8597106fc99b6fe27ed5b674afb594df371 Mon Sep 17 00:00:00 2001 From: Sylvain Lesage Date: Tue, 28 Jun 2022 10:11:23 +0200 Subject: [PATCH 10/14] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> Co-authored-by: Mario ล aลกko --- docs/source/api_reference.mdx | 4 ++-- docs/source/index.mdx | 2 +- docs/source/splits.mdx | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/api_reference.mdx b/docs/source/api_reference.mdx index 7cca5077fa..fe3077a095 100644 --- a/docs/source/api_reference.mdx +++ b/docs/source/api_reference.mdx @@ -6,6 +6,6 @@ It provides the following endpoints: | Endpoint | Description | Query parameters | | --- | --- | --- | -| /splits GET | Get the list of configurations and splits of a dataset. | - `dataset`: name of the dataset | -| /rows GET | Get the first rows of a dataset split. | - `dataset`: name of the dataset - `config`: name of the config - `split`: name of the split | +| /splits GET | Get the list of configurations and splits of a dataset. | `dataset`: name of the dataset | +| /rows GET | Get the first rows of a dataset split. | - `dataset`: name of the dataset
- `config`: name of the config
- `split`: name of the split | | /valid GET | Get the list of datasets hosted in the Hub and supported by the datasets server. | | diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 2fa93eb015..5ae9915f3c 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -4,7 +4,7 @@ The ๐Ÿค— Datasets server gives access to the contents, metadata and basic statis ## History -The API has originally been developed to provide data to the [dataset viewer](https://huggingface.co/docs/hub/datasets-viewer) on the Hub. Indeed, the information about the datasets cannot be extracted live, and must be preprocessed and stored beforehand in order to later be accessed quickly on the Hub. +The API has originally been developed to provide data to the [Dataset viewer](https://huggingface.co/docs/hub/datasets-viewer) on the Hub. Indeed, the information about the datasets cannot be extracted live, and must be preprocessed and stored beforehand in order to later be accessed quickly on the Hub. We decided to make the API public to give access programmatically, through a REST API, to the contents, metadata and statistics of the Hub datasets. diff --git a/docs/source/splits.mdx b/docs/source/splits.mdx index fb6f427d71..6300787ff3 100644 --- a/docs/source/splits.mdx +++ b/docs/source/splits.mdx @@ -8,7 +8,7 @@ and optionally various [configurations](https://huggingface.co/docs/datasets/loa > Some datasets contain several sub-datasets. For example, the MInDS-14 dataset has several sub-datasets, each one containing audio data in a different language. These sub-datasets are known as configurations. -See the [documentation](https://huggingface.co/docs/datasets) of the [๐Ÿค— datasets](https://github.com/huggingface/datasets) library to read more in depth about the concepts. +See the [documentation](https://huggingface.co/docs/datasets) of the [๐Ÿค— Datasets](https://github.com/huggingface/datasets) library to read more in depth about the concepts. To get the list of splits and configurations of a dataset: From dd6498807374af0ab0a068ed33bf8f83e62bdb3e Mon Sep 17 00:00:00 2001 From: Sylvain Lesage Date: Tue, 28 Jun 2022 10:21:02 +0200 Subject: [PATCH 11/14] Apply suggestions from code review Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/index.mdx | 2 +- docs/source/rows.mdx | 2 +- docs/source/splits.mdx | 13 ++++--------- docs/source/valid.mdx | 4 ++-- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 5ae9915f3c..669e3720b2 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -6,7 +6,7 @@ The ๐Ÿค— Datasets server gives access to the contents, metadata and basic statis The API has originally been developed to provide data to the [Dataset viewer](https://huggingface.co/docs/hub/datasets-viewer) on the Hub. Indeed, the information about the datasets cannot be extracted live, and must be preprocessed and stored beforehand in order to later be accessed quickly on the Hub. -We decided to make the API public to give access programmatically, through a REST API, to the contents, metadata and statistics of the Hub datasets. +We decided to make the API public to give programmatic access to the information about datasets. 
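+
+As a sketch of what that programmatic access looks like in practice — chaining the three endpoints from the reference table above, with Python's `requests` package assumed as the client:
+
+```python
+# Minimal sketch: chain /valid -> /splits -> /rows.
+import requests
+
+BASE = "https://datasets-server.huggingface.co"
+
+# 1. Pick a dataset that is known to work.
+dataset = requests.get(f"{BASE}/valid").json()["valid"][0]
+
+# 2. List its configurations and splits.
+splits = requests.get(f"{BASE}/splits", params={"dataset": dataset}).json()["splits"]
+first = splits[0]
+
+# 3. Fetch the first rows of one split.
+rows = requests.get(
+    f"{BASE}/rows",
+    params={"dataset": dataset, "config": first["config"], "split": first["split"]},
+).json()["rows"]
+print(f"{dataset}: fetched {len(rows)} rows from {first['config']}/{first['split']}")
+```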
## Evolution diff --git a/docs/source/rows.mdx b/docs/source/rows.mdx index b22941dc43..5bb021b271 100644 --- a/docs/source/rows.mdx +++ b/docs/source/rows.mdx @@ -103,7 +103,7 @@ For example, the columns and the first rows of the `duorc` / `SelfRC` train spli ## Truncated responses -When the size of the response for 100 rows would have been too big, the last rows are removed until reaching under the acceptable limit. +When the response size for 100 rows is too big, the last rows are removed until the response size is under 1MB. If even the first rows generate a response that does not fit within the limit, the content of the cells themselves is truncated and converted to a string. In this case, the truncated cells are listed in the `truncated_cells` field. diff --git a/docs/source/splits.mdx b/docs/source/splits.mdx index 6300787ff3..979b49588a 100644 --- a/docs/source/splits.mdx +++ b/docs/source/splits.mdx @@ -1,12 +1,6 @@ # Splits -A dataset generally contains multiple [splits](https://huggingface.co/docs/datasets/load_hub#splits): - -> a split is a specific subset of a dataset like `train` and `test` - -and optionally various [configurations](https://huggingface.co/docs/datasets/load_hub#configurations): - -> Some datasets contain several sub-datasets. For example, the MInDS-14 dataset has several sub-datasets, each one containing audio data in a different language. These sub-datasets are known as configurations. +A dataset generally contains multiple *[splits](https://huggingface.co/docs/datasets/load_hub#splits)*, a specific subset of a dataset like `train` and `test`. The dataset can also contain *[configurations](https://huggingface.co/docs/datasets/load_hub#configurations)*, a sub-dataset of the larger dataset. See the [documentation](https://huggingface.co/docs/datasets) of the [๐Ÿค— Datasets](https://github.com/huggingface/datasets) library to read more in depth about the concepts. @@ -17,13 +11,14 @@ To get the list of splits and configurations of a dataset: ``` - Currently, the API only returns rows of the "streamable" datasets. By loading the datasets in streaming mode, the first rows can be extracted without the need to download the whole dataset. In the future, we plan to also support non-streamable datasets. + Currently, the API only returns rows of the "streamable" datasets. By loading a dataset in streaming mode, the first rows can be extracted without downloading the whole dataset. In the future, we plan to also support non-streamable datasets. -For example, [`duorc`](https://huggingface.co/datasets/duorc) has six splits, within two configurations: https://datasets-server.huggingface.co/splits?dataset=duorc +For example, the [duorc](https://huggingface.co/datasets/duorc) dataset has six splits and two configurations: ```json +https://datasets-server.huggingface.co/splits?dataset=duorc { "splits": [ { diff --git a/docs/source/valid.mdx b/docs/source/valid.mdx index 85983bdd7d..404cd54b0a 100644 --- a/docs/source/valid.mdx +++ b/docs/source/valid.mdx @@ -1,6 +1,6 @@ # Valid datasets -An error can occur when extracting the [splits](./splits) or the first [rows](./rows) of some datasets. In this case, an error is returned. +An error may be returned if an issue occurs during extraction of the [splits](./splits) or first [rows](./rows) of some datasets. 
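+
+In practice this means a request can fail, so a client should check the HTTP status before parsing. The exact error payload is not specified here, so the sketch below relies only on the status code, and the dataset name is a deliberate placeholder:
+
+```python
+# Minimal sketch: handle a dataset whose extraction failed.
+import requests
+
+response = requests.get(
+    "https://datasets-server.huggingface.co/splits",
+    params={"dataset": "some-broken-dataset"},  # placeholder name
+)
+if response.ok:
+    print(response.json()["splits"])
+else:
+    print(f"extraction failed: HTTP {response.status_code}")
+```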
The `/valid` endpoints gives the list of the Hub datasets that work without an error: @@ -8,7 +8,7 @@ The `/valid` endpoints gives the list of the Hub datasets that work without an e https://datasets-server.huggingface.co/valid ``` -The response takes the following form: https://datasets-server.huggingface.co/valid +The response looks like: ```json { From 8479adcf50b5d9aeb873c8c1c75c24c960dcee60 Mon Sep 17 00:00:00 2001 From: Test User Date: Tue, 28 Jun 2022 08:22:37 +0000 Subject: [PATCH 12/14] =?UTF-8?q?docs:=20=E2=9C=8F=EF=B8=8F=20Apply=20sugg?= =?UTF-8?q?estions=20from=20code=20review?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/source/index.mdx | 5 ----- docs/source/rows.mdx | 2 -- docs/source/splits.mdx | 2 +- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 669e3720b2..612bc37d20 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -8,8 +8,3 @@ The API has originally been developed to provide data to the [Dataset viewer](ht We decided to make the API public to give programmatic access to the information about datasets. -## Evolution - -The API currently provides three endpoints. - -The plan is to add more of them in the near future. The two next features should be the basic statistics by column, and random access to the datasets rows. diff --git a/docs/source/rows.mdx b/docs/source/rows.mdx index 5bb021b271..69ca59c987 100644 --- a/docs/source/rows.mdx +++ b/docs/source/rows.mdx @@ -142,5 +142,3 @@ See for example the [`ett`](https://huggingface.co/datasets/ett/viewer/m2/test) } ... ``` - -This limitation was introduced to ensure the response size is always under 1 MB. diff --git a/docs/source/splits.mdx b/docs/source/splits.mdx index 979b49588a..d9d0dc5811 100644 --- a/docs/source/splits.mdx +++ b/docs/source/splits.mdx @@ -11,7 +11,7 @@ To get the list of splits and configurations of a dataset: ``` - Currently, the API only returns rows of the "streamable" datasets. By loading a dataset in streaming mode, the first rows can be extracted without downloading the whole dataset. In the future, we plan to also support non-streamable datasets. + Currently, the API only returns rows of the "streamable" datasets. By loading a dataset in streaming mode, the first rows can be extracted without downloading the whole dataset. From 11d69b89f1e29e3d72a1d67cf5f8d8d5c000f384 Mon Sep 17 00:00:00 2001 From: Test User Date: Tue, 28 Jun 2022 08:33:32 +0000 Subject: [PATCH 13/14] =?UTF-8?q?docs:=20=E2=9C=8F=EF=B8=8F=20allow=20to?= =?UTF-8?q?=20click=20on=20the=20link=20in=20the=20examples?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/source/api_reference.mdx | 6 +++++- docs/source/rows.mdx | 8 ++++++-- docs/source/splits.mdx | 3 ++- docs/source/valid.mdx | 2 ++ 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/source/api_reference.mdx b/docs/source/api_reference.mdx index fe3077a095..e84fb0810f 100644 --- a/docs/source/api_reference.mdx +++ b/docs/source/api_reference.mdx @@ -1,6 +1,10 @@ # API reference -The base URL of the REST API is https://datasets-server.huggingface.co. 
+The base URL of the REST API is + + ``` + https://datasets-server.huggingface.co + ``` It provides the following endpoints: diff --git a/docs/source/rows.mdx b/docs/source/rows.mdx index 69ca59c987..22dd075db1 100644 --- a/docs/source/rows.mdx +++ b/docs/source/rows.mdx @@ -8,7 +8,9 @@ The endpoint `/rows` provides the columns and the first rows of a dataset [split The first 100 rows, or all the rows if the split contains less than 100 rows, are returned. The list of columns contain the data type. -For example, the columns and the first rows of the `duorc` / `SelfRC` train split are: https://datasets-server.huggingface.co/rows?dataset=duorc&config=SelfRC&split=train +For example, here are the columns and the first rows of the `duorc` / `SelfRC` train split. + +https://datasets-server.huggingface.co/rows?dataset=duorc&config=SelfRC&split=train ```json { @@ -107,7 +109,9 @@ When the response size for 100 rows is too big, the last rows are removed until If even the first rows generate a response that does not fit within the limit, the content of the cells themselves is truncated and converted to a string. In this case, the truncated cells are listed in the `truncated_cells` field. -See for example the [`ett`](https://huggingface.co/datasets/ett/viewer/m2/test) dataset: https://datasets-server.huggingface.co/rows?dataset=ett&config=m2&split=test. Only 10 rows are returned, and the content of two of the columns are truncated: +See for example the [`ett`](https://huggingface.co/datasets/ett/viewer/m2/test) dataset: only 10 rows are returned, and the content of two of the columns are truncated. + +https://datasets-server.huggingface.co/rows?dataset=ett&config=m2&split=test ```json ... diff --git a/docs/source/splits.mdx b/docs/source/splits.mdx index d9d0dc5811..a00e75e0a3 100644 --- a/docs/source/splits.mdx +++ b/docs/source/splits.mdx @@ -17,8 +17,9 @@ To get the list of splits and configurations of a dataset: For example, the [duorc](https://huggingface.co/datasets/duorc) dataset has six splits and two configurations: -```json https://datasets-server.huggingface.co/splits?dataset=duorc + +```json { "splits": [ { diff --git a/docs/source/valid.mdx b/docs/source/valid.mdx index 404cd54b0a..e955e5ae8a 100644 --- a/docs/source/valid.mdx +++ b/docs/source/valid.mdx @@ -10,6 +10,8 @@ The `/valid` endpoints gives the list of the Hub datasets that work without an e The response looks like: +https://datasets-server.huggingface.co/valid + ```json { "valid": [ From 328975b3c4dd4cc6c7285c72190b6d62b15fada0 Mon Sep 17 00:00:00 2001 From: Test User Date: Tue, 28 Jun 2022 08:36:56 +0000 Subject: [PATCH 14/14] =?UTF-8?q?ci:=20=F0=9F=8E=A1=20publish=20the=20doc?= =?UTF-8?q?=20to=20huggingface/doc-build?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build_documentation.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/workflows/build_documentation.yml diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml new file mode 100644 index 0000000000..96d610c1d1 --- /dev/null +++ b/.github/workflows/build_documentation.yml @@ -0,0 +1,18 @@ +name: Build documentation + +on: + push: + branches: + - main + - doc-builder* + - v*-release + +jobs: + build: + uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main + with: + commit_sha: ${{ github.sha }} + package: datasets-server + additional_args: --not_python_module + secrets: + token: ${{ 
secrets.HUGGINGFACE_PUSH }}