diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4ff11e1..f5823a4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,7 +24,7 @@ jobs: - name: Check manifest run: tox -e manifest - name: Check code quality with flake8 - run: tox -e flake8 + run: tox -e format,lint - name: Check package metadata with Pyroma run: tox -e pyroma - name: Check static typing with MyPy @@ -34,7 +34,7 @@ runs-on: ubuntu-latest strategy: matrix: - python-version: [ "3.12", "3.9" ] + python-version: [ "3.12" ] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} diff --git a/.gitignore b/.gitignore index a1acf3d..c57b880 100644 --- a/.gitignore +++ b/.gitignore @@ -130,3 +130,6 @@ dmypy.json # Auto-generated docs docs/source/api/ +tests/resources/test_1.db +tests/resources/test_1.json.bz2 +tests/resources/test_1.pkl.gz diff --git a/docs/source/conf.py b/docs/source/conf.py index a793beb..3e335f8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,43 +1,55 @@ -# -*- coding: utf-8 -*- -# -# Configuration file for the Sphinx documentation builder. -# -# This file does only contain a selection of the most common options. For a -# full list see the documentation: -# http://www.sphinx-doc.org/en/master/config +""" +Configuration file for the Sphinx documentation builder. -# -- Path setup -------------------------------------------------------------- +This file only contains a selection of the most common options. For a +full list see the documentation: +http://www.sphinx-doc.org/en/master/config -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# +-- Path setup -------------------------------------------------------------- + +If extensions (or modules to document with autodoc) are in another directory, +add these directories to ``sys.path`` here. If the directory is relative to the +documentation root, use ``os.path.abspath`` to make it absolute, as shown here. +""" import os import re import sys from datetime import date -sys.path.insert(0, os.path.abspath('../../src')) +sys.path.insert(0, os.path.abspath("../../src")) # -- Project information ----------------------------------------------------- -project = 'pystow' -copyright = f'{date.today().year}, Charles Tapley Hoyt' -author = 'Charles Tapley Hoyt' +project = "pystow" +copyright = f"{date.today().year}, Charles Tapley Hoyt" +author = "Charles Tapley Hoyt" # The full version, including alpha/beta/rc tags. -release = '0.5.7-dev' +release = "0.5.7-dev" # The short X.Y version.
parsed_version = re.match( - '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?', + r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?", release, ) -version = parsed_version.expand('\g<major>.\g<minor>.\g<patch>') +version = parsed_version.expand(r"\g<major>.\g<minor>.\g<patch>") + +if parsed_version.group("release"): + tags.add("prerelease") # noqa:F821 + + +# See https://about.readthedocs.com/blog/2024/07/addons-by-default/ +# Define the canonical URL if you are using a custom domain on Read the Docs +html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "") + +# See https://about.readthedocs.com/blog/2024/07/addons-by-default/ +# Tell Jinja2 templates the build is running on Read the Docs +if os.environ.get("READTHEDOCS", "") == "True": + if "html_context" not in globals(): + html_context = {} + html_context["READTHEDOCS"] = True -if parsed_version.group('release'): - tags.add('prerelease') # -- General configuration --------------------------------------------------- @@ -56,32 +68,37 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autosummary', - 'sphinx.ext.autodoc', - 'sphinx.ext.coverage', - 'sphinx.ext.intersphinx', + "sphinx.ext.autosummary", + "sphinx.ext.autodoc", + "sphinx.ext.coverage", + "sphinx.ext.intersphinx", "sphinx.ext.todo", - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinx_autodoc_typehints', - 'sphinx_click.ext', - 'sphinx_automodapi.automodapi', + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx_automodapi.automodapi", + "sphinx_automodapi.smart_resolver", ] + +extensions.append("sphinx_click.ext") + + # generate autosummary pages -# autosummary_generate = True +autosummary_generate = True # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = { + ".rst": "restructuredtext", +} # The master toctree document. -master_doc = 'index' +master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -96,14 +113,14 @@ exclude_patterns = [] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -129,13 +146,13 @@ # The name of an image file (relative to this directory) to place at the top # of the sidebar. # -if os.path.exists('logo.png'): - html_logo = 'logo.png' +if os.path.exists("logo.png"): + html_logo = "logo.png" # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder.
-htmlhelp_basename = 'PyStowdoc' +htmlhelp_basename = "pystow_doc" # -- Options for LaTeX output ------------------------------------------------ @@ -177,8 +194,8 @@ man_pages = [ ( master_doc, - 'pystow', - 'PyStow Documentation', + "pystow", + "PyStow Documentation", [author], 1, ), @@ -192,12 +209,12 @@ texinfo_documents = [ ( master_doc, - 'pystow', - 'PyStow Documentation', + "pystow", + "PyStow Documentation", author, - 'Charles Tapley Hoyt', - '👜 Easily pick a place to store data for your python package.', - 'Miscellaneous', + "Charles Tapley Hoyt", + "Easily pick a place to store data for your Python code", + "Miscellaneous", ), ] @@ -223,11 +240,21 @@ # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. +# Note: don't add trailing slashes, since sphinx adds "/objects.inv" to the end intersphinx_mapping = { - "python": ('https://docs.python.org/3/', None), - 'rdflib': ('https://rdflib.readthedocs.io/en/latest/', None), - 'pandas': ('https://pandas.pydata.org/pandas-docs/dev', None), + "python": ("https://docs.python.org/3", None), + "rdflib": ("https://rdflib.readthedocs.io/en/latest", None), + "pandas": ("https://pandas.pydata.org/docs", None), + "sklearn": ("https://scikit-learn.org/stable", None), + "numpy": ("https://numpy.org/doc/stable", None), + "scipy": ("https://docs.scipy.org/doc/scipy", None), } -autoclass_content = 'both' -autodoc_member_order = 'bysource' +autoclass_content = "both" + +# Don't sort alphabetically, explained at: +# https://stackoverflow.com/questions/37209921/python-how-not-to-sort-sphinx-output-in-alphabetical-order +autodoc_member_order = "bysource" + +todo_include_todos = True +todo_emit_warnings = True diff --git a/pyproject.toml b/pyproject.toml index 11a8e62..b8e3a5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,244 @@ # See https://setuptools.readthedocs.io/en/latest/build_meta.html [build-system] requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta:__legacy__" +build-backend = "setuptools.build_meta" -[tool.black] +[project] +name = "pystow" +version = "0.5.7-dev" +description = "Easily pick a place to store data for your Python code" +readme = "README.md" +authors = [ + { name = "Charles Tapley Hoyt", email = "cthoyt@gmail.com" } +] +maintainers = [ + { name = "Charles Tapley Hoyt", email = "cthoyt@gmail.com" } +] + +# See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#classifiers +# Search tags using the controlled vocabulary at https://pypi.org/classifiers +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Framework :: Pytest", + "Framework :: tox", + "Framework :: Sphinx", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3 :: Only", +] +keywords = [ + "snekpack", # please keep this keyword to credit the cookiecutter-snekpack template + "cookiecutter", + "caching", + "file management" +] + +# License Information. 
This can be any valid SPDX identifier that can be resolved +# with URLs like https://spdx.org/licenses/MIT +# See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#license +license = { file = "LICENSE" } + +requires-python = ">=3.9" +dependencies = [ + "click", + "requests", + "tqdm", + "typing-extensions", +] + +[project.optional-dependencies] +tests = [ + "pytest", + "coverage", + "requests_file", +] +docs = [ + "sphinx>=8", + "sphinx-rtd-theme>=3.0", + "sphinx-click", + "sphinx_automodapi", +] +rdf = [ + "rdflib", +] +xml = [ + "lxml", +] +pandas = [ + "pandas", +] +aws = [ + "boto3", +] + +# See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#urls +[project.urls] +"Bug Tracker" = "https://github.com/cthoyt/pystow/issues" +Homepage = "https://github.com/cthoyt/pystow" +Repository = "https://github.com/cthoyt/pystow.git" +Documentation = "https://pystow.readthedocs.io" + +[tool.setuptools] +package-dir = { "" = "src" } + +[tool.setuptools.packages.find] +# this implicitly sets `packages = ":find"` +where = ["src"] # list of folders that contain the packages (["."] by default) + +# See https://setuptools.pypa.io/en/latest/userguide/datafiles.html +[tool.setuptools.package-data] +"*" = ["*.*"] + + +[project.scripts] +pystow = "pystow.cli:main" + + +[tool.cruft] +skip = [ + "**/__init__.py", + "tests/*" +] + +# MyPy, see https://mypy.readthedocs.io/en/stable/config_file.html +[tool.mypy] +plugins = [ +] + +# Doc8, see https://doc8.readthedocs.io/en/stable/readme.html#ini-file-usage +[tool.doc8] +max-line-length = 120 + +# Pytest, see https://docs.pytest.org/en/stable/reference/customize.html#pyproject-toml +[tool.pytest.ini_options] +markers = [ + "slow: marks tests as slow (deselect with '-m \"not slow\"')", +] + +# Coverage, see https://coverage.readthedocs.io/en/latest/config.html +[tool.coverage.run] +branch = true +source = [ + "pystow", +] +omit = [ + "tests/*", + "docs/*", +] + +[tool.coverage.paths] +source = [ + "src/pystow", + ".tox/*/lib/python*/site-packages/pystow", +] + +[tool.coverage.report] +show_missing = true +exclude_lines = [ + "pragma: no cover", + "raise NotImplementedError", + "if __name__ == \"__main__\":", + "if TYPE_CHECKING:", + "def __str__", + "def __repr__", +] + +[tool.ruff] line-length = 100 -target-version = ["py39", "py310", "py311", "py312"] +extend-include = ["*.ipynb"] + +[tool.ruff.lint] +# See https://docs.astral.sh/ruff/rules +extend-select = [ + "F", # pyflakes + "E", # pycodestyle errors + "W", # pycodestyle warnings + "C90", # mccabe + "I", # isort + "UP", # pyupgrade + "D", # pydocstyle + "DOC", # pydoclint + "B", # bugbear + "S", # bandit + "T20", # print + "N", # pep8 naming + "ERA", # eradicate commented out code + "NPY", # numpy checks + "RUF", # ruff rules + "C4", # comprehensions +] +ignore = [ + "D105", # Missing docstring in magic method + "E203", # Black conflicts with the following + "S301", # yolo pickle + "S320", # yolo lxml +] + + +# See https://docs.astral.sh/ruff/settings/#per-file-ignores +[tool.ruff.lint.per-file-ignores] +# Ignore security issues in the version.py, which are inconsistent +"src/pystow/version.py" = ["S603", "S607"] +# Ignore commented out code in Sphinx configuration file +"docs/source/conf.py" = ["ERA001"] +# Prints are okay in notebooks +"notebooks/**/*.ipynb" = ["T201"] + +[tool.ruff.lint.pydocstyle] +convention = "pep257" + +[tool.ruff.lint.isort] +relative-imports-order = "closest-to-furthest" +known-third-party = [ + "tqdm", +] +known-first-party = [ + "pystow",
"tests", +] + +[tool.ruff.format] +# see https://docs.astral.sh/ruff/settings/#format_docstring-code-format +docstring-code-format = true + +[tool.bumpversion] +current_version = "0.5.7-dev" +parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)(?:-(?P[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?(?:\\+(?P[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?" +serialize = [ + "{major}.{minor}.{patch}-{release}+{build}", + "{major}.{minor}.{patch}+{build}", + "{major}.{minor}.{patch}-{release}", + "{major}.{minor}.{patch}", +] +commit = true +tag = false + +[tool.bumpversion.parts.release] +optional_value = "production" +first_value = "dev" +values = [ + "dev", + "production", +] + +[[tool.bumpversion.files]] +filename = "pyproject.toml" +search = "version = \"{current_version}\"" +replace = "version = \"{new_version}\"" + +[[tool.bumpversion.files]] +filename = "docs/source/conf.py" +search = "release = \"{current_version}\"" +replace = "release = \"{new_version}\"" -[tool.isort] -profile = "black" -multi_line_output = 3 -include_trailing_comma = true -reverse_relative = true +[[tool.bumpversion.files]] +filename = "src/pystow/version.py" +search = "VERSION = \"{current_version}\"" +replace = "VERSION = \"{new_version}\"" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 5c841f7..0000000 --- a/setup.cfg +++ /dev/null @@ -1,159 +0,0 @@ -########################## -# Setup.py Configuration # -########################## -# Configuring setup() -[metadata] -name = pystow -version = 0.5.7-dev -description = Easily pick a place to store data for your python package. -long_description = file: README.md -long_description_content_type = text/markdown - -# Links -url = https://github.com/cthoyt/pystow -download_url = https://github.com/cthoyt/pystow/releases -project_urls = - Bug Tracker = https://github.com/cthoyt/pystow/issues - -# Author information -author = Charles Tapley Hoyt -author_email = cthoyt@gmail.com -maintainer = Charles Tapley Hoyt -maintainer_email = cthoyt@gmail.com - -# License information -license = MIT -license_file = LICENSE - -# Search tags -classifiers = - Development Status :: 5 - Production/Stable - Environment :: Console - License :: OSI Approved :: MIT License - Operating System :: OS Independent - Programming Language :: Python - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: 3.11 - Programming Language :: Python :: 3.12 - Programming Language :: Python :: 3 :: Only -keywords = - caching - file management - -[options] -install_requires = - click - requests - tqdm - typing_extensions - -zip_safe = false -python_requires = >=3.9 - -# Where is my code -packages = find: -package_dir = - = src - -[options.packages.find] -where = src - -[options.extras_require] -rdf = - rdflib -xml = - lxml -pandas = - pandas -aws = - boto3 -tests = - coverage - pytest - requests_file -docs = - sphinx<8.0 - sphinx-rtd-theme - sphinx-click - sphinx-autodoc-typehints - sphinx_automodapi - -[options.entry_points] -console_scripts = - pystow = pystow.cli:main - -###################### -# Doc8 Configuration # -# (doc8.ini) # -###################### -[doc8] -max-line-length = 120 - -########################## -# Coverage Configuration # -# (.coveragerc) # -########################## -[coverage:run] -branch = True -source = pystow -omit = - tests/* - docs/* - src/pystow/cli.py - src/pystow/__main__.py - -[coverage:paths] -source = - src/pystow - .tox/*/lib/python*/site-packages/pystow - -[coverage:report] -show_missing = True -exclude_lines = - def __str__ 
- def __repr__ - -########################## -# Darglint Configuration # -########################## -[darglint] -docstring_style = sphinx -strictness = full -# enable = DAR104 - -######################### -# Flake8 Configuration # -# (.flake8) # -######################### -[flake8] -ignore = - # pickle - S403 - # pickle - S301 - # line break before binary operator - W503 - S410 - S320 - # overload operator causes this - E704 -exclude = - .tox, - .git, - __pycache__, - docs/source/conf.py, - build, - dist, - tests/fixtures/*, - *.pyc, - *.egg-info, - .cache, - .eggs, - data -max-line-length = 120 -max-complexity = 20 -import-order-style = pycharm -application-import-names = - pystow - tests diff --git a/src/pystow/__init__.py b/src/pystow/__init__.py index 1e874e4..24c3762 100644 --- a/src/pystow/__init__.py +++ b/src/pystow/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """PyStow: Easily pick a place to store data for your python package.""" from .api import ( diff --git a/src/pystow/__main__.py b/src/pystow/__main__.py index 350e058..4ae045e 100644 --- a/src/pystow/__main__.py +++ b/src/pystow/__main__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # type: ignore """Command line interface for PyStow.""" diff --git a/src/pystow/api.py b/src/pystow/api.py index 0d4c13a..3018d33 100644 --- a/src/pystow/api.py +++ b/src/pystow/api.py @@ -1,23 +1,19 @@ -# -*- coding: utf-8 -*- - """API functions for PyStow.""" +from __future__ import annotations + import bz2 import io import lzma import sqlite3 +from collections.abc import Generator, Mapping, Sequence from contextlib import contextmanager from io import BytesIO, StringIO from pathlib import Path from typing import ( TYPE_CHECKING, Any, - Generator, Literal, - Mapping, - Optional, - Sequence, - Union, overload, ) @@ -105,7 +101,7 @@ def module(key: str, *subkeys: str, ensure_exists: bool = True) -> Module: def join( key: str, *subkeys: str, - name: Optional[str] = None, + name: str | None = None, ensure_exists: bool = True, version: VersionHint = None, ) -> Path: @@ -159,7 +155,7 @@ def open( *subkeys: str, name: str, mode: Literal["r", "rt", "w", "wt"] = "r", - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ) -> Generator[StringIO, None, None]: ... @@ -171,7 +167,7 @@ def open( *subkeys: str, name: str, mode: Literal["rb", "wb"], - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ) -> Generator[BytesIO, None, None]: ... @@ -181,9 +177,9 @@ def open( *subkeys: str, name: str, mode: Literal["r", "rb", "rt", "w", "wb", "wt"] = "r", - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool = False, -) -> Generator[Union[StringIO, BytesIO], None, None]: +) -> Generator[StringIO | BytesIO, None, None]: """Open a file. :param key: @@ -196,7 +192,9 @@ def open( :param name: The name of the file to open :param mode: The read or write mode, passed to :func:`open` :param open_kwargs: Additional keyword arguments passed to :func:`open` - :param ensure_exists: Should the directory the file is in be made? Set to true on write operations. + :param ensure_exists: + Should the directory the file is in be made? + Set to true on write operations. :yields: An open file object @@ -225,7 +223,7 @@ def open_gz( *subkeys: str, name: str, mode: Literal["r", "w", "rt", "wt"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[StringIO, None, None]: ... 
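A quick aside on the typing modernization in this hunk and the ones that follow: the new `X | None` and builtin-generic spellings are safe on Python 3.9, the minimum version declared in pyproject.toml, because `from __future__ import annotations` keeps every annotation as an unevaluated string. A minimal sketch of that mechanism, using only the standard library (the `greet` function is illustrative, not part of pystow):

```python
from __future__ import annotations


def greet(name: str | None = None) -> str:
    """Return a greeting; the PEP 604 union above is never evaluated at runtime."""
    return f"Hello, {name or 'world'}!"


print(greet())  # Hello, world!
print(greet.__annotations__)  # {'name': 'str | None', 'return': 'str'}
```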
@@ -237,7 +235,7 @@ def open_gz( *subkeys: str, name: str, mode: Literal["rb", "wb"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[BytesIO, None, None]: ... @@ -247,9 +245,9 @@ def open_gz( *subkeys: str, name: str, mode: Literal["r", "w", "rt", "wt", "rb", "wb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool = False, -) -> Generator[Union[StringIO, BytesIO], None, None]: +) -> Generator[StringIO | BytesIO, None, None]: """Open a gzipped file that exists already. :param key: @@ -277,10 +275,10 @@ def ensure( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, version: VersionHint = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, ) -> Path: """Ensure a file is downloaded. @@ -370,11 +368,11 @@ def ensure_untar( key: str, *subkeys: str, url: str, - name: Optional[str] = None, - directory: Optional[str] = None, + name: str | None = None, + directory: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - extract_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, + extract_kwargs: Mapping[str, Any] | None = None, ) -> Path: """Ensure a file is downloaded and untarred. @@ -418,10 +416,10 @@ def ensure_gunzip( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, autoclean: bool = True, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, ) -> Path: """Ensure a file is downloaded and gunzipped. @@ -464,11 +462,11 @@ def ensure_open( key: str, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal["r", "rt", "w", "wt"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[StringIO, None, None]: ... @@ -479,11 +477,11 @@ def ensure_open( key: str, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal["rb", "wb"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[BytesIO, None, None]: ... @@ -492,12 +490,12 @@ def ensure_open( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - mode: Union[Literal["r", "rt", "w", "wt"], Literal["rb", "wb"]] = "r", - open_kwargs: Optional[Mapping[str, Any]] = None, -) -> Generator[Union[StringIO, BytesIO], None, None]: + download_kwargs: Mapping[str, Any] | None = None, + mode: Literal["r", "rt", "w", "wt"] | Literal["rb", "wb"] = "r", + open_kwargs: Mapping[str, Any] | None = None, +) -> Generator[StringIO | BytesIO, None, None]: """Ensure a file is downloaded and open it. 
:param key: @@ -540,11 +538,11 @@ def ensure_open_zip( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: str = "r", - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ) -> BytesOpener: """Ensure a file is downloaded then open it with :mod:`zipfile`. @@ -592,12 +590,12 @@ def ensure_open_lzma( key: str, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal["r", "w", "rt", "wt"] = "rt", - open_kwargs: Optional[Mapping[str, Any]], -) -> Generator["io.TextIOWrapper[lzma.LZMAFile]", None, None]: ... + open_kwargs: Mapping[str, Any] | None, +) -> Generator[io.TextIOWrapper[lzma.LZMAFile], None, None]: ... # docstr-coverage:excused `overload` @@ -607,11 +605,11 @@ def ensure_open_lzma( key: str, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal["rb", "wb"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[lzma.LZMAFile, None, None]: ... @@ -620,12 +618,12 @@ def ensure_open_lzma( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["r", "rb", "w", "wb", "rt", "wt"] = "rt", - open_kwargs: Optional[Mapping[str, Any]] = None, -) -> Generator[Union[lzma.LZMAFile, "io.TextIOWrapper[lzma.LZMAFile]"], None, None]: + open_kwargs: Mapping[str, Any] | None = None, +) -> Generator[lzma.LZMAFile | io.TextIOWrapper[lzma.LZMAFile], None, None]: """Ensure a LZMA-compressed file is downloaded and open a file inside it. :param key: @@ -668,11 +666,11 @@ def ensure_open_tarfile( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: str = "r", - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ) -> BytesOpener: """Ensure a tar file is downloaded and open a file inside it. @@ -720,11 +718,11 @@ def ensure_open_gz( key: str, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal["r", "w", "rt", "wt"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[StringIO, None, None]: ... @@ -735,11 +733,11 @@ def ensure_open_gz( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["rb", "wb"] = ..., - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ) -> Generator[BytesIO, None, None]: ... 
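The `@overload` stubs being rewritten here all follow the same pattern: `Literal` mode strings select between a text (`StringIO`) and a binary (`BytesIO`) return type. A self-contained sketch of the idea (`read_file` is hypothetical, not part of pystow):

```python
from __future__ import annotations

from typing import Literal, overload


@overload
def read_file(path: str, mode: Literal["r", "rt"]) -> str: ...
@overload
def read_file(path: str, mode: Literal["rb"]) -> bytes: ...
def read_file(path: str, mode: Literal["r", "rt", "rb"] = "r") -> str | bytes:
    """Read a file; a type checker narrows the return type from the mode literal."""
    with open(path, mode) as file:
        return file.read()
```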
@@ -748,12 +746,12 @@ def ensure_open_gz( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["r", "rb", "w", "wb", "rt", "wt"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, -) -> Generator[Union[StringIO, BytesIO], None, None]: + open_kwargs: Mapping[str, Any] | None = None, +) -> Generator[StringIO | BytesIO, None, None]: """Ensure a gzipped file is downloaded and open a file inside it. :param key: @@ -795,11 +793,11 @@ def ensure_open_bz2( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["rb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ) -> Generator[bz2.BZ2File, None, None]: """Ensure a BZ2-compressed file is downloaded and open a file inside it. @@ -841,11 +839,11 @@ def ensure_csv( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - read_csv_kwargs: Optional[Mapping[str, Any]] = None, -) -> "pd.DataFrame": + download_kwargs: Mapping[str, Any] | None = None, + read_csv_kwargs: Mapping[str, Any] | None = None, +) -> pd.DataFrame: """Download a CSV and open as a dataframe with :mod:`pandas`. :param key: The module name @@ -877,8 +875,8 @@ def ensure_csv( >>> import pystow >>> import pandas as pd - >>> url = 'https://raw.githubusercontent.com/pykeen/pykeen/master/src/pykeen/datasets/nations/test.txt' - >>> df: pd.DataFrame = pystow.ensure_csv('pykeen', 'datasets', 'nations', url=url) + >>> url = "https://raw.githubusercontent.com/pykeen/pykeen/master/src/pykeen/datasets/nations/test.txt" + >>> df: pd.DataFrame = pystow.ensure_csv("pykeen", "datasets", "nations", url=url) """ _module = Module.from_key(key, ensure_exists=True) return _module.ensure_csv( @@ -895,8 +893,8 @@ def load_df( key: str, *subkeys: str, name: str, - read_csv_kwargs: Optional[Mapping[str, Any]] = None, -) -> "pd.DataFrame": + read_csv_kwargs: Mapping[str, Any] | None = None, +) -> pd.DataFrame: """Open a pre-existing CSV as a dataframe with :mod:`pandas`. :param key: The module name @@ -913,9 +911,9 @@ def load_df( >>> import pystow >>> import pandas as pd - >>> url = 'https://raw.githubusercontent.com/pykeen/pykeen/master/src/pykeen/datasets/nations/test.txt' - >>> pystow.ensure_csv('pykeen', 'datasets', 'nations', url=url) - >>> df: pd.DataFrame = pystow.load_df('pykeen', 'datasets', 'nations', name='test.txt') + >>> url = "https://raw.githubusercontent.com/pykeen/pykeen/master/src/pykeen/datasets/nations/test.txt" + >>> pystow.ensure_csv("pykeen", "datasets", "nations", url=url) + >>> df: pd.DataFrame = pystow.load_df("pykeen", "datasets", "nations", name="test.txt") """ _module = Module.from_key(key, ensure_exists=True) return _module.load_df( @@ -929,10 +927,10 @@ def dump_df( key: str, *subkeys: str, name: str, - obj: "pd.DataFrame", + obj: pd.DataFrame, sep: str = "\t", index: bool = False, - to_csv_kwargs: Optional[Mapping[str, Any]] = None, + to_csv_kwargs: Mapping[str, Any] | None = None, ) -> None: """Dump a dataframe to a TSV file with :mod:`pandas`. 
@@ -963,11 +961,11 @@ def ensure_json( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - open_kwargs: Optional[Mapping[str, Any]] = None, - json_load_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, + open_kwargs: Mapping[str, Any] | None = None, + json_load_kwargs: Mapping[str, Any] | None = None, ) -> JSON: """Download JSON and open with :mod:`json`. @@ -991,8 +989,8 @@ def ensure_json( Example usage:: >>> import pystow - >>> url = 'https://maayanlab.cloud/CREEDS/download/single_gene_perturbations-v1.0.json' - >>> perturbations = pystow.ensure_json('bio', 'creeds', '1.0', url=url) + >>> url = "https://maayanlab.cloud/CREEDS/download/single_gene_perturbations-v1.0.json" + >>> perturbations = pystow.ensure_json("bio", "creeds", "1.0", url=url) """ _module = Module.from_key(key, ensure_exists=True) return _module.ensure_json( @@ -1010,11 +1008,11 @@ def ensure_json_bz2( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - open_kwargs: Optional[Mapping[str, Any]] = None, - json_load_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, + open_kwargs: Mapping[str, Any] | None = None, + json_load_kwargs: Mapping[str, Any] | None = None, ) -> JSON: """Download BZ2-compressed JSON and open with :mod:`json`. @@ -1038,8 +1036,8 @@ def ensure_json_bz2( Example usage:: >>> import pystow - >>> url = 'https://github.com/hetio/hetionet/raw/master/hetnet/json/hetionet-v1.0.json.bz2' - >>> hetionet = pystow.ensure_json_bz2('bio', 'hetionet', '1.0', url=url) + >>> url = "https://github.com/hetio/hetionet/raw/master/hetnet/json/hetionet-v1.0.json.bz2" + >>> hetionet = pystow.ensure_json_bz2("bio", "hetionet", "1.0", url=url) """ _module = Module.from_key(key, ensure_exists=True) return _module.ensure_json_bz2( @@ -1057,7 +1055,7 @@ def load_json( key: str, *subkeys: str, name: str, - json_load_kwargs: Optional[Mapping[str, Any]] = None, + json_load_kwargs: Mapping[str, Any] | None = None, ) -> JSON: """Open a JSON file :mod:`json`. @@ -1078,8 +1076,8 @@ def dump_json( *subkeys: str, name: str, obj: JSON, - open_kwargs: Optional[Mapping[str, Any]] = None, - json_dump_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + json_dump_kwargs: Mapping[str, Any] | None = None, ) -> None: """Dump an object to a file with :mod:`json`. @@ -1102,12 +1100,12 @@ def ensure_pickle( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["rb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, - pickle_load_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + pickle_load_kwargs: Mapping[str, Any] | None = None, ) -> Any: """Download a pickle file and open with :mod:`pickle`. @@ -1147,8 +1145,8 @@ def load_pickle( *subkeys: str, name: str, mode: Literal["rb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, - pickle_load_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + pickle_load_kwargs: Mapping[str, Any] | None = None, ) -> Any: """Open a pickle file with :mod:`pickle`. 
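For orientation, the pickle helpers whose signatures are updated in the surrounding hunks round-trip like this; the module key "demo" and the file name are made up for illustration:

```python
import pystow

# Write a pickle under the hypothetical "demo" module directory...
pystow.dump_pickle("demo", name="numbers.pkl", obj=[1, 2, 3])
# ...then read it back from the same resolved path.
assert pystow.load_pickle("demo", name="numbers.pkl") == [1, 2, 3]
```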
@@ -1178,8 +1176,8 @@ def dump_pickle( name: str, obj: Any, mode: Literal["wb"] = "wb", - open_kwargs: Optional[Mapping[str, Any]] = None, - pickle_dump_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + pickle_dump_kwargs: Mapping[str, Any] | None = None, ) -> None: """Dump an object to a file with :mod:`pickle`. @@ -1208,12 +1206,12 @@ def ensure_pickle_gz( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["rb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, - pickle_load_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + pickle_load_kwargs: Mapping[str, Any] | None = None, ) -> Any: """Download a gzipped pickle file and open with :mod:`pickle`. @@ -1253,8 +1251,8 @@ def load_pickle_gz( *subkeys: str, name: str, mode: Literal["rb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, - pickle_load_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + pickle_load_kwargs: Mapping[str, Any] | None = None, ) -> Any: """Open a gzipped pickle file with :mod:`pickle`. @@ -1282,11 +1280,11 @@ def ensure_xml( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - parse_kwargs: Optional[Mapping[str, Any]] = None, -) -> "lxml.etree.ElementTree": + download_kwargs: Mapping[str, Any] | None = None, + parse_kwargs: Mapping[str, Any] | None = None, +) -> lxml.etree.ElementTree: """Download an XML file and open it with :mod:`lxml`. :param key: The module name @@ -1305,7 +1303,10 @@ def ensure_xml( :param parse_kwargs: Keyword arguments to pass through to :func:`lxml.etree.parse`. :returns: An ElementTree object - .. warning:: If you have lots of files to read in the same archive, it's better just to unzip first. + .. warning:: + + If you have lots of files to read in the same archive, + it's better just to unzip first. """ _module = Module.from_key(key, ensure_exists=True) return _module.ensure_xml( @@ -1322,8 +1323,8 @@ def load_xml( key: str, *subkeys: str, name: str, - parse_kwargs: Optional[Mapping[str, Any]] = None, -) -> "lxml.etree.ElementTree": + parse_kwargs: Mapping[str, Any] | None = None, +) -> lxml.etree.ElementTree: """Load an XML file with :mod:`lxml`. :param key: The module name @@ -1331,10 +1332,14 @@ def load_xml( A sequence of additional strings to join. If none are given, returns the directory for this module. :param name: The name of the file to open - :param parse_kwargs: Keyword arguments to pass through to :func:`lxml.etree.parse`. + :param parse_kwargs: + Keyword arguments to pass through to :func:`lxml.etree.parse`. :returns: An ElementTree object - .. warning:: If you have lots of files to read in the same archive, it's better just to unzip first. + .. warning:: + + If you have lots of files to read in the same archive, + it's better just to unzip first. 
""" _module = Module.from_key(key, ensure_exists=True) return _module.load_xml( @@ -1348,9 +1353,9 @@ def dump_xml( key: str, *subkeys: str, name: str, - obj: "lxml.etree.ElementTree", - open_kwargs: Optional[Mapping[str, Any]] = None, - write_kwargs: Optional[Mapping[str, Any]] = None, + obj: lxml.etree.ElementTree, + open_kwargs: Mapping[str, Any] | None = None, + write_kwargs: Mapping[str, Any] | None = None, ) -> None: """Dump an XML element tree to a file with :mod:`lxml`. @@ -1377,11 +1382,11 @@ def ensure_excel( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - read_excel_kwargs: Optional[Mapping[str, Any]] = None, -) -> "pd.DataFrame": + download_kwargs: Mapping[str, Any] | None = None, + read_excel_kwargs: Mapping[str, Any] | None = None, +) -> pd.DataFrame: """Download an excel file and open as a dataframe with :mod:`pandas`. :param key: The module name @@ -1416,11 +1421,11 @@ def ensure_tar_df( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - read_csv_kwargs: Optional[Mapping[str, Any]] = None, -) -> "pd.DataFrame": + download_kwargs: Mapping[str, Any] | None = None, + read_csv_kwargs: Mapping[str, Any] | None = None, +) -> pd.DataFrame: """Download a tar file and open an inner file as a dataframe with :mod:`pandas`. :param key: The module name @@ -1441,7 +1446,10 @@ def ensure_tar_df( :param read_csv_kwargs: Keyword arguments to pass through to :func:`pandas.read_csv`. :returns: A dataframe - .. warning:: If you have lots of files to read in the same archive, it's better just to unzip first. + .. warning:: + + If you have lots of files to read in the same archive, + it's better just to unzip first. """ _module = Module.from_key(key, ensure_exists=True) return _module.ensure_tar_df( @@ -1460,11 +1468,11 @@ def ensure_tar_xml( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - parse_kwargs: Optional[Mapping[str, Any]] = None, -) -> "lxml.etree.ElementTree": + download_kwargs: Mapping[str, Any] | None = None, + parse_kwargs: Mapping[str, Any] | None = None, +) -> lxml.etree.ElementTree: """Download a tar file and open an inner file as an XML with :mod:`lxml`. :param key: The module name @@ -1485,7 +1493,10 @@ def ensure_tar_xml( :param parse_kwargs: Keyword arguments to pass through to :func:`lxml.etree.parse`. :returns: An ElementTree object - .. warning:: If you have lots of files to read in the same archive, it's better just to unzip first. + .. warning:: + + If you have lots of files to read in the same archive, + it's better just to unzip first. """ _module = Module.from_key(key, ensure_exists=True) return _module.ensure_tar_xml( @@ -1504,11 +1515,11 @@ def ensure_zip_df( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - read_csv_kwargs: Optional[Mapping[str, Any]] = None, -) -> "pd.DataFrame": + download_kwargs: Mapping[str, Any] | None = None, + read_csv_kwargs: Mapping[str, Any] | None = None, +) -> pd.DataFrame: """Download a zip file and open an inner file as a dataframe with :mod:`pandas`. 
:param key: The module name @@ -1546,11 +1557,11 @@ def ensure_zip_np( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - load_kwargs: Optional[Mapping[str, Any]] = None, -) -> "numpy.typing.ArrayLike": + download_kwargs: Mapping[str, Any] | None = None, + load_kwargs: Mapping[str, Any] | None = None, +) -> numpy.typing.ArrayLike: """Download a zip file and open an inner file as an array-like with :mod:`numpy`. :param key: The module name @@ -1590,12 +1601,12 @@ def ensure_rdf( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, precache: bool = True, - parse_kwargs: Optional[Mapping[str, Any]] = None, -) -> "rdflib.Graph": + parse_kwargs: Mapping[str, Any] | None = None, +) -> rdflib.Graph: """Download a RDF file and open with :mod:`rdflib`. :param key: The module name @@ -1624,13 +1635,16 @@ def ensure_rdf( >>> url = 'https://ftp.expasy.org/databases/rhea/rdf/rhea.rdf.gz' >>> rdf_graph: rdflib.Graph = pystow.ensure_rdf('rhea', url=url) - If :mod:`rdflib` fails to guess the format, you can explicitly specify it using the `parse_kwargs` argument: + If :mod:`rdflib` fails to guess the format, you can explicitly specify + it using the `parse_kwargs` argument: >>> import pystow >>> import rdflib >>> url = "http://oaei.webdatacommons.org/tdrs/testdata/persistent/knowledgegraph" \ ... "/v3/suite/memoryalpha-stexpanded/component/reference.xml" - >>> rdf_graph: rdflib.Graph = pystow.ensure_rdf("memoryalpha-stexpanded", url=url, parse_kwargs={"format": "xml"}) + >>> rdf_graph: rdflib.Graph = pystow.ensure_rdf( + ... "memoryalpha-stexpanded", url=url, parse_kwargs={"format": "xml"} + ... ) """ _module = Module.from_key(key, ensure_exists=True) return _module.ensure_rdf( @@ -1647,9 +1661,9 @@ def ensure_rdf( def load_rdf( key: str, *subkeys: str, - name: Optional[str] = None, - parse_kwargs: Optional[Mapping[str, Any]] = None, -) -> "rdflib.Graph": + name: str | None = None, + parse_kwargs: Mapping[str, Any] | None = None, +) -> rdflib.Graph: """Open an RDF file with :mod:`rdflib`. :param key: @@ -1673,9 +1687,9 @@ def dump_rdf( key: str, *subkeys: str, name: str, - obj: "rdflib.Graph", + obj: rdflib.Graph, format: str = "turtle", - serialize_kwargs: Optional[Mapping[str, Any]] = None, + serialize_kwargs: Mapping[str, Any] | None = None, ) -> None: """Dump an RDF graph to a file with :mod:`rdflib`. @@ -1700,8 +1714,8 @@ def ensure_from_s3( key: str, *subkeys: str, s3_bucket: str, - s3_key: Union[str, Sequence[str]], - name: Optional[str] = None, + s3_key: str | Sequence[str], + name: str | None = None, force: bool = False, **kwargs: Any, ) -> Path: @@ -1730,8 +1744,10 @@ def ensure_from_s3( Example downloading ProtMapper 0.0.21: - >>> version = '0.0.21' - >>> ensure_from_s3('test', version, s3_bucket='bigmech', s3_key=f'protmapper/{version}/refseq_uniprot.csv') + >>> version = "0.0.21" + >>> ensure_from_s3( + ... "test", version, s3_bucket="bigmech", s3_key=f"protmapper/{version}/refseq_uniprot.csv" + ... 
) """ _module = Module.from_key(key, ensure_exists=True) return _module.ensure_from_s3( @@ -1770,7 +1786,7 @@ def ensure_from_google( Example downloading the WK3l-15k dataset as motivated by https://github.com/pykeen/pykeen/pull/403: - >>> ensure_from_google('test', name='wk3l15k.zip', file_id='1AsPPU4ka1Rc9u-XYMGWtvV65hF3egi0z') + >>> ensure_from_google("test", name="wk3l15k.zip", file_id="1AsPPU4ka1Rc9u-XYMGWtvV65hF3egi0z") """ _module = Module.from_key(key, ensure_exists=True) return _module.ensure_from_google(*subkeys, name=name, file_id=file_id, force=force) @@ -1798,9 +1814,9 @@ def ensure_open_sqlite( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, ) -> Generator[sqlite3.Connection, None, None]: """Ensure and connect to a SQLite database. @@ -1842,9 +1858,9 @@ def ensure_open_sqlite_gz( key: str, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, ) -> Generator[sqlite3.Connection, None, None]: """Ensure and connect to a gzipped SQLite database. diff --git a/src/pystow/cache.py b/src/pystow/cache.py index 845c847..77e50af 100644 --- a/src/pystow/cache.py +++ b/src/pystow/cache.py @@ -1,22 +1,19 @@ -# -*- coding: utf-8 -*- - """Utilities for caching files.""" +from __future__ import annotations + import functools import json import logging import pickle from abc import ABC, abstractmethod +from collections.abc import MutableMapping from pathlib import Path from typing import ( TYPE_CHECKING, Any, Callable, - Dict, Generic, - List, - MutableMapping, - Optional, TypeVar, Union, cast, @@ -39,8 +36,8 @@ logger = logging.getLogger(__name__) JSONType = Union[ - Dict[str, Any], - List[Any], + dict[str, Any], + list[Any], ] X = TypeVar("X") @@ -52,7 +49,7 @@ class Cached(Generic[X], ABC): def __init__( self, - path: Union[str, Path], + path: str | Path, force: bool = False, ) -> None: """Instantiate the decorator. @@ -134,10 +131,10 @@ def dump(self, rv: Any) -> None: pickle.dump(rv, file, protocol=pickle.HIGHEST_PROTOCOL) -class CachedCollection(Cached[List[str]]): +class CachedCollection(Cached[list[str]]): """Make a function lazily cache its return value as file.""" - def load(self) -> List[str]: + def load(self) -> list[str]: """Load data from the cache as a list of strings. :returns: A list of strings loaded from the cache @@ -145,14 +142,14 @@ def load(self) -> List[str]: with open(self.path) as file: return [line.strip() for line in file] - def dump(self, rv: List[str]) -> None: + def dump(self, rv: list[str]) -> None: """Dump data to the cache as a list of strings. :param rv: The list of strings to dump """ with open(self.path, "w") as file: for line in rv: - print(line, file=file) # noqa:T001,T201 + print(line, file=file) class CachedDataFrame(Cached["pd.DataFrame"]): @@ -160,11 +157,11 @@ class CachedDataFrame(Cached["pd.DataFrame"]): def __init__( self, - path: Union[str, Path], + path: str | Path, force: bool = False, - sep: Optional[str] = None, - dtype: Optional[Any] = None, - read_csv_kwargs: Optional[MutableMapping[str, Any]] = None, + sep: str | None = None, + dtype: Any | None = None, + read_csv_kwargs: MutableMapping[str, Any] | None = None, ) -> None: """Instantiate the decorator. 
@@ -189,7 +186,7 @@ def __init__( self.read_csv_kwargs["dtype"] = dtype self.read_csv_kwargs.setdefault("keep_default_na", False) - def load(self) -> "pd.DataFrame": + def load(self) -> pd.DataFrame: """Load data from the cache as a dataframe. :returns: A dataframe loaded from the cache. @@ -202,7 +199,7 @@ **self.read_csv_kwargs, ) - def dump(self, rv: "pd.DataFrame") -> None: + def dump(self, rv: pd.DataFrame) -> None: """Dump data to the cache as a dataframe. :param rv: The dataframe to dump diff --git a/src/pystow/cli.py b/src/pystow/cli.py index 2de3bfb..ae330d3 100644 --- a/src/pystow/cli.py +++ b/src/pystow/cli.py @@ -4,6 +4,8 @@ """Command line interface for PyStow.""" +from __future__ import annotations + import os from typing import Optional, Sequence diff --git a/src/pystow/config_api.py b/src/pystow/config_api.py index 34d92b4..591d259 100644 --- a/src/pystow/config_api.py +++ b/src/pystow/config_api.py @@ -1,13 +1,13 @@ -# -*- coding: utf-8 -*- - """Configuration handling.""" +from __future__ import annotations + import os from configparser import ConfigParser from functools import lru_cache from pathlib import Path from textwrap import dedent -from typing import Any, Callable, Optional, Type, TypeVar, Union +from typing import Any, Callable, TypeVar from .utils import getenv_path @@ -127,9 +127,9 @@ module: str, key: str, *, - passthrough: Optional[X] = None, - default: Optional[X] = None, - dtype: Optional[Type[X]] = None, + passthrough: X | None = None, + default: X | None = None, + dtype: type[X] | None = None, raise_on_missing: bool = False, ) -> Any: """Get a configuration value. @@ -159,7 +159,7 @@ return _cast(rv, dtype) -def _cast(rv: Any, dtype: Union[None, Callable[..., Any]]) -> Any: +def _cast(rv: Any, dtype: None | Callable[..., Any]) -> Any: if not isinstance(rv, str): # if it's not a string, it doesn't need munging return rv if dtype in (None, str): # no munging necessary diff --git a/src/pystow/constants.py b/src/pystow/constants.py index 408a77c..79c3fe9 100644 --- a/src/pystow/constants.py +++ b/src/pystow/constants.py @@ -1,10 +1,11 @@ -# -*- coding: utf-8 -*- - """PyStow constants.""" +from __future__ import annotations + +from collections.abc import Generator from io import StringIO from textwrap import dedent -from typing import IO, Any, Callable, Generator +from typing import IO, Any, Callable __all__ = [ "PYSTOW_NAME_ENVVAR", @@ -30,11 +31,12 @@ ### ⚙️️ Configuration -By default, data is stored in the `$HOME/.data` directory. By default, the `<app>` app will create the -`$HOME/.data/<app>` folder. +By default, data is stored in the `$HOME/.data` directory. By default, the `<app>` +app will create the `$HOME/.data/<app>` folder. -If you want to use an alternate folder name to `.data` inside the home directory, you can set the `PYSTOW_NAME` -environment variable. For example, if you set `PYSTOW_NAME=mydata`, then the following code for the `pykeen` app will +If you want to use an alternate folder name to `.data` inside the home directory, +you can set the `PYSTOW_NAME` environment variable. For example, if you set +`PYSTOW_NAME=mydata`, then the following code for the `pykeen` app will create the `$HOME/mydata/pykeen/` directory: ```python @@ -49,8 +51,9 @@ pykeen_directory = pystow.join('pykeen') ``` -If you want to specify a completely custom directory that isn't relative to your home directory, you can set -the `PYSTOW_HOME` environment variable.
For example, if you set `PYSTOW_HOME=/usr/local/`, then the following code for +If you want to specify a completely custom directory that isn't relative to +your home directory, you can set the `PYSTOW_HOME` environment variable. For +example, if you set `PYSTOW_HOME=/usr/local/`, then the following code for the `pykeen` app will create the `/usr/local/pykeen/` directory: ```python diff --git a/src/pystow/impl.py b/src/pystow/impl.py index 3f2c612..82531f5 100644 --- a/src/pystow/impl.py +++ b/src/pystow/impl.py @@ -1,7 +1,7 @@ -# -*- coding: utf-8 -*- - """Module implementation.""" +from __future__ import annotations + import bz2 import gzip import io @@ -13,6 +13,7 @@ import sqlite3 import tarfile import zipfile +from collections.abc import Generator, Mapping, Sequence from contextlib import closing, contextmanager from io import BytesIO, StringIO from pathlib import Path @@ -20,12 +21,8 @@ TYPE_CHECKING, Any, Callable, - Dict, - Generator, Literal, - Mapping, Optional, - Sequence, Union, cast, overload, @@ -74,7 +71,7 @@ class Module: """The class wrapping the directory lookup implementation.""" - def __init__(self, base: Union[str, Path], ensure_exists: bool = True) -> None: + def __init__(self, base: str | Path, ensure_exists: bool = True) -> None: """Initialize the module. :param base: @@ -87,7 +84,7 @@ def __init__(self, base: Union[str, Path], ensure_exists: bool = True) -> None: mkdir(self.base, ensure_exists=ensure_exists) @classmethod - def from_key(cls, key: str, *subkeys: str, ensure_exists: bool = True) -> "Module": + def from_key(cls, key: str, *subkeys: str, ensure_exists: bool = True) -> Module: """Get a module for the given directory or one of its subdirectories. :param key: @@ -109,7 +106,7 @@ def from_key(cls, key: str, *subkeys: str, ensure_exists: bool = True) -> "Modul rv = rv.module(*subkeys, ensure_exists=ensure_exists) return rv - def module(self, *subkeys: str, ensure_exists: bool = True) -> "Module": + def module(self, *subkeys: str, ensure_exists: bool = True) -> Module: """Get a module for a subdirectory of the current module. :param subkeys: @@ -127,7 +124,7 @@ def module(self, *subkeys: str, ensure_exists: bool = True) -> "Module": def join( self, *subkeys: str, - name: Optional[str] = None, + name: str | None = None, ensure_exists: bool = True, version: VersionHint = None, ) -> Path: @@ -208,10 +205,10 @@ def ensure( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, version: VersionHint = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, ) -> Path: """Ensure a file is downloaded. @@ -305,11 +302,11 @@ def ensure_untar( self, *subkeys: str, url: str, - name: Optional[str] = None, - directory: Optional[str] = None, + name: str | None = None, + directory: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - extract_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, + extract_kwargs: Mapping[str, Any] | None = None, ) -> Path: """Ensure a tar file is downloaded and unarchived. @@ -352,10 +349,10 @@ def ensure_gunzip( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, autoclean: bool = True, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, ) -> Path: """Ensure a tar.gz file is downloaded and unarchived. 
@@ -402,11 +399,11 @@ def ensure_open( self, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal["r", "rt", "w", "wt"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[StringIO, None, None]: ... # docstr-coverage:excused `overload` @@ -416,11 +413,11 @@ def ensure_open( self, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal["rb", "wb"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[BytesIO, None, None]: ... @contextmanager @@ -428,12 +425,12 @@ def ensure_open( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - mode: Union[Literal["r", "rt", "w", "wt"], Literal["rb", "wb"]] = "r", - open_kwargs: Optional[Mapping[str, Any]] = None, - ) -> Generator[Union[StringIO, BytesIO], None, None]: + download_kwargs: Mapping[str, Any] | None = None, + mode: Literal["r", "rt", "w", "wt"] | Literal["rb", "wb"] = "r", + open_kwargs: Mapping[str, Any] | None = None, + ) -> Generator[StringIO | BytesIO, None, None]: """Ensure a file is downloaded and open it. :param subkeys: @@ -472,8 +469,8 @@ def _raise_for_mode_ensure_mismatch( :raises ValueError: In the following situations: 1. If the file should be opened in write mode, and it is not ensured to exist - 2. If the file should be opened in read mode, and it is ensured to exist. This is bad because - it will create a file when there previously wasn't one + 2. If the file should be opened in read mode, and it is ensured to exist. + This is bad because it will create a file when there previously wasn't one """ if "w" in mode and not ensure_exists: raise ValueError @@ -488,7 +485,7 @@ def open( *subkeys: str, name: str, mode: Literal["r", "rt", "w", "wt"] = ..., - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool, ) -> Generator[StringIO, None, None]: ... @@ -500,7 +497,7 @@ def open( *subkeys: str, name: str, mode: Literal["rb", "wb"] = ..., - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool, ) -> Generator[BytesIO, None, None]: ... @@ -509,10 +506,10 @@ def open( self, *subkeys: str, name: str, - mode: Union[Literal["r", "rt", "w", "wt"], Literal["rb", "wb"]] = "r", - open_kwargs: Optional[Mapping[str, Any]] = None, + mode: Literal["r", "rt", "w", "wt"] | Literal["rb", "wb"] = "r", + open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool = False, - ) -> Generator[Union[StringIO, BytesIO], None, None]: + ) -> Generator[StringIO | BytesIO, None, None]: """Open a file. :param subkeys: @@ -521,7 +518,8 @@ def open( :param name: The name of the file to open :param mode: The read mode, passed to :func:`open` :param open_kwargs: Additional keyword arguments passed to :func:`open` - :param ensure_exists: Should the directory the file is in be made? Set to true on write operations. + :param ensure_exists: + Should the directory the file is in be made? Set to true on write operations. :yields: An open file object. 
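The mode/`ensure_exists` contract spelled out in `_raise_for_mode_ensure_mismatch` above means callers use `Module.open` like this; the module key and file name are illustrative:

```python
import pystow

module = pystow.module("demo")  # hypothetical module key

# Write modes must opt in to directory creation with ensure_exists=True...
with module.open(name="notes.txt", mode="w", ensure_exists=True) as file:
    print("hello", file=file)

# ...while read modes keep the default ensure_exists=False.
with module.open(name="notes.txt", mode="r") as file:
    print(file.read())
```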
@@ -550,7 +548,7 @@ def open_gz( *subkeys: str, name: str, mode: Literal["r", "w", "rt", "wt"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ensure_exists: bool, ) -> Generator[StringIO, None, None]: ... @@ -562,7 +560,7 @@ def open_gz( *subkeys: str, name: str, mode: Literal["rb", "wb"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ensure_exists: bool, ) -> Generator[BytesIO, None, None]: ... @@ -572,9 +570,9 @@ def open_gz( *subkeys: str, name: str, mode: Literal["r", "w", "rt", "wt", "rb", "wb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool = False, - ) -> Generator[Union[StringIO, BytesIO], None, None]: + ) -> Generator[StringIO | BytesIO, None, None]: """Open a gzipped file that exists already. :param subkeys: @@ -601,12 +599,12 @@ def ensure_open_lzma( self, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal["r", "w", "rt", "wt"] = "rt", - open_kwargs: Optional[Mapping[str, Any]], - ) -> Generator["io.TextIOWrapper[lzma.LZMAFile]", None, None]: ... + open_kwargs: Mapping[str, Any] | None, + ) -> Generator[io.TextIOWrapper[lzma.LZMAFile], None, None]: ... # docstr-coverage:excused `overload` @overload @@ -615,11 +613,11 @@ def ensure_open_lzma( self, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal["rb", "wb"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[lzma.LZMAFile, None, None]: ... @contextmanager @@ -627,12 +625,12 @@ def ensure_open_lzma( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["r", "rb", "w", "wb", "rt", "wt"] = "rt", - open_kwargs: Optional[Mapping[str, Any]] = None, - ) -> Generator[Union[lzma.LZMAFile, "io.TextIOWrapper[lzma.LZMAFile]"], None, None]: + open_kwargs: Mapping[str, Any] | None = None, + ) -> Generator[lzma.LZMAFile | io.TextIOWrapper[lzma.LZMAFile], None, None]: """Ensure a LZMA-compressed file is downloaded and open a file inside it. :param subkeys: @@ -666,11 +664,11 @@ def ensure_open_tarfile( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: str = "r", - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ) -> BytesOpener: """Ensure a tar file is downloaded and open a file inside it. @@ -708,11 +706,11 @@ def ensure_open_zip( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: str = "r", - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ) -> BytesOpener: """Ensure a file is downloaded then open it with :mod:`zipfile`. 
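`ensure_open_lzma` follows the same download-then-open pattern for LZMA-compressed files; the test suite later in this diff exercises it in exactly this shape. A sketch with a hypothetical URL:

```python
import pandas as pd

import pystow

URL = "https://example.org/table.tsv.xz"  # hypothetical URL

# Yields a text wrapper over the decompressed stream by default (mode "rt").
with pystow.ensure_open_lzma("demo", url=URL) as file:
    df = pd.read_csv(file, sep="\t")
```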
@@ -751,11 +749,11 @@ def ensure_open_gz( self, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal["r", "w", "rt", "wt"] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[StringIO, None, None]: ... # docstr-coverage:excused `overload` @@ -765,14 +763,14 @@ def ensure_open_gz( self, *subkeys: str, url: str, - name: Optional[str], + name: str | None, force: bool, - download_kwargs: Optional[Mapping[str, Any]], + download_kwargs: Mapping[str, Any] | None, mode: Literal[ "rb", "wb", ] = ..., - open_kwargs: Optional[Mapping[str, Any]], + open_kwargs: Mapping[str, Any] | None, ) -> Generator[BytesIO, None, None]: ... @contextmanager @@ -780,12 +778,12 @@ def ensure_open_gz( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["r", "rb", "w", "wb", "rt", "wt"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, - ) -> Generator[Union[StringIO, BytesIO], None, None]: + open_kwargs: Mapping[str, Any] | None = None, + ) -> Generator[StringIO | BytesIO, None, None]: """Ensure a gzipped file is downloaded and open a file inside it. :param subkeys: @@ -818,11 +816,11 @@ def ensure_open_bz2( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["rb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, ) -> Generator[bz2.BZ2File, None, None]: """Ensure a BZ2-compressed file is downloaded and open a file inside it. @@ -855,11 +853,11 @@ def ensure_csv( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - read_csv_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "pd.DataFrame": + download_kwargs: Mapping[str, Any] | None = None, + read_csv_kwargs: Mapping[str, Any] | None = None, + ) -> pd.DataFrame: """Download a CSV and open as a dataframe with :mod:`pandas`. :param subkeys: @@ -888,8 +886,8 @@ def load_df( self, *subkeys: str, name: str, - read_csv_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "pd.DataFrame": + read_csv_kwargs: Mapping[str, Any] | None = None, + ) -> pd.DataFrame: """Open a pre-existing CSV as a dataframe with :mod:`pandas`. :param subkeys: @@ -910,10 +908,10 @@ def dump_df( self, *subkeys: str, name: str, - obj: "pd.DataFrame", + obj: pd.DataFrame, sep: str = "\t", index: bool = False, - to_csv_kwargs: Optional[Mapping[str, Any]] = None, + to_csv_kwargs: Mapping[str, Any] | None = None, ) -> None: """Dump a dataframe to a TSV file with :mod:`pandas`. @@ -939,11 +937,11 @@ def ensure_json( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - open_kwargs: Optional[Mapping[str, Any]] = None, - json_load_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, + open_kwargs: Mapping[str, Any] | None = None, + json_load_kwargs: Mapping[str, Any] | None = None, ) -> JSON: """Download JSON and open with :mod:`json`. 
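`ensure_csv` composes `ensure` with `pandas.read_csv`, and `_clean_csv_kwargs` (at the bottom of this module) defaults the separator to a tab. A sketch; the URL and the `read_csv` options are placeholders:

```python
import pystow

df = pystow.ensure_csv(
    "demo",
    url="https://example.org/table.tsv",    # hypothetical URL
    read_csv_kwargs={"usecols": ["name"]},  # any pandas.read_csv options
)
```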
@@ -977,11 +975,11 @@ def ensure_json_bz2( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - open_kwargs: Optional[Mapping[str, Any]] = None, - json_load_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, + open_kwargs: Mapping[str, Any] | None = None, + json_load_kwargs: Mapping[str, Any] | None = None, ) -> JSON: """Download BZ2-compressed JSON and open with :mod:`json`. @@ -1015,8 +1013,8 @@ def load_json( self, *subkeys: str, name: str, - open_kwargs: Optional[Mapping[str, Any]] = None, - json_load_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + json_load_kwargs: Mapping[str, Any] | None = None, ) -> JSON: """Open a JSON file :mod:`json`. @@ -1038,8 +1036,8 @@ def dump_json( *subkeys: str, name: str, obj: JSON, - open_kwargs: Optional[Mapping[str, Any]] = None, - json_dump_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + json_dump_kwargs: Mapping[str, Any] | None = None, ) -> None: """Dump an object to a file with :mod:`json`. @@ -1060,12 +1058,12 @@ def ensure_pickle( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["rb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, - pickle_load_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + pickle_load_kwargs: Mapping[str, Any] | None = None, ) -> Any: """Download a pickle file and open with :mod:`pickle`. @@ -1102,8 +1100,8 @@ def load_pickle( *subkeys: str, name: str, mode: Literal["rb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, - pickle_load_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + pickle_load_kwargs: Mapping[str, Any] | None = None, ) -> Any: """Open a pickle file with :mod:`pickle`. @@ -1131,8 +1129,8 @@ def dump_pickle( name: str, obj: Any, mode: Literal["wb"] = "wb", - open_kwargs: Optional[Mapping[str, Any]] = None, - pickle_dump_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + pickle_dump_kwargs: Mapping[str, Any] | None = None, ) -> None: """Dump an object to a file with :mod:`pickle`. @@ -1158,12 +1156,12 @@ def ensure_pickle_gz( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, mode: Literal["rb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, - pickle_load_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + pickle_load_kwargs: Mapping[str, Any] | None = None, ) -> Any: """Download a gzipped pickle file and open with :mod:`pickle`. @@ -1200,8 +1198,8 @@ def load_pickle_gz( *subkeys: str, name: str, mode: Literal["rb"] = "rb", - open_kwargs: Optional[Mapping[str, Any]] = None, - pickle_load_kwargs: Optional[Mapping[str, Any]] = None, + open_kwargs: Mapping[str, Any] | None = None, + pickle_load_kwargs: Mapping[str, Any] | None = None, ) -> Any: """Open a gzipped pickle file with :mod:`pickle`. 
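The `dump_*`/`load_*` pairs above are symmetric, so a JSON round trip through a module makes a one-screen smoke test (the `demo` key is hypothetical):

```python
import pystow

module = pystow.module("demo")

# dump_json creates the directory if needed, then writes with json.dump.
module.dump_json(name="config.json", obj={"threshold": 0.5})
config = module.load_json(name="config.json")
assert config["threshold"] == 0.5
```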
@@ -1227,11 +1225,11 @@ def ensure_excel( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - read_excel_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "pd.DataFrame": + download_kwargs: Mapping[str, Any] | None = None, + read_excel_kwargs: Mapping[str, Any] | None = None, + ) -> pd.DataFrame: """Download an excel file and open as a dataframe with :mod:`pandas`. :param subkeys: @@ -1261,11 +1259,11 @@ def ensure_tar_df( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - read_csv_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "pd.DataFrame": + download_kwargs: Mapping[str, Any] | None = None, + read_csv_kwargs: Mapping[str, Any] | None = None, + ) -> pd.DataFrame: """Download a tar file and open an inner file as a dataframe with :mod:`pandas`. :param subkeys: @@ -1285,7 +1283,10 @@ def ensure_tar_df( :param read_csv_kwargs: Keyword arguments to pass through to :func:`pandas.read_csv`. :returns: A dataframe - .. warning:: If you have lots of files to read in the same archive, it's better just to unzip first. + .. warning:: + + If you have lots of files to read in the same archive, + it's better just to unzip first. """ path = self.ensure( *subkeys, url=url, name=name, force=force, download_kwargs=download_kwargs @@ -1298,11 +1299,11 @@ def ensure_xml( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - parse_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "lxml.etree.ElementTree": + download_kwargs: Mapping[str, Any] | None = None, + parse_kwargs: Mapping[str, Any] | None = None, + ) -> lxml.etree.ElementTree: """Download an XML file and open it with :mod:`lxml`. :param subkeys: @@ -1320,7 +1321,10 @@ def ensure_xml( :param parse_kwargs: Keyword arguments to pass through to :func:`lxml.etree.parse`. :returns: An ElementTree object - .. warning:: If you have lots of files to read in the same archive, it's better just to unzip first. + .. warning:: + + If you have lots of files to read in the same archive, + it's better just to unzip first. """ from lxml import etree @@ -1333,8 +1337,8 @@ def load_xml( self, *subkeys: str, name: str, - parse_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "lxml.etree.ElementTree": + parse_kwargs: Mapping[str, Any] | None = None, + ) -> lxml.etree.ElementTree: """Load an XML file with :mod:`lxml`. :param subkeys: @@ -1344,7 +1348,10 @@ def load_xml( :param parse_kwargs: Keyword arguments to pass through to :func:`lxml.etree.parse`. :returns: An ElementTree object - .. warning:: If you have lots of files to read in the same archive, it's better just to unzip first. + .. warning:: + + If you have lots of files to read in the same archive, + it's better just to unzip first. """ from lxml import etree @@ -1355,9 +1362,9 @@ def dump_xml( self, *subkeys: str, name: str, - obj: "lxml.etree.ElementTree", - open_kwargs: Optional[Mapping[str, Any]] = None, - write_kwargs: Optional[Mapping[str, Any]] = None, + obj: lxml.etree.ElementTree, + open_kwargs: Mapping[str, Any] | None = None, + write_kwargs: Mapping[str, Any] | None = None, ) -> None: """Dump an XML element tree to a file with :mod:`lxml`. 
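The warning reformatted above applies to every archive-reading helper in this module: each call opens the archive anew, so repeated reads should be preceded by a one-time extraction. A sketch of `ensure_tar_df` with hypothetical archive and member names:

```python
import pystow

df = pystow.ensure_tar_df(
    "demo",
    url="https://example.org/archive.tar.gz",  # hypothetical archive
    inner_path="data/table.tsv",               # hypothetical member
)
```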
@@ -1367,7 +1374,9 @@ def dump_xml( :param name: The name of the file to open :param obj: The object to dump :param open_kwargs: Additional keyword arguments passed to :func:`open` - :param write_kwargs: Keyword arguments to pass through to :func:`lxml.etree.ElementTree.write`. + :param write_kwargs: + Keyword arguments to pass through to + :func:`lxml.etree.ElementTree.write`. """ with self.open( *subkeys, name=name, mode="wb", open_kwargs=open_kwargs, ensure_exists=True @@ -1379,11 +1388,11 @@ def ensure_tar_xml( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - parse_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "lxml.etree.ElementTree": + download_kwargs: Mapping[str, Any] | None = None, + parse_kwargs: Mapping[str, Any] | None = None, + ) -> lxml.etree.ElementTree: """Download a tar file and open an inner file as an XML with :mod:`lxml`. :param subkeys: @@ -1403,7 +1412,10 @@ def ensure_tar_xml( :param parse_kwargs: Keyword arguments to pass through to :func:`lxml.etree.parse`. :returns: An ElementTree object - .. warning:: If you have lots of files to read in the same archive, it's better just to unzip first. + .. warning:: + + If you have lots of files to read in the same archive, + it's better just to unzip first. """ path = self.ensure( *subkeys, url=url, name=name, force=force, download_kwargs=download_kwargs @@ -1415,11 +1427,11 @@ def ensure_zip_df( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - read_csv_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "pd.DataFrame": + download_kwargs: Mapping[str, Any] | None = None, + read_csv_kwargs: Mapping[str, Any] | None = None, + ) -> pd.DataFrame: """Download a zip file and open an inner file as a dataframe with :mod:`pandas`. :param subkeys: @@ -1451,11 +1463,11 @@ def ensure_zip_np( *subkeys: str, url: str, inner_path: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, - load_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "numpy.typing.ArrayLike": + download_kwargs: Mapping[str, Any] | None = None, + load_kwargs: Mapping[str, Any] | None = None, + ) -> numpy.typing.ArrayLike: """Download a zip file and open an inner file as an array-like with :mod:`numpy`. :param subkeys: @@ -1487,12 +1499,12 @@ def ensure_rdf( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, precache: bool = True, - parse_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "rdflib.Graph": + parse_kwargs: Mapping[str, Any] | None = None, + ) -> rdflib.Graph: """Download a RDF file and open with :mod:`rdflib`. :param subkeys: @@ -1507,7 +1519,8 @@ def ensure_rdf( Should the download be done again, even if the path already exists? Defaults to false. :param download_kwargs: Keyword arguments to pass through to :func:`pystow.utils.download`. - :param precache: Should the parsed :class:`rdflib.Graph` be stored as a pickle for fast loading? + :param precache: + Should the parsed :class:`rdflib.Graph` be stored as a pickle for fast loading? 
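`ensure_zip_np` is the NumPy counterpart of the readers above: download the archive once, then `numpy.load` the inner member. A sketch with hypothetical names, assuming the top-level mirror of the module method:

```python
import pystow

arr = pystow.ensure_zip_np(
    "demo",
    url="https://example.org/arrays.zip",  # hypothetical archive
    inner_path="embeddings.npy",           # hypothetical member
)
print(arr.shape)
```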
:param parse_kwargs: Keyword arguments to pass through to :func:`pystow.utils.read_rdf` and transitively to :func:`rdflib.Graph.parse`. @@ -1532,9 +1545,9 @@ def ensure_rdf( def load_rdf( self, *subkeys: str, - name: Optional[str] = None, - parse_kwargs: Optional[Mapping[str, Any]] = None, - ) -> "rdflib.Graph": + name: str | None = None, + parse_kwargs: Mapping[str, Any] | None = None, + ) -> rdflib.Graph: """Open an RDF file with :mod:`rdflib`. :param subkeys: @@ -1553,9 +1566,9 @@ def dump_rdf( self, *subkeys: str, name: str, - obj: "rdflib.Graph", + obj: rdflib.Graph, format: str = "turtle", - serialize_kwargs: Optional[Mapping[str, Any]] = None, + serialize_kwargs: Mapping[str, Any] | None = None, ) -> None: """Dump an RDF graph to a file with :mod:`rdflib`. @@ -1577,11 +1590,11 @@ def ensure_from_s3( self, *subkeys: str, s3_bucket: str, - s3_key: Union[str, Sequence[str]], - name: Optional[str] = None, - client: Optional["botocore.client.BaseClient"] = None, - client_kwargs: Optional[Mapping[str, Any]] = None, - download_file_kwargs: Optional[Mapping[str, Any]] = None, + s3_key: str | Sequence[str], + name: str | None = None, + client: botocore.client.BaseClient | None = None, + client_kwargs: Mapping[str, Any] | None = None, + download_file_kwargs: Mapping[str, Any] | None = None, force: bool = False, ) -> Path: """Ensure a file is downloaded. @@ -1629,7 +1642,7 @@ def ensure_from_google( name: str, file_id: str, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, ) -> Path: """Ensure a file is downloaded from Google Drive. @@ -1639,9 +1652,9 @@ def ensure_from_google( :param name: The name of the file :param file_id: - The file identifier of the google file. If your share link is - https://drive.google.com/file/d/1AsPPU4ka1Rc9u-XYMGWtvV65hF3egi0z/view, then your file id is - ``1AsPPU4ka1Rc9u-XYMGWtvV65hF3egi0z``. + The file identifier of the Google file. If your share link is + https://drive.google.com/file/d/1AsPPU4ka1Rc9u-XYMGWtvV65hF3egi0z/view, + then your file ID is ``1AsPPU4ka1Rc9u-XYMGWtvV65hF3egi0z``. :param force: Should the download be done again, even if the path already exists? Defaults to false. @@ -1659,9 +1672,9 @@ def ensure_open_sqlite( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, ) -> Generator[sqlite3.Connection, None, None]: """Ensure and connect to a SQLite database. @@ -1699,9 +1712,9 @@ def ensure_open_sqlite_gz( self, *subkeys: str, url: str, - name: Optional[str] = None, + name: str | None = None, force: bool = False, - download_kwargs: Optional[Mapping[str, Any]] = None, + download_kwargs: Mapping[str, Any] | None = None, ) -> Generator[sqlite3.Connection, None, None]: """Ensure and connect to a SQLite database that's gzipped. 
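`ensure_open_sqlite` (and the gzipped variant that follows) is a `contextmanager` yielding a live `sqlite3.Connection`, so the usual `with` idiom applies. The database URL and table name below are placeholders:

```python
import pystow

URL = "https://example.org/data.db"  # hypothetical SQLite file

with pystow.ensure_open_sqlite("demo", url=URL) as conn:
    rows = conn.execute("SELECT * FROM records LIMIT 5").fetchall()
```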
@@ -1738,7 +1751,7 @@ def ensure_open_sqlite_gz( yield conn -def _clean_csv_kwargs(read_csv_kwargs: Union[None, Mapping[str, Any]]) -> Dict[str, Any]: +def _clean_csv_kwargs(read_csv_kwargs: None | Mapping[str, Any]) -> dict[str, Any]: read_csv_kwargs = {} if read_csv_kwargs is None else dict(read_csv_kwargs) read_csv_kwargs.setdefault("sep", "\t") return read_csv_kwargs diff --git a/src/pystow/utils.py b/src/pystow/utils.py index 9e16175..284b9f6 100644 --- a/src/pystow/utils.py +++ b/src/pystow/utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Utilities.""" from __future__ import annotations @@ -16,21 +14,16 @@ import tempfile import urllib.error import zipfile +from collections.abc import Collection, Iterable, Iterator, Mapping from functools import partial from io import BytesIO, StringIO from pathlib import Path, PurePosixPath -from subprocess import check_output # noqa: S404 +from subprocess import check_output from typing import ( TYPE_CHECKING, Any, - Collection, - Iterable, - Iterator, Literal, - Mapping, NamedTuple, - Optional, - Union, cast, ) from urllib.parse import urlparse @@ -64,6 +57,7 @@ # Exceptions "HexDigestError", "UnexpectedDirectory", + "UnexpectedDirectoryError", # Functions "get_offending_hexdigests", "get_hashes", @@ -112,7 +106,6 @@ logger = logging.getLogger(__name__) - #: Represents an available backend for downloading DownloadBackend: TypeAlias = Literal["urllib", "requests"] @@ -143,7 +136,7 @@ def __init__(self, offending_hexdigests: Collection[HexDigestMismatch]): """ self.offending_hexdigests = offending_hexdigests - def __str__(self) -> str: # noqa:D105 + def __str__(self) -> str: return "\n".join( ( "Hexdigest of downloaded file does not match the expected ones!", @@ -155,7 +148,7 @@ def __str__(self) -> str: # noqa:D105 ) -class UnexpectedDirectory(FileExistsError): +class UnexpectedDirectoryError(FileExistsError): """Thrown if a directory path is given where file path should have been.""" def __init__(self, path: Path): @@ -165,12 +158,16 @@ def __init__(self, path: Path): """ self.path = path - def __str__(self) -> str: # noqa:D105 + def __str__(self) -> str: return f"got directory instead of file: {self.path}" +#: Backwards compatible name +UnexpectedDirectory = UnexpectedDirectoryError + + def get_hexdigests_remote( - hexdigests_remote: Optional[Mapping[str, str]], hexdigests_strict: bool = False + hexdigests_remote: Mapping[str, str] | None, hexdigests_strict: bool = False ) -> Mapping[str, str]: """Process hexdigests via URLs. @@ -191,10 +188,10 @@ def get_hexdigests_remote( def get_offending_hexdigests( - path: Union[str, Path], - chunk_size: Optional[int] = None, - hexdigests: Optional[Mapping[str, str]] = None, - hexdigests_remote: Optional[Mapping[str, str]] = None, + path: str | Path, + chunk_size: int | None = None, + hexdigests: Mapping[str, str] | None = None, + hexdigests_remote: Mapping[str, str] | None = None, hexdigests_strict: bool = False, ) -> Collection[HexDigestMismatch]: """ @@ -243,10 +240,10 @@ def get_offending_hexdigests( def get_hashes( - path: Union[str, Path], + path: str | Path, names: Iterable[str], *, - chunk_size: Optional[int] = None, + chunk_size: int | None = None, ) -> Mapping[str, Hash]: """Calculate several hexdigests of hash algorithms for a file concurrently. 
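The rename to `UnexpectedDirectoryError` keeps the old name importable as an alias, so downstream `except UnexpectedDirectory:` clauses keep working. Given the alias shown in the hunk above, the following checks should hold:

```python
from pystow.utils import UnexpectedDirectory, UnexpectedDirectoryError

# Both names refer to the same class, which still subclasses FileExistsError.
assert UnexpectedDirectory is UnexpectedDirectoryError
assert issubclass(UnexpectedDirectoryError, FileExistsError)
```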
@@ -277,8 +274,8 @@ def get_hashes( def raise_on_digest_mismatch( *, path: Path, - hexdigests: Optional[Mapping[str, str]] = None, - hexdigests_remote: Optional[Mapping[str, str]] = None, + hexdigests: Mapping[str, str] | None = None, + hexdigests_remote: Mapping[str, str] | None = None, hexdigests_strict: bool = False, ) -> None: """Raise a HexDigestError if the digests do not match. @@ -315,7 +312,7 @@ def update_to( self, blocks: int = 1, block_size: int = 1, - total_size: Optional[int] = None, + total_size: int | None = None, ) -> None: """Update the internal state based on a urllib report hook. @@ -330,15 +327,15 @@ def update_to( def download( url: str, - path: Union[str, Path], + path: str | Path, force: bool = True, clean_on_failure: bool = True, backend: DownloadBackend = "urllib", - hexdigests: Optional[Mapping[str, str]] = None, - hexdigests_remote: Optional[Mapping[str, str]] = None, + hexdigests: Mapping[str, str] | None = None, + hexdigests_remote: Mapping[str, str] | None = None, hexdigests_strict: bool = False, progress_bar: bool = True, - tqdm_kwargs: Optional[Mapping[str, Any]] = None, + tqdm_kwargs: Mapping[str, Any] | None = None, **kwargs: Any, ) -> None: """Download a file from a given URL. @@ -358,8 +355,10 @@ def download( Set to true to show a progress bar while downloading :param tqdm_kwargs: Override the default arguments passed to :class:`tadm.tqdm` when progress_bar is True. - :param kwargs: The keyword arguments to pass to :func:`urllib.request.urlretrieve` or to `requests.get` - depending on the backend chosen. If using 'requests' backend, `stream` is set to True by default. + :param kwargs: + The keyword arguments to pass to :func:`urllib.request.urlretrieve` + or to `requests.get` depending on the backend chosen. If using 'requests' backend, + `stream` is set to True by default. :raises Exception: Thrown if an error besides a keyboard interrupt is thrown during download :raises KeyboardInterrupt: If a keyboard interrupt is thrown during download @@ -370,7 +369,7 @@ def download( path = Path(path).resolve() if path.is_dir(): - raise UnexpectedDirectory(path) + raise UnexpectedDirectoryError(path) if path.is_file() and not force: raise_on_digest_mismatch( path=path, @@ -381,15 +380,15 @@ def download( logger.debug("did not re-download %s from %s", path, url) return - _tqdm_kwargs = dict( - unit="B", - unit_scale=True, - unit_divisor=1024, - miniters=1, - disable=not progress_bar, - desc=f"Downloading {path.name}", - leave=False, - ) + _tqdm_kwargs = { + "unit": "B", + "unit_scale": True, + "unit_divisor": 1024, + "miniters": 1, + "disable": not progress_bar, + "desc": f"Downloading {path.name}", + "leave": False, + } if tqdm_kwargs: _tqdm_kwargs.update(tqdm_kwargs) @@ -495,7 +494,8 @@ def mkdir(path: Path, ensure_exists: bool = True) -> None: """Make a directory (or parent directory if a file is given) if flagged with ``ensure_exists``. :param path: The path to a directory - :param ensure_exists: Should the directories leading to the path be created if they don't already exist? + :param ensure_exists: + Should the directories leading to the path be created if they don't already exist? """ if ensure_exists: path.mkdir(exist_ok=True, parents=True) @@ -535,8 +535,10 @@ def getenv_path(envvar: str, default: Path, ensure_exists: bool = True) -> Path: """Get an environment variable representing a path, or use the default. 
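`download` skips the network when the target file already exists (unless `force=True`) and verifies any supplied hexdigests after fetching. A sketch; the URL and digest are placeholders, not real values:

```python
from pystow.utils import download

download(
    url="https://example.org/data.tsv",  # hypothetical URL
    path="data.tsv",
    force=False,
    # Placeholder digest: replace with the real value, or verification
    # will raise a HexDigestError after the download completes.
    hexdigests={"md5": "0123456789abcdef0123456789abcdef"},
    progress_bar=False,
)
```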
:param envvar: The environmental variable name to check - :param default: The default path to return if the environmental variable is not set - :param ensure_exists: Should the directories leading to the path be created if they don't already exist? + :param default: + The default path to return if the environmental variable is not set + :param ensure_exists: + Should the directories leading to the path be created if they don't already exist? :return: A path either specified by the environmental variable or by the default. """ rv = Path(os.getenv(envvar, default=default)).expanduser() @@ -552,9 +554,7 @@ def n() -> str: return str(uuid4()) -def get_df_io( - df: "pandas.DataFrame", sep: str = "\t", index: bool = False, **kwargs: Any -) -> BytesIO: +def get_df_io(df: pandas.DataFrame, sep: str = "\t", index: bool = False, **kwargs: Any) -> BytesIO: """Get the dataframe as bytes. :param df: A dataframe @@ -570,7 +570,7 @@ def get_df_io( return bio -def get_np_io(arr: "numpy.typing.ArrayLike", **kwargs: Any) -> BytesIO: +def get_np_io(arr: numpy.typing.ArrayLike, **kwargs: Any) -> BytesIO: """Get the numpy object as bytes. :param arr: Array-like @@ -587,7 +587,7 @@ def get_np_io(arr: "numpy.typing.ArrayLike", **kwargs: Any) -> BytesIO: def write_pickle_gz( obj: Any, - path: Union[str, Path], + path: str | Path, **kwargs: Any, ) -> None: """Write an object to a gzipped pickle. @@ -602,8 +602,8 @@ def write_pickle_gz( def write_lzma_csv( - df: "pandas.DataFrame", - path: Union[str, Path], + df: pandas.DataFrame, + path: str | Path, sep: str = "\t", index: bool = False, **kwargs: Any, @@ -624,8 +624,8 @@ def write_lzma_csv( def write_zipfile_csv( - df: "pandas.DataFrame", - path: Union[str, Path], + df: pandas.DataFrame, + path: str | Path, inner_path: str, sep: str = "\t", index: bool = False, @@ -649,8 +649,8 @@ def write_zipfile_csv( def read_zipfile_csv( - path: Union[str, Path], inner_path: str, sep: str = "\t", **kwargs: Any -) -> "pandas.DataFrame": + path: str | Path, inner_path: str, sep: str = "\t", **kwargs: Any +) -> pandas.DataFrame: """Read an inner CSV file from a zip archive. :param path: The path to the zip archive @@ -667,8 +667,8 @@ def read_zipfile_csv( def write_zipfile_xml( - element_tree: "lxml.etree.ElementTree", - path: Union[str, Path], + element_tree: lxml.etree.ElementTree, + path: str | Path, inner_path: str, **kwargs: Any, ) -> None: @@ -687,9 +687,7 @@ def write_zipfile_xml( file.write(etree.tostring(element_tree, **kwargs)) -def read_zipfile_xml( - path: Union[str, Path], inner_path: str, **kwargs: Any -) -> "lxml.etree.ElementTree": +def read_zipfile_xml(path: str | Path, inner_path: str, **kwargs: Any) -> lxml.etree.ElementTree: """Read an inner XML file from a zip archive. :param path: The path to the zip archive @@ -705,8 +703,8 @@ def read_zipfile_xml( def write_zipfile_np( - arr: "numpy.typing.ArrayLike", - path: Union[str, Path], + arr: numpy.typing.ArrayLike, + path: str | Path, inner_path: str, **kwargs: Any, ) -> None: @@ -725,7 +723,7 @@ def write_zipfile_np( file.write(bytes_io.read()) -def read_zip_np(path: Union[str, Path], inner_path: str, **kwargs: Any) -> "numpy.typing.ArrayLike": +def read_zip_np(path: str | Path, inner_path: str, **kwargs: Any) -> numpy.typing.ArrayLike: """Read an inner numpy array-like from a zip archive. 
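`write_zipfile_csv` and `read_zipfile_csv` are symmetric, so a round trip makes a convenient smoke test:

```python
import pandas as pd

from pystow.utils import read_zipfile_csv, write_zipfile_csv

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
write_zipfile_csv(df, "example.zip", inner_path="table.tsv")
assert df.equals(read_zipfile_csv("example.zip", inner_path="table.tsv"))
```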
:param path: The path to the zip archive @@ -740,13 +738,13 @@ def read_zip_np(path: Union[str, Path], inner_path: str, **kwargs: Any) -> "nump return cast(np.typing.ArrayLike, np.load(file, **kwargs)) -def read_zipfile_rdf(path: Union[str, Path], inner_path: str, **kwargs: Any) -> "rdflib.Graph": +def read_zipfile_rdf(path: str | Path, inner_path: str, **kwargs: Any) -> rdflib.Graph: """Read an inner RDF file from a zip archive. :param path: The path to the zip archive :param inner_path: The path inside the zip archive to the dataframe :param kwargs: Additional kwargs to pass to :func:`pandas.read_csv`. - :return: A dataframe + :return: A graph """ import rdflib @@ -758,8 +756,8 @@ def read_zipfile_rdf(path: Union[str, Path], inner_path: str, **kwargs: Any) -> def write_tarfile_csv( - df: "pandas.DataFrame", - path: Union[str, Path], + df: pandas.DataFrame, + path: str | Path, inner_path: str, sep: str = "\t", index: bool = False, @@ -784,8 +782,8 @@ def write_tarfile_csv( def read_tarfile_csv( - path: Union[str, Path], inner_path: str, sep: str = "\t", **kwargs: Any -) -> "pandas.DataFrame": + path: str | Path, inner_path: str, sep: str = "\t", **kwargs: Any +) -> pandas.DataFrame: """Read an inner CSV file from a tar archive. :param path: The path to the tar archive @@ -801,9 +799,7 @@ def read_tarfile_csv( return pd.read_csv(file, sep=sep, **kwargs) -def read_tarfile_xml( - path: Union[str, Path], inner_path: str, **kwargs: Any -) -> "lxml.etree.ElementTree": +def read_tarfile_xml(path: str | Path, inner_path: str, **kwargs: Any) -> lxml.etree.ElementTree: """Read an inner XML file from a tar archive. :param path: The path to the tar archive @@ -818,7 +814,7 @@ def read_tarfile_xml( return etree.parse(file, **kwargs) -def read_rdf(path: Union[str, Path], **kwargs: Any) -> "rdflib.Graph": +def read_rdf(path: str | Path, **kwargs: Any) -> rdflib.Graph: """Read an RDF file with :mod:`rdflib`. :param path: The path to the RDF file @@ -837,7 +833,7 @@ def read_rdf(path: Union[str, Path], **kwargs: Any) -> "rdflib.Graph": return graph -def write_sql(df: "pandas.DataFrame", name: str, path: Union[str, Path], **kwargs: Any) -> None: +def write_sql(df: pandas.DataFrame, name: str, path: str | Path, **kwargs: Any) -> None: """Write a dataframe as a SQL table. :param df: A dataframe @@ -880,10 +876,10 @@ def get_commit(org: str, repo: str, provider: str = "git") -> str: def download_from_google( file_id: str, - path: Union[str, Path], + path: str | Path, force: bool = True, clean_on_failure: bool = True, - hexdigests: Optional[Mapping[str, str]] = None, + hexdigests: Mapping[str, str] | None = None, ) -> None: """Download a file from google drive. 
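`write_sql` dumps a dataframe into a SQLite table at the given path. A sketch with hypothetical table and file names:

```python
import pandas as pd

from pystow.utils import write_sql

df = pd.DataFrame({"name": ["a", "b"], "count": [1, 2]})
write_sql(df, name="records", path="example.db")
```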
@@ -903,7 +899,7 @@ def download_from_google( path = Path(path).resolve() if path.is_dir(): - raise UnexpectedDirectory(path) + raise UnexpectedDirectoryError(path) if path.is_file() and not force: raise_on_digest_mismatch(path=path, hexdigests=hexdigests) logger.debug("did not re-download %s from Google ID %s", path, file_id) @@ -936,10 +932,10 @@ def _get_confirm_token(res: requests.Response) -> str: def download_from_s3( s3_bucket: str, s3_key: str, - path: Union[str, Path], - client: Union[None, "botocore.client.BaseClient"] = None, - client_kwargs: Optional[Mapping[str, Any]] = None, - download_file_kwargs: Optional[Mapping[str, Any]] = None, + path: str | Path, + client: None | botocore.client.BaseClient = None, + client_kwargs: Mapping[str, Any] | None = None, + download_file_kwargs: Mapping[str, Any] | None = None, force: bool = True, clean_on_failure: bool = True, ) -> None: @@ -964,7 +960,7 @@ def download_from_s3( path = Path(path).resolve() if path.is_dir(): - raise UnexpectedDirectory(path) + raise UnexpectedDirectoryError(path) if path.is_file() and not force: logger.debug("did not re-download %s from %s %s", path, s3_bucket, s3_key) return @@ -993,7 +989,7 @@ def download_from_s3( raise -def _unlink(path: Union[str, Path]) -> None: +def _unlink(path: str | Path) -> None: # python 3.6 does not have pathlib.Path.unlink, smh try: os.remove(path) @@ -1048,8 +1044,7 @@ def get_base(key: str, ensure_exists: bool = True) -> Path: _HOME where key is uppercased is checked first before using the default home directory. :param ensure_exists: - Should all directories be created automatically? - Defaults to true. + Should all directories be created automatically? Defaults to true. :returns: The path to the given @@ -1078,17 +1073,18 @@ def ensure_readme() -> None: except PermissionError as e: raise PermissionError( "PyStow was not able to create its home directory in due to a lack of " - "permissions. This can happen, e.g., if you're working on a server where you don't have full " - "rights. See https://pystow.readthedocs.io/en/latest/installation.html#configuration for instructions " - "on choosing a different home folder location for PyStow to somewhere where you have write permissions." + "permissions. This can happen, e.g., if you're working on a server where you don't " + "have full rights. See https://pystow.readthedocs.io/en/latest/installation.html#" + "configuration for instructions on choosing a different home folder location for " + "PyStow to somewhere where you have write permissions." ) from e if readme_path.is_file(): return with readme_path.open("w", encoding="utf8") as file: - print(README_TEXT, file=file) # noqa:T001,T201 + print(README_TEXT, file=file) -def path_to_sqlite(path: Union[str, Path]) -> str: +def path_to_sqlite(path: str | Path) -> str: """Convert a path to a SQLite connection string. :param path: A path to a SQLite database file @@ -1098,7 +1094,7 @@ def path_to_sqlite(path: Union[str, Path]) -> str: return f"sqlite:///{path.as_posix()}" -def gunzip(source: Union[str, Path], target: Union[str, Path]) -> None: +def gunzip(source: str | Path, target: str | Path) -> None: """Unzip a file in the source to the target. 
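`path_to_sqlite` is the string-building counterpart: it turns a filesystem path into the `sqlite:///...`-style connection string understood by SQLAlchemy-compatible consumers. Note the scheme's three slashes plus the absolute POSIX path:

```python
from pathlib import Path

from pystow.utils import path_to_sqlite

uri = path_to_sqlite(Path("/tmp/example.db"))
print(uri)  # e.g. sqlite:////tmp/example.db
```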
:param source: The path to an input file diff --git a/src/pystow/version.py b/src/pystow/version.py index a53c59e..08444f8 100644 --- a/src/pystow/version.py +++ b/src/pystow/version.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Version information for PyStow.""" __all__ = [ diff --git a/tests/test_api.py b/tests/test_api.py index 9a24d73..794bdc6 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,7 +1,7 @@ -# -*- coding: utf-8 -*- - """Test for API completeness.""" +from __future__ import annotations + import inspect import unittest @@ -67,7 +67,8 @@ def test_exposed(self): ) self.assertTrue( hasattr(pystow.api, name), - msg=f"`Module.{name} should be exposed as a top-level function in `pystow.api`.", + msg=f"`Module.{name} should be exposed as a top-level " + f"function in `pystow.api`.", ) self.assertTrue( hasattr(pystow, name), diff --git a/tests/test_caching.py b/tests/test_caching.py index b19c132..f46151c 100644 --- a/tests/test_caching.py +++ b/tests/test_caching.py @@ -1,7 +1,7 @@ -# -*- coding: utf-8 -*- - """Tests for caching.""" +from __future__ import annotations + import os import tempfile import unittest @@ -67,7 +67,8 @@ def _f1(): _f1() self.assertFalse( path.is_file(), - msg="the function should throw an exception because of the flag, and no file should be created", + msg="the function should throw an exception " + "because of the flag, and no file should be created", ) raise_flag = False diff --git a/tests/test_config.py b/tests/test_config.py index e16da5d..c47b525 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,7 +1,7 @@ -# -*- coding: utf-8 -*- - """Test configuration loading.""" +from __future__ import annotations + import tempfile import unittest from pathlib import Path diff --git a/tests/test_module.py b/tests/test_module.py index 476bcf0..d9c749c 100644 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -1,7 +1,7 @@ -# -*- coding: utf-8 -*- - """Tests for PyStow.""" +from __future__ import annotations + import bz2 import contextlib import itertools as itt @@ -12,8 +12,8 @@ import shutil import tempfile import unittest +from collections.abc import Mapping from pathlib import Path -from typing import ContextManager, Mapping, Union from unittest import mock import pandas as pd @@ -135,7 +135,7 @@ def tearDown(self) -> None: self.directory.cleanup() @contextlib.contextmanager - def mock_directory(self) -> ContextManager[Path]: + def mock_directory(self) -> contextlib.AbstractContextManager[Path]: """Use this test case's temporary directory as a mock environment variable. :yield: The mock directory's path @@ -150,20 +150,20 @@ def mock_download(): :return: A patch object that can be applied to the pystow download function """ - def _mock_get_data(url: str, path: Union[str, Path], **_kwargs) -> Path: + def _mock_get_data(url: str, path: str | Path, **_kwargs) -> Path: return shutil.copy(MOCK_FILES[url], path) return mock.patch("pystow.utils.download", side_effect=_mock_get_data) @staticmethod - def mock_download_once(local_path: Union[str, Path]): + def mock_download_once(local_path: str | Path): """Mock connection to the internet using local resource files. 
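The `mock_download_once` helper patched in the tests above is a reusable pattern: replace `pystow.utils.download` with a side effect that copies a local fixture, so tests never touch the network. A condensed sketch of the same idea:

```python
from __future__ import annotations

import shutil
from pathlib import Path
from unittest import mock


def mock_download_once(local_path: str | Path):
    """Patch pystow's downloader to copy a local fixture instead."""

    def _mock_get_data(path: str | Path, **_kwargs) -> Path:
        # Ignore the URL and other kwargs; just place the fixture at the target.
        return Path(shutil.copy(local_path, path))

    return mock.patch("pystow.utils.download", side_effect=_mock_get_data)
```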
:param local_path: the path to the file to mock :return: A patch object that can be applied to the pystow download function """ - def _mock_get_data(path: Union[str, Path], **_kwargs) -> Path: + def _mock_get_data(path: str | Path, **_kwargs) -> Path: return shutil.copy(local_path, path) return mock.patch("pystow.utils.download", side_effect=_mock_get_data) @@ -281,7 +281,7 @@ def test_ensure_open_lzma(self): with self.mock_download_once(path): with lzma.open(path, "wt") as file: for row in TEST_TSV_ROWS: - print(*row, sep="\t", file=file) # noqa:T001,T201 + print(*row, sep="\t", file=file) with pystow.ensure_open_lzma("test", url=n()) as file: df = pd.read_csv(file, sep="\t") self.assertEqual(3, len(df.columns)) diff --git a/tests/test_utils.py b/tests/test_utils.py index 53bb430..4250fd9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,7 @@ -# -*- coding: utf-8 -*- - """Tests for utilities.""" +from __future__ import annotations + import hashlib import os import tempfile @@ -250,7 +250,7 @@ def setUp(self) -> None: self.directory = tempfile.TemporaryDirectory() self.path = Path(self.directory.name).joinpath("test.tsv") - md5 = hashlib.md5() # noqa:S303,S324 + md5 = hashlib.md5() # noqa: S324 with TEST_TXT.open("rb") as file: md5.update(file.read()) self.expected_md5 = md5.hexdigest() @@ -425,14 +425,16 @@ def test_hexdigest_urls(self): hexdigests["md5"], ) - # Live test case - # hexdigests = get_hexdigests_remote( - # {"md5": "https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/pubmed22n0001.xml.gz.md5"}, - # hexdigests_strict=False, - # ) - # self.assertEqual( - # { - # "md5": "0f08d8f3947dde1f3bced5e1f450c0da", - # }, - # hexdigests, - # ) + @unittest.skip + def test_live(self) -> None: + """Test live.""" + hexdigests = get_hexdigests_remote( + {"md5": "https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/pubmed22n0001.xml.gz.md5"}, + hexdigests_strict=False, + ) + self.assertEqual( + { + "md5": "0f08d8f3947dde1f3bced5e1f450c0da", + }, + hexdigests, + ) diff --git a/tox.ini b/tox.ini index dbb7610..056fcb6 100644 --- a/tox.ini +++ b/tox.ini @@ -7,100 +7,113 @@ # To use a PEP 517 build-backend you are required to configure tox to use an isolated_build: # https://tox.readthedocs.io/en/latest/example/package.html isolated_build = True + # These environments are run in order if you just use `tox`: envlist = # always keep coverage-clean first - #coverage-clean - # code linters/stylers + coverage-clean + # code formatters + format + # format-docs + # Code quality assessment manifest - lint - flake8 pyroma + lint mypy - # documentation linters/checkers + # Documentation quality assurance doc8 docstr-coverage docs-test # the actual tests py + doctests # always keep coverage-report last - #coverage-report + # coverage-report [testenv] +description = Run unit and integration tests. +# Runs on the "tests" directory by default, or passes the positional +# arguments from `tox -e py ... commands = coverage run -p -m pytest --durations=20 {posargs:tests} coverage combine coverage xml -passenv = - HOME extras = + # See the [project.optional-dependencies] entry in pyproject.toml for "tests" tests pandas rdf xml -allowlist_externals = - /bin/cat - /bin/cp - /bin/mkdir - /usr/bin/cat - /usr/bin/cp - /usr/bin/mkdir - /usr/bin/git - /usr/local/bin/git - -[testenv:coverage-xml] -deps = coverage -skip_install = true -commands = coverage xml [testenv:coverage-clean] +description = Remove testing coverage artifacts. 
deps = coverage skip_install = true commands = coverage erase +[testenv:doctests] +description = Test that documentation examples run properly. +commands = + xdoctest -m src +deps = + xdoctest + pygments + +[testenv:treon] +description = Test that notebooks can run to completion +commands = + treon notebooks/ +deps = + treon + +[testenv:format] +description = Format the code in a deterministic way using ruff. Note that ruff check should come before ruff format when using --fix (ref: https://github.com/astral-sh/ruff-pre-commit/blob/main/README.md) +deps = + ruff +skip_install = true +commands = + ruff check --fix + ruff format + +[testenv:format-docs] +description = Run documentation linters. +# note that this doesn't work with sphinx-click +# or any other extension that adds extra directives +deps = + rstfmt +extras = + # See the [project.optional-dependencies] entry in pyproject.toml for "docs" + docs +skip_install = true +commands = + rstfmt docs/source/ + [testenv:manifest] deps = check-manifest skip_install = true commands = check-manifest +description = Check that the MANIFEST.in is written properly and give feedback on how to fix it. -[testenv:pre-commit] -skip_install = true -deps = - pre-commit -commands = pre-commit run -a -usedevelop = true -description = Run the pre-commit tool +[testenv:lint] +description = Check code quality using ruff and other tools. -[testenv:flake8] skip_install = true deps = - flake8 - flake8-bandit - flake8-colors - flake8-docstrings - flake8-isort - flake8-bugbear - flake8-broken-line - flake8-black - flake8-print - pep8-naming - pydocstyle - darglint + ruff commands = - flake8 src/pystow/ tests/ -description = Run the flake8 tool with several plugins (bandit, docstrings, import order, pep8 naming). + ruff check + ruff format --check -[testenv:lint] +[testenv:pyroma] deps = - black - isort + pygments + pyroma skip_install = true -commands = - black src/ tests/ - isort src/ tests/ -description = Run linters. +commands = pyroma --min=10 . +description = Run the pyroma tool to check the package friendliness of the project. [testenv:mypy] +description = Run the mypy tool to check static typing on the project. deps = mypy types-requests @@ -108,49 +121,37 @@ extras = pandas rdf xml -commands = - mypy --install-types --non-interactive --ignore-missing-imports --strict src/pystow/ -description = Run the mypy tool to check static typing on the project. - -[testenv:pyroma] -deps = - pygments - pyroma -skip_install = true -commands = pyroma --min=10 . -description = Run the pyroma tool to check the package friendliness of the project. +commands = mypy --install-types --non-interactive --ignore-missing-imports --strict src/ [testenv:doc8] skip_install = true deps = - sphinx doc8 +extras = + docs commands = doc8 docs/source/ description = Run the doc8 tool to check the style of the RST files in the project docs. [testenv:docstr-coverage] +description = Run the docstr-coverage tool to check documentation coverage. skip_install = true deps = docstr-coverage commands = - docstr-coverage src/pystow/ tests/ --skip-private --skip-magic -description = Run the docstr-coverage tool to check documentation coverage - + docstr-coverage src/ tests/ --skip-private --skip-magic [testenv:docs] -description = Build the documentation locally. +description = Build the documentation locally, allowing warnings. 
extras = + # See the [project.optional-dependencies] entry in pyproject.toml for "docs" docs - rdf - xml - pandas - aws + # You might need to add additional extras if your documentation covers it commands = - python -m sphinx -W -b html -d docs/build/doctrees docs/source docs/build/html + python -m sphinx -b html -d docs/build/doctrees docs/source docs/build/html [testenv:docs-test] -description = Test building the documentation in an isolated environment. +description = Test building the documentation in an isolated environment. Warnings are considered as errors via -W. changedir = docs extras = {[testenv:docs]extras} @@ -158,20 +159,22 @@ commands = mkdir -p {envtmpdir} cp -r source {envtmpdir}/source python -m sphinx -W -b html -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/html - python -m sphinx -W -b coverage -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/coverage - cat {envtmpdir}/build/coverage/c.txt - cat {envtmpdir}/build/coverage/python.txt + # python -m sphinx -W -b coverage -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/coverage + # cat {envtmpdir}/build/coverage/c.txt + # cat {envtmpdir}/build/coverage/python.txt allowlist_externals = - /bin/cp - /bin/cat - /bin/mkdir - # for compatibility on GitHub actions - /usr/bin/cp - /usr/bin/cat - /usr/bin/mkdir + cp + cat + mkdir + +[testenv:coverage-xml] +deps = coverage[toml] +skip_install = true +commands = coverage xml [testenv:coverage-report] -deps = coverage +# TODO this is broken +deps = coverage[toml] skip_install = true commands = coverage combine @@ -182,54 +185,115 @@ commands = #################### [testenv:bumpversion] -commands = bump2version {posargs} +description = Bump the version number +commands = bump-my-version bump {posargs} skip_install = true passenv = HOME deps = - bump2version + bump-my-version [testenv:bumpversion-release] -commands = bump2version release --tag +description = Remove the -dev tag from the version +commands = bump-my-version bump release --tag skip_install = true passenv = HOME deps = - bump2version + bump-my-version [testenv:build] skip_install = true deps = - wheel - build + uv setuptools commands = - python -m build --sdist --wheel --no-isolation + uv build --sdist --wheel --no-build-isolation + +############ +# Releases # +############ + +# In order to make a release to PyPI, you'll need to take the following steps: +# +# 1. Navigate to https://pypi.org/account/register/ to register for Test PyPI +# 2. Navigate to https://pypi.org/manage/account/ and request to re-send a verification email. +# This is not sent by default, and is required to set up 2-Factor Authentication. +# 3. Get account recovery codes +# 4. Set up 2-Factor Authentication +# 5. Get an API token from https://pypi.org/manage/account/token/ +# 6. Install keyring with `uv tool install keyring` +# 7. 
Add your token to keyring with `keyring set https://upload.pypi.org/legacy/ __token__` [testenv:release] +description = Release the code to PyPI so users can pip install it skip_install = true -passenv = - TWINE_USERNAME - TWINE_PASSWORD deps = {[testenv:build]deps} - twine >= 1.5.0 + uv + keyring commands = {[testenv:build]commands} - twine upload --non-interactive --skip-existing dist/* + uv publish --username __token__ --keyring-provider subprocess --publish-url https://upload.pypi.org/legacy/ [testenv:finish] +description = + Run a workflow that removes -dev from the version, creates a tagged release on GitHub, + creates a release on PyPI, and bumps the version again. skip_install = true passenv = HOME - TWINE_USERNAME - TWINE_PASSWORD deps = {[testenv:release]deps} - bump2version + bump-my-version commands = - bump2version release --tag + {[testenv:bumpversion-release]commands} {[testenv:release]commands} git push --tags - bump2version patch + bump-my-version bump patch + git push +allowlist_externals = + git + +################# +# Test Releases # +################# + +# In order to test making a release to Test PyPI, you'll need to take the following steps: +# +# 1. Navigate to https://test.pypi.org/account/register/ to register for Test PyPI +# 2. Navigate to https://test.pypi.org/manage/account/ and request to re-send a verification email. +# This is not sent by default, and is required to set up 2-Factor Authentication. +# 3. Get account recovery codes +# 4. Set up 2-Factor Authentication +# 5. Get an API token from https://test.pypi.org/manage/account/token/ +# 6. Install keyring with `uv tool install keyring` +# 7. Add your token to keyring with `keyring set https://test.pypi.org/legacy/ __token__` + +[testenv:testrelease] +description = Release the code to the test PyPI site +skip_install = true +deps = + {[testenv:build]deps} + uv + keyring +commands = + {[testenv:build]commands} + uv publish --username __token__ --keyring-provider subprocess --publish-url https://test.pypi.org/legacy/ + +[testenv:testfinish] +description = + Run a workflow that removes -dev from the version, creates a tagged release on GitHub, + creates a release on Test PyPI, and bumps the version again. +skip_install = true +passenv = + HOME +deps = + {[testenv:testrelease]deps} + bump-my-version +commands = + {[testenv:bumpversion-release]commands} + {[testenv:testrelease]commands} + git push --tags + bump-my-version bump patch git push allowlist_externals = - /usr/bin/git + git