diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f484664f7b712d..8a2d8145459a1a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -147,6 +147,8 @@ Lib/ast.py @isidentical **/*cgi* @ethanfurman **/*tarfile* @ethanfurman +**/*tomllib* @encukou + # macOS /Mac/ @python/macos-team **/*osx_support* @python/macos-team diff --git a/Doc/library/fileformats.rst b/Doc/library/fileformats.rst index e9c2e1fbbdf3e8..bb099fe2d3d6e3 100644 --- a/Doc/library/fileformats.rst +++ b/Doc/library/fileformats.rst @@ -12,6 +12,7 @@ that aren't markup languages and are not related to e-mail. csv.rst configparser.rst + tomllib.rst netrc.rst xdrlib.rst plistlib.rst diff --git a/Doc/library/tomllib.rst b/Doc/library/tomllib.rst new file mode 100644 index 00000000000000..918576eb37eaee --- /dev/null +++ b/Doc/library/tomllib.rst @@ -0,0 +1,117 @@ +:mod:`tomllib` --- Parse TOML files +=================================== + +.. module:: tomllib + :synopsis: Parse TOML files. + +.. versionadded:: 3.11 + +.. moduleauthor:: Taneli Hukkinen +.. sectionauthor:: Taneli Hukkinen + +**Source code:** :source:`Lib/tomllib` + +-------------- + +This module provides an interface for parsing TOML (Tom's Obvious Minimal +Language, `https://toml.io `_). This module does not +support writing TOML. + +.. seealso:: + + The `Tomli-W package `__ + is a TOML writer that can be used in conjunction with this module, + providing a write API familiar to users of the standard library + :mod:`marshal` and :mod:`pickle` modules. + +.. seealso:: + + The `TOML Kit package `__ + is a style-preserving TOML library with both read and write capability. + It is a recommended replacement for this module for editing already + existing TOML files. + + +This module defines the following functions: + +.. function:: load(fp, /, *, parse_float=float) + + Read a TOML file. The first argument should be a readable and binary file object. + Return a :class:`dict`. Convert TOML types to Python using this + :ref:`conversion table `. + + *parse_float* will be called with the string of every TOML + float to be decoded. By default, this is equivalent to ``float(num_str)``. + This can be used to use another datatype or parser for TOML floats + (e.g. :class:`decimal.Decimal`). The callable must not return a + :class:`dict` or a :class:`list`, else a :exc:`ValueError` is raised. + + A :exc:`TOMLDecodeError` will be raised on an invalid TOML document. + + +.. function:: loads(s, /, *, parse_float=float) + + Load TOML from a :class:`str` object. Return a :class:`dict`. Convert TOML + types to Python using this :ref:`conversion table `. The + *parse_float* argument has the same meaning as in :func:`load`. + + A :exc:`TOMLDecodeError` will be raised on an invalid TOML document. + + +The following exceptions are available: + +.. exception:: TOMLDecodeError + + Subclass of :exc:`ValueError`. + + +Examples +-------- + +Parsing a TOML file:: + + import tomllib + + with open("pyproject.toml", "rb") as f: + data = tomllib.load(f) + +Parsing a TOML string:: + + import tomllib + + toml_str = """ + python-version = "3.11.0" + python-implementation = "CPython" + """ + + data = tomllib.loads(toml_str) + + +Conversion Table +---------------- + +.. _toml-to-py-table: + ++------------------+--------------------------------------------------------------------------------------+ +| TOML | Python | ++==================+======================================================================================+ +| table | dict | ++------------------+--------------------------------------------------------------------------------------+ +| string | str | ++------------------+--------------------------------------------------------------------------------------+ +| integer | int | ++------------------+--------------------------------------------------------------------------------------+ +| float | float (configurable with *parse_float*) | ++------------------+--------------------------------------------------------------------------------------+ +| boolean | bool | ++------------------+--------------------------------------------------------------------------------------+ +| offset date-time | datetime.datetime (``tzinfo`` attribute set to an instance of ``datetime.timezone``) | ++------------------+--------------------------------------------------------------------------------------+ +| local date-time | datetime.datetime (``tzinfo`` attribute set to ``None``) | ++------------------+--------------------------------------------------------------------------------------+ +| local date | datetime.date | ++------------------+--------------------------------------------------------------------------------------+ +| local time | datetime.time | ++------------------+--------------------------------------------------------------------------------------+ +| array | list | ++------------------+--------------------------------------------------------------------------------------+ diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index e7f3dab2b51db4..14500e9d539bc3 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -209,7 +209,8 @@ Other CPython Implementation Changes New Modules =========== -* None yet. +* A new module, :mod:`tomllib`, was added for parsing TOML. + (Contributed by Taneli Hukkinen in :issue:`40059`.) Improved Modules diff --git a/Lib/test/test_tomllib/__init__.py b/Lib/test/test_tomllib/__init__.py new file mode 100644 index 00000000000000..6204c6e47f0f16 --- /dev/null +++ b/Lib/test/test_tomllib/__init__.py @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +__all__ = ("tomllib",) + +# By changing this one line, we can run the tests against +# a different module name. +import tomllib + +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tomllib/__main__.py b/Lib/test/test_tomllib/__main__.py new file mode 100644 index 00000000000000..dd063653437433 --- /dev/null +++ b/Lib/test/test_tomllib/__main__.py @@ -0,0 +1,6 @@ +import unittest + +from . import load_tests + + +unittest.main() diff --git a/Lib/test/test_tomllib/burntsushi.py b/Lib/test/test_tomllib/burntsushi.py new file mode 100644 index 00000000000000..71228c65369572 --- /dev/null +++ b/Lib/test/test_tomllib/burntsushi.py @@ -0,0 +1,120 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +"""Utilities for tests that are in the "burntsushi" format.""" + +import datetime +from typing import Any + +# Aliases for converting TOML compliance format [1] to BurntSushi format [2] +# [1] https://github.com/toml-lang/compliance/blob/db7c3211fda30ff9ddb10292f4aeda7e2e10abc4/docs/json-encoding.md # noqa: E501 +# [2] https://github.com/BurntSushi/toml-test/blob/4634fdf3a6ecd6aaea5f4cdcd98b2733c2694993/README.md # noqa: E501 +_aliases = { + "boolean": "bool", + "offset datetime": "datetime", + "local datetime": "datetime-local", + "local date": "date-local", + "local time": "time-local", +} + + +def convert(obj): # noqa: C901 + if isinstance(obj, str): + return {"type": "string", "value": obj} + elif isinstance(obj, bool): + return {"type": "bool", "value": str(obj).lower()} + elif isinstance(obj, int): + return {"type": "integer", "value": str(obj)} + elif isinstance(obj, float): + return {"type": "float", "value": _normalize_float_str(str(obj))} + elif isinstance(obj, datetime.datetime): + val = _normalize_datetime_str(obj.isoformat()) + if obj.tzinfo: + return {"type": "datetime", "value": val} + return {"type": "datetime-local", "value": val} + elif isinstance(obj, datetime.time): + return { + "type": "time-local", + "value": _normalize_localtime_str(str(obj)), + } + elif isinstance(obj, datetime.date): + return { + "type": "date-local", + "value": str(obj), + } + elif isinstance(obj, list): + return [convert(i) for i in obj] + elif isinstance(obj, dict): + return {k: convert(v) for k, v in obj.items()} + raise Exception("unsupported type") + + +def normalize(obj: Any) -> Any: + """Normalize test objects. + + This normalizes primitive values (e.g. floats), and also converts from + TOML compliance format [1] to BurntSushi format [2]. + + [1] https://github.com/toml-lang/compliance/blob/db7c3211fda30ff9ddb10292f4aeda7e2e10abc4/docs/json-encoding.md # noqa: E501 + [2] https://github.com/BurntSushi/toml-test/blob/4634fdf3a6ecd6aaea5f4cdcd98b2733c2694993/README.md # noqa: E501 + """ + if isinstance(obj, list): + return [normalize(item) for item in obj] + if isinstance(obj, dict): + if "type" in obj and "value" in obj: + type_ = obj["type"] + norm_type = _aliases.get(type_, type_) + value = obj["value"] + if norm_type == "float": + norm_value = _normalize_float_str(value) + elif norm_type in {"datetime", "datetime-local"}: + norm_value = _normalize_datetime_str(value) + elif norm_type == "time-local": + norm_value = _normalize_localtime_str(value) + else: + norm_value = value + + if norm_type == "array": + return [normalize(item) for item in value] + return {"type": norm_type, "value": norm_value} + return {k: normalize(v) for k, v in obj.items()} + raise AssertionError("Burntsushi fixtures should be dicts/lists only") + + +def _normalize_datetime_str(dt_str: str) -> str: + if dt_str[-1].lower() == "z": + dt_str = dt_str[:-1] + "+00:00" + + date = dt_str[:10] + rest = dt_str[11:] + + if "+" in rest: + sign = "+" + elif "-" in rest: + sign = "-" + else: + sign = "" + + if sign: + time, _, offset = rest.partition(sign) + else: + time = rest + offset = "" + + time = time.rstrip("0") if "." in time else time + return date + "T" + time + sign + offset + + +def _normalize_localtime_str(lt_str: str) -> str: + return lt_str.rstrip("0") if "." in lt_str else lt_str + + +def _normalize_float_str(float_str: str) -> str: + as_float = float(float_str) + + # Normalize "-0.0" and "+0.0" + if as_float == 0: + return "0" + + return str(as_float) diff --git a/Lib/test/test_tomllib/data/invalid/array-missing-comma.toml b/Lib/test/test_tomllib/data/invalid/array-missing-comma.toml new file mode 100644 index 00000000000000..9431d907995706 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/array-missing-comma.toml @@ -0,0 +1 @@ +arrr = [true false] diff --git a/Lib/test/test_tomllib/data/invalid/array-of-tables/overwrite-array-in-parent.toml b/Lib/test/test_tomllib/data/invalid/array-of-tables/overwrite-array-in-parent.toml new file mode 100644 index 00000000000000..f867c28deeba48 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/array-of-tables/overwrite-array-in-parent.toml @@ -0,0 +1,4 @@ +[[parent-table.arr]] +[parent-table] +not-arr = 1 +arr = 2 diff --git a/Lib/test/test_tomllib/data/invalid/array-of-tables/overwrite-bool-with-aot.toml b/Lib/test/test_tomllib/data/invalid/array-of-tables/overwrite-bool-with-aot.toml new file mode 100644 index 00000000000000..b1892d6a449061 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/array-of-tables/overwrite-bool-with-aot.toml @@ -0,0 +1,2 @@ +a=true +[[a]] \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/array/file-end-after-val.toml b/Lib/test/test_tomllib/data/invalid/array/file-end-after-val.toml new file mode 100644 index 00000000000000..00196bb03ea818 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/array/file-end-after-val.toml @@ -0,0 +1 @@ +a=[1 \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/array/unclosed-after-item.toml b/Lib/test/test_tomllib/data/invalid/array/unclosed-after-item.toml new file mode 100644 index 00000000000000..9edec37078fb05 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/array/unclosed-after-item.toml @@ -0,0 +1 @@ +v=[1, \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/array/unclosed-empty.toml b/Lib/test/test_tomllib/data/invalid/array/unclosed-empty.toml new file mode 100644 index 00000000000000..1fd099d3683e2c --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/array/unclosed-empty.toml @@ -0,0 +1 @@ +v=[ \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/basic-str-ends-in-escape.toml b/Lib/test/test_tomllib/data/invalid/basic-str-ends-in-escape.toml new file mode 100644 index 00000000000000..713ea544f82c2a --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/basic-str-ends-in-escape.toml @@ -0,0 +1 @@ +"backslash is the last char\ \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/boolean/invalid-false-casing.toml b/Lib/test/test_tomllib/data/invalid/boolean/invalid-false-casing.toml new file mode 100644 index 00000000000000..336d2d808e7818 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/boolean/invalid-false-casing.toml @@ -0,0 +1 @@ +val=falsE \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/boolean/invalid-true-casing.toml b/Lib/test/test_tomllib/data/invalid/boolean/invalid-true-casing.toml new file mode 100644 index 00000000000000..0370d6cdb671a0 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/boolean/invalid-true-casing.toml @@ -0,0 +1 @@ +val=trUe \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/dates-and-times/invalid-day.toml b/Lib/test/test_tomllib/data/invalid/dates-and-times/invalid-day.toml new file mode 100644 index 00000000000000..c69f0914451d97 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/dates-and-times/invalid-day.toml @@ -0,0 +1 @@ +"only 28 or 29 days in february" = 1988-02-30 diff --git a/Lib/test/test_tomllib/data/invalid/dotted-keys/access-non-table.toml b/Lib/test/test_tomllib/data/invalid/dotted-keys/access-non-table.toml new file mode 100644 index 00000000000000..ada02005a3eb6d --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/dotted-keys/access-non-table.toml @@ -0,0 +1,2 @@ +a = false +a.b = true \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/dotted-keys/extend-defined-aot.toml b/Lib/test/test_tomllib/data/invalid/dotted-keys/extend-defined-aot.toml new file mode 100644 index 00000000000000..1c3c34b8571fa5 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/dotted-keys/extend-defined-aot.toml @@ -0,0 +1,3 @@ +[[tab.arr]] +[tab] +arr.val1=1 diff --git a/Lib/test/test_tomllib/data/invalid/dotted-keys/extend-defined-table-with-subtable.toml b/Lib/test/test_tomllib/data/invalid/dotted-keys/extend-defined-table-with-subtable.toml new file mode 100644 index 00000000000000..70e2ac5be8986a --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/dotted-keys/extend-defined-table-with-subtable.toml @@ -0,0 +1,4 @@ +[a.b.c.d] + z = 9 +[a] + b.c.d.k.t = 8 diff --git a/Lib/test/test_tomllib/data/invalid/dotted-keys/extend-defined-table.toml b/Lib/test/test_tomllib/data/invalid/dotted-keys/extend-defined-table.toml new file mode 100644 index 00000000000000..c88c179d3c1774 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/dotted-keys/extend-defined-table.toml @@ -0,0 +1,4 @@ +[a.b.c] + z = 9 +[a] + b.c.t = 9 diff --git a/Lib/test/test_tomllib/data/invalid/inline-table-missing-comma.toml b/Lib/test/test_tomllib/data/invalid/inline-table-missing-comma.toml new file mode 100644 index 00000000000000..d98b41dd037009 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table-missing-comma.toml @@ -0,0 +1 @@ +arrr = { comma-missing = true valid-toml = false } diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/define-twice-in-subtable.toml b/Lib/test/test_tomllib/data/invalid/inline-table/define-twice-in-subtable.toml new file mode 100644 index 00000000000000..706834135d1dc3 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/define-twice-in-subtable.toml @@ -0,0 +1 @@ +table1 = { table2.dupe = 1, table2.dupe = 2 } \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/define-twice.toml b/Lib/test/test_tomllib/data/invalid/inline-table/define-twice.toml new file mode 100644 index 00000000000000..8202b98ff2dbff --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/define-twice.toml @@ -0,0 +1 @@ +table = { dupe = 1, dupe = 2 } \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/file-end-after-key-val.toml b/Lib/test/test_tomllib/data/invalid/inline-table/file-end-after-key-val.toml new file mode 100644 index 00000000000000..52d4e779704bd3 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/file-end-after-key-val.toml @@ -0,0 +1 @@ +a={b=1 \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/mutate.toml b/Lib/test/test_tomllib/data/invalid/inline-table/mutate.toml new file mode 100644 index 00000000000000..4e5e9ff15695fd --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/mutate.toml @@ -0,0 +1,2 @@ +a = { b = 1 } +a.b = 2 \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/override-val-in-table.toml b/Lib/test/test_tomllib/data/invalid/inline-table/override-val-in-table.toml new file mode 100644 index 00000000000000..6e87cfa456d5fb --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/override-val-in-table.toml @@ -0,0 +1,5 @@ +[tab.nested] +inline-t = { nest = {} } + +[tab] +nested.inline-t.nest = 2 diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/override-val-with-array.toml b/Lib/test/test_tomllib/data/invalid/inline-table/override-val-with-array.toml new file mode 100644 index 00000000000000..79b4f325874ed0 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/override-val-with-array.toml @@ -0,0 +1,3 @@ +inline-t = { nest = {} } + +[[inline-t.nest]] diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/override-val-with-table.toml b/Lib/test/test_tomllib/data/invalid/inline-table/override-val-with-table.toml new file mode 100644 index 00000000000000..d0cd6b641ac002 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/override-val-with-table.toml @@ -0,0 +1,3 @@ +inline-t = { nest = {} } + +[inline-t.nest] diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/overwrite-implicitly.toml b/Lib/test/test_tomllib/data/invalid/inline-table/overwrite-implicitly.toml new file mode 100644 index 00000000000000..462c28f168479c --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/overwrite-implicitly.toml @@ -0,0 +1 @@ +a = { b = 1, b.c = 2 } diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/overwrite-value-in-inner-array.toml b/Lib/test/test_tomllib/data/invalid/inline-table/overwrite-value-in-inner-array.toml new file mode 100644 index 00000000000000..34ddb85eeec8dc --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/overwrite-value-in-inner-array.toml @@ -0,0 +1 @@ +tab = { inner.table = [{}], inner.table.val = "bad" } \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/overwrite-value-in-inner-table.toml b/Lib/test/test_tomllib/data/invalid/inline-table/overwrite-value-in-inner-table.toml new file mode 100644 index 00000000000000..750853f0d9645d --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/overwrite-value-in-inner-table.toml @@ -0,0 +1 @@ +tab = { inner = { dog = "best" }, inner.cat = "worst" } \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/inline-table/unclosed-empty.toml b/Lib/test/test_tomllib/data/invalid/inline-table/unclosed-empty.toml new file mode 100644 index 00000000000000..032edb7266c433 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/inline-table/unclosed-empty.toml @@ -0,0 +1 @@ +a={ \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/invalid-comment-char.toml b/Lib/test/test_tomllib/data/invalid/invalid-comment-char.toml new file mode 100644 index 00000000000000..dc5ae336c6a71a --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/invalid-comment-char.toml @@ -0,0 +1 @@ +# form feed ( ) not allowed in comments \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/invalid-escaped-unicode.toml b/Lib/test/test_tomllib/data/invalid/invalid-escaped-unicode.toml new file mode 100644 index 00000000000000..8feba3a30a45d7 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/invalid-escaped-unicode.toml @@ -0,0 +1 @@ +escaped-unicode = "\uabag" diff --git a/Lib/test/test_tomllib/data/invalid/invalid-hex.toml b/Lib/test/test_tomllib/data/invalid/invalid-hex.toml new file mode 100644 index 00000000000000..5c55fe80c45697 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/invalid-hex.toml @@ -0,0 +1 @@ +hex = 0xgabba00f1 diff --git a/Lib/test/test_tomllib/data/invalid/keys-and-vals/ends-early-table-def.toml b/Lib/test/test_tomllib/data/invalid/keys-and-vals/ends-early-table-def.toml new file mode 100644 index 00000000000000..75d43a37eda3a3 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/keys-and-vals/ends-early-table-def.toml @@ -0,0 +1 @@ +[fwfw.wafw \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/keys-and-vals/ends-early.toml b/Lib/test/test_tomllib/data/invalid/keys-and-vals/ends-early.toml new file mode 100644 index 00000000000000..e70e265ae20c04 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/keys-and-vals/ends-early.toml @@ -0,0 +1 @@ +fs.fw \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/keys-and-vals/no-value.toml b/Lib/test/test_tomllib/data/invalid/keys-and-vals/no-value.toml new file mode 100644 index 00000000000000..4f9ab16c61392b --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/keys-and-vals/no-value.toml @@ -0,0 +1 @@ +why-no-value= \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/keys-and-vals/only-ws-after-dot.toml b/Lib/test/test_tomllib/data/invalid/keys-and-vals/only-ws-after-dot.toml new file mode 100644 index 00000000000000..abe52587c9eabe --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/keys-and-vals/only-ws-after-dot.toml @@ -0,0 +1 @@ +fs. \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/keys-and-vals/overwrite-with-deep-table.toml b/Lib/test/test_tomllib/data/invalid/keys-and-vals/overwrite-with-deep-table.toml new file mode 100644 index 00000000000000..103d928bcf7c2f --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/keys-and-vals/overwrite-with-deep-table.toml @@ -0,0 +1,2 @@ +a=1 +[a.b.c.d] diff --git a/Lib/test/test_tomllib/data/invalid/literal-str/unclosed.toml b/Lib/test/test_tomllib/data/invalid/literal-str/unclosed.toml new file mode 100644 index 00000000000000..b1612aa1f6d7e7 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/literal-str/unclosed.toml @@ -0,0 +1 @@ +unclosed='dwdd \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/missing-closing-double-square-bracket.toml b/Lib/test/test_tomllib/data/invalid/missing-closing-double-square-bracket.toml new file mode 100644 index 00000000000000..ae1d0d9cf9a674 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/missing-closing-double-square-bracket.toml @@ -0,0 +1,2 @@ +[[closing-bracket.missing] +blaa=2 diff --git a/Lib/test/test_tomllib/data/invalid/missing-closing-square-bracket.toml b/Lib/test/test_tomllib/data/invalid/missing-closing-square-bracket.toml new file mode 100644 index 00000000000000..354d0167fcd7a1 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/missing-closing-square-bracket.toml @@ -0,0 +1,2 @@ +[closing-bracket.missingö +blaa=2 diff --git a/Lib/test/test_tomllib/data/invalid/multiline-basic-str/carriage-return.toml b/Lib/test/test_tomllib/data/invalid/multiline-basic-str/carriage-return.toml new file mode 100644 index 00000000000000..4c455cfe0e0150 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/multiline-basic-str/carriage-return.toml @@ -0,0 +1,2 @@ +s="""cr is not an allowed line ending but we just tried to use it +""" \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/multiline-basic-str/escape-only.toml b/Lib/test/test_tomllib/data/invalid/multiline-basic-str/escape-only.toml new file mode 100644 index 00000000000000..f3bd9fca2317ea --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/multiline-basic-str/escape-only.toml @@ -0,0 +1 @@ +bee = """\""" diff --git a/Lib/test/test_tomllib/data/invalid/multiline-basic-str/file-ends-after-opening.toml b/Lib/test/test_tomllib/data/invalid/multiline-basic-str/file-ends-after-opening.toml new file mode 100644 index 00000000000000..1c2a56444406de --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/multiline-basic-str/file-ends-after-opening.toml @@ -0,0 +1 @@ +a=""" \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/multiline-basic-str/last-line-escape.toml b/Lib/test/test_tomllib/data/invalid/multiline-basic-str/last-line-escape.toml new file mode 100644 index 00000000000000..92b22b065663e4 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/multiline-basic-str/last-line-escape.toml @@ -0,0 +1,4 @@ +bee = """ +hee \ + +gee \ """ diff --git a/Lib/test/test_tomllib/data/invalid/multiline-basic-str/unclosed-ends-in-whitespace-escape.toml b/Lib/test/test_tomllib/data/invalid/multiline-basic-str/unclosed-ends-in-whitespace-escape.toml new file mode 100644 index 00000000000000..3ba7febbd0c112 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/multiline-basic-str/unclosed-ends-in-whitespace-escape.toml @@ -0,0 +1,3 @@ +bee = """ +hee +gee\ \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/multiline-literal-str/file-ends-after-opening.toml b/Lib/test/test_tomllib/data/invalid/multiline-literal-str/file-ends-after-opening.toml new file mode 100644 index 00000000000000..9a3924a103f924 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/multiline-literal-str/file-ends-after-opening.toml @@ -0,0 +1 @@ +a=''' \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/multiline-literal-str/unclosed.toml b/Lib/test/test_tomllib/data/invalid/multiline-literal-str/unclosed.toml new file mode 100644 index 00000000000000..decd378991359d --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/multiline-literal-str/unclosed.toml @@ -0,0 +1,3 @@ +bee = ''' +hee +gee '' \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/non-scalar-escaped.toml b/Lib/test/test_tomllib/data/invalid/non-scalar-escaped.toml new file mode 100644 index 00000000000000..c787f9ad354289 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/non-scalar-escaped.toml @@ -0,0 +1 @@ +a="\ud800" \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/table/eof-after-opening.toml b/Lib/test/test_tomllib/data/invalid/table/eof-after-opening.toml new file mode 100644 index 00000000000000..8e2f0bef135ba8 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/table/eof-after-opening.toml @@ -0,0 +1 @@ +[ \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/invalid/table/redefine-1.toml b/Lib/test/test_tomllib/data/invalid/table/redefine-1.toml new file mode 100644 index 00000000000000..d2c66eb38c71d8 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/table/redefine-1.toml @@ -0,0 +1,3 @@ +[t1] +t2.t3.v = 0 +[t1.t2] diff --git a/Lib/test/test_tomllib/data/invalid/table/redefine-2.toml b/Lib/test/test_tomllib/data/invalid/table/redefine-2.toml new file mode 100644 index 00000000000000..918017f892a401 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/table/redefine-2.toml @@ -0,0 +1,3 @@ +[t1] +t2.t3.v = 0 +[t1.t2.t3] diff --git a/Lib/test/test_tomllib/data/invalid/unclosed-multiline-string.toml b/Lib/test/test_tomllib/data/invalid/unclosed-multiline-string.toml new file mode 100644 index 00000000000000..401adec2a332b1 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/unclosed-multiline-string.toml @@ -0,0 +1,4 @@ +not-closed= """ +diibaa +blibae ete +eteta diff --git a/Lib/test/test_tomllib/data/invalid/unclosed-string.toml b/Lib/test/test_tomllib/data/invalid/unclosed-string.toml new file mode 100644 index 00000000000000..89f75acf7718d1 --- /dev/null +++ b/Lib/test/test_tomllib/data/invalid/unclosed-string.toml @@ -0,0 +1 @@ +"a-string".must-be = "closed \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/valid/apostrophes-in-literal-string.json b/Lib/test/test_tomllib/data/valid/apostrophes-in-literal-string.json new file mode 100644 index 00000000000000..fddafe7769c05b --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/apostrophes-in-literal-string.json @@ -0,0 +1 @@ +{"this-str-has-apostrophes": {"type": "string", "value": "' there's one already\n'' two more\n''"}} diff --git a/Lib/test/test_tomllib/data/valid/apostrophes-in-literal-string.toml b/Lib/test/test_tomllib/data/valid/apostrophes-in-literal-string.toml new file mode 100644 index 00000000000000..4288d080ca37bb --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/apostrophes-in-literal-string.toml @@ -0,0 +1,3 @@ +this-str-has-apostrophes='''' there's one already +'' two more +''''' diff --git a/Lib/test/test_tomllib/data/valid/array/array-subtables.json b/Lib/test/test_tomllib/data/valid/array/array-subtables.json new file mode 100644 index 00000000000000..69ad37e98343ac --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/array/array-subtables.json @@ -0,0 +1,11 @@ +{"arr": + {"type":"array","value": + [ + {"subtab": + {"val": {"type":"integer","value":"1"} + } + }, + {"subtab": {"val": {"type":"integer","value":"2"}}} + ] + } +} diff --git a/Lib/test/test_tomllib/data/valid/array/array-subtables.toml b/Lib/test/test_tomllib/data/valid/array/array-subtables.toml new file mode 100644 index 00000000000000..70753078411761 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/array/array-subtables.toml @@ -0,0 +1,7 @@ +[[arr]] +[arr.subtab] +val=1 + +[[arr]] +[arr.subtab] +val=2 diff --git a/Lib/test/test_tomllib/data/valid/array/open-parent-table.json b/Lib/test/test_tomllib/data/valid/array/open-parent-table.json new file mode 100644 index 00000000000000..7dba1b69d6a8a5 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/array/open-parent-table.json @@ -0,0 +1,6 @@ +{ + "parent-table": { + "arr": {"type":"array","value":[{},{}]}, + "not-arr": {"type":"integer","value":"1"} + } +} diff --git a/Lib/test/test_tomllib/data/valid/array/open-parent-table.toml b/Lib/test/test_tomllib/data/valid/array/open-parent-table.toml new file mode 100644 index 00000000000000..8eaad12fe3adad --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/array/open-parent-table.toml @@ -0,0 +1,4 @@ +[[parent-table.arr]] +[[parent-table.arr]] +[parent-table] +not-arr = 1 diff --git a/Lib/test/test_tomllib/data/valid/boolean.json b/Lib/test/test_tomllib/data/valid/boolean.json new file mode 100644 index 00000000000000..2540d252776b3e --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/boolean.json @@ -0,0 +1,4 @@ +{ + "a": {"type":"bool","value":"true"}, + "b": {"type":"bool","value":"false"} +} diff --git a/Lib/test/test_tomllib/data/valid/boolean.toml b/Lib/test/test_tomllib/data/valid/boolean.toml new file mode 100644 index 00000000000000..e5aaba35ea8ea3 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/boolean.toml @@ -0,0 +1,2 @@ +'a'=true +"b"=false \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.json b/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.json new file mode 100644 index 00000000000000..99aca873480ec3 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.json @@ -0,0 +1,4 @@ +{ + "local-dt": {"type":"datetime-local","value":"1988-10-27t01:01:01"}, + "zulu-dt": {"type":"datetime","value":"1988-10-27t01:01:01z"} +} diff --git a/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.toml b/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.toml new file mode 100644 index 00000000000000..cf84159de46fd8 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.toml @@ -0,0 +1,2 @@ +local-dt=1988-10-27t01:01:01 +zulu-dt=1988-10-27t01:01:01z diff --git a/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.json b/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.json new file mode 100644 index 00000000000000..4d96abcbc799e6 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.json @@ -0,0 +1,2 @@ +{"t": + {"type":"time-local","value":"00:00:00.999999"}} diff --git a/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.toml b/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.toml new file mode 100644 index 00000000000000..87547c1cf3bd89 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.toml @@ -0,0 +1 @@ +t=00:00:00.99999999999999 \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/valid/empty-inline-table.json b/Lib/test/test_tomllib/data/valid/empty-inline-table.json new file mode 100644 index 00000000000000..2655cfd3ab0bce --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/empty-inline-table.json @@ -0,0 +1 @@ +{"empty": {}} \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/valid/empty-inline-table.toml b/Lib/test/test_tomllib/data/valid/empty-inline-table.toml new file mode 100644 index 00000000000000..d2d15ab9e08ef1 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/empty-inline-table.toml @@ -0,0 +1 @@ +empty ={ }#nothing here diff --git a/Lib/test/test_tomllib/data/valid/five-quotes.json b/Lib/test/test_tomllib/data/valid/five-quotes.json new file mode 100644 index 00000000000000..5cc527408ecf57 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/five-quotes.json @@ -0,0 +1,4 @@ +{ + "five-quotes": {"type":"string","value":"Closing with five quotes\n\"\""}, + "four-quotes": {"type":"string","value":"Closing with four quotes\n\""} +} diff --git a/Lib/test/test_tomllib/data/valid/five-quotes.toml b/Lib/test/test_tomllib/data/valid/five-quotes.toml new file mode 100644 index 00000000000000..ccc98c07416b76 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/five-quotes.toml @@ -0,0 +1,6 @@ +five-quotes = """ +Closing with five quotes +""""" +four-quotes = """ +Closing with four quotes +"""" diff --git a/Lib/test/test_tomllib/data/valid/hex-char.json b/Lib/test/test_tomllib/data/valid/hex-char.json new file mode 100644 index 00000000000000..e632e202fad486 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/hex-char.json @@ -0,0 +1,5 @@ +{ + "a": {"type":"string","value":"a"}, + "b": {"type":"string","value":"b"}, + "c": {"type":"string","value":"c"} +} diff --git a/Lib/test/test_tomllib/data/valid/hex-char.toml b/Lib/test/test_tomllib/data/valid/hex-char.toml new file mode 100644 index 00000000000000..1d9cdf6a437faa --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/hex-char.toml @@ -0,0 +1,3 @@ +a="\u0061" +b="\u0062" +c="\U00000063" \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/valid/multiline-basic-str/ends-in-whitespace-escape.json b/Lib/test/test_tomllib/data/valid/multiline-basic-str/ends-in-whitespace-escape.json new file mode 100644 index 00000000000000..fc54a7c2a3cc08 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/multiline-basic-str/ends-in-whitespace-escape.json @@ -0,0 +1 @@ +{"beee": {"type": "string", "value": "heeee\ngeeee"}} diff --git a/Lib/test/test_tomllib/data/valid/multiline-basic-str/ends-in-whitespace-escape.toml b/Lib/test/test_tomllib/data/valid/multiline-basic-str/ends-in-whitespace-escape.toml new file mode 100644 index 00000000000000..4dffe55fcb5cf0 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/multiline-basic-str/ends-in-whitespace-escape.toml @@ -0,0 +1,6 @@ +beee = """ +heeee +geeee\ + + + """ diff --git a/Lib/test/test_tomllib/data/valid/no-newlines.json b/Lib/test/test_tomllib/data/valid/no-newlines.json new file mode 100644 index 00000000000000..0967ef424bce67 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/no-newlines.json @@ -0,0 +1 @@ +{} diff --git a/Lib/test/test_tomllib/data/valid/no-newlines.toml b/Lib/test/test_tomllib/data/valid/no-newlines.toml new file mode 100644 index 00000000000000..4b87f4c77c97c8 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/no-newlines.toml @@ -0,0 +1 @@ +#no newlines at all here \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/valid/trailing-comma.json b/Lib/test/test_tomllib/data/valid/trailing-comma.json new file mode 100644 index 00000000000000..dc6f1665d1fcc9 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/trailing-comma.json @@ -0,0 +1,7 @@ +{"arr": + {"type":"array","value": + [ + {"type":"integer","value":"1"} + ] + } +} diff --git a/Lib/test/test_tomllib/data/valid/trailing-comma.toml b/Lib/test/test_tomllib/data/valid/trailing-comma.toml new file mode 100644 index 00000000000000..c5d5b9b4122412 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/trailing-comma.toml @@ -0,0 +1 @@ +arr=[1,] \ No newline at end of file diff --git a/Lib/test/test_tomllib/test_data.py b/Lib/test/test_tomllib/test_data.py new file mode 100644 index 00000000000000..3483d93022b01b --- /dev/null +++ b/Lib/test/test_tomllib/test_data.py @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +import json +from pathlib import Path +import unittest + +from . import burntsushi, tomllib + + +class MissingFile: + def __init__(self, path: Path): + self.path = path + + +DATA_DIR = Path(__file__).parent / "data" + +VALID_FILES = tuple((DATA_DIR / "valid").glob("**/*.toml")) +assert VALID_FILES, "Valid TOML test files not found" + +_expected_files = [] +for p in VALID_FILES: + json_path = p.with_suffix(".json") + try: + text = json.loads(json_path.read_bytes().decode()) + except FileNotFoundError: + text = MissingFile(json_path) + _expected_files.append(text) +VALID_FILES_EXPECTED = tuple(_expected_files) + +INVALID_FILES = tuple((DATA_DIR / "invalid").glob("**/*.toml")) +assert INVALID_FILES, "Invalid TOML test files not found" + + +class TestData(unittest.TestCase): + def test_invalid(self): + for invalid in INVALID_FILES: + with self.subTest(msg=invalid.stem): + toml_bytes = invalid.read_bytes() + try: + toml_str = toml_bytes.decode() + except UnicodeDecodeError: + # Some BurntSushi tests are not valid UTF-8. Skip those. + continue + with self.assertRaises(tomllib.TOMLDecodeError): + tomllib.loads(toml_str) + + def test_valid(self): + for valid, expected in zip(VALID_FILES, VALID_FILES_EXPECTED): + with self.subTest(msg=valid.stem): + if isinstance(expected, MissingFile): + # For a poor man's xfail, assert that this is one of the + # test cases where expected data is known to be missing. + assert valid.stem in { + "qa-array-inline-nested-1000", + "qa-table-inline-nested-1000", + } + continue + toml_str = valid.read_bytes().decode() + actual = tomllib.loads(toml_str) + actual = burntsushi.convert(actual) + expected = burntsushi.normalize(expected) + self.assertEqual(actual, expected) diff --git a/Lib/test/test_tomllib/test_error.py b/Lib/test/test_tomllib/test_error.py new file mode 100644 index 00000000000000..72446267f04759 --- /dev/null +++ b/Lib/test/test_tomllib/test_error.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +import unittest + +from . import tomllib + + +class TestError(unittest.TestCase): + def test_line_and_col(self): + with self.assertRaises(tomllib.TOMLDecodeError) as exc_info: + tomllib.loads("val=.") + self.assertEqual(str(exc_info.exception), "Invalid value (at line 1, column 5)") + + with self.assertRaises(tomllib.TOMLDecodeError) as exc_info: + tomllib.loads(".") + self.assertEqual( + str(exc_info.exception), "Invalid statement (at line 1, column 1)" + ) + + with self.assertRaises(tomllib.TOMLDecodeError) as exc_info: + tomllib.loads("\n\nval=.") + self.assertEqual(str(exc_info.exception), "Invalid value (at line 3, column 5)") + + with self.assertRaises(tomllib.TOMLDecodeError) as exc_info: + tomllib.loads("\n\n.") + self.assertEqual( + str(exc_info.exception), "Invalid statement (at line 3, column 1)" + ) + + def test_missing_value(self): + with self.assertRaises(tomllib.TOMLDecodeError) as exc_info: + tomllib.loads("\n\nfwfw=") + self.assertEqual(str(exc_info.exception), "Invalid value (at end of document)") + + def test_invalid_char_quotes(self): + with self.assertRaises(tomllib.TOMLDecodeError) as exc_info: + tomllib.loads("v = '\n'") + self.assertTrue(" '\\n' " in str(exc_info.exception)) + + def test_module_name(self): + self.assertEqual(tomllib.TOMLDecodeError().__module__, tomllib.__name__) + + def test_invalid_parse_float(self): + def dict_returner(s: str) -> dict: + return {} + + def list_returner(s: str) -> list: + return [] + + for invalid_parse_float in (dict_returner, list_returner): + with self.assertRaises(ValueError) as exc_info: + tomllib.loads("f=0.1", parse_float=invalid_parse_float) + self.assertEqual( + str(exc_info.exception), "parse_float must not return dicts or lists" + ) diff --git a/Lib/test/test_tomllib/test_misc.py b/Lib/test/test_tomllib/test_misc.py new file mode 100644 index 00000000000000..76fa5905fa49ef --- /dev/null +++ b/Lib/test/test_tomllib/test_misc.py @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +import copy +import datetime +from decimal import Decimal as D +from pathlib import Path +import tempfile +import unittest + +from . import tomllib + + +class TestMiscellaneous(unittest.TestCase): + def test_load(self): + content = "one=1 \n two='two' \n arr=[]" + expected = {"one": 1, "two": "two", "arr": []} + with tempfile.TemporaryDirectory() as tmp_dir_path: + file_path = Path(tmp_dir_path) / "test.toml" + file_path.write_text(content) + + with open(file_path, "rb") as bin_f: + actual = tomllib.load(bin_f) + self.assertEqual(actual, expected) + + def test_incorrect_load(self): + content = "one=1" + with tempfile.TemporaryDirectory() as tmp_dir_path: + file_path = Path(tmp_dir_path) / "test.toml" + file_path.write_text(content) + + with open(file_path, "r") as txt_f: + with self.assertRaises(TypeError): + tomllib.load(txt_f) # type: ignore[arg-type] + + def test_parse_float(self): + doc = """ + val=0.1 + biggest1=inf + biggest2=+inf + smallest=-inf + notnum1=nan + notnum2=-nan + notnum3=+nan + """ + obj = tomllib.loads(doc, parse_float=D) + expected = { + "val": D("0.1"), + "biggest1": D("inf"), + "biggest2": D("inf"), + "smallest": D("-inf"), + "notnum1": D("nan"), + "notnum2": D("-nan"), + "notnum3": D("nan"), + } + for k, expected_val in expected.items(): + actual_val = obj[k] + self.assertIsInstance(actual_val, D) + if actual_val.is_nan(): + self.assertTrue(expected_val.is_nan()) + else: + self.assertEqual(actual_val, expected_val) + + def test_deepcopy(self): + doc = """ + [bliibaa.diibaa] + offsettime=[1979-05-27T00:32:00.999999-07:00] + """ + obj = tomllib.loads(doc) + obj_copy = copy.deepcopy(obj) + self.assertEqual(obj_copy, obj) + expected_obj = { + "bliibaa": { + "diibaa": { + "offsettime": [ + datetime.datetime( + 1979, + 5, + 27, + 0, + 32, + 0, + 999999, + tzinfo=datetime.timezone(datetime.timedelta(hours=-7)), + ) + ] + } + } + } + self.assertEqual(obj_copy, expected_obj) + + def test_inline_array_recursion_limit(self): + nest_count = 470 + recursive_array_toml = "arr = " + nest_count * "[" + nest_count * "]" + tomllib.loads(recursive_array_toml) + + def test_inline_table_recursion_limit(self): + nest_count = 310 + recursive_table_toml = nest_count * "key = {" + nest_count * "}" + tomllib.loads(recursive_table_toml) diff --git a/Lib/tomllib/__init__.py b/Lib/tomllib/__init__.py new file mode 100644 index 00000000000000..ef91cb9d25dd79 --- /dev/null +++ b/Lib/tomllib/__init__.py @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +__all__ = ("loads", "load", "TOMLDecodeError") + +from ._parser import TOMLDecodeError, load, loads + +# Pretend this exception was created here. +TOMLDecodeError.__module__ = __name__ diff --git a/Lib/tomllib/_parser.py b/Lib/tomllib/_parser.py new file mode 100644 index 00000000000000..45ca7a89630f0e --- /dev/null +++ b/Lib/tomllib/_parser.py @@ -0,0 +1,691 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +from __future__ import annotations + +from collections.abc import Iterable +import string +from types import MappingProxyType +from typing import Any, BinaryIO, NamedTuple + +from ._re import ( + RE_DATETIME, + RE_LOCALTIME, + RE_NUMBER, + match_to_datetime, + match_to_localtime, + match_to_number, +) +from ._types import Key, ParseFloat, Pos + +ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127)) + +# Neither of these sets include quotation mark or backslash. They are +# currently handled as separate cases in the parser functions. +ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t") +ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n") + +ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS +ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS + +ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS + +TOML_WS = frozenset(" \t") +TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n") +BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_") +KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'") +HEXDIGIT_CHARS = frozenset(string.hexdigits) + +BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType( + { + "\\b": "\u0008", # backspace + "\\t": "\u0009", # tab + "\\n": "\u000A", # linefeed + "\\f": "\u000C", # form feed + "\\r": "\u000D", # carriage return + '\\"': "\u0022", # quote + "\\\\": "\u005C", # backslash + } +) + + +class TOMLDecodeError(ValueError): + """An error raised if a document is not valid TOML.""" + + +def load(fp: BinaryIO, /, *, parse_float: ParseFloat = float) -> dict[str, Any]: + """Parse TOML from a binary file object.""" + b = fp.read() + try: + s = b.decode() + except AttributeError: + raise TypeError( + "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`" + ) from None + return loads(s, parse_float=parse_float) + + +def loads(s: str, /, *, parse_float: ParseFloat = float) -> dict[str, Any]: # noqa: C901 + """Parse TOML from a string.""" + + # The spec allows converting "\r\n" to "\n", even in string + # literals. Let's do so to simplify parsing. + src = s.replace("\r\n", "\n") + pos = 0 + out = Output(NestedDict(), Flags()) + header: Key = () + parse_float = make_safe_parse_float(parse_float) + + # Parse one statement at a time + # (typically means one line in TOML source) + while True: + # 1. Skip line leading whitespace + pos = skip_chars(src, pos, TOML_WS) + + # 2. Parse rules. Expect one of the following: + # - end of file + # - end of line + # - comment + # - key/value pair + # - append dict to list (and move to its namespace) + # - create dict (and move to its namespace) + # Skip trailing whitespace when applicable. + try: + char = src[pos] + except IndexError: + break + if char == "\n": + pos += 1 + continue + if char in KEY_INITIAL_CHARS: + pos = key_value_rule(src, pos, out, header, parse_float) + pos = skip_chars(src, pos, TOML_WS) + elif char == "[": + try: + second_char: str | None = src[pos + 1] + except IndexError: + second_char = None + out.flags.finalize_pending() + if second_char == "[": + pos, header = create_list_rule(src, pos, out) + else: + pos, header = create_dict_rule(src, pos, out) + pos = skip_chars(src, pos, TOML_WS) + elif char != "#": + raise suffixed_err(src, pos, "Invalid statement") + + # 3. Skip comment + pos = skip_comment(src, pos) + + # 4. Expect end of line or end of file + try: + char = src[pos] + except IndexError: + break + if char != "\n": + raise suffixed_err( + src, pos, "Expected newline or end of document after a statement" + ) + pos += 1 + + return out.data.dict + + +class Flags: + """Flags that map to parsed keys/namespaces.""" + + # Marks an immutable namespace (inline array or inline table). + FROZEN = 0 + # Marks a nest that has been explicitly created and can no longer + # be opened using the "[table]" syntax. + EXPLICIT_NEST = 1 + + def __init__(self) -> None: + self._flags: dict[str, dict] = {} + self._pending_flags: set[tuple[Key, int]] = set() + + def add_pending(self, key: Key, flag: int) -> None: + self._pending_flags.add((key, flag)) + + def finalize_pending(self) -> None: + for key, flag in self._pending_flags: + self.set(key, flag, recursive=False) + self._pending_flags.clear() + + def unset_all(self, key: Key) -> None: + cont = self._flags + for k in key[:-1]: + if k not in cont: + return + cont = cont[k]["nested"] + cont.pop(key[-1], None) + + def set(self, key: Key, flag: int, *, recursive: bool) -> None: # noqa: A003 + cont = self._flags + key_parent, key_stem = key[:-1], key[-1] + for k in key_parent: + if k not in cont: + cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}} + cont = cont[k]["nested"] + if key_stem not in cont: + cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}} + cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag) + + def is_(self, key: Key, flag: int) -> bool: + if not key: + return False # document root has no flags + cont = self._flags + for k in key[:-1]: + if k not in cont: + return False + inner_cont = cont[k] + if flag in inner_cont["recursive_flags"]: + return True + cont = inner_cont["nested"] + key_stem = key[-1] + if key_stem in cont: + cont = cont[key_stem] + return flag in cont["flags"] or flag in cont["recursive_flags"] + return False + + +class NestedDict: + def __init__(self) -> None: + # The parsed content of the TOML document + self.dict: dict[str, Any] = {} + + def get_or_create_nest( + self, + key: Key, + *, + access_lists: bool = True, + ) -> dict: + cont: Any = self.dict + for k in key: + if k not in cont: + cont[k] = {} + cont = cont[k] + if access_lists and isinstance(cont, list): + cont = cont[-1] + if not isinstance(cont, dict): + raise KeyError("There is no nest behind this key") + return cont + + def append_nest_to_list(self, key: Key) -> None: + cont = self.get_or_create_nest(key[:-1]) + last_key = key[-1] + if last_key in cont: + list_ = cont[last_key] + if not isinstance(list_, list): + raise KeyError("An object other than list found behind this key") + list_.append({}) + else: + cont[last_key] = [{}] + + +class Output(NamedTuple): + data: NestedDict + flags: Flags + + +def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos: + try: + while src[pos] in chars: + pos += 1 + except IndexError: + pass + return pos + + +def skip_until( + src: str, + pos: Pos, + expect: str, + *, + error_on: frozenset[str], + error_on_eof: bool, +) -> Pos: + try: + new_pos = src.index(expect, pos) + except ValueError: + new_pos = len(src) + if error_on_eof: + raise suffixed_err(src, new_pos, f"Expected {expect!r}") from None + + if not error_on.isdisjoint(src[pos:new_pos]): + while src[pos] not in error_on: + pos += 1 + raise suffixed_err(src, pos, f"Found invalid character {src[pos]!r}") + return new_pos + + +def skip_comment(src: str, pos: Pos) -> Pos: + try: + char: str | None = src[pos] + except IndexError: + char = None + if char == "#": + return skip_until( + src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False + ) + return pos + + +def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos: + while True: + pos_before_skip = pos + pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE) + pos = skip_comment(src, pos) + if pos == pos_before_skip: + return pos + + +def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]: + pos += 1 # Skip "[" + pos = skip_chars(src, pos, TOML_WS) + pos, key = parse_key(src, pos) + + if out.flags.is_(key, Flags.EXPLICIT_NEST) or out.flags.is_(key, Flags.FROZEN): + raise suffixed_err(src, pos, f"Cannot declare {key} twice") + out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False) + try: + out.data.get_or_create_nest(key) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + + if not src.startswith("]", pos): + raise suffixed_err(src, pos, "Expected ']' at the end of a table declaration") + return pos + 1, key + + +def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]: + pos += 2 # Skip "[[" + pos = skip_chars(src, pos, TOML_WS) + pos, key = parse_key(src, pos) + + if out.flags.is_(key, Flags.FROZEN): + raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}") + # Free the namespace now that it points to another empty list item... + out.flags.unset_all(key) + # ...but this key precisely is still prohibited from table declaration + out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False) + try: + out.data.append_nest_to_list(key) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + + if not src.startswith("]]", pos): + raise suffixed_err(src, pos, "Expected ']]' at the end of an array declaration") + return pos + 2, key + + +def key_value_rule( + src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat +) -> Pos: + pos, key, value = parse_key_value_pair(src, pos, parse_float) + key_parent, key_stem = key[:-1], key[-1] + abs_key_parent = header + key_parent + + relative_path_cont_keys = (header + key[:i] for i in range(1, len(key))) + for cont_key in relative_path_cont_keys: + # Check that dotted key syntax does not redefine an existing table + if out.flags.is_(cont_key, Flags.EXPLICIT_NEST): + raise suffixed_err(src, pos, f"Cannot redefine namespace {cont_key}") + # Containers in the relative path can't be opened with the table syntax or + # dotted key/value syntax in following table sections. + out.flags.add_pending(cont_key, Flags.EXPLICIT_NEST) + + if out.flags.is_(abs_key_parent, Flags.FROZEN): + raise suffixed_err( + src, pos, f"Cannot mutate immutable namespace {abs_key_parent}" + ) + + try: + nest = out.data.get_or_create_nest(abs_key_parent) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + if key_stem in nest: + raise suffixed_err(src, pos, "Cannot overwrite a value") + # Mark inline table and array namespaces recursively immutable + if isinstance(value, (dict, list)): + out.flags.set(header + key, Flags.FROZEN, recursive=True) + nest[key_stem] = value + return pos + + +def parse_key_value_pair( + src: str, pos: Pos, parse_float: ParseFloat +) -> tuple[Pos, Key, Any]: + pos, key = parse_key(src, pos) + try: + char: str | None = src[pos] + except IndexError: + char = None + if char != "=": + raise suffixed_err(src, pos, "Expected '=' after a key in a key/value pair") + pos += 1 + pos = skip_chars(src, pos, TOML_WS) + pos, value = parse_value(src, pos, parse_float) + return pos, key, value + + +def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]: + pos, key_part = parse_key_part(src, pos) + key: Key = (key_part,) + pos = skip_chars(src, pos, TOML_WS) + while True: + try: + char: str | None = src[pos] + except IndexError: + char = None + if char != ".": + return pos, key + pos += 1 + pos = skip_chars(src, pos, TOML_WS) + pos, key_part = parse_key_part(src, pos) + key += (key_part,) + pos = skip_chars(src, pos, TOML_WS) + + +def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]: + try: + char: str | None = src[pos] + except IndexError: + char = None + if char in BARE_KEY_CHARS: + start_pos = pos + pos = skip_chars(src, pos, BARE_KEY_CHARS) + return pos, src[start_pos:pos] + if char == "'": + return parse_literal_str(src, pos) + if char == '"': + return parse_one_line_basic_str(src, pos) + raise suffixed_err(src, pos, "Invalid initial character for a key part") + + +def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]: + pos += 1 + return parse_basic_str(src, pos, multiline=False) + + +def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list]: + pos += 1 + array: list = [] + + pos = skip_comments_and_array_ws(src, pos) + if src.startswith("]", pos): + return pos + 1, array + while True: + pos, val = parse_value(src, pos, parse_float) + array.append(val) + pos = skip_comments_and_array_ws(src, pos) + + c = src[pos : pos + 1] + if c == "]": + return pos + 1, array + if c != ",": + raise suffixed_err(src, pos, "Unclosed array") + pos += 1 + + pos = skip_comments_and_array_ws(src, pos) + if src.startswith("]", pos): + return pos + 1, array + + +def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, dict]: + pos += 1 + nested_dict = NestedDict() + flags = Flags() + + pos = skip_chars(src, pos, TOML_WS) + if src.startswith("}", pos): + return pos + 1, nested_dict.dict + while True: + pos, key, value = parse_key_value_pair(src, pos, parse_float) + key_parent, key_stem = key[:-1], key[-1] + if flags.is_(key, Flags.FROZEN): + raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}") + try: + nest = nested_dict.get_or_create_nest(key_parent, access_lists=False) + except KeyError: + raise suffixed_err(src, pos, "Cannot overwrite a value") from None + if key_stem in nest: + raise suffixed_err(src, pos, f"Duplicate inline table key {key_stem!r}") + nest[key_stem] = value + pos = skip_chars(src, pos, TOML_WS) + c = src[pos : pos + 1] + if c == "}": + return pos + 1, nested_dict.dict + if c != ",": + raise suffixed_err(src, pos, "Unclosed inline table") + if isinstance(value, (dict, list)): + flags.set(key, Flags.FROZEN, recursive=True) + pos += 1 + pos = skip_chars(src, pos, TOML_WS) + + +def parse_basic_str_escape( + src: str, pos: Pos, *, multiline: bool = False +) -> tuple[Pos, str]: + escape_id = src[pos : pos + 2] + pos += 2 + if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}: + # Skip whitespace until next non-whitespace character or end of + # the doc. Error if non-whitespace is found before newline. + if escape_id != "\\\n": + pos = skip_chars(src, pos, TOML_WS) + try: + char = src[pos] + except IndexError: + return pos, "" + if char != "\n": + raise suffixed_err(src, pos, "Unescaped '\\' in a string") + pos += 1 + pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE) + return pos, "" + if escape_id == "\\u": + return parse_hex_char(src, pos, 4) + if escape_id == "\\U": + return parse_hex_char(src, pos, 8) + try: + return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id] + except KeyError: + raise suffixed_err(src, pos, "Unescaped '\\' in a string") from None + + +def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]: + return parse_basic_str_escape(src, pos, multiline=True) + + +def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]: + hex_str = src[pos : pos + hex_len] + if len(hex_str) != hex_len or not HEXDIGIT_CHARS.issuperset(hex_str): + raise suffixed_err(src, pos, "Invalid hex value") + pos += hex_len + hex_int = int(hex_str, 16) + if not is_unicode_scalar_value(hex_int): + raise suffixed_err(src, pos, "Escaped character is not a Unicode scalar value") + return pos, chr(hex_int) + + +def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]: + pos += 1 # Skip starting apostrophe + start_pos = pos + pos = skip_until( + src, pos, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True + ) + return pos + 1, src[start_pos:pos] # Skip ending apostrophe + + +def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]: + pos += 3 + if src.startswith("\n", pos): + pos += 1 + + if literal: + delim = "'" + end_pos = skip_until( + src, + pos, + "'''", + error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS, + error_on_eof=True, + ) + result = src[pos:end_pos] + pos = end_pos + 3 + else: + delim = '"' + pos, result = parse_basic_str(src, pos, multiline=True) + + # Add at maximum two extra apostrophes/quotes if the end sequence + # is 4 or 5 chars long instead of just 3. + if not src.startswith(delim, pos): + return pos, result + pos += 1 + if not src.startswith(delim, pos): + return pos, result + delim + pos += 1 + return pos, result + (delim * 2) + + +def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]: + if multiline: + error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS + parse_escapes = parse_basic_str_escape_multiline + else: + error_on = ILLEGAL_BASIC_STR_CHARS + parse_escapes = parse_basic_str_escape + result = "" + start_pos = pos + while True: + try: + char = src[pos] + except IndexError: + raise suffixed_err(src, pos, "Unterminated string") from None + if char == '"': + if not multiline: + return pos + 1, result + src[start_pos:pos] + if src.startswith('"""', pos): + return pos + 3, result + src[start_pos:pos] + pos += 1 + continue + if char == "\\": + result += src[start_pos:pos] + pos, parsed_escape = parse_escapes(src, pos) + result += parsed_escape + start_pos = pos + continue + if char in error_on: + raise suffixed_err(src, pos, f"Illegal character {char!r}") + pos += 1 + + +def parse_value( # noqa: C901 + src: str, pos: Pos, parse_float: ParseFloat +) -> tuple[Pos, Any]: + try: + char: str | None = src[pos] + except IndexError: + char = None + + # IMPORTANT: order conditions based on speed of checking and likelihood + + # Basic strings + if char == '"': + if src.startswith('"""', pos): + return parse_multiline_str(src, pos, literal=False) + return parse_one_line_basic_str(src, pos) + + # Literal strings + if char == "'": + if src.startswith("'''", pos): + return parse_multiline_str(src, pos, literal=True) + return parse_literal_str(src, pos) + + # Booleans + if char == "t": + if src.startswith("true", pos): + return pos + 4, True + if char == "f": + if src.startswith("false", pos): + return pos + 5, False + + # Arrays + if char == "[": + return parse_array(src, pos, parse_float) + + # Inline tables + if char == "{": + return parse_inline_table(src, pos, parse_float) + + # Dates and times + datetime_match = RE_DATETIME.match(src, pos) + if datetime_match: + try: + datetime_obj = match_to_datetime(datetime_match) + except ValueError as e: + raise suffixed_err(src, pos, "Invalid date or datetime") from e + return datetime_match.end(), datetime_obj + localtime_match = RE_LOCALTIME.match(src, pos) + if localtime_match: + return localtime_match.end(), match_to_localtime(localtime_match) + + # Integers and "normal" floats. + # The regex will greedily match any type starting with a decimal + # char, so needs to be located after handling of dates and times. + number_match = RE_NUMBER.match(src, pos) + if number_match: + return number_match.end(), match_to_number(number_match, parse_float) + + # Special floats + first_three = src[pos : pos + 3] + if first_three in {"inf", "nan"}: + return pos + 3, parse_float(first_three) + first_four = src[pos : pos + 4] + if first_four in {"-inf", "+inf", "-nan", "+nan"}: + return pos + 4, parse_float(first_four) + + raise suffixed_err(src, pos, "Invalid value") + + +def suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError: + """Return a `TOMLDecodeError` where error message is suffixed with + coordinates in source.""" + + def coord_repr(src: str, pos: Pos) -> str: + if pos >= len(src): + return "end of document" + line = src.count("\n", 0, pos) + 1 + if line == 1: + column = pos + 1 + else: + column = pos - src.rindex("\n", 0, pos) + return f"line {line}, column {column}" + + return TOMLDecodeError(f"{msg} (at {coord_repr(src, pos)})") + + +def is_unicode_scalar_value(codepoint: int) -> bool: + return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111) + + +def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat: + """A decorator to make `parse_float` safe. + + `parse_float` must not return dicts or lists, because these types + would be mixed with parsed TOML tables and arrays, thus confusing + the parser. The returned decorated callable raises `ValueError` + instead of returning illegal types. + """ + # The default `float` callable never returns illegal types. Optimize it. + if parse_float is float: # type: ignore[comparison-overlap] + return float + + def safe_parse_float(float_str: str) -> Any: + float_value = parse_float(float_str) + if isinstance(float_value, (dict, list)): + raise ValueError("parse_float must not return dicts or lists") + return float_value + + return safe_parse_float diff --git a/Lib/tomllib/_re.py b/Lib/tomllib/_re.py new file mode 100644 index 00000000000000..994bb7493fd928 --- /dev/null +++ b/Lib/tomllib/_re.py @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +from __future__ import annotations + +from datetime import date, datetime, time, timedelta, timezone, tzinfo +from functools import lru_cache +import re +from typing import Any + +from ._types import ParseFloat + +# E.g. +# - 00:32:00.999999 +# - 00:32:00 +_TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?" + +RE_NUMBER = re.compile( + r""" +0 +(?: + x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex + | + b[01](?:_?[01])* # bin + | + o[0-7](?:_?[0-7])* # oct +) +| +[+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part +(?P + (?:\.[0-9](?:_?[0-9])*)? # optional fractional part + (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part +) +""", + flags=re.VERBOSE, +) +RE_LOCALTIME = re.compile(_TIME_RE_STR) +RE_DATETIME = re.compile( + rf""" +([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27 +(?: + [Tt ] + {_TIME_RE_STR} + (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset +)? +""", + flags=re.VERBOSE, +) + + +def match_to_datetime(match: re.Match) -> datetime | date: + """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`. + + Raises ValueError if the match does not correspond to a valid date + or datetime. + """ + ( + year_str, + month_str, + day_str, + hour_str, + minute_str, + sec_str, + micros_str, + zulu_time, + offset_sign_str, + offset_hour_str, + offset_minute_str, + ) = match.groups() + year, month, day = int(year_str), int(month_str), int(day_str) + if hour_str is None: + return date(year, month, day) + hour, minute, sec = int(hour_str), int(minute_str), int(sec_str) + micros = int(micros_str.ljust(6, "0")) if micros_str else 0 + if offset_sign_str: + tz: tzinfo | None = cached_tz( + offset_hour_str, offset_minute_str, offset_sign_str + ) + elif zulu_time: + tz = timezone.utc + else: # local date-time + tz = None + return datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz) + + +@lru_cache(maxsize=None) +def cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone: + sign = 1 if sign_str == "+" else -1 + return timezone( + timedelta( + hours=sign * int(hour_str), + minutes=sign * int(minute_str), + ) + ) + + +def match_to_localtime(match: re.Match) -> time: + hour_str, minute_str, sec_str, micros_str = match.groups() + micros = int(micros_str.ljust(6, "0")) if micros_str else 0 + return time(int(hour_str), int(minute_str), int(sec_str), micros) + + +def match_to_number(match: re.Match, parse_float: ParseFloat) -> Any: + if match.group("floatpart"): + return parse_float(match.group()) + return int(match.group(), 0) diff --git a/Lib/tomllib/_types.py b/Lib/tomllib/_types.py new file mode 100644 index 00000000000000..d949412e03b29d --- /dev/null +++ b/Lib/tomllib/_types.py @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2021 Taneli Hukkinen +# Licensed to PSF under a Contributor Agreement. + +from typing import Any, Callable, Tuple + +# Type annotations +ParseFloat = Callable[[str], Any] +Key = Tuple[str, ...] +Pos = int diff --git a/Misc/NEWS.d/next/Library/2022-02-23-01-11-08.bpo-40059.Iwc9UH.rst b/Misc/NEWS.d/next/Library/2022-02-23-01-11-08.bpo-40059.Iwc9UH.rst new file mode 100644 index 00000000000000..d41ff1304e83b9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-02-23-01-11-08.bpo-40059.Iwc9UH.rst @@ -0,0 +1 @@ +:pep:`680`, the :mod:`tomllib` module. Adds support for parsing TOML. diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h index 754fa94e35eba4..553585a76a394a 100644 --- a/Python/stdlib_module_names.h +++ b/Python/stdlib_module_names.h @@ -277,6 +277,7 @@ static const char* _Py_stdlib_module_names[] = { "tkinter", "token", "tokenize", +"tomllib", "trace", "traceback", "tracemalloc",