From 9236beea928f4bbd20e2a32bd20de419dcd9130b Mon Sep 17 00:00:00 2001 From: Avy Faingezicht Date: Tue, 30 May 2023 14:50:54 -0700 Subject: [PATCH 1/5] create a pointer from extracted elements back to the bs4 tag --- .gitignore | 161 ++++++++++++++++++++++++++++ ixbrlparse/components/nonnumeric.py | 7 +- ixbrlparse/components/numeric.py | 7 +- ixbrlparse/core.py | 4 + tests/test_parse.py | 3 + 5 files changed, 178 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 8c2be7a..23b8014 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,163 @@ env/ working/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/ixbrlparse/components/nonnumeric.py b/ixbrlparse/components/nonnumeric.py index 412b22e..fbf0adb 100644 --- a/ixbrlparse/components/nonnumeric.py +++ b/ixbrlparse/components/nonnumeric.py @@ -1,6 +1,8 @@ from copy import deepcopy from typing import Any, Dict, List, Optional, Union +from bs4 import Tag + from ixbrlparse.components import ixbrlContext @@ -11,8 +13,8 @@ def __init__( name: str, format_: Optional[str], value: str, + soup_tag: Optional[Tag] = None, ) -> None: - name_split: List[str] = name.split(":", maxsplit=1) if len(name_split) == 2: self.schema = name_split[0] @@ -24,9 +26,10 @@ def __init__( self.context = context self.format = format_ self.value = value + self.soup_tag = soup_tag def to_json(self) -> Dict[str, Any]: - values = deepcopy(self.__dict__) + values = {k: deepcopy(v) for k, v in self.__dict__.items() if k != "soup_tag"} if isinstance(self.context, ixbrlContext): values["context"] = self.context.to_json() return values diff --git a/ixbrlparse/components/numeric.py b/ixbrlparse/components/numeric.py index a07a298..8c60f29 100644 --- a/ixbrlparse/components/numeric.py +++ b/ixbrlparse/components/numeric.py @@ -1,12 +1,13 @@ from copy import deepcopy from typing import Dict, Optional, Union +from bs4 import Tag + from .context import ixbrlContext from .transform import get_format, ixbrlFormat class ixbrlNumeric: - # contextref # decimals # format @@ -18,6 +19,7 @@ class ixbrlNumeric: # xmlns:ix def __init__( self, + soup_tag: Optional[Tag] = None, name: Optional[str] = None, unit: Optional[str] = None, value: Optional[Union[str, int, float]] = None, @@ -44,6 +46,7 @@ def __init__( self.context: Union[ixbrlContext, str, None] = context self.unit: Optional[str] = unit self.value: Optional[Union[int, float]] = None + self.soup_tag = soup_tag format_ = { "format_": attrs.get("format"), @@ -61,7 +64,7 @@ def __init__( raise def to_json(self) -> Dict: - values = deepcopy(self.__dict__) + values = {k: deepcopy(v) for k, v in self.__dict__.items() if k != "soup_tag"} if isinstance(self.format, ixbrlFormat): values["format"] = self.format.to_json() if isinstance(self.context, ixbrlContext): diff --git a/ixbrlparse/core.py b/ixbrlparse/core.py index e2a09f2..abad985 100644 --- a/ixbrlparse/core.py +++ b/ixbrlparse/core.py @@ -177,6 +177,7 @@ def _get_nonnumeric(self) -> None: value=text.strip().replace("\n", "") if isinstance(text, str) else "", + soup_tag=s, ) ) except Exception as e: @@ -198,6 +199,7 @@ def _get_numeric(self) -> None: text=s.text, context=self.contexts.get(s["contextRef"], s["contextRef"]), unit=self.units.get(s["unitRef"], s["unitRef"]), + soup_tag=s, **s.attrs ) ) @@ -248,6 +250,7 @@ def _get_numeric(self) -> None: text=s.text, context=self.contexts.get(context_ref, context_ref), unit=self.units.get(unit_ref, unit_ref), + soup_tag=s, **s.attrs ) ) @@ -289,6 +292,7 @@ def _get_nonnumeric(self) -> None: value=text.strip().replace("\n", "") if isinstance(text, str) else "", + soup_tag=s, ) ) diff --git a/tests/test_parse.py b/tests/test_parse.py index c446c80..b798a47 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -243,6 +243,7 @@ def test_nonnumeric(): assert n.value == "03456789" assert isinstance(n.context, ixbrlContext) value_seen = True + assert x.nonnumeric[0].soup_tag is not None assert value_seen @@ -287,6 +288,7 @@ def test_numeric(): assert x.numeric[0].value == 52982 assert x.numeric[0].name == "PropertyPlantEquipment" assert x.numeric[0].schema == "ns5" + assert x.nonnumeric[0].soup_tag is not None def test_numeric_xml(): @@ -315,6 +317,7 @@ def test_numeric_xml(): assert x.numeric[0].value == 1 assert x.numeric[0].name == "CashBankInHand" assert x.numeric[0].schema == "unknown" + assert x.nonnumeric[0].soup_tag is not None def test_exclude(): From daca2c590ce69684f7c76b04641cda77c0d647e8 Mon Sep 17 00:00:00 2001 From: Avy Faingezicht Date: Wed, 28 Jun 2023 08:36:15 -0700 Subject: [PATCH 2/5] address PR comments --- tests/test_parse.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_parse.py b/tests/test_parse.py index b798a47..645ba1e 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -3,7 +3,7 @@ from datetime import date import pytest -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, Tag from ixbrlparse import IXBRL from ixbrlparse.core import ( @@ -243,7 +243,7 @@ def test_nonnumeric(): assert n.value == "03456789" assert isinstance(n.context, ixbrlContext) value_seen = True - assert x.nonnumeric[0].soup_tag is not None + assert isinstance(x.nonnumeric[0].soup_tag, Tag) assert value_seen @@ -288,7 +288,7 @@ def test_numeric(): assert x.numeric[0].value == 52982 assert x.numeric[0].name == "PropertyPlantEquipment" assert x.numeric[0].schema == "ns5" - assert x.nonnumeric[0].soup_tag is not None + assert isinstance(x.nonnumeric[0].soup_tag, Tag) def test_numeric_xml(): @@ -317,7 +317,7 @@ def test_numeric_xml(): assert x.numeric[0].value == 1 assert x.numeric[0].name == "CashBankInHand" assert x.numeric[0].schema == "unknown" - assert x.nonnumeric[0].soup_tag is not None + assert isinstance(x.nonnumeric[0].soup_tag, Tag) def test_exclude(): From e608c5dee1faf6d0d45e7b1593d98adaeb068af6 Mon Sep 17 00:00:00 2001 From: David Kane Date: Sun, 9 Jul 2023 09:42:55 +0100 Subject: [PATCH 3/5] type checking --- tests/test_parse.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/test_parse.py b/tests/test_parse.py index 645ba1e..4a91d2a 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -111,7 +111,7 @@ def test_open_xml_str(): "https://xbrl.frc.org.uk/FRS-102/2014-09-01/FRS-102-2014-09-01.xsd", "https://xbrl.frc.org.uk/FRS-102/2014-09-01/FRS-102-2014-09-01.xsd", "http://www.companieshouse.gov.uk/ef/xbrl/uk/fr/gaap/ae/2009-06-21/uk-gaap-ae-2009-06-21.xsd", - ], + ], # type: ignore [ 11, 12, @@ -119,7 +119,7 @@ def test_open_xml_str(): 38, 19, 10, - ], + ], # type: ignore ), ) def test_schema(account, schema, namespaces): @@ -336,9 +336,17 @@ def test_continuation(): value_seen = False for n in x.nonnumeric: if n.name == "AccountantsReportOnFinancialStatements": - assert ( - n.value - == "This report is made solely to the board of directors of Test Exclude Limited, as a body, in accordance with the terms of our engagement letter dated 18 November 2022. Our work has been undertaken solely to prepare for your approval the financial statements of Test Exclude Limited and state those matters that we have agreed to state to the board of directors of Test Exclude Limited, as a body, in this report in accordance with ICAEW Technical Release 07/16 AAF. To the fullest extent permitted by law, we do not accept or assume responsibility to anyone other than Test Exclude Limited and its board of directors as a body, for our work or for this report." + assert n.value == ( + "This report is made solely to the board of directors of Test Exclude " + "Limited, as a body, in accordance with the terms of our engagement " + "letter dated 18 November 2022. Our work has been undertaken solely " + "to prepare for your approval the financial statements of Test Exclude " + "Limited and state those matters that we have agreed to state to the " + "board of directors of Test Exclude Limited, as a body, in this report " + "in accordance with ICAEW Technical Release 07/16 AAF. To the fullest " + "extent permitted by law, we do not accept or assume responsibility " + "to anyone other than Test Exclude Limited and its board of directors " + "as a body, for our work or for this report." ) value_seen = True From 2ee8cd4278b7058c9e1a8ae11d3dd2b80fd50d75 Mon Sep 17 00:00:00 2001 From: David Kane Date: Sun, 9 Jul 2023 09:45:15 +0100 Subject: [PATCH 4/5] version bump --- README.md | 2 ++ ixbrlparse/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ccce28d..6849cf6 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ A python module for getting useful data out of ixbrl files. The library is at an early stage - feedback and improvements are very welcome. +**New in version 0.5.4**: Added backreferences to BeautifulSoup objects - thanks to @avyfain for PR. + **New in version 0.5.3**: Support for `exclude` and `continuation` elements within XBRL documents. Thanks to @wcollinscw for adding support for exclude elements. **New in version 0.5**: Support for Python 3.11 has been added. I've had some problems with Python 3.11 and Windows as lxml binaries aren't yet available. Also new in version 0.5 is type checking - the whole library now has types added. diff --git a/ixbrlparse/version.py b/ixbrlparse/version.py index 43a1e95..6b27eee 100644 --- a/ixbrlparse/version.py +++ b/ixbrlparse/version.py @@ -1 +1 @@ -__version__ = "0.5.3" +__version__ = "0.5.4" From 3d5aa919c687f852fea8f5fea3b58b1905b71996 Mon Sep 17 00:00:00 2001 From: David Kane Date: Sun, 9 Jul 2023 09:47:39 +0100 Subject: [PATCH 5/5] add tag to end of init --- ixbrlparse/components/numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ixbrlparse/components/numeric.py b/ixbrlparse/components/numeric.py index 8c60f29..a5f4848 100644 --- a/ixbrlparse/components/numeric.py +++ b/ixbrlparse/components/numeric.py @@ -19,12 +19,12 @@ class ixbrlNumeric: # xmlns:ix def __init__( self, - soup_tag: Optional[Tag] = None, name: Optional[str] = None, unit: Optional[str] = None, value: Optional[Union[str, int, float]] = None, text: Optional[Union[str, int, float]] = None, context: Union[ixbrlContext, str, None] = None, + soup_tag: Optional[Tag] = None, **attrs, ) -> None: self.name: Optional[str] = name