Skip to content

Commit

Permalink
Merge pull request #33 from avyfain/feat/backref
Browse files Browse the repository at this point in the history
create a pointer from extracted elements back to the bs4 tag
  • Loading branch information
drkane authored Jul 9, 2023
2 parents 097886c + 3d5aa91 commit 7a805df
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 11 deletions.
161 changes: 161 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,163 @@
env/
working/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

A Python module for getting useful data out of iXBRL files. The library is at an early stage - feedback and improvements are very welcome.

**New in version 0.5.4**: Added backreferences to BeautifulSoup objects - thanks to @avyfain for the PR.

**New in version 0.5.3**: Support for `exclude` and `continuation` elements within XBRL documents. Thanks to @wcollinscw for adding support for exclude elements.

**New in version 0.5**: Support for Python 3.11 has been added. I've had some problems with Python 3.11 and Windows as lxml binaries aren't yet available. Also new in version 0.5 is type checking - the whole library now has types added.
Expand Down
7 changes: 5 additions & 2 deletions ixbrlparse/components/nonnumeric.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from copy import deepcopy
from typing import Any, Dict, List, Optional, Union

from bs4 import Tag

from ixbrlparse.components import ixbrlContext


Expand All @@ -11,8 +13,8 @@ def __init__(
name: str,
format_: Optional[str],
value: str,
soup_tag: Optional[Tag] = None,
) -> None:

name_split: List[str] = name.split(":", maxsplit=1)
if len(name_split) == 2:
self.schema = name_split[0]
Expand All @@ -24,9 +26,10 @@ def __init__(
self.context = context
self.format = format_
self.value = value
self.soup_tag = soup_tag

def to_json(self) -> Dict[str, Any]:
values = deepcopy(self.__dict__)
values = {k: deepcopy(v) for k, v in self.__dict__.items() if k != "soup_tag"}
if isinstance(self.context, ixbrlContext):
values["context"] = self.context.to_json()
return values
7 changes: 5 additions & 2 deletions ixbrlparse/components/numeric.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from copy import deepcopy
from typing import Dict, Optional, Union

from bs4 import Tag

from .context import ixbrlContext
from .transform import get_format, ixbrlFormat


class ixbrlNumeric:

# contextref
# decimals
# format
Expand All @@ -23,6 +24,7 @@ def __init__(
value: Optional[Union[str, int, float]] = None,
text: Optional[Union[str, int, float]] = None,
context: Union[ixbrlContext, str, None] = None,
soup_tag: Optional[Tag] = None,
**attrs,
) -> None:
self.name: Optional[str] = name
Expand All @@ -44,6 +46,7 @@ def __init__(
self.context: Union[ixbrlContext, str, None] = context
self.unit: Optional[str] = unit
self.value: Optional[Union[int, float]] = None
self.soup_tag = soup_tag

format_ = {
"format_": attrs.get("format"),
Expand All @@ -61,7 +64,7 @@ def __init__(
raise

def to_json(self) -> Dict:
values = deepcopy(self.__dict__)
values = {k: deepcopy(v) for k, v in self.__dict__.items() if k != "soup_tag"}
if isinstance(self.format, ixbrlFormat):
values["format"] = self.format.to_json()
if isinstance(self.context, ixbrlContext):
Expand Down
4 changes: 4 additions & 0 deletions ixbrlparse/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def _get_nonnumeric(self) -> None:
value=text.strip().replace("\n", "")
if isinstance(text, str)
else "",
soup_tag=s,
)
)
except Exception as e:
Expand All @@ -198,6 +199,7 @@ def _get_numeric(self) -> None:
text=s.text,
context=self.contexts.get(s["contextRef"], s["contextRef"]),
unit=self.units.get(s["unitRef"], s["unitRef"]),
soup_tag=s,
**s.attrs
)
)
Expand Down Expand Up @@ -248,6 +250,7 @@ def _get_numeric(self) -> None:
text=s.text,
context=self.contexts.get(context_ref, context_ref),
unit=self.units.get(unit_ref, unit_ref),
soup_tag=s,
**s.attrs
)
)
Expand Down Expand Up @@ -289,6 +292,7 @@ def _get_nonnumeric(self) -> None:
value=text.strip().replace("\n", "")
if isinstance(text, str)
else "",
soup_tag=s,
)
)

Expand Down
2 changes: 1 addition & 1 deletion ixbrlparse/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.3"
__version__ = "0.5.4"
23 changes: 17 additions & 6 deletions tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datetime import date

import pytest
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, Tag

from ixbrlparse import IXBRL
from ixbrlparse.core import (
Expand Down Expand Up @@ -111,15 +111,15 @@ def test_open_xml_str():
"https://xbrl.frc.org.uk/FRS-102/2014-09-01/FRS-102-2014-09-01.xsd",
"https://xbrl.frc.org.uk/FRS-102/2014-09-01/FRS-102-2014-09-01.xsd",
"http://www.companieshouse.gov.uk/ef/xbrl/uk/fr/gaap/ae/2009-06-21/uk-gaap-ae-2009-06-21.xsd",
],
], # type: ignore
[
11,
12,
12,
38,
19,
10,
],
], # type: ignore
),
)
def test_schema(account, schema, namespaces):
Expand Down Expand Up @@ -243,6 +243,7 @@ def test_nonnumeric():
assert n.value == "03456789"
assert isinstance(n.context, ixbrlContext)
value_seen = True
assert isinstance(x.nonnumeric[0].soup_tag, Tag)
assert value_seen


Expand Down Expand Up @@ -287,6 +288,7 @@ def test_numeric():
assert x.numeric[0].value == 52982
assert x.numeric[0].name == "PropertyPlantEquipment"
assert x.numeric[0].schema == "ns5"
assert isinstance(x.nonnumeric[0].soup_tag, Tag)


def test_numeric_xml():
Expand Down Expand Up @@ -315,6 +317,7 @@ def test_numeric_xml():
assert x.numeric[0].value == 1
assert x.numeric[0].name == "CashBankInHand"
assert x.numeric[0].schema == "unknown"
assert isinstance(x.nonnumeric[0].soup_tag, Tag)


def test_exclude():
Expand All @@ -333,9 +336,17 @@ def test_continuation():
value_seen = False
for n in x.nonnumeric:
if n.name == "AccountantsReportOnFinancialStatements":
assert (
n.value
== "This report is made solely to the board of directors of Test Exclude Limited, as a body, in accordance with the terms of our engagement letter dated 18 November 2022. Our work has been undertaken solely to prepare for your approval the financial statements of Test Exclude Limited and state those matters that we have agreed to state to the board of directors of Test Exclude Limited, as a body, in this report in accordance with ICAEW Technical Release 07/16 AAF. To the fullest extent permitted by law, we do not accept or assume responsibility to anyone other than Test Exclude Limited and its board of directors as a body, for our work or for this report."
assert n.value == (
"This report is made solely to the board of directors of Test Exclude "
"Limited, as a body, in accordance with the terms of our engagement "
"letter dated 18 November 2022. Our work has been undertaken solely "
"to prepare for your approval the financial statements of Test Exclude "
"Limited and state those matters that we have agreed to state to the "
"board of directors of Test Exclude Limited, as a body, in this report "
"in accordance with ICAEW Technical Release 07/16 AAF. To the fullest "
"extent permitted by law, we do not accept or assume responsibility "
"to anyone other than Test Exclude Limited and its board of directors "
"as a body, for our work or for this report."
)
value_seen = True

Expand Down

0 comments on commit 7a805df

Please sign in to comment.