Skip to content

Commit

Permalink
Relaxed mode with "ignore_errors" option (#201)
Browse files Browse the repository at this point in the history
* Exclude one more large file from tests as it times out

* Add ignore_errors option on CLI

* Bump version: 3.0.0-alpha.5 → 3.0.0-alpha.29

* Add new GitHub Actions workflow to publish on test.pypi

* try publish on test.pypi attempt #2

* Bump version: 3.0.0-alpha.29 → 3.0.0-alpha.6

* undo changes in workflow made for test.pypi publish

* Python module: Add ignore_errors option in class methods

* Python module: Add ignore_errors in calling class methods from CLI

* JSON/dict format: remove backslash (\) from value of closing
  • Loading branch information
kavitharaju authored Dec 15, 2022
1 parent f8d3b49 commit a08b2ae
Show file tree
Hide file tree
Showing 10 changed files with 165 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check-on-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,4 @@ jobs:
- name: Run tests for parsing errors
working-directory: ./python-usfm-parser
run:
pytest -k "not compare_usx_with_testsuite_samples and not testsuite_usx_with_rnc_grammar and not samples-from-wild" -n auto
pytest -k "not compare_usx_with_testsuite_samples and not testsuite_usx_with_rnc_grammar and not samples-from-wild and not 57-TIT.partial" -n auto
2 changes: 1 addition & 1 deletion .github/workflows/pypi_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
name: artifact
path: python-usfm-parser/dist/

- name: Publish distribution 📦 to Test PyPI
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
Expand Down
64 changes: 64 additions & 0 deletions .github/workflows/testpypi_publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Builds platform wheels for python-usfm-parser and uploads them to Test PyPI.
name: Test.PyPI Publish

on:
  # push: # need to use this temporarily to be able to publish
  workflow_dispatch: # works only on default branch

jobs:
  # Generate the tree-sitter parser, then build one set of wheels per OS.
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-20.04, windows-2019, macos-11]
        # os: [ubuntu-20.04]

    steps:
      - uses: actions/checkout@v3
      - name: Setup node and npm
        uses: actions/setup-node@v2
        with:
          node-version: 14
      - name: Build the tree-sitter parser
        run: |
          cd tree-sitter-usfm3
          npm install .
          ./node_modules/.bin/tree-sitter generate
      - name: Build wheels
        uses: pypa/cibuildwheel@v2.9.0
        env:
          # Rebuild the compiled grammar library before each wheel build.
          CIBW_BEFORE_BUILD: >
            python -m pip install tree-sitter==0.20.0 &&
            python python-usfm-parser/src/grammar_rebuild.py tree-sitter-usfm3 python-usfm-parser/src/usfm_grammar/my-languages.so
        with:
          package-dir: python-usfm-parser
          output-dir: python-usfm-parser/dist
          config-file: "python-usfm-parser/pyproject.toml"

      # No explicit artifact name is given here; the publish job downloads
      # the artifact called "artifact".
      - uses: actions/upload-artifact@v3
        with:
          path: python-usfm-parser/dist/*.whl

  # Collect the wheels built above and push them to test.pypi.org.
  publish_to_pypi:
    needs: [build_wheels]
    runs-on: ubuntu-latest

    steps:
      # Pinned to the same release as the build job instead of the moving
      # "master" ref.
      - uses: actions/checkout@v3
      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"
      - uses: actions/download-artifact@v3
        with:
          name: artifact
          path: python-usfm-parser/dist/

      - name: Publish distribution 📦 to Test PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PUBLISH_ON_TEST_PYPI }}
          repository_url: https://test.pypi.org/legacy/
          packages_dir: python-usfm-parser/dist/
2 changes: 1 addition & 1 deletion python-usfm-parser/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 3.0.0-alpha.5
current_version = 3.0.0-alpha.6
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)\-(?P<release>\w+).(?P<num>\d+)
Expand Down
2 changes: 1 addition & 1 deletion python-usfm-parser/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "usfm-grammar"
version = "3.0.0-alpha.5"
version = "3.0.0-alpha.6"
description = "Python parser for USFM files, based on tree-sitter-usfm3"
readme = "README.md"
authors = [{ name = "BCS Team", email = "joel@bridgeconn.com" }]
Expand Down
2 changes: 1 addition & 1 deletion python-usfm-parser/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def has_ext_modules(self):

setup(
name="usfm-grammar", # Required
version="3.0.0-alpha.5", # Required
version="3.0.0-alpha.6", # Required
python_requires=">=3.10",
install_requires=["tree-sitter", "lxml"], # Optional
package_data={ # Optional
Expand Down
2 changes: 1 addition & 1 deletion python-usfm-parser/src/usfm_grammar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
Format = usfm_parser.Format
USFMParser = usfm_parser.USFMParser

__version__ = "3.0.0-alpha.5"
__version__ = "3.0.0-alpha.6"
13 changes: 8 additions & 5 deletions python-usfm-parser/src/usfm_grammar/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ def main():
arg_parser.add_argument('--csv_row_sep', type=str,
help="row separator or delimiter. Only useful with format=table.",
default="\n")
arg_parser.add_argument('--ignore_errors',
help="to get some output from successfully parsed portions",
action='store_true')

infile = arg_parser.parse_args().infile
output_format = arg_parser.parse_args().format
Expand All @@ -36,7 +39,7 @@ def main():

my_parser = USFMParser(file_content)

if my_parser.errors:
if my_parser.errors and not arg_parser.parse_args().ignore_errors:
err_str = "\n\t".join([":".join(err) for err in my_parser.errors])
print(f"Errors present:\n\t{err_str}")
sys.exit(1)
Expand All @@ -50,23 +53,23 @@ def main():

match output_format:
case Format.JSON:
dict_output = my_parser.to_dict(filters=updated_filt)
dict_output = my_parser.to_dict(filters=updated_filt, ignore_errors=True)
print(json.dumps(dict_output, indent=4, ensure_ascii=False))
case Format.CSV:
table_output = my_parser.to_list(filters = updated_filt)
table_output = my_parser.to_list(filters = updated_filt, ignore_errors=True)
outfile = sys.stdout
writer = csv.writer(outfile,
delimiter=arg_parser.parse_args().csv_col_sep,
lineterminator=arg_parser.parse_args().csv_row_sep)
writer.writerows(table_output)
case Format.USX:
xmlstr = etree.tostring(my_parser.to_usx(),
xmlstr = etree.tostring(my_parser.to_usx(ignore_errors=True),
encoding='unicode', pretty_print=True)
print(xmlstr)
case Format.MD:
print(my_parser.to_markdown())
case Format.ST:
print(my_parser.to_syntax_tree())
print(my_parser.to_syntax_tree(ignore_errors=True))
case _:
raise Exception(f"Un-recognized output format:{output_format}!")

Expand Down
35 changes: 29 additions & 6 deletions python-usfm-parser/src/usfm_grammar/usfm_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ def node_2_dict_generic(node, usfm_bytes, filters): # pylint: disable=R0912
result['attributes'] = attribs
if closing_node is not None:
result['closing'] = usfm_bytes[\
closing_node.start_byte:closing_node.end_byte].decode('utf-8').strip()
closing_node.start_byte:closing_node.end_byte].decode('utf-8').strip().replace("\\","")
return result

@reduce_nesting
Expand Down Expand Up @@ -722,12 +722,22 @@ def __init__(self, usfm_string):
for err in errors]


def to_syntax_tree(self):
def to_syntax_tree(self, ignore_errors=False):
    '''Give the syntax tree held by this parser, as a string.

    The string is whatever ``self.syntax_tree.sexp()`` returns.

    Args:
        ignore_errors: when False (the default) and ``self.errors`` is
            non-empty, refuse to produce output and raise instead.
            Pass True to get the tree in spite of recorded errors.

    Raises:
        Exception: if ``self.errors`` is non-empty and ``ignore_errors``
            is False. The message lists every recorded error, one per
            line, each joined with ":".
    '''
    if not ignore_errors and self.errors:
        # Each error is a sequence of strings; show one error per line.
        err_str = "\n\t".join(":".join(err) for err in self.errors)
        raise Exception(
            f"Errors present:\n\t{err_str}"
            "\nUse ignore_errors=True, to generate output inspite of errors")
    return self.syntax_tree.sexp()

def to_dict(self, filters=None): #pylint: disable=too-many-branches
def to_dict(self, filters=None, ignore_errors=False): #pylint: disable=too-many-branches
'''Converts syntax tree to dictionary/json and selection of desired type of contents'''
if (not ignore_errors) and self.errors:
err_str = "\n\t".join([":".join(err) for err in self.errors])
raise Exception("Errors present:"+\
f'\n\t{err_str}'+\
"\nUse ignore_errors=True, to generate output inspite of errors")
dict_output = {"book":{}}
if filters is None or filters == []:
filters = list(Filter)
Expand Down Expand Up @@ -775,14 +785,20 @@ def to_dict(self, filters=None): #pylint: disable=too-many-branches
raise Exception(message) from exe
return dict_output

def to_list(self, filters=None): # pylint: disable=too-many-branches
def to_list(self, filters=None, ignore_errors=False): # pylint: disable=too-many-branches, too-many-locals
'''uses the toJSON function and converts JSON to CSV
To be re-implemented to work with the flat JSON schema'''
if not ignore_errors and self.errors:
err_str = "\n\t".join([":".join(err) for err in self.errors])
raise Exception("Errors present:"+\
f'\n\t{err_str}'+\
"\nUse ignore_errors=True, to generate output inspite of errors")

if filters is None:
filters = list(Filter)
if Filter.PARAGRAPHS in filters:
filters.remove(Filter.PARAGRAPHS)
scripture_json = self.to_dict(filters)
scripture_json = self.to_dict(filters, ignore_errors=ignore_errors)
table_output = [["Book","Chapter","Verse","Verse-Text","Notes","Milestone","Other"]]
book = scripture_json['book']['bookCode']
verse_num = 0
Expand Down Expand Up @@ -831,8 +847,15 @@ def to_markdown(self):
return "yet to be implemeneted"


def to_usx(self):
def to_usx(self, ignore_errors=False):
'''convert the syntax_tree to the XML format USX'''
if not ignore_errors and self.errors:
err_str = "\n\t".join([":".join(err) for err in self.errors])
raise Exception("Errors present:"+\
f'\n\t{err_str}'+\
"\nUse ignore_errors=True, to generate output inspite of errors")


usx_root = etree.Element("usx")
usx_root.set("version", "3.0")
try:
Expand Down
58 changes: 58 additions & 0 deletions python-usfm-parser/tests/test_parsing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
'''To test parsing success/errors for USFM/X committee's test suite'''
import pytest
from lxml import etree

from src.usfm_grammar import USFMParser
from tests import all_usfm_files, initialise_parser, is_valid_usfm,\
doubtful_usfms, negative_tests, find_all_markers

Expand Down Expand Up @@ -47,3 +49,59 @@ def test_all_markers_are_in_output(file_path):
if marker in ['qt', 'ts'] or marker.startswith("z"):
marker = "milestone"
assert marker in all_nodes_in_st, marker

# USFM fixture for test_partial_parsing_with_errors. Lines whose text says
# "correct" are the ones that test expects to find in the output; the others
# are labelled as faulty by their own text ("\v3" written without a space
# before the verse number, and a verse in chapter 3 placed before any \p).
# NOTE(review): which lines the grammar actually reports as errors is not
# visible here — confirm against USFMParser(...).errors.
USFM_WITH_ERROR = '''
\\id GEN
\\c 1
\\p
\\v 1 correct verse one
\\v 2 correct verse two
\\p
\\v3 wrong verse
\\c 3
\\v 1 verse in chapter without paragraph
\\p
\\v 2 a correct verse following one without para
\\c 4
\\s5
\\p
\\v 1 correct verse three after s5
'''

def test_partial_parsing_with_errors():
    '''Test use of ignore_errors flag to obtain some output even when input has errors

    First checks that every converter refuses to run on erroneous input by
    default, then that passing ignore_errors=True still yields the verses
    labelled "correct" in USFM_WITH_ERROR.
    '''
    test_parser = USFMParser(USFM_WITH_ERROR)
    assert test_parser.errors

    # Without the ignore_errors flag: each API must raise, and the message
    # must both report the errors and point the caller at the opt-out flag.
    strict_apis = (
        test_parser.to_dict,
        test_parser.to_list,
        test_parser.to_usx,
        test_parser.to_syntax_tree,
    )
    for api in strict_apis:
        with pytest.raises(Exception, match="Errors present:") as exc_info:
            api()
        assert "Use ignore_errors=True" in str(exc_info.value), api.__name__

    # With ignore_errors=True: the correctly parsed portions must be present.
    # to_syntax_tree is left out here because these checks look for verse text.
    expected_snippets = (
        "correct verse one",
        "correct verse two",
        "correct verse three after s5",
    )
    for api in (test_parser.to_dict, test_parser.to_list, test_parser.to_usx):
        output = api(ignore_errors=True)
        if etree.iselement(output):
            # USX output is an XML element; serialise it before searching.
            str_output = etree.tostring(output).decode('utf-8')
        else:
            str_output = str(output)
        for snippet in expected_snippets:
            assert snippet in str_output, api.__name__

0 comments on commit a08b2ae

Please sign in to comment.