Python module: Revised JSON structure and filters (#175)

* update toc rule to follow the pattern of other number markers * start working on to_dict_new to revise the JSON output structure * implement attribute, list, table and milestone handling in node_to_dict_new * fix id_query and ensure bookCode is added * include titles and poetry in JSON output * reduce inner nesting in JSON with a decorator function * handle footnotes and cross refs * implement filtering * remove old to_dict code * re-write the to_list function as per the new JSON * update syntax trees in test as per the change in toc rule * add linting for python module on GitHub Actions * fix error in github action script * change the use of filter Enum in CLI * remove unused import
Bridgeconn · Oct 10, 2022 · 28e613a · 28e613a
1 parent 895450f
commit 28e613a
Show file tree

Hide file tree

Showing 8 changed files with 465 additions and 227 deletions.
diff --git a/.github/workflows/lint-on-push.yml b/.github/workflows/lint-on-push.yml
@@ -0,0 +1,35 @@
+name: check-linting
+
+# Controls when the action will run. Triggers the workflow on every push and
+# pull request event; no branch filter is applied
+on: [push, pull_request]
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  # This job sets up Python, installs the dev requirements and lints the python module
+  python-build-n-lint:
+    # The type of runner that the job will run on
+    runs-on: ubuntu-latest
+
+    # Steps represent a sequence of tasks that will be executed as part of the job
+    steps:
+      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+            python-version: '3.10.6' # Exact version or SemVer version range of the Python version to use
+            architecture: 'x64' # optional x64 or x86. Defaults to x64 if not specified
+
+      - name: Create Virtual Environment
+        run: python -m venv ENV-dev
+
+      - name: Use VENV  # every step runs in a fresh shell, so `source .../activate` would not persist; prepend the venv to PATH for later steps instead
+        run: echo "$GITHUB_WORKSPACE/ENV-dev/bin" >> "$GITHUB_PATH"
+
+      - name: install packages
+        working-directory: ./python-usfm-parser
+        run: pip install -r dev-requirements.txt
+
+      - name: Run linter
+        working-directory: ./python-usfm-parser
+        run: pylint --extension-pkg-allow-list=lxml src/usfm_grammar/*.py
diff --git a/python-usfm-parser/src/usfm_grammar/__init__.py b/python-usfm-parser/src/usfm_grammar/__init__.py
diff --git a/python-usfm-parser/src/usfm_grammar/__main__.py b/python-usfm-parser/src/usfm_grammar/__main__.py
@@ -14,12 +14,11 @@ def main():
         "Syntax-tree, JSON, CSV, USX etc.')
     arg_parser.add_argument('infile', type=str, help='input usfm file')
     arg_parser.add_argument('--format', type=str, help='output format',
-                            choices=[Format.JSON.value, Format.CSV.value, Format.USX.value,
-                                        Format.MD.value, Format.ST.value],
+                            choices=[itm.value for itm in Format],
                             default=Format.JSON.value)
     arg_parser.add_argument('--filter', type=str, help='the type of contents to be included',
-                            choices=[Filter.SCRIPTURE_BCV.value, Filter.NOTES.value,
-                            Filter.SCRIPTURE_PARAGRAPHS.value, Filter.ALL.value])
+                            choices=[itm.name.lower() for itm in Filter],
+                            action="append")
     arg_parser.add_argument('--csv_col_sep', type=str,
                             help="column separator or delimiter. Only useful with format=table.",
                             default="\t")
@@ -43,19 +42,26 @@ def main():
         print(f"Errors present:\n\t{err_str}")
         sys.exit(1)
 
+    if output_filter is None:
+        updated_filt = None
+    else:
+        updated_filt = []
+        for itm in output_filter:
+            updated_filt.append(Filter[itm.upper()])
+
     match output_format:
         case Format.JSON:
-            dict_output = my_parser.to_dict(filt = output_filter)
+            dict_output = my_parser.to_dict(filt=updated_filt)
             print(json.dumps(dict_output, indent=4, ensure_ascii=False))
         case Format.CSV:
-            table_output = my_parser.to_list(filt = output_filter)
+            table_output = my_parser.to_list(filt = updated_filt)
             print(csv_row_sep.join([csv_col_sep.join(row) for row in table_output]))
         case Format.USX:
-            xmlstr = etree.tostring(my_parser.to_usx(filt=output_filter),
+            xmlstr = etree.tostring(my_parser.to_usx(),
                 encoding='unicode', pretty_print=True)
             print(xmlstr)
         case Format.MD:
-            print(my_parser.to_markdown(filt = output_filter))
+            print(my_parser.to_markdown())
         case Format.ST:
             print(my_parser.to_syntax_tree())
         case _: