Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Warning cleanup. #42

Merged
merged 9 commits into from
Jun 20, 2019
14 changes: 4 additions & 10 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,6 @@ before_install:
jython -c "print ''";
jython -c "import sys; print sys.version"
fi
if [[ $TRAVIS_OS_NAME == 'osx' ]]; then
brew update
brew upgrade
brew upgrade python
brew install python3
python3 --version
fi

install:
- |
Expand All @@ -55,7 +48,8 @@ install:
fi
$PIP install wheel
$PIP install setuptools
$PIP install ply pep8 mako
$PIP install ply mako
$PIP install pycodestyle
if [ "$MYPYTHON" != "jython" ]; then
$PIP install --upgrade pytest pytest-cov codecov
fi
Expand All @@ -67,12 +61,12 @@ script:
$MYPYTHON -c "from pyoracc import _generate_parsetab; _generate_parsetab()"
echo "Running tests"
if [ "$MYPYTHON" == "jython" ]; then
py.test
pytest
else
pytest --cov=pyoracc
fi

- pep8 --exclude=parsetab.py .
- pycodestyle --exclude=parsetab.py

after_success:
- |
Expand Down
74 changes: 37 additions & 37 deletions pyoracc/atf/common/atflex.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,16 @@ def resolve_keyword(self, value, source, fallback=None, extra=None):

states = AtfLexicon.STATES

t_AMPERSAND = r'\&'
t_HASH = r'\#'
t_EXCLAIM = r'\!'
t_AMPERSAND = r'&'
t_HASH = r'#'
t_EXCLAIM = r'!'
t_QUERY = r'\?'
t_STAR = r'\*'
t_DOLLAR = r'\$'
t_MINUS = r'\-'
t_FROM = r'\<\<'
t_TO = r'\>\>'
t_COMMA = r'\,'
t_MINUS = r'-'
t_FROM = r'<<'
t_TO = r'>>'
t_COMMA = r','
t_PARBAR = r'\|\|'

t_INITIAL_transctrl_PARENTHETICALID = r'\([^\n\r]*\)'
Expand All @@ -88,22 +88,22 @@ def t_INITIAL_transctrl_WHITESPACE(self, t):
# NO TOKEN

def t_MULTILINGUAL(self, t):
r'\=\='
r'=='
t.lexer.push_state("text")
return t

def t_EQUALBRACE(self, t):
r'^\=\{'
r'^=\{'
t.lexer.push_state('text')
return t

def t_EQUALS(self, t):
r'\='
r'='
t.lexer.push_state('flagged')
return t

def t_INITIAL_parallel_labeled_COMMENT(self, t):
r'^\#+(?![a-zA-Z]+\:)'
r'^#+(?![a-zA-Z]+:)'
# Negative lookahead to veto protocols as comments
t.lexer.push_state('absorb')
return t
Expand All @@ -121,7 +121,7 @@ def t_NEWLINE(self, t):
return t

def t_INITIAL_parallel_labeled_ATID(self, t):
r'^\@[a-zA-Z][a-zA-Z0-9\[\]]*\+?'
r'^@[a-zA-Z][a-zA-Z0-9\[\]]*\+?'
t.value = t.value[1:]
t.lexpos += 1
t.type = self.resolve_keyword(t.value,
Expand Down Expand Up @@ -171,13 +171,13 @@ def t_INITIAL_parallel_labeled_ATID(self, t):
return t

def t_labeled_OPENR(self, t):
r'\@\('
r'@\('
t.lexer.push_state("para")
t.lexer.push_state("transctrl")
return t

def t_INITIAL_parallel_labeled_HASHID(self, t):
r'\#[a-zA-Z][a-zA-Z0-9\[\]]+\:'
r'#[a-zA-Z][a-zA-Z0-9\[\]]+:'
# Note that \:? absorbs a trailing colon in protocol keywords
t.value = t.value[1:-1]
t.lexpos += 1
Expand Down Expand Up @@ -213,19 +213,19 @@ def t_INITIAL_parallel_labeled_HASHID(self, t):
return t

def t_LINELABEL(self, t):
r'^[^\ \t\n]*\.'
r'^[^ \t\n]*\.'
t.value = t.value[:-1]
t.lexer.push_state('text')
return t

def t_score_SCORELABEL(self, t):
r'^[^.:\ \t\#][^.:\ \t]*\:'
r'^[^.: \t#][^.: \t]*:'
t.value = t.value[:-1]
t.lexer.push_state('text')
return t

def t_ID(self, t):
u'[a-zA-Z0-9][a-zA-Z\'\u2019\xb4\/\.0-9\:\-\[\]_\u2080-\u2089]*'
r'[a-zA-Z0-9][a-zA-Z0-9/.:_\-\[\]' u'\'\u2019\xb4\u2080-\u2089]*'
t.value = t.value.replace(u'\u2019', "'")
t.value = t.value.replace(u'\xb4', "'")
t.type = self.resolve_keyword(t.value,
Expand Down Expand Up @@ -271,7 +271,7 @@ def t_flagged_text_lemmatize_transctrl_nonequals_absorb_NEWLINE(self, t):
# Unicode 2032 is PRIME
# All of these could be used as prime
def t_transctrl_ID(self, t):
u'[a-zA-Z0-9][a-zA-Z\'\u2019\u2032\u02CA\xb4\/\.0-9\:\-\[\]_' \
r'[a-zA-Z0-9][a-zA-Z0-9/.:_\-\[\]' u'\'\u2019\u2032\u02CA\xb4' \
u'\u2080-\u2089]*'
t.value = t.value.replace(u'\u2019', "'")
t.value = t.value.replace(u'\u2032', "'")
Expand Down Expand Up @@ -306,7 +306,7 @@ def t_transctrl_ID(self, t):
t_parallel_QUERY = r'\?'

def t_parallel_LINELABEL(self, t):
r'^([^\.\ \t]*)\.[\ \t]*'
r'^([^. \t]*)\.[ \t]*'
t.value = t.value.strip(" \t.")
return t

Expand All @@ -315,7 +315,7 @@ def t_parallel_labeled_DOLLAR(self, t):
t.lexer.push_state("absorb")
return t

t_transctrl_MINUS = r'\-\ '
t_transctrl_MINUS = r'- '

def t_transctrl_CLOSER(self, t):
r'\)'
Expand Down Expand Up @@ -347,12 +347,12 @@ def t_labeled_NEWLINE(self, t):
# Flag characters (#! etc ) don't apply in translations
# But reference anchors ^1^ etc do.
# lines beginning with a space are continuations
white = r'[\ \t]*'
white = r'[ \t]*'
# translation_regex1 and translation_regex2 are identical apart from the
# fact that the first character may not be a ?
# We are looking for a string that does not start with ? it may include
# newlines if they are followed by a whitespace.
translation_regex1 = r'([^\?\^\n\r]|([\n\r](?=[ \t])))'
translation_regex1 = r'([^?\^\n\r]|([\n\r](?=[ \t])))'
translation_regex2 = r'([^\^\n\r]|([\n\r](?=[ \t])))*'
translation_regex = white + translation_regex1 + translation_regex2 + white

Expand All @@ -366,7 +366,7 @@ def t_parallel_interlinear_ID(self, t):
return t

def t_parallel_labeled_AMPERSAND(self, t):
r'\&'
r'&'
# New document, so leave translation state
t.lexer.pop_state()
return t
Expand All @@ -383,9 +383,9 @@ def t_parallel_labeled_AMPERSAND(self, t):
# Used for states where only flag# characters! and ^1^ references
# Are separately tokenised

nonflagnonwhite = r'[^\ \t\#\!\^\*\?\n\r\=]'
internalonly = r'[^\n\^\r\=]'
nonflag = r'[^\ \t\#\!\^\*\?\n\r\=]'
nonflagnonwhite = r'[^ \t#!\^*?\n\r=]'
internalonly = r'[^\n\^\r=]'
nonflag = r'[^ \t#!\^*?\n\r=]'
many_int_then_nonflag = '(' + internalonly + '*' + nonflag + '+' + ')'
many_nonflag = nonflag + '*'
intern_or_nonflg = '(' + many_int_then_nonflag + '|' + many_nonflag + ')'
Expand All @@ -399,17 +399,17 @@ def t_flagged_ID(self, t):
t.value = t.value.strip()
return t

t_flagged_HASH = r'\#'
t_flagged_EXCLAIM = r'\!'
t_flagged_HASH = r'#'
t_flagged_EXCLAIM = r'!'
t_flagged_QUERY = r'\?'
t_flagged_STAR = r'\*'
t_flagged_parallel_para_HAT = r'[\ \t]*\^[\ \t]*'
t_flagged_EQUALS = r'\='
t_flagged_parallel_para_HAT = r'[ \t]*\^[ \t]*'
t_flagged_EQUALS = r'='
# --- Rules for paragraph state----------------------------------
# Free text, ended by double new line

terminates_para = \
"(\#|\@[^i][^\{]|\&|\Z|(^[0-9]+[\'\u2019\u2032\u02CA\xb4]?\.))"
r'(#|@[^i][^{]|&|\Z|(^[0-9]+' u'[\'\u2019\u2032\u02CA\xb4]\\.))'

@lex.TOKEN(r'([^\^\n\r]|(\r?\n(?!\s*\r?\n)(?!' +
terminates_para + ')))+')
Expand Down Expand Up @@ -441,11 +441,11 @@ def t_para_MAGICNEWLINE(self, t):
# --- RULES FOR THE nonequals STATE -----
# Absorb everything except an equals
def t_nonequals_ID(self, t):
r'[^\=\n\r]+'
r'[^=\n\r]+'
t.value = t.value.strip()
return t

t_nonequals_EQUALS = r'\='
t_nonequals_EQUALS = r'='

# --- RULES FOR THE absorb STATE -----
# Absorb everything
Expand All @@ -455,15 +455,15 @@ def t_absorb_ID(self, t):
return t

# --- RULES FOR THE text STATE ----
t_text_ID = r'[^\ \t \n\r]+'
t_text_ID = r'[^ \t\n\r]+'

def t_text_SPACE(self, t):
r'[\ \t]'
r'[ \t]'
# No token generated

# --- RULES FOR THE lemmatize STATE
t_lemmatize_ID = r'[^\;\n\r]+'
t_lemmatize_SEMICOLON = r'\;[\ \t]*'
t_lemmatize_ID = r'[^;\n\r]+'
t_lemmatize_SEMICOLON = r';[ \t]*'

# Error handling rule
def t_ANY_error(self, t):
Expand Down
43 changes: 29 additions & 14 deletions pyoracc/wrapper/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@
from pyoracc.atf.common.atffile import check_atf


def check_atf_message((segpathname, atftype, verbose)):
click.echo('\n Info: Parsing {0}.'.format(segpathname))
def check_atf_message(args):
segpathname, atftype, verbose = args
if verbose:
click.echo('\n Info: Parsing {0}.'.format(segpathname))
try:
check_atf(segpathname, atftype, verbose)
click.echo('Info: Correctly parsed {0}.'.format(segpathname))
if verbose:
click.echo('Info: Correctly parsed {0}.'.format(segpathname))
except (SyntaxError, IndexError, AttributeError,
UnicodeDecodeError) as e:
click.echo("Info: Failed with message: {0} in {1}"
Expand All @@ -36,15 +39,18 @@ def check_and_process(pathname, atftype, whole, verbose=False):
if verbose:
click.echo('Info: Segmented into {0}.'.format(outfolder))

files = map(lambda f: os.path.join(outfolder, f), os.listdir(outfolder))
count_files = len(files)
files = map(lambda f: os.path.join(outfolder, f),
os.listdir(outfolder))
count_files = len(list(files))
atftypelist = [atftype]*count_files
verboselist = [verbose]*count_files
pool.map(check_atf_message, zip(files, atftypelist, verboselist))
pool.map(check_atf_message,
zip(files, atftypelist, verboselist))
pool.close()
else:
check_atf_message((pathname, atftype, verbose))
click.echo('Info: Finished parsing {0}.'.format(pathname))
if verbose:
click.echo('Info: Finished parsing {0}.'.format(pathname))
return 1
except (SyntaxError, IndexError, AttributeError,
UnicodeDecodeError) as e:
Expand All @@ -62,7 +68,7 @@ def check_and_process(pathname, atftype, whole, verbose=False):
prompt=True, required=True,
help='Input the atf file type.')
@click.option('--whole', '-w', default=False, required=False, is_flag=True,
help='Disables the segmentation of the atf file and run as a whole.')
help='Disable segmentation of the atf file and run as a whole.')
@click.option('--verbose', '-v', default=False, required=False, is_flag=True,
help='Enables verbose mode.')
@click.version_option()
Expand All @@ -73,8 +79,8 @@ def main(input_path, atf_type, whole, verbose):
failures = 0
successes = 0
with click.progressbar(os.listdir(input_path),
label='Info: Checking the files') as bar:
for index, f in enumerate(bar):
label='Info: Checking the files') as entries:
for f in entries:
pathname = os.path.join(input_path, f)
try:
check_and_process(pathname, atf_type, whole, verbose)
Expand All @@ -86,12 +92,21 @@ def main(input_path, atf_type, whole, verbose):
click.echo("Info: Failed with message: {0} in {1}"
.format(e, pathname))
finally:
try:
click.echo("Failed with {0} out of {1} ({2}%)"
.format(failures, failures + successes, failures * 100.0 / (failures + successes)))
except ZeroDivisionError:
total = failures + successes
if not total:
click.echo("Empty files to process")
elif failures:
click.echo("Failed with {0} out of {1} ({2}%)"
.format(failures,
total,
failures * 100.0 / total))
else:
click.echo("All {0} passed!".format(successes))
else:
check_and_process(input_path, atf_type, whole, verbose)
tsend = time.time()
click.echo("Total time taken: {0} minutes".format((tsend-tsbegin)/60.0))


if __name__ == '__main__':
main()
8 changes: 5 additions & 3 deletions pyoracc/wrapper/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@


ts = time.time()
OUTPUT_FOLDER = 'segment'+str(ts)
OUTPUT_FOLDER = '-segment-'+str(ts)


class Segmentor:
def __init__(self, inputFile, verbose):
self.inputFileName = inputFile
self.outfolder = os.path.join(os.path.dirname(self.inputFileName), "..",
os.path.basename(self.inputFileName)+OUTPUT_FOLDER)
folderName = os.path.basename(self.inputFileName) + OUTPUT_FOLDER
self.outfolder = os.path.join(os.path.dirname(self.inputFileName),
"..",
folderName)
self.verbose = verbose
self.__reset__()

Expand Down