Light and percent format for scheme and cpp #61

mwouts · Sep 27, 2018 · 69ec8fd · 69ec8fd
1 parent 8bbfd1e
commit 69ec8fd
Show file tree

Hide file tree

Showing 17 changed files with 3,924 additions and 101 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -3,6 +3,16 @@
 Release History
 ---------------
 
+0.7.2 (2018-09-??)
+++++++++++++++++++++++
+
+**Improvements**
+
+- `light` and `percent` formats made available for scheme and cpp notebooks. Adding more languages
+  is straightforward - just add a new entry to _SCRIPT_EXTENSIONS in languages.py, a sample notebook
+  and a mirror test (#61)
+
+
 0.7.1 (2018-09-24)
 ++++++++++++++++++++++
 

diff --git a/jupytext/cell_reader.py b/jupytext/cell_reader.py
@@ -2,6 +2,7 @@
 
 import re
 from nbformat.v4.nbbase import new_code_cell, new_raw_cell, new_markdown_cell
+from .languages import _SCRIPT_EXTENSIONS
 
 try:
     from sphinx_gallery.notebook import rst2md
@@ -12,10 +13,8 @@
     md_options_to_metadata, rmd_options_to_metadata, \
     double_percent_options_to_metadata
 from .stringparser import StringParser
-from .magics import unescape_magic, is_magic, unescape_code_start
+from .magics import uncomment_magic, is_magic, unescape_code_start
 
-_CODE_OPTION_PY = re.compile(r"^(#|# )\+(\s*){(.*)}\s*$")
-_SIMPLE_START_CODE_PY = re.compile(r"^(#|# )\+(\s*)$")
 _BLANK_LINE = re.compile(r"^\s*$")
 _PY_COMMENT = re.compile(r"^\s*#")
 _PY_INDENTED = re.compile(r"^\s")
@@ -88,6 +87,7 @@ class BaseCellReader(object):
     lines_to_next_cell = 1
 
     start_code_re = None
+    simple_start_code_re = None
     end_code_re = None
 
     # How to make code inactive
@@ -171,9 +171,9 @@ def find_code_cell_end(self, lines):
 
             # Simple code pattern in LightScripts must be preceded with
             # a blank line
-            if i > 0 and self.start_code_re == _CODE_OPTION_PY and \
+            if i > 0 and self.simple_start_code_re and \
                     _BLANK_LINE.match(lines[i - 1]) and \
-                    _SIMPLE_START_CODE_PY.match(line):
+                    self.simple_start_code_re.match(line):
                 return i - 1, i, False
 
             if self.end_code_re:
@@ -307,7 +307,7 @@ def options_to_metadata(self, options):
     def uncomment_code_and_magics(self, lines):
         if self.cell_type == 'code':
             if is_active(self.ext, self.metadata):
-                unescape_magic(lines, self.language or self.default_language)
+                uncomment_magic(lines, self.language or self.default_language)
 
         unescape_code_start(lines, self.ext, self.language or
                             self.default_language)
@@ -325,7 +325,7 @@ def options_to_metadata(self, options):
     def uncomment_code_and_magics(self, lines):
         if self.cell_type == 'code':
             if is_active(self.ext, self.metadata):
-                unescape_magic(lines, self.language or self.default_language)
+                uncomment_magic(lines, self.language or self.default_language)
             else:
                 lines = uncomment(lines)
 
@@ -371,21 +371,25 @@ class LightScriptCellReader(ScriptCellReader):
     """Read notebook cells from plain Python or Julia files. Cells
     are identified by line breaks, unless they start with an
     explicit marker '# +' """
-    comment = '#'
-    start_code_re = _CODE_OPTION_PY
 
     def __init__(self, ext):
         super(LightScriptCellReader, self).__init__(ext)
-        self.default_language = 'julia' if ext == '.jl' else 'python'
+        script = _SCRIPT_EXTENSIONS[ext]
+        self.default_language = script['language']
+        self.comment = script['comment']
+        self.start_code_re = re.compile("^({0}|{0} )".format(self.comment) +
+                                        r"\+(\s*){(.*)}\s*$")
+        self.simple_start_code_re = re.compile(
+            r"^({0}|{0} )\+(\s*)$".format(self.comment))
 
     def options_to_metadata(self, options):
         return json_options_to_metadata(options)
 
     def metadata_and_language_from_option_line(self, line):
-        if _CODE_OPTION_PY.match(line):
+        if self.start_code_re.match(line):
             self.metadata = self.options_to_metadata(
-                _CODE_OPTION_PY.match(line).group(3))
-        elif _SIMPLE_START_CODE_PY.match(line):
+                self.start_code_re.match(line).group(3))
+        elif self.simple_start_code_re.match(line):
             self.metadata = {}
 
         if self.metadata is not None:
@@ -413,9 +417,12 @@ def find_cell_end(self, lines):
 class DoublePercentScriptCellReader(ScriptCellReader):
     """Read notebook cells from Hydrogen/Spyder/VScode scripts (#59)"""
 
-    comment = '#'
-    default_language = 'python'
-    start_code_re = re.compile(r"^#\s+%%(.*)$")
+    def __init__(self, ext):
+        ScriptCellReader.__init__(self, ext)
+        script = _SCRIPT_EXTENSIONS[ext]
+        self.default_language = script['language']
+        self.comment = script['comment']
+        self.start_code_re = re.compile(r"^{}\s+%%(.*)$".format(self.comment))
 
     def options_to_metadata(self, options):
         return None, double_percent_options_to_metadata(options)
@@ -471,7 +478,7 @@ class SphinxGalleryScriptCellReader(ScriptCellReader):
 
     comment = '#'
     default_language = 'python'
-    twenty_hash = re.compile('^#( |)#{19,}\s*$')
+    twenty_hash = re.compile(r'^#( |)#{19,}\s*$')
     markdown_marker = None
     rst2md = False
 

diff --git a/jupytext/cell_to_text.py b/jupytext/cell_to_text.py
@@ -6,8 +6,9 @@
 from .cell_metadata import filter_metadata, is_active, \
     metadata_to_rmd_options, metadata_to_json_options, \
     metadata_to_double_percent_options
-from .magics import escape_magic, escape_code_start
+from .magics import comment_magic, escape_code_start
 from .cell_reader import LightScriptCellReader
+from .languages import _SCRIPT_EXTENSIONS
 
 
 def cell_source(cell):
@@ -20,7 +21,7 @@ def cell_source(cell):
     return source.splitlines()
 
 
-def comment(lines, prefix):
+def comment_lines(lines, prefix):
     """Return commented lines"""
     if not prefix:
         return lines
@@ -29,7 +30,6 @@ def comment(lines, prefix):
 
 class BaseCellExporter(object):
     """A class that represent a notebook cell as text"""
-    prefix = None
 
     def __init__(self, cell, default_language, ext):
         self.ext = ext
@@ -38,6 +38,7 @@ def __init__(self, cell, default_language, ext):
         self.metadata = filter_metadata(cell.metadata)
         self.language = cell_language(self.source) or default_language
         self.default_language = default_language
+        self.comment = _SCRIPT_EXTENSIONS.get(ext, {}).get('comment', '#')
 
         # how many blank lines before next cell
         self.lines_to_next_cell = cell.metadata.get('lines_to_next_cell', 1)
@@ -67,13 +68,13 @@ def cell_to_text(self):
             return self.code_to_text()
 
         source = copy(self.source)
-        if not self.prefix:
+        if not self.comment:
             escape_code_start(source, self.ext, None)
         return self.markdown_to_text(source)
 
     def markdown_to_text(self, source):
         """Escape the given source, for a markdown cell"""
-        return comment(source, self.prefix)
+        return comment_lines(source, self.comment)
 
     def code_to_text(self):
         """Return the text representation of this cell as a code cell"""
@@ -88,7 +89,10 @@ def simplify_code_markers(self, text, next_text, lines):
 
 class MarkdownCellExporter(BaseCellExporter):
     """A class that represent a notebook cell as Markdown"""
-    prefix = ''
+
+    def __init__(self, cell, default_language, ext):
+        BaseCellExporter.__init__(self, cell, default_language, ext)
+        self.comment = ''
 
     def code_to_text(self):
         """Return the text representation of a code cell"""
@@ -106,7 +110,10 @@ def code_to_text(self):
 
 class RMarkdownCellExporter(BaseCellExporter):
     """A class that represent a notebook cell as Markdown"""
-    prefix = ''
+
+    def __init__(self, cell, default_language, ext):
+        BaseCellExporter.__init__(self, cell, default_language, ext)
+        self.comment = ''
 
     def code_to_text(self):
         """Return the text representation of a code cell"""
@@ -115,7 +122,7 @@ def code_to_text(self):
         escape_code_start(source, self.ext, self.language)
 
         if active:
-            escape_magic(source, self.language)
+            comment_magic(source, self.language)
 
         lines = []
         if not is_active('Rmd', self.metadata):
@@ -127,12 +134,12 @@ def code_to_text(self):
         return lines
 
 
-def endofcell_marker(source):
+def endofcell_marker(source, comment):
     """Issues #31 #38:  does the cell contain a blank line? In that case
     we add an end-of-cell marker"""
     endofcell = '-'
     while True:
-        endofcell_re = re.compile(r'^#( )' + endofcell + r'\s*$')
+        endofcell_re = re.compile(r'^{}( )'.format(comment) + endofcell + r'\s*$')
         if list(filter(endofcell_re.match, source)):
             endofcell = endofcell + '-'
         else:
@@ -141,13 +148,16 @@ def endofcell_marker(source):
 
 class LightScriptCellExporter(BaseCellExporter):
     """A class that represent a notebook cell as a Python or Julia script"""
-    prefix = '#'
+
+    def __init__(self, cell, default_language, ext):
+        BaseCellExporter.__init__(self, cell, default_language, ext)
+        self.comment = _SCRIPT_EXTENSIONS[ext]['comment']
 
     def is_code(self):
         # Treat markdown cells with metadata as code cells (#66)
         if self.cell_type == 'markdown' and self.metadata:
             self.metadata['cell_type'] = self.cell_type
-            self.source = comment(self.source, self.prefix)
+            self.source = comment_lines(self.source, self.comment)
             return True
         return super(LightScriptCellExporter, self).is_code()
 
@@ -164,12 +174,12 @@ def code_to_text(self):
         escape_code_start(source, self.ext, self.language)
 
         if active:
-            escape_magic(source, self.language)
+            comment_magic(source, self.language)
         else:
-            source = ['# ' + line if line else '#' for line in source]
+            source = [self.comment + ' ' + line if line else self.comment for line in source]
 
         if self.explicit_start_marker(source):
-            self.metadata['endofcell'] = endofcell_marker(source)
+            self.metadata['endofcell'] = endofcell_marker(source, self.comment)
 
         if not self.metadata:
             return source
@@ -179,17 +189,17 @@ def code_to_text(self):
         if endofcell == '-':
             del self.metadata['endofcell']
         options = metadata_to_json_options(self.metadata)
-        lines.append('# + {}'.format(options))
+        lines.append(self.comment + ' + {}'.format(options))
         lines.extend(source)
-        lines.append('# {}'.format(endofcell))
+        lines.append(self.comment + ' {}'.format(endofcell))
         return lines
 
     def explicit_start_marker(self, source):
         """Does the python representation of this cell requires an explicit
         start of cell marker?"""
         if self.metadata:
             return True
-        if all([line.startswith('#') for line in self.source]):
+        if all([line.startswith(self.comment) for line in self.source]):
             return True
         if LightScriptCellReader(self.ext).read(source)[1] < len(source):
             return True
@@ -199,24 +209,27 @@ def explicit_start_marker(self, source):
     def simplify_code_markers(self, text, next_text, lines):
         """Simplify cell marker when previous line is blank, remove end
         of cell marker when next cell has an explicit marker"""
-        if text[0] == '# + {}' and (not lines or not lines[-1]):
-            text[0] = '# +'
+        if text[0] == '{0} + {{}}'.format(self.comment) and (not lines or not lines[-1]):
+            text[0] = self.comment + ' +'
 
         # remove end of cell marker when redundant
         # with next explicit marker
-        if self.is_code() and text[-1] == '# -':
+        if self.is_code() and text[-1] == self.comment + ' -':
             if self.lines_to_end_of_cell_marker:
                 text = text[:-1] + \
-                       [''] * self.lines_to_end_of_cell_marker + ['# -']
-            elif not next_text or next_text[0].startswith('# + {'):
+                       [''] * self.lines_to_end_of_cell_marker + [self.comment + ' -']
+            elif not next_text or next_text[0].startswith(self.comment + ' + {'):
                 text = text[:-1]
 
         return text
 
 
 class RScriptCellExporter(BaseCellExporter):
     """A class that can represent a notebook cell as a R script"""
-    prefix = "#'"
+
+    def __init__(self, cell, default_language, ext):
+        BaseCellExporter.__init__(self, cell, default_language, ext)
+        self.comment = "#'"
 
     def code_to_text(self):
         """Return the text representation of a code cell"""
@@ -230,7 +243,7 @@ def code_to_text(self):
         escape_code_start(source, self.ext, self.language)
 
         if active:
-            escape_magic(source, self.language)
+            comment_magic(source, self.language)
 
         if not active:
             source = ['# ' + line if line else '#' for line in source]
@@ -248,7 +261,10 @@ def code_to_text(self):
 class DoublePercentCellExporter(BaseCellExporter):
     """A class that can represent a notebook cell as an
     Hydrogen/Spyder/VScode script (#59)"""
-    prefix = '#'
+
+    def __init__(self, cell, default_language, ext):
+        BaseCellExporter.__init__(self, cell, default_language, ext)
+        self.comment = _SCRIPT_EXTENSIONS[ext]['comment']
 
     def code_to_text(self):
         """Not used"""
@@ -265,22 +281,26 @@ def cell_to_text(self):
 
         options = metadata_to_double_percent_options(self.metadata)
         if options.startswith('%') or not options:
-            lines = ['# %%' + options]
+            lines = [self.comment + ' %%' + options]
         else:
-            lines = ['# %% ' + options]
+            lines = [self.comment + ' %% ' + options]
 
         if self.cell_type == 'code':
             return lines + self.source
 
-        return lines + comment(self.source, self.prefix)
+        return lines + comment_lines(self.source, self.comment)
 
 
 class SphinxGalleryCellExporter(BaseCellExporter):
     """A class that can represent a notebook cell as a
     Sphinx Gallery script (#80)"""
-    prefix = '#'
+
     default_cell_marker = '#' * 79
 
+    def __init__(self, cell, default_language, ext):
+        BaseCellExporter.__init__(self, cell, default_language, ext)
+        self.comment = '#'
+
     def code_to_text(self):
         """Not used"""
         pass
@@ -304,5 +324,4 @@ def cell_to_text(self):
             return [cell_marker] + self.source + [cell_marker]
 
         return [cell_marker if cell_marker.startswith('#' * 20)
-                else self.default_cell_marker] + comment(self.source,
-                                                         self.prefix)
+                else self.default_cell_marker] + comment_lines(self.source, self.comment)