Skip to content

Commit

Permalink
integrate c2c_markdown into v6_api (#1043)
Browse files Browse the repository at this point in the history
* integrate c2c_markdown into v6_api

* update bleach version
  • Loading branch information
lbesson authored Apr 19, 2021
1 parent 2430ee2 commit eb40019
Show file tree
Hide file tree
Showing 236 changed files with 2,405 additions and 4 deletions.
25 changes: 25 additions & 0 deletions c2corg_api/markdown/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Parsing the custom formatting syntax of camptocamp.org

## Syntax

Camptocamp.org uses markdown to format the text attributes of documents. It relies on the base features of [Python-Markdown](https://github.com/waylan/Python-Markdown).

On top of these features, the following custom tags are added:

* LTag `L# | 6a | tremendous pitch`
* Emojis `:smile:`
* images `[img=123]Legend[/img]`
* toc `[toc]`
* alerts `!!!! This is an alert banner`
* wikilinks `[[routes/123|Walker ridge]]`
* custom headers `## Approach # 10 mn`
* ptag (hard new line) `[p]`
* video `[video]https://youtube.com/123[/video]`

## Sanitizer

Output is sanitized against XSS injection using [Mozilla Bleach](https://github.com/mozilla/bleach).

## Reliability

This parser has been tested and fuzzed (~100,000,000 tests). Issues have also been found in python markdown and bleach: [1](https://github.com/mozilla/bleach/issues/352), [2](https://github.com/Python-Markdown/markdown/issues/643), [3](https://github.com/Python-Markdown/markdown/issues/640) and [4](https://github.com/Python-Markdown/markdown/issues/639) :sunglasses:.
184 changes: 184 additions & 0 deletions c2corg_api/markdown/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import binascii
import logging
import os
from threading import RLock

import bleach
import markdown
from markdown.extensions.nl2br import Nl2BrExtension

from c2corg_api.markdown.wikilinks import C2CWikiLinkExtension
from c2corg_api.markdown.img import C2CImageExtension
from c2corg_api.markdown.video import C2CVideoExtension
from c2corg_api.markdown.ltag import C2CLTagExtension
from c2corg_api.markdown.header import C2CHeaderExtension
from c2corg_api.markdown.ptag import C2CPTagExtension
from c2corg_api.markdown.alerts import AlertExtension
from c2corg_api.markdown.toc import C2CTocExtension
from c2corg_api.markdown.emojis import C2CEmojiExtension
from c2corg_api.markdown.nbsp import C2CNbspExtension


def _get_secret():
    """
    Return a fresh random token as a 64-character lowercase hex string.

    The token is the hexadecimal form of 32 bytes read from ``os.urandom``.
    """
    random_bytes = os.urandom(32)
    hex_digits = binascii.hexlify(random_bytes)
    return hex_digits.decode('ascii')


# HTML fragment that parse_code() returns instead of the rendered document
# when converting or sanitizing the input raises.
_PARSER_EXCEPTION_MESSAGE = """
<div c2c:role="danger" style="font-weight:bold">
Parser error, please send a mail to
<a href="mailto:dev@camptocamp.org">dev@camptocamp.org</a>
or post a message on
<a href="https://forum.camptocamp.org/c/site-et-association/v6-suggestions-bugs-et-problemes">
forum</a>.
</div>
""" # noqa

# Serializes every use of the shared parser and cleaner in parse_code().
# An RLock is used because it can only be released by the thread
# that acquired it.
_parser_lock = RLock()

# Shared instances, created on first use by _get_markdown_parser() and
# _get_cleaner() respectively.
_markdown_parser = None
_cleaner = None

# Placeholder tag name used in place of "iframe" while the document is parsed
# and sanitized; parse_code() swaps it back to "iframe" as its last step.
# The random suffix is generated once, when this module is imported.
_iframe_secret_tag = "iframe_" + _get_secret()

# The bare string below is not a docstring (it is not the first statement of
# the module); it is kept as an in-code note explaining the secret-tag design.
"""
_***_secret_tag is used as a private key to replace critical HTML node and
attributes. The key point is this : the parser will use them. bleach will
remove all critical nodes. Then, a very end parser replace secret_tag by good
HTML node/attribute
PEP 506 :
os.urandom is the safe way to generate private data, where random module only
generate random data without entropy. Hexlify() and ascii() convert it to
lower case string. Once V6_ui will be into python 3.6 or higher, we will use
secrets module.
How to hack C2C ? if you want to inject an iframe, you will need to know the
value of _iframe_secret_tag present into server memory.
"""


def _get_cleaner():
    """
    Return the shared ``bleach.Cleaner``, building it on the first call.

    The cleaner accepts bleach's default tags and attributes plus the
    camptocamp.org additions listed below, including the secret iframe
    placeholder tag with its ``src`` attribute.

    The result is cached in the module-level ``_cleaner``.
    """
    global _cleaner

    if _cleaner is None:
        allowed_tags = list(bleach.ALLOWED_TAGS) + [
            # headers
            "h1", "h2", "h3", "h4", "h5", "h6",

            # blocks
            "div", "p", "pre", "hr", "center",

            # inline nodes
            "span", "br", "sub", "sup", "s", "del", "ins", "small",

            # images
            "figure", "img", "figcaption",

            # placeholder standing in for <iframe> until the final step
            _iframe_secret_tag,

            # tables
            "table", "tr", "td", "th", "tbody"
        ]

        allowed_extra_attributes = {
            "a": [
                "c2c:role",
                "c2c:document-type",
                "c2c:document-id",
                "c2c:lang",
                "c2c:slug",
                "c2c:anchor"
            ],
            "h1": ["id", "c2c:role"],
            "h2": ["id", "c2c:role"],
            "h3": ["id", "c2c:role"],
            "h4": ["id", "c2c:role"],
            "h5": ["id", "c2c:role"],
            "h6": ["id", "c2c:role"],
            "table": ["c2c:role"],
            "div": ["class", "style", "c2c:role"],
            "td": ["colspan"],
            "span": ["class", "translate", "id", "c2c:role"],
            _iframe_secret_tag: ["src"],
            "figure": ["c2c:position", "c2c:role", "c2c:size"],
            "img": [
                "alt",
                "c2c:document-id",
                "c2c:role",
                "c2c:size",
                "c2c:url-proxy",
                "c2c:svg-name",
                "c2c:emoji-db"
            ],
        }

        # dict() is only a shallow copy: the per-tag lists are still the ones
        # owned by bleach. Build a new list for every merged entry instead of
        # extending in place, so bleach.ALLOWED_ATTRIBUTES is left untouched.
        allowed_attributes = dict(bleach.ALLOWED_ATTRIBUTES)
        for tag, extra in allowed_extra_attributes.items():
            inherited = allowed_attributes.get(tag, [])
            allowed_attributes[tag] = list(inherited) + extra

        _cleaner = bleach.Cleaner(tags=allowed_tags,
                                  attributes=allowed_attributes,
                                  styles=bleach.ALLOWED_STYLES + ["clear"],
                                  protocols=bleach.ALLOWED_PROTOCOLS,
                                  strip=False,
                                  strip_comments=True)

    return _cleaner


def _get_markdown_parser():
    """
    Return the shared ``markdown.Markdown`` instance, building it on first use.

    The parser is configured with the camptocamp.org extensions and cached in
    the module-level ``_markdown_parser``.
    """
    global _markdown_parser

    # Already built by an earlier call: hand back the cached instance
    if _markdown_parser:
        return _markdown_parser

    _markdown_parser = markdown.Markdown(
        output_format='xhtml5',
        extensions=[
            C2CWikiLinkExtension(),
            C2CImageExtension(),
            Nl2BrExtension(),
            C2CTocExtension(marker='[toc]', baselevel=2),
            C2CVideoExtension(iframe_secret_tag=_iframe_secret_tag),
            C2CLTagExtension(),
            C2CHeaderExtension(),
            C2CPTagExtension(),
            AlertExtension(),
            C2CEmojiExtension(),
            C2CNbspExtension(),
        ],
        enable_attributes=False,
    )
    return _markdown_parser


def parse_code(text):
    """
    Convert camptocamp.org markdown to sanitized HTML.

    The text is rendered by the shared markdown parser, cleaned by the shared
    bleach cleaner, and finally the secret iframe placeholder is replaced by
    a real ``iframe`` tag name.

    This function is thread-safe: parsing and cleaning run under a lock.

    :param text: markdown source
    :return: HTML string; if parsing or cleaning raises an ``Exception``, the
        failure is logged and ``_PARSER_EXCEPTION_MESSAGE`` is returned
    """

    # we need parsing to be thread safe because of
    # L numbering, and because Markdown() has internal state
    with _parser_lock:
        parser = _get_markdown_parser()
        cleaner = _get_cleaner()

        # reset parser state. Otherwise, internals parser cache grows
        # indefinitely, and performance decreases over time
        parser.reset()

        try:
            text = parser.convert(text)

            # we keep clean function into thread safe part,
            # because we are not sure of this function
            text = cleaner.clean(text=text)
        except Exception:
            # Only ordinary errors are turned into the error banner;
            # KeyboardInterrupt and SystemExit are left to propagate.
            # The traceback is logged so the failure can be diagnosed.
            logging.getLogger(__name__).exception(
                "Markdown parsing failed")
            text = _PARSER_EXCEPTION_MESSAGE

    text = text.replace(_iframe_secret_tag, "iframe")

    return text
73 changes: 73 additions & 0 deletions c2corg_api/markdown/alerts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from markdown.extensions import Extension
from markdown.blockprocessors import BlockProcessor
from markdown import util
import re


class AlertProcessor(BlockProcessor):
    """
    Block processor for alert banners.

    A banner is a run of lines starting with up to three spaces followed by
    two, three or four ``!``. It is rendered as a ``div`` whose ``c2c:role``
    attribute is taken from ``roles``.
    """

    # group 2: the run of "!" ; group 3: the rest of the line
    RE = re.compile(r'(^|\n)[ ]{0,3}(!{2,4})(([^!]|$).*)')

    roles = {
        "!!": "info",
        "!!!": "warning",
        "!!!!": "danger",
    }

    def test(self, parent, block):
        """ Tell whether ``block`` contains an alert line. """
        return self.RE.search(block) is not None

    def run(self, parent, blocks):
        """
        Consume the first block and append an alert ``div`` to ``parent``.

        Text located before the banner, and text located after it, are
        handed back to the block parser with ``parent`` as their parent.
        """
        current = blocks.pop(0)
        match = self.RE.search(current)
        marker = match.group(2)
        same_level = re.compile("^[ ]{0,3}" + marker + "([^!]|$)")

        # Whatever precedes the alert banner is parsed as usual
        leading = current[:match.start()]
        self.parser.parseBlocks(parent, [leading])

        remaining = current[match.start():].split('\n')
        if not remaining[0]:
            # the match started on a newline: drop the empty first item
            del remaining[0]

        # The banner is made of the leading lines using the same marker
        count = 0
        while count < len(remaining) and same_level.search(remaining[count]):
            count += 1

        banner_lines = remaining[:count]
        remaining = remaining[count:]

        # Strip the marker from the beginning of each banner line
        content = '\n'.join(self.clean(line) for line in banner_lines)

        banner = util.etree.SubElement(parent, 'div')
        banner.set("c2c:role", self.roles[marker])
        # Recursively parse the banner content with the div as parent
        self.parser.parseChunk(banner, content)

        # and carry on with what is left of the block
        self.parser.parseBlocks(parent, ["\n".join(remaining)])

    def clean(self, line):
        """ Remove ``!`` from beginning of a line. """
        if line.strip() in ("!!", "!!!", "!!!!"):
            # marker alone on its line: nothing to keep
            return ""

        match = self.RE.match(line)
        return match.group(3) if match else line


class AlertExtension(Extension):
    """ Markdown extension that installs ``AlertProcessor``. """

    def extendMarkdown(self, md, md_globals):  # noqa
        """ Add the alert processor to the block processors of ``md``. """
        processors = md.parser.blockprocessors
        alert_processor = AlertProcessor(md.parser)
        # "<paragraph" is the location argument: it places the alert
        # processor relative to the processor named "paragraph"
        processors.add('c2calert', alert_processor, "<paragraph")


def makeExtension(*args, **kwargs):  # noqa
    """ Build an ``AlertExtension``, forwarding every argument to it. """
    extension = AlertExtension(*args, **kwargs)
    return extension
Empty file.
65 changes: 65 additions & 0 deletions c2corg_api/markdown/emoji_databases/c2c_activities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Base path of the activity SVG pictures
SVG_CDN = "/static/img/documents/activities/"

# Identifier of this emoji database
name = "c2c-activities"

# One row per activity: (svg name, display name, unicode code point or None).
# The emoji shortcode is the svg name wrapped in colons.
_ACTIVITIES = (
    ("rock_climbing", "rock climbing", "1f9d7"),
    ("skitouring", "ski touring", "26f7"),
    ("hiking", "hiking", None),
    ("ice_climbing", "ice climbing", None),
    ("mountain_biking", "mountain biking", None),
    ("paragliding", "paragliding", None),
    ("slacklining", "slacklining", None),
    ("snow_ice_mixed", "snow ice mixed", None),
    ("snowshoeing", "snowshoeing", None),
    ("via_ferrata", "via ferrata", None),
    ("mountain_climbing", "mountain climbing", None),
)


def _build_entry(svg_name, label, code_point):
    """ Build the description of one emoji of the database. """
    entry = {
        # NOTE(review): spelling kept exactly as in the original data;
        # check the consumers of this value before correcting it
        "category": "activitiy",
        "name": label,
        "svg_name": svg_name,
    }
    if code_point is not None:
        # only some activities have a unicode counterpart
        entry["unicode"] = code_point
    return entry


emoji = {
    ":%s:" % svg_name: _build_entry(svg_name, label, code_point)
    for svg_name, label, code_point in _ACTIVITIES
}

aliases = {}
Loading

0 comments on commit eb40019

Please sign in to comment.