-
-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
integrate c2c_markdown into v6_api (#1043)
* integrate c2c_markdown into v6_api * update bleach version
- Loading branch information
Showing
236 changed files
with
2,405 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Parsing the custom formatting syntax of camptocamp.org | ||
|
||
## Syntax | ||
|
||
Camptocamp.org uses markdown to format the text attributes of its documents. It builds on the base features of [Python-Markdown](https://github.com/waylan/Python-Markdown). | ||
|
||
On top of these features, other custom tags are added: | ||
|
||
* LTag `L# | 6a | tremendous pitch` | ||
* Emojis `:smile:` | ||
* images `[img=123]Legend[/img]` | ||
* toc `[toc]` | ||
* alerts `!!!! This is an alert banner` | ||
* wikilinks `[[routes/123|Walker ridge]]` | ||
* custom headers `## Approach # 10 mn` | ||
* ptag (hard new line) `[p]` | ||
* video `[video]https://youtube.com/123[/video]` | ||
|
||
## Sanitizer | ||
|
||
Output is sanitized against XSS injection using [Mozilla Bleach](https://github.com/mozilla/bleach). | ||
|
||
## Reliability | ||
|
||
This parser has been tested and fuzzed (~100,000,000 tests). Issues have also been found in Python-Markdown and Bleach: [1](https://github.com/mozilla/bleach/issues/352), [2](https://github.com/Python-Markdown/markdown/issues/643), [3](https://github.com/Python-Markdown/markdown/issues/640) and [4](https://github.com/Python-Markdown/markdown/issues/639) :sunglasses:. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
import markdown | ||
import bleach | ||
import binascii | ||
import os | ||
from threading import RLock | ||
|
||
from c2corg_api.markdown.wikilinks import C2CWikiLinkExtension | ||
from c2corg_api.markdown.img import C2CImageExtension | ||
from c2corg_api.markdown.video import C2CVideoExtension | ||
from c2corg_api.markdown.ltag import C2CLTagExtension | ||
from c2corg_api.markdown.header import C2CHeaderExtension | ||
from c2corg_api.markdown.ptag import C2CPTagExtension | ||
from c2corg_api.markdown.alerts import AlertExtension | ||
from c2corg_api.markdown.toc import C2CTocExtension | ||
from c2corg_api.markdown.emojis import C2CEmojiExtension | ||
from c2corg_api.markdown.nbsp import C2CNbspExtension | ||
from markdown.extensions.nl2br import Nl2BrExtension | ||
|
||
|
||
def _get_secret(): | ||
return binascii.hexlify(os.urandom(32)).decode('ascii') | ||
|
||
|
||
# HTML fragment that parse_code() returns instead of the rendered document
# when converting or sanitizing the text raises.
_PARSER_EXCEPTION_MESSAGE = """
<div c2c:role="danger" style="font-weight:bold">
Parser error, please send a mail to
<a href="mailto:dev@camptocamp.org">dev@camptocamp.org</a>
or post a message on
<a href="https://forum.camptocamp.org/c/site-et-association/v6-suggestions-bugs-et-problemes">
forum</a>.
</div>
"""  # noqa

# RLock because this lock can be released
# only by the thread that acquired it.
_parser_lock = RLock()

# Lazily built singletons, see _get_markdown_parser() and _get_cleaner().
_markdown_parser = None
_cleaner = None

# _iframe_secret_tag is used as a private key that stands in for a critical
# HTML node. The key point is this: the markdown extensions emit the secret
# tag instead of a real <iframe>, bleach neutralizes every critical node that
# is not whitelisted, and at the very end parse_code() replaces the secret
# tag by the real HTML node name.
#
# PEP 506: os.urandom is the safe way to generate private data, whereas the
# random module is not designed for secrets. hexlify() and decode('ascii')
# convert the bytes to a lower case string. Once V6_ui runs on Python 3.6 or
# higher, the secrets module will be used instead.
#
# How to hack C2C? To inject an iframe, an attacker would need to know the
# value of _iframe_secret_tag, which only lives in server memory.
_iframe_secret_tag = "iframe_" + _get_secret()
|
||
|
||
def _get_cleaner():
    """Return the shared ``bleach.Cleaner``, building it on first use.

    The cleaner whitelists bleach's default tags and attributes plus the
    tags and ``c2c:*`` attributes produced by the c2c markdown extensions,
    including the secret iframe placeholder tag. It is configured with
    ``strip=False`` and ``strip_comments=True``.

    The instance is cached in the module-level ``_cleaner``; this function
    takes no lock itself.
    """
    global _cleaner

    if _cleaner is None:
        allowed_tags = list(bleach.ALLOWED_TAGS) + [
            # headers
            "h1", "h2", "h3", "h4", "h5", "h6",

            # blocks
            "div", "p", "pre", "hr", "center",

            # inline nodes
            "span", "br", "sub", "sup", "s", "del", "ins", "small",

            # images
            "figure", "img", "figcaption",

            # placeholder emitted in place of <iframe>
            _iframe_secret_tag,

            # tables
            "table", "tr", "td", "th", "tbody"
        ]

        allowed_extra_attributes = {
            "a": [
                "c2c:role",
                "c2c:document-type",
                "c2c:document-id",
                "c2c:lang",
                "c2c:slug",
                "c2c:anchor"
            ],
            "h1": ["id", "c2c:role"],
            "h2": ["id", "c2c:role"],
            "h3": ["id", "c2c:role"],
            "h4": ["id", "c2c:role"],
            "h5": ["id", "c2c:role"],
            "h6": ["id", "c2c:role"],
            "table": ["c2c:role"],
            "div": ["class", "style", "c2c:role"],
            "td": ["colspan"],
            "span": ["class", "translate", "id", "c2c:role"],
            _iframe_secret_tag: ["src"],
            "figure": ["c2c:position", "c2c:role", "c2c:size"],
            "img": [
                "alt",
                "c2c:document-id",
                "c2c:role",
                "c2c:size",
                "c2c:url-proxy",
                "c2c:svg-name",
                "c2c:emoji-db"
            ],
        }

        # Copy every attribute list, not only the outer dict: extending a
        # list that is still shared with bleach.ALLOWED_ATTRIBUTES would
        # silently widen the library-wide defaults (e.g. for "a").
        allowed_attributes = {
            tag: list(attributes)
            for tag, attributes in bleach.ALLOWED_ATTRIBUTES.items()
        }
        for tag, extra in allowed_extra_attributes.items():
            allowed_attributes.setdefault(tag, []).extend(extra)

        _cleaner = bleach.Cleaner(
            tags=allowed_tags,
            attributes=allowed_attributes,
            styles=list(bleach.ALLOWED_STYLES) + ["clear"],
            protocols=bleach.ALLOWED_PROTOCOLS,
            strip=False,
            strip_comments=True)

    return _cleaner
|
||
|
||
def _get_markdown_parser():
    """Return the shared ``markdown.Markdown`` instance, creating it once.

    The parser is built with the c2c extensions (wikilinks, images, toc,
    video, L tags, headers, p tags, alerts, emojis, nbsp) plus ``nl2br``,
    outputs ``xhtml5`` and has attribute lists disabled. The instance is
    cached in the module-level ``_markdown_parser``.
    """
    global _markdown_parser

    if _markdown_parser:
        return _markdown_parser

    _markdown_parser = markdown.Markdown(
        output_format='xhtml5',
        extensions=[
            C2CWikiLinkExtension(),
            C2CImageExtension(),
            Nl2BrExtension(),
            C2CTocExtension(marker='[toc]', baselevel=2),
            # the video extension emits the secret tag instead of <iframe>
            C2CVideoExtension(iframe_secret_tag=_iframe_secret_tag),
            C2CLTagExtension(),
            C2CHeaderExtension(),
            C2CPTagExtension(),
            AlertExtension(),
            C2CEmojiExtension(),
            C2CNbspExtension(),
        ],
        enable_attributes=False)
    return _markdown_parser
|
||
|
||
def parse_code(text):
    """
    Convert camptocamp.org markdown to sanitized HTML.

    The text is converted by the shared markdown parser, cleaned by the
    shared bleach cleaner, and finally the secret iframe placeholder is
    replaced by the real ``iframe`` tag name.

    This function is thread-safe: conversion and cleaning run under the
    module-level lock.

    :param text: markdown source.
    :return: HTML string. If conversion or cleaning raises an ``Exception``,
        the failure is logged and ``_PARSER_EXCEPTION_MESSAGE`` is returned
        instead.
    """
    import logging

    # we need parsing to be thread safe because of
    # L numbering, and Markdown() has internal global variables

    # for explanation about Lock context manager usage
    # see https://docs.python.org/3/library/threading.html
    # (section "Using locks, conditions, and semaphores in the with statement")
    with _parser_lock:
        parser = _get_markdown_parser()
        cleaner = _get_cleaner()

        # reset parser state. Otherwise, the parser's internal cache grows
        # indefinitely, and performance decreases over time
        parser.reset()

        try:
            html = parser.convert(text)

            # the clean step is kept inside the locked section
            # because its thread safety is not established
            html = cleaner.clean(text=html)
        except Exception:
            # Exception, not a bare except: KeyboardInterrupt and SystemExit
            # must keep propagating. Log the traceback so that the failure
            # can be diagnosed; the user only sees the generic message.
            logging.getLogger(__name__).exception("Markdown parsing failed")
            html = _PARSER_EXCEPTION_MESSAGE

    return html.replace(_iframe_secret_tag, "iframe")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from markdown.extensions import Extension | ||
from markdown.blockprocessors import BlockProcessor | ||
from markdown import util | ||
import re | ||
|
||
|
||
class AlertProcessor(BlockProcessor):
    """Block processor turning ``!!``-prefixed lines into alert banners.

    Consecutive lines starting with the same number of ``!`` (two to four,
    after at most three spaces) are wrapped in a ``<div>`` whose
    ``c2c:role`` attribute is taken from ``roles``.
    """

    # optional line start, up to 3 spaces, 2-4 bangs, then the remaining text
    RE = re.compile(r'(^|\n)[ ]{0,3}(!{2,4})(([^!]|$).*)')

    # number of bangs -> value of the c2c:role attribute
    roles = {
        "!!": "info",
        "!!!": "warning",
        "!!!!": "danger",
    }

    def test(self, parent, block):
        """Return True when the block contains an alert line."""
        return self.RE.search(block) is not None

    def run(self, parent, blocks):
        """Consume the first block and emit its alert banner.

        Text before the first alert line and text after the banner are
        handed back to the block parser with ``parent`` as parent.
        """
        current = blocks.pop(0)
        match = self.RE.search(current)
        marker = match.group(2)
        same_level = re.compile("^[ ]{0,3}" + marker + "([^!]|$)")

        # Pass lines before alert banner
        self.parser.parseBlocks(parent, [current[:match.start()]])

        lines = current[match.start():].split('\n')
        if not lines[0]:
            # the match started on the newline preceding the banner
            del lines[0]

        # the banner is the leading run of lines carrying the same prefix
        split_at = 0
        while split_at < len(lines) and same_level.search(lines[split_at]):
            split_at += 1
        banner_lines = lines[:split_at]
        remaining = lines[split_at:]

        # Remove the bangs from the beginning of each line.
        content = '\n'.join(self.clean(line) for line in banner_lines)

        banner = util.etree.SubElement(parent, 'div')
        banner.set("c2c:role", self.roles[marker])
        # Recursively parse the banner content with the div as parent.
        self.parser.parseChunk(banner, content)

        # and continue parsing next part of the block
        self.parser.parseBlocks(parent, ["\n".join(remaining)])

    def clean(self, line):
        """ Remove ``!`` from beginning of a line. """
        if line.strip() in ("!!", "!!!", "!!!!"):
            return ""

        match = self.RE.match(line)
        if match:
            return match.group(3)
        return line
|
||
|
||
class AlertExtension(Extension):
    """Markdown extension that registers the alert banner block syntax."""

    def extendMarkdown(self, md, md_globals):  # noqa
        """Register an AlertProcessor under 'c2calert', before 'paragraph'."""
        processor = AlertProcessor(md.parser)
        md.parser.blockprocessors.add('c2calert', processor, "<paragraph")
|
||
|
||
def makeExtension(*args, **kwargs):  # noqa
    """Build an AlertExtension, forwarding every argument unchanged."""
    extension = AlertExtension(*args, **kwargs)
    return extension
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Path prefix under which the activity SVG files are served.
SVG_CDN = "/static/img/documents/activities/"

# Identifier of this emoji database.
name = "c2c-activities"


def _activity(label, svg_name, codepoint=None):
    """Build one emoji entry; ``unicode`` is only set when a codepoint exists.

    NOTE(review): the category spelling "activitiy" is kept verbatim because
    it is a runtime value; confirm with consumers before correcting it.
    """
    entry = {
        "category": "activitiy",
        "name": label,
        "svg_name": svg_name,
    }
    if codepoint is not None:
        entry["unicode"] = codepoint
    return entry


# Emoji short code -> description of the matching activity pictogram.
emoji = {
    ":rock_climbing:": _activity("rock climbing", "rock_climbing", "1f9d7"),
    ":skitouring:": _activity("ski touring", "skitouring", "26f7"),
    ":hiking:": _activity("hiking", "hiking"),
    ":ice_climbing:": _activity("ice climbing", "ice_climbing"),
    ":mountain_biking:": _activity("mountain biking", "mountain_biking"),
    ":paragliding:": _activity("paragliding", "paragliding"),
    ":slacklining:": _activity("slacklining", "slacklining"),
    ":snow_ice_mixed:": _activity("snow ice mixed", "snow_ice_mixed"),
    ":snowshoeing:": _activity("snowshoeing", "snowshoeing"),
    ":via_ferrata:": _activity("via ferrata", "via_ferrata"),
    ":mountain_climbing:": _activity("mountain climbing",
                                     "mountain_climbing"),
}

# No alternative short codes are defined for this database.
aliases = {}
Oops, something went wrong.