Skip to content

Commit

Permalink
Remove ability to use a custom tokenizer
Browse files (browse the repository at this point in the history)
This should no longer be needed since the sanitizer changes (#110).
  • Loading branch information
gsnedders committed Jul 6, 2016
1 parent e65bee9 commit d8d5bb6
Showing 1 changed file with 4 additions and 10 deletions.
14 changes: 4 additions & 10 deletions html5lib/html5parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,13 @@ class HTMLParser(object):
"""HTML parser. Generates a tree structure from a stream of (possibly
malformed) HTML"""

def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
strict=False, namespaceHTMLElements=True, debug=False):
def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
"""
strict - raise an exception when a parse error is encountered
tree - a treebuilder class controlling the type of tree that will be
returned. Built in treebuilders can be accessed through
html5lib.treebuilders.getTreeBuilder(treeType)
tokenizer - a class that provides a stream of tokens to the treebuilder.
This may be replaced for e.g. a sanitizer which converts some tags to
text
"""

# Raise an exception on the first error encountered
Expand All @@ -79,7 +74,6 @@ def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
if tree is None:
tree = treebuilders.getTreeBuilder("etree")
self.tree = tree(namespaceHTMLElements)
self.tokenizer_class = tokenizer
self.errors = []

self.phases = dict([(name, cls(self, self.tree)) for name, cls in
Expand All @@ -91,9 +85,9 @@ def _parse(self, stream, innerHTML=False, container="div", encoding=None,
self.innerHTMLMode = innerHTML
self.container = container
self.scripting = scripting
self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
useChardet=useChardet,
parser=self, **kwargs)
self.tokenizer = tokenizer.HTMLTokenizer(stream, encoding=encoding,
useChardet=useChardet,
parser=self, **kwargs)
self.reset()

try:
Expand Down

0 comments on commit d8d5bb6

Please sign in to comment.