
Commit

Merge pull request #270 from gsnedders/rename_stuff
gsnedders authored Jul 14, 2016
2 parents 945911b + 8cb144b commit a8ba43e
Showing 40 changed files with 219 additions and 275 deletions.
14 changes: 14 additions & 0 deletions CHANGES.rst
@@ -50,6 +50,20 @@ Released on XXX
with a set of keyword arguments: override_encoding, transport_encoding,
same_origin_parent_encoding, likely_encoding, and default_encoding.**

* **Move filters._base, treebuilders._base, and treewalkers._base to .base
to clarify their status as public.**

* **Get rid of the sanitizer package. Merge sanitizer.sanitize into the
sanitizer.htmlsanitizer module and move that to sanitizer. This means
anyone who used sanitizer.sanitize or sanitizer.HTMLSanitizer needs no
code changes.**

* **Rename treewalkers.lxmletree to .etree_lxml and
treewalkers.genshistream to .genshi to have a consistent API.**

* Move a whole load of stuff (inputstream, ihatexml, trie, tokenizer,
utils) to be underscore prefixed to clarify their status as private.


0.9999999/1.0b8
~~~~~~~~~~~~~~~
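The renames in this changelog entry are import-path changes rather than behaviour changes. A minimal migration sketch, assuming code that imported the old module names directly (the lxml and genshi walkers also require those third-party packages, so their imports are left commented out):

    # Hedged migration sketch for the renames listed in CHANGES.rst above.
    from html5lib.filters import base as filters_base            # was filters._base
    from html5lib.treebuilders import base as treebuilders_base  # was treebuilders._base
    from html5lib.treewalkers import base as treewalkers_base    # was treewalkers._base
    # from html5lib.treewalkers import etree_lxml  # was treewalkers.lxmletree (needs lxml)
    # from html5lib.treewalkers import genshi      # was treewalkers.genshistream (needs genshi)

    # inputstream, ihatexml, trie, tokenizer and utils are now underscore-prefixed
    # (html5lib._inputstream, html5lib._ihatexml, html5lib._trie, html5lib._tokenizer,
    # html5lib._utils) and should not be imported directly.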
4 changes: 2 additions & 2 deletions doc/html5lib.filters.rst
@@ -1,10 +1,10 @@
filters Package
===============

:mod:`_base` Module
:mod:`base` Module
-------------------

.. automodule:: html5lib.filters._base
.. automodule:: html5lib.filters.base
:members:
:undoc-members:
:show-inheritance:
37 changes: 2 additions & 35 deletions doc/html5lib.rst
@@ -25,42 +25,10 @@ html5lib Package
:undoc-members:
:show-inheritance:

:mod:`ihatexml` Module
:mod:`serializer` Module
----------------------

.. automodule:: html5lib.ihatexml
:members:
:undoc-members:
:show-inheritance:

:mod:`inputstream` Module
-------------------------

.. automodule:: html5lib.inputstream
:members:
:undoc-members:
:show-inheritance:

:mod:`sanitizer` Module
-----------------------

.. automodule:: html5lib.sanitizer
:members:
:undoc-members:
:show-inheritance:

:mod:`tokenizer` Module
-----------------------

.. automodule:: html5lib.tokenizer
:members:
:undoc-members:
:show-inheritance:

:mod:`utils` Module
-------------------

.. automodule:: html5lib.utils
.. automodule:: html5lib.serializer
:members:
:undoc-members:
:show-inheritance:
@@ -71,7 +39,6 @@ Subpackages
.. toctree::

html5lib.filters
html5lib.serializer
html5lib.treebuilders
html5lib.treewalkers
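With doc/html5lib.serializer.rst folded into html5lib.rst, the serializer is documented as a plain module; the API itself is untouched by this commit. A short usage sketch of that unchanged API:

    # Minimal sketch: parse, walk, and serialize a document.
    import html5lib
    from html5lib import serializer, treewalkers

    doc = html5lib.parse("<p class='greeting'>Hello<br>world")
    walker = treewalkers.getTreeWalker("etree")
    ser = serializer.HTMLSerializer(omit_optional_tags=False)
    print("".join(ser.serialize(walker(doc))))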

19 changes: 0 additions & 19 deletions doc/html5lib.serializer.rst

This file was deleted.

4 changes: 2 additions & 2 deletions doc/html5lib.treebuilders.rst
@@ -9,10 +9,10 @@ treebuilders Package
:undoc-members:
:show-inheritance:

:mod:`_base` Module
:mod:`base` Module
-------------------

.. automodule:: html5lib.treebuilders._base
.. automodule:: html5lib.treebuilders.base
:members:
:undoc-members:
:show-inheritance:
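treebuilders._base becomes the public treebuilders.base; concrete builders are still obtained through getTreeBuilder(). A small sketch of how they relate:

    # Sketch: the builder returned by getTreeBuilder() subclasses the renamed
    # public base module treebuilders.base.
    import html5lib
    from html5lib.treebuilders import getTreeBuilder, base

    TreeBuilder = getTreeBuilder("etree")
    assert issubclass(TreeBuilder, base.TreeBuilder)

    parser = html5lib.HTMLParser(tree=TreeBuilder)
    doc = parser.parse("<p>built with an explicit tree builder</p>")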
19 changes: 10 additions & 9 deletions doc/html5lib.treewalkers.rst
@@ -9,10 +9,10 @@ treewalkers Package
:undoc-members:
:show-inheritance:

:mod:`_base` Module
:mod:`base` Module
-------------------

.. automodule:: html5lib.treewalkers._base
.. automodule:: html5lib.treewalkers.base
:members:
:undoc-members:
:show-inheritance:
@@ -33,18 +33,19 @@ treewalkers Package
:undoc-members:
:show-inheritance:

:mod:`genshistream` Module
--------------------------
:mod:`etree_lxml` Module
-----------------------

.. automodule:: html5lib.treewalkers.genshistream
.. automodule:: html5lib.treewalkers.etree_lxml
:members:
:undoc-members:
:show-inheritance:

:mod:`lxmletree` Module
-----------------------

.. automodule:: html5lib.treewalkers.lxmletree
:mod:`genshi` Module
--------------------------

.. automodule:: html5lib.treewalkers.genshi
:members:
:undoc-members:
:show-inheritance:
:show-inheritance:
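Although the walker modules are renamed, getTreeWalker() keeps the same short names ("dom", "etree", "lxml", "genshi"), so typical callers need no change. A hedged sketch:

    # Sketch: "lxml" now resolves to treewalkers.etree_lxml and "genshi" to
    # treewalkers.genshi internally; the short names passed in stay the same.
    import html5lib
    from html5lib.treewalkers import getTreeWalker

    doc = html5lib.parse("<span>example</span>")   # default etree tree
    walker = getTreeWalker("etree")
    for token in walker(doc):
        print(token["type"], token.get("name", ""))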
File renamed without changes.
10 changes: 5 additions & 5 deletions html5lib/inputstream.py → html5lib/_inputstream.py
@@ -10,7 +10,7 @@

from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
from .constants import ReparseException
from . import utils
from . import _utils

from io import StringIO

@@ -28,7 +28,7 @@

invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa

if utils.supports_lone_surrogates:
if _utils.supports_lone_surrogates:
# Use one extra step of indirection and create surrogates with
# eval. Not using this indirection would introduce an illegal
# unicode literal on platforms not supporting such lone
@@ -176,7 +176,7 @@ def __init__(self, source):
"""

if not utils.supports_lone_surrogates:
if not _utils.supports_lone_surrogates:
# Such platforms will have already checked for such
# surrogate errors, so no need to do this checking.
self.reportCharacterErrors = None
@@ -304,9 +304,9 @@ def characterErrorsUCS2(self, data):
codepoint = ord(match.group())
pos = match.start()
# Pretty sure there should be endianness issues here
if utils.isSurrogatePair(data[pos:pos + 2]):
if _utils.isSurrogatePair(data[pos:pos + 2]):
# We have a surrogate pair!
char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
if char_val in non_bmp_invalid_codepoints:
self.errors.append("invalid-codepoint")
skip = True
4 changes: 2 additions & 2 deletions html5lib/tokenizer.py → html5lib/_tokenizer.py
@@ -11,9 +11,9 @@
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters

from .inputstream import HTMLInputStream
from ._inputstream import HTMLInputStream

from .trie import Trie
from ._trie import Trie

entitiesTrie = Trie(entities)

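With the tokenizer and input stream renamed to _tokenizer and _inputstream, the supported route is the public parser API rather than importing them directly. A minimal sketch:

    # Sketch: the parser constructs its own _tokenizer/_inputstream internally.
    import html5lib

    parser = html5lib.HTMLParser()
    document = parser.parse("<title>t</title><p>one<p>two")
    print(parser.errors)   # positions and codes of any parse errors encountered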
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions html5lib/filters/alphabeticalattributes.py
@@ -1,16 +1,16 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base
from . import base

try:
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict


class Filter(_base.Filter):
class Filter(base.Filter):
def __iter__(self):
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
if token["type"] in ("StartTag", "EmptyTag"):
attrs = OrderedDict()
for name, value in sorted(token["data"].items(),
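Because filters._base is now the public filters.base, user-defined filters can subclass it directly. A sketch of a hypothetical filter:

    # Hypothetical filter written against the renamed public module
    # html5lib.filters.base (formerly filters._base).
    from html5lib.filters import base

    class DropComments(base.Filter):
        """Drop comment tokens from the token stream."""
        def __iter__(self):
            for token in base.Filter.__iter__(self):
                if token["type"] != "Comment":
                    yield token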
File renamed without changes.
8 changes: 4 additions & 4 deletions html5lib/filters/inject_meta_charset.py
@@ -1,19 +1,19 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base
from . import base


class Filter(_base.Filter):
class Filter(base.Filter):
def __init__(self, source, encoding):
_base.Filter.__init__(self, source)
base.Filter.__init__(self, source)
self.encoding = encoding

def __iter__(self):
state = "pre_head"
meta_found = (self.encoding is None)
pending = []

for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag":
if token["name"].lower() == "head":
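The constructor shown above takes the token source and a target encoding; the serializer normally applies this filter itself when given an output encoding, but it can also be used standalone. A hedged sketch:

    # Sketch: applying inject_meta_charset directly to a walker's token stream.
    import html5lib
    from html5lib import treewalkers
    from html5lib.filters import inject_meta_charset

    doc = html5lib.parse("<html><head></head><body>hi</body></html>")
    stream = treewalkers.getTreeWalker("etree")(doc)
    tokens = list(inject_meta_charset.Filter(stream, "utf-8"))
    # the head now carries a meta token declaring charset="utf-8"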
6 changes: 3 additions & 3 deletions html5lib/filters/lint.py
@@ -2,21 +2,21 @@

from six import text_type

from . import _base
from . import base
from ..constants import namespaces, voidElements

from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)


class Filter(_base.Filter):
class Filter(base.Filter):
def __init__(self, source, require_matching_tags=True):
super(Filter, self).__init__(source)
self.require_matching_tags = require_matching_tags

def __iter__(self):
open_elements = []
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
namespace = token["namespace"]
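The lint filter asserts basic well-formedness of any token stream it wraps; require_matching_tags is the keyword shown above. A sketch:

    # Sketch: the filter raises AssertionError on malformed token streams
    # (for example, mismatched start and end tags).
    import html5lib
    from html5lib import treewalkers
    from html5lib.filters import lint

    doc = html5lib.parse("<div><p>checked</p></div>")
    stream = treewalkers.getTreeWalker("etree")(doc)
    for token in lint.Filter(stream, require_matching_tags=True):
        pass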
4 changes: 2 additions & 2 deletions html5lib/filters/optionaltags.py
@@ -1,9 +1,9 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base
from . import base


class Filter(_base.Filter):
class Filter(base.Filter):
def slider(self):
previous1 = previous2 = None
for token in self.source:
28 changes: 10 additions & 18 deletions html5lib/filters/sanitizer.py
@@ -5,13 +5,13 @@

from six.moves import urllib_parse as urlparse

from . import _base
from . import base
from ..constants import namespaces, prefixes

__all__ = ["Filter"]


acceptable_elements = frozenset((
allowed_elements = frozenset((
(namespaces['html'], 'a'),
(namespaces['html'], 'abbr'),
(namespaces['html'], 'acronym'),
@@ -175,7 +175,7 @@
(namespaces['svg'], 'use'),
))

acceptable_attributes = frozenset((
allowed_attributes = frozenset((
# HTML attributes
(None, 'abbr'),
(None, 'accept'),
@@ -552,7 +552,7 @@
(None, 'use')
))

acceptable_css_properties = frozenset((
allowed_css_properties = frozenset((
'azimuth',
'background-color',
'border-bottom-color',
@@ -601,7 +601,7 @@
'width',
))

acceptable_css_keywords = frozenset((
allowed_css_keywords = frozenset((
'auto',
'aqua',
'black',
@@ -643,7 +643,7 @@
'yellow',
))

acceptable_svg_properties = frozenset((
allowed_svg_properties = frozenset((
'fill',
'fill-opacity',
'fill-rule',
@@ -654,7 +654,7 @@
'stroke-opacity',
))

acceptable_protocols = frozenset((
allowed_protocols = frozenset((
'ed2k',
'ftp',
'http',
@@ -680,7 +680,7 @@
'data',
))

acceptable_content_types = frozenset((
allowed_content_types = frozenset((
'image/png',
'image/jpeg',
'image/gif',
@@ -689,14 +689,6 @@
'text/plain',
))

allowed_elements = acceptable_elements
allowed_attributes = acceptable_attributes
allowed_css_properties = acceptable_css_properties
allowed_css_keywords = acceptable_css_keywords
allowed_svg_properties = acceptable_svg_properties
allowed_protocols = acceptable_protocols
allowed_content_types = acceptable_content_types


data_content_type = re.compile(r'''
^
@@ -712,7 +704,7 @@
re.VERBOSE)


class Filter(_base.Filter):
class Filter(base.Filter):
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
def __init__(self,
source,
@@ -739,7 +731,7 @@ def __init__(self,
self.svg_allow_local_href = svg_allow_local_href

def __iter__(self):
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token
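The acceptable_* sets are renamed to allowed_*, matching the keyword-argument names the Filter already used, which is why the compatibility aliases could simply be deleted. A hedged usage sketch:

    # Sketch: the module-level sets are now named allowed_*; the Filter's
    # keyword arguments (allowed_elements, allowed_protocols, ...) match them.
    import html5lib
    from html5lib import serializer, treewalkers
    from html5lib.filters import sanitizer

    fragment = html5lib.parseFragment("<p onclick='x()'>ok</p><script>bad()</script>")
    stream = treewalkers.getTreeWalker("etree")(fragment)
    clean = sanitizer.Filter(stream, allowed_elements=sanitizer.allowed_elements)
    print(serializer.HTMLSerializer().render(clean))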