diff --git a/.gitignore b/.gitignore
index 0d770e8c..63ab5761 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,6 @@ ext/nokogumbo/*
 /lib/nokogumbo/nokogumbo.dll
 /pkg
 /tmp
+/gumbo-parser/googletest
+/gumbo-parser/build
+/test/html5lib-tests
diff --git a/.travis.yml b/.travis.yml
index 82583c94..94f986b7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,7 +4,6 @@ os:
   - osx
   - linux
 rvm: # http://rubies.travis-ci.org/
-  - 1.9
   - 2.0
   - 2.1
   - 2.2
@@ -13,18 +12,38 @@ rvm: # http://rubies.travis-ci.org/
   - 2.5
 matrix:
   exclude:
-    - os: osx
-      rvm: 1.9
     - os: osx
       rvm: 2.0
     - os: osx
-      env: WITH_LIBXML=false V=1
+      env: WITH_LIBXML=false
+  include:
+    - name: test gumbo
+      os: osx
+      language: cpp
+      install:
+        - curl -L https://github.com/google/googletest/archive/release-1.8.0.tar.gz | tar zxf - --strip-components 1 -C gumbo-parser googletest-release-1.8.0/googletest
+        - make -C gumbo-parser/googletest/make gtest_main.a
+      before_script: true
+      script:
+        - make -C gumbo-parser
+    - name: test gumbo
+      os: linux
+      language: cpp
+      install:
+        - curl -L https://github.com/google/googletest/archive/release-1.8.0.tar.gz | tar zxf - --strip-components 1 -C gumbo-parser googletest-release-1.8.0/googletest
+        - make -C gumbo-parser/googletest/make gtest_main.a
+      before_script: true
+      script:
+        - make -C gumbo-parser
+
 env:
-  - WITH_LIBXML=true V=1
-  - WITH_LIBXML=false V=1
-before_script: |
-  if [ "$WITH_LIBXML" == "false" ]; then
-    sudo apt-get remove libxml2-dev
-  fi
+  - WITH_LIBXML=true
+  - WITH_LIBXML=false
+before_script:
+  - if [ "$WITH_LIBXML" = "false" ]; then sudo apt-get remove libxml2-dev; fi # only the WITH_LIBXML=false jobs drop the libxml2 dev package
+  - git clone --depth 1 --branch master --single-branch https://github.com/html5lib/html5lib-tests.git test/html5lib-tests # explicit target dir instead of `cd test`, so later commands still start from the repo root
+script:
+  - MAKE='make V=1' bundle exec rake compile
+  - bundle exec rake
 sudo: required
 cache: bundler
diff --git a/Rakefile b/Rakefile
index 36f753a4..e573db75 100644
--- a/Rakefile
+++ b/Rakefile
@@ -27,6 +27,12 @@ SOURCES = ['ext/nokogumbo/extconf.rb', 'ext/nokogumbo/nokogumbo.c']
 # gem, package, and extension tasks
 task 'gem' => 'test'
 
+desc 'Run the gumbo unit tests'
+task 'test:gumbo' => 'gumbo-parser/googletest' do
+  sh(*%w{make -C gumbo-parser})
+end
+
+
 SPEC = Gem::Specification.new do |gem|
   gem.name = 'nokogumbo'
   gem.version = Nokogumbo::VERSION
diff --git a/ext/nokogumbo/extconf.rb b/ext/nokogumbo/extconf.rb
index aef75ccd..4829c7c3 100644
--- a/ext/nokogumbo/extconf.rb
+++ b/ext/nokogumbo/extconf.rb
@@ -1,6 +1,8 @@
 require 'mkmf'
 $CFLAGS += " -std=c99"
 
+CONFIG['warnflags'] = '-Wall'
+
 if have_library('xml2', 'xmlNewDoc') 
   # libxml2 libraries from http://www.xmlsoft.org/
   pkg_config('libxml-2.0')
@@ -28,11 +30,6 @@
   require 'fileutils'
   gumbo_dir = File.expand_path('../../gumbo-parser', ext_dir)
   FileUtils.ln_s(Dir[File.join(gumbo_dir, 'src/*.[hc]')], ext_dir, force:true)
-  case RbConfig::CONFIG['target_os']
-  when 'mingw32', /mswin/
-    FileUtils.ln_s(Dir[File.join(gumbo_dir, 'visualc/include/*.h')], ext_dir,
-                   force: true)
-  end
   # Set these to nil so that create_makefile picks up the new sources.
   $srcs = $objs = nil
 end
diff --git a/ext/nokogumbo/nokogumbo.c b/ext/nokogumbo/nokogumbo.c
index 1459e13f..51ac4f3a 100644
--- a/ext/nokogumbo/nokogumbo.c
+++ b/ext/nokogumbo/nokogumbo.c
@@ -21,7 +21,6 @@
 #include <ruby.h>
 #include "gumbo.h"
 #include "error.h"
-#include "parser.h"
 
 // class constants
 static VALUE Document;
@@ -34,7 +33,7 @@ static VALUE XMLSyntaxError;
 #define NIL NULL
 #define CONST_CAST (xmlChar const*)
 #else
-#define NIL 0
+#define NIL Qnil
 #define CONST_CAST
 
 // more class constants
@@ -45,11 +44,15 @@ static VALUE Comment;
 
 // interned symbols
 static VALUE new;
+static VALUE attribute;
 static VALUE set_attribute;
+static VALUE remove_attribute;
 static VALUE add_child;
 static VALUE internal_subset;
 static VALUE remove_;
 static VALUE create_internal_subset;
+static VALUE key_;
+static VALUE node_name_;
 
 // map libxml2 types to Ruby VALUE
 #define xmlNodePtr VALUE
@@ -58,12 +61,10 @@ static VALUE create_internal_subset;
 // redefine libxml2 API as Ruby function calls
 #define xmlNewDocNode(doc, ns, name, content) \
   rb_funcall(Element, new, 2, rb_str_new2(name), doc)
-#define xmlNewProp(element, name, value) \
-  rb_funcall(element, set_attribute, 2, rb_str_new2(name), rb_str_new2(value))
 #define xmlNewDocText(doc, text) \
   rb_funcall(Text, new, 2, rb_str_new2(text), doc)
 #define xmlNewCDataBlock(doc, content, length) \
-  rb_funcall(CDATA, new, 2, rb_str_new(content, length), doc)
+  rb_funcall(CDATA, new, 2, doc, rb_str_new(content, length))
 #define xmlNewDocComment(doc, text) \
   rb_funcall(Comment, new, 2, doc, rb_str_new2(text))
 #define xmlAddChild(element, node) \
@@ -83,6 +84,78 @@ static VALUE xmlNewDoc(char* version) {
   rb_funcall(rb_funcall(doc, internal_subset, 0), remove_, 0);
   return doc;
 }
+
+static VALUE find_dummy_key(VALUE collection) { // Returns a 1-4 letter lowercase string for which collection.key? is false, or Qnil if none is free.
+  VALUE r_dummy = Qnil;
+  char dummy[5] = "a"; // Candidate key; only the first `len` bytes are handed to rb_str_new.
+  size_t len = 1;
+  while (len < sizeof dummy) {
+    r_dummy = rb_str_new(dummy, len);
+    if (rb_funcall(collection, key_, 1, r_dummy) == Qfalse)
+      return r_dummy;
+    for (size_t i = 0; ; ++i) { // Step to the next candidate: base-26 increment, least-significant letter first.
+      if (dummy[i] == 0) { // Carried past the last letter: lengthen the candidate by one 'a'.
+        dummy[i] = 'a';
+        ++len;
+        break;
+      }
+      if (dummy[i] == 'z')
+        dummy[i] = 'a'; // Wrap this position and carry into the next one.
+      else {
+        ++dummy[i];
+        break;
+      }
+    }
+  }
+  // All 26 + 26^2 + 26^3 + 26^4 = 475254 candidates of 1-4 letters are taken. Give up.
+  return Qnil;
+}
+
+static xmlNodePtr xmlNewProp(xmlNodePtr node, const char *name, const char *value) { // Ruby-level stand-in for libxml2's xmlNewProp (xmlNodePtr is VALUE here); returns Qnil on failure.
+  // Nokogiri::XML::Node#set_attribute calls xmlSetProp(node, name, value)
+  // which behaves roughly as
+  // if name is a QName prefix:local
+  //   if node->doc has a namespace ns corresponding to prefix
+  //     return xmlSetNsProp(node, ns, local, value)
+  // return xmlSetNsProp(node, NULL, name, value)
+  //
+  // If the prefix is "xml", then the namespace lookup will create it.
+  //
+  // By contrast, xmlNewProp does not do this parsing and creates an attribute
+  // with the name and value exactly as given. This is the behavior that we
+  // want.
+  //
+  // Thus, for attribute names like "xml:lang", #set_attribute will create an
+  // attribute with namespace "xml" and name "lang". This is incorrect for
+  // html elements (but correct for foreign elements).
+  //
+  // Work around this by inserting a dummy attribute and then changing the
+  // name, if needed.
+
+  // Can't use strchr since it's locale-sensitive.
+  size_t len = strlen(name);
+  VALUE r_name = rb_str_new(name, len);
+  if (memchr(name, ':', len) == NULL) {
+    // No colon.
+    return rb_funcall(node, set_attribute, 2, r_name, rb_str_new2(value)); // Not a QName, so set_attribute keeps the name as given.
+  }
+  // Find a dummy attribute string that doesn't already exist.
+  VALUE dummy = find_dummy_key(node);
+  if (dummy == Qnil)
+    return Qnil; // No free placeholder name: give up without setting the attribute.
+  // Add the dummy attribute.
+  VALUE r_value = rb_funcall(node, set_attribute, 2, dummy, rb_str_new2(value));
+  if (r_value == Qnil)
+    return Qnil;
+  // Remove the old attribute, if it exists.
+  rb_funcall(node, remove_attribute, 1, r_name);
+  // Rename the dummy
+  VALUE attr = rb_funcall(node, attribute, 1, dummy); // Fetch the attribute node that was just created under the placeholder name.
+  if (attr == Qnil)
+    return Qnil;
+  rb_funcall(attr, node_name_, 1, r_name); // NOTE(review): relies on node_name= not splitting prefix:local - confirm against Nokogiri.
+  return attr;
+}
 #endif
 
 // Build a xmlNodePtr for a given GumboNode (recursively)
@@ -90,30 +163,15 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node);
 
 // Build a xmlNodePtr for a given GumboElement (recursively)
 static xmlNodePtr walk_element(xmlDocPtr document, GumboElement *node) {
-  // determine tag name for a given node
-  xmlNodePtr element;
-  if (node->tag != GUMBO_TAG_UNKNOWN) {
-    element = xmlNewDocNode(document, NIL,
-      CONST_CAST gumbo_normalized_tagname(node->tag), NIL);
-  } else {
-    GumboStringPiece tag = node->original_tag;
-    gumbo_tag_from_original_text(&tag);
-#ifdef _MSC_VER
-    char* name = alloca(tag.length+1);
-#else
-    char name[tag.length+1];
-#endif
-    strncpy(name, tag.data, tag.length);
-    name[tag.length] = '\0';
-    element = xmlNewDocNode(document, NIL, CONST_CAST name, NIL);
-  }
+  // create the given element
+  xmlNodePtr element = xmlNewDocNode(document, NIL, CONST_CAST node->name, NIL);
 
   // add in the attributes
   GumboVector* attrs = &node->attributes;
   char *name = NULL;
-  int namelen = 0;
-  char *ns;
-  for (int i=0; i < attrs->length; i++) {
+  size_t namelen = 0;
+  const char *ns;
+  for (size_t i=0; i < attrs->length; i++) {
     GumboAttribute *attr = attrs->data[i];
 
     switch (attr->attr_namespace) {
@@ -156,7 +214,7 @@ static xmlNodePtr walk_element(xmlDocPtr document, GumboElement *node) {
 
   // add in the children
   GumboVector* children = &node->children;
-  for (int i=0; i < children->length; i++) {
+  for (size_t i=0; i < children->length; i++) {
     xmlNodePtr node = walk_tree(document, children->data[i]);
     if (node) xmlAddChild(element, node);
   }
@@ -176,8 +234,8 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node) {
       return xmlNewDocText(document, CONST_CAST node->v.text.text);
     case GUMBO_NODE_CDATA:
       return xmlNewCDataBlock(document,
-        CONST_CAST node->v.text.original_text.data,
-        (int) node->v.text.original_text.length);
+        CONST_CAST node->v.text.text,
+        (int) strlen(node->v.text.text));
     case GUMBO_NODE_COMMENT:
       return xmlNewDocComment(document, CONST_CAST node->v.text.text);
   }
@@ -200,12 +258,12 @@ static VALUE parse(VALUE self, VALUE string, VALUE max_parse_errors) {
     const char *public = output->document->v.document.public_identifier;
     const char *system = output->document->v.document.system_identifier;
     xmlCreateIntSubset(doc, CONST_CAST name,
-      (public[0] ? CONST_CAST public : NIL),
-      (system[0] ? CONST_CAST system : NIL));
+      (public[0] ? CONST_CAST public : NULL),
+      (system[0] ? CONST_CAST system : NULL));
   }
 
   GumboVector *children = &output->document->v.document.children;
-  for (int i=0; i < children->length; i++) {
+  for (size_t i=0; i < children->length; i++) {
     GumboNode *child = children->data[i];
     xmlNodePtr node = walk_tree(doc, child);
     if (node) {
@@ -221,15 +279,14 @@ static VALUE parse(VALUE self, VALUE string, VALUE max_parse_errors) {
   // Add parse errors to rdoc.
   if (output->errors.length) {
     GumboVector *errors = &output->errors;
-    GumboParser parser = { ._options = &options };
     GumboStringBuffer msg;
     VALUE rerrors = rb_ary_new2(errors->length);
 
-    gumbo_string_buffer_init(&parser, &msg);
-    for (int i=0; i < errors->length; i++) {
+    gumbo_string_buffer_init(&msg);
+    for (size_t i=0; i < errors->length; i++) {
       GumboError *err = errors->data[i];
-      gumbo_string_buffer_clear(&parser, &msg);
-      gumbo_caret_diagnostic_to_string(&parser, err, input, input_len, &msg);
+      gumbo_string_buffer_clear(&msg);
+      gumbo_caret_diagnostic_to_string(err, input, input_len, &msg);
       VALUE err_str = rb_str_new(msg.data, msg.length);
       VALUE syntax_error = rb_class_new_instance(1, &err_str, XMLSyntaxError);
       rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER
@@ -245,10 +302,10 @@ static VALUE parse(VALUE self, VALUE string, VALUE max_parse_errors) {
       rb_ary_push(rerrors, syntax_error);
     }
     rb_iv_set(rdoc, "@errors", rerrors);
-    gumbo_string_buffer_destroy(&parser, &msg);
+    gumbo_string_buffer_destroy(&msg);
   }
 
-  gumbo_destroy_output(&options, output);
+  gumbo_destroy_output(output);
 
   return rdoc;
 }
@@ -274,11 +331,15 @@ void Init_nokogumbo() {
 
   // interned symbols
   new = rb_intern("new");
+  attribute = rb_intern("attribute");
   set_attribute = rb_intern("set_attribute");
+  remove_attribute = rb_intern("remove_attribute");
   add_child = rb_intern("add_child_node_and_reparent_attrs");
   internal_subset = rb_intern("internal_subset");
   remove_ = rb_intern("remove");
   create_internal_subset = rb_intern("create_internal_subset");
+  key_ = rb_intern("key?");
+  node_name_ = rb_intern("node_name=");
 #endif
 
   // define Nokogumbo module with a parse method
diff --git a/gumbo-parser/.clang-format b/gumbo-parser/.clang-format
deleted file mode 100644
index e2138046..00000000
--- a/gumbo-parser/.clang-format
+++ /dev/null
@@ -1,65 +0,0 @@
----
-Language:        Cpp
-# BasedOnStyle:  Google
-AccessModifierOffset: -1
-AlignAfterOpenBracket: false
-AlignEscapedNewlinesLeft: true
-AlignOperands:   true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: true
-AllowShortFunctionsOnASingleLine: All
-AlwaysBreakAfterDefinitionReturnType: false
-AlwaysBreakTemplateDeclarations: true
-AlwaysBreakBeforeMultilineStrings: true
-BreakBeforeBinaryOperators: None
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-BinPackParameters: true
-BinPackArguments: true
-ColumnLimit:     80
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
-ConstructorInitializerIndentWidth: 4
-DerivePointerAlignment: true
-ExperimentalAutoDetectBinPacking: false
-IndentCaseLabels: true
-IndentWrappedFunctionNames: false
-IndentFunctionDeclarationAfterType: false
-MaxEmptyLinesToKeep: 1
-KeepEmptyLinesAtTheStartOfBlocks: false
-NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
-PenaltyBreakBeforeFirstCallParameter: 1
-PenaltyBreakComment: 300
-PenaltyBreakString: 1000
-PenaltyBreakFirstLessLess: 120
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 200
-PointerAlignment: Left
-SpacesBeforeTrailingComments: 2
-Cpp11BracedListStyle: true
-Standard:        Auto
-IndentWidth:     2
-TabWidth:        8
-UseTab:          Never
-BreakBeforeBraces: Attach
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-SpacesInAngles:  false
-SpaceInEmptyParentheses: false
-SpacesInCStyleCastParentheses: false
-SpaceAfterCStyleCast: true
-SpacesInContainerLiterals: true
-SpaceBeforeAssignmentOperators: true
-ContinuationIndentWidth: 4
-CommentPragmas:  '^ IWYU pragma:'
-ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
-SpaceBeforeParens: ControlStatements
-DisableFormat:   false
-...
-
diff --git a/gumbo-parser/.gitignore b/gumbo-parser/.gitignore
deleted file mode 100644
index 508df8fd..00000000
--- a/gumbo-parser/.gitignore
+++ /dev/null
@@ -1,79 +0,0 @@
-# Compilation artifacts
-*.o
-*.lo
-*.la
-
-# Editor swap files
-*.swp
-*.swo
-*.swn
-
-#emacs editor leftovers
-*.*~
-
-#diff leftovers
-*.orig
-
-# gtest pieces
-gtest
-gtest-1.7.0
-
-# Other build artifacts
-/Debug
-/visualc/Debug
-/visualc/Release
-/visualc/gumbo.sdf
-/visualc/gumbo.opensdf
-/build
-.log
-.sdf
-.opensdf
-.deps
-.dirstamp
-.libs
-Makefile
-Makefile.in
-aclocal.m4
-autom4te.cache
-compile
-config.guess
-config.log
-config.status
-config.sub
-configure
-depcomp
-gumbo.pc
-gumbo_test
-gumbo_test.log
-gumbo_test.trs
-install-sh
-libtool
-ltmain.sh
-m4/
-missing
-test-driver
-test-suite.log
-
-# gyp android artifacts
-gumbo_parser.target.mk
-
-# `make dist` artifacts
-/gumbo-[0-9].[0-9].tar.gz
-/gumbo-[0-9].[0-9]/
-
-# Python dist artifacts
-*.pyc
-*.dylib
-dist
-build
-python/gumbo.egg-info
-python/gumbo/libgumbo.so
-
-# Example binaries
-benchmark
-clean_text
-find_links
-get_title
-positions_of_class
-prettyprint
-serialize
diff --git a/gumbo-parser/.gitmodules b/gumbo-parser/.gitmodules
deleted file mode 100644
index be8537ac..00000000
--- a/gumbo-parser/.gitmodules
+++ /dev/null
@@ -1,6 +0,0 @@
-[submodule "third_party/gtest"]
-	path = third_party/gtest
-	url = https://chromium.googlesource.com/external/googletest/
-[submodule "testdata"]
-	path = testdata
-	url = https://github.com/html5lib/html5lib-tests.git
diff --git a/gumbo-parser/.travis.yml b/gumbo-parser/.travis.yml
deleted file mode 100644
index d76208f5..00000000
--- a/gumbo-parser/.travis.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-language: c++
-
-compiler:
-  - gcc
-  - clang
-
-os:
-  - linux
-  - osx
-
-install:
-  - wget 'https://googletest.googlecode.com/files/gtest-1.7.0.zip'
-  - unzip gtest-1.7.0.zip
-  - ln -s gtest-1.7.0 gtest
-  - sudo pip install BeautifulSoup
-  - sudo pip install html5lib==0.95
-
-script:
-  - ./autogen.sh && ./configure && make && make check
-  - python python/gumbo/gumboc_test.py
-  - python python/gumbo/html5lib_adapter_test.py
-  - python python/gumbo/soup_adapter_test.py
-  - sudo make install
-  - g++ examples/clean_text.cc `pkg-config --cflags --libs gumbo`
-  - sudo python setup.py sdist install
-  - python -c 'import gumbo; gumbo.parse("Foo")'
diff --git a/gumbo-parser/Makefile b/gumbo-parser/Makefile
new file mode 100644
index 00000000..b1f688d8
--- /dev/null
+++ b/gumbo-parser/Makefile
@@ -0,0 +1,35 @@
+.PHONY: all clean check # every listed name is a rule defined in this Makefile
+
+gumbo_objs := $(patsubst %.c,build/%.o,$(wildcard src/*.c))
+test_objs := $(patsubst %.cc,build/%.o,$(wildcard test/*.cc))
+gtest_lib := googletest/make/gtest_main.a
+
+CPPFLAGS := -Isrc
+CFLAGS := -std=c99 -Os
+CXXFLAGS := -isystem googletest/include -std=c++11 -Os
+LDFLAGS := -pthread
+
+all: check
+
+build/src:
+	mkdir -p "$@"
+
+build/test:
+	mkdir -p "$@"
+
+build/src/%.o: src/%.c | build/src
+	$(CC) -MMD $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
+
+build/test/%.o: test/%.cc | build/test
+	$(CXX) -MMD $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
+
+build/run_tests: $(gumbo_objs) $(test_objs) $(gtest_lib)
+	$(CXX) -o $@ $+ $(LDFLAGS)
+
+check: build/run_tests
+	./build/run_tests
+
+clean:
+	$(RM) -r build
+
+-include $(test_objs:.o=.d) $(gumbo_objs:.o=.d)
diff --git a/gumbo-parser/src/README.md b/gumbo-parser/src/README.md
new file mode 100644
index 00000000..5e75c41e
--- /dev/null
+++ b/gumbo-parser/src/README.md
@@ -0,0 +1,41 @@
+libgumbo
+========
+
+This is an internal fork of the [libgumbo] library, which was copied and
+later modified under the terms of the Apache 2.0 [license]. See `lua-gumbo`
+commit [`0a04728`] for details of the original import.
+
+Since importing the code, the following notable fixes and improvements
+have been made:
+
+* `91cef89`: Re-implement `adjust_foreign_attributes()` with a gperf hash
+* `b11abe7`: Pass `TagSet` arrays into functions by reference instead of value
+* `b73dc03`: Simplify `maybe_replace_codepoint()` function
+* `d5d0bb3`: Remove special handling of `<menuitem>` tag
+* `7bd5162`: Remove special handling of `<isindex>` tag
+* `a5c1b0e`: Use `realloc(3)` instead of `malloc(3)` in `enlarge_vector_if_full()`
+* `dcbebd7`: Use `realloc(3)` instead of `malloc(3)` in `maybe_resize_string_buffer()`
+* `df15262`: Make `destroy_node()` function non-recursive
+* `2df37f5`: Fix signedness of some format specifiers
+* `176553e`: Add maximum element nesting limit
+* `bed0f4a`: Annotate `gumbo_debug()` with `PRINTF` macro and fix warnings
+* `7ffc218`: Annotate `print_message()` with `PRINTF` macro and fix warnings
+* `1bd8ab5`, `9136507`, `53a1f9a`: Deduplicate some identical `TagSet` arrays
+* `a7a9065`: Add some GCC/Clang function attributes
+* `8d3d4e4`: Remove custom allocator support
+* `8d3b006`: Fix recording of source positions for `</form>` end tags
+* `1a8d763`: Replace linear search in `maybe_replace_codepoint()` with a lookup table
+* `6dca79e`: Replace `strcasecmp()` and `strncasecmp()` with ascii-only equivalents
+* `17ab1d2`: Fix `TAGSET_INCLUDES` macro to work properly with multiple bit flags
+* `7e56d45`: Re-implement `gumbo_normalize_svg_tagname()` with a gperf hash
+* `a518d35`: Replace linear array search in `adjust_svg_attributes()` with a gperf hash
+* `a4a7433`: Fix duplicate `TagSet` initializer being ignored in `is_special_node()`
+* `8137fcd`: Add support for `<dialog>` tag
+* `4b35471`: Add missing `static` qualifiers to hide symbols that shouldn't be extern
+* `df57c59`, `03101f3`, `ea62330`: Replace use of locale-dependent `ctype.h` functions
+  with custom, ASCII-only equivalents
+
+
+[libgumbo]: https://github.com/google/gumbo-parser/tree/aa91b27b02c0c80c482e24348a457ed7c3c088e0/src
+[license]: https://github.com/google/gumbo-parser/blob/aa91b27b02c0c80c482e24348a457ed7c3c088e0/COPYING
+[`0a04728`]: https://gitlab.com/craigbarnes/lua-gumbo/commit/0a047282815af86f3367a7d95fefcfe5723ece48
diff --git a/gumbo-parser/src/ascii.c b/gumbo-parser/src/ascii.c
new file mode 100644
index 00000000..036dc406
--- /dev/null
+++ b/gumbo-parser/src/ascii.c
@@ -0,0 +1,33 @@
+#include "ascii.h"
+
+int gumbo_ascii_strcasecmp(const char *s1, const char *s2) { // Case-insensitive compare that folds only ASCII 'A'-'Z'; returns <0, 0 or >0.
+  int c1, c2;
+  while (*s1 && *s2) {
+    c1 = (int)(unsigned char) gumbo_ascii_tolower(*s1);
+    c2 = (int)(unsigned char) gumbo_ascii_tolower(*s2);
+    if (c1 != c2) {
+      return (c1 - c2); // Difference of the first mismatching bytes, after folding.
+    }
+    s1++;
+    s2++;
+  }
+  return (((int)(unsigned char) *s1) - ((int)(unsigned char) *s2)); // At least one side is at its NUL, so no folding is needed here.
+}
+
+int gumbo_ascii_strncasecmp(const char *s1, const char *s2, size_t n) { // Like gumbo_ascii_strcasecmp, but examines at most n bytes.
+  int c1, c2;
+  while (n && *s1 && *s2) {
+    n -= 1;
+    c1 = (int)(unsigned char) gumbo_ascii_tolower(*s1);
+    c2 = (int)(unsigned char) gumbo_ascii_tolower(*s2);
+    if (c1 != c2) {
+      return (c1 - c2); // Difference of the first mismatching bytes, after folding.
+    }
+    s1++;
+    s2++;
+  }
+  if (n) { // Stopped at a NUL before n bytes were used up: order by the terminating bytes.
+    return (((int)(unsigned char) *s1) - ((int)(unsigned char) *s2));
+  }
+  return 0; // All n bytes compared equal.
+}
diff --git a/gumbo-parser/src/ascii.h b/gumbo-parser/src/ascii.h
new file mode 100644
index 00000000..729c69ac
--- /dev/null
+++ b/gumbo-parser/src/ascii.h
@@ -0,0 +1,31 @@
+#ifndef GUMBO_ASCII_H_
+#define GUMBO_ASCII_H_
+
+#include <stddef.h>
+#include "macros.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define gumbo_ascii_isupper(c) (((unsigned)(c) - 'A') < 26) // True only for 'A'..'Z'; values below 'A' wrap to a large unsigned number.
+
+CONST_FN
+static inline int gumbo_ascii_tolower(int c) { // Maps 'A'..'Z' to 'a'..'z'; any other value is returned unchanged.
+  if (gumbo_ascii_isupper(c)) {
+    return c | 32; // ASCII upper- and lowercase letters differ only in bit 0x20.
+  }
+  return c;
+}
+
+PURE NONNULL_ARGS
+int gumbo_ascii_strcasecmp(const char *s1, const char *s2);
+
+PURE NONNULL_ARGS
+int gumbo_ascii_strncasecmp(const char *s1, const char *s2, size_t n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_ASCII_H_
diff --git a/gumbo-parser/src/attribute.c b/gumbo-parser/src/attribute.c
index 234927a5..8967ee98 100644
--- a/gumbo-parser/src/attribute.c
+++ b/gumbo-parser/src/attribute.c
@@ -1,44 +1,42 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
+/*
+ Copyright 2018 Craig Barnes.
+ Copyright 2010 Google Inc.
 
-#include "attribute.h"
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
 
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
-#include <strings.h>
-
+#include "attribute.h"
+#include "ascii.h"
 #include "util.h"
 
-struct GumboInternalParser;
-
-GumboAttribute* gumbo_get_attribute(
-    const GumboVector* attributes, const char* name) {
+GumboAttribute* gumbo_get_attribute ( // Returns the first attribute whose name matches `name`, or NULL if none does.
+  const GumboVector* attributes,
+  const char* name
+) {
   for (unsigned int i = 0; i < attributes->length; ++i) {
     GumboAttribute* attr = attributes->data[i];
-    if (!strcasecmp(attr->name, name)) {
+    if (!gumbo_ascii_strcasecmp(attr->name, name)) { // ASCII-only case-insensitive match.
       return attr;
     }
   }
   return NULL;
 }
 
-void gumbo_destroy_attribute(
-    struct GumboInternalParser* parser, GumboAttribute* attribute) {
-  gumbo_parser_deallocate(parser, (void*) attribute->name);
-  gumbo_parser_deallocate(parser, (void*) attribute->value);
-  gumbo_parser_deallocate(parser, (void*) attribute);
+void gumbo_destroy_attribute(GumboAttribute* attribute) { // Releases the attribute's name and value buffers, then the attribute itself.
+  gumbo_free((void*) attribute->name);
+  gumbo_free((void*) attribute->value);
+  gumbo_free((void*) attribute); // Last, because the two calls above read through this pointer.
+}
diff --git a/gumbo-parser/src/attribute.h b/gumbo-parser/src/attribute.h
index f9b8aea5..3383bde6 100644
--- a/gumbo-parser/src/attribute.h
+++ b/gumbo-parser/src/attribute.h
@@ -1,19 +1,3 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-
 #ifndef GUMBO_ATTRIBUTE_H_
 #define GUMBO_ATTRIBUTE_H_
 
@@ -23,15 +7,11 @@
 extern "C" {
 #endif
 
-struct GumboInternalParser;
-
-// Release the memory used for an GumboAttribute, including the attribute
-// itself.
-void gumbo_destroy_attribute(
-    struct GumboInternalParser* parser, GumboAttribute* attribute);
+// Release the memory used for a GumboAttribute, including the attribute itself
+void gumbo_destroy_attribute(GumboAttribute* attribute);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // GUMBO_ATTRIBUTE_H_
+#endif // GUMBO_ATTRIBUTE_H_
diff --git a/gumbo-parser/src/char_ref.c b/gumbo-parser/src/char_ref.c
index a1d74fd5..d9e35214 100644
--- a/gumbo-parser/src/char_ref.c
+++ b/gumbo-parser/src/char_ref.c
@@ -1,68 +1,43 @@
+/*
+ Copyright 2017-2018 Craig Barnes.
+ Copyright 2011 Google Inc.
 
-#line 1 "char_ref.rl"
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// This is a Ragel state machine re-implementation of the original char_ref.c,
-// rewritten to improve efficiency.  To generate the .c file from it,
-//
-// $ ragel -F0 char_ref.rl
-//
-// The generated source is also checked into source control so that most people
-// hacking on the parser do not need to install ragel.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
 
-#include "char_ref.h"
+    https://www.apache.org/licenses/LICENSE-2.0
 
-#include <assert.h>
-#include <ctype.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <string.h>  // Only for debug assertions at present.
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
 
+#include <assert.h>
+#include "char_ref.h"
 #include "error.h"
-#include "string_piece.h"
+#include "macros.h"
 #include "utf8.h"
-#include "util.h"
 
 struct GumboInternalParser;
 
 const int kGumboNoChar = -1;
 
-// Table of replacement characters.  The spec specifies that any occurrence of
-// the first character should be replaced by the second character, and a parse
-// error recorded.
-typedef struct {
-  int from_char;
-  int to_char;
-} CharReplacement;
-
-static const CharReplacement kCharReplacements[] = {{0x00, 0xfffd},
-    {0x0d, 0x000d}, {0x80, 0x20ac}, {0x81, 0x0081}, {0x82, 0x201A},
-    {0x83, 0x0192}, {0x84, 0x201E}, {0x85, 0x2026}, {0x86, 0x2020},
-    {0x87, 0x2021}, {0x88, 0x02C6}, {0x89, 0x2030}, {0x8A, 0x0160},
-    {0x8B, 0x2039}, {0x8C, 0x0152}, {0x8D, 0x008D}, {0x8E, 0x017D},
-    {0x8F, 0x008F}, {0x90, 0x0090}, {0x91, 0x2018}, {0x92, 0x2019},
-    {0x93, 0x201C}, {0x94, 0x201D}, {0x95, 0x2022}, {0x96, 0x2013},
-    {0x97, 0x2014}, {0x98, 0x02DC}, {0x99, 0x2122}, {0x9A, 0x0161},
-    {0x9B, 0x203A}, {0x9C, 0x0153}, {0x9D, 0x009D}, {0x9E, 0x017E},
-    {0x9F, 0x0178},
-    // Terminator.
-    {-1, -1}};
+static const uint32_t kCharReplacements[] = { // Indexed by the codepoint of a numeric character reference; the value is the character substituted for it.
+  [0x00] = 0xFFFD, [0x0D] = 0x000D, [0x80] = 0x20AC, [0x81] = 0x0081,
+  [0x82] = 0x201A, [0x83] = 0x0192, [0x84] = 0x201E, [0x85] = 0x2026,
+  [0x86] = 0x2020, [0x87] = 0x2021, [0x88] = 0x02C6, [0x89] = 0x2030,
+  [0x8A] = 0x0160, [0x8B] = 0x2039, [0x8C] = 0x0152, [0x8D] = 0x008D,
+  [0x8E] = 0x017D, [0x8F] = 0x008F, [0x90] = 0x0090, [0x91] = 0x2018,
+  [0x92] = 0x2019, [0x93] = 0x201C, [0x94] = 0x201D, [0x95] = 0x2022,
+  [0x96] = 0x2013, [0x97] = 0x2014, [0x98] = 0x02DC, [0x99] = 0x2122,
+  [0x9A] = 0x0161, [0x9B] = 0x203A, [0x9C] = 0x0153, [0x9D] = 0x009D,
+  [0x9E] = 0x017E, [0x9F] = 0x0178
+}; // Indices without an initializer are 0; consume_numeric_ref() treats a 0 result as "no replacement".
 
-static int parse_digit(int c, bool allow_hex) {
+static int CONST_FN parse_digit(int c, bool allow_hex) {
   if (c >= '0' && c <= '9') {
     return c - '0';
   }
@@ -75,8 +50,10 @@ static int parse_digit(int c, bool allow_hex) {
   return -1;
 }
 
-static void add_no_digit_error(
-    struct GumboInternalParser* parser, Utf8Iterator* input) {
+static void add_no_digit_error (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input
+) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -85,8 +62,12 @@ static void add_no_digit_error(
   error->type = GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS;
 }
 
-static void add_codepoint_error(struct GumboInternalParser* parser,
-    Utf8Iterator* input, GumboErrorType type, int codepoint) {
+static void add_codepoint_error (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input,
+  GumboErrorType type,
+  int codepoint
+) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -96,8 +77,12 @@ static void add_codepoint_error(struct GumboInternalParser* parser,
   error->v.codepoint = codepoint;
 }
 
-static void add_named_reference_error(struct GumboInternalParser* parser,
-    Utf8Iterator* input, GumboErrorType type, GumboStringPiece text) {
+static void add_named_reference_error (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input,
+  GumboErrorType type,
+  GumboStringPiece text
+) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -107,17 +92,15 @@ static void add_named_reference_error(struct GumboInternalParser* parser,
   error->v.text = text;
 }
 
-static int maybe_replace_codepoint(int codepoint) {
-  for (int i = 0; kCharReplacements[i].from_char != -1; ++i) {
-    if (kCharReplacements[i].from_char == codepoint) {
-      return kCharReplacements[i].to_char;
-    }
-  }
-  return -1;
+static uint32_t PURE maybe_replace_codepoint(uint32_t codepoint) {
+  return (codepoint > 0x9F) ? 0x00 : kCharReplacements[codepoint];
 }
 
-static bool consume_numeric_ref(
-    struct GumboInternalParser* parser, Utf8Iterator* input, int* output) {
+static bool consume_numeric_ref (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input,
+  int* output
+) {
   utf8iterator_next(input);
   bool is_hex = false;
   int c = utf8iterator_current(input);
@@ -136,7 +119,7 @@ static bool consume_numeric_ref(
     return false;
   }
 
-  int codepoint = 0;
+  uint32_t codepoint = 0;
   bool status = true;
   do {
     codepoint = (codepoint * (is_hex ? 16 : 10)) + digit;
@@ -145,31 +128,47 @@ static bool consume_numeric_ref(
   } while (digit != -1);
 
   if (utf8iterator_current(input) != ';') {
-    add_codepoint_error(
-        parser, input, GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON, codepoint);
+    add_codepoint_error (
+      parser,
+      input,
+      GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON,
+      codepoint
+    );
     status = false;
   } else {
     utf8iterator_next(input);
   }
 
-  int replacement = maybe_replace_codepoint(codepoint);
-  if (replacement != -1) {
-    add_codepoint_error(
-        parser, input, GUMBO_ERR_NUMERIC_CHAR_REF_INVALID, codepoint);
+  uint32_t replacement = maybe_replace_codepoint(codepoint);
+  if (replacement != 0) {
+    add_codepoint_error (
+      parser,
+      input,
+      GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,
+      codepoint
+    );
     *output = replacement;
     return false;
   }
 
   if ((codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff) {
-    add_codepoint_error(
-        parser, input, GUMBO_ERR_NUMERIC_CHAR_REF_INVALID, codepoint);
+    add_codepoint_error (
+      parser,
+      input,
+      GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,
+      codepoint
+    );
     *output = 0xfffd;
     return false;
   }
 
-  if (utf8_is_invalid_code_point(codepoint) || codepoint == 0xb) {
-    add_codepoint_error(
-        parser, input, GUMBO_ERR_NUMERIC_CHAR_REF_INVALID, codepoint);
+  if (utf8_is_invalid_code_point(codepoint)) {
+    add_codepoint_error (
+      parser,
+      input,
+      GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,
+      codepoint
+    );
     status = false;
     // But return it anyway, per spec.
   }
@@ -177,14 +176,19 @@ static bool consume_numeric_ref(
   return status;
 }
 
-static bool maybe_add_invalid_named_reference(
-    struct GumboInternalParser* parser, Utf8Iterator* input) {
+static bool maybe_add_invalid_named_reference (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input
+) {
   // The iterator will always be reset in this code path, so we don't need to
   // worry about consuming characters.
   const char* start = utf8iterator_get_char_pointer(input);
   int c = utf8iterator_current(input);
-  while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
-         (c >= '0' && c <= '9')) {
+  while (
+    (c >= 'a' && c <= 'z')
+    || (c >= 'A' && c <= 'Z')
+    || (c >= '0' && c <= '9')
+  ) {
     utf8iterator_next(input);
     c = utf8iterator_current(input);
   }
@@ -192,18 +196,21 @@ static bool maybe_add_invalid_named_reference(
     GumboStringPiece bad_ref;
     bad_ref.data = start;
     bad_ref.length = utf8iterator_get_char_pointer(input) - start;
-    add_named_reference_error(
-        parser, input, GUMBO_ERR_NAMED_CHAR_REF_INVALID, bad_ref);
+    add_named_reference_error (
+      parser,
+      input,
+      GUMBO_ERR_NAMED_CHAR_REF_INVALID,
+      bad_ref
+    );
     return false;
   }
   return true;
 }
 
-#line 2465 "char_ref.rl"
 
-// clang-format off
 
-#line 238 "char_ref.c"
+
+
 static const short _char_ref_actions[] = {
 	0, 1, 0, 1, 1, 1, 2, 1, 
 	3, 1, 4, 1, 5, 1, 6, 1, 
@@ -13934,11 +13941,29 @@ static const int char_ref_start = 7623;
 static const int char_ref_en_valid_named_ref = 7623;
 
 
-#line 2469 "char_ref.rl"
-// clang-format on
 
-static bool consume_named_ref(struct GumboInternalParser* parser,
-    Utf8Iterator* input, bool is_in_attribute, OneOrTwoCodepoints* output) {
+static const unsigned char ascii_alnum_table[256] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   0.. 15
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //  16.. 31
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //  32.. 47
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, //  48.. 63
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //  64.. 79
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, //  80.. 95
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //  96..111
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 112..127
+  // 128..255: implicitly zero
+};
+
+static inline bool PURE ascii_isalnum(unsigned char ch) {
+  return ascii_alnum_table[ch];
+}
+
+static bool consume_named_ref (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input,
+  bool is_in_attribute,
+  OneOrTwoCodepoints* output
+) {
   assert(output->first == kGumboNoChar);
   const char* p = utf8iterator_get_char_pointer(input);
   const char* pe = utf8iterator_get_end_pointer(input);
@@ -13947,9 +13972,7 @@ static bool consume_named_ref(struct GumboInternalParser* parser,
   const char *ts, *start;
   int cs, act;
 
-// clang-format off
   
-#line 13985 "char_ref.c"
 	{
 	cs = char_ref_start;
 	ts = 0;
@@ -13957,7 +13980,6 @@ static bool consume_named_ref(struct GumboInternalParser* parser,
 	act = 0;
 	}
 
-#line 2484 "char_ref.rl"
   // Avoid unused variable warnings.
   (void) act;
   (void) ts;
@@ -13965,7 +13987,6 @@ static bool consume_named_ref(struct GumboInternalParser* parser,
 
   start = p;
   
-#line 14001 "char_ref.c"
 	{
 	int _slen;
 	int _trans;
@@ -13984,10 +14005,8 @@ static bool consume_named_ref(struct GumboInternalParser* parser,
 	while ( _nacts-- > 0 ) {
 		switch ( *_acts++ ) {
 	case 1:
-#line 1 "NONE"
 	{ts = p;}
 	break;
-#line 14023 "char_ref.c"
 		}
 	}
 
@@ -14011,8966 +14030,6725 @@ static bool consume_named_ref(struct GumboInternalParser* parser,
 		switch ( *(_acts++) )
 		{
 	case 2:
-#line 1 "NONE"
 	{te = p+1;}
 	break;
 	case 3:
-#line 233 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc6; {p++; goto _out; } }}
 	break;
 	case 4:
-#line 235 "char_ref.rl"
 	{te = p+1;{ output->first = 0x26; {p++; goto _out; } }}
 	break;
 	case 5:
-#line 237 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc1; {p++; goto _out; } }}
 	break;
 	case 6:
-#line 239 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0102; {p++; goto _out; } }}
 	break;
 	case 7:
-#line 240 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc2; {p++; goto _out; } }}
 	break;
 	case 8:
-#line 242 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0410; {p++; goto _out; } }}
 	break;
 	case 9:
-#line 243 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d504; {p++; goto _out; } }}
 	break;
 	case 10:
-#line 244 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc0; {p++; goto _out; } }}
 	break;
 	case 11:
-#line 246 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0391; {p++; goto _out; } }}
 	break;
 	case 12:
-#line 247 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0100; {p++; goto _out; } }}
 	break;
 	case 13:
-#line 248 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a53; {p++; goto _out; } }}
 	break;
 	case 14:
-#line 249 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0104; {p++; goto _out; } }}
 	break;
 	case 15:
-#line 250 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d538; {p++; goto _out; } }}
 	break;
 	case 16:
-#line 251 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2061; {p++; goto _out; } }}
 	break;
 	case 17:
-#line 252 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc5; {p++; goto _out; } }}
 	break;
 	case 18:
-#line 254 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d49c; {p++; goto _out; } }}
 	break;
 	case 19:
-#line 255 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2254; {p++; goto _out; } }}
 	break;
 	case 20:
-#line 256 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc3; {p++; goto _out; } }}
 	break;
 	case 21:
-#line 258 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc4; {p++; goto _out; } }}
 	break;
 	case 22:
-#line 260 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2216; {p++; goto _out; } }}
 	break;
 	case 23:
-#line 261 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ae7; {p++; goto _out; } }}
 	break;
 	case 24:
-#line 262 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2306; {p++; goto _out; } }}
 	break;
 	case 25:
-#line 263 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0411; {p++; goto _out; } }}
 	break;
 	case 26:
-#line 264 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2235; {p++; goto _out; } }}
 	break;
 	case 27:
-#line 265 "char_ref.rl"
 	{te = p+1;{ output->first = 0x212c; {p++; goto _out; } }}
 	break;
 	case 28:
-#line 266 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0392; {p++; goto _out; } }}
 	break;
 	case 29:
-#line 267 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d505; {p++; goto _out; } }}
 	break;
 	case 30:
-#line 268 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d539; {p++; goto _out; } }}
 	break;
 	case 31:
-#line 269 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02d8; {p++; goto _out; } }}
 	break;
 	case 32:
-#line 270 "char_ref.rl"
 	{te = p+1;{ output->first = 0x212c; {p++; goto _out; } }}
 	break;
 	case 33:
-#line 271 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224e; {p++; goto _out; } }}
 	break;
 	case 34:
-#line 272 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0427; {p++; goto _out; } }}
 	break;
 	case 35:
-#line 273 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa9; {p++; goto _out; } }}
 	break;
 	case 36:
-#line 275 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0106; {p++; goto _out; } }}
 	break;
 	case 37:
-#line 276 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d2; {p++; goto _out; } }}
 	break;
 	case 38:
-#line 277 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2145; {p++; goto _out; } }}
 	break;
 	case 39:
-#line 278 "char_ref.rl"
 	{te = p+1;{ output->first = 0x212d; {p++; goto _out; } }}
 	break;
 	case 40:
-#line 279 "char_ref.rl"
 	{te = p+1;{ output->first = 0x010c; {p++; goto _out; } }}
 	break;
 	case 41:
-#line 280 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc7; {p++; goto _out; } }}
 	break;
 	case 42:
-#line 282 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0108; {p++; goto _out; } }}
 	break;
 	case 43:
-#line 283 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2230; {p++; goto _out; } }}
 	break;
 	case 44:
-#line 284 "char_ref.rl"
 	{te = p+1;{ output->first = 0x010a; {p++; goto _out; } }}
 	break;
 	case 45:
-#line 285 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb8; {p++; goto _out; } }}
 	break;
 	case 46:
-#line 286 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb7; {p++; goto _out; } }}
 	break;
 	case 47:
-#line 287 "char_ref.rl"
 	{te = p+1;{ output->first = 0x212d; {p++; goto _out; } }}
 	break;
 	case 48:
-#line 288 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03a7; {p++; goto _out; } }}
 	break;
 	case 49:
-#line 289 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2299; {p++; goto _out; } }}
 	break;
 	case 50:
-#line 290 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2296; {p++; goto _out; } }}
 	break;
 	case 51:
-#line 291 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2295; {p++; goto _out; } }}
 	break;
 	case 52:
-#line 292 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2297; {p++; goto _out; } }}
 	break;
 	case 53:
-#line 293 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2232; {p++; goto _out; } }}
 	break;
 	case 54:
-#line 294 "char_ref.rl"
 	{te = p+1;{ output->first = 0x201d; {p++; goto _out; } }}
 	break;
 	case 55:
-#line 295 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2019; {p++; goto _out; } }}
 	break;
 	case 56:
-#line 296 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2237; {p++; goto _out; } }}
 	break;
 	case 57:
-#line 297 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a74; {p++; goto _out; } }}
 	break;
 	case 58:
-#line 298 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2261; {p++; goto _out; } }}
 	break;
 	case 59:
-#line 299 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222f; {p++; goto _out; } }}
 	break;
 	case 60:
-#line 300 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222e; {p++; goto _out; } }}
 	break;
 	case 61:
-#line 301 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2102; {p++; goto _out; } }}
 	break;
 	case 62:
-#line 302 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2210; {p++; goto _out; } }}
 	break;
 	case 63:
-#line 303 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2233; {p++; goto _out; } }}
 	break;
 	case 64:
-#line 304 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a2f; {p++; goto _out; } }}
 	break;
 	case 65:
-#line 305 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d49e; {p++; goto _out; } }}
 	break;
 	case 66:
-#line 306 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d3; {p++; goto _out; } }}
 	break;
 	case 67:
-#line 307 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224d; {p++; goto _out; } }}
 	break;
 	case 68:
-#line 308 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2145; {p++; goto _out; } }}
 	break;
 	case 69:
-#line 309 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2911; {p++; goto _out; } }}
 	break;
 	case 70:
-#line 310 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0402; {p++; goto _out; } }}
 	break;
 	case 71:
-#line 311 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0405; {p++; goto _out; } }}
 	break;
 	case 72:
-#line 312 "char_ref.rl"
 	{te = p+1;{ output->first = 0x040f; {p++; goto _out; } }}
 	break;
 	case 73:
-#line 313 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2021; {p++; goto _out; } }}
 	break;
 	case 74:
-#line 314 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a1; {p++; goto _out; } }}
 	break;
 	case 75:
-#line 315 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ae4; {p++; goto _out; } }}
 	break;
 	case 76:
-#line 316 "char_ref.rl"
 	{te = p+1;{ output->first = 0x010e; {p++; goto _out; } }}
 	break;
 	case 77:
-#line 317 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0414; {p++; goto _out; } }}
 	break;
 	case 78:
-#line 318 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2207; {p++; goto _out; } }}
 	break;
 	case 79:
-#line 319 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0394; {p++; goto _out; } }}
 	break;
 	case 80:
-#line 320 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d507; {p++; goto _out; } }}
 	break;
 	case 81:
-#line 321 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb4; {p++; goto _out; } }}
 	break;
 	case 82:
-#line 322 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02d9; {p++; goto _out; } }}
 	break;
 	case 83:
-#line 323 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02dd; {p++; goto _out; } }}
 	break;
 	case 84:
-#line 324 "char_ref.rl"
 	{te = p+1;{ output->first = 0x60; {p++; goto _out; } }}
 	break;
 	case 85:
-#line 325 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02dc; {p++; goto _out; } }}
 	break;
 	case 86:
-#line 326 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c4; {p++; goto _out; } }}
 	break;
 	case 87:
-#line 327 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2146; {p++; goto _out; } }}
 	break;
 	case 88:
-#line 328 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d53b; {p++; goto _out; } }}
 	break;
 	case 89:
-#line 329 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa8; {p++; goto _out; } }}
 	break;
 	case 90:
-#line 330 "char_ref.rl"
 	{te = p+1;{ output->first = 0x20dc; {p++; goto _out; } }}
 	break;
 	case 91:
-#line 331 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2250; {p++; goto _out; } }}
 	break;
 	case 92:
-#line 332 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222f; {p++; goto _out; } }}
 	break;
 	case 93:
-#line 333 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa8; {p++; goto _out; } }}
 	break;
 	case 94:
-#line 334 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d3; {p++; goto _out; } }}
 	break;
 	case 95:
-#line 335 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d0; {p++; goto _out; } }}
 	break;
 	case 96:
-#line 336 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d4; {p++; goto _out; } }}
 	break;
 	case 97:
-#line 337 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ae4; {p++; goto _out; } }}
 	break;
 	case 98:
-#line 338 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f8; {p++; goto _out; } }}
 	break;
 	case 99:
-#line 339 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27fa; {p++; goto _out; } }}
 	break;
 	case 100:
-#line 340 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f9; {p++; goto _out; } }}
 	break;
 	case 101:
-#line 341 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d2; {p++; goto _out; } }}
 	break;
 	case 102:
-#line 342 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a8; {p++; goto _out; } }}
 	break;
 	case 103:
-#line 343 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d1; {p++; goto _out; } }}
 	break;
 	case 104:
-#line 344 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d5; {p++; goto _out; } }}
 	break;
 	case 105:
-#line 345 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2225; {p++; goto _out; } }}
 	break;
 	case 106:
-#line 346 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2193; {p++; goto _out; } }}
 	break;
 	case 107:
-#line 347 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2913; {p++; goto _out; } }}
 	break;
 	case 108:
-#line 348 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21f5; {p++; goto _out; } }}
 	break;
 	case 109:
-#line 349 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0311; {p++; goto _out; } }}
 	break;
 	case 110:
-#line 350 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2950; {p++; goto _out; } }}
 	break;
 	case 111:
-#line 351 "char_ref.rl"
 	{te = p+1;{ output->first = 0x295e; {p++; goto _out; } }}
 	break;
 	case 112:
-#line 352 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bd; {p++; goto _out; } }}
 	break;
 	case 113:
-#line 353 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2956; {p++; goto _out; } }}
 	break;
 	case 114:
-#line 354 "char_ref.rl"
 	{te = p+1;{ output->first = 0x295f; {p++; goto _out; } }}
 	break;
 	case 115:
-#line 355 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c1; {p++; goto _out; } }}
 	break;
 	case 116:
-#line 356 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2957; {p++; goto _out; } }}
 	break;
 	case 117:
-#line 357 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a4; {p++; goto _out; } }}
 	break;
 	case 118:
-#line 358 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a7; {p++; goto _out; } }}
 	break;
 	case 119:
-#line 359 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d3; {p++; goto _out; } }}
 	break;
 	case 120:
-#line 360 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d49f; {p++; goto _out; } }}
 	break;
 	case 121:
-#line 361 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0110; {p++; goto _out; } }}
 	break;
 	case 122:
-#line 362 "char_ref.rl"
 	{te = p+1;{ output->first = 0x014a; {p++; goto _out; } }}
 	break;
 	case 123:
-#line 363 "char_ref.rl"
 	{te = p+1;{ output->first = 0xd0; {p++; goto _out; } }}
 	break;
 	case 124:
-#line 365 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc9; {p++; goto _out; } }}
 	break;
 	case 125:
-#line 367 "char_ref.rl"
 	{te = p+1;{ output->first = 0x011a; {p++; goto _out; } }}
 	break;
 	case 126:
-#line 368 "char_ref.rl"
 	{te = p+1;{ output->first = 0xca; {p++; goto _out; } }}
 	break;
 	case 127:
-#line 370 "char_ref.rl"
 	{te = p+1;{ output->first = 0x042d; {p++; goto _out; } }}
 	break;
 	case 128:
-#line 371 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0116; {p++; goto _out; } }}
 	break;
 	case 129:
-#line 372 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d508; {p++; goto _out; } }}
 	break;
 	case 130:
-#line 373 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc8; {p++; goto _out; } }}
 	break;
 	case 131:
-#line 375 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2208; {p++; goto _out; } }}
 	break;
 	case 132:
-#line 376 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0112; {p++; goto _out; } }}
 	break;
 	case 133:
-#line 377 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25fb; {p++; goto _out; } }}
 	break;
 	case 134:
-#line 378 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25ab; {p++; goto _out; } }}
 	break;
 	case 135:
-#line 379 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0118; {p++; goto _out; } }}
 	break;
 	case 136:
-#line 380 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d53c; {p++; goto _out; } }}
 	break;
 	case 137:
-#line 381 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0395; {p++; goto _out; } }}
 	break;
 	case 138:
-#line 382 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a75; {p++; goto _out; } }}
 	break;
 	case 139:
-#line 383 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2242; {p++; goto _out; } }}
 	break;
 	case 140:
-#line 384 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21cc; {p++; goto _out; } }}
 	break;
 	case 141:
-#line 385 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2130; {p++; goto _out; } }}
 	break;
 	case 142:
-#line 386 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a73; {p++; goto _out; } }}
 	break;
 	case 143:
-#line 387 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0397; {p++; goto _out; } }}
 	break;
 	case 144:
-#line 388 "char_ref.rl"
 	{te = p+1;{ output->first = 0xcb; {p++; goto _out; } }}
 	break;
 	case 145:
-#line 390 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2203; {p++; goto _out; } }}
 	break;
 	case 146:
-#line 391 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2147; {p++; goto _out; } }}
 	break;
 	case 147:
-#line 392 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0424; {p++; goto _out; } }}
 	break;
 	case 148:
-#line 393 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d509; {p++; goto _out; } }}
 	break;
 	case 149:
-#line 394 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25fc; {p++; goto _out; } }}
 	break;
 	case 150:
-#line 395 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25aa; {p++; goto _out; } }}
 	break;
 	case 151:
-#line 396 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d53d; {p++; goto _out; } }}
 	break;
 	case 152:
-#line 397 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2200; {p++; goto _out; } }}
 	break;
 	case 153:
-#line 398 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2131; {p++; goto _out; } }}
 	break;
 	case 154:
-#line 399 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2131; {p++; goto _out; } }}
 	break;
 	case 155:
-#line 400 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0403; {p++; goto _out; } }}
 	break;
 	case 156:
-#line 401 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3e; {p++; goto _out; } }}
 	break;
 	case 157:
-#line 403 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0393; {p++; goto _out; } }}
 	break;
 	case 158:
-#line 404 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03dc; {p++; goto _out; } }}
 	break;
 	case 159:
-#line 405 "char_ref.rl"
 	{te = p+1;{ output->first = 0x011e; {p++; goto _out; } }}
 	break;
 	case 160:
-#line 406 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0122; {p++; goto _out; } }}
 	break;
 	case 161:
-#line 407 "char_ref.rl"
 	{te = p+1;{ output->first = 0x011c; {p++; goto _out; } }}
 	break;
 	case 162:
-#line 408 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0413; {p++; goto _out; } }}
 	break;
 	case 163:
-#line 409 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0120; {p++; goto _out; } }}
 	break;
 	case 164:
-#line 410 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d50a; {p++; goto _out; } }}
 	break;
 	case 165:
-#line 411 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d9; {p++; goto _out; } }}
 	break;
 	case 166:
-#line 412 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d53e; {p++; goto _out; } }}
 	break;
 	case 167:
-#line 413 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2265; {p++; goto _out; } }}
 	break;
 	case 168:
-#line 414 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22db; {p++; goto _out; } }}
 	break;
 	case 169:
-#line 415 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2267; {p++; goto _out; } }}
 	break;
 	case 170:
-#line 416 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa2; {p++; goto _out; } }}
 	break;
 	case 171:
-#line 417 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2277; {p++; goto _out; } }}
 	break;
 	case 172:
-#line 418 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7e; {p++; goto _out; } }}
 	break;
 	case 173:
-#line 419 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2273; {p++; goto _out; } }}
 	break;
 	case 174:
-#line 420 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4a2; {p++; goto _out; } }}
 	break;
 	case 175:
-#line 421 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226b; {p++; goto _out; } }}
 	break;
 	case 176:
-#line 422 "char_ref.rl"
 	{te = p+1;{ output->first = 0x042a; {p++; goto _out; } }}
 	break;
 	case 177:
-#line 423 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02c7; {p++; goto _out; } }}
 	break;
 	case 178:
-#line 424 "char_ref.rl"
 	{te = p+1;{ output->first = 0x5e; {p++; goto _out; } }}
 	break;
 	case 179:
-#line 425 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0124; {p++; goto _out; } }}
 	break;
 	case 180:
-#line 426 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210c; {p++; goto _out; } }}
 	break;
 	case 181:
-#line 427 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210b; {p++; goto _out; } }}
 	break;
 	case 182:
-#line 428 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210d; {p++; goto _out; } }}
 	break;
 	case 183:
-#line 429 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2500; {p++; goto _out; } }}
 	break;
 	case 184:
-#line 430 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210b; {p++; goto _out; } }}
 	break;
 	case 185:
-#line 431 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0126; {p++; goto _out; } }}
 	break;
 	case 186:
-#line 432 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224e; {p++; goto _out; } }}
 	break;
 	case 187:
-#line 433 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224f; {p++; goto _out; } }}
 	break;
 	case 188:
-#line 434 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0415; {p++; goto _out; } }}
 	break;
 	case 189:
-#line 435 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0132; {p++; goto _out; } }}
 	break;
 	case 190:
-#line 436 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0401; {p++; goto _out; } }}
 	break;
 	case 191:
-#line 437 "char_ref.rl"
 	{te = p+1;{ output->first = 0xcd; {p++; goto _out; } }}
 	break;
 	case 192:
-#line 439 "char_ref.rl"
 	{te = p+1;{ output->first = 0xce; {p++; goto _out; } }}
 	break;
 	case 193:
-#line 441 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0418; {p++; goto _out; } }}
 	break;
 	case 194:
-#line 442 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0130; {p++; goto _out; } }}
 	break;
 	case 195:
-#line 443 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2111; {p++; goto _out; } }}
 	break;
 	case 196:
-#line 444 "char_ref.rl"
 	{te = p+1;{ output->first = 0xcc; {p++; goto _out; } }}
 	break;
 	case 197:
-#line 446 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2111; {p++; goto _out; } }}
 	break;
 	case 198:
-#line 447 "char_ref.rl"
 	{te = p+1;{ output->first = 0x012a; {p++; goto _out; } }}
 	break;
 	case 199:
-#line 448 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2148; {p++; goto _out; } }}
 	break;
 	case 200:
-#line 449 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d2; {p++; goto _out; } }}
 	break;
 	case 201:
-#line 450 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222c; {p++; goto _out; } }}
 	break;
 	case 202:
-#line 451 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222b; {p++; goto _out; } }}
 	break;
 	case 203:
-#line 452 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c2; {p++; goto _out; } }}
 	break;
 	case 204:
-#line 453 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2063; {p++; goto _out; } }}
 	break;
 	case 205:
-#line 454 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2062; {p++; goto _out; } }}
 	break;
 	case 206:
-#line 455 "char_ref.rl"
 	{te = p+1;{ output->first = 0x012e; {p++; goto _out; } }}
 	break;
 	case 207:
-#line 456 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d540; {p++; goto _out; } }}
 	break;
 	case 208:
-#line 457 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0399; {p++; goto _out; } }}
 	break;
 	case 209:
-#line 458 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2110; {p++; goto _out; } }}
 	break;
 	case 210:
-#line 459 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0128; {p++; goto _out; } }}
 	break;
 	case 211:
-#line 460 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0406; {p++; goto _out; } }}
 	break;
 	case 212:
-#line 461 "char_ref.rl"
 	{te = p+1;{ output->first = 0xcf; {p++; goto _out; } }}
 	break;
 	case 213:
-#line 463 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0134; {p++; goto _out; } }}
 	break;
 	case 214:
-#line 464 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0419; {p++; goto _out; } }}
 	break;
 	case 215:
-#line 465 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d50d; {p++; goto _out; } }}
 	break;
 	case 216:
-#line 466 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d541; {p++; goto _out; } }}
 	break;
 	case 217:
-#line 467 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4a5; {p++; goto _out; } }}
 	break;
 	case 218:
-#line 468 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0408; {p++; goto _out; } }}
 	break;
 	case 219:
-#line 469 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0404; {p++; goto _out; } }}
 	break;
 	case 220:
-#line 470 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0425; {p++; goto _out; } }}
 	break;
 	case 221:
-#line 471 "char_ref.rl"
 	{te = p+1;{ output->first = 0x040c; {p++; goto _out; } }}
 	break;
 	case 222:
-#line 472 "char_ref.rl"
 	{te = p+1;{ output->first = 0x039a; {p++; goto _out; } }}
 	break;
 	case 223:
-#line 473 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0136; {p++; goto _out; } }}
 	break;
 	case 224:
-#line 474 "char_ref.rl"
 	{te = p+1;{ output->first = 0x041a; {p++; goto _out; } }}
 	break;
 	case 225:
-#line 475 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d50e; {p++; goto _out; } }}
 	break;
 	case 226:
-#line 476 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d542; {p++; goto _out; } }}
 	break;
 	case 227:
-#line 477 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4a6; {p++; goto _out; } }}
 	break;
 	case 228:
-#line 478 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0409; {p++; goto _out; } }}
 	break;
 	case 229:
-#line 479 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3c; {p++; goto _out; } }}
 	break;
 	case 230:
-#line 481 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0139; {p++; goto _out; } }}
 	break;
 	case 231:
-#line 482 "char_ref.rl"
 	{te = p+1;{ output->first = 0x039b; {p++; goto _out; } }}
 	break;
 	case 232:
-#line 483 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27ea; {p++; goto _out; } }}
 	break;
 	case 233:
-#line 484 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2112; {p++; goto _out; } }}
 	break;
 	case 234:
-#line 485 "char_ref.rl"
 	{te = p+1;{ output->first = 0x219e; {p++; goto _out; } }}
 	break;
 	case 235:
-#line 486 "char_ref.rl"
 	{te = p+1;{ output->first = 0x013d; {p++; goto _out; } }}
 	break;
 	case 236:
-#line 487 "char_ref.rl"
 	{te = p+1;{ output->first = 0x013b; {p++; goto _out; } }}
 	break;
 	case 237:
-#line 488 "char_ref.rl"
 	{te = p+1;{ output->first = 0x041b; {p++; goto _out; } }}
 	break;
 	case 238:
-#line 489 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27e8; {p++; goto _out; } }}
 	break;
 	case 239:
-#line 490 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2190; {p++; goto _out; } }}
 	break;
 	case 240:
-#line 491 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21e4; {p++; goto _out; } }}
 	break;
 	case 241:
-#line 492 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c6; {p++; goto _out; } }}
 	break;
 	case 242:
-#line 493 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2308; {p++; goto _out; } }}
 	break;
 	case 243:
-#line 494 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27e6; {p++; goto _out; } }}
 	break;
 	case 244:
-#line 495 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2961; {p++; goto _out; } }}
 	break;
 	case 245:
-#line 496 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c3; {p++; goto _out; } }}
 	break;
 	case 246:
-#line 497 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2959; {p++; goto _out; } }}
 	break;
 	case 247:
-#line 498 "char_ref.rl"
 	{te = p+1;{ output->first = 0x230a; {p++; goto _out; } }}
 	break;
 	case 248:
-#line 499 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2194; {p++; goto _out; } }}
 	break;
 	case 249:
-#line 500 "char_ref.rl"
 	{te = p+1;{ output->first = 0x294e; {p++; goto _out; } }}
 	break;
 	case 250:
-#line 501 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a3; {p++; goto _out; } }}
 	break;
 	case 251:
-#line 502 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a4; {p++; goto _out; } }}
 	break;
 	case 252:
-#line 503 "char_ref.rl"
 	{te = p+1;{ output->first = 0x295a; {p++; goto _out; } }}
 	break;
 	case 253:
-#line 504 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b2; {p++; goto _out; } }}
 	break;
 	case 254:
-#line 505 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29cf; {p++; goto _out; } }}
 	break;
 	case 255:
-#line 506 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b4; {p++; goto _out; } }}
 	break;
 	case 256:
-#line 507 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2951; {p++; goto _out; } }}
 	break;
 	case 257:
-#line 508 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2960; {p++; goto _out; } }}
 	break;
 	case 258:
-#line 509 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bf; {p++; goto _out; } }}
 	break;
 	case 259:
-#line 510 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2958; {p++; goto _out; } }}
 	break;
 	case 260:
-#line 511 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bc; {p++; goto _out; } }}
 	break;
 	case 261:
-#line 512 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2952; {p++; goto _out; } }}
 	break;
 	case 262:
-#line 513 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d0; {p++; goto _out; } }}
 	break;
 	case 263:
-#line 514 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d4; {p++; goto _out; } }}
 	break;
 	case 264:
-#line 515 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22da; {p++; goto _out; } }}
 	break;
 	case 265:
-#line 516 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2266; {p++; goto _out; } }}
 	break;
 	case 266:
-#line 517 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2276; {p++; goto _out; } }}
 	break;
 	case 267:
-#line 518 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa1; {p++; goto _out; } }}
 	break;
 	case 268:
-#line 519 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7d; {p++; goto _out; } }}
 	break;
 	case 269:
-#line 520 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2272; {p++; goto _out; } }}
 	break;
 	case 270:
-#line 521 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d50f; {p++; goto _out; } }}
 	break;
 	case 271:
-#line 522 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d8; {p++; goto _out; } }}
 	break;
 	case 272:
-#line 523 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21da; {p++; goto _out; } }}
 	break;
 	case 273:
-#line 524 "char_ref.rl"
 	{te = p+1;{ output->first = 0x013f; {p++; goto _out; } }}
 	break;
 	case 274:
-#line 525 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f5; {p++; goto _out; } }}
 	break;
 	case 275:
-#line 526 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f7; {p++; goto _out; } }}
 	break;
 	case 276:
-#line 527 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f6; {p++; goto _out; } }}
 	break;
 	case 277:
-#line 528 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f8; {p++; goto _out; } }}
 	break;
 	case 278:
-#line 529 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27fa; {p++; goto _out; } }}
 	break;
 	case 279:
-#line 530 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f9; {p++; goto _out; } }}
 	break;
 	case 280:
-#line 531 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d543; {p++; goto _out; } }}
 	break;
 	case 281:
-#line 532 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2199; {p++; goto _out; } }}
 	break;
 	case 282:
-#line 533 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2198; {p++; goto _out; } }}
 	break;
 	case 283:
-#line 534 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2112; {p++; goto _out; } }}
 	break;
 	case 284:
-#line 535 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b0; {p++; goto _out; } }}
 	break;
 	case 285:
-#line 536 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0141; {p++; goto _out; } }}
 	break;
 	case 286:
-#line 537 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226a; {p++; goto _out; } }}
 	break;
 	case 287:
-#line 538 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2905; {p++; goto _out; } }}
 	break;
 	case 288:
-#line 539 "char_ref.rl"
 	{te = p+1;{ output->first = 0x041c; {p++; goto _out; } }}
 	break;
 	case 289:
-#line 540 "char_ref.rl"
 	{te = p+1;{ output->first = 0x205f; {p++; goto _out; } }}
 	break;
 	case 290:
-#line 541 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2133; {p++; goto _out; } }}
 	break;
 	case 291:
-#line 542 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d510; {p++; goto _out; } }}
 	break;
 	case 292:
-#line 543 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2213; {p++; goto _out; } }}
 	break;
 	case 293:
-#line 544 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d544; {p++; goto _out; } }}
 	break;
 	case 294:
-#line 545 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2133; {p++; goto _out; } }}
 	break;
 	case 295:
-#line 546 "char_ref.rl"
 	{te = p+1;{ output->first = 0x039c; {p++; goto _out; } }}
 	break;
 	case 296:
-#line 547 "char_ref.rl"
 	{te = p+1;{ output->first = 0x040a; {p++; goto _out; } }}
 	break;
 	case 297:
-#line 548 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0143; {p++; goto _out; } }}
 	break;
 	case 298:
-#line 549 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0147; {p++; goto _out; } }}
 	break;
 	case 299:
-#line 550 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0145; {p++; goto _out; } }}
 	break;
 	case 300:
-#line 551 "char_ref.rl"
 	{te = p+1;{ output->first = 0x041d; {p++; goto _out; } }}
 	break;
 	case 301:
-#line 552 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200b; {p++; goto _out; } }}
 	break;
 	case 302:
-#line 553 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200b; {p++; goto _out; } }}
 	break;
 	case 303:
-#line 554 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200b; {p++; goto _out; } }}
 	break;
 	case 304:
-#line 555 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200b; {p++; goto _out; } }}
 	break;
 	case 305:
-#line 556 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226b; {p++; goto _out; } }}
 	break;
 	case 306:
-#line 557 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226a; {p++; goto _out; } }}
 	break;
 	case 307:
-#line 558 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0a; {p++; goto _out; } }}
 	break;
 	case 308:
-#line 559 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d511; {p++; goto _out; } }}
 	break;
 	case 309:
-#line 560 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2060; {p++; goto _out; } }}
 	break;
 	case 310:
-#line 561 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa0; {p++; goto _out; } }}
 	break;
 	case 311:
-#line 562 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2115; {p++; goto _out; } }}
 	break;
 	case 312:
-#line 563 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aec; {p++; goto _out; } }}
 	break;
 	case 313:
-#line 564 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2262; {p++; goto _out; } }}
 	break;
 	case 314:
-#line 565 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226d; {p++; goto _out; } }}
 	break;
 	case 315:
-#line 566 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2226; {p++; goto _out; } }}
 	break;
 	case 316:
-#line 567 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2209; {p++; goto _out; } }}
 	break;
 	case 317:
-#line 568 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2260; {p++; goto _out; } }}
 	break;
 	case 318:
-#line 569 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2242; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 319:
-#line 570 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2204; {p++; goto _out; } }}
 	break;
 	case 320:
-#line 571 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226f; {p++; goto _out; } }}
 	break;
 	case 321:
-#line 572 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2271; {p++; goto _out; } }}
 	break;
 	case 322:
-#line 573 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2267; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 323:
-#line 574 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226b; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 324:
-#line 575 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2279; {p++; goto _out; } }}
 	break;
 	case 325:
-#line 576 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7e; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 326:
-#line 577 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2275; {p++; goto _out; } }}
 	break;
 	case 327:
-#line 578 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224e; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 328:
-#line 579 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224f; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 329:
-#line 580 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ea; {p++; goto _out; } }}
 	break;
 	case 330:
-#line 581 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29cf; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 331:
-#line 582 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ec; {p++; goto _out; } }}
 	break;
 	case 332:
-#line 583 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226e; {p++; goto _out; } }}
 	break;
 	case 333:
-#line 584 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2270; {p++; goto _out; } }}
 	break;
 	case 334:
-#line 585 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2278; {p++; goto _out; } }}
 	break;
 	case 335:
-#line 586 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226a; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 336:
-#line 587 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7d; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 337:
-#line 588 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2274; {p++; goto _out; } }}
 	break;
 	case 338:
-#line 589 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa2; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 339:
-#line 590 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa1; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 340:
-#line 591 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2280; {p++; goto _out; } }}
 	break;
 	case 341:
-#line 592 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aaf; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 342:
-#line 593 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e0; {p++; goto _out; } }}
 	break;
 	case 343:
-#line 594 "char_ref.rl"
 	{te = p+1;{ output->first = 0x220c; {p++; goto _out; } }}
 	break;
 	case 344:
-#line 595 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22eb; {p++; goto _out; } }}
 	break;
 	case 345:
-#line 596 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29d0; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 346:
-#line 597 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ed; {p++; goto _out; } }}
 	break;
 	case 347:
-#line 598 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228f; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 348:
-#line 599 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e2; {p++; goto _out; } }}
 	break;
 	case 349:
-#line 600 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2290; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 350:
-#line 601 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e3; {p++; goto _out; } }}
 	break;
 	case 351:
-#line 602 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2282; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 352:
-#line 603 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2288; {p++; goto _out; } }}
 	break;
 	case 353:
-#line 604 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2281; {p++; goto _out; } }}
 	break;
 	case 354:
-#line 605 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab0; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 355:
-#line 606 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e1; {p++; goto _out; } }}
 	break;
 	case 356:
-#line 607 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227f; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 357:
-#line 608 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2283; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 358:
-#line 609 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2289; {p++; goto _out; } }}
 	break;
 	case 359:
-#line 610 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2241; {p++; goto _out; } }}
 	break;
 	case 360:
-#line 611 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2244; {p++; goto _out; } }}
 	break;
 	case 361:
-#line 612 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2247; {p++; goto _out; } }}
 	break;
 	case 362:
-#line 613 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2249; {p++; goto _out; } }}
 	break;
 	case 363:
-#line 614 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2224; {p++; goto _out; } }}
 	break;
 	case 364:
-#line 615 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4a9; {p++; goto _out; } }}
 	break;
 	case 365:
-#line 616 "char_ref.rl"
 	{te = p+1;{ output->first = 0xd1; {p++; goto _out; } }}
 	break;
 	case 366:
-#line 618 "char_ref.rl"
 	{te = p+1;{ output->first = 0x039d; {p++; goto _out; } }}
 	break;
 	case 367:
-#line 619 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0152; {p++; goto _out; } }}
 	break;
 	case 368:
-#line 620 "char_ref.rl"
 	{te = p+1;{ output->first = 0xd3; {p++; goto _out; } }}
 	break;
 	case 369:
-#line 622 "char_ref.rl"
 	{te = p+1;{ output->first = 0xd4; {p++; goto _out; } }}
 	break;
 	case 370:
-#line 624 "char_ref.rl"
 	{te = p+1;{ output->first = 0x041e; {p++; goto _out; } }}
 	break;
 	case 371:
-#line 625 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0150; {p++; goto _out; } }}
 	break;
 	case 372:
-#line 626 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d512; {p++; goto _out; } }}
 	break;
 	case 373:
-#line 627 "char_ref.rl"
 	{te = p+1;{ output->first = 0xd2; {p++; goto _out; } }}
 	break;
 	case 374:
-#line 629 "char_ref.rl"
 	{te = p+1;{ output->first = 0x014c; {p++; goto _out; } }}
 	break;
 	case 375:
-#line 630 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03a9; {p++; goto _out; } }}
 	break;
 	case 376:
-#line 631 "char_ref.rl"
 	{te = p+1;{ output->first = 0x039f; {p++; goto _out; } }}
 	break;
 	case 377:
-#line 632 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d546; {p++; goto _out; } }}
 	break;
 	case 378:
-#line 633 "char_ref.rl"
 	{te = p+1;{ output->first = 0x201c; {p++; goto _out; } }}
 	break;
 	case 379:
-#line 634 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2018; {p++; goto _out; } }}
 	break;
 	case 380:
-#line 635 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a54; {p++; goto _out; } }}
 	break;
 	case 381:
-#line 636 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4aa; {p++; goto _out; } }}
 	break;
 	case 382:
-#line 637 "char_ref.rl"
 	{te = p+1;{ output->first = 0xd8; {p++; goto _out; } }}
 	break;
 	case 383:
-#line 639 "char_ref.rl"
 	{te = p+1;{ output->first = 0xd5; {p++; goto _out; } }}
 	break;
 	case 384:
-#line 641 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a37; {p++; goto _out; } }}
 	break;
 	case 385:
-#line 642 "char_ref.rl"
 	{te = p+1;{ output->first = 0xd6; {p++; goto _out; } }}
 	break;
 	case 386:
-#line 644 "char_ref.rl"
 	{te = p+1;{ output->first = 0x203e; {p++; goto _out; } }}
 	break;
 	case 387:
-#line 645 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23de; {p++; goto _out; } }}
 	break;
 	case 388:
-#line 646 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23b4; {p++; goto _out; } }}
 	break;
 	case 389:
-#line 647 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23dc; {p++; goto _out; } }}
 	break;
 	case 390:
-#line 648 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2202; {p++; goto _out; } }}
 	break;
 	case 391:
-#line 649 "char_ref.rl"
 	{te = p+1;{ output->first = 0x041f; {p++; goto _out; } }}
 	break;
 	case 392:
-#line 650 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d513; {p++; goto _out; } }}
 	break;
 	case 393:
-#line 651 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03a6; {p++; goto _out; } }}
 	break;
 	case 394:
-#line 652 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03a0; {p++; goto _out; } }}
 	break;
 	case 395:
-#line 653 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb1; {p++; goto _out; } }}
 	break;
 	case 396:
-#line 654 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210c; {p++; goto _out; } }}
 	break;
 	case 397:
-#line 655 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2119; {p++; goto _out; } }}
 	break;
 	case 398:
-#line 656 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2abb; {p++; goto _out; } }}
 	break;
 	case 399:
-#line 657 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227a; {p++; goto _out; } }}
 	break;
 	case 400:
-#line 658 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aaf; {p++; goto _out; } }}
 	break;
 	case 401:
-#line 659 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227c; {p++; goto _out; } }}
 	break;
 	case 402:
-#line 660 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227e; {p++; goto _out; } }}
 	break;
 	case 403:
-#line 661 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2033; {p++; goto _out; } }}
 	break;
 	case 404:
-#line 662 "char_ref.rl"
 	{te = p+1;{ output->first = 0x220f; {p++; goto _out; } }}
 	break;
 	case 405:
-#line 663 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2237; {p++; goto _out; } }}
 	break;
 	case 406:
-#line 664 "char_ref.rl"
 	{te = p+1;{ output->first = 0x221d; {p++; goto _out; } }}
 	break;
 	case 407:
-#line 665 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4ab; {p++; goto _out; } }}
 	break;
 	case 408:
-#line 666 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03a8; {p++; goto _out; } }}
 	break;
 	case 409:
-#line 667 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22; {p++; goto _out; } }}
 	break;
 	case 410:
-#line 669 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d514; {p++; goto _out; } }}
 	break;
 	case 411:
-#line 670 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211a; {p++; goto _out; } }}
 	break;
 	case 412:
-#line 671 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4ac; {p++; goto _out; } }}
 	break;
 	case 413:
-#line 672 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2910; {p++; goto _out; } }}
 	break;
 	case 414:
-#line 673 "char_ref.rl"
 	{te = p+1;{ output->first = 0xae; {p++; goto _out; } }}
 	break;
 	case 415:
-#line 675 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0154; {p++; goto _out; } }}
 	break;
 	case 416:
-#line 676 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27eb; {p++; goto _out; } }}
 	break;
 	case 417:
-#line 677 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a0; {p++; goto _out; } }}
 	break;
 	case 418:
-#line 678 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2916; {p++; goto _out; } }}
 	break;
 	case 419:
-#line 679 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0158; {p++; goto _out; } }}
 	break;
 	case 420:
-#line 680 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0156; {p++; goto _out; } }}
 	break;
 	case 421:
-#line 681 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0420; {p++; goto _out; } }}
 	break;
 	case 422:
-#line 682 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211c; {p++; goto _out; } }}
 	break;
 	case 423:
-#line 683 "char_ref.rl"
 	{te = p+1;{ output->first = 0x220b; {p++; goto _out; } }}
 	break;
 	case 424:
-#line 684 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21cb; {p++; goto _out; } }}
 	break;
 	case 425:
-#line 685 "char_ref.rl"
 	{te = p+1;{ output->first = 0x296f; {p++; goto _out; } }}
 	break;
 	case 426:
-#line 686 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211c; {p++; goto _out; } }}
 	break;
 	case 427:
-#line 687 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03a1; {p++; goto _out; } }}
 	break;
 	case 428:
-#line 688 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27e9; {p++; goto _out; } }}
 	break;
 	case 429:
-#line 689 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2192; {p++; goto _out; } }}
 	break;
 	case 430:
-#line 690 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21e5; {p++; goto _out; } }}
 	break;
 	case 431:
-#line 691 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c4; {p++; goto _out; } }}
 	break;
 	case 432:
-#line 692 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2309; {p++; goto _out; } }}
 	break;
 	case 433:
-#line 693 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27e7; {p++; goto _out; } }}
 	break;
 	case 434:
-#line 694 "char_ref.rl"
 	{te = p+1;{ output->first = 0x295d; {p++; goto _out; } }}
 	break;
 	case 435:
-#line 695 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c2; {p++; goto _out; } }}
 	break;
 	case 436:
-#line 696 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2955; {p++; goto _out; } }}
 	break;
 	case 437:
-#line 697 "char_ref.rl"
 	{te = p+1;{ output->first = 0x230b; {p++; goto _out; } }}
 	break;
 	case 438:
-#line 698 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a2; {p++; goto _out; } }}
 	break;
 	case 439:
-#line 699 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a6; {p++; goto _out; } }}
 	break;
 	case 440:
-#line 700 "char_ref.rl"
 	{te = p+1;{ output->first = 0x295b; {p++; goto _out; } }}
 	break;
 	case 441:
-#line 701 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b3; {p++; goto _out; } }}
 	break;
 	case 442:
-#line 702 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29d0; {p++; goto _out; } }}
 	break;
 	case 443:
-#line 703 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b5; {p++; goto _out; } }}
 	break;
 	case 444:
-#line 704 "char_ref.rl"
 	{te = p+1;{ output->first = 0x294f; {p++; goto _out; } }}
 	break;
 	case 445:
-#line 705 "char_ref.rl"
 	{te = p+1;{ output->first = 0x295c; {p++; goto _out; } }}
 	break;
 	case 446:
-#line 706 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21be; {p++; goto _out; } }}
 	break;
 	case 447:
-#line 707 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2954; {p++; goto _out; } }}
 	break;
 	case 448:
-#line 708 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c0; {p++; goto _out; } }}
 	break;
 	case 449:
-#line 709 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2953; {p++; goto _out; } }}
 	break;
 	case 450:
-#line 710 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d2; {p++; goto _out; } }}
 	break;
 	case 451:
-#line 711 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211d; {p++; goto _out; } }}
 	break;
 	case 452:
-#line 712 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2970; {p++; goto _out; } }}
 	break;
 	case 453:
-#line 713 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21db; {p++; goto _out; } }}
 	break;
 	case 454:
-#line 714 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211b; {p++; goto _out; } }}
 	break;
 	case 455:
-#line 715 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b1; {p++; goto _out; } }}
 	break;
 	case 456:
-#line 716 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29f4; {p++; goto _out; } }}
 	break;
 	case 457:
-#line 717 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0429; {p++; goto _out; } }}
 	break;
 	case 458:
-#line 718 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0428; {p++; goto _out; } }}
 	break;
 	case 459:
-#line 719 "char_ref.rl"
 	{te = p+1;{ output->first = 0x042c; {p++; goto _out; } }}
 	break;
 	case 460:
-#line 720 "char_ref.rl"
 	{te = p+1;{ output->first = 0x015a; {p++; goto _out; } }}
 	break;
 	case 461:
-#line 721 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2abc; {p++; goto _out; } }}
 	break;
 	case 462:
-#line 722 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0160; {p++; goto _out; } }}
 	break;
 	case 463:
-#line 723 "char_ref.rl"
 	{te = p+1;{ output->first = 0x015e; {p++; goto _out; } }}
 	break;
 	case 464:
-#line 724 "char_ref.rl"
 	{te = p+1;{ output->first = 0x015c; {p++; goto _out; } }}
 	break;
 	case 465:
-#line 725 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0421; {p++; goto _out; } }}
 	break;
 	case 466:
-#line 726 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d516; {p++; goto _out; } }}
 	break;
 	case 467:
-#line 727 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2193; {p++; goto _out; } }}
 	break;
 	case 468:
-#line 728 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2190; {p++; goto _out; } }}
 	break;
 	case 469:
-#line 729 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2192; {p++; goto _out; } }}
 	break;
 	case 470:
-#line 730 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2191; {p++; goto _out; } }}
 	break;
 	case 471:
-#line 731 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03a3; {p++; goto _out; } }}
 	break;
 	case 472:
-#line 732 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2218; {p++; goto _out; } }}
 	break;
 	case 473:
-#line 733 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d54a; {p++; goto _out; } }}
 	break;
 	case 474:
-#line 734 "char_ref.rl"
 	{te = p+1;{ output->first = 0x221a; {p++; goto _out; } }}
 	break;
 	case 475:
-#line 735 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25a1; {p++; goto _out; } }}
 	break;
 	case 476:
-#line 736 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2293; {p++; goto _out; } }}
 	break;
 	case 477:
-#line 737 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228f; {p++; goto _out; } }}
 	break;
 	case 478:
-#line 738 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2291; {p++; goto _out; } }}
 	break;
 	case 479:
-#line 739 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2290; {p++; goto _out; } }}
 	break;
 	case 480:
-#line 740 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2292; {p++; goto _out; } }}
 	break;
 	case 481:
-#line 741 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2294; {p++; goto _out; } }}
 	break;
 	case 482:
-#line 742 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4ae; {p++; goto _out; } }}
 	break;
 	case 483:
-#line 743 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c6; {p++; goto _out; } }}
 	break;
 	case 484:
-#line 744 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d0; {p++; goto _out; } }}
 	break;
 	case 485:
-#line 745 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d0; {p++; goto _out; } }}
 	break;
 	case 486:
-#line 746 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2286; {p++; goto _out; } }}
 	break;
 	case 487:
-#line 747 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227b; {p++; goto _out; } }}
 	break;
 	case 488:
-#line 748 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab0; {p++; goto _out; } }}
 	break;
 	case 489:
-#line 749 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227d; {p++; goto _out; } }}
 	break;
 	case 490:
-#line 750 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227f; {p++; goto _out; } }}
 	break;
 	case 491:
-#line 751 "char_ref.rl"
 	{te = p+1;{ output->first = 0x220b; {p++; goto _out; } }}
 	break;
 	case 492:
-#line 752 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2211; {p++; goto _out; } }}
 	break;
 	case 493:
-#line 753 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d1; {p++; goto _out; } }}
 	break;
 	case 494:
-#line 754 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2283; {p++; goto _out; } }}
 	break;
 	case 495:
-#line 755 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2287; {p++; goto _out; } }}
 	break;
 	case 496:
-#line 756 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d1; {p++; goto _out; } }}
 	break;
 	case 497:
-#line 757 "char_ref.rl"
 	{te = p+1;{ output->first = 0xde; {p++; goto _out; } }}
 	break;
 	case 498:
-#line 759 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2122; {p++; goto _out; } }}
 	break;
 	case 499:
-#line 760 "char_ref.rl"
 	{te = p+1;{ output->first = 0x040b; {p++; goto _out; } }}
 	break;
 	case 500:
-#line 761 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0426; {p++; goto _out; } }}
 	break;
 	case 501:
-#line 762 "char_ref.rl"
 	{te = p+1;{ output->first = 0x09; {p++; goto _out; } }}
 	break;
 	case 502:
-#line 763 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03a4; {p++; goto _out; } }}
 	break;
 	case 503:
-#line 764 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0164; {p++; goto _out; } }}
 	break;
 	case 504:
-#line 765 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0162; {p++; goto _out; } }}
 	break;
 	case 505:
-#line 766 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0422; {p++; goto _out; } }}
 	break;
 	case 506:
-#line 767 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d517; {p++; goto _out; } }}
 	break;
 	case 507:
-#line 768 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2234; {p++; goto _out; } }}
 	break;
 	case 508:
-#line 769 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0398; {p++; goto _out; } }}
 	break;
 	case 509:
-#line 770 "char_ref.rl"
 	{te = p+1;{ output->first = 0x205f; output->second = 0x200a; {p++; goto _out; } }}
 	break;
 	case 510:
-#line 771 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2009; {p++; goto _out; } }}
 	break;
 	case 511:
-#line 772 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223c; {p++; goto _out; } }}
 	break;
 	case 512:
-#line 773 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2243; {p++; goto _out; } }}
 	break;
 	case 513:
-#line 774 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2245; {p++; goto _out; } }}
 	break;
 	case 514:
-#line 775 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2248; {p++; goto _out; } }}
 	break;
 	case 515:
-#line 776 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d54b; {p++; goto _out; } }}
 	break;
 	case 516:
-#line 777 "char_ref.rl"
 	{te = p+1;{ output->first = 0x20db; {p++; goto _out; } }}
 	break;
 	case 517:
-#line 778 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4af; {p++; goto _out; } }}
 	break;
 	case 518:
-#line 779 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0166; {p++; goto _out; } }}
 	break;
 	case 519:
-#line 780 "char_ref.rl"
 	{te = p+1;{ output->first = 0xda; {p++; goto _out; } }}
 	break;
 	case 520:
-#line 782 "char_ref.rl"
 	{te = p+1;{ output->first = 0x219f; {p++; goto _out; } }}
 	break;
 	case 521:
-#line 783 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2949; {p++; goto _out; } }}
 	break;
 	case 522:
-#line 784 "char_ref.rl"
 	{te = p+1;{ output->first = 0x040e; {p++; goto _out; } }}
 	break;
 	case 523:
-#line 785 "char_ref.rl"
 	{te = p+1;{ output->first = 0x016c; {p++; goto _out; } }}
 	break;
 	case 524:
-#line 786 "char_ref.rl"
 	{te = p+1;{ output->first = 0xdb; {p++; goto _out; } }}
 	break;
 	case 525:
-#line 788 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0423; {p++; goto _out; } }}
 	break;
 	case 526:
-#line 789 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0170; {p++; goto _out; } }}
 	break;
 	case 527:
-#line 790 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d518; {p++; goto _out; } }}
 	break;
 	case 528:
-#line 791 "char_ref.rl"
 	{te = p+1;{ output->first = 0xd9; {p++; goto _out; } }}
 	break;
 	case 529:
-#line 793 "char_ref.rl"
 	{te = p+1;{ output->first = 0x016a; {p++; goto _out; } }}
 	break;
 	case 530:
-#line 794 "char_ref.rl"
 	{te = p+1;{ output->first = 0x5f; {p++; goto _out; } }}
 	break;
 	case 531:
-#line 795 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23df; {p++; goto _out; } }}
 	break;
 	case 532:
-#line 796 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23b5; {p++; goto _out; } }}
 	break;
 	case 533:
-#line 797 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23dd; {p++; goto _out; } }}
 	break;
 	case 534:
-#line 798 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c3; {p++; goto _out; } }}
 	break;
 	case 535:
-#line 799 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228e; {p++; goto _out; } }}
 	break;
 	case 536:
-#line 800 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0172; {p++; goto _out; } }}
 	break;
 	case 537:
-#line 801 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d54c; {p++; goto _out; } }}
 	break;
 	case 538:
-#line 802 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2191; {p++; goto _out; } }}
 	break;
 	case 539:
-#line 803 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2912; {p++; goto _out; } }}
 	break;
 	case 540:
-#line 804 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c5; {p++; goto _out; } }}
 	break;
 	case 541:
-#line 805 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2195; {p++; goto _out; } }}
 	break;
 	case 542:
-#line 806 "char_ref.rl"
 	{te = p+1;{ output->first = 0x296e; {p++; goto _out; } }}
 	break;
 	case 543:
-#line 807 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a5; {p++; goto _out; } }}
 	break;
 	case 544:
-#line 808 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a5; {p++; goto _out; } }}
 	break;
 	case 545:
-#line 809 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d1; {p++; goto _out; } }}
 	break;
 	case 546:
-#line 810 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d5; {p++; goto _out; } }}
 	break;
 	case 547:
-#line 811 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2196; {p++; goto _out; } }}
 	break;
 	case 548:
-#line 812 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2197; {p++; goto _out; } }}
 	break;
 	case 549:
-#line 813 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03d2; {p++; goto _out; } }}
 	break;
 	case 550:
-#line 814 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03a5; {p++; goto _out; } }}
 	break;
 	case 551:
-#line 815 "char_ref.rl"
 	{te = p+1;{ output->first = 0x016e; {p++; goto _out; } }}
 	break;
 	case 552:
-#line 816 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4b0; {p++; goto _out; } }}
 	break;
 	case 553:
-#line 817 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0168; {p++; goto _out; } }}
 	break;
 	case 554:
-#line 818 "char_ref.rl"
 	{te = p+1;{ output->first = 0xdc; {p++; goto _out; } }}
 	break;
 	case 555:
-#line 820 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ab; {p++; goto _out; } }}
 	break;
 	case 556:
-#line 821 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aeb; {p++; goto _out; } }}
 	break;
 	case 557:
-#line 822 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0412; {p++; goto _out; } }}
 	break;
 	case 558:
-#line 823 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a9; {p++; goto _out; } }}
 	break;
 	case 559:
-#line 824 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ae6; {p++; goto _out; } }}
 	break;
 	case 560:
-#line 825 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c1; {p++; goto _out; } }}
 	break;
 	case 561:
-#line 826 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2016; {p++; goto _out; } }}
 	break;
 	case 562:
-#line 827 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2016; {p++; goto _out; } }}
 	break;
 	case 563:
-#line 828 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2223; {p++; goto _out; } }}
 	break;
 	case 564:
-#line 829 "char_ref.rl"
 	{te = p+1;{ output->first = 0x7c; {p++; goto _out; } }}
 	break;
 	case 565:
-#line 830 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2758; {p++; goto _out; } }}
 	break;
 	case 566:
-#line 831 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2240; {p++; goto _out; } }}
 	break;
 	case 567:
-#line 832 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200a; {p++; goto _out; } }}
 	break;
 	case 568:
-#line 833 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d519; {p++; goto _out; } }}
 	break;
 	case 569:
-#line 834 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d54d; {p++; goto _out; } }}
 	break;
 	case 570:
-#line 835 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4b1; {p++; goto _out; } }}
 	break;
 	case 571:
-#line 836 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22aa; {p++; goto _out; } }}
 	break;
 	case 572:
-#line 837 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0174; {p++; goto _out; } }}
 	break;
 	case 573:
-#line 838 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c0; {p++; goto _out; } }}
 	break;
 	case 574:
-#line 839 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d51a; {p++; goto _out; } }}
 	break;
 	case 575:
-#line 840 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d54e; {p++; goto _out; } }}
 	break;
 	case 576:
-#line 841 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4b2; {p++; goto _out; } }}
 	break;
 	case 577:
-#line 842 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d51b; {p++; goto _out; } }}
 	break;
 	case 578:
-#line 843 "char_ref.rl"
 	{te = p+1;{ output->first = 0x039e; {p++; goto _out; } }}
 	break;
 	case 579:
-#line 844 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d54f; {p++; goto _out; } }}
 	break;
 	case 580:
-#line 845 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4b3; {p++; goto _out; } }}
 	break;
 	case 581:
-#line 846 "char_ref.rl"
 	{te = p+1;{ output->first = 0x042f; {p++; goto _out; } }}
 	break;
 	case 582:
-#line 847 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0407; {p++; goto _out; } }}
 	break;
 	case 583:
-#line 848 "char_ref.rl"
 	{te = p+1;{ output->first = 0x042e; {p++; goto _out; } }}
 	break;
 	case 584:
-#line 849 "char_ref.rl"
 	{te = p+1;{ output->first = 0xdd; {p++; goto _out; } }}
 	break;
 	case 585:
-#line 851 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0176; {p++; goto _out; } }}
 	break;
 	case 586:
-#line 852 "char_ref.rl"
 	{te = p+1;{ output->first = 0x042b; {p++; goto _out; } }}
 	break;
 	case 587:
-#line 853 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d51c; {p++; goto _out; } }}
 	break;
 	case 588:
-#line 854 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d550; {p++; goto _out; } }}
 	break;
 	case 589:
-#line 855 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4b4; {p++; goto _out; } }}
 	break;
 	case 590:
-#line 856 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0178; {p++; goto _out; } }}
 	break;
 	case 591:
-#line 857 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0416; {p++; goto _out; } }}
 	break;
 	case 592:
-#line 858 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0179; {p++; goto _out; } }}
 	break;
 	case 593:
-#line 859 "char_ref.rl"
 	{te = p+1;{ output->first = 0x017d; {p++; goto _out; } }}
 	break;
 	case 594:
-#line 860 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0417; {p++; goto _out; } }}
 	break;
 	case 595:
-#line 861 "char_ref.rl"
 	{te = p+1;{ output->first = 0x017b; {p++; goto _out; } }}
 	break;
 	case 596:
-#line 862 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200b; {p++; goto _out; } }}
 	break;
 	case 597:
-#line 863 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0396; {p++; goto _out; } }}
 	break;
 	case 598:
-#line 864 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2128; {p++; goto _out; } }}
 	break;
 	case 599:
-#line 865 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2124; {p++; goto _out; } }}
 	break;
 	case 600:
-#line 866 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4b5; {p++; goto _out; } }}
 	break;
 	case 601:
-#line 867 "char_ref.rl"
 	{te = p+1;{ output->first = 0xe1; {p++; goto _out; } }}
 	break;
 	case 602:
-#line 869 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0103; {p++; goto _out; } }}
 	break;
 	case 603:
-#line 870 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223e; {p++; goto _out; } }}
 	break;
 	case 604:
-#line 871 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223e; output->second = 0x0333; {p++; goto _out; } }}
 	break;
 	case 605:
-#line 872 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223f; {p++; goto _out; } }}
 	break;
 	case 606:
-#line 873 "char_ref.rl"
 	{te = p+1;{ output->first = 0xe2; {p++; goto _out; } }}
 	break;
 	case 607:
-#line 875 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb4; {p++; goto _out; } }}
 	break;
 	case 608:
-#line 877 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0430; {p++; goto _out; } }}
 	break;
 	case 609:
-#line 878 "char_ref.rl"
 	{te = p+1;{ output->first = 0xe6; {p++; goto _out; } }}
 	break;
 	case 610:
-#line 880 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2061; {p++; goto _out; } }}
 	break;
 	case 611:
-#line 881 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d51e; {p++; goto _out; } }}
 	break;
 	case 612:
-#line 882 "char_ref.rl"
 	{te = p+1;{ output->first = 0xe0; {p++; goto _out; } }}
 	break;
 	case 613:
-#line 884 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2135; {p++; goto _out; } }}
 	break;
 	case 614:
-#line 885 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2135; {p++; goto _out; } }}
 	break;
 	case 615:
-#line 886 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03b1; {p++; goto _out; } }}
 	break;
 	case 616:
-#line 887 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0101; {p++; goto _out; } }}
 	break;
 	case 617:
-#line 888 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a3f; {p++; goto _out; } }}
 	break;
 	case 618:
-#line 889 "char_ref.rl"
 	{te = p+1;{ output->first = 0x26; {p++; goto _out; } }}
 	break;
 	case 619:
-#line 891 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2227; {p++; goto _out; } }}
 	break;
 	case 620:
-#line 892 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a55; {p++; goto _out; } }}
 	break;
 	case 621:
-#line 893 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a5c; {p++; goto _out; } }}
 	break;
 	case 622:
-#line 894 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a58; {p++; goto _out; } }}
 	break;
 	case 623:
-#line 895 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a5a; {p++; goto _out; } }}
 	break;
 	case 624:
-#line 896 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2220; {p++; goto _out; } }}
 	break;
 	case 625:
-#line 897 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29a4; {p++; goto _out; } }}
 	break;
 	case 626:
-#line 898 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2220; {p++; goto _out; } }}
 	break;
 	case 627:
-#line 899 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2221; {p++; goto _out; } }}
 	break;
 	case 628:
-#line 900 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29a8; {p++; goto _out; } }}
 	break;
 	case 629:
-#line 901 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29a9; {p++; goto _out; } }}
 	break;
 	case 630:
-#line 902 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29aa; {p++; goto _out; } }}
 	break;
 	case 631:
-#line 903 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29ab; {p++; goto _out; } }}
 	break;
 	case 632:
-#line 904 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29ac; {p++; goto _out; } }}
 	break;
 	case 633:
-#line 905 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29ad; {p++; goto _out; } }}
 	break;
 	case 634:
-#line 906 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29ae; {p++; goto _out; } }}
 	break;
 	case 635:
-#line 907 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29af; {p++; goto _out; } }}
 	break;
 	case 636:
-#line 908 "char_ref.rl"
 	{te = p+1;{ output->first = 0x221f; {p++; goto _out; } }}
 	break;
 	case 637:
-#line 909 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22be; {p++; goto _out; } }}
 	break;
 	case 638:
-#line 910 "char_ref.rl"
 	{te = p+1;{ output->first = 0x299d; {p++; goto _out; } }}
 	break;
 	case 639:
-#line 911 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2222; {p++; goto _out; } }}
 	break;
 	case 640:
-#line 912 "char_ref.rl"
 	{te = p+1;{ output->first = 0xc5; {p++; goto _out; } }}
 	break;
 	case 641:
-#line 913 "char_ref.rl"
 	{te = p+1;{ output->first = 0x237c; {p++; goto _out; } }}
 	break;
 	case 642:
-#line 914 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0105; {p++; goto _out; } }}
 	break;
 	case 643:
-#line 915 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d552; {p++; goto _out; } }}
 	break;
 	case 644:
-#line 916 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2248; {p++; goto _out; } }}
 	break;
 	case 645:
-#line 917 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a70; {p++; goto _out; } }}
 	break;
 	case 646:
-#line 918 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a6f; {p++; goto _out; } }}
 	break;
 	case 647:
-#line 919 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224a; {p++; goto _out; } }}
 	break;
 	case 648:
-#line 920 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224b; {p++; goto _out; } }}
 	break;
 	case 649:
-#line 921 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27; {p++; goto _out; } }}
 	break;
 	case 650:
-#line 922 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2248; {p++; goto _out; } }}
 	break;
 	case 651:
-#line 923 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224a; {p++; goto _out; } }}
 	break;
 	case 652:
-#line 924 "char_ref.rl"
 	{te = p+1;{ output->first = 0xe5; {p++; goto _out; } }}
 	break;
 	case 653:
-#line 926 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4b6; {p++; goto _out; } }}
 	break;
 	case 654:
-#line 927 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a; {p++; goto _out; } }}
 	break;
 	case 655:
-#line 928 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2248; {p++; goto _out; } }}
 	break;
 	case 656:
-#line 929 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224d; {p++; goto _out; } }}
 	break;
 	case 657:
-#line 930 "char_ref.rl"
 	{te = p+1;{ output->first = 0xe3; {p++; goto _out; } }}
 	break;
 	case 658:
-#line 932 "char_ref.rl"
 	{te = p+1;{ output->first = 0xe4; {p++; goto _out; } }}
 	break;
 	case 659:
-#line 934 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2233; {p++; goto _out; } }}
 	break;
 	case 660:
-#line 935 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a11; {p++; goto _out; } }}
 	break;
 	case 661:
-#line 936 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aed; {p++; goto _out; } }}
 	break;
 	case 662:
-#line 937 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224c; {p++; goto _out; } }}
 	break;
 	case 663:
-#line 938 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03f6; {p++; goto _out; } }}
 	break;
 	case 664:
-#line 939 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2035; {p++; goto _out; } }}
 	break;
 	case 665:
-#line 940 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223d; {p++; goto _out; } }}
 	break;
 	case 666:
-#line 941 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22cd; {p++; goto _out; } }}
 	break;
 	case 667:
-#line 942 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22bd; {p++; goto _out; } }}
 	break;
 	case 668:
-#line 943 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2305; {p++; goto _out; } }}
 	break;
 	case 669:
-#line 944 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2305; {p++; goto _out; } }}
 	break;
 	case 670:
-#line 945 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23b5; {p++; goto _out; } }}
 	break;
 	case 671:
-#line 946 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23b6; {p++; goto _out; } }}
 	break;
 	case 672:
-#line 947 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224c; {p++; goto _out; } }}
 	break;
 	case 673:
-#line 948 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0431; {p++; goto _out; } }}
 	break;
 	case 674:
-#line 949 "char_ref.rl"
 	{te = p+1;{ output->first = 0x201e; {p++; goto _out; } }}
 	break;
 	case 675:
-#line 950 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2235; {p++; goto _out; } }}
 	break;
 	case 676:
-#line 951 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2235; {p++; goto _out; } }}
 	break;
 	case 677:
-#line 952 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29b0; {p++; goto _out; } }}
 	break;
 	case 678:
-#line 953 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03f6; {p++; goto _out; } }}
 	break;
 	case 679:
-#line 954 "char_ref.rl"
 	{te = p+1;{ output->first = 0x212c; {p++; goto _out; } }}
 	break;
 	case 680:
-#line 955 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03b2; {p++; goto _out; } }}
 	break;
 	case 681:
-#line 956 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2136; {p++; goto _out; } }}
 	break;
 	case 682:
-#line 957 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226c; {p++; goto _out; } }}
 	break;
 	case 683:
-#line 958 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d51f; {p++; goto _out; } }}
 	break;
 	case 684:
-#line 959 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c2; {p++; goto _out; } }}
 	break;
 	case 685:
-#line 960 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25ef; {p++; goto _out; } }}
 	break;
 	case 686:
-#line 961 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c3; {p++; goto _out; } }}
 	break;
 	case 687:
-#line 962 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a00; {p++; goto _out; } }}
 	break;
 	case 688:
-#line 963 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a01; {p++; goto _out; } }}
 	break;
 	case 689:
-#line 964 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a02; {p++; goto _out; } }}
 	break;
 	case 690:
-#line 965 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a06; {p++; goto _out; } }}
 	break;
 	case 691:
-#line 966 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2605; {p++; goto _out; } }}
 	break;
 	case 692:
-#line 967 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25bd; {p++; goto _out; } }}
 	break;
 	case 693:
-#line 968 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b3; {p++; goto _out; } }}
 	break;
 	case 694:
-#line 969 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a04; {p++; goto _out; } }}
 	break;
 	case 695:
-#line 970 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c1; {p++; goto _out; } }}
 	break;
 	case 696:
-#line 971 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c0; {p++; goto _out; } }}
 	break;
 	case 697:
-#line 972 "char_ref.rl"
 	{te = p+1;{ output->first = 0x290d; {p++; goto _out; } }}
 	break;
 	case 698:
-#line 973 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29eb; {p++; goto _out; } }}
 	break;
 	case 699:
-#line 974 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25aa; {p++; goto _out; } }}
 	break;
 	case 700:
-#line 975 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b4; {p++; goto _out; } }}
 	break;
 	case 701:
-#line 976 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25be; {p++; goto _out; } }}
 	break;
 	case 702:
-#line 977 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25c2; {p++; goto _out; } }}
 	break;
 	case 703:
-#line 978 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b8; {p++; goto _out; } }}
 	break;
 	case 704:
-#line 979 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2423; {p++; goto _out; } }}
 	break;
 	case 705:
-#line 980 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2592; {p++; goto _out; } }}
 	break;
 	case 706:
-#line 981 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2591; {p++; goto _out; } }}
 	break;
 	case 707:
-#line 982 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2593; {p++; goto _out; } }}
 	break;
 	case 708:
-#line 983 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2588; {p++; goto _out; } }}
 	break;
 	case 709:
-#line 984 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3d; output->second = 0x20e5; {p++; goto _out; } }}
 	break;
 	case 710:
-#line 985 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2261; output->second = 0x20e5; {p++; goto _out; } }}
 	break;
 	case 711:
-#line 986 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2310; {p++; goto _out; } }}
 	break;
 	case 712:
-#line 987 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d553; {p++; goto _out; } }}
 	break;
 	case 713:
-#line 988 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a5; {p++; goto _out; } }}
 	break;
 	case 714:
-#line 989 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a5; {p++; goto _out; } }}
 	break;
 	case 715:
-#line 990 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c8; {p++; goto _out; } }}
 	break;
 	case 716:
-#line 991 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2557; {p++; goto _out; } }}
 	break;
 	case 717:
-#line 992 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2554; {p++; goto _out; } }}
 	break;
 	case 718:
-#line 993 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2556; {p++; goto _out; } }}
 	break;
 	case 719:
-#line 994 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2553; {p++; goto _out; } }}
 	break;
 	case 720:
-#line 995 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2550; {p++; goto _out; } }}
 	break;
 	case 721:
-#line 996 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2566; {p++; goto _out; } }}
 	break;
 	case 722:
-#line 997 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2569; {p++; goto _out; } }}
 	break;
 	case 723:
-#line 998 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2564; {p++; goto _out; } }}
 	break;
 	case 724:
-#line 999 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2567; {p++; goto _out; } }}
 	break;
 	case 725:
-#line 1000 "char_ref.rl"
 	{te = p+1;{ output->first = 0x255d; {p++; goto _out; } }}
 	break;
 	case 726:
-#line 1001 "char_ref.rl"
 	{te = p+1;{ output->first = 0x255a; {p++; goto _out; } }}
 	break;
 	case 727:
-#line 1002 "char_ref.rl"
 	{te = p+1;{ output->first = 0x255c; {p++; goto _out; } }}
 	break;
 	case 728:
-#line 1003 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2559; {p++; goto _out; } }}
 	break;
 	case 729:
-#line 1004 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2551; {p++; goto _out; } }}
 	break;
 	case 730:
-#line 1005 "char_ref.rl"
 	{te = p+1;{ output->first = 0x256c; {p++; goto _out; } }}
 	break;
 	case 731:
-#line 1006 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2563; {p++; goto _out; } }}
 	break;
 	case 732:
-#line 1007 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2560; {p++; goto _out; } }}
 	break;
 	case 733:
-#line 1008 "char_ref.rl"
 	{te = p+1;{ output->first = 0x256b; {p++; goto _out; } }}
 	break;
 	case 734:
-#line 1009 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2562; {p++; goto _out; } }}
 	break;
 	case 735:
-#line 1010 "char_ref.rl"
 	{te = p+1;{ output->first = 0x255f; {p++; goto _out; } }}
 	break;
 	case 736:
-#line 1011 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29c9; {p++; goto _out; } }}
 	break;
 	case 737:
-#line 1012 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2555; {p++; goto _out; } }}
 	break;
 	case 738:
-#line 1013 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2552; {p++; goto _out; } }}
 	break;
 	case 739:
-#line 1014 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2510; {p++; goto _out; } }}
 	break;
 	case 740:
-#line 1015 "char_ref.rl"
 	{te = p+1;{ output->first = 0x250c; {p++; goto _out; } }}
 	break;
 	case 741:
-#line 1016 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2500; {p++; goto _out; } }}
 	break;
 	case 742:
-#line 1017 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2565; {p++; goto _out; } }}
 	break;
 	case 743:
-#line 1018 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2568; {p++; goto _out; } }}
 	break;
 	case 744:
-#line 1019 "char_ref.rl"
 	{te = p+1;{ output->first = 0x252c; {p++; goto _out; } }}
 	break;
 	case 745:
-#line 1020 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2534; {p++; goto _out; } }}
 	break;
 	case 746:
-#line 1021 "char_ref.rl"
 	{te = p+1;{ output->first = 0x229f; {p++; goto _out; } }}
 	break;
 	case 747:
-#line 1022 "char_ref.rl"
 	{te = p+1;{ output->first = 0x229e; {p++; goto _out; } }}
 	break;
 	case 748:
-#line 1023 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a0; {p++; goto _out; } }}
 	break;
 	case 749:
-#line 1024 "char_ref.rl"
 	{te = p+1;{ output->first = 0x255b; {p++; goto _out; } }}
 	break;
 	case 750:
-#line 1025 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2558; {p++; goto _out; } }}
 	break;
 	case 751:
-#line 1026 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2518; {p++; goto _out; } }}
 	break;
 	case 752:
-#line 1027 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2514; {p++; goto _out; } }}
 	break;
 	case 753:
-#line 1028 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2502; {p++; goto _out; } }}
 	break;
 	case 754:
-#line 1029 "char_ref.rl"
 	{te = p+1;{ output->first = 0x256a; {p++; goto _out; } }}
 	break;
 	case 755:
-#line 1030 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2561; {p++; goto _out; } }}
 	break;
 	case 756:
-#line 1031 "char_ref.rl"
 	{te = p+1;{ output->first = 0x255e; {p++; goto _out; } }}
 	break;
 	case 757:
-#line 1032 "char_ref.rl"
 	{te = p+1;{ output->first = 0x253c; {p++; goto _out; } }}
 	break;
 	case 758:
-#line 1033 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2524; {p++; goto _out; } }}
 	break;
 	case 759:
-#line 1034 "char_ref.rl"
 	{te = p+1;{ output->first = 0x251c; {p++; goto _out; } }}
 	break;
 	case 760:
-#line 1035 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2035; {p++; goto _out; } }}
 	break;
 	case 761:
-#line 1036 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02d8; {p++; goto _out; } }}
 	break;
 	case 762:
-#line 1037 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa6; {p++; goto _out; } }}
 	break;
 	case 763:
-#line 1039 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4b7; {p++; goto _out; } }}
 	break;
 	case 764:
-#line 1040 "char_ref.rl"
 	{te = p+1;{ output->first = 0x204f; {p++; goto _out; } }}
 	break;
 	case 765:
-#line 1041 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223d; {p++; goto _out; } }}
 	break;
 	case 766:
-#line 1042 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22cd; {p++; goto _out; } }}
 	break;
 	case 767:
-#line 1043 "char_ref.rl"
 	{te = p+1;{ output->first = 0x5c; {p++; goto _out; } }}
 	break;
 	case 768:
-#line 1044 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29c5; {p++; goto _out; } }}
 	break;
 	case 769:
-#line 1045 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27c8; {p++; goto _out; } }}
 	break;
 	case 770:
-#line 1046 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2022; {p++; goto _out; } }}
 	break;
 	case 771:
-#line 1047 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2022; {p++; goto _out; } }}
 	break;
 	case 772:
-#line 1048 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224e; {p++; goto _out; } }}
 	break;
 	case 773:
-#line 1049 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aae; {p++; goto _out; } }}
 	break;
 	case 774:
-#line 1050 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224f; {p++; goto _out; } }}
 	break;
 	case 775:
-#line 1051 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224f; {p++; goto _out; } }}
 	break;
 	case 776:
-#line 1052 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0107; {p++; goto _out; } }}
 	break;
 	case 777:
-#line 1053 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2229; {p++; goto _out; } }}
 	break;
 	case 778:
-#line 1054 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a44; {p++; goto _out; } }}
 	break;
 	case 779:
-#line 1055 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a49; {p++; goto _out; } }}
 	break;
 	case 780:
-#line 1056 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a4b; {p++; goto _out; } }}
 	break;
 	case 781:
-#line 1057 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a47; {p++; goto _out; } }}
 	break;
 	case 782:
-#line 1058 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a40; {p++; goto _out; } }}
 	break;
 	case 783:
-#line 1059 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2229; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 784:
-#line 1060 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2041; {p++; goto _out; } }}
 	break;
 	case 785:
-#line 1061 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02c7; {p++; goto _out; } }}
 	break;
 	case 786:
-#line 1062 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a4d; {p++; goto _out; } }}
 	break;
 	case 787:
-#line 1063 "char_ref.rl"
 	{te = p+1;{ output->first = 0x010d; {p++; goto _out; } }}
 	break;
 	case 788:
-#line 1064 "char_ref.rl"
 	{te = p+1;{ output->first = 0xe7; {p++; goto _out; } }}
 	break;
 	case 789:
-#line 1066 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0109; {p++; goto _out; } }}
 	break;
 	case 790:
-#line 1067 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a4c; {p++; goto _out; } }}
 	break;
 	case 791:
-#line 1068 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a50; {p++; goto _out; } }}
 	break;
 	case 792:
-#line 1069 "char_ref.rl"
 	{te = p+1;{ output->first = 0x010b; {p++; goto _out; } }}
 	break;
 	case 793:
-#line 1070 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb8; {p++; goto _out; } }}
 	break;
 	case 794:
-#line 1072 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29b2; {p++; goto _out; } }}
 	break;
 	case 795:
-#line 1073 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa2; {p++; goto _out; } }}
 	break;
 	case 796:
-#line 1075 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb7; {p++; goto _out; } }}
 	break;
 	case 797:
-#line 1076 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d520; {p++; goto _out; } }}
 	break;
 	case 798:
-#line 1077 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0447; {p++; goto _out; } }}
 	break;
 	case 799:
-#line 1078 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2713; {p++; goto _out; } }}
 	break;
 	case 800:
-#line 1079 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2713; {p++; goto _out; } }}
 	break;
 	case 801:
-#line 1080 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c7; {p++; goto _out; } }}
 	break;
 	case 802:
-#line 1081 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25cb; {p++; goto _out; } }}
 	break;
 	case 803:
-#line 1082 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29c3; {p++; goto _out; } }}
 	break;
 	case 804:
-#line 1083 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02c6; {p++; goto _out; } }}
 	break;
 	case 805:
-#line 1084 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2257; {p++; goto _out; } }}
 	break;
 	case 806:
-#line 1085 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ba; {p++; goto _out; } }}
 	break;
 	case 807:
-#line 1086 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bb; {p++; goto _out; } }}
 	break;
 	case 808:
-#line 1087 "char_ref.rl"
 	{te = p+1;{ output->first = 0xae; {p++; goto _out; } }}
 	break;
 	case 809:
-#line 1088 "char_ref.rl"
 	{te = p+1;{ output->first = 0x24c8; {p++; goto _out; } }}
 	break;
 	case 810:
-#line 1089 "char_ref.rl"
 	{te = p+1;{ output->first = 0x229b; {p++; goto _out; } }}
 	break;
 	case 811:
-#line 1090 "char_ref.rl"
 	{te = p+1;{ output->first = 0x229a; {p++; goto _out; } }}
 	break;
 	case 812:
-#line 1091 "char_ref.rl"
 	{te = p+1;{ output->first = 0x229d; {p++; goto _out; } }}
 	break;
 	case 813:
-#line 1092 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2257; {p++; goto _out; } }}
 	break;
 	case 814:
-#line 1093 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a10; {p++; goto _out; } }}
 	break;
 	case 815:
-#line 1094 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aef; {p++; goto _out; } }}
 	break;
 	case 816:
-#line 1095 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29c2; {p++; goto _out; } }}
 	break;
 	case 817:
-#line 1096 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2663; {p++; goto _out; } }}
 	break;
 	case 818:
-#line 1097 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2663; {p++; goto _out; } }}
 	break;
 	case 819:
-#line 1098 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3a; {p++; goto _out; } }}
 	break;
 	case 820:
-#line 1099 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2254; {p++; goto _out; } }}
 	break;
 	case 821:
-#line 1100 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2254; {p++; goto _out; } }}
 	break;
 	case 822:
-#line 1101 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2c; {p++; goto _out; } }}
 	break;
 	case 823:
-#line 1102 "char_ref.rl"
 	{te = p+1;{ output->first = 0x40; {p++; goto _out; } }}
 	break;
 	case 824:
-#line 1103 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2201; {p++; goto _out; } }}
 	break;
 	case 825:
-#line 1104 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2218; {p++; goto _out; } }}
 	break;
 	case 826:
-#line 1105 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2201; {p++; goto _out; } }}
 	break;
 	case 827:
-#line 1106 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2102; {p++; goto _out; } }}
 	break;
 	case 828:
-#line 1107 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2245; {p++; goto _out; } }}
 	break;
 	case 829:
-#line 1108 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a6d; {p++; goto _out; } }}
 	break;
 	case 830:
-#line 1109 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222e; {p++; goto _out; } }}
 	break;
 	case 831:
-#line 1110 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d554; {p++; goto _out; } }}
 	break;
 	case 832:
-#line 1111 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2210; {p++; goto _out; } }}
 	break;
 	case 833:
-#line 1112 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa9; {p++; goto _out; } }}
 	break;
 	case 834:
-#line 1114 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2117; {p++; goto _out; } }}
 	break;
 	case 835:
-#line 1115 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b5; {p++; goto _out; } }}
 	break;
 	case 836:
-#line 1116 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2717; {p++; goto _out; } }}
 	break;
 	case 837:
-#line 1117 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4b8; {p++; goto _out; } }}
 	break;
 	case 838:
-#line 1118 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2acf; {p++; goto _out; } }}
 	break;
 	case 839:
-#line 1119 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ad1; {p++; goto _out; } }}
 	break;
 	case 840:
-#line 1120 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ad0; {p++; goto _out; } }}
 	break;
 	case 841:
-#line 1121 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ad2; {p++; goto _out; } }}
 	break;
 	case 842:
-#line 1122 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ef; {p++; goto _out; } }}
 	break;
 	case 843:
-#line 1123 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2938; {p++; goto _out; } }}
 	break;
 	case 844:
-#line 1124 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2935; {p++; goto _out; } }}
 	break;
 	case 845:
-#line 1125 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22de; {p++; goto _out; } }}
 	break;
 	case 846:
-#line 1126 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22df; {p++; goto _out; } }}
 	break;
 	case 847:
-#line 1127 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b6; {p++; goto _out; } }}
 	break;
 	case 848:
-#line 1128 "char_ref.rl"
 	{te = p+1;{ output->first = 0x293d; {p++; goto _out; } }}
 	break;
 	case 849:
-#line 1129 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222a; {p++; goto _out; } }}
 	break;
 	case 850:
-#line 1130 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a48; {p++; goto _out; } }}
 	break;
 	case 851:
-#line 1131 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a46; {p++; goto _out; } }}
 	break;
 	case 852:
-#line 1132 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a4a; {p++; goto _out; } }}
 	break;
 	case 853:
-#line 1133 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228d; {p++; goto _out; } }}
 	break;
 	case 854:
-#line 1134 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a45; {p++; goto _out; } }}
 	break;
 	case 855:
-#line 1135 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222a; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 856:
-#line 1136 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b7; {p++; goto _out; } }}
 	break;
 	case 857:
-#line 1137 "char_ref.rl"
 	{te = p+1;{ output->first = 0x293c; {p++; goto _out; } }}
 	break;
 	case 858:
-#line 1138 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22de; {p++; goto _out; } }}
 	break;
 	case 859:
-#line 1139 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22df; {p++; goto _out; } }}
 	break;
 	case 860:
-#line 1140 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ce; {p++; goto _out; } }}
 	break;
 	case 861:
-#line 1141 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22cf; {p++; goto _out; } }}
 	break;
 	case 862:
-#line 1142 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa4; {p++; goto _out; } }}
 	break;
 	case 863:
-#line 1144 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b6; {p++; goto _out; } }}
 	break;
 	case 864:
-#line 1145 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b7; {p++; goto _out; } }}
 	break;
 	case 865:
-#line 1146 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ce; {p++; goto _out; } }}
 	break;
 	case 866:
-#line 1147 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22cf; {p++; goto _out; } }}
 	break;
 	case 867:
-#line 1148 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2232; {p++; goto _out; } }}
 	break;
 	case 868:
-#line 1149 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2231; {p++; goto _out; } }}
 	break;
 	case 869:
-#line 1150 "char_ref.rl"
 	{te = p+1;{ output->first = 0x232d; {p++; goto _out; } }}
 	break;
 	case 870:
-#line 1151 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d3; {p++; goto _out; } }}
 	break;
 	case 871:
-#line 1152 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2965; {p++; goto _out; } }}
 	break;
 	case 872:
-#line 1153 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2020; {p++; goto _out; } }}
 	break;
 	case 873:
-#line 1154 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2138; {p++; goto _out; } }}
 	break;
 	case 874:
-#line 1155 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2193; {p++; goto _out; } }}
 	break;
 	case 875:
-#line 1156 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2010; {p++; goto _out; } }}
 	break;
 	case 876:
-#line 1157 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a3; {p++; goto _out; } }}
 	break;
 	case 877:
-#line 1158 "char_ref.rl"
 	{te = p+1;{ output->first = 0x290f; {p++; goto _out; } }}
 	break;
 	case 878:
-#line 1159 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02dd; {p++; goto _out; } }}
 	break;
 	case 879:
-#line 1160 "char_ref.rl"
 	{te = p+1;{ output->first = 0x010f; {p++; goto _out; } }}
 	break;
 	case 880:
-#line 1161 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0434; {p++; goto _out; } }}
 	break;
 	case 881:
-#line 1162 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2146; {p++; goto _out; } }}
 	break;
 	case 882:
-#line 1163 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2021; {p++; goto _out; } }}
 	break;
 	case 883:
-#line 1164 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ca; {p++; goto _out; } }}
 	break;
 	case 884:
-#line 1165 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a77; {p++; goto _out; } }}
 	break;
 	case 885:
-#line 1166 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb0; {p++; goto _out; } }}
 	break;
 	case 886:
-#line 1168 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03b4; {p++; goto _out; } }}
 	break;
 	case 887:
-#line 1169 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29b1; {p++; goto _out; } }}
 	break;
 	case 888:
-#line 1170 "char_ref.rl"
 	{te = p+1;{ output->first = 0x297f; {p++; goto _out; } }}
 	break;
 	case 889:
-#line 1171 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d521; {p++; goto _out; } }}
 	break;
 	case 890:
-#line 1172 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c3; {p++; goto _out; } }}
 	break;
 	case 891:
-#line 1173 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c2; {p++; goto _out; } }}
 	break;
 	case 892:
-#line 1174 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c4; {p++; goto _out; } }}
 	break;
 	case 893:
-#line 1175 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c4; {p++; goto _out; } }}
 	break;
 	case 894:
-#line 1176 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2666; {p++; goto _out; } }}
 	break;
 	case 895:
-#line 1177 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2666; {p++; goto _out; } }}
 	break;
 	case 896:
-#line 1178 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa8; {p++; goto _out; } }}
 	break;
 	case 897:
-#line 1179 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03dd; {p++; goto _out; } }}
 	break;
 	case 898:
-#line 1180 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f2; {p++; goto _out; } }}
 	break;
 	case 899:
-#line 1181 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf7; {p++; goto _out; } }}
 	break;
 	case 900:
-#line 1182 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf7; {p++; goto _out; } }}
 	break;
 	case 901:
-#line 1184 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c7; {p++; goto _out; } }}
 	break;
 	case 902:
-#line 1185 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c7; {p++; goto _out; } }}
 	break;
 	case 903:
-#line 1186 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0452; {p++; goto _out; } }}
 	break;
 	case 904:
-#line 1187 "char_ref.rl"
 	{te = p+1;{ output->first = 0x231e; {p++; goto _out; } }}
 	break;
 	case 905:
-#line 1188 "char_ref.rl"
 	{te = p+1;{ output->first = 0x230d; {p++; goto _out; } }}
 	break;
 	case 906:
-#line 1189 "char_ref.rl"
 	{te = p+1;{ output->first = 0x24; {p++; goto _out; } }}
 	break;
 	case 907:
-#line 1190 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d555; {p++; goto _out; } }}
 	break;
 	case 908:
-#line 1191 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02d9; {p++; goto _out; } }}
 	break;
 	case 909:
-#line 1192 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2250; {p++; goto _out; } }}
 	break;
 	case 910:
-#line 1193 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2251; {p++; goto _out; } }}
 	break;
 	case 911:
-#line 1194 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2238; {p++; goto _out; } }}
 	break;
 	case 912:
-#line 1195 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2214; {p++; goto _out; } }}
 	break;
 	case 913:
-#line 1196 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a1; {p++; goto _out; } }}
 	break;
 	case 914:
-#line 1197 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2306; {p++; goto _out; } }}
 	break;
 	case 915:
-#line 1198 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2193; {p++; goto _out; } }}
 	break;
 	case 916:
-#line 1199 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ca; {p++; goto _out; } }}
 	break;
 	case 917:
-#line 1200 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c3; {p++; goto _out; } }}
 	break;
 	case 918:
-#line 1201 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c2; {p++; goto _out; } }}
 	break;
 	case 919:
-#line 1202 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2910; {p++; goto _out; } }}
 	break;
 	case 920:
-#line 1203 "char_ref.rl"
 	{te = p+1;{ output->first = 0x231f; {p++; goto _out; } }}
 	break;
 	case 921:
-#line 1204 "char_ref.rl"
 	{te = p+1;{ output->first = 0x230c; {p++; goto _out; } }}
 	break;
 	case 922:
-#line 1205 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4b9; {p++; goto _out; } }}
 	break;
 	case 923:
-#line 1206 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0455; {p++; goto _out; } }}
 	break;
 	case 924:
-#line 1207 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29f6; {p++; goto _out; } }}
 	break;
 	case 925:
-#line 1208 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0111; {p++; goto _out; } }}
 	break;
 	case 926:
-#line 1209 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f1; {p++; goto _out; } }}
 	break;
 	case 927:
-#line 1210 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25bf; {p++; goto _out; } }}
 	break;
 	case 928:
-#line 1211 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25be; {p++; goto _out; } }}
 	break;
 	case 929:
-#line 1212 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21f5; {p++; goto _out; } }}
 	break;
 	case 930:
-#line 1213 "char_ref.rl"
 	{te = p+1;{ output->first = 0x296f; {p++; goto _out; } }}
 	break;
 	case 931:
-#line 1214 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29a6; {p++; goto _out; } }}
 	break;
 	case 932:
-#line 1215 "char_ref.rl"
 	{te = p+1;{ output->first = 0x045f; {p++; goto _out; } }}
 	break;
 	case 933:
-#line 1216 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27ff; {p++; goto _out; } }}
 	break;
 	case 934:
-#line 1217 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a77; {p++; goto _out; } }}
 	break;
 	case 935:
-#line 1218 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2251; {p++; goto _out; } }}
 	break;
 	case 936:
-#line 1219 "char_ref.rl"
 	{te = p+1;{ output->first = 0xe9; {p++; goto _out; } }}
 	break;
 	case 937:
-#line 1221 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a6e; {p++; goto _out; } }}
 	break;
 	case 938:
-#line 1222 "char_ref.rl"
 	{te = p+1;{ output->first = 0x011b; {p++; goto _out; } }}
 	break;
 	case 939:
-#line 1223 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2256; {p++; goto _out; } }}
 	break;
 	case 940:
-#line 1224 "char_ref.rl"
 	{te = p+1;{ output->first = 0xea; {p++; goto _out; } }}
 	break;
 	case 941:
-#line 1226 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2255; {p++; goto _out; } }}
 	break;
 	case 942:
-#line 1227 "char_ref.rl"
 	{te = p+1;{ output->first = 0x044d; {p++; goto _out; } }}
 	break;
 	case 943:
-#line 1228 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0117; {p++; goto _out; } }}
 	break;
 	case 944:
-#line 1229 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2147; {p++; goto _out; } }}
 	break;
 	case 945:
-#line 1230 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2252; {p++; goto _out; } }}
 	break;
 	case 946:
-#line 1231 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d522; {p++; goto _out; } }}
 	break;
 	case 947:
-#line 1232 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a9a; {p++; goto _out; } }}
 	break;
 	case 948:
-#line 1233 "char_ref.rl"
 	{te = p+1;{ output->first = 0xe8; {p++; goto _out; } }}
 	break;
 	case 949:
-#line 1235 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a96; {p++; goto _out; } }}
 	break;
 	case 950:
-#line 1236 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a98; {p++; goto _out; } }}
 	break;
 	case 951:
-#line 1237 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a99; {p++; goto _out; } }}
 	break;
 	case 952:
-#line 1238 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23e7; {p++; goto _out; } }}
 	break;
 	case 953:
-#line 1239 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2113; {p++; goto _out; } }}
 	break;
 	case 954:
-#line 1240 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a95; {p++; goto _out; } }}
 	break;
 	case 955:
-#line 1241 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a97; {p++; goto _out; } }}
 	break;
 	case 956:
-#line 1242 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0113; {p++; goto _out; } }}
 	break;
 	case 957:
-#line 1243 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2205; {p++; goto _out; } }}
 	break;
 	case 958:
-#line 1244 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2205; {p++; goto _out; } }}
 	break;
 	case 959:
-#line 1245 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2205; {p++; goto _out; } }}
 	break;
 	case 960:
-#line 1246 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2004; {p++; goto _out; } }}
 	break;
 	case 961:
-#line 1247 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2005; {p++; goto _out; } }}
 	break;
 	case 962:
-#line 1248 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2003; {p++; goto _out; } }}
 	break;
 	case 963:
-#line 1249 "char_ref.rl"
 	{te = p+1;{ output->first = 0x014b; {p++; goto _out; } }}
 	break;
 	case 964:
-#line 1250 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2002; {p++; goto _out; } }}
 	break;
 	case 965:
-#line 1251 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0119; {p++; goto _out; } }}
 	break;
 	case 966:
-#line 1252 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d556; {p++; goto _out; } }}
 	break;
 	case 967:
-#line 1253 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d5; {p++; goto _out; } }}
 	break;
 	case 968:
-#line 1254 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29e3; {p++; goto _out; } }}
 	break;
 	case 969:
-#line 1255 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a71; {p++; goto _out; } }}
 	break;
 	case 970:
-#line 1256 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03b5; {p++; goto _out; } }}
 	break;
 	case 971:
-#line 1257 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03b5; {p++; goto _out; } }}
 	break;
 	case 972:
-#line 1258 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03f5; {p++; goto _out; } }}
 	break;
 	case 973:
-#line 1259 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2256; {p++; goto _out; } }}
 	break;
 	case 974:
-#line 1260 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2255; {p++; goto _out; } }}
 	break;
 	case 975:
-#line 1261 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2242; {p++; goto _out; } }}
 	break;
 	case 976:
-#line 1262 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a96; {p++; goto _out; } }}
 	break;
 	case 977:
-#line 1263 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a95; {p++; goto _out; } }}
 	break;
 	case 978:
-#line 1264 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3d; {p++; goto _out; } }}
 	break;
 	case 979:
-#line 1265 "char_ref.rl"
 	{te = p+1;{ output->first = 0x225f; {p++; goto _out; } }}
 	break;
 	case 980:
-#line 1266 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2261; {p++; goto _out; } }}
 	break;
 	case 981:
-#line 1267 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a78; {p++; goto _out; } }}
 	break;
 	case 982:
-#line 1268 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29e5; {p++; goto _out; } }}
 	break;
 	case 983:
-#line 1269 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2253; {p++; goto _out; } }}
 	break;
 	case 984:
-#line 1270 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2971; {p++; goto _out; } }}
 	break;
 	case 985:
-#line 1271 "char_ref.rl"
 	{te = p+1;{ output->first = 0x212f; {p++; goto _out; } }}
 	break;
 	case 986:
-#line 1272 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2250; {p++; goto _out; } }}
 	break;
 	case 987:
-#line 1273 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2242; {p++; goto _out; } }}
 	break;
 	case 988:
-#line 1274 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03b7; {p++; goto _out; } }}
 	break;
 	case 989:
-#line 1275 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf0; {p++; goto _out; } }}
 	break;
 	case 990:
-#line 1277 "char_ref.rl"
 	{te = p+1;{ output->first = 0xeb; {p++; goto _out; } }}
 	break;
 	case 991:
-#line 1279 "char_ref.rl"
 	{te = p+1;{ output->first = 0x20ac; {p++; goto _out; } }}
 	break;
 	case 992:
-#line 1280 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21; {p++; goto _out; } }}
 	break;
 	case 993:
-#line 1281 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2203; {p++; goto _out; } }}
 	break;
 	case 994:
-#line 1282 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2130; {p++; goto _out; } }}
 	break;
 	case 995:
-#line 1283 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2147; {p++; goto _out; } }}
 	break;
 	case 996:
-#line 1284 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2252; {p++; goto _out; } }}
 	break;
 	case 997:
-#line 1285 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0444; {p++; goto _out; } }}
 	break;
 	case 998:
-#line 1286 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2640; {p++; goto _out; } }}
 	break;
 	case 999:
-#line 1287 "char_ref.rl"
 	{te = p+1;{ output->first = 0xfb03; {p++; goto _out; } }}
 	break;
 	case 1000:
-#line 1288 "char_ref.rl"
 	{te = p+1;{ output->first = 0xfb00; {p++; goto _out; } }}
 	break;
 	case 1001:
-#line 1289 "char_ref.rl"
 	{te = p+1;{ output->first = 0xfb04; {p++; goto _out; } }}
 	break;
 	case 1002:
-#line 1290 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d523; {p++; goto _out; } }}
 	break;
 	case 1003:
-#line 1291 "char_ref.rl"
 	{te = p+1;{ output->first = 0xfb01; {p++; goto _out; } }}
 	break;
 	case 1004:
-#line 1292 "char_ref.rl"
 	{te = p+1;{ output->first = 0x66; output->second = 0x6a; {p++; goto _out; } }}
 	break;
 	case 1005:
-#line 1293 "char_ref.rl"
 	{te = p+1;{ output->first = 0x266d; {p++; goto _out; } }}
 	break;
 	case 1006:
-#line 1294 "char_ref.rl"
 	{te = p+1;{ output->first = 0xfb02; {p++; goto _out; } }}
 	break;
 	case 1007:
-#line 1295 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b1; {p++; goto _out; } }}
 	break;
 	case 1008:
-#line 1296 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0192; {p++; goto _out; } }}
 	break;
 	case 1009:
-#line 1297 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d557; {p++; goto _out; } }}
 	break;
 	case 1010:
-#line 1298 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2200; {p++; goto _out; } }}
 	break;
 	case 1011:
-#line 1299 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d4; {p++; goto _out; } }}
 	break;
 	case 1012:
-#line 1300 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ad9; {p++; goto _out; } }}
 	break;
 	case 1013:
-#line 1301 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a0d; {p++; goto _out; } }}
 	break;
 	case 1014:
-#line 1302 "char_ref.rl"
 	{te = p+1;{ output->first = 0xbd; {p++; goto _out; } }}
 	break;
 	case 1015:
-#line 1304 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2153; {p++; goto _out; } }}
 	break;
 	case 1016:
-#line 1305 "char_ref.rl"
 	{te = p+1;{ output->first = 0xbc; {p++; goto _out; } }}
 	break;
 	case 1017:
-#line 1307 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2155; {p++; goto _out; } }}
 	break;
 	case 1018:
-#line 1308 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2159; {p++; goto _out; } }}
 	break;
 	case 1019:
-#line 1309 "char_ref.rl"
 	{te = p+1;{ output->first = 0x215b; {p++; goto _out; } }}
 	break;
 	case 1020:
-#line 1310 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2154; {p++; goto _out; } }}
 	break;
 	case 1021:
-#line 1311 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2156; {p++; goto _out; } }}
 	break;
 	case 1022:
-#line 1312 "char_ref.rl"
 	{te = p+1;{ output->first = 0xbe; {p++; goto _out; } }}
 	break;
 	case 1023:
-#line 1314 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2157; {p++; goto _out; } }}
 	break;
 	case 1024:
-#line 1315 "char_ref.rl"
 	{te = p+1;{ output->first = 0x215c; {p++; goto _out; } }}
 	break;
 	case 1025:
-#line 1316 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2158; {p++; goto _out; } }}
 	break;
 	case 1026:
-#line 1317 "char_ref.rl"
 	{te = p+1;{ output->first = 0x215a; {p++; goto _out; } }}
 	break;
 	case 1027:
-#line 1318 "char_ref.rl"
 	{te = p+1;{ output->first = 0x215d; {p++; goto _out; } }}
 	break;
 	case 1028:
-#line 1319 "char_ref.rl"
 	{te = p+1;{ output->first = 0x215e; {p++; goto _out; } }}
 	break;
 	case 1029:
-#line 1320 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2044; {p++; goto _out; } }}
 	break;
 	case 1030:
-#line 1321 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2322; {p++; goto _out; } }}
 	break;
 	case 1031:
-#line 1322 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4bb; {p++; goto _out; } }}
 	break;
 	case 1032:
-#line 1323 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2267; {p++; goto _out; } }}
 	break;
 	case 1033:
-#line 1324 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a8c; {p++; goto _out; } }}
 	break;
 	case 1034:
-#line 1325 "char_ref.rl"
 	{te = p+1;{ output->first = 0x01f5; {p++; goto _out; } }}
 	break;
 	case 1035:
-#line 1326 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03b3; {p++; goto _out; } }}
 	break;
 	case 1036:
-#line 1327 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03dd; {p++; goto _out; } }}
 	break;
 	case 1037:
-#line 1328 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a86; {p++; goto _out; } }}
 	break;
 	case 1038:
-#line 1329 "char_ref.rl"
 	{te = p+1;{ output->first = 0x011f; {p++; goto _out; } }}
 	break;
 	case 1039:
-#line 1330 "char_ref.rl"
 	{te = p+1;{ output->first = 0x011d; {p++; goto _out; } }}
 	break;
 	case 1040:
-#line 1331 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0433; {p++; goto _out; } }}
 	break;
 	case 1041:
-#line 1332 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0121; {p++; goto _out; } }}
 	break;
 	case 1042:
-#line 1333 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2265; {p++; goto _out; } }}
 	break;
 	case 1043:
-#line 1334 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22db; {p++; goto _out; } }}
 	break;
 	case 1044:
-#line 1335 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2265; {p++; goto _out; } }}
 	break;
 	case 1045:
-#line 1336 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2267; {p++; goto _out; } }}
 	break;
 	case 1046:
-#line 1337 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7e; {p++; goto _out; } }}
 	break;
 	case 1047:
-#line 1338 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7e; {p++; goto _out; } }}
 	break;
 	case 1048:
-#line 1339 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa9; {p++; goto _out; } }}
 	break;
 	case 1049:
-#line 1340 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a80; {p++; goto _out; } }}
 	break;
 	case 1050:
-#line 1341 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a82; {p++; goto _out; } }}
 	break;
 	case 1051:
-#line 1342 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a84; {p++; goto _out; } }}
 	break;
 	case 1052:
-#line 1343 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22db; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 1053:
-#line 1344 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a94; {p++; goto _out; } }}
 	break;
 	case 1054:
-#line 1345 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d524; {p++; goto _out; } }}
 	break;
 	case 1055:
-#line 1346 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226b; {p++; goto _out; } }}
 	break;
 	case 1056:
-#line 1347 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d9; {p++; goto _out; } }}
 	break;
 	case 1057:
-#line 1348 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2137; {p++; goto _out; } }}
 	break;
 	case 1058:
-#line 1349 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0453; {p++; goto _out; } }}
 	break;
 	case 1059:
-#line 1350 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2277; {p++; goto _out; } }}
 	break;
 	case 1060:
-#line 1351 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a92; {p++; goto _out; } }}
 	break;
 	case 1061:
-#line 1352 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa5; {p++; goto _out; } }}
 	break;
 	case 1062:
-#line 1353 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa4; {p++; goto _out; } }}
 	break;
 	case 1063:
-#line 1354 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2269; {p++; goto _out; } }}
 	break;
 	case 1064:
-#line 1355 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a8a; {p++; goto _out; } }}
 	break;
 	case 1065:
-#line 1356 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a8a; {p++; goto _out; } }}
 	break;
 	case 1066:
-#line 1357 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a88; {p++; goto _out; } }}
 	break;
 	case 1067:
-#line 1358 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a88; {p++; goto _out; } }}
 	break;
 	case 1068:
-#line 1359 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2269; {p++; goto _out; } }}
 	break;
 	case 1069:
-#line 1360 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e7; {p++; goto _out; } }}
 	break;
 	case 1070:
-#line 1361 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d558; {p++; goto _out; } }}
 	break;
 	case 1071:
-#line 1362 "char_ref.rl"
 	{te = p+1;{ output->first = 0x60; {p++; goto _out; } }}
 	break;
 	case 1072:
-#line 1363 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210a; {p++; goto _out; } }}
 	break;
 	case 1073:
-#line 1364 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2273; {p++; goto _out; } }}
 	break;
 	case 1074:
-#line 1365 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a8e; {p++; goto _out; } }}
 	break;
 	case 1075:
-#line 1366 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a90; {p++; goto _out; } }}
 	break;
 	case 1076:
-#line 1367 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3e; {p++; goto _out; } }}
 	break;
 	case 1077:
-#line 1369 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa7; {p++; goto _out; } }}
 	break;
 	case 1078:
-#line 1370 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7a; {p++; goto _out; } }}
 	break;
 	case 1079:
-#line 1371 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d7; {p++; goto _out; } }}
 	break;
 	case 1080:
-#line 1372 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2995; {p++; goto _out; } }}
 	break;
 	case 1081:
-#line 1373 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7c; {p++; goto _out; } }}
 	break;
 	case 1082:
-#line 1374 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a86; {p++; goto _out; } }}
 	break;
 	case 1083:
-#line 1375 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2978; {p++; goto _out; } }}
 	break;
 	case 1084:
-#line 1376 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d7; {p++; goto _out; } }}
 	break;
 	case 1085:
-#line 1377 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22db; {p++; goto _out; } }}
 	break;
 	case 1086:
-#line 1378 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a8c; {p++; goto _out; } }}
 	break;
 	case 1087:
-#line 1379 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2277; {p++; goto _out; } }}
 	break;
 	case 1088:
-#line 1380 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2273; {p++; goto _out; } }}
 	break;
 	case 1089:
-#line 1381 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2269; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 1090:
-#line 1382 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2269; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 1091:
-#line 1383 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d4; {p++; goto _out; } }}
 	break;
 	case 1092:
-#line 1384 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200a; {p++; goto _out; } }}
 	break;
 	case 1093:
-#line 1385 "char_ref.rl"
 	{te = p+1;{ output->first = 0xbd; {p++; goto _out; } }}
 	break;
 	case 1094:
-#line 1386 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210b; {p++; goto _out; } }}
 	break;
 	case 1095:
-#line 1387 "char_ref.rl"
 	{te = p+1;{ output->first = 0x044a; {p++; goto _out; } }}
 	break;
 	case 1096:
-#line 1388 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2194; {p++; goto _out; } }}
 	break;
 	case 1097:
-#line 1389 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2948; {p++; goto _out; } }}
 	break;
 	case 1098:
-#line 1390 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ad; {p++; goto _out; } }}
 	break;
 	case 1099:
-#line 1391 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210f; {p++; goto _out; } }}
 	break;
 	case 1100:
-#line 1392 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0125; {p++; goto _out; } }}
 	break;
 	case 1101:
-#line 1393 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2665; {p++; goto _out; } }}
 	break;
 	case 1102:
-#line 1394 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2665; {p++; goto _out; } }}
 	break;
 	case 1103:
-#line 1395 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2026; {p++; goto _out; } }}
 	break;
 	case 1104:
-#line 1396 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b9; {p++; goto _out; } }}
 	break;
 	case 1105:
-#line 1397 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d525; {p++; goto _out; } }}
 	break;
 	case 1106:
-#line 1398 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2925; {p++; goto _out; } }}
 	break;
 	case 1107:
-#line 1399 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2926; {p++; goto _out; } }}
 	break;
 	case 1108:
-#line 1400 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ff; {p++; goto _out; } }}
 	break;
 	case 1109:
-#line 1401 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223b; {p++; goto _out; } }}
 	break;
 	case 1110:
-#line 1402 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a9; {p++; goto _out; } }}
 	break;
 	case 1111:
-#line 1403 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21aa; {p++; goto _out; } }}
 	break;
 	case 1112:
-#line 1404 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d559; {p++; goto _out; } }}
 	break;
 	case 1113:
-#line 1405 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2015; {p++; goto _out; } }}
 	break;
 	case 1114:
-#line 1406 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4bd; {p++; goto _out; } }}
 	break;
 	case 1115:
-#line 1407 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210f; {p++; goto _out; } }}
 	break;
 	case 1116:
-#line 1408 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0127; {p++; goto _out; } }}
 	break;
 	case 1117:
-#line 1409 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2043; {p++; goto _out; } }}
 	break;
 	case 1118:
-#line 1410 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2010; {p++; goto _out; } }}
 	break;
 	case 1119:
-#line 1411 "char_ref.rl"
 	{te = p+1;{ output->first = 0xed; {p++; goto _out; } }}
 	break;
 	case 1120:
-#line 1413 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2063; {p++; goto _out; } }}
 	break;
 	case 1121:
-#line 1414 "char_ref.rl"
 	{te = p+1;{ output->first = 0xee; {p++; goto _out; } }}
 	break;
 	case 1122:
-#line 1416 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0438; {p++; goto _out; } }}
 	break;
 	case 1123:
-#line 1417 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0435; {p++; goto _out; } }}
 	break;
 	case 1124:
-#line 1418 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa1; {p++; goto _out; } }}
 	break;
 	case 1125:
-#line 1420 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d4; {p++; goto _out; } }}
 	break;
 	case 1126:
-#line 1421 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d526; {p++; goto _out; } }}
 	break;
 	case 1127:
-#line 1422 "char_ref.rl"
 	{te = p+1;{ output->first = 0xec; {p++; goto _out; } }}
 	break;
 	case 1128:
-#line 1424 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2148; {p++; goto _out; } }}
 	break;
 	case 1129:
-#line 1425 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a0c; {p++; goto _out; } }}
 	break;
 	case 1130:
-#line 1426 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222d; {p++; goto _out; } }}
 	break;
 	case 1131:
-#line 1427 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29dc; {p++; goto _out; } }}
 	break;
 	case 1132:
-#line 1428 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2129; {p++; goto _out; } }}
 	break;
 	case 1133:
-#line 1429 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0133; {p++; goto _out; } }}
 	break;
 	case 1134:
-#line 1430 "char_ref.rl"
 	{te = p+1;{ output->first = 0x012b; {p++; goto _out; } }}
 	break;
 	case 1135:
-#line 1431 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2111; {p++; goto _out; } }}
 	break;
 	case 1136:
-#line 1432 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2110; {p++; goto _out; } }}
 	break;
 	case 1137:
-#line 1433 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2111; {p++; goto _out; } }}
 	break;
 	case 1138:
-#line 1434 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0131; {p++; goto _out; } }}
 	break;
 	case 1139:
-#line 1435 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b7; {p++; goto _out; } }}
 	break;
 	case 1140:
-#line 1436 "char_ref.rl"
 	{te = p+1;{ output->first = 0x01b5; {p++; goto _out; } }}
 	break;
 	case 1141:
-#line 1437 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2208; {p++; goto _out; } }}
 	break;
 	case 1142:
-#line 1438 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2105; {p++; goto _out; } }}
 	break;
 	case 1143:
-#line 1439 "char_ref.rl"
 	{te = p+1;{ output->first = 0x221e; {p++; goto _out; } }}
 	break;
 	case 1144:
-#line 1440 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29dd; {p++; goto _out; } }}
 	break;
 	case 1145:
-#line 1441 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0131; {p++; goto _out; } }}
 	break;
 	case 1146:
-#line 1442 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222b; {p++; goto _out; } }}
 	break;
 	case 1147:
-#line 1443 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ba; {p++; goto _out; } }}
 	break;
 	case 1148:
-#line 1444 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2124; {p++; goto _out; } }}
 	break;
 	case 1149:
-#line 1445 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ba; {p++; goto _out; } }}
 	break;
 	case 1150:
-#line 1446 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a17; {p++; goto _out; } }}
 	break;
 	case 1151:
-#line 1447 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a3c; {p++; goto _out; } }}
 	break;
 	case 1152:
-#line 1448 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0451; {p++; goto _out; } }}
 	break;
 	case 1153:
-#line 1449 "char_ref.rl"
 	{te = p+1;{ output->first = 0x012f; {p++; goto _out; } }}
 	break;
 	case 1154:
-#line 1450 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d55a; {p++; goto _out; } }}
 	break;
 	case 1155:
-#line 1451 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03b9; {p++; goto _out; } }}
 	break;
 	case 1156:
-#line 1452 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a3c; {p++; goto _out; } }}
 	break;
 	case 1157:
-#line 1453 "char_ref.rl"
 	{te = p+1;{ output->first = 0xbf; {p++; goto _out; } }}
 	break;
 	case 1158:
-#line 1455 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4be; {p++; goto _out; } }}
 	break;
 	case 1159:
-#line 1456 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2208; {p++; goto _out; } }}
 	break;
 	case 1160:
-#line 1457 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f9; {p++; goto _out; } }}
 	break;
 	case 1161:
-#line 1458 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f5; {p++; goto _out; } }}
 	break;
 	case 1162:
-#line 1459 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f4; {p++; goto _out; } }}
 	break;
 	case 1163:
-#line 1460 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f3; {p++; goto _out; } }}
 	break;
 	case 1164:
-#line 1461 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2208; {p++; goto _out; } }}
 	break;
 	case 1165:
-#line 1462 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2062; {p++; goto _out; } }}
 	break;
 	case 1166:
-#line 1463 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0129; {p++; goto _out; } }}
 	break;
 	case 1167:
-#line 1464 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0456; {p++; goto _out; } }}
 	break;
 	case 1168:
-#line 1465 "char_ref.rl"
 	{te = p+1;{ output->first = 0xef; {p++; goto _out; } }}
 	break;
 	case 1169:
-#line 1467 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0135; {p++; goto _out; } }}
 	break;
 	case 1170:
-#line 1468 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0439; {p++; goto _out; } }}
 	break;
 	case 1171:
-#line 1469 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d527; {p++; goto _out; } }}
 	break;
 	case 1172:
-#line 1470 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0237; {p++; goto _out; } }}
 	break;
 	case 1173:
-#line 1471 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d55b; {p++; goto _out; } }}
 	break;
 	case 1174:
-#line 1472 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4bf; {p++; goto _out; } }}
 	break;
 	case 1175:
-#line 1473 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0458; {p++; goto _out; } }}
 	break;
 	case 1176:
-#line 1474 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0454; {p++; goto _out; } }}
 	break;
 	case 1177:
-#line 1475 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03ba; {p++; goto _out; } }}
 	break;
 	case 1178:
-#line 1476 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03f0; {p++; goto _out; } }}
 	break;
 	case 1179:
-#line 1477 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0137; {p++; goto _out; } }}
 	break;
 	case 1180:
-#line 1478 "char_ref.rl"
 	{te = p+1;{ output->first = 0x043a; {p++; goto _out; } }}
 	break;
 	case 1181:
-#line 1479 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d528; {p++; goto _out; } }}
 	break;
 	case 1182:
-#line 1480 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0138; {p++; goto _out; } }}
 	break;
 	case 1183:
-#line 1481 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0445; {p++; goto _out; } }}
 	break;
 	case 1184:
-#line 1482 "char_ref.rl"
 	{te = p+1;{ output->first = 0x045c; {p++; goto _out; } }}
 	break;
 	case 1185:
-#line 1483 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d55c; {p++; goto _out; } }}
 	break;
 	case 1186:
-#line 1484 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4c0; {p++; goto _out; } }}
 	break;
 	case 1187:
-#line 1485 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21da; {p++; goto _out; } }}
 	break;
 	case 1188:
-#line 1486 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d0; {p++; goto _out; } }}
 	break;
 	case 1189:
-#line 1487 "char_ref.rl"
 	{te = p+1;{ output->first = 0x291b; {p++; goto _out; } }}
 	break;
 	case 1190:
-#line 1488 "char_ref.rl"
 	{te = p+1;{ output->first = 0x290e; {p++; goto _out; } }}
 	break;
 	case 1191:
-#line 1489 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2266; {p++; goto _out; } }}
 	break;
 	case 1192:
-#line 1490 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a8b; {p++; goto _out; } }}
 	break;
 	case 1193:
-#line 1491 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2962; {p++; goto _out; } }}
 	break;
 	case 1194:
-#line 1492 "char_ref.rl"
 	{te = p+1;{ output->first = 0x013a; {p++; goto _out; } }}
 	break;
 	case 1195:
-#line 1493 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29b4; {p++; goto _out; } }}
 	break;
 	case 1196:
-#line 1494 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2112; {p++; goto _out; } }}
 	break;
 	case 1197:
-#line 1495 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03bb; {p++; goto _out; } }}
 	break;
 	case 1198:
-#line 1496 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27e8; {p++; goto _out; } }}
 	break;
 	case 1199:
-#line 1497 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2991; {p++; goto _out; } }}
 	break;
 	case 1200:
-#line 1498 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27e8; {p++; goto _out; } }}
 	break;
 	case 1201:
-#line 1499 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a85; {p++; goto _out; } }}
 	break;
 	case 1202:
-#line 1500 "char_ref.rl"
 	{te = p+1;{ output->first = 0xab; {p++; goto _out; } }}
 	break;
 	case 1203:
-#line 1502 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2190; {p++; goto _out; } }}
 	break;
 	case 1204:
-#line 1503 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21e4; {p++; goto _out; } }}
 	break;
 	case 1205:
-#line 1504 "char_ref.rl"
 	{te = p+1;{ output->first = 0x291f; {p++; goto _out; } }}
 	break;
 	case 1206:
-#line 1505 "char_ref.rl"
 	{te = p+1;{ output->first = 0x291d; {p++; goto _out; } }}
 	break;
 	case 1207:
-#line 1506 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a9; {p++; goto _out; } }}
 	break;
 	case 1208:
-#line 1507 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ab; {p++; goto _out; } }}
 	break;
 	case 1209:
-#line 1508 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2939; {p++; goto _out; } }}
 	break;
 	case 1210:
-#line 1509 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2973; {p++; goto _out; } }}
 	break;
 	case 1211:
-#line 1510 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a2; {p++; goto _out; } }}
 	break;
 	case 1212:
-#line 1511 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aab; {p++; goto _out; } }}
 	break;
 	case 1213:
-#line 1512 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2919; {p++; goto _out; } }}
 	break;
 	case 1214:
-#line 1513 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aad; {p++; goto _out; } }}
 	break;
 	case 1215:
-#line 1514 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aad; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 1216:
-#line 1515 "char_ref.rl"
 	{te = p+1;{ output->first = 0x290c; {p++; goto _out; } }}
 	break;
 	case 1217:
-#line 1516 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2772; {p++; goto _out; } }}
 	break;
 	case 1218:
-#line 1517 "char_ref.rl"
 	{te = p+1;{ output->first = 0x7b; {p++; goto _out; } }}
 	break;
 	case 1219:
-#line 1518 "char_ref.rl"
 	{te = p+1;{ output->first = 0x5b; {p++; goto _out; } }}
 	break;
 	case 1220:
-#line 1519 "char_ref.rl"
 	{te = p+1;{ output->first = 0x298b; {p++; goto _out; } }}
 	break;
 	case 1221:
-#line 1520 "char_ref.rl"
 	{te = p+1;{ output->first = 0x298f; {p++; goto _out; } }}
 	break;
 	case 1222:
-#line 1521 "char_ref.rl"
 	{te = p+1;{ output->first = 0x298d; {p++; goto _out; } }}
 	break;
 	case 1223:
-#line 1522 "char_ref.rl"
 	{te = p+1;{ output->first = 0x013e; {p++; goto _out; } }}
 	break;
 	case 1224:
-#line 1523 "char_ref.rl"
 	{te = p+1;{ output->first = 0x013c; {p++; goto _out; } }}
 	break;
 	case 1225:
-#line 1524 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2308; {p++; goto _out; } }}
 	break;
 	case 1226:
-#line 1525 "char_ref.rl"
 	{te = p+1;{ output->first = 0x7b; {p++; goto _out; } }}
 	break;
 	case 1227:
-#line 1526 "char_ref.rl"
 	{te = p+1;{ output->first = 0x043b; {p++; goto _out; } }}
 	break;
 	case 1228:
-#line 1527 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2936; {p++; goto _out; } }}
 	break;
 	case 1229:
-#line 1528 "char_ref.rl"
 	{te = p+1;{ output->first = 0x201c; {p++; goto _out; } }}
 	break;
 	case 1230:
-#line 1529 "char_ref.rl"
 	{te = p+1;{ output->first = 0x201e; {p++; goto _out; } }}
 	break;
 	case 1231:
-#line 1530 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2967; {p++; goto _out; } }}
 	break;
 	case 1232:
-#line 1531 "char_ref.rl"
 	{te = p+1;{ output->first = 0x294b; {p++; goto _out; } }}
 	break;
 	case 1233:
-#line 1532 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b2; {p++; goto _out; } }}
 	break;
 	case 1234:
-#line 1533 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2264; {p++; goto _out; } }}
 	break;
 	case 1235:
-#line 1534 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2190; {p++; goto _out; } }}
 	break;
 	case 1236:
-#line 1535 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a2; {p++; goto _out; } }}
 	break;
 	case 1237:
-#line 1536 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bd; {p++; goto _out; } }}
 	break;
 	case 1238:
-#line 1537 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bc; {p++; goto _out; } }}
 	break;
 	case 1239:
-#line 1538 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c7; {p++; goto _out; } }}
 	break;
 	case 1240:
-#line 1539 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2194; {p++; goto _out; } }}
 	break;
 	case 1241:
-#line 1540 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c6; {p++; goto _out; } }}
 	break;
 	case 1242:
-#line 1541 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21cb; {p++; goto _out; } }}
 	break;
 	case 1243:
-#line 1542 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ad; {p++; goto _out; } }}
 	break;
 	case 1244:
-#line 1543 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22cb; {p++; goto _out; } }}
 	break;
 	case 1245:
-#line 1544 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22da; {p++; goto _out; } }}
 	break;
 	case 1246:
-#line 1545 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2264; {p++; goto _out; } }}
 	break;
 	case 1247:
-#line 1546 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2266; {p++; goto _out; } }}
 	break;
 	case 1248:
-#line 1547 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7d; {p++; goto _out; } }}
 	break;
 	case 1249:
-#line 1548 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7d; {p++; goto _out; } }}
 	break;
 	case 1250:
-#line 1549 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa8; {p++; goto _out; } }}
 	break;
 	case 1251:
-#line 1550 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7f; {p++; goto _out; } }}
 	break;
 	case 1252:
-#line 1551 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a81; {p++; goto _out; } }}
 	break;
 	case 1253:
-#line 1552 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a83; {p++; goto _out; } }}
 	break;
 	case 1254:
-#line 1553 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22da; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 1255:
-#line 1554 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a93; {p++; goto _out; } }}
 	break;
 	case 1256:
-#line 1555 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a85; {p++; goto _out; } }}
 	break;
 	case 1257:
-#line 1556 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d6; {p++; goto _out; } }}
 	break;
 	case 1258:
-#line 1557 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22da; {p++; goto _out; } }}
 	break;
 	case 1259:
-#line 1558 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a8b; {p++; goto _out; } }}
 	break;
 	case 1260:
-#line 1559 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2276; {p++; goto _out; } }}
 	break;
 	case 1261:
-#line 1560 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2272; {p++; goto _out; } }}
 	break;
 	case 1262:
-#line 1561 "char_ref.rl"
 	{te = p+1;{ output->first = 0x297c; {p++; goto _out; } }}
 	break;
 	case 1263:
-#line 1562 "char_ref.rl"
 	{te = p+1;{ output->first = 0x230a; {p++; goto _out; } }}
 	break;
 	case 1264:
-#line 1563 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d529; {p++; goto _out; } }}
 	break;
 	case 1265:
-#line 1564 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2276; {p++; goto _out; } }}
 	break;
 	case 1266:
-#line 1565 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a91; {p++; goto _out; } }}
 	break;
 	case 1267:
-#line 1566 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bd; {p++; goto _out; } }}
 	break;
 	case 1268:
-#line 1567 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bc; {p++; goto _out; } }}
 	break;
 	case 1269:
-#line 1568 "char_ref.rl"
 	{te = p+1;{ output->first = 0x296a; {p++; goto _out; } }}
 	break;
 	case 1270:
-#line 1569 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2584; {p++; goto _out; } }}
 	break;
 	case 1271:
-#line 1570 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0459; {p++; goto _out; } }}
 	break;
 	case 1272:
-#line 1571 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226a; {p++; goto _out; } }}
 	break;
 	case 1273:
-#line 1572 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c7; {p++; goto _out; } }}
 	break;
 	case 1274:
-#line 1573 "char_ref.rl"
 	{te = p+1;{ output->first = 0x231e; {p++; goto _out; } }}
 	break;
 	case 1275:
-#line 1574 "char_ref.rl"
 	{te = p+1;{ output->first = 0x296b; {p++; goto _out; } }}
 	break;
 	case 1276:
-#line 1575 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25fa; {p++; goto _out; } }}
 	break;
 	case 1277:
-#line 1576 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0140; {p++; goto _out; } }}
 	break;
 	case 1278:
-#line 1577 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23b0; {p++; goto _out; } }}
 	break;
 	case 1279:
-#line 1578 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23b0; {p++; goto _out; } }}
 	break;
 	case 1280:
-#line 1579 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2268; {p++; goto _out; } }}
 	break;
 	case 1281:
-#line 1580 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a89; {p++; goto _out; } }}
 	break;
 	case 1282:
-#line 1581 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a89; {p++; goto _out; } }}
 	break;
 	case 1283:
-#line 1582 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a87; {p++; goto _out; } }}
 	break;
 	case 1284:
-#line 1583 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a87; {p++; goto _out; } }}
 	break;
 	case 1285:
-#line 1584 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2268; {p++; goto _out; } }}
 	break;
 	case 1286:
-#line 1585 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e6; {p++; goto _out; } }}
 	break;
 	case 1287:
-#line 1586 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27ec; {p++; goto _out; } }}
 	break;
 	case 1288:
-#line 1587 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21fd; {p++; goto _out; } }}
 	break;
 	case 1289:
-#line 1588 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27e6; {p++; goto _out; } }}
 	break;
 	case 1290:
-#line 1589 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f5; {p++; goto _out; } }}
 	break;
 	case 1291:
-#line 1590 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f7; {p++; goto _out; } }}
 	break;
 	case 1292:
-#line 1591 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27fc; {p++; goto _out; } }}
 	break;
 	case 1293:
-#line 1592 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f6; {p++; goto _out; } }}
 	break;
 	case 1294:
-#line 1593 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ab; {p++; goto _out; } }}
 	break;
 	case 1295:
-#line 1594 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ac; {p++; goto _out; } }}
 	break;
 	case 1296:
-#line 1595 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2985; {p++; goto _out; } }}
 	break;
 	case 1297:
-#line 1596 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d55d; {p++; goto _out; } }}
 	break;
 	case 1298:
-#line 1597 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a2d; {p++; goto _out; } }}
 	break;
 	case 1299:
-#line 1598 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a34; {p++; goto _out; } }}
 	break;
 	case 1300:
-#line 1599 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2217; {p++; goto _out; } }}
 	break;
 	case 1301:
-#line 1600 "char_ref.rl"
 	{te = p+1;{ output->first = 0x5f; {p++; goto _out; } }}
 	break;
 	case 1302:
-#line 1601 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25ca; {p++; goto _out; } }}
 	break;
 	case 1303:
-#line 1602 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25ca; {p++; goto _out; } }}
 	break;
 	case 1304:
-#line 1603 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29eb; {p++; goto _out; } }}
 	break;
 	case 1305:
-#line 1604 "char_ref.rl"
 	{te = p+1;{ output->first = 0x28; {p++; goto _out; } }}
 	break;
 	case 1306:
-#line 1605 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2993; {p++; goto _out; } }}
 	break;
 	case 1307:
-#line 1606 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c6; {p++; goto _out; } }}
 	break;
 	case 1308:
-#line 1607 "char_ref.rl"
 	{te = p+1;{ output->first = 0x231f; {p++; goto _out; } }}
 	break;
 	case 1309:
-#line 1608 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21cb; {p++; goto _out; } }}
 	break;
 	case 1310:
-#line 1609 "char_ref.rl"
 	{te = p+1;{ output->first = 0x296d; {p++; goto _out; } }}
 	break;
 	case 1311:
-#line 1610 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200e; {p++; goto _out; } }}
 	break;
 	case 1312:
-#line 1611 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22bf; {p++; goto _out; } }}
 	break;
 	case 1313:
-#line 1612 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2039; {p++; goto _out; } }}
 	break;
 	case 1314:
-#line 1613 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4c1; {p++; goto _out; } }}
 	break;
 	case 1315:
-#line 1614 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b0; {p++; goto _out; } }}
 	break;
 	case 1316:
-#line 1615 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2272; {p++; goto _out; } }}
 	break;
 	case 1317:
-#line 1616 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a8d; {p++; goto _out; } }}
 	break;
 	case 1318:
-#line 1617 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a8f; {p++; goto _out; } }}
 	break;
 	case 1319:
-#line 1618 "char_ref.rl"
 	{te = p+1;{ output->first = 0x5b; {p++; goto _out; } }}
 	break;
 	case 1320:
-#line 1619 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2018; {p++; goto _out; } }}
 	break;
 	case 1321:
-#line 1620 "char_ref.rl"
 	{te = p+1;{ output->first = 0x201a; {p++; goto _out; } }}
 	break;
 	case 1322:
-#line 1621 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0142; {p++; goto _out; } }}
 	break;
 	case 1323:
-#line 1622 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3c; {p++; goto _out; } }}
 	break;
 	case 1324:
-#line 1624 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa6; {p++; goto _out; } }}
 	break;
 	case 1325:
-#line 1625 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a79; {p++; goto _out; } }}
 	break;
 	case 1326:
-#line 1626 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d6; {p++; goto _out; } }}
 	break;
 	case 1327:
-#line 1627 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22cb; {p++; goto _out; } }}
 	break;
 	case 1328:
-#line 1628 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c9; {p++; goto _out; } }}
 	break;
 	case 1329:
-#line 1629 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2976; {p++; goto _out; } }}
 	break;
 	case 1330:
-#line 1630 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7b; {p++; goto _out; } }}
 	break;
 	case 1331:
-#line 1631 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2996; {p++; goto _out; } }}
 	break;
 	case 1332:
-#line 1632 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25c3; {p++; goto _out; } }}
 	break;
 	case 1333:
-#line 1633 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b4; {p++; goto _out; } }}
 	break;
 	case 1334:
-#line 1634 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25c2; {p++; goto _out; } }}
 	break;
 	case 1335:
-#line 1635 "char_ref.rl"
 	{te = p+1;{ output->first = 0x294a; {p++; goto _out; } }}
 	break;
 	case 1336:
-#line 1636 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2966; {p++; goto _out; } }}
 	break;
 	case 1337:
-#line 1637 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2268; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 1338:
-#line 1638 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2268; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 1339:
-#line 1639 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223a; {p++; goto _out; } }}
 	break;
 	case 1340:
-#line 1640 "char_ref.rl"
 	{te = p+1;{ output->first = 0xaf; {p++; goto _out; } }}
 	break;
 	case 1341:
-#line 1642 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2642; {p++; goto _out; } }}
 	break;
 	case 1342:
-#line 1643 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2720; {p++; goto _out; } }}
 	break;
 	case 1343:
-#line 1644 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2720; {p++; goto _out; } }}
 	break;
 	case 1344:
-#line 1645 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a6; {p++; goto _out; } }}
 	break;
 	case 1345:
-#line 1646 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a6; {p++; goto _out; } }}
 	break;
 	case 1346:
-#line 1647 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a7; {p++; goto _out; } }}
 	break;
 	case 1347:
-#line 1648 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a4; {p++; goto _out; } }}
 	break;
 	case 1348:
-#line 1649 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a5; {p++; goto _out; } }}
 	break;
 	case 1349:
-#line 1650 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25ae; {p++; goto _out; } }}
 	break;
 	case 1350:
-#line 1651 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a29; {p++; goto _out; } }}
 	break;
 	case 1351:
-#line 1652 "char_ref.rl"
 	{te = p+1;{ output->first = 0x043c; {p++; goto _out; } }}
 	break;
 	case 1352:
-#line 1653 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2014; {p++; goto _out; } }}
 	break;
 	case 1353:
-#line 1654 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2221; {p++; goto _out; } }}
 	break;
 	case 1354:
-#line 1655 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d52a; {p++; goto _out; } }}
 	break;
 	case 1355:
-#line 1656 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2127; {p++; goto _out; } }}
 	break;
 	case 1356:
-#line 1657 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb5; {p++; goto _out; } }}
 	break;
 	case 1357:
-#line 1659 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2223; {p++; goto _out; } }}
 	break;
 	case 1358:
-#line 1660 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a; {p++; goto _out; } }}
 	break;
 	case 1359:
-#line 1661 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2af0; {p++; goto _out; } }}
 	break;
 	case 1360:
-#line 1662 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb7; {p++; goto _out; } }}
 	break;
 	case 1361:
-#line 1664 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2212; {p++; goto _out; } }}
 	break;
 	case 1362:
-#line 1665 "char_ref.rl"
 	{te = p+1;{ output->first = 0x229f; {p++; goto _out; } }}
 	break;
 	case 1363:
-#line 1666 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2238; {p++; goto _out; } }}
 	break;
 	case 1364:
-#line 1667 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a2a; {p++; goto _out; } }}
 	break;
 	case 1365:
-#line 1668 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2adb; {p++; goto _out; } }}
 	break;
 	case 1366:
-#line 1669 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2026; {p++; goto _out; } }}
 	break;
 	case 1367:
-#line 1670 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2213; {p++; goto _out; } }}
 	break;
 	case 1368:
-#line 1671 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a7; {p++; goto _out; } }}
 	break;
 	case 1369:
-#line 1672 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d55e; {p++; goto _out; } }}
 	break;
 	case 1370:
-#line 1673 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2213; {p++; goto _out; } }}
 	break;
 	case 1371:
-#line 1674 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4c2; {p++; goto _out; } }}
 	break;
 	case 1372:
-#line 1675 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223e; {p++; goto _out; } }}
 	break;
 	case 1373:
-#line 1676 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03bc; {p++; goto _out; } }}
 	break;
 	case 1374:
-#line 1677 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b8; {p++; goto _out; } }}
 	break;
 	case 1375:
-#line 1678 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b8; {p++; goto _out; } }}
 	break;
 	case 1376:
-#line 1679 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d9; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1377:
-#line 1680 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226b; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1378:
-#line 1681 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226b; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1379:
-#line 1682 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21cd; {p++; goto _out; } }}
 	break;
 	case 1380:
-#line 1683 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ce; {p++; goto _out; } }}
 	break;
 	case 1381:
-#line 1684 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d8; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1382:
-#line 1685 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226a; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1383:
-#line 1686 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226a; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1384:
-#line 1687 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21cf; {p++; goto _out; } }}
 	break;
 	case 1385:
-#line 1688 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22af; {p++; goto _out; } }}
 	break;
 	case 1386:
-#line 1689 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ae; {p++; goto _out; } }}
 	break;
 	case 1387:
-#line 1690 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2207; {p++; goto _out; } }}
 	break;
 	case 1388:
-#line 1691 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0144; {p++; goto _out; } }}
 	break;
 	case 1389:
-#line 1692 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2220; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1390:
-#line 1693 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2249; {p++; goto _out; } }}
 	break;
 	case 1391:
-#line 1694 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a70; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1392:
-#line 1695 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224b; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1393:
-#line 1696 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0149; {p++; goto _out; } }}
 	break;
 	case 1394:
-#line 1697 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2249; {p++; goto _out; } }}
 	break;
 	case 1395:
-#line 1698 "char_ref.rl"
 	{te = p+1;{ output->first = 0x266e; {p++; goto _out; } }}
 	break;
 	case 1396:
-#line 1699 "char_ref.rl"
 	{te = p+1;{ output->first = 0x266e; {p++; goto _out; } }}
 	break;
 	case 1397:
-#line 1700 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2115; {p++; goto _out; } }}
 	break;
 	case 1398:
-#line 1701 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa0; {p++; goto _out; } }}
 	break;
 	case 1399:
-#line 1703 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224e; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1400:
-#line 1704 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224f; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1401:
-#line 1705 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a43; {p++; goto _out; } }}
 	break;
 	case 1402:
-#line 1706 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0148; {p++; goto _out; } }}
 	break;
 	case 1403:
-#line 1707 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0146; {p++; goto _out; } }}
 	break;
 	case 1404:
-#line 1708 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2247; {p++; goto _out; } }}
 	break;
 	case 1405:
-#line 1709 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a6d; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1406:
-#line 1710 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a42; {p++; goto _out; } }}
 	break;
 	case 1407:
-#line 1711 "char_ref.rl"
 	{te = p+1;{ output->first = 0x043d; {p++; goto _out; } }}
 	break;
 	case 1408:
-#line 1712 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2013; {p++; goto _out; } }}
 	break;
 	case 1409:
-#line 1713 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2260; {p++; goto _out; } }}
 	break;
 	case 1410:
-#line 1714 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d7; {p++; goto _out; } }}
 	break;
 	case 1411:
-#line 1715 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2924; {p++; goto _out; } }}
 	break;
 	case 1412:
-#line 1716 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2197; {p++; goto _out; } }}
 	break;
 	case 1413:
-#line 1717 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2197; {p++; goto _out; } }}
 	break;
 	case 1414:
-#line 1718 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2250; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1415:
-#line 1719 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2262; {p++; goto _out; } }}
 	break;
 	case 1416:
-#line 1720 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2928; {p++; goto _out; } }}
 	break;
 	case 1417:
-#line 1721 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2242; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1418:
-#line 1722 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2204; {p++; goto _out; } }}
 	break;
 	case 1419:
-#line 1723 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2204; {p++; goto _out; } }}
 	break;
 	case 1420:
-#line 1724 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d52b; {p++; goto _out; } }}
 	break;
 	case 1421:
-#line 1725 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2267; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1422:
-#line 1726 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2271; {p++; goto _out; } }}
 	break;
 	case 1423:
-#line 1727 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2271; {p++; goto _out; } }}
 	break;
 	case 1424:
-#line 1728 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2267; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1425:
-#line 1729 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7e; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1426:
-#line 1730 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7e; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1427:
-#line 1731 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2275; {p++; goto _out; } }}
 	break;
 	case 1428:
-#line 1732 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226f; {p++; goto _out; } }}
 	break;
 	case 1429:
-#line 1733 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226f; {p++; goto _out; } }}
 	break;
 	case 1430:
-#line 1734 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ce; {p++; goto _out; } }}
 	break;
 	case 1431:
-#line 1735 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ae; {p++; goto _out; } }}
 	break;
 	case 1432:
-#line 1736 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2af2; {p++; goto _out; } }}
 	break;
 	case 1433:
-#line 1737 "char_ref.rl"
 	{te = p+1;{ output->first = 0x220b; {p++; goto _out; } }}
 	break;
 	case 1434:
-#line 1738 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22fc; {p++; goto _out; } }}
 	break;
 	case 1435:
-#line 1739 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22fa; {p++; goto _out; } }}
 	break;
 	case 1436:
-#line 1740 "char_ref.rl"
 	{te = p+1;{ output->first = 0x220b; {p++; goto _out; } }}
 	break;
 	case 1437:
-#line 1741 "char_ref.rl"
 	{te = p+1;{ output->first = 0x045a; {p++; goto _out; } }}
 	break;
 	case 1438:
-#line 1742 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21cd; {p++; goto _out; } }}
 	break;
 	case 1439:
-#line 1743 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2266; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1440:
-#line 1744 "char_ref.rl"
 	{te = p+1;{ output->first = 0x219a; {p++; goto _out; } }}
 	break;
 	case 1441:
-#line 1745 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2025; {p++; goto _out; } }}
 	break;
 	case 1442:
-#line 1746 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2270; {p++; goto _out; } }}
 	break;
 	case 1443:
-#line 1747 "char_ref.rl"
 	{te = p+1;{ output->first = 0x219a; {p++; goto _out; } }}
 	break;
 	case 1444:
-#line 1748 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ae; {p++; goto _out; } }}
 	break;
 	case 1445:
-#line 1749 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2270; {p++; goto _out; } }}
 	break;
 	case 1446:
-#line 1750 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2266; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1447:
-#line 1751 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7d; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1448:
-#line 1752 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a7d; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1449:
-#line 1753 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226e; {p++; goto _out; } }}
 	break;
 	case 1450:
-#line 1754 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2274; {p++; goto _out; } }}
 	break;
 	case 1451:
-#line 1755 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226e; {p++; goto _out; } }}
 	break;
 	case 1452:
-#line 1756 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ea; {p++; goto _out; } }}
 	break;
 	case 1453:
-#line 1757 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ec; {p++; goto _out; } }}
 	break;
 	case 1454:
-#line 1758 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2224; {p++; goto _out; } }}
 	break;
 	case 1455:
-#line 1759 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d55f; {p++; goto _out; } }}
 	break;
 	case 1456:
-#line 1760 "char_ref.rl"
 	{te = p+1;{ output->first = 0xac; {p++; goto _out; } }}
 	break;
 	case 1457:
-#line 1761 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2209; {p++; goto _out; } }}
 	break;
 	case 1458:
-#line 1762 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f9; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1459:
-#line 1763 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f5; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1460:
-#line 1764 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2209; {p++; goto _out; } }}
 	break;
 	case 1461:
-#line 1765 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f7; {p++; goto _out; } }}
 	break;
 	case 1462:
-#line 1766 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f6; {p++; goto _out; } }}
 	break;
 	case 1463:
-#line 1767 "char_ref.rl"
 	{te = p+1;{ output->first = 0x220c; {p++; goto _out; } }}
 	break;
 	case 1464:
-#line 1768 "char_ref.rl"
 	{te = p+1;{ output->first = 0x220c; {p++; goto _out; } }}
 	break;
 	case 1465:
-#line 1769 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22fe; {p++; goto _out; } }}
 	break;
 	case 1466:
-#line 1770 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22fd; {p++; goto _out; } }}
 	break;
 	case 1467:
-#line 1772 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2226; {p++; goto _out; } }}
 	break;
 	case 1468:
-#line 1773 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2226; {p++; goto _out; } }}
 	break;
 	case 1469:
-#line 1774 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2afd; output->second = 0x20e5; {p++; goto _out; } }}
 	break;
 	case 1470:
-#line 1775 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2202; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1471:
-#line 1776 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a14; {p++; goto _out; } }}
 	break;
 	case 1472:
-#line 1777 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2280; {p++; goto _out; } }}
 	break;
 	case 1473:
-#line 1778 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e0; {p++; goto _out; } }}
 	break;
 	case 1474:
-#line 1779 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aaf; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1475:
-#line 1780 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2280; {p++; goto _out; } }}
 	break;
 	case 1476:
-#line 1781 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aaf; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1477:
-#line 1782 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21cf; {p++; goto _out; } }}
 	break;
 	case 1478:
-#line 1783 "char_ref.rl"
 	{te = p+1;{ output->first = 0x219b; {p++; goto _out; } }}
 	break;
 	case 1479:
-#line 1784 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2933; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1480:
-#line 1785 "char_ref.rl"
 	{te = p+1;{ output->first = 0x219d; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1481:
-#line 1786 "char_ref.rl"
 	{te = p+1;{ output->first = 0x219b; {p++; goto _out; } }}
 	break;
 	case 1482:
-#line 1787 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22eb; {p++; goto _out; } }}
 	break;
 	case 1483:
-#line 1788 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ed; {p++; goto _out; } }}
 	break;
 	case 1484:
-#line 1789 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2281; {p++; goto _out; } }}
 	break;
 	case 1485:
-#line 1790 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e1; {p++; goto _out; } }}
 	break;
 	case 1486:
-#line 1791 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab0; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1487:
-#line 1792 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4c3; {p++; goto _out; } }}
 	break;
 	case 1488:
-#line 1793 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2224; {p++; goto _out; } }}
 	break;
 	case 1489:
-#line 1794 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2226; {p++; goto _out; } }}
 	break;
 	case 1490:
-#line 1795 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2241; {p++; goto _out; } }}
 	break;
 	case 1491:
-#line 1796 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2244; {p++; goto _out; } }}
 	break;
 	case 1492:
-#line 1797 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2244; {p++; goto _out; } }}
 	break;
 	case 1493:
-#line 1798 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2224; {p++; goto _out; } }}
 	break;
 	case 1494:
-#line 1799 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2226; {p++; goto _out; } }}
 	break;
 	case 1495:
-#line 1800 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e2; {p++; goto _out; } }}
 	break;
 	case 1496:
-#line 1801 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e3; {p++; goto _out; } }}
 	break;
 	case 1497:
-#line 1802 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2284; {p++; goto _out; } }}
 	break;
 	case 1498:
-#line 1803 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac5; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1499:
-#line 1804 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2288; {p++; goto _out; } }}
 	break;
 	case 1500:
-#line 1805 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2282; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1501:
-#line 1806 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2288; {p++; goto _out; } }}
 	break;
 	case 1502:
-#line 1807 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac5; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1503:
-#line 1808 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2281; {p++; goto _out; } }}
 	break;
 	case 1504:
-#line 1809 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab0; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1505:
-#line 1810 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2285; {p++; goto _out; } }}
 	break;
 	case 1506:
-#line 1811 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac6; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1507:
-#line 1812 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2289; {p++; goto _out; } }}
 	break;
 	case 1508:
-#line 1813 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2283; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1509:
-#line 1814 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2289; {p++; goto _out; } }}
 	break;
 	case 1510:
-#line 1815 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac6; output->second = 0x0338; {p++; goto _out; } }}
 	break;
 	case 1511:
-#line 1816 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2279; {p++; goto _out; } }}
 	break;
 	case 1512:
-#line 1817 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf1; {p++; goto _out; } }}
 	break;
 	case 1513:
-#line 1819 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2278; {p++; goto _out; } }}
 	break;
 	case 1514:
-#line 1820 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ea; {p++; goto _out; } }}
 	break;
 	case 1515:
-#line 1821 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ec; {p++; goto _out; } }}
 	break;
 	case 1516:
-#line 1822 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22eb; {p++; goto _out; } }}
 	break;
 	case 1517:
-#line 1823 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ed; {p++; goto _out; } }}
 	break;
 	case 1518:
-#line 1824 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03bd; {p++; goto _out; } }}
 	break;
 	case 1519:
-#line 1825 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23; {p++; goto _out; } }}
 	break;
 	case 1520:
-#line 1826 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2116; {p++; goto _out; } }}
 	break;
 	case 1521:
-#line 1827 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2007; {p++; goto _out; } }}
 	break;
 	case 1522:
-#line 1828 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ad; {p++; goto _out; } }}
 	break;
 	case 1523:
-#line 1829 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2904; {p++; goto _out; } }}
 	break;
 	case 1524:
-#line 1830 "char_ref.rl"
 	{te = p+1;{ output->first = 0x224d; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1525:
-#line 1831 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ac; {p++; goto _out; } }}
 	break;
 	case 1526:
-#line 1832 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2265; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1527:
-#line 1833 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3e; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1528:
-#line 1834 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29de; {p++; goto _out; } }}
 	break;
 	case 1529:
-#line 1835 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2902; {p++; goto _out; } }}
 	break;
 	case 1530:
-#line 1836 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2264; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1531:
-#line 1837 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3c; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1532:
-#line 1838 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b4; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1533:
-#line 1839 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2903; {p++; goto _out; } }}
 	break;
 	case 1534:
-#line 1840 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b5; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1535:
-#line 1841 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223c; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 1536:
-#line 1842 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d6; {p++; goto _out; } }}
 	break;
 	case 1537:
-#line 1843 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2923; {p++; goto _out; } }}
 	break;
 	case 1538:
-#line 1844 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2196; {p++; goto _out; } }}
 	break;
 	case 1539:
-#line 1845 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2196; {p++; goto _out; } }}
 	break;
 	case 1540:
-#line 1846 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2927; {p++; goto _out; } }}
 	break;
 	case 1541:
-#line 1847 "char_ref.rl"
 	{te = p+1;{ output->first = 0x24c8; {p++; goto _out; } }}
 	break;
 	case 1542:
-#line 1848 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf3; {p++; goto _out; } }}
 	break;
 	case 1543:
-#line 1850 "char_ref.rl"
 	{te = p+1;{ output->first = 0x229b; {p++; goto _out; } }}
 	break;
 	case 1544:
-#line 1851 "char_ref.rl"
 	{te = p+1;{ output->first = 0x229a; {p++; goto _out; } }}
 	break;
 	case 1545:
-#line 1852 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf4; {p++; goto _out; } }}
 	break;
 	case 1546:
-#line 1854 "char_ref.rl"
 	{te = p+1;{ output->first = 0x043e; {p++; goto _out; } }}
 	break;
 	case 1547:
-#line 1855 "char_ref.rl"
 	{te = p+1;{ output->first = 0x229d; {p++; goto _out; } }}
 	break;
 	case 1548:
-#line 1856 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0151; {p++; goto _out; } }}
 	break;
 	case 1549:
-#line 1857 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a38; {p++; goto _out; } }}
 	break;
 	case 1550:
-#line 1858 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2299; {p++; goto _out; } }}
 	break;
 	case 1551:
-#line 1859 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29bc; {p++; goto _out; } }}
 	break;
 	case 1552:
-#line 1860 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0153; {p++; goto _out; } }}
 	break;
 	case 1553:
-#line 1861 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29bf; {p++; goto _out; } }}
 	break;
 	case 1554:
-#line 1862 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d52c; {p++; goto _out; } }}
 	break;
 	case 1555:
-#line 1863 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02db; {p++; goto _out; } }}
 	break;
 	case 1556:
-#line 1864 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf2; {p++; goto _out; } }}
 	break;
 	case 1557:
-#line 1866 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29c1; {p++; goto _out; } }}
 	break;
 	case 1558:
-#line 1867 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29b5; {p++; goto _out; } }}
 	break;
 	case 1559:
-#line 1868 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03a9; {p++; goto _out; } }}
 	break;
 	case 1560:
-#line 1869 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222e; {p++; goto _out; } }}
 	break;
 	case 1561:
-#line 1870 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ba; {p++; goto _out; } }}
 	break;
 	case 1562:
-#line 1871 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29be; {p++; goto _out; } }}
 	break;
 	case 1563:
-#line 1872 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29bb; {p++; goto _out; } }}
 	break;
 	case 1564:
-#line 1873 "char_ref.rl"
 	{te = p+1;{ output->first = 0x203e; {p++; goto _out; } }}
 	break;
 	case 1565:
-#line 1874 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29c0; {p++; goto _out; } }}
 	break;
 	case 1566:
-#line 1875 "char_ref.rl"
 	{te = p+1;{ output->first = 0x014d; {p++; goto _out; } }}
 	break;
 	case 1567:
-#line 1876 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c9; {p++; goto _out; } }}
 	break;
 	case 1568:
-#line 1877 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03bf; {p++; goto _out; } }}
 	break;
 	case 1569:
-#line 1878 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29b6; {p++; goto _out; } }}
 	break;
 	case 1570:
-#line 1879 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2296; {p++; goto _out; } }}
 	break;
 	case 1571:
-#line 1880 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d560; {p++; goto _out; } }}
 	break;
 	case 1572:
-#line 1881 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29b7; {p++; goto _out; } }}
 	break;
 	case 1573:
-#line 1882 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29b9; {p++; goto _out; } }}
 	break;
 	case 1574:
-#line 1883 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2295; {p++; goto _out; } }}
 	break;
 	case 1575:
-#line 1884 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2228; {p++; goto _out; } }}
 	break;
 	case 1576:
-#line 1885 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bb; {p++; goto _out; } }}
 	break;
 	case 1577:
-#line 1886 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a5d; {p++; goto _out; } }}
 	break;
 	case 1578:
-#line 1887 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2134; {p++; goto _out; } }}
 	break;
 	case 1579:
-#line 1888 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2134; {p++; goto _out; } }}
 	break;
 	case 1580:
-#line 1889 "char_ref.rl"
 	{te = p+1;{ output->first = 0xaa; {p++; goto _out; } }}
 	break;
 	case 1581:
-#line 1891 "char_ref.rl"
 	{te = p+1;{ output->first = 0xba; {p++; goto _out; } }}
 	break;
 	case 1582:
-#line 1893 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b6; {p++; goto _out; } }}
 	break;
 	case 1583:
-#line 1894 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a56; {p++; goto _out; } }}
 	break;
 	case 1584:
-#line 1895 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a57; {p++; goto _out; } }}
 	break;
 	case 1585:
-#line 1896 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a5b; {p++; goto _out; } }}
 	break;
 	case 1586:
-#line 1897 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2134; {p++; goto _out; } }}
 	break;
 	case 1587:
-#line 1898 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf8; {p++; goto _out; } }}
 	break;
 	case 1588:
-#line 1900 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2298; {p++; goto _out; } }}
 	break;
 	case 1589:
-#line 1901 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf5; {p++; goto _out; } }}
 	break;
 	case 1590:
-#line 1903 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2297; {p++; goto _out; } }}
 	break;
 	case 1591:
-#line 1904 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a36; {p++; goto _out; } }}
 	break;
 	case 1592:
-#line 1905 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf6; {p++; goto _out; } }}
 	break;
 	case 1593:
-#line 1907 "char_ref.rl"
 	{te = p+1;{ output->first = 0x233d; {p++; goto _out; } }}
 	break;
 	case 1594:
-#line 1908 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2225; {p++; goto _out; } }}
 	break;
 	case 1595:
-#line 1909 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb6; {p++; goto _out; } }}
 	break;
 	case 1596:
-#line 1911 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2225; {p++; goto _out; } }}
 	break;
 	case 1597:
-#line 1912 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2af3; {p++; goto _out; } }}
 	break;
 	case 1598:
-#line 1913 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2afd; {p++; goto _out; } }}
 	break;
 	case 1599:
-#line 1914 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2202; {p++; goto _out; } }}
 	break;
 	case 1600:
-#line 1915 "char_ref.rl"
 	{te = p+1;{ output->first = 0x043f; {p++; goto _out; } }}
 	break;
 	case 1601:
-#line 1916 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25; {p++; goto _out; } }}
 	break;
 	case 1602:
-#line 1917 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2e; {p++; goto _out; } }}
 	break;
 	case 1603:
-#line 1918 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2030; {p++; goto _out; } }}
 	break;
 	case 1604:
-#line 1919 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a5; {p++; goto _out; } }}
 	break;
 	case 1605:
-#line 1920 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2031; {p++; goto _out; } }}
 	break;
 	case 1606:
-#line 1921 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d52d; {p++; goto _out; } }}
 	break;
 	case 1607:
-#line 1922 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c6; {p++; goto _out; } }}
 	break;
 	case 1608:
-#line 1923 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03d5; {p++; goto _out; } }}
 	break;
 	case 1609:
-#line 1924 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2133; {p++; goto _out; } }}
 	break;
 	case 1610:
-#line 1925 "char_ref.rl"
 	{te = p+1;{ output->first = 0x260e; {p++; goto _out; } }}
 	break;
 	case 1611:
-#line 1926 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c0; {p++; goto _out; } }}
 	break;
 	case 1612:
-#line 1927 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22d4; {p++; goto _out; } }}
 	break;
 	case 1613:
-#line 1928 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03d6; {p++; goto _out; } }}
 	break;
 	case 1614:
-#line 1929 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210f; {p++; goto _out; } }}
 	break;
 	case 1615:
-#line 1930 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210e; {p++; goto _out; } }}
 	break;
 	case 1616:
-#line 1931 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210f; {p++; goto _out; } }}
 	break;
 	case 1617:
-#line 1932 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2b; {p++; goto _out; } }}
 	break;
 	case 1618:
-#line 1933 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a23; {p++; goto _out; } }}
 	break;
 	case 1619:
-#line 1934 "char_ref.rl"
 	{te = p+1;{ output->first = 0x229e; {p++; goto _out; } }}
 	break;
 	case 1620:
-#line 1935 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a22; {p++; goto _out; } }}
 	break;
 	case 1621:
-#line 1936 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2214; {p++; goto _out; } }}
 	break;
 	case 1622:
-#line 1937 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a25; {p++; goto _out; } }}
 	break;
 	case 1623:
-#line 1938 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a72; {p++; goto _out; } }}
 	break;
 	case 1624:
-#line 1939 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb1; {p++; goto _out; } }}
 	break;
 	case 1625:
-#line 1941 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a26; {p++; goto _out; } }}
 	break;
 	case 1626:
-#line 1942 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a27; {p++; goto _out; } }}
 	break;
 	case 1627:
-#line 1943 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb1; {p++; goto _out; } }}
 	break;
 	case 1628:
-#line 1944 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a15; {p++; goto _out; } }}
 	break;
 	case 1629:
-#line 1945 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d561; {p++; goto _out; } }}
 	break;
 	case 1630:
-#line 1946 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa3; {p++; goto _out; } }}
 	break;
 	case 1631:
-#line 1948 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227a; {p++; goto _out; } }}
 	break;
 	case 1632:
-#line 1949 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab3; {p++; goto _out; } }}
 	break;
 	case 1633:
-#line 1950 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab7; {p++; goto _out; } }}
 	break;
 	case 1634:
-#line 1951 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227c; {p++; goto _out; } }}
 	break;
 	case 1635:
-#line 1952 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aaf; {p++; goto _out; } }}
 	break;
 	case 1636:
-#line 1953 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227a; {p++; goto _out; } }}
 	break;
 	case 1637:
-#line 1954 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab7; {p++; goto _out; } }}
 	break;
 	case 1638:
-#line 1955 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227c; {p++; goto _out; } }}
 	break;
 	case 1639:
-#line 1956 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aaf; {p++; goto _out; } }}
 	break;
 	case 1640:
-#line 1957 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab9; {p++; goto _out; } }}
 	break;
 	case 1641:
-#line 1958 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab5; {p++; goto _out; } }}
 	break;
 	case 1642:
-#line 1959 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e8; {p++; goto _out; } }}
 	break;
 	case 1643:
-#line 1960 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227e; {p++; goto _out; } }}
 	break;
 	case 1644:
-#line 1961 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2032; {p++; goto _out; } }}
 	break;
 	case 1645:
-#line 1962 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2119; {p++; goto _out; } }}
 	break;
 	case 1646:
-#line 1963 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab5; {p++; goto _out; } }}
 	break;
 	case 1647:
-#line 1964 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab9; {p++; goto _out; } }}
 	break;
 	case 1648:
-#line 1965 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e8; {p++; goto _out; } }}
 	break;
 	case 1649:
-#line 1966 "char_ref.rl"
 	{te = p+1;{ output->first = 0x220f; {p++; goto _out; } }}
 	break;
 	case 1650:
-#line 1967 "char_ref.rl"
 	{te = p+1;{ output->first = 0x232e; {p++; goto _out; } }}
 	break;
 	case 1651:
-#line 1968 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2312; {p++; goto _out; } }}
 	break;
 	case 1652:
-#line 1969 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2313; {p++; goto _out; } }}
 	break;
 	case 1653:
-#line 1970 "char_ref.rl"
 	{te = p+1;{ output->first = 0x221d; {p++; goto _out; } }}
 	break;
 	case 1654:
-#line 1971 "char_ref.rl"
 	{te = p+1;{ output->first = 0x221d; {p++; goto _out; } }}
 	break;
 	case 1655:
-#line 1972 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227e; {p++; goto _out; } }}
 	break;
 	case 1656:
-#line 1973 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b0; {p++; goto _out; } }}
 	break;
 	case 1657:
-#line 1974 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4c5; {p++; goto _out; } }}
 	break;
 	case 1658:
-#line 1975 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c8; {p++; goto _out; } }}
 	break;
 	case 1659:
-#line 1976 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2008; {p++; goto _out; } }}
 	break;
 	case 1660:
-#line 1977 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d52e; {p++; goto _out; } }}
 	break;
 	case 1661:
-#line 1978 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a0c; {p++; goto _out; } }}
 	break;
 	case 1662:
-#line 1979 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d562; {p++; goto _out; } }}
 	break;
 	case 1663:
-#line 1980 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2057; {p++; goto _out; } }}
 	break;
 	case 1664:
-#line 1981 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4c6; {p++; goto _out; } }}
 	break;
 	case 1665:
-#line 1982 "char_ref.rl"
 	{te = p+1;{ output->first = 0x210d; {p++; goto _out; } }}
 	break;
 	case 1666:
-#line 1983 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a16; {p++; goto _out; } }}
 	break;
 	case 1667:
-#line 1984 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3f; {p++; goto _out; } }}
 	break;
 	case 1668:
-#line 1985 "char_ref.rl"
 	{te = p+1;{ output->first = 0x225f; {p++; goto _out; } }}
 	break;
 	case 1669:
-#line 1986 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22; {p++; goto _out; } }}
 	break;
 	case 1670:
-#line 1988 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21db; {p++; goto _out; } }}
 	break;
 	case 1671:
-#line 1989 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d2; {p++; goto _out; } }}
 	break;
 	case 1672:
-#line 1990 "char_ref.rl"
 	{te = p+1;{ output->first = 0x291c; {p++; goto _out; } }}
 	break;
 	case 1673:
-#line 1991 "char_ref.rl"
 	{te = p+1;{ output->first = 0x290f; {p++; goto _out; } }}
 	break;
 	case 1674:
-#line 1992 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2964; {p++; goto _out; } }}
 	break;
 	case 1675:
-#line 1993 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223d; output->second = 0x0331; {p++; goto _out; } }}
 	break;
 	case 1676:
-#line 1994 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0155; {p++; goto _out; } }}
 	break;
 	case 1677:
-#line 1995 "char_ref.rl"
 	{te = p+1;{ output->first = 0x221a; {p++; goto _out; } }}
 	break;
 	case 1678:
-#line 1996 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29b3; {p++; goto _out; } }}
 	break;
 	case 1679:
-#line 1997 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27e9; {p++; goto _out; } }}
 	break;
 	case 1680:
-#line 1998 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2992; {p++; goto _out; } }}
 	break;
 	case 1681:
-#line 1999 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29a5; {p++; goto _out; } }}
 	break;
 	case 1682:
-#line 2000 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27e9; {p++; goto _out; } }}
 	break;
 	case 1683:
-#line 2001 "char_ref.rl"
 	{te = p+1;{ output->first = 0xbb; {p++; goto _out; } }}
 	break;
 	case 1684:
-#line 2003 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2192; {p++; goto _out; } }}
 	break;
 	case 1685:
-#line 2004 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2975; {p++; goto _out; } }}
 	break;
 	case 1686:
-#line 2005 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21e5; {p++; goto _out; } }}
 	break;
 	case 1687:
-#line 2006 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2920; {p++; goto _out; } }}
 	break;
 	case 1688:
-#line 2007 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2933; {p++; goto _out; } }}
 	break;
 	case 1689:
-#line 2008 "char_ref.rl"
 	{te = p+1;{ output->first = 0x291e; {p++; goto _out; } }}
 	break;
 	case 1690:
-#line 2009 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21aa; {p++; goto _out; } }}
 	break;
 	case 1691:
-#line 2010 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21ac; {p++; goto _out; } }}
 	break;
 	case 1692:
-#line 2011 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2945; {p++; goto _out; } }}
 	break;
 	case 1693:
-#line 2012 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2974; {p++; goto _out; } }}
 	break;
 	case 1694:
-#line 2013 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a3; {p++; goto _out; } }}
 	break;
 	case 1695:
-#line 2014 "char_ref.rl"
 	{te = p+1;{ output->first = 0x219d; {p++; goto _out; } }}
 	break;
 	case 1696:
-#line 2015 "char_ref.rl"
 	{te = p+1;{ output->first = 0x291a; {p++; goto _out; } }}
 	break;
 	case 1697:
-#line 2016 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2236; {p++; goto _out; } }}
 	break;
 	case 1698:
-#line 2017 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211a; {p++; goto _out; } }}
 	break;
 	case 1699:
-#line 2018 "char_ref.rl"
 	{te = p+1;{ output->first = 0x290d; {p++; goto _out; } }}
 	break;
 	case 1700:
-#line 2019 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2773; {p++; goto _out; } }}
 	break;
 	case 1701:
-#line 2020 "char_ref.rl"
 	{te = p+1;{ output->first = 0x7d; {p++; goto _out; } }}
 	break;
 	case 1702:
-#line 2021 "char_ref.rl"
 	{te = p+1;{ output->first = 0x5d; {p++; goto _out; } }}
 	break;
 	case 1703:
-#line 2022 "char_ref.rl"
 	{te = p+1;{ output->first = 0x298c; {p++; goto _out; } }}
 	break;
 	case 1704:
-#line 2023 "char_ref.rl"
 	{te = p+1;{ output->first = 0x298e; {p++; goto _out; } }}
 	break;
 	case 1705:
-#line 2024 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2990; {p++; goto _out; } }}
 	break;
 	case 1706:
-#line 2025 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0159; {p++; goto _out; } }}
 	break;
 	case 1707:
-#line 2026 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0157; {p++; goto _out; } }}
 	break;
 	case 1708:
-#line 2027 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2309; {p++; goto _out; } }}
 	break;
 	case 1709:
-#line 2028 "char_ref.rl"
 	{te = p+1;{ output->first = 0x7d; {p++; goto _out; } }}
 	break;
 	case 1710:
-#line 2029 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0440; {p++; goto _out; } }}
 	break;
 	case 1711:
-#line 2030 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2937; {p++; goto _out; } }}
 	break;
 	case 1712:
-#line 2031 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2969; {p++; goto _out; } }}
 	break;
 	case 1713:
-#line 2032 "char_ref.rl"
 	{te = p+1;{ output->first = 0x201d; {p++; goto _out; } }}
 	break;
 	case 1714:
-#line 2033 "char_ref.rl"
 	{te = p+1;{ output->first = 0x201d; {p++; goto _out; } }}
 	break;
 	case 1715:
-#line 2034 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b3; {p++; goto _out; } }}
 	break;
 	case 1716:
-#line 2035 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211c; {p++; goto _out; } }}
 	break;
 	case 1717:
-#line 2036 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211b; {p++; goto _out; } }}
 	break;
 	case 1718:
-#line 2037 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211c; {p++; goto _out; } }}
 	break;
 	case 1719:
-#line 2038 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211d; {p++; goto _out; } }}
 	break;
 	case 1720:
-#line 2039 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25ad; {p++; goto _out; } }}
 	break;
 	case 1721:
-#line 2040 "char_ref.rl"
 	{te = p+1;{ output->first = 0xae; {p++; goto _out; } }}
 	break;
 	case 1722:
-#line 2042 "char_ref.rl"
 	{te = p+1;{ output->first = 0x297d; {p++; goto _out; } }}
 	break;
 	case 1723:
-#line 2043 "char_ref.rl"
 	{te = p+1;{ output->first = 0x230b; {p++; goto _out; } }}
 	break;
 	case 1724:
-#line 2044 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d52f; {p++; goto _out; } }}
 	break;
 	case 1725:
-#line 2045 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c1; {p++; goto _out; } }}
 	break;
 	case 1726:
-#line 2046 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c0; {p++; goto _out; } }}
 	break;
 	case 1727:
-#line 2047 "char_ref.rl"
 	{te = p+1;{ output->first = 0x296c; {p++; goto _out; } }}
 	break;
 	case 1728:
-#line 2048 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c1; {p++; goto _out; } }}
 	break;
 	case 1729:
-#line 2049 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03f1; {p++; goto _out; } }}
 	break;
 	case 1730:
-#line 2050 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2192; {p++; goto _out; } }}
 	break;
 	case 1731:
-#line 2051 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a3; {p++; goto _out; } }}
 	break;
 	case 1732:
-#line 2052 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c1; {p++; goto _out; } }}
 	break;
 	case 1733:
-#line 2053 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c0; {p++; goto _out; } }}
 	break;
 	case 1734:
-#line 2054 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c4; {p++; goto _out; } }}
 	break;
 	case 1735:
-#line 2055 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21cc; {p++; goto _out; } }}
 	break;
 	case 1736:
-#line 2056 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c9; {p++; goto _out; } }}
 	break;
 	case 1737:
-#line 2057 "char_ref.rl"
 	{te = p+1;{ output->first = 0x219d; {p++; goto _out; } }}
 	break;
 	case 1738:
-#line 2058 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22cc; {p++; goto _out; } }}
 	break;
 	case 1739:
-#line 2059 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02da; {p++; goto _out; } }}
 	break;
 	case 1740:
-#line 2060 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2253; {p++; goto _out; } }}
 	break;
 	case 1741:
-#line 2061 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c4; {p++; goto _out; } }}
 	break;
 	case 1742:
-#line 2062 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21cc; {p++; goto _out; } }}
 	break;
 	case 1743:
-#line 2063 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200f; {p++; goto _out; } }}
 	break;
 	case 1744:
-#line 2064 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23b1; {p++; goto _out; } }}
 	break;
 	case 1745:
-#line 2065 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23b1; {p++; goto _out; } }}
 	break;
 	case 1746:
-#line 2066 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aee; {p++; goto _out; } }}
 	break;
 	case 1747:
-#line 2067 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27ed; {p++; goto _out; } }}
 	break;
 	case 1748:
-#line 2068 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21fe; {p++; goto _out; } }}
 	break;
 	case 1749:
-#line 2069 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27e7; {p++; goto _out; } }}
 	break;
 	case 1750:
-#line 2070 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2986; {p++; goto _out; } }}
 	break;
 	case 1751:
-#line 2071 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d563; {p++; goto _out; } }}
 	break;
 	case 1752:
-#line 2072 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a2e; {p++; goto _out; } }}
 	break;
 	case 1753:
-#line 2073 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a35; {p++; goto _out; } }}
 	break;
 	case 1754:
-#line 2074 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29; {p++; goto _out; } }}
 	break;
 	case 1755:
-#line 2075 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2994; {p++; goto _out; } }}
 	break;
 	case 1756:
-#line 2076 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a12; {p++; goto _out; } }}
 	break;
 	case 1757:
-#line 2077 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c9; {p++; goto _out; } }}
 	break;
 	case 1758:
-#line 2078 "char_ref.rl"
 	{te = p+1;{ output->first = 0x203a; {p++; goto _out; } }}
 	break;
 	case 1759:
-#line 2079 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4c7; {p++; goto _out; } }}
 	break;
 	case 1760:
-#line 2080 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21b1; {p++; goto _out; } }}
 	break;
 	case 1761:
-#line 2081 "char_ref.rl"
 	{te = p+1;{ output->first = 0x5d; {p++; goto _out; } }}
 	break;
 	case 1762:
-#line 2082 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2019; {p++; goto _out; } }}
 	break;
 	case 1763:
-#line 2083 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2019; {p++; goto _out; } }}
 	break;
 	case 1764:
-#line 2084 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22cc; {p++; goto _out; } }}
 	break;
 	case 1765:
-#line 2085 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ca; {p++; goto _out; } }}
 	break;
 	case 1766:
-#line 2086 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b9; {p++; goto _out; } }}
 	break;
 	case 1767:
-#line 2087 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b5; {p++; goto _out; } }}
 	break;
 	case 1768:
-#line 2088 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b8; {p++; goto _out; } }}
 	break;
 	case 1769:
-#line 2089 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29ce; {p++; goto _out; } }}
 	break;
 	case 1770:
-#line 2090 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2968; {p++; goto _out; } }}
 	break;
 	case 1771:
-#line 2091 "char_ref.rl"
 	{te = p+1;{ output->first = 0x211e; {p++; goto _out; } }}
 	break;
 	case 1772:
-#line 2092 "char_ref.rl"
 	{te = p+1;{ output->first = 0x015b; {p++; goto _out; } }}
 	break;
 	case 1773:
-#line 2093 "char_ref.rl"
 	{te = p+1;{ output->first = 0x201a; {p++; goto _out; } }}
 	break;
 	case 1774:
-#line 2094 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227b; {p++; goto _out; } }}
 	break;
 	case 1775:
-#line 2095 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab4; {p++; goto _out; } }}
 	break;
 	case 1776:
-#line 2096 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab8; {p++; goto _out; } }}
 	break;
 	case 1777:
-#line 2097 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0161; {p++; goto _out; } }}
 	break;
 	case 1778:
-#line 2098 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227d; {p++; goto _out; } }}
 	break;
 	case 1779:
-#line 2099 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab0; {p++; goto _out; } }}
 	break;
 	case 1780:
-#line 2100 "char_ref.rl"
 	{te = p+1;{ output->first = 0x015f; {p++; goto _out; } }}
 	break;
 	case 1781:
-#line 2101 "char_ref.rl"
 	{te = p+1;{ output->first = 0x015d; {p++; goto _out; } }}
 	break;
 	case 1782:
-#line 2102 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab6; {p++; goto _out; } }}
 	break;
 	case 1783:
-#line 2103 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aba; {p++; goto _out; } }}
 	break;
 	case 1784:
-#line 2104 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e9; {p++; goto _out; } }}
 	break;
 	case 1785:
-#line 2105 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a13; {p++; goto _out; } }}
 	break;
 	case 1786:
-#line 2106 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227f; {p++; goto _out; } }}
 	break;
 	case 1787:
-#line 2107 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0441; {p++; goto _out; } }}
 	break;
 	case 1788:
-#line 2108 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c5; {p++; goto _out; } }}
 	break;
 	case 1789:
-#line 2109 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a1; {p++; goto _out; } }}
 	break;
 	case 1790:
-#line 2110 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a66; {p++; goto _out; } }}
 	break;
 	case 1791:
-#line 2111 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d8; {p++; goto _out; } }}
 	break;
 	case 1792:
-#line 2112 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2925; {p++; goto _out; } }}
 	break;
 	case 1793:
-#line 2113 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2198; {p++; goto _out; } }}
 	break;
 	case 1794:
-#line 2114 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2198; {p++; goto _out; } }}
 	break;
 	case 1795:
-#line 2115 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa7; {p++; goto _out; } }}
 	break;
 	case 1796:
-#line 2117 "char_ref.rl"
 	{te = p+1;{ output->first = 0x3b; {p++; goto _out; } }}
 	break;
 	case 1797:
-#line 2118 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2929; {p++; goto _out; } }}
 	break;
 	case 1798:
-#line 2119 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2216; {p++; goto _out; } }}
 	break;
 	case 1799:
-#line 2120 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2216; {p++; goto _out; } }}
 	break;
 	case 1800:
-#line 2121 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2736; {p++; goto _out; } }}
 	break;
 	case 1801:
-#line 2122 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d530; {p++; goto _out; } }}
 	break;
 	case 1802:
-#line 2123 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2322; {p++; goto _out; } }}
 	break;
 	case 1803:
-#line 2124 "char_ref.rl"
 	{te = p+1;{ output->first = 0x266f; {p++; goto _out; } }}
 	break;
 	case 1804:
-#line 2125 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0449; {p++; goto _out; } }}
 	break;
 	case 1805:
-#line 2126 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0448; {p++; goto _out; } }}
 	break;
 	case 1806:
-#line 2127 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2223; {p++; goto _out; } }}
 	break;
 	case 1807:
-#line 2128 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2225; {p++; goto _out; } }}
 	break;
 	case 1808:
-#line 2129 "char_ref.rl"
 	{te = p+1;{ output->first = 0xad; {p++; goto _out; } }}
 	break;
 	case 1809:
-#line 2131 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c3; {p++; goto _out; } }}
 	break;
 	case 1810:
-#line 2132 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c2; {p++; goto _out; } }}
 	break;
 	case 1811:
-#line 2133 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c2; {p++; goto _out; } }}
 	break;
 	case 1812:
-#line 2134 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223c; {p++; goto _out; } }}
 	break;
 	case 1813:
-#line 2135 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a6a; {p++; goto _out; } }}
 	break;
 	case 1814:
-#line 2136 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2243; {p++; goto _out; } }}
 	break;
 	case 1815:
-#line 2137 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2243; {p++; goto _out; } }}
 	break;
 	case 1816:
-#line 2138 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a9e; {p++; goto _out; } }}
 	break;
 	case 1817:
-#line 2139 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aa0; {p++; goto _out; } }}
 	break;
 	case 1818:
-#line 2140 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a9d; {p++; goto _out; } }}
 	break;
 	case 1819:
-#line 2141 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a9f; {p++; goto _out; } }}
 	break;
 	case 1820:
-#line 2142 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2246; {p++; goto _out; } }}
 	break;
 	case 1821:
-#line 2143 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a24; {p++; goto _out; } }}
 	break;
 	case 1822:
-#line 2144 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2972; {p++; goto _out; } }}
 	break;
 	case 1823:
-#line 2145 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2190; {p++; goto _out; } }}
 	break;
 	case 1824:
-#line 2146 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2216; {p++; goto _out; } }}
 	break;
 	case 1825:
-#line 2147 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a33; {p++; goto _out; } }}
 	break;
 	case 1826:
-#line 2148 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29e4; {p++; goto _out; } }}
 	break;
 	case 1827:
-#line 2149 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2223; {p++; goto _out; } }}
 	break;
 	case 1828:
-#line 2150 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2323; {p++; goto _out; } }}
 	break;
 	case 1829:
-#line 2151 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aaa; {p++; goto _out; } }}
 	break;
 	case 1830:
-#line 2152 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aac; {p++; goto _out; } }}
 	break;
 	case 1831:
-#line 2153 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aac; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 1832:
-#line 2154 "char_ref.rl"
 	{te = p+1;{ output->first = 0x044c; {p++; goto _out; } }}
 	break;
 	case 1833:
-#line 2155 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2f; {p++; goto _out; } }}
 	break;
 	case 1834:
-#line 2156 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29c4; {p++; goto _out; } }}
 	break;
 	case 1835:
-#line 2157 "char_ref.rl"
 	{te = p+1;{ output->first = 0x233f; {p++; goto _out; } }}
 	break;
 	case 1836:
-#line 2158 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d564; {p++; goto _out; } }}
 	break;
 	case 1837:
-#line 2159 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2660; {p++; goto _out; } }}
 	break;
 	case 1838:
-#line 2160 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2660; {p++; goto _out; } }}
 	break;
 	case 1839:
-#line 2161 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2225; {p++; goto _out; } }}
 	break;
 	case 1840:
-#line 2162 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2293; {p++; goto _out; } }}
 	break;
 	case 1841:
-#line 2163 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2293; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 1842:
-#line 2164 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2294; {p++; goto _out; } }}
 	break;
 	case 1843:
-#line 2165 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2294; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 1844:
-#line 2166 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228f; {p++; goto _out; } }}
 	break;
 	case 1845:
-#line 2167 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2291; {p++; goto _out; } }}
 	break;
 	case 1846:
-#line 2168 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228f; {p++; goto _out; } }}
 	break;
 	case 1847:
-#line 2169 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2291; {p++; goto _out; } }}
 	break;
 	case 1848:
-#line 2170 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2290; {p++; goto _out; } }}
 	break;
 	case 1849:
-#line 2171 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2292; {p++; goto _out; } }}
 	break;
 	case 1850:
-#line 2172 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2290; {p++; goto _out; } }}
 	break;
 	case 1851:
-#line 2173 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2292; {p++; goto _out; } }}
 	break;
 	case 1852:
-#line 2174 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25a1; {p++; goto _out; } }}
 	break;
 	case 1853:
-#line 2175 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25a1; {p++; goto _out; } }}
 	break;
 	case 1854:
-#line 2176 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25aa; {p++; goto _out; } }}
 	break;
 	case 1855:
-#line 2177 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25aa; {p++; goto _out; } }}
 	break;
 	case 1856:
-#line 2178 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2192; {p++; goto _out; } }}
 	break;
 	case 1857:
-#line 2179 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4c8; {p++; goto _out; } }}
 	break;
 	case 1858:
-#line 2180 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2216; {p++; goto _out; } }}
 	break;
 	case 1859:
-#line 2181 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2323; {p++; goto _out; } }}
 	break;
 	case 1860:
-#line 2182 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c6; {p++; goto _out; } }}
 	break;
 	case 1861:
-#line 2183 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2606; {p++; goto _out; } }}
 	break;
 	case 1862:
-#line 2184 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2605; {p++; goto _out; } }}
 	break;
 	case 1863:
-#line 2185 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03f5; {p++; goto _out; } }}
 	break;
 	case 1864:
-#line 2186 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03d5; {p++; goto _out; } }}
 	break;
 	case 1865:
-#line 2187 "char_ref.rl"
 	{te = p+1;{ output->first = 0xaf; {p++; goto _out; } }}
 	break;
 	case 1866:
-#line 2188 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2282; {p++; goto _out; } }}
 	break;
 	case 1867:
-#line 2189 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac5; {p++; goto _out; } }}
 	break;
 	case 1868:
-#line 2190 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2abd; {p++; goto _out; } }}
 	break;
 	case 1869:
-#line 2191 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2286; {p++; goto _out; } }}
 	break;
 	case 1870:
-#line 2192 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac3; {p++; goto _out; } }}
 	break;
 	case 1871:
-#line 2193 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac1; {p++; goto _out; } }}
 	break;
 	case 1872:
-#line 2194 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2acb; {p++; goto _out; } }}
 	break;
 	case 1873:
-#line 2195 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228a; {p++; goto _out; } }}
 	break;
 	case 1874:
-#line 2196 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2abf; {p++; goto _out; } }}
 	break;
 	case 1875:
-#line 2197 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2979; {p++; goto _out; } }}
 	break;
 	case 1876:
-#line 2198 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2282; {p++; goto _out; } }}
 	break;
 	case 1877:
-#line 2199 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2286; {p++; goto _out; } }}
 	break;
 	case 1878:
-#line 2200 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac5; {p++; goto _out; } }}
 	break;
 	case 1879:
-#line 2201 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228a; {p++; goto _out; } }}
 	break;
 	case 1880:
-#line 2202 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2acb; {p++; goto _out; } }}
 	break;
 	case 1881:
-#line 2203 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac7; {p++; goto _out; } }}
 	break;
 	case 1882:
-#line 2204 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ad5; {p++; goto _out; } }}
 	break;
 	case 1883:
-#line 2205 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ad3; {p++; goto _out; } }}
 	break;
 	case 1884:
-#line 2206 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227b; {p++; goto _out; } }}
 	break;
 	case 1885:
-#line 2207 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab8; {p++; goto _out; } }}
 	break;
 	case 1886:
-#line 2208 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227d; {p++; goto _out; } }}
 	break;
 	case 1887:
-#line 2209 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab0; {p++; goto _out; } }}
 	break;
 	case 1888:
-#line 2210 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2aba; {p++; goto _out; } }}
 	break;
 	case 1889:
-#line 2211 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ab6; {p++; goto _out; } }}
 	break;
 	case 1890:
-#line 2212 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22e9; {p++; goto _out; } }}
 	break;
 	case 1891:
-#line 2213 "char_ref.rl"
 	{te = p+1;{ output->first = 0x227f; {p++; goto _out; } }}
 	break;
 	case 1892:
-#line 2214 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2211; {p++; goto _out; } }}
 	break;
 	case 1893:
-#line 2215 "char_ref.rl"
 	{te = p+1;{ output->first = 0x266a; {p++; goto _out; } }}
 	break;
 	case 1894:
-#line 2216 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb9; {p++; goto _out; } }}
 	break;
 	case 1895:
-#line 2218 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb2; {p++; goto _out; } }}
 	break;
 	case 1896:
-#line 2220 "char_ref.rl"
 	{te = p+1;{ output->first = 0xb3; {p++; goto _out; } }}
 	break;
 	case 1897:
-#line 2222 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2283; {p++; goto _out; } }}
 	break;
 	case 1898:
-#line 2223 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac6; {p++; goto _out; } }}
 	break;
 	case 1899:
-#line 2224 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2abe; {p++; goto _out; } }}
 	break;
 	case 1900:
-#line 2225 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ad8; {p++; goto _out; } }}
 	break;
 	case 1901:
-#line 2226 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2287; {p++; goto _out; } }}
 	break;
 	case 1902:
-#line 2227 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac4; {p++; goto _out; } }}
 	break;
 	case 1903:
-#line 2228 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27c9; {p++; goto _out; } }}
 	break;
 	case 1904:
-#line 2229 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ad7; {p++; goto _out; } }}
 	break;
 	case 1905:
-#line 2230 "char_ref.rl"
 	{te = p+1;{ output->first = 0x297b; {p++; goto _out; } }}
 	break;
 	case 1906:
-#line 2231 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac2; {p++; goto _out; } }}
 	break;
 	case 1907:
-#line 2232 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2acc; {p++; goto _out; } }}
 	break;
 	case 1908:
-#line 2233 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228b; {p++; goto _out; } }}
 	break;
 	case 1909:
-#line 2234 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac0; {p++; goto _out; } }}
 	break;
 	case 1910:
-#line 2235 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2283; {p++; goto _out; } }}
 	break;
 	case 1911:
-#line 2236 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2287; {p++; goto _out; } }}
 	break;
 	case 1912:
-#line 2237 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac6; {p++; goto _out; } }}
 	break;
 	case 1913:
-#line 2238 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228b; {p++; goto _out; } }}
 	break;
 	case 1914:
-#line 2239 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2acc; {p++; goto _out; } }}
 	break;
 	case 1915:
-#line 2240 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ac8; {p++; goto _out; } }}
 	break;
 	case 1916:
-#line 2241 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ad4; {p++; goto _out; } }}
 	break;
 	case 1917:
-#line 2242 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ad6; {p++; goto _out; } }}
 	break;
 	case 1918:
-#line 2243 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d9; {p++; goto _out; } }}
 	break;
 	case 1919:
-#line 2244 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2926; {p++; goto _out; } }}
 	break;
 	case 1920:
-#line 2245 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2199; {p++; goto _out; } }}
 	break;
 	case 1921:
-#line 2246 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2199; {p++; goto _out; } }}
 	break;
 	case 1922:
-#line 2247 "char_ref.rl"
 	{te = p+1;{ output->first = 0x292a; {p++; goto _out; } }}
 	break;
 	case 1923:
-#line 2248 "char_ref.rl"
 	{te = p+1;{ output->first = 0xdf; {p++; goto _out; } }}
 	break;
 	case 1924:
-#line 2250 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2316; {p++; goto _out; } }}
 	break;
 	case 1925:
-#line 2251 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c4; {p++; goto _out; } }}
 	break;
 	case 1926:
-#line 2252 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23b4; {p++; goto _out; } }}
 	break;
 	case 1927:
-#line 2253 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0165; {p++; goto _out; } }}
 	break;
 	case 1928:
-#line 2254 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0163; {p++; goto _out; } }}
 	break;
 	case 1929:
-#line 2255 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0442; {p++; goto _out; } }}
 	break;
 	case 1930:
-#line 2256 "char_ref.rl"
 	{te = p+1;{ output->first = 0x20db; {p++; goto _out; } }}
 	break;
 	case 1931:
-#line 2257 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2315; {p++; goto _out; } }}
 	break;
 	case 1932:
-#line 2258 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d531; {p++; goto _out; } }}
 	break;
 	case 1933:
-#line 2259 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2234; {p++; goto _out; } }}
 	break;
 	case 1934:
-#line 2260 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2234; {p++; goto _out; } }}
 	break;
 	case 1935:
-#line 2261 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03b8; {p++; goto _out; } }}
 	break;
 	case 1936:
-#line 2262 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03d1; {p++; goto _out; } }}
 	break;
 	case 1937:
-#line 2263 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03d1; {p++; goto _out; } }}
 	break;
 	case 1938:
-#line 2264 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2248; {p++; goto _out; } }}
 	break;
 	case 1939:
-#line 2265 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223c; {p++; goto _out; } }}
 	break;
 	case 1940:
-#line 2266 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2009; {p++; goto _out; } }}
 	break;
 	case 1941:
-#line 2267 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2248; {p++; goto _out; } }}
 	break;
 	case 1942:
-#line 2268 "char_ref.rl"
 	{te = p+1;{ output->first = 0x223c; {p++; goto _out; } }}
 	break;
 	case 1943:
-#line 2269 "char_ref.rl"
 	{te = p+1;{ output->first = 0xfe; {p++; goto _out; } }}
 	break;
 	case 1944:
-#line 2271 "char_ref.rl"
 	{te = p+1;{ output->first = 0x02dc; {p++; goto _out; } }}
 	break;
 	case 1945:
-#line 2272 "char_ref.rl"
 	{te = p+1;{ output->first = 0xd7; {p++; goto _out; } }}
 	break;
 	case 1946:
-#line 2274 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a0; {p++; goto _out; } }}
 	break;
 	case 1947:
-#line 2275 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a31; {p++; goto _out; } }}
 	break;
 	case 1948:
-#line 2276 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a30; {p++; goto _out; } }}
 	break;
 	case 1949:
-#line 2277 "char_ref.rl"
 	{te = p+1;{ output->first = 0x222d; {p++; goto _out; } }}
 	break;
 	case 1950:
-#line 2278 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2928; {p++; goto _out; } }}
 	break;
 	case 1951:
-#line 2279 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a4; {p++; goto _out; } }}
 	break;
 	case 1952:
-#line 2280 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2336; {p++; goto _out; } }}
 	break;
 	case 1953:
-#line 2281 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2af1; {p++; goto _out; } }}
 	break;
 	case 1954:
-#line 2282 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d565; {p++; goto _out; } }}
 	break;
 	case 1955:
-#line 2283 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ada; {p++; goto _out; } }}
 	break;
 	case 1956:
-#line 2284 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2929; {p++; goto _out; } }}
 	break;
 	case 1957:
-#line 2285 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2034; {p++; goto _out; } }}
 	break;
 	case 1958:
-#line 2286 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2122; {p++; goto _out; } }}
 	break;
 	case 1959:
-#line 2287 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b5; {p++; goto _out; } }}
 	break;
 	case 1960:
-#line 2288 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25bf; {p++; goto _out; } }}
 	break;
 	case 1961:
-#line 2289 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25c3; {p++; goto _out; } }}
 	break;
 	case 1962:
-#line 2290 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b4; {p++; goto _out; } }}
 	break;
 	case 1963:
-#line 2291 "char_ref.rl"
 	{te = p+1;{ output->first = 0x225c; {p++; goto _out; } }}
 	break;
 	case 1964:
-#line 2292 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b9; {p++; goto _out; } }}
 	break;
 	case 1965:
-#line 2293 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b5; {p++; goto _out; } }}
 	break;
 	case 1966:
-#line 2294 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25ec; {p++; goto _out; } }}
 	break;
 	case 1967:
-#line 2295 "char_ref.rl"
 	{te = p+1;{ output->first = 0x225c; {p++; goto _out; } }}
 	break;
 	case 1968:
-#line 2296 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a3a; {p++; goto _out; } }}
 	break;
 	case 1969:
-#line 2297 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a39; {p++; goto _out; } }}
 	break;
 	case 1970:
-#line 2298 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29cd; {p++; goto _out; } }}
 	break;
 	case 1971:
-#line 2299 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a3b; {p++; goto _out; } }}
 	break;
 	case 1972:
-#line 2300 "char_ref.rl"
 	{te = p+1;{ output->first = 0x23e2; {p++; goto _out; } }}
 	break;
 	case 1973:
-#line 2301 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4c9; {p++; goto _out; } }}
 	break;
 	case 1974:
-#line 2302 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0446; {p++; goto _out; } }}
 	break;
 	case 1975:
-#line 2303 "char_ref.rl"
 	{te = p+1;{ output->first = 0x045b; {p++; goto _out; } }}
 	break;
 	case 1976:
-#line 2304 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0167; {p++; goto _out; } }}
 	break;
 	case 1977:
-#line 2305 "char_ref.rl"
 	{te = p+1;{ output->first = 0x226c; {p++; goto _out; } }}
 	break;
 	case 1978:
-#line 2306 "char_ref.rl"
 	{te = p+1;{ output->first = 0x219e; {p++; goto _out; } }}
 	break;
 	case 1979:
-#line 2307 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21a0; {p++; goto _out; } }}
 	break;
 	case 1980:
-#line 2308 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d1; {p++; goto _out; } }}
 	break;
 	case 1981:
-#line 2309 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2963; {p++; goto _out; } }}
 	break;
 	case 1982:
-#line 2310 "char_ref.rl"
 	{te = p+1;{ output->first = 0xfa; {p++; goto _out; } }}
 	break;
 	case 1983:
-#line 2312 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2191; {p++; goto _out; } }}
 	break;
 	case 1984:
-#line 2313 "char_ref.rl"
 	{te = p+1;{ output->first = 0x045e; {p++; goto _out; } }}
 	break;
 	case 1985:
-#line 2314 "char_ref.rl"
 	{te = p+1;{ output->first = 0x016d; {p++; goto _out; } }}
 	break;
 	case 1986:
-#line 2315 "char_ref.rl"
 	{te = p+1;{ output->first = 0xfb; {p++; goto _out; } }}
 	break;
 	case 1987:
-#line 2317 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0443; {p++; goto _out; } }}
 	break;
 	case 1988:
-#line 2318 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c5; {p++; goto _out; } }}
 	break;
 	case 1989:
-#line 2319 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0171; {p++; goto _out; } }}
 	break;
 	case 1990:
-#line 2320 "char_ref.rl"
 	{te = p+1;{ output->first = 0x296e; {p++; goto _out; } }}
 	break;
 	case 1991:
-#line 2321 "char_ref.rl"
 	{te = p+1;{ output->first = 0x297e; {p++; goto _out; } }}
 	break;
 	case 1992:
-#line 2322 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d532; {p++; goto _out; } }}
 	break;
 	case 1993:
-#line 2323 "char_ref.rl"
 	{te = p+1;{ output->first = 0xf9; {p++; goto _out; } }}
 	break;
 	case 1994:
-#line 2325 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bf; {p++; goto _out; } }}
 	break;
 	case 1995:
-#line 2326 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21be; {p++; goto _out; } }}
 	break;
 	case 1996:
-#line 2327 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2580; {p++; goto _out; } }}
 	break;
 	case 1997:
-#line 2328 "char_ref.rl"
 	{te = p+1;{ output->first = 0x231c; {p++; goto _out; } }}
 	break;
 	case 1998:
-#line 2329 "char_ref.rl"
 	{te = p+1;{ output->first = 0x231c; {p++; goto _out; } }}
 	break;
 	case 1999:
-#line 2330 "char_ref.rl"
 	{te = p+1;{ output->first = 0x230f; {p++; goto _out; } }}
 	break;
 	case 2000:
-#line 2331 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25f8; {p++; goto _out; } }}
 	break;
 	case 2001:
-#line 2332 "char_ref.rl"
 	{te = p+1;{ output->first = 0x016b; {p++; goto _out; } }}
 	break;
 	case 2002:
-#line 2333 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa8; {p++; goto _out; } }}
 	break;
 	case 2003:
-#line 2335 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0173; {p++; goto _out; } }}
 	break;
 	case 2004:
-#line 2336 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d566; {p++; goto _out; } }}
 	break;
 	case 2005:
-#line 2337 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2191; {p++; goto _out; } }}
 	break;
 	case 2006:
-#line 2338 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2195; {p++; goto _out; } }}
 	break;
 	case 2007:
-#line 2339 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21bf; {p++; goto _out; } }}
 	break;
 	case 2008:
-#line 2340 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21be; {p++; goto _out; } }}
 	break;
 	case 2009:
-#line 2341 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228e; {p++; goto _out; } }}
 	break;
 	case 2010:
-#line 2342 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c5; {p++; goto _out; } }}
 	break;
 	case 2011:
-#line 2343 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03d2; {p++; goto _out; } }}
 	break;
 	case 2012:
-#line 2344 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c5; {p++; goto _out; } }}
 	break;
 	case 2013:
-#line 2345 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c8; {p++; goto _out; } }}
 	break;
 	case 2014:
-#line 2346 "char_ref.rl"
 	{te = p+1;{ output->first = 0x231d; {p++; goto _out; } }}
 	break;
 	case 2015:
-#line 2347 "char_ref.rl"
 	{te = p+1;{ output->first = 0x231d; {p++; goto _out; } }}
 	break;
 	case 2016:
-#line 2348 "char_ref.rl"
 	{te = p+1;{ output->first = 0x230e; {p++; goto _out; } }}
 	break;
 	case 2017:
-#line 2349 "char_ref.rl"
 	{te = p+1;{ output->first = 0x016f; {p++; goto _out; } }}
 	break;
 	case 2018:
-#line 2350 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25f9; {p++; goto _out; } }}
 	break;
 	case 2019:
-#line 2351 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4ca; {p++; goto _out; } }}
 	break;
 	case 2020:
-#line 2352 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22f0; {p++; goto _out; } }}
 	break;
 	case 2021:
-#line 2353 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0169; {p++; goto _out; } }}
 	break;
 	case 2022:
-#line 2354 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b5; {p++; goto _out; } }}
 	break;
 	case 2023:
-#line 2355 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b4; {p++; goto _out; } }}
 	break;
 	case 2024:
-#line 2356 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21c8; {p++; goto _out; } }}
 	break;
 	case 2025:
-#line 2357 "char_ref.rl"
 	{te = p+1;{ output->first = 0xfc; {p++; goto _out; } }}
 	break;
 	case 2026:
-#line 2359 "char_ref.rl"
 	{te = p+1;{ output->first = 0x29a7; {p++; goto _out; } }}
 	break;
 	case 2027:
-#line 2360 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21d5; {p++; goto _out; } }}
 	break;
 	case 2028:
-#line 2361 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ae8; {p++; goto _out; } }}
 	break;
 	case 2029:
-#line 2362 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2ae9; {p++; goto _out; } }}
 	break;
 	case 2030:
-#line 2363 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a8; {p++; goto _out; } }}
 	break;
 	case 2031:
-#line 2364 "char_ref.rl"
 	{te = p+1;{ output->first = 0x299c; {p++; goto _out; } }}
 	break;
 	case 2032:
-#line 2365 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03f5; {p++; goto _out; } }}
 	break;
 	case 2033:
-#line 2366 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03f0; {p++; goto _out; } }}
 	break;
 	case 2034:
-#line 2367 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2205; {p++; goto _out; } }}
 	break;
 	case 2035:
-#line 2368 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03d5; {p++; goto _out; } }}
 	break;
 	case 2036:
-#line 2369 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03d6; {p++; goto _out; } }}
 	break;
 	case 2037:
-#line 2370 "char_ref.rl"
 	{te = p+1;{ output->first = 0x221d; {p++; goto _out; } }}
 	break;
 	case 2038:
-#line 2371 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2195; {p++; goto _out; } }}
 	break;
 	case 2039:
-#line 2372 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03f1; {p++; goto _out; } }}
 	break;
 	case 2040:
-#line 2373 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03c2; {p++; goto _out; } }}
 	break;
 	case 2041:
-#line 2374 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228a; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 2042:
-#line 2375 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2acb; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 2043:
-#line 2376 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228b; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 2044:
-#line 2377 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2acc; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 2045:
-#line 2378 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03d1; {p++; goto _out; } }}
 	break;
 	case 2046:
-#line 2379 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b2; {p++; goto _out; } }}
 	break;
 	case 2047:
-#line 2380 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b3; {p++; goto _out; } }}
 	break;
 	case 2048:
-#line 2381 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0432; {p++; goto _out; } }}
 	break;
 	case 2049:
-#line 2382 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22a2; {p++; goto _out; } }}
 	break;
 	case 2050:
-#line 2383 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2228; {p++; goto _out; } }}
 	break;
 	case 2051:
-#line 2384 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22bb; {p++; goto _out; } }}
 	break;
 	case 2052:
-#line 2385 "char_ref.rl"
 	{te = p+1;{ output->first = 0x225a; {p++; goto _out; } }}
 	break;
 	case 2053:
-#line 2386 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22ee; {p++; goto _out; } }}
 	break;
 	case 2054:
-#line 2387 "char_ref.rl"
 	{te = p+1;{ output->first = 0x7c; {p++; goto _out; } }}
 	break;
 	case 2055:
-#line 2388 "char_ref.rl"
 	{te = p+1;{ output->first = 0x7c; {p++; goto _out; } }}
 	break;
 	case 2056:
-#line 2389 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d533; {p++; goto _out; } }}
 	break;
 	case 2057:
-#line 2390 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b2; {p++; goto _out; } }}
 	break;
 	case 2058:
-#line 2391 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2282; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 2059:
-#line 2392 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2283; output->second = 0x20d2; {p++; goto _out; } }}
 	break;
 	case 2060:
-#line 2393 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d567; {p++; goto _out; } }}
 	break;
 	case 2061:
-#line 2394 "char_ref.rl"
 	{te = p+1;{ output->first = 0x221d; {p++; goto _out; } }}
 	break;
 	case 2062:
-#line 2395 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22b3; {p++; goto _out; } }}
 	break;
 	case 2063:
-#line 2396 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4cb; {p++; goto _out; } }}
 	break;
 	case 2064:
-#line 2397 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2acb; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 2065:
-#line 2398 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228a; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 2066:
-#line 2399 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2acc; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 2067:
-#line 2400 "char_ref.rl"
 	{te = p+1;{ output->first = 0x228b; output->second = 0xfe00; {p++; goto _out; } }}
 	break;
 	case 2068:
-#line 2401 "char_ref.rl"
 	{te = p+1;{ output->first = 0x299a; {p++; goto _out; } }}
 	break;
 	case 2069:
-#line 2402 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0175; {p++; goto _out; } }}
 	break;
 	case 2070:
-#line 2403 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a5f; {p++; goto _out; } }}
 	break;
 	case 2071:
-#line 2404 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2227; {p++; goto _out; } }}
 	break;
 	case 2072:
-#line 2405 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2259; {p++; goto _out; } }}
 	break;
 	case 2073:
-#line 2406 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2118; {p++; goto _out; } }}
 	break;
 	case 2074:
-#line 2407 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d534; {p++; goto _out; } }}
 	break;
 	case 2075:
-#line 2408 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d568; {p++; goto _out; } }}
 	break;
 	case 2076:
-#line 2409 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2118; {p++; goto _out; } }}
 	break;
 	case 2077:
-#line 2410 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2240; {p++; goto _out; } }}
 	break;
 	case 2078:
-#line 2411 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2240; {p++; goto _out; } }}
 	break;
 	case 2079:
-#line 2412 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4cc; {p++; goto _out; } }}
 	break;
 	case 2080:
-#line 2413 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c2; {p++; goto _out; } }}
 	break;
 	case 2081:
-#line 2414 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25ef; {p++; goto _out; } }}
 	break;
 	case 2082:
-#line 2415 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c3; {p++; goto _out; } }}
 	break;
 	case 2083:
-#line 2416 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25bd; {p++; goto _out; } }}
 	break;
 	case 2084:
-#line 2417 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d535; {p++; goto _out; } }}
 	break;
 	case 2085:
-#line 2418 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27fa; {p++; goto _out; } }}
 	break;
 	case 2086:
-#line 2419 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f7; {p++; goto _out; } }}
 	break;
 	case 2087:
-#line 2420 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03be; {p++; goto _out; } }}
 	break;
 	case 2088:
-#line 2421 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f8; {p++; goto _out; } }}
 	break;
 	case 2089:
-#line 2422 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f5; {p++; goto _out; } }}
 	break;
 	case 2090:
-#line 2423 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27fc; {p++; goto _out; } }}
 	break;
 	case 2091:
-#line 2424 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22fb; {p++; goto _out; } }}
 	break;
 	case 2092:
-#line 2425 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a00; {p++; goto _out; } }}
 	break;
 	case 2093:
-#line 2426 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d569; {p++; goto _out; } }}
 	break;
 	case 2094:
-#line 2427 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a01; {p++; goto _out; } }}
 	break;
 	case 2095:
-#line 2428 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a02; {p++; goto _out; } }}
 	break;
 	case 2096:
-#line 2429 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f9; {p++; goto _out; } }}
 	break;
 	case 2097:
-#line 2430 "char_ref.rl"
 	{te = p+1;{ output->first = 0x27f6; {p++; goto _out; } }}
 	break;
 	case 2098:
-#line 2431 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4cd; {p++; goto _out; } }}
 	break;
 	case 2099:
-#line 2432 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a06; {p++; goto _out; } }}
 	break;
 	case 2100:
-#line 2433 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2a04; {p++; goto _out; } }}
 	break;
 	case 2101:
-#line 2434 "char_ref.rl"
 	{te = p+1;{ output->first = 0x25b3; {p++; goto _out; } }}
 	break;
 	case 2102:
-#line 2435 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c1; {p++; goto _out; } }}
 	break;
 	case 2103:
-#line 2436 "char_ref.rl"
 	{te = p+1;{ output->first = 0x22c0; {p++; goto _out; } }}
 	break;
 	case 2104:
-#line 2437 "char_ref.rl"
 	{te = p+1;{ output->first = 0xfd; {p++; goto _out; } }}
 	break;
 	case 2105:
-#line 2439 "char_ref.rl"
 	{te = p+1;{ output->first = 0x044f; {p++; goto _out; } }}
 	break;
 	case 2106:
-#line 2440 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0177; {p++; goto _out; } }}
 	break;
 	case 2107:
-#line 2441 "char_ref.rl"
 	{te = p+1;{ output->first = 0x044b; {p++; goto _out; } }}
 	break;
 	case 2108:
-#line 2442 "char_ref.rl"
 	{te = p+1;{ output->first = 0xa5; {p++; goto _out; } }}
 	break;
 	case 2109:
-#line 2444 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d536; {p++; goto _out; } }}
 	break;
 	case 2110:
-#line 2445 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0457; {p++; goto _out; } }}
 	break;
 	case 2111:
-#line 2446 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d56a; {p++; goto _out; } }}
 	break;
 	case 2112:
-#line 2447 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4ce; {p++; goto _out; } }}
 	break;
 	case 2113:
-#line 2448 "char_ref.rl"
 	{te = p+1;{ output->first = 0x044e; {p++; goto _out; } }}
 	break;
 	case 2114:
-#line 2449 "char_ref.rl"
 	{te = p+1;{ output->first = 0xff; {p++; goto _out; } }}
 	break;
 	case 2115:
-#line 2451 "char_ref.rl"
 	{te = p+1;{ output->first = 0x017a; {p++; goto _out; } }}
 	break;
 	case 2116:
-#line 2452 "char_ref.rl"
 	{te = p+1;{ output->first = 0x017e; {p++; goto _out; } }}
 	break;
 	case 2117:
-#line 2453 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0437; {p++; goto _out; } }}
 	break;
 	case 2118:
-#line 2454 "char_ref.rl"
 	{te = p+1;{ output->first = 0x017c; {p++; goto _out; } }}
 	break;
 	case 2119:
-#line 2455 "char_ref.rl"
 	{te = p+1;{ output->first = 0x2128; {p++; goto _out; } }}
 	break;
 	case 2120:
-#line 2456 "char_ref.rl"
 	{te = p+1;{ output->first = 0x03b6; {p++; goto _out; } }}
 	break;
 	case 2121:
-#line 2457 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d537; {p++; goto _out; } }}
 	break;
 	case 2122:
-#line 2458 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0436; {p++; goto _out; } }}
 	break;
 	case 2123:
-#line 2459 "char_ref.rl"
 	{te = p+1;{ output->first = 0x21dd; {p++; goto _out; } }}
 	break;
 	case 2124:
-#line 2460 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d56b; {p++; goto _out; } }}
 	break;
 	case 2125:
-#line 2461 "char_ref.rl"
 	{te = p+1;{ output->first = 0x0001d4cf; {p++; goto _out; } }}
 	break;
 	case 2126:
-#line 2462 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200d; {p++; goto _out; } }}
 	break;
 	case 2127:
-#line 2463 "char_ref.rl"
 	{te = p+1;{ output->first = 0x200c; {p++; goto _out; } }}
 	break;
 	case 2128:
-#line 234 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xc6; {p++; goto _out; } }}
 	break;
 	case 2129:
-#line 236 "char_ref.rl"
 	{te = p;p--;{ output->first = 0x26; {p++; goto _out; } }}
 	break;
 	case 2130:
-#line 238 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xc1; {p++; goto _out; } }}
 	break;
 	case 2131:
-#line 241 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xc2; {p++; goto _out; } }}
 	break;
 	case 2132:
-#line 245 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xc0; {p++; goto _out; } }}
 	break;
 	case 2133:
-#line 253 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xc5; {p++; goto _out; } }}
 	break;
 	case 2134:
-#line 257 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xc3; {p++; goto _out; } }}
 	break;
 	case 2135:
-#line 259 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xc4; {p++; goto _out; } }}
 	break;
 	case 2136:
-#line 274 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa9; {p++; goto _out; } }}
 	break;
 	case 2137:
-#line 281 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xc7; {p++; goto _out; } }}
 	break;
 	case 2138:
-#line 364 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xd0; {p++; goto _out; } }}
 	break;
 	case 2139:
-#line 366 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xc9; {p++; goto _out; } }}
 	break;
 	case 2140:
-#line 369 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xca; {p++; goto _out; } }}
 	break;
 	case 2141:
-#line 374 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xc8; {p++; goto _out; } }}
 	break;
 	case 2142:
-#line 389 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xcb; {p++; goto _out; } }}
 	break;
 	case 2143:
-#line 402 "char_ref.rl"
 	{te = p;p--;{ output->first = 0x3e; {p++; goto _out; } }}
 	break;
 	case 2144:
-#line 438 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xcd; {p++; goto _out; } }}
 	break;
 	case 2145:
-#line 440 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xce; {p++; goto _out; } }}
 	break;
 	case 2146:
-#line 445 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xcc; {p++; goto _out; } }}
 	break;
 	case 2147:
-#line 462 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xcf; {p++; goto _out; } }}
 	break;
 	case 2148:
-#line 480 "char_ref.rl"
 	{te = p;p--;{ output->first = 0x3c; {p++; goto _out; } }}
 	break;
 	case 2149:
-#line 617 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xd1; {p++; goto _out; } }}
 	break;
 	case 2150:
-#line 621 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xd3; {p++; goto _out; } }}
 	break;
 	case 2151:
-#line 623 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xd4; {p++; goto _out; } }}
 	break;
 	case 2152:
-#line 628 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xd2; {p++; goto _out; } }}
 	break;
 	case 2153:
-#line 638 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xd8; {p++; goto _out; } }}
 	break;
 	case 2154:
-#line 640 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xd5; {p++; goto _out; } }}
 	break;
 	case 2155:
-#line 643 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xd6; {p++; goto _out; } }}
 	break;
 	case 2156:
-#line 668 "char_ref.rl"
 	{te = p;p--;{ output->first = 0x22; {p++; goto _out; } }}
 	break;
 	case 2157:
-#line 674 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xae; {p++; goto _out; } }}
 	break;
 	case 2158:
-#line 758 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xde; {p++; goto _out; } }}
 	break;
 	case 2159:
-#line 781 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xda; {p++; goto _out; } }}
 	break;
 	case 2160:
-#line 787 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xdb; {p++; goto _out; } }}
 	break;
 	case 2161:
-#line 792 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xd9; {p++; goto _out; } }}
 	break;
 	case 2162:
-#line 819 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xdc; {p++; goto _out; } }}
 	break;
 	case 2163:
-#line 850 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xdd; {p++; goto _out; } }}
 	break;
 	case 2164:
-#line 868 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xe1; {p++; goto _out; } }}
 	break;
 	case 2165:
-#line 874 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xe2; {p++; goto _out; } }}
 	break;
 	case 2166:
-#line 876 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xb4; {p++; goto _out; } }}
 	break;
 	case 2167:
-#line 879 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xe6; {p++; goto _out; } }}
 	break;
 	case 2168:
-#line 883 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xe0; {p++; goto _out; } }}
 	break;
 	case 2169:
-#line 890 "char_ref.rl"
 	{te = p;p--;{ output->first = 0x26; {p++; goto _out; } }}
 	break;
 	case 2170:
-#line 925 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xe5; {p++; goto _out; } }}
 	break;
 	case 2171:
-#line 931 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xe3; {p++; goto _out; } }}
 	break;
 	case 2172:
-#line 933 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xe4; {p++; goto _out; } }}
 	break;
 	case 2173:
-#line 1038 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa6; {p++; goto _out; } }}
 	break;
 	case 2174:
-#line 1065 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xe7; {p++; goto _out; } }}
 	break;
 	case 2175:
-#line 1071 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xb8; {p++; goto _out; } }}
 	break;
 	case 2176:
-#line 1074 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa2; {p++; goto _out; } }}
 	break;
 	case 2177:
-#line 1113 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa9; {p++; goto _out; } }}
 	break;
 	case 2178:
-#line 1143 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa4; {p++; goto _out; } }}
 	break;
 	case 2179:
-#line 1167 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xb0; {p++; goto _out; } }}
 	break;
 	case 2180:
-#line 1183 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xf7; {p++; goto _out; } }}
 	break;
 	case 2181:
-#line 1220 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xe9; {p++; goto _out; } }}
 	break;
 	case 2182:
-#line 1225 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xea; {p++; goto _out; } }}
 	break;
 	case 2183:
-#line 1234 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xe8; {p++; goto _out; } }}
 	break;
 	case 2184:
-#line 1276 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xf0; {p++; goto _out; } }}
 	break;
 	case 2185:
-#line 1278 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xeb; {p++; goto _out; } }}
 	break;
 	case 2186:
-#line 1303 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xbd; {p++; goto _out; } }}
 	break;
 	case 2187:
-#line 1306 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xbc; {p++; goto _out; } }}
 	break;
 	case 2188:
-#line 1313 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xbe; {p++; goto _out; } }}
 	break;
 	case 2189:
-#line 1368 "char_ref.rl"
 	{te = p;p--;{ output->first = 0x3e; {p++; goto _out; } }}
 	break;
 	case 2190:
-#line 1412 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xed; {p++; goto _out; } }}
 	break;
 	case 2191:
-#line 1415 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xee; {p++; goto _out; } }}
 	break;
 	case 2192:
-#line 1419 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa1; {p++; goto _out; } }}
 	break;
 	case 2193:
-#line 1423 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xec; {p++; goto _out; } }}
 	break;
 	case 2194:
-#line 1454 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xbf; {p++; goto _out; } }}
 	break;
 	case 2195:
-#line 1466 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xef; {p++; goto _out; } }}
 	break;
 	case 2196:
-#line 1501 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xab; {p++; goto _out; } }}
 	break;
 	case 2197:
-#line 1623 "char_ref.rl"
 	{te = p;p--;{ output->first = 0x3c; {p++; goto _out; } }}
 	break;
 	case 2198:
-#line 1641 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xaf; {p++; goto _out; } }}
 	break;
 	case 2199:
-#line 1658 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xb5; {p++; goto _out; } }}
 	break;
 	case 2200:
-#line 1663 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xb7; {p++; goto _out; } }}
 	break;
 	case 2201:
-#line 1702 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa0; {p++; goto _out; } }}
 	break;
 	case 2202:
-#line 1771 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xac; {p++; goto _out; } }}
 	break;
 	case 2203:
-#line 1818 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xf1; {p++; goto _out; } }}
 	break;
 	case 2204:
-#line 1849 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xf3; {p++; goto _out; } }}
 	break;
 	case 2205:
-#line 1853 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xf4; {p++; goto _out; } }}
 	break;
 	case 2206:
-#line 1865 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xf2; {p++; goto _out; } }}
 	break;
 	case 2207:
-#line 1890 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xaa; {p++; goto _out; } }}
 	break;
 	case 2208:
-#line 1892 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xba; {p++; goto _out; } }}
 	break;
 	case 2209:
-#line 1899 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xf8; {p++; goto _out; } }}
 	break;
 	case 2210:
-#line 1902 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xf5; {p++; goto _out; } }}
 	break;
 	case 2211:
-#line 1906 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xf6; {p++; goto _out; } }}
 	break;
 	case 2212:
-#line 1910 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xb6; {p++; goto _out; } }}
 	break;
 	case 2213:
-#line 1940 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xb1; {p++; goto _out; } }}
 	break;
 	case 2214:
-#line 1947 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa3; {p++; goto _out; } }}
 	break;
 	case 2215:
-#line 1987 "char_ref.rl"
 	{te = p;p--;{ output->first = 0x22; {p++; goto _out; } }}
 	break;
 	case 2216:
-#line 2002 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xbb; {p++; goto _out; } }}
 	break;
 	case 2217:
-#line 2041 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xae; {p++; goto _out; } }}
 	break;
 	case 2218:
-#line 2116 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa7; {p++; goto _out; } }}
 	break;
 	case 2219:
-#line 2130 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xad; {p++; goto _out; } }}
 	break;
 	case 2220:
-#line 2217 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xb9; {p++; goto _out; } }}
 	break;
 	case 2221:
-#line 2219 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xb2; {p++; goto _out; } }}
 	break;
 	case 2222:
-#line 2221 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xb3; {p++; goto _out; } }}
 	break;
 	case 2223:
-#line 2249 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xdf; {p++; goto _out; } }}
 	break;
 	case 2224:
-#line 2270 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xfe; {p++; goto _out; } }}
 	break;
 	case 2225:
-#line 2273 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xd7; {p++; goto _out; } }}
 	break;
 	case 2226:
-#line 2311 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xfa; {p++; goto _out; } }}
 	break;
 	case 2227:
-#line 2316 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xfb; {p++; goto _out; } }}
 	break;
 	case 2228:
-#line 2324 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xf9; {p++; goto _out; } }}
 	break;
 	case 2229:
-#line 2334 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa8; {p++; goto _out; } }}
 	break;
 	case 2230:
-#line 2358 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xfc; {p++; goto _out; } }}
 	break;
 	case 2231:
-#line 2438 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xfd; {p++; goto _out; } }}
 	break;
 	case 2232:
-#line 2443 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xa5; {p++; goto _out; } }}
 	break;
 	case 2233:
-#line 2450 "char_ref.rl"
 	{te = p;p--;{ output->first = 0xff; {p++; goto _out; } }}
 	break;
 	case 2234:
-#line 1074 "char_ref.rl"
 	{{p = ((te))-1;}{ output->first = 0xa2; {p++; goto _out; } }}
 	break;
 	case 2235:
-#line 1113 "char_ref.rl"
 	{{p = ((te))-1;}{ output->first = 0xa9; {p++; goto _out; } }}
 	break;
 	case 2236:
-#line 1183 "char_ref.rl"
 	{{p = ((te))-1;}{ output->first = 0xf7; {p++; goto _out; } }}
 	break;
 	case 2237:
-#line 1368 "char_ref.rl"
 	{{p = ((te))-1;}{ output->first = 0x3e; {p++; goto _out; } }}
 	break;
 	case 2238:
-#line 1623 "char_ref.rl"
 	{{p = ((te))-1;}{ output->first = 0x3c; {p++; goto _out; } }}
 	break;
 	case 2239:
-#line 1771 "char_ref.rl"
 	{{p = ((te))-1;}{ output->first = 0xac; {p++; goto _out; } }}
 	break;
 	case 2240:
-#line 1910 "char_ref.rl"
 	{{p = ((te))-1;}{ output->first = 0xb6; {p++; goto _out; } }}
 	break;
 	case 2241:
-#line 2273 "char_ref.rl"
 	{{p = ((te))-1;}{ output->first = 0xd7; {p++; goto _out; } }}
 	break;
-#line 23006 "char_ref.c"
 		}
 	}
 
@@ -22980,10 +20758,8 @@ static bool consume_named_ref(struct GumboInternalParser* parser,
 	while ( _nacts-- > 0 ) {
 		switch ( *_acts++ ) {
 	case 0:
-#line 1 "NONE"
 	{ts = 0;}
 	break;
-#line 23019 "char_ref.c"
 		}
 	}
 
@@ -23003,18 +20779,17 @@ static bool consume_named_ref(struct GumboInternalParser* parser,
 	_out: {}
 	}
 
-#line 2491 "char_ref.rl"
-  // clang-format on
 
   if (cs >= 7623) {
     assert(output->first != kGumboNoChar);
     char last_char = *(te - 1);
-    int len = te - start;
+    size_t len = te - start;
     if (last_char == ';') {
       bool matched = utf8iterator_maybe_consume_match(input, start, len, true);
       assert(matched);
+      UNUSED_IF_NDEBUG(matched);
       return true;
-    } else if (is_in_attribute && (*te == '=' || isalnum(*te))) {
+    } else if (is_in_attribute && (*te == '=' || ascii_isalnum(*te))) {
       output->first = kGumboNoChar;
       output->second = kGumboNoChar;
       utf8iterator_reset(input);
@@ -23023,10 +20798,15 @@ static bool consume_named_ref(struct GumboInternalParser* parser,
       GumboStringPiece bad_ref;
       bad_ref.length = te - start;
       bad_ref.data = start;
-      add_named_reference_error(
-          parser, input, GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON, bad_ref);
+      add_named_reference_error (
+        parser,
+        input,
+        GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON,
+        bad_ref
+      );
       bool matched = utf8iterator_maybe_consume_match(input, start, len, true);
       assert(matched);
+      UNUSED_IF_NDEBUG(matched);
       return false;
     }
   } else {
@@ -23038,9 +20818,13 @@ static bool consume_named_ref(struct GumboInternalParser* parser,
   }
 }
 
-bool consume_char_ref(struct GumboInternalParser* parser,
-    struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
-    bool is_in_attribute, OneOrTwoCodepoints* output) {
+bool gumbo_consume_char_ref (
+  struct GumboInternalParser* parser,
+  struct GumboInternalUtf8Iterator* input,
+  int additional_allowed_char,
+  bool is_in_attribute,
+  OneOrTwoCodepoints* output
+) {
   utf8iterator_mark(input);
   utf8iterator_next(input);
   int c = utf8iterator_current(input);
diff --git a/gumbo-parser/src/char_ref.h b/gumbo-parser/src/char_ref.h
index 09d2598f..153858d2 100644
--- a/gumbo-parser/src/char_ref.h
+++ b/gumbo-parser/src/char_ref.h
@@ -1,23 +1,3 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// Internal header for character reference handling; this should not be exposed
-// transitively by any public API header.  This is why the functions aren't
-// namespaced.
-
 #ifndef GUMBO_CHAR_REF_H_
 #define GUMBO_CHAR_REF_H_
 
@@ -34,8 +14,8 @@ struct GumboInternalUtf8Iterator;
 extern const int kGumboNoChar;
 
 // Certain named character references generate two codepoints, not one, and so
-// the consume_char_ref subroutine needs to return this instead of an int.  The
-// first field will be kGumboNoChar if no character reference was found; the
+// the gumbo_consume_char_ref subroutine needs to return this instead of an int.
+// The first field will be kGumboNoChar if no character reference was found; the
 // second field will be kGumboNoChar if that is the case or if the character
 // reference returns only a single codepoint.
 typedef struct {
@@ -45,16 +25,20 @@ typedef struct {
 
 // Implements the "consume a character reference" section of the spec.
 // This reads in characters from the input as necessary, and fills in a
-// OneOrTwoCodepoints struct containing the characters read.  It may add parse
-// errors to the GumboParser's errors vector, if the spec calls for it.  Pass a
+// OneOrTwoCodepoints struct containing the characters read. It may add parse
+// errors to the GumboParser's errors vector, if the spec calls for it. Pass a
 // space for the "additional allowed char" when the spec says "with no
-// additional allowed char".  Returns false on parse error, true otherwise.
-bool consume_char_ref(struct GumboInternalParser* parser,
-    struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
-    bool is_in_attribute, OneOrTwoCodepoints* output);
+// additional allowed char". Returns false on parse error, true otherwise.
+bool gumbo_consume_char_ref (
+  struct GumboInternalParser* parser,
+  struct GumboInternalUtf8Iterator* input,
+  int additional_allowed_char,
+  bool is_in_attribute,
+  OneOrTwoCodepoints* output
+);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // GUMBO_CHAR_REF_H_
+#endif // GUMBO_CHAR_REF_H_
diff --git a/gumbo-parser/src/char_ref.rl b/gumbo-parser/src/char_ref.rl
index 139a4bbd..3d93ac88 100644
--- a/gumbo-parser/src/char_ref.rl
+++ b/gumbo-parser/src/char_ref.rl
@@ -1,92 +1,43 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// This is a Ragel state machine re-implementation of the original char_ref.c,
-// rewritten to improve efficiency.  To generate the .c file from it,
-//
-// $ ragel -F0 char_ref.rl
-//
-// The generated source is also checked into source control so that most people
-// hacking on the parser do not need to install ragel.
+/*
+ Copyright 2017-2018 Craig Barnes.
+ Copyright 2011 Google Inc.
 
-#include "char_ref.h"
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
 
-#include <assert.h>
-#include <ctype.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <string.h>     // Only for debug assertions at present.
+    https://www.apache.org/licenses/LICENSE-2.0
 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <assert.h>
+#include "char_ref.h"
 #include "error.h"
-#include "string_piece.h"
+#include "macros.h"
 #include "utf8.h"
-#include "util.h"
 
 struct GumboInternalParser;
 
 const int kGumboNoChar = -1;
 
-// Table of replacement characters.  The spec specifies that any occurrence of
-// the first character should be replaced by the second character, and a parse
-// error recorded.
-typedef struct {
-  int from_char;
-  int to_char;
-} CharReplacement;
-
-static const CharReplacement kCharReplacements[] = {
-  { 0x00, 0xfffd },
-  { 0x0d, 0x000d },
-  { 0x80, 0x20ac },
-  { 0x81, 0x0081 },
-  { 0x82, 0x201A },
-  { 0x83, 0x0192 },
-  { 0x84, 0x201E },
-  { 0x85, 0x2026 },
-  { 0x86, 0x2020 },
-  { 0x87, 0x2021 },
-  { 0x88, 0x02C6 },
-  { 0x89, 0x2030 },
-  { 0x8A, 0x0160 },
-  { 0x8B, 0x2039 },
-  { 0x8C, 0x0152 },
-  { 0x8D, 0x008D },
-  { 0x8E, 0x017D },
-  { 0x8F, 0x008F },
-  { 0x90, 0x0090 },
-  { 0x91, 0x2018 },
-  { 0x92, 0x2019 },
-  { 0x93, 0x201C },
-  { 0x94, 0x201D },
-  { 0x95, 0x2022 },
-  { 0x96, 0x2013 },
-  { 0x97, 0x2014 },
-  { 0x98, 0x02DC },
-  { 0x99, 0x2122 },
-  { 0x9A, 0x0161 },
-  { 0x9B, 0x203A },
-  { 0x9C, 0x0153 },
-  { 0x9D, 0x009D },
-  { 0x9E, 0x017E },
-  { 0x9F, 0x0178 },
-  // Terminator.
-  { -1, -1 }
+static const uint32_t kCharReplacements[] = {
+  [0x00] = 0xFFFD, [0x0D] = 0x000D, [0x80] = 0x20AC, [0x81] = 0x0081,
+  [0x82] = 0x201A, [0x83] = 0x0192, [0x84] = 0x201E, [0x85] = 0x2026,
+  [0x86] = 0x2020, [0x87] = 0x2021, [0x88] = 0x02C6, [0x89] = 0x2030,
+  [0x8A] = 0x0160, [0x8B] = 0x2039, [0x8C] = 0x0152, [0x8D] = 0x008D,
+  [0x8E] = 0x017D, [0x8F] = 0x008F, [0x90] = 0x0090, [0x91] = 0x2018,
+  [0x92] = 0x2019, [0x93] = 0x201C, [0x94] = 0x201D, [0x95] = 0x2022,
+  [0x96] = 0x2013, [0x97] = 0x2014, [0x98] = 0x02DC, [0x99] = 0x2122,
+  [0x9A] = 0x0161, [0x9B] = 0x203A, [0x9C] = 0x0153, [0x9D] = 0x009D,
+  [0x9E] = 0x017E, [0x9F] = 0x0178
 };
 
-static int parse_digit(int c, bool allow_hex) {
+static int CONST_FN parse_digit(int c, bool allow_hex) {
   if (c >= '0' && c <= '9') {
     return c - '0';
   }
@@ -99,8 +50,10 @@ static int parse_digit(int c, bool allow_hex) {
   return -1;
 }
 
-static void add_no_digit_error(
-    struct GumboInternalParser* parser, Utf8Iterator* input) {
+static void add_no_digit_error (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input
+) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -109,9 +62,12 @@ static void add_no_digit_error(
   error->type = GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS;
 }
 
-static void add_codepoint_error(
-    struct GumboInternalParser* parser, Utf8Iterator* input,
-    GumboErrorType type, int codepoint) {
+static void add_codepoint_error (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input,
+  GumboErrorType type,
+  int codepoint
+) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -121,9 +77,12 @@ static void add_codepoint_error(
   error->v.codepoint = codepoint;
 }
 
-static void add_named_reference_error(
-    struct GumboInternalParser* parser, Utf8Iterator* input,
-    GumboErrorType type, GumboStringPiece text) {
+static void add_named_reference_error (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input,
+  GumboErrorType type,
+  GumboStringPiece text
+) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -133,17 +92,15 @@ static void add_named_reference_error(
   error->v.text = text;
 }
 
-static int maybe_replace_codepoint(int codepoint) {
-  for (int i = 0; kCharReplacements[i].from_char != -1; ++i) {
-    if (kCharReplacements[i].from_char == codepoint) {
-      return kCharReplacements[i].to_char;
-    }
-  }
-  return -1;
+static uint32_t PURE maybe_replace_codepoint(uint32_t codepoint) {
+  return (codepoint > 0x9F) ? 0x00 : kCharReplacements[codepoint];
 }
 
-static bool consume_numeric_ref(
-    struct GumboInternalParser* parser, Utf8Iterator* input, int* output) {
+static bool consume_numeric_ref (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input,
+  int* output
+) {
   utf8iterator_next(input);
   bool is_hex = false;
   int c = utf8iterator_current(input);
@@ -162,7 +119,7 @@ static bool consume_numeric_ref(
     return false;
   }
 
-  int codepoint = 0;
+  uint32_t codepoint = 0;
   bool status = true;
   do {
     codepoint = (codepoint * (is_hex ? 16 : 10)) + digit;
@@ -171,31 +128,47 @@ static bool consume_numeric_ref(
   } while (digit != -1);
 
   if (utf8iterator_current(input) != ';') {
-    add_codepoint_error(
-        parser, input, GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON, codepoint);
+    add_codepoint_error (
+      parser,
+      input,
+      GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON,
+      codepoint
+    );
     status = false;
   } else {
     utf8iterator_next(input);
   }
 
-  int replacement = maybe_replace_codepoint(codepoint);
-  if (replacement != -1) {
-    add_codepoint_error(
-        parser, input, GUMBO_ERR_NUMERIC_CHAR_REF_INVALID, codepoint);
+  uint32_t replacement = maybe_replace_codepoint(codepoint);
+  if (replacement != 0) {
+    add_codepoint_error (
+      parser,
+      input,
+      GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,
+      codepoint
+    );
     *output = replacement;
     return false;
   }
 
   if ((codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff) {
-    add_codepoint_error(
-        parser, input, GUMBO_ERR_NUMERIC_CHAR_REF_INVALID, codepoint);
+    add_codepoint_error (
+      parser,
+      input,
+      GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,
+      codepoint
+    );
     *output = 0xfffd;
     return false;
   }
 
-  if (utf8_is_invalid_code_point(codepoint) || codepoint == 0xb) {
-    add_codepoint_error(
-        parser, input, GUMBO_ERR_NUMERIC_CHAR_REF_INVALID, codepoint);
+  if (utf8_is_invalid_code_point(codepoint)) {
+    add_codepoint_error (
+      parser,
+      input,
+      GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,
+      codepoint
+    );
     status = false;
     // But return it anyway, per spec.
   }
@@ -203,15 +176,19 @@ static bool consume_numeric_ref(
   return status;
 }
 
-static bool maybe_add_invalid_named_reference(
-    struct GumboInternalParser* parser, Utf8Iterator* input) {
+static bool maybe_add_invalid_named_reference (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input
+) {
   // The iterator will always be reset in this code path, so we don't need to
   // worry about consuming characters.
   const char* start = utf8iterator_get_char_pointer(input);
   int c = utf8iterator_current(input);
-  while ((c >= 'a' && c <= 'z') ||
-         (c >= 'A' && c <= 'Z') ||
-         (c >= '0' && c <= '9')) {
+  while (
+    (c >= 'a' && c <= 'z')
+    || (c >= 'A' && c <= 'Z')
+    || (c >= '0' && c <= '9')
+  ) {
     utf8iterator_next(input);
     c = utf8iterator_current(input);
   }
@@ -219,8 +196,12 @@ static bool maybe_add_invalid_named_reference(
     GumboStringPiece bad_ref;
     bad_ref.data = start;
     bad_ref.length = utf8iterator_get_char_pointer(input) - start;
-    add_named_reference_error(
-        parser, input, GUMBO_ERR_NAMED_CHAR_REF_INVALID, bad_ref);
+    add_named_reference_error (
+      parser,
+      input,
+      GUMBO_ERR_NAMED_CHAR_REF_INVALID,
+      bad_ref
+    );
     return false;
   }
   return true;
@@ -2464,13 +2445,30 @@ valid_named_ref := |*
 *|;
 }%%
 
-// clang-format off
 %% write data noerror nofinal;
-// clang-format on
 
-static bool consume_named_ref(
-    struct GumboInternalParser* parser, Utf8Iterator* input, bool is_in_attribute,
-    OneOrTwoCodepoints* output) {
+static const unsigned char ascii_alnum_table[256] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   0.. 15
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //  16.. 31
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //  32.. 47
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, //  48.. 63
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //  64.. 79
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, //  80.. 95
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //  96..111
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 112..127
+  // 128..255: implicitly zero
+};
+
+static inline bool PURE ascii_isalnum(unsigned char ch) {
+  return ascii_alnum_table[ch];
+}
+
+static bool consume_named_ref (
+  struct GumboInternalParser* parser,
+  Utf8Iterator* input,
+  bool is_in_attribute,
+  OneOrTwoCodepoints* output
+) {
   assert(output->first == kGumboNoChar);
   const char* p = utf8iterator_get_char_pointer(input);
   const char* pe = utf8iterator_get_end_pointer(input);
@@ -2479,7 +2477,6 @@ static bool consume_named_ref(
   const char *ts, *start;
   int cs, act;
 
-  // clang-format off
   %% write init;
   // Avoid unused variable warnings.
   (void) act;
@@ -2488,17 +2485,17 @@ static bool consume_named_ref(
 
   start = p;
   %% write exec;
-  // clang-format on
 
   if (cs >= %%{ write first_final; }%%) {
     assert(output->first != kGumboNoChar);
     char last_char = *(te - 1);
-    int len = te - start;
+    size_t len = te - start;
     if (last_char == ';') {
       bool matched = utf8iterator_maybe_consume_match(input, start, len, true);
       assert(matched);
+      UNUSED_IF_NDEBUG(matched);
       return true;
-    } else if (is_in_attribute && (*te == '=' || isalnum(*te))) {
+    } else if (is_in_attribute && (*te == '=' || ascii_isalnum(*te))) {
       output->first = kGumboNoChar;
       output->second = kGumboNoChar;
       utf8iterator_reset(input);
@@ -2507,10 +2504,15 @@ static bool consume_named_ref(
       GumboStringPiece bad_ref;
       bad_ref.length = te - start;
       bad_ref.data = start;
-      add_named_reference_error(
-          parser, input, GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON, bad_ref);
+      add_named_reference_error (
+        parser,
+        input,
+        GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON,
+        bad_ref
+      );
       bool matched = utf8iterator_maybe_consume_match(input, start, len, true);
       assert(matched);
+      UNUSED_IF_NDEBUG(matched);
       return false;
     }
   } else {
@@ -2522,10 +2524,13 @@ static bool consume_named_ref(
   }
 }
 
-bool consume_char_ref(
-    struct GumboInternalParser* parser, struct GumboInternalUtf8Iterator* input,
-    int additional_allowed_char, bool is_in_attribute,
-    OneOrTwoCodepoints* output) {
+bool gumbo_consume_char_ref (
+  struct GumboInternalParser* parser,
+  struct GumboInternalUtf8Iterator* input,
+  int additional_allowed_char,
+  bool is_in_attribute,
+  OneOrTwoCodepoints* output
+) {
   utf8iterator_mark(input);
   utf8iterator_next(input);
   int c = utf8iterator_current(input);
diff --git a/gumbo-parser/src/error.c b/gumbo-parser/src/error.c
index 25af6004..f3c50b12 100644
--- a/gumbo-parser/src/error.c
+++ b/gumbo-parser/src/error.c
@@ -1,157 +1,183 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
+/*
+ Copyright 2010 Google Inc.
 
-#include "error.h"
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
 
 #include <assert.h>
+#include <inttypes.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <string.h>
-
+#include "error.h"
 #include "gumbo.h"
+#include "macros.h"
 #include "parser.h"
 #include "string_buffer.h"
 #include "util.h"
 #include "vector.h"
 
-// Prints a formatted message to a StringBuffer.  This automatically resizes the
-// StringBuffer as necessary to fit the message.  Returns the number of bytes
+// Prints a formatted message to a GumboStringBuffer, automatically resizing the
+// buffer as necessary to fit the message. Returns the number of bytes
 // written.
-static int print_message(
-    GumboParser* parser, GumboStringBuffer* output, const char* format, ...) {
+static int PRINTF(2) print_message (
+  GumboStringBuffer* output,
+  const char* format,
+  ...
+) {
   va_list args;
   int remaining_capacity = output->capacity - output->length;
   va_start(args, format);
-  int bytes_written = vsnprintf(
-      output->data + output->length, remaining_capacity, format, args);
+  int bytes_written = vsnprintf (
+    output->data + output->length,
+    remaining_capacity,
+    format,
+    args
+  );
   va_end(args);
 #ifdef _MSC_VER
   if (bytes_written == -1) {
     // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
     // returning the number of bytes that would've been written had there been
-    // enough.  In this case, we'll double the buffer size and hope it fits when
+    // enough. In this case, we'll double the buffer size and hope it fits when
     // we retry (letting it fail and returning 0 if it doesn't), since there's
     // no way to smartly resize the buffer.
-    gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
+    gumbo_string_buffer_reserve(output->capacity * 2, output);
     va_start(args, format);
-    int result = vsnprintf(
-        output->data + output->length, remaining_capacity, format, args);
+    int result = vsnprintf (
+      output->data + output->length,
+      remaining_capacity,
+      format,
+      args
+    );
     va_end(args);
     return result == -1 ? 0 : result;
   }
 #else
-  // -1 in standard C99 indicates an encoding error.  Return 0 and do nothing.
+  // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
   if (bytes_written == -1) {
     return 0;
   }
 #endif
 
   if (bytes_written >= remaining_capacity) {
-    gumbo_string_buffer_reserve(
-        parser, output->capacity + bytes_written, output);
+    gumbo_string_buffer_reserve(output->capacity + bytes_written, output);
     remaining_capacity = output->capacity - output->length;
     va_start(args, format);
-    bytes_written = vsnprintf(
-        output->data + output->length, remaining_capacity, format, args);
+    bytes_written = vsnprintf (
+      output->data + output->length,
+      remaining_capacity,
+      format,
+      args
+    );
     va_end(args);
   }
   output->length += bytes_written;
   return bytes_written;
 }
 
-static void print_tag_stack(GumboParser* parser, const GumboParserError* error,
-    GumboStringBuffer* output) {
-  print_message(parser, output, "  Currently open tags: ");
+static void print_tag_stack (
+  const GumboParserError* error,
+  GumboStringBuffer* output
+) {
+  print_message(output, "  Currently open tags: ");
   for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
     if (i) {
-      print_message(parser, output, ", ");
+      print_message(output, ", ");
     }
     GumboTag tag = (GumboTag) error->tag_stack.data[i];
-    print_message(parser, output, gumbo_normalized_tagname(tag));
+    print_message(output, "%s", gumbo_normalized_tagname(tag));
   }
-  gumbo_string_buffer_append_codepoint(parser, '.', output);
+  gumbo_string_buffer_append_codepoint('.', output);
 }
 
-static void handle_parser_error(GumboParser* parser,
-    const GumboParserError* error, GumboStringBuffer* output) {
-  if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL &&
-      error->input_type != GUMBO_TOKEN_DOCTYPE) {
-    print_message(
-        parser, output, "The doctype must be the first token in the document");
+static void handle_parser_error (
+  const GumboParserError* error,
+  GumboStringBuffer* output
+) {
+  if (
+    error->parser_state == GUMBO_INSERTION_MODE_INITIAL
+    && error->input_type != GUMBO_TOKEN_DOCTYPE
+  ) {
+    print_message (
+      output,
+      "The doctype must be the first token in the document"
+    );
     return;
   }
 
   switch (error->input_type) {
     case GUMBO_TOKEN_DOCTYPE:
-      print_message(parser, output, "This is not a legal doctype");
+      print_message(output, "This is not a legal doctype");
       return;
     case GUMBO_TOKEN_COMMENT:
       // Should never happen; comments are always legal.
       assert(0);
       // But just in case...
-      print_message(parser, output, "Comments aren't legal here");
+      print_message(output, "Comments aren't legal here");
       return;
     case GUMBO_TOKEN_CDATA:
     case GUMBO_TOKEN_WHITESPACE:
     case GUMBO_TOKEN_CHARACTER:
-      print_message(parser, output, "Character tokens aren't legal here");
+      print_message(output, "Character tokens aren't legal here");
       return;
     case GUMBO_TOKEN_NULL:
-      print_message(parser, output, "Null bytes are not allowed in HTML5");
+      print_message(output, "Null bytes are not allowed in HTML5");
       return;
     case GUMBO_TOKEN_EOF:
       if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
-        print_message(parser, output, "You must provide a doctype");
+        print_message(output, "You must provide a doctype");
       } else {
-        print_message(parser, output, "Premature end of file");
-        print_tag_stack(parser, error, output);
+        print_message(output, "Premature end of file");
+        print_tag_stack(error, output);
       }
       return;
     case GUMBO_TOKEN_START_TAG:
     case GUMBO_TOKEN_END_TAG:
-      print_message(parser, output, "That tag isn't allowed here");
-      print_tag_stack(parser, error, output);
+      print_message(output, "That tag isn't allowed here");
+      print_tag_stack(error, output);
       // TODO(jdtang): Give more specific messaging.
       return;
   }
 }
 
 // Finds the preceding newline in an original source buffer from a given byte
-// location.  Returns a character pointer to the character after that, or a
+// location. Returns a character pointer to the character after that, or a
 // pointer to the beginning of the string if this is the first line.
-static const char* find_prev_newline(
-    const char* source_text, const char* error_location) {
+static const char* find_prev_newline (
+  const char* source_text,
+  const char* error_location
+) {
   assert(error_location >= source_text);
   const char* c = error_location;
   if (*c == '\n' && c != source_text)
     --c;
-  for (; c != source_text && *c != '\n'; --c)
-    ;
+  while (c != source_text && *c != '\n')
+    --c;
   return c == source_text ? c : c + 1;
 }
 
 // Finds the next newline in the original source buffer from a given byte
-// location.  Returns a character pointer to that newline, or a pointer to the
+// location. Returns a character pointer to that newline, or a pointer to the
 // terminating null byte if this is the last line.
 static const char* find_next_newline(
-    const char* source_text_end, const char* error_location) {
+  const char* source_text_end,
+  const char* error_location
+) {
   assert(error_location <= source_text_end);
   const char* c = error_location;
-  for (; c != source_text_end && *c != '\n'; ++c)
-    ;
+  while (c != source_text_end && *c != '\n')
+    ++c;
   return c;
 }
 
@@ -160,130 +186,176 @@ GumboError* gumbo_add_error(GumboParser* parser) {
   if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
     return NULL;
   }
-  GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
-  gumbo_vector_add(parser, error, &parser->_output->errors);
+  GumboError* error = gumbo_alloc(sizeof(GumboError));
+  gumbo_vector_add(error, &parser->_output->errors);
   return error;
 }
 
-void gumbo_error_to_string(
-    GumboParser* parser, const GumboError* error, GumboStringBuffer* output) {
-  print_message(
-      parser, output, "@%d:%d: ", error->position.line, error->position.column);
+void gumbo_error_to_string (
+  const GumboError* error,
+  GumboStringBuffer* output
+) {
+  print_message (
+    output,
+    "@%zu:%zu: ",
+    error->position.line,
+    error->position.column
+  );
   switch (error->type) {
     case GUMBO_ERR_UTF8_INVALID:
-      print_message(
-          parser, output, "Invalid UTF8 character 0x%x", error->v.codepoint);
+      print_message (
+        output,
+        "Invalid UTF8 character 0x%" PRIx32,
+        error->v.codepoint
+      );
       break;
     case GUMBO_ERR_UTF8_TRUNCATED:
-      print_message(parser, output,
-          "Input stream ends with a truncated UTF8 character 0x%x",
-          error->v.codepoint);
+      print_message (
+        output,
+        "Input stream ends with a truncated UTF8 character 0x%" PRIx32,
+        error->v.codepoint
+      );
       break;
     case GUMBO_ERR_UTF8_NULL:
-      print_message(parser, output,
-          "Unexpected NULL character in the input stream");
+      print_message (
+        output,
+        "Unexpected NULL character in the input stream"
+      );
       break;
     case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS:
-      print_message(
-          parser, output, "No digits after &# in numeric character reference");
+      print_message (
+        output,
+        "No digits after &# in numeric character reference"
+      );
       break;
     case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON:
-      print_message(parser, output,
-          "The numeric character reference &#%d should be followed "
-          "by a semicolon",
-          error->v.codepoint);
+      print_message (
+        output,
+        "The numeric character reference &#%" PRIu32 " should be followed "
+        "by a semicolon",
+        error->v.codepoint
+      );
       break;
     case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID:
-      print_message(parser, output,
-          "The numeric character reference &#%d; encodes an invalid "
-          "unicode codepoint",
-          error->v.codepoint);
+      print_message (
+        output,
+        "The numeric character reference &#%" PRIu32 "; encodes an invalid "
+        "unicode codepoint",
+        error->v.codepoint
+      );
       break;
     case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON:
       // The textual data came from one of the literal strings in the table, and
       // so it'll be null-terminated.
-      print_message(parser, output,
-          "The named character reference &%.*s should be followed by a "
-          "semicolon",
-          (int) error->v.text.length, error->v.text.data);
+      print_message (
+        output,
+        "The named character reference &%.*s should be followed by a "
+        "semicolon",
+        (int) error->v.text.length,
+        error->v.text.data
+      );
       break;
     case GUMBO_ERR_NAMED_CHAR_REF_INVALID:
-      print_message(parser, output,
-          "The named character reference &%.*s; is not a valid entity name",
-          (int) error->v.text.length, error->v.text.data);
+      print_message (
+        output,
+        "The named character reference &%.*s; is not a valid entity name",
+        (int) error->v.text.length,
+        error->v.text.data
+      );
       break;
     case GUMBO_ERR_DUPLICATE_ATTR:
-      print_message(parser, output,
-          "Attribute %s occurs multiple times, at positions %d and %d",
-          error->v.duplicate_attr.name, error->v.duplicate_attr.original_index,
-          error->v.duplicate_attr.new_index);
+      print_message (
+        output,
+        "Attribute %s occurs multiple times, at positions %u and %u",
+        error->v.duplicate_attr.name,
+        error->v.duplicate_attr.original_index,
+        error->v.duplicate_attr.new_index
+      );
+      break;
+    case GUMBO_ERR_DASHES_OR_DOCTYPE:
+      print_message (
+        output,
+        "Incorrectly opened comment; expected '--', 'DOCTYPE', or '[CDATA['"
+      );
       break;
     case GUMBO_ERR_PARSER:
-    case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG:
-      handle_parser_error(parser, &error->v.parser, output);
+      handle_parser_error(&error->v.parser, output);
       break;
-    case GUMBO_ERR_DASHES_OR_DOCTYPE:
-      print_message(parser, output,
-          "Incorrectly opened comment; expected '--', 'DOCTYPE', or '[CDATA['");
+    case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG:
+    case GUMBO_ERR_SELF_CLOSING_END_TAG:
+      print_message (
+        output,
+        "Tag cannot be self-closing");
       break;
     default:
-      print_message(parser, output,
-          "Tokenizer error with an unimplemented error message");
+      print_message (
+        output,
+        "Tokenizer error with an unimplemented error message"
+      );
       break;
   }
-  gumbo_string_buffer_append_codepoint(parser, '.', output);
+  gumbo_string_buffer_append_codepoint('.', output);
 }
 
-void gumbo_caret_diagnostic_to_string(GumboParser* parser,
-    const GumboError* error, const char* source_text,
-    size_t length, GumboStringBuffer* output) {
-  gumbo_error_to_string(parser, error, output);
+void gumbo_caret_diagnostic_to_string (
+  const GumboError* error,
+  const char* source_text,
+  size_t source_length,
+  GumboStringBuffer* output
+) {
+  gumbo_error_to_string(error, output);
 
   const char* line_start = find_prev_newline(source_text, error->original_text);
-  const char* line_end = find_next_newline(source_text+length, error->original_text);
+  const char* line_end = find_next_newline(source_text + source_length, error->original_text);
   GumboStringPiece original_line;
   original_line.data = line_start;
   original_line.length = line_end - line_start;
 
-  gumbo_string_buffer_append_codepoint(parser, '\n', output);
-  gumbo_string_buffer_append_string(parser, &original_line, output);
-  gumbo_string_buffer_append_codepoint(parser, '\n', output);
-  gumbo_string_buffer_reserve(
-      parser, output->length + error->position.column, output);
-  int num_spaces = error->position.column - 1;
-  memset(output->data + output->length, ' ', num_spaces);
-  output->length += num_spaces;
-  gumbo_string_buffer_append_codepoint(parser, '^', output);
-  gumbo_string_buffer_append_codepoint(parser, '\n', output);
+  gumbo_string_buffer_append_codepoint('\n', output);
+  gumbo_string_buffer_append_string(&original_line, output);
+  gumbo_string_buffer_append_codepoint('\n', output);
+  gumbo_string_buffer_reserve(output->length + error->position.column, output);
+  if (error->position.column >= 2) {
+    size_t num_spaces = error->position.column - 1;
+    memset(output->data + output->length, ' ', num_spaces);
+    output->length += num_spaces;
+  }
+  gumbo_string_buffer_append_codepoint('^', output);
+  gumbo_string_buffer_append_codepoint('\n', output);
 }
 
-void gumbo_print_caret_diagnostic(
-    GumboParser* parser, const GumboError* error, const char* source_text,
-    size_t length) {
+void gumbo_print_caret_diagnostic (
+  const GumboError* error,
+  const char* source_text,
+  size_t source_length
+) {
   GumboStringBuffer text;
-  gumbo_string_buffer_init(parser, &text);
-  gumbo_caret_diagnostic_to_string(parser, error, source_text, length, &text);
+  gumbo_string_buffer_init(&text);
+  gumbo_caret_diagnostic_to_string(error, source_text, source_length, &text);
   printf("%.*s", (int) text.length, text.data);
-  gumbo_string_buffer_destroy(parser, &text);
+  gumbo_string_buffer_destroy(&text);
 }
 
-void gumbo_error_destroy(GumboParser* parser, GumboError* error) {
-  if (error->type == GUMBO_ERR_PARSER ||
-      error->type == GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG) {
-    gumbo_vector_destroy(parser, &error->v.parser.tag_stack);
+void gumbo_error_destroy(GumboError* error) {
+  if (
+    error->type == GUMBO_ERR_PARSER
+    || error->type == GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG
+    || error->type == GUMBO_ERR_SELF_CLOSING_END_TAG
+  ) {
+    gumbo_vector_destroy(&error->v.parser.tag_stack);
   } else if (error->type == GUMBO_ERR_DUPLICATE_ATTR) {
-    gumbo_parser_deallocate(parser, (void*) error->v.duplicate_attr.name);
+    gumbo_free((void*) error->v.duplicate_attr.name);
   }
-  gumbo_parser_deallocate(parser, error);
+  gumbo_free(error);
 }
 
 void gumbo_init_errors(GumboParser* parser) {
-  gumbo_vector_init(parser, 5, &parser->_output->errors);
+  gumbo_vector_init(5, &parser->_output->errors);
 }
 
 void gumbo_destroy_errors(GumboParser* parser) {
   for (unsigned int i = 0; i < parser->_output->errors.length; ++i) {
-    gumbo_error_destroy(parser, parser->_output->errors.data[i]);
+    gumbo_error_destroy(parser->_output->errors.data[i]);
   }
-  gumbo_vector_destroy(parser, &parser->_output->errors);
+  gumbo_vector_destroy(&parser->_output->errors);
 }
diff --git a/gumbo-parser/src/error.h b/gumbo-parser/src/error.h
index 344e57ee..839c71e0 100644
--- a/gumbo-parser/src/error.h
+++ b/gumbo-parser/src/error.h
@@ -1,26 +1,6 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// Error types, enums, and handling functions.
-
 #ifndef GUMBO_ERROR_H_
 #define GUMBO_ERROR_H_
-#ifdef _MSC_VER
-#define _CRT_SECURE_NO_WARNINGS
-#endif
+
 #include <stdint.h>
 
 #include "gumbo.h"
@@ -77,11 +57,12 @@ typedef enum {
   GUMBO_ERR_DOCTYPE_END,
   GUMBO_ERR_PARSER,
   GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG,
+  GUMBO_ERR_SELF_CLOSING_END_TAG,
 } GumboErrorType;
 
 // Additional data for duplicated attributes.
 typedef struct GumboInternalDuplicateAttrError {
-  // The name of the attribute.  Owned by this struct.
+  // The name of the attribute. Owned by this struct.
   const char* name;
 
   // The (0-based) index within the attributes vector of the original
@@ -93,7 +74,7 @@ typedef struct GumboInternalDuplicateAttrError {
 } GumboDuplicateAttrError;
 
 // A simplified representation of the tokenizer state, designed to be more
-// useful to clients of this library than the internal representation.  This
+// useful to clients of this library than the internal representation. This
 // condenses the actual states used in the tokenizer state machine into a few
 // values that will be familiar to users of HTML.
 typedef enum {
@@ -129,20 +110,20 @@ typedef struct GumboInternalParserError {
   // The type of input token that resulted in this error.
   GumboTokenType input_type;
 
-  // The HTML tag of the input token.  TAG_UNKNOWN if this was not a tag token.
+  // The HTML tag of the input token; GUMBO_TAG_UNKNOWN if not a tag token.
   GumboTag input_tag;
 
   // The insertion mode that the parser was in at the time.
   GumboInsertionMode parser_state;
 
-  // The tag stack at the point of the error.  Note that this is an GumboVector
+  // The tag stack at the point of the error. Note that this is a GumboVector
   // of GumboTag's *stored by value* - cast the void* to an GumboTag directly to
   // get at the tag.
   GumboVector /* GumboTag */ tag_stack;
 } GumboParserError;
 
 // The overall error struct representing an error in decoding/tokenizing/parsing
-// the HTML.  This contains an enumerated type flag, a source position, and then
+// the HTML. This contains an enumerated type flag, a source position, and then
 // a union of fields containing data specific to the error.
 typedef struct GumboInternalError {
   // The type of error.
@@ -163,7 +144,7 @@ typedef struct GumboInternalError {
     // * GUMBO_ERR_UTF8_TRUNCATED
     // * GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON
     // * GUMBO_ERR_NUMERIC_CHAR_REF_INVALID
-    uint64_t codepoint;
+    uint32_t codepoint;
 
     // Tokenizer errors.
     GumboTokenizerError tokenizer;
@@ -183,7 +164,7 @@ typedef struct GumboInternalError {
 } GumboError;
 
 // Adds a new error to the parser's error list, and returns a pointer to it so
-// that clients can fill out the rest of its fields.  May return NULL if we're
+// that clients can fill out the rest of its fields. May return NULL if we're
 // already over the max_errors field specified in GumboOptions.
 GumboError* gumbo_add_error(struct GumboInternalParser* parser);
 
@@ -194,32 +175,36 @@ void gumbo_init_errors(struct GumboInternalParser* errors);
 void gumbo_destroy_errors(struct GumboInternalParser* errors);
 
 // Frees the memory used for a single GumboError.
-void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
-
-// Prints an error to a string.  This fills an empty GumboStringBuffer with a
-// freshly-allocated buffer containing the error message text.  The caller is
-// responsible for deleting the buffer.  (Note that the buffer is allocated with
-// the allocator specified in the GumboParser config and hence should be freed
-// by gumbo_parser_deallocate().)
-void gumbo_error_to_string(struct GumboInternalParser* parser,
-    const GumboError* error, GumboStringBuffer* output);
-
-// Prints a caret diagnostic to a string.  This fills an empty GumboStringBuffer
-// with a freshly-allocated buffer containing the error message text.  The
-// caller is responsible for deleting the buffer.  (Note that the buffer is
-// allocated with the allocator specified in the GumboParser config and hence
-// should be freed by gumbo_parser_deallocate().)
-void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser,
-    const GumboError* error, const char* source_text, size_t length,
-    GumboStringBuffer* output);
+void gumbo_error_destroy(GumboError* error);
+
+// Prints an error to a string. This fills an empty GumboStringBuffer with a
+// freshly-allocated buffer containing the error message text. The caller is
+// responsible for freeing the buffer.
+void gumbo_error_to_string (
+  const GumboError* error,
+  GumboStringBuffer* output
+);
+
+// Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer
+// with a freshly-allocated buffer containing the error message text. The
+// caller is responsible for freeing the buffer.
+void gumbo_caret_diagnostic_to_string (
+  const GumboError* error,
+  const char* source_text,
+  size_t source_length,
+  GumboStringBuffer* output
+);
 
 // Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
 // of writing to a string.
-void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser,
-    const GumboError* error, const char* source_text, size_t length);
+void gumbo_print_caret_diagnostic (
+  const GumboError* error,
+  const char* source_text,
+  size_t source_length
+);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // GUMBO_ERROR_H_
+#endif // GUMBO_ERROR_H_
diff --git a/gumbo-parser/src/foreign_attrs.c b/gumbo-parser/src/foreign_attrs.c
new file mode 100644
index 00000000..7a77a188
--- /dev/null
+++ b/gumbo-parser/src/foreign_attrs.c
@@ -0,0 +1,104 @@
+/* ANSI-C code produced by gperf version 3.1 */
+/* Command-line: gperf -m100 -n lib/foreign_attrs.gperf  */
+/* Computed positions: -k'2,8' */
+/* Filtered by: mk/gperf-filter.sed */
+
+#include "replacement.h"
+#include "macros.h"
+#include <string.h>
+
+#define TOTAL_KEYWORDS 11
+#define MIN_WORD_LENGTH 5
+#define MAX_WORD_LENGTH 13
+#define MIN_HASH_VALUE 0
+#define MAX_HASH_VALUE 10
+/* maximum key range = 11, duplicates = 0 */
+
+static inline unsigned int
+hash (register const char *str, register size_t len)
+{
+  static const unsigned char asso_values[] =
+    {
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11,  2,
+      11, 10, 11,  9,  7,  6, 11, 11,  1,  0,
+      11,  5, 11, 11,  4, 11, 11, 11, 11, 11,
+      11,  3, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+      11, 11, 11, 11, 11, 11
+    };
+  register unsigned int hval = 0;
+
+  switch (len)
+    {
+      default:
+        hval += asso_values[(unsigned char)str[7]];
+      /*FALLTHROUGH*/
+      case 7:
+      case 6:
+      case 5:
+      case 4:
+      case 3:
+      case 2:
+        hval += asso_values[(unsigned char)str[1]];
+        break;
+    }
+  return hval;
+}
+
+const ForeignAttrReplacement *
+gumbo_get_foreign_attr_replacement (register const char *str, register size_t len)
+{
+  static const unsigned char lengthtable[] =
+    {
+       5, 11,  9, 13, 10, 10, 10, 11, 10,  8,  8
+    };
+  static const ForeignAttrReplacement wordlist[] =
+    {
+      {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
+      {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
+      {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
+      {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
+      {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
+      {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
+      {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
+      {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
+      {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
+      {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
+      {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML}
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+      register unsigned int key = hash (str, len);
+
+      if (key <= MAX_HASH_VALUE)
+        if (len == lengthtable[key])
+          {
+            register const char *s = wordlist[key].from;
+
+            if (s && *str == *s && !memcmp (str + 1, s + 1, len - 1))
+              return &wordlist[key];
+          }
+    }
+  return 0;
+}
diff --git a/gumbo-parser/src/foreign_attrs.gperf b/gumbo-parser/src/foreign_attrs.gperf
new file mode 100644
index 00000000..91a0aa40
--- /dev/null
+++ b/gumbo-parser/src/foreign_attrs.gperf
@@ -0,0 +1,27 @@
+%{
+#include "replacement.h"
+#include "macros.h"
+%}
+
+%struct-type
+%omit-struct-type
+%compare-lengths
+%readonly-tables
+%null-strings
+%includes
+%define lookup-function-name gumbo_get_foreign_attr_replacement
+%define slot-name from
+ForeignAttrReplacement;
+
+%%
+"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK
+"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK
+"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK
+"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK
+"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK
+"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK
+"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML
+"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML
+"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML
+"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS
+"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS
diff --git a/gumbo-parser/src/gumbo.h b/gumbo-parser/src/gumbo.h
index f45a583e..e575bce1 100644
--- a/gumbo-parser/src/gumbo.h
+++ b/gumbo-parser/src/gumbo.h
@@ -1,51 +1,33 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// We use Gumbo as a prefix for types, gumbo_ as a prefix for functions, and
-// GUMBO_ as a prefix for enum constants (static constants get the Google-style
-// kGumbo prefix).
+// Copyright 2010 Google Inc.
+// Copyright 2018 Craig Barnes.
+// Licensed under the Apache License, version 2.0.
+
+// We use Gumbo as a prefix for types, gumbo_ as a prefix for functions,
+// GUMBO_ as a prefix for enum constants and kGumbo as a prefix for
+// static constants.
 
 /**
  * @file
  * @mainpage Gumbo HTML Parser
  *
- * This provides a conformant, no-dependencies implementation of the HTML5
- * parsing algorithm.  It supports only UTF8; if you need to parse a different
- * encoding, run a preprocessing step to convert to UTF8.  It returns a parse
- * tree made of the structs in this file.
+ * This provides a conformant, no-dependencies implementation of the
+ * [HTML5] parsing algorithm. It supports only UTF-8 -- if you need
+ * to parse a different encoding, run a preprocessing step to convert
+ * to UTF-8. It returns a parse tree made of the structs in this file.
  *
  * Example:
  * @code
  *    GumboOutput* output = gumbo_parse(input);
  *    do_something_with_doctype(output->document);
  *    do_something_with_html_tree(output->root);
- *    gumbo_destroy_output(&options, output);
+ *    gumbo_destroy_output(output);
  * @endcode
- * HTML5 Spec:
  *
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html
+ * [HTML5]: https://html.spec.whatwg.org/multipage/
  */
 
-#ifndef GUMBO_GUMBO_H_
-#define GUMBO_GUMBO_H_
-
-#ifdef _MSC_VER
-#define _CRT_SECURE_NO_WARNINGS
-#define fileno _fileno
-#endif
+#ifndef GUMBO_H
+#define GUMBO_H
 
 #include <stdbool.h>
 #include <stddef.h>
@@ -55,41 +37,30 @@ extern "C" {
 #endif
 
 /**
- * A struct representing a character position within the original text buffer.
- * Line and column numbers are 1-based and offsets are 0-based, which matches
- * how most editors and command-line tools work.  Also, columns measure
- * positions in terms of characters while offsets measure by bytes; this is
- * because the offset field is often used to pull out a particular region of
- * text (which in most languages that bind to C implies pointer arithmetic on a
- * buffer of bytes), while the column field is often used to reference a
- * particular column on a printable display, which nowadays is usually UTF-8.
+ * A struct representing a character position within the original text
+ * buffer. Line and column numbers are 1-based and offsets are 0-based,
+ * which matches how most editors and command-line tools work.
  */
 typedef struct {
-  unsigned int line;
-  unsigned int column;
-  unsigned int offset;
+  size_t line;
+  size_t column;
+  size_t offset;
 } GumboSourcePosition;
 
 /**
- * A SourcePosition used for elements that have no source position, i.e.
- * parser-inserted elements.
- */
-extern const GumboSourcePosition kGumboEmptySourcePosition;
-
-/**
- * A struct representing a string or part of a string.  Strings within the
- * parser are represented by a char* and a length; the char* points into
- * an existing data buffer owned by some other code (often the original input).
- * GumboStringPieces are assumed (by convention) to be immutable, because they
- * may share data.  Use GumboStringBuffer if you need to construct a string.
- * Clients should assume that it is not NUL-terminated, and should always use
- * explicit lengths when manipulating them.
+ * A struct representing a string or part of a string. Strings within
+ * the parser are represented by a `char*` and a length; the `char*`
+ * points into an existing data buffer owned by some other code (often
+ * the original input). `GumboStringPiece`s are assumed (by convention)
+ * to be immutable, because they may share data. Clients should assume
+ * that it is not NUL-terminated and should always use explicit lengths
+ * when manipulating them.
  */
 typedef struct {
-  /** A pointer to the beginning of the string.  NULL iff length == 0. */
+  /** A pointer to the beginning of the string. `NULL` if `length == 0`. */
   const char* data;
 
-  /** The length of the string fragment, in bytes.  May be zero. */
+  /** The length of the string fragment, in bytes (may be zero). */
   size_t length;
 } GumboStringPiece;
 
@@ -97,31 +68,36 @@ typedef struct {
 extern const GumboStringPiece kGumboEmptyString;
 
 /**
- * Compares two GumboStringPieces, and returns true if they're equal or false
- * otherwise.
+ * Compares two `GumboStringPiece`s, and returns `true` if they're
+ * equal or `false` otherwise.
  */
-bool gumbo_string_equals(
-    const GumboStringPiece* str1, const GumboStringPiece* str2);
+bool gumbo_string_equals (
+  const GumboStringPiece* str1,
+  const GumboStringPiece* str2
+);
 
 /**
- * Compares two GumboStringPieces ignoring case, and returns true if they're
- * equal or false otherwise.
+ * Compares two `GumboStringPiece`s, ignoring case, and returns `true`
+ * if they're equal or `false` otherwise.
  */
-bool gumbo_string_equals_ignore_case(
-    const GumboStringPiece* str1, const GumboStringPiece* str2);
+bool gumbo_string_equals_ignore_case (
+  const GumboStringPiece* str1,
+  const GumboStringPiece* str2
+);
 
 /**
- * A simple vector implementation.  This stores a pointer to a data array and a
- * length.  All elements are stored as void*; client code must cast to the
- * appropriate type.  Overflows upon addition result in reallocation of the data
- * array, with the size doubling to maintain O(1) amortized cost.  There is no
- * removal function, as this isn't needed for any of the operations within this
- * library.  Iteration can be done through inspecting the structure directly in
- * a for-loop.
+ * A simple vector implementation. This stores a pointer to a data array
+ * and a length. All elements are stored as `void*`; client code must
+ * cast to the appropriate type. Overflows upon addition result in
+ * reallocation of the data array, with the size doubling to maintain
+ * `O(1)` amortized cost. There is no removal function, as this isn't
+ * needed for any of the operations within this library. Iteration can
+ * be done through inspecting the structure directly in a `for` loop.
  */
 typedef struct {
-  /** Data elements.  This points to a dynamically-allocated array of capacity
-   * elements, each a void* to the element itself.
+  /**
+   * Data elements. This points to a dynamically-allocated array of
+   * `capacity` elements, each a `void*` to the element itself.
    */
   void** data;
 
@@ -132,82 +108,229 @@ typedef struct {
   unsigned int capacity;
 } GumboVector;
 
-/** An empty (0-length, 0-capacity) GumboVector. */
+/** An empty (0-length, 0-capacity) `GumboVector`. */
 extern const GumboVector kGumboEmptyVector;
 
 /**
- * Returns the first index at which an element appears in this vector (testing
- * by pointer equality), or -1 if it never does.
+ * Returns the first index at which an element appears in this vector
+ * (testing by pointer equality), or `-1` if it never does.
  */
 int gumbo_vector_index_of(GumboVector* vector, const void* element);
 
 /**
- * An enum for all the tags defined in the HTML5 standard.  These correspond to
- * the tag names themselves.  Enum constants exist only for tags which appear in
- * the spec itself (or for tags with special handling in the SVG and MathML
- * namespaces); any other tags appear as GUMBO_TAG_UNKNOWN and the actual tag
- * name can be obtained through original_tag.
+ * An `enum` for all the tags defined in the HTML5 standard. These
+ * correspond to the tag names themselves. Enum constants exist only
+ * for tags that appear in the spec itself (or for tags with special
+ * handling in the SVG and MathML namespaces). Any other tags appear
+ * as `GUMBO_TAG_UNKNOWN` and the actual tag name can be obtained
+ * through `original_tag`.
  *
- * This is mostly for API convenience, so that clients of this library don't
- * need to perform a strcasecmp to find the normalized tag name.  It also has
- * efficiency benefits, by letting the parser work with enums instead of
- * strings.
+ * This is mostly for API convenience, so that clients of this library
+ * don't need to perform a `strcasecmp` to find the normalized tag
+ * name. It also has efficiency benefits, by letting the parser work
+ * with enums instead of strings.
  */
 typedef enum {
-// Load all the tags from an external source, generated from tag.in.
-#include "tag_enum.h"
-  // Used for all tags that don't have special handling in HTML.  Add new tags
-  // to the end of tag.in so as to preserve backwards-compatibility.
+  GUMBO_TAG_HTML,
+  GUMBO_TAG_HEAD,
+  GUMBO_TAG_TITLE,
+  GUMBO_TAG_BASE,
+  GUMBO_TAG_LINK,
+  GUMBO_TAG_META,
+  GUMBO_TAG_STYLE,
+  GUMBO_TAG_SCRIPT,
+  GUMBO_TAG_NOSCRIPT,
+  GUMBO_TAG_TEMPLATE,
+  GUMBO_TAG_BODY,
+  GUMBO_TAG_ARTICLE,
+  GUMBO_TAG_SECTION,
+  GUMBO_TAG_NAV,
+  GUMBO_TAG_ASIDE,
+  GUMBO_TAG_H1,
+  GUMBO_TAG_H2,
+  GUMBO_TAG_H3,
+  GUMBO_TAG_H4,
+  GUMBO_TAG_H5,
+  GUMBO_TAG_H6,
+  GUMBO_TAG_HGROUP,
+  GUMBO_TAG_HEADER,
+  GUMBO_TAG_FOOTER,
+  GUMBO_TAG_ADDRESS,
+  GUMBO_TAG_P,
+  GUMBO_TAG_HR,
+  GUMBO_TAG_PRE,
+  GUMBO_TAG_BLOCKQUOTE,
+  GUMBO_TAG_OL,
+  GUMBO_TAG_UL,
+  GUMBO_TAG_LI,
+  GUMBO_TAG_DL,
+  GUMBO_TAG_DT,
+  GUMBO_TAG_DD,
+  GUMBO_TAG_FIGURE,
+  GUMBO_TAG_FIGCAPTION,
+  GUMBO_TAG_MAIN,
+  GUMBO_TAG_DIV,
+  GUMBO_TAG_A,
+  GUMBO_TAG_EM,
+  GUMBO_TAG_STRONG,
+  GUMBO_TAG_SMALL,
+  GUMBO_TAG_S,
+  GUMBO_TAG_CITE,
+  GUMBO_TAG_Q,
+  GUMBO_TAG_DFN,
+  GUMBO_TAG_ABBR,
+  GUMBO_TAG_DATA,
+  GUMBO_TAG_TIME,
+  GUMBO_TAG_CODE,
+  GUMBO_TAG_VAR,
+  GUMBO_TAG_SAMP,
+  GUMBO_TAG_KBD,
+  GUMBO_TAG_SUB,
+  GUMBO_TAG_SUP,
+  GUMBO_TAG_I,
+  GUMBO_TAG_B,
+  GUMBO_TAG_U,
+  GUMBO_TAG_MARK,
+  GUMBO_TAG_RUBY,
+  GUMBO_TAG_RT,
+  GUMBO_TAG_RP,
+  GUMBO_TAG_BDI,
+  GUMBO_TAG_BDO,
+  GUMBO_TAG_SPAN,
+  GUMBO_TAG_BR,
+  GUMBO_TAG_WBR,
+  GUMBO_TAG_INS,
+  GUMBO_TAG_DEL,
+  GUMBO_TAG_IMAGE,
+  GUMBO_TAG_IMG,
+  GUMBO_TAG_IFRAME,
+  GUMBO_TAG_EMBED,
+  GUMBO_TAG_OBJECT,
+  GUMBO_TAG_PARAM,
+  GUMBO_TAG_VIDEO,
+  GUMBO_TAG_AUDIO,
+  GUMBO_TAG_SOURCE,
+  GUMBO_TAG_TRACK,
+  GUMBO_TAG_CANVAS,
+  GUMBO_TAG_MAP,
+  GUMBO_TAG_AREA,
+  GUMBO_TAG_MATH,
+  GUMBO_TAG_MI,
+  GUMBO_TAG_MO,
+  GUMBO_TAG_MN,
+  GUMBO_TAG_MS,
+  GUMBO_TAG_MTEXT,
+  GUMBO_TAG_MGLYPH,
+  GUMBO_TAG_MALIGNMARK,
+  GUMBO_TAG_ANNOTATION_XML,
+  GUMBO_TAG_SVG,
+  GUMBO_TAG_FOREIGNOBJECT,
+  GUMBO_TAG_DESC,
+  GUMBO_TAG_TABLE,
+  GUMBO_TAG_CAPTION,
+  GUMBO_TAG_COLGROUP,
+  GUMBO_TAG_COL,
+  GUMBO_TAG_TBODY,
+  GUMBO_TAG_THEAD,
+  GUMBO_TAG_TFOOT,
+  GUMBO_TAG_TR,
+  GUMBO_TAG_TD,
+  GUMBO_TAG_TH,
+  GUMBO_TAG_FORM,
+  GUMBO_TAG_FIELDSET,
+  GUMBO_TAG_LEGEND,
+  GUMBO_TAG_LABEL,
+  GUMBO_TAG_INPUT,
+  GUMBO_TAG_BUTTON,
+  GUMBO_TAG_SELECT,
+  GUMBO_TAG_DATALIST,
+  GUMBO_TAG_OPTGROUP,
+  GUMBO_TAG_OPTION,
+  GUMBO_TAG_TEXTAREA,
+  GUMBO_TAG_KEYGEN,
+  GUMBO_TAG_OUTPUT,
+  GUMBO_TAG_PROGRESS,
+  GUMBO_TAG_METER,
+  GUMBO_TAG_DETAILS,
+  GUMBO_TAG_SUMMARY,
+  GUMBO_TAG_MENU,
+  GUMBO_TAG_MENUITEM,
+  GUMBO_TAG_APPLET,
+  GUMBO_TAG_ACRONYM,
+  GUMBO_TAG_BGSOUND,
+  GUMBO_TAG_DIR,
+  GUMBO_TAG_FRAME,
+  GUMBO_TAG_FRAMESET,
+  GUMBO_TAG_NOFRAMES,
+  GUMBO_TAG_LISTING,
+  GUMBO_TAG_XMP,
+  GUMBO_TAG_NEXTID,
+  GUMBO_TAG_NOEMBED,
+  GUMBO_TAG_PLAINTEXT,
+  GUMBO_TAG_RB,
+  GUMBO_TAG_STRIKE,
+  GUMBO_TAG_BASEFONT,
+  GUMBO_TAG_BIG,
+  GUMBO_TAG_BLINK,
+  GUMBO_TAG_CENTER,
+  GUMBO_TAG_FONT,
+  GUMBO_TAG_MARQUEE,
+  GUMBO_TAG_MULTICOL,
+  GUMBO_TAG_NOBR,
+  GUMBO_TAG_SPACER,
+  GUMBO_TAG_TT,
+  GUMBO_TAG_RTC,
+  GUMBO_TAG_DIALOG,
+  // Used for all tags that don't have special handling in HTML.
   GUMBO_TAG_UNKNOWN,
   // A marker value to indicate the end of the enum, for iterating over it.
-  // Also used as the terminator for varargs functions that take tags.
   GUMBO_TAG_LAST,
 } GumboTag;
 
 /**
- * Returns the normalized (usually all-lowercased, except for foreign content)
- * tag name for an GumboTag enum.  Return value is static data owned by the
- * library.
+ * Returns the normalized (all lower case) tag name for a `GumboTag` enum. The
+ * return value is static data owned by the library.
  */
 const char* gumbo_normalized_tagname(GumboTag tag);
 
 /**
- * Extracts the tag name from the original_text field of an element or token by
- * stripping off </> characters and attributes and adjusting the passed-in
- * GumboStringPiece appropriately.  The tag name is in the original case and
- * shares a buffer with the original text, to simplify memory management.
- * Behavior is undefined if a string-piece that doesn't represent an HTML tag
- * (<tagname> or </tagname>) is passed in.  If the string piece is completely
- * empty (NULL data pointer), then this function will exit successfully as a
- * no-op.
+ * Extracts the tag name from the `original_text` field of an element
+ * or token by stripping off `</>` characters and attributes and
+ * adjusting the passed-in `GumboStringPiece` appropriately. The tag
+ * name is in the original case and shares a buffer with the original
+ * text, to simplify memory management. Behavior is undefined if a
+ * string piece that doesn't represent an HTML tag (`<tagname>` or
+ * `</tagname>`) is passed in. If the string piece is completely
+ * empty (`NULL` data pointer), then this function will exit
+ * successfully as a no-op.
  */
 void gumbo_tag_from_original_text(GumboStringPiece* text);
 
 /**
- * Fixes the case of SVG elements that are not all lowercase.
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inforeign
- * This is not done at parse time because there's no place to store a mutated
- * tag name.  tag_name is an enum (which will be TAG_UNKNOWN for most SVG tags
- * without special handling), while original_tag_name is a pointer into the
- * original buffer.  Instead, we provide this helper function that clients can
- * use to rename SVG tags as appropriate.
- * Returns the case-normalized SVG tagname if a replacement is found, or NULL if
- * no normalization is called for.  The return value is static data and owned by
- * the library.
+ * Fixes the case of SVG elements that are not all lowercase. This is
+ * not done at parse time because there's no place to store a mutated
+ * tag name. `tag_name` is an enum (which will be `GUMBO_TAG_UNKNOWN` for
+ * most SVG tags without special handling), while `original_tag_name` is a
+ * pointer into the original buffer. Instead, we provide this helper
+ * function that clients can use to rename SVG tags as appropriate.
+ * Returns the case-normalized SVG tagname if a replacement is found, or
+ * `NULL` if no normalization is called for. The return value is static
+ * data and owned by the library.
+ *
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inforeign
  */
 const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
 
 /**
- * Converts a tag name string (which may be in upper or mixed case) to a tag
- * enum. The `tag` version expects `tagname` to be NULL-terminated
+ * Converts a tag name string (which may be in upper or mixed case) to a
+ * tag enum.
  */
-GumboTag gumbo_tag_enum(const char* tagname);
-GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
+GumboTag gumbo_tagn_enum(const char* tagname, size_t length);
 
 /**
  * Attribute namespaces.
- * HTML includes special handling for XLink, XML, and XMLNS namespaces on
- * attributes.  Everything else goes in the generic "NONE" namespace.
+ * HTML includes special handling for XLink, XML, and XMLNS namespaces
+ * on attributes. Everything else goes in the generic "NONE" namespace.
  */
 typedef enum {
   GUMBO_ATTR_NAMESPACE_NONE,
@@ -217,46 +340,47 @@ typedef enum {
 } GumboAttributeNamespaceEnum;
 
 /**
- * A struct representing a single attribute on an HTML tag.  This is a
- * name-value pair, but also includes information about source locations and
- * original source text.
+ * A struct representing a single attribute on an HTML tag. This is a
+ * name-value pair, but also includes information about source locations
+ * and original source text.
  */
 typedef struct {
   /**
-   * The namespace for the attribute.  This will usually be
-   * GUMBO_ATTR_NAMESPACE_NONE, but some XLink/XMLNS/XML attributes take special
-   * values, per:
-   * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-foreign-attributes
+   * The namespace for the attribute. This will usually be
+   * `GUMBO_ATTR_NAMESPACE_NONE`, but some XLink/XMLNS/XML attributes
+   * take special values, per:
+   * https://html.spec.whatwg.org/multipage/parsing.html#adjust-foreign-attributes
    */
   GumboAttributeNamespaceEnum attr_namespace;
 
   /**
-   * The name of the attribute.  This is in a freshly-allocated buffer to deal
-   * with case-normalization, and is null-terminated.
+   * The name of the attribute. This is in a freshly-allocated buffer to
+   * deal with case-normalization and is null-terminated.
    */
   const char* name;
 
   /**
-   * The original text of the attribute name, as a pointer into the original
-   * source buffer.
+   * The original text of the attribute name, as a pointer into the
+   * original source buffer.
    */
   GumboStringPiece original_name;
 
   /**
-   * The value of the attribute.  This is in a freshly-allocated buffer to deal
-   * with unescaping, and is null-terminated.  It does not include any quotes
-   * that surround the attribute.  If the attribute has no value (for example,
-   * 'selected' on a checkbox), this will be an empty string.
+   * The value of the attribute. This is in a freshly-allocated buffer
+   * to deal with unescaping and is null-terminated. It does not include
+   * any quotes that surround the attribute. If the attribute has no
+   * value (for example, `selected` on a checkbox) this will be an empty
+   * string.
    */
   const char* value;
 
   /**
-   * The original text of the value of the attribute.  This points into the
-   * original source buffer.  It includes any quotes that surround the
-   * attribute, and you can look at original_value.data[0] and
-   * original_value.data[original_value.length - 1] to determine what the quote
-   * characters were.  If the attribute has no value, this will be a 0-length
-   * string.
+   * The original text of the value of the attribute. This points into
+   * the original source buffer. It includes any quotes that surround
+   * the attribute and you can look at `original_value.data[0]` and
+   * `original_value.data[original_value.length - 1]` to determine what
+   * the quote characters were. If the attribute has no value this will
+   * be a 0-length string.
    */
   GumboStringPiece original_value;
 
@@ -264,9 +388,9 @@ typedef struct {
   GumboSourcePosition name_start;
 
   /**
-   * The ending position of the attribute name.  This is not always derivable
+   * The ending position of the attribute name. This is not always derivable
    * from the starting position of the value because of the possibility of
-   * whitespace around the = sign.
+   * whitespace around the `=` sign.
    */
   GumboSourcePosition name_end;
 
@@ -278,34 +402,37 @@ typedef struct {
 } GumboAttribute;
 
 /**
- * Given a vector of GumboAttributes, look up the one with the specified name
- * and return it, or NULL if no such attribute exists.  This uses a
- * case-insensitive match, as HTML is case-insensitive.
+ * Given a vector of `GumboAttribute`s, look up the one with the
+ * specified name and return it, or `NULL` if no such attribute exists.
+ * This uses a case-insensitive match, as HTML is case-insensitive.
  */
 GumboAttribute* gumbo_get_attribute(const GumboVector* attrs, const char* name);
 
 /**
- * Enum denoting the type of node.  This determines the type of the node.v
- * union.
+ * Enum denoting the type of node. This determines the type of the
+ * `node.v` union.
  */
 typedef enum {
-  /** Document node.  v will be a GumboDocument. */
+  /** Document node. `v` will be a `GumboDocument`. */
   GUMBO_NODE_DOCUMENT,
-  /** Element node.  v will be a GumboElement. */
+  /** Element node. `v` will be a `GumboElement`. */
   GUMBO_NODE_ELEMENT,
-  /** Text node.  v will be a GumboText. */
+  /** Text node. `v` will be a `GumboText`. */
   GUMBO_NODE_TEXT,
-  /** CDATA node. v will be a GumboText. */
+  /** CDATA node. `v` will be a `GumboText`. */
   GUMBO_NODE_CDATA,
-  /** Comment node.  v will be a GumboText, excluding comment delimiters. */
+  /** Comment node. `v` will be a `GumboText`, excluding comment delimiters. */
   GUMBO_NODE_COMMENT,
-  /** Text node, where all contents is whitespace.  v will be a GumboText. */
+  /** Text node, where all content is whitespace. `v` will be a `GumboText`. */
   GUMBO_NODE_WHITESPACE,
-  /** Template node.  This is separate from GUMBO_NODE_ELEMENT because many
-   * client libraries will want to ignore the contents of template nodes, as
-   * the spec suggests.  Recursing on GUMBO_NODE_ELEMENT will do the right thing
-   * here, while clients that want to include template contents should also
-   * check for GUMBO_NODE_TEMPLATE.  v will be a GumboElement.  */
+  /**
+   * Template node. This is separate from `GUMBO_NODE_ELEMENT` because
+   * many client libraries will want to ignore the contents of template
+   * nodes, as the spec suggests. Recursing on `GUMBO_NODE_ELEMENT` will
+   * do the right thing here, while clients that want to include template
+   * contents should also check for `GUMBO_NODE_TEMPLATE`. `v` will be a
+   * `GumboElement`.
+   */
   GUMBO_NODE_TEMPLATE
 } GumboNodeType;
 
@@ -315,9 +442,7 @@ typedef enum {
  */
 typedef struct GumboInternalNode GumboNode;
 
-/**
- * http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode
- */
+/** https://dom.spec.whatwg.org/#concept-document-quirks */
 typedef enum {
   GUMBO_DOCTYPE_NO_QUIRKS,
   GUMBO_DOCTYPE_QUIRKS,
@@ -326,10 +451,11 @@ typedef enum {
 
 /**
  * Namespaces.
- * Unlike in X(HT)ML, namespaces in HTML5 are not denoted by a prefix.  Rather,
- * anything inside an <svg> tag is in the SVG namespace, anything inside the
- * <math> tag is in the MathML namespace, and anything else is inside the HTML
- * namespace.  No other namespaces are supported, so this can be an enum only.
+ * Unlike in X(HT)ML, namespaces in HTML5 are not denoted by a prefix.
+ * Rather, anything inside an `<svg>` tag is in the SVG namespace,
+ * anything inside the `<math>` tag is in the MathML namespace, and
+ * anything else is inside the HTML namespace. No other namespaces are
+ * supported, so this can be an `enum`.
  */
 typedef enum {
   GUMBO_NAMESPACE_HTML,
@@ -339,66 +465,70 @@ typedef enum {
 
 /**
  * Parse flags.
- * We track the reasons for parser insertion of nodes and store them in a
- * bitvector in the node itself.  This lets client code optimize out nodes that
- * are implied by the HTML structure of the document, or flag constructs that
- * may not be allowed by a style guide, or track the prevalence of incorrect or
- * tricky HTML code.
+ * We track the reasons for parser insertion of nodes and store them in
+ * a bitvector in the node itself. This lets client code optimize out
+ * nodes that are implied by the HTML structure of the document, or flag
+ * constructs that may not be allowed by a style guide, or track the
+ * prevalence of incorrect or tricky HTML code.
  */
 typedef enum {
   /**
-   * A normal node - both start and end tags appear in the source, nothing has
-   * been reparented.
+   * A normal node -- both start and end tags appear in the source,
+   * nothing has been reparented.
    */
   GUMBO_INSERTION_NORMAL = 0,
 
   /**
-   * A node inserted by the parser to fulfill some implicit insertion rule.
-   * This is usually set in addition to some other flag giving a more specific
-   * insertion reason; it's a generic catch-all term meaning "The start tag for
-   * this node did not appear in the document source".
+   * A node inserted by the parser to fulfill some implicit insertion
+   * rule. This is usually set in addition to some other flag giving a
+   * more specific insertion reason; it's a generic catch-all term
+   * meaning "The start tag for this node did not appear in the document
+   * source".
    */
   GUMBO_INSERTION_BY_PARSER = 1 << 0,
 
   /**
-   * A flag indicating that the end tag for this node did not appear in the
-   * document source.  Note that in some cases, you can still have
-   * parser-inserted nodes with an explicit end tag: for example, "Text</html>"
-   * has GUMBO_INSERTED_BY_PARSER set on the <html> node, but
-   * GUMBO_INSERTED_END_TAG_IMPLICITLY is unset, as the </html> tag actually
-   * exists.  This flag will be set only if the end tag is completely missing;
-   * in some cases, the end tag may be misplaced (eg. a </body> tag with text
-   * afterwards), which will leave this flag unset and require clients to
-   * inspect the parse errors for that case.
+   * A flag indicating that the end tag for this node did not appear in
+   * the document source. Note that in some cases, you can still have
+   * parser-inserted nodes with an explicit end tag. For example,
+   * `Text</html>` has `GUMBO_INSERTION_BY_PARSER` set on the `<html>`
+   * node, but `GUMBO_INSERTION_IMPLICIT_END_TAG` is unset, as the
+   * `</html>` tag actually exists.
+   *
+   * This flag will be set only if the end tag is completely missing.
+   * In some cases, the end tag may be misplaced (e.g. a `</body>` tag
+   * with text afterwards), which will leave this flag unset and require
+   * clients to inspect the parse errors for that case.
    */
   GUMBO_INSERTION_IMPLICIT_END_TAG = 1 << 1,
 
   // Value 1 << 2 was for a flag that has since been removed.
 
   /**
-   * A flag for nodes that are inserted because their presence is implied by
-   * other tags, eg. <html>, <head>, <body>, <tbody>, etc.
+   * A flag for nodes that are inserted because their presence is
+   * implied by other tags, e.g. `<html>`, `<head>`, `<body>`,
+   * `<tbody>`, etc.
    */
   GUMBO_INSERTION_IMPLIED = 1 << 3,
 
   /**
-   * A flag for nodes that are converted from their end tag equivalents.  For
-   * example, </p> when no paragraph is open implies that the parser should
-   * create a <p> tag and immediately close it, while </br> means the same thing
-   * as <br>.
+   * A flag for nodes that are converted from their end tag equivalents.
+   * For example, `</p>` when no paragraph is open implies that the
+   * parser should create a `<p>` tag and immediately close it, while
+   * `</br>` means the same thing as `<br>`.
    */
   GUMBO_INSERTION_CONVERTED_FROM_END_TAG = 1 << 4,
 
-  /** A flag for nodes that are converted from the parse of an <isindex> tag. */
-  GUMBO_INSERTION_FROM_ISINDEX = 1 << 5,
+  // Value 1 << 5 was for a flag that has since been removed.
 
-  /** A flag for <image> tags that are rewritten as <img>. */
+  /** A flag for `<image>` tags that are rewritten as `<img>`. */
   GUMBO_INSERTION_FROM_IMAGE = 1 << 6,
 
   /**
-   * A flag for nodes that are cloned as a result of the reconstruction of
-   * active formatting elements.  This is set only on the clone; the initial
-   * portion of the formatting run is a NORMAL node with an IMPLICIT_END_TAG.
+   * A flag for nodes that are cloned as a result of the reconstruction
+   * of active formatting elements. This is set only on the clone; the
+   * initial portion of the formatting run is a NORMAL node with an
+   * `IMPLICIT_END_TAG`.
    */
   GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT = 1 << 7,
 
@@ -415,18 +545,19 @@ typedef enum {
   GUMBO_INSERTION_FOSTER_PARENTED = 1 << 10,
 } GumboParseFlags;
 
-/**
- * Information specific to document nodes.
- */
+/** Information specific to document nodes. */
 typedef struct {
   /**
-   * An array of GumboNodes, containing the children of this element.  This will
-   * normally consist of the <html> element and any comment nodes found.
-   * Pointers are owned.
+   * An array of `GumboNode`s, containing the children of this element.
+   * This will normally consist of the `<html>` element and any comment
+   * nodes found. Pointers are owned.
    */
   GumboVector /* GumboNode* */ children;
 
-  // True if there was an explicit doctype token as opposed to it being omitted.
+  /**
+   * `true` if there was an explicit doctype token, as opposed to it
+   * being omitted.
+   */
   bool has_doctype;
 
   // Fields from the doctype token, copied verbatim.
@@ -435,65 +566,70 @@ typedef struct {
   const char* system_identifier;
 
   /**
-   * Whether or not the document is in QuirksMode, as determined by the values
-   * in the GumboTokenDocType template.
+   * Whether or not the document is in QuirksMode, as determined by the
+   * values in the GumboTokenDocType template.
    */
   GumboQuirksModeEnum doc_type_quirks_mode;
 } GumboDocument;
 
 /**
- * The struct used to represent TEXT, CDATA, COMMENT, and WHITESPACE elements.
- * This contains just a block of text and its position.
+ * The struct used to represent TEXT, CDATA, COMMENT, and WHITESPACE
+ * elements. This contains just a block of text and its position.
  */
 typedef struct {
   /**
-   * The text of this node, after entities have been parsed and decoded.  For
-   * comment/cdata nodes, this does not include the comment delimiters.
+   * The text of this node, after entities have been parsed and decoded.
+   * For comment and cdata nodes, this does not include the comment
+   * delimiters.
    */
   const char* text;
 
   /**
-   * The original text of this node, as a pointer into the original buffer.  For
-   * comment/cdata nodes, this includes the comment delimiters.
+   * The original text of this node, as a pointer into the original
+   * buffer. For comment/cdata nodes, this includes the comment
+   * delimiters.
    */
   GumboStringPiece original_text;
 
   /**
-   * The starting position of this node.  This corresponds to the position of
-   * original_text, before entities are decoded.
+   * The starting position of this node. This corresponds to the
+   * position of `original_text`, before entities are decoded.
    * */
   GumboSourcePosition start_pos;
 } GumboText;
 
 /**
- * The struct used to represent all HTML elements.  This contains information
- * about the tag, attributes, and child nodes.
+ * The struct used to represent all HTML elements. This contains
+ * information about the tag, attributes, and child nodes.
  */
 typedef struct {
   /**
-   * An array of GumboNodes, containing the children of this element.  Pointers
-   * are owned.
+   * An array of `GumboNode`s, containing the children of this element.
+   * Pointers are owned.
    */
   GumboVector /* GumboNode* */ children;
 
   /** The GumboTag enum for this element. */
   GumboTag tag;
 
+  /** The name for this element. */
+  const char* name;
+
   /** The GumboNamespaceEnum for this element. */
   GumboNamespaceEnum tag_namespace;
 
   /**
-   * A GumboStringPiece pointing to the original tag text for this element,
-   * pointing directly into the source buffer.  If the tag was inserted
-   * algorithmically (for example, <head> or <tbody> insertion), this will be a
-   * zero-length string.
+   * A `GumboStringPiece` pointing to the original tag text for this
+   * element, pointing directly into the source buffer. If the tag was
+   * inserted algorithmically (for example, `<head>` or `<tbody>`
+   * insertion), this will be a zero-length string.
    */
   GumboStringPiece original_tag;
 
   /**
-   * A GumboStringPiece pointing to the original end tag text for this element.
-   * If the end tag was inserted algorithmically, (for example, closing a
-   * self-closing tag), this will be a zero-length string.
+   * A `GumboStringPiece` pointing to the original end tag text for this
+   * element. If the end tag was inserted algorithmically (for example,
+   * closing a self-closing tag), this will be a zero-length string.
    */
   GumboStringPiece original_end_tag;
 
@@ -504,30 +640,31 @@ typedef struct {
   GumboSourcePosition end_pos;
 
   /**
-   * An array of GumboAttributes, containing the attributes for this tag in the
-   * order that they were parsed.  Pointers are owned.
+   * An array of `GumboAttribute`s, containing the attributes for this
+   * tag in the order that they were parsed. Pointers are owned.
    */
   GumboVector /* GumboAttribute* */ attributes;
 } GumboElement;
 
 /**
- * A supertype for GumboElement and GumboText, so that we can include one
- * generic type in lists of children and cast as necessary to subtypes.
+ * A supertype for `GumboElement` and `GumboText`, so that we can
+ * include one generic type in lists of children and cast as necessary
+ * to subtypes.
  */
 struct GumboInternalNode {
   /** The type of node that this is. */
   GumboNodeType type;
 
-  /** Pointer back to parent node.  Not owned. */
+  /** Pointer back to parent node. Not owned. */
   GumboNode* parent;
 
   /** The index within the parent's children vector of this node. */
-  size_t index_within_parent;
+  unsigned int index_within_parent;
 
   /**
-   * A bitvector of flags containing information about why this element was
-   * inserted into the parse tree, including a variety of special parse
-   * situations.
+   * A bitvector of flags containing information about why this element
+   * was inserted into the parse tree, including a variety of special
+   * parse situations.
    */
   GumboParseFlags parse_flags;
 
@@ -539,81 +676,55 @@ struct GumboInternalNode {
   } v;
 };
 
-/**
- * The type for an allocator function.  Takes the 'userdata' member of the
- * GumboParser struct as its first argument.  Semantics should be the same as
- * malloc, i.e. return a block of size_t bytes on success or NULL on failure.
- * Allocating a block of 0 bytes behaves as per malloc.
- */
-// TODO(jdtang): Add checks throughout the codebase for out-of-memory condition.
-typedef void* (*GumboAllocatorFunction)(void* userdata, size_t size);
-
-/**
- * The type for a deallocator function.  Takes the 'userdata' member of the
- * GumboParser struct as its first argument.
- */
-typedef void (*GumboDeallocatorFunction)(void* userdata, void* ptr);
-
 /**
  * Input struct containing configuration options for the parser.
- * These let you specify alternate memory managers, provide different error
- * handling, etc.
- * Use kGumboDefaultOptions for sensible defaults, and only set what you need.
+ * These let you adjust source-position tracking, error handling, and
+ * fragment parsing. Use `kGumboDefaultOptions` for sensible defaults
+ * and only set what you need.
  */
 typedef struct GumboInternalOptions {
-  /** A memory allocator function.  Default: malloc. */
-  GumboAllocatorFunction allocator;
-
-  /** A memory deallocator function. Default: free. */
-  GumboDeallocatorFunction deallocator;
-
   /**
-   * An opaque object that's passed in as the first argument to all callbacks
-   * used by this library.  Default: NULL.
-   */
-  void* userdata;
-
-  /**
-   * The tab-stop size, for computing positions in source code that uses tabs.
-   * Default: 8.
+   * The tab-stop size, for computing positions in HTML files that
+   * use tabs. Default: `8`.
    */
   int tab_stop;
 
   /**
    * Whether or not to stop parsing when the first error is encountered.
-   * Default: false.
+   * Default: `false`.
    */
   bool stop_on_first_error;
 
   /**
-   * The maximum number of errors before the parser stops recording them.  This
-   * is provided so that if the page is totally borked, we don't completely fill
-   * up the errors vector and exhaust memory with useless redundant errors.  Set
-   * to -1 to disable the limit.
-   * Default: -1
+   * The maximum number of errors before the parser stops recording
+   * them. This is provided so that if the page is totally borked, we
+   * don't completely fill up the errors vector and exhaust memory with
+   * useless redundant errors. Set to `-1` to disable the limit.
+   * Default: `-1`.
    */
   int max_errors;
 
   /**
    * The fragment context for parsing:
-   * https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
+   * https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
    *
-   * If GUMBO_TAG_LAST is passed here, it is assumed to be "no fragment", i.e.
-   * the regular parsing algorithm.  Otherwise, pass the tag enum for the
-   * intended parent of the parsed fragment.  We use just the tag enum rather
-   * than a full node because that's enough to set all the parsing context we
-   * need, and it provides some additional flexibility for client code to act as
-   * if parsing a fragment even when a full HTML tree isn't available.
+   * If `GUMBO_TAG_LAST` is passed here, it is assumed to be "no
+   * fragment", i.e. the regular parsing algorithm. Otherwise, pass the
+   * tag enum for the intended parent of the parsed fragment. We use
+   * just the tag enum rather than a full node because that's enough to
+   * set all the parsing context we need and it provides some additional
+   * flexibility for client code to act as if parsing a fragment even
+   * when a full HTML tree isn't available.
    *
-   * Default: GUMBO_TAG_LAST
+   * Default: `GUMBO_TAG_LAST`.
    */
   GumboTag fragment_context;
 
   /**
-   * The namespace for the fragment context.  This lets client code
-   * differentiate between, say, parsing a <title> tag in SVG vs. parsing it in
-   * HTML.
-   * Default: GUMBO_NAMESPACE_HTML
+   * The namespace for the fragment context. This lets client code
+   * differentiate between, say, parsing a `<title>` tag in SVG vs.
+   * parsing it in HTML.
+   * Default: `GUMBO_NAMESPACE_HTML`.
    */
   GumboNamespaceEnum fragment_namespace;
 } GumboOptions;
@@ -621,51 +732,70 @@ typedef struct GumboInternalOptions {
 /** Default options struct; use this with gumbo_parse_with_options. */
 extern const GumboOptions kGumboDefaultOptions;
 
+/** Status codes for a parse; reported in the `status` field of `GumboOutput`. */
+typedef enum {
+  GUMBO_STATUS_OK,
+  GUMBO_STATUS_OUT_OF_MEMORY,
+  GUMBO_STATUS_TREE_TOO_DEEP
+} GumboOutputStatus;
+
 /** The output struct containing the results of the parse. */
 typedef struct GumboInternalOutput {
   /**
-   * Pointer to the document node.  This is a GumboNode of type NODE_DOCUMENT
-   * that contains the entire document as its child.
+   * Pointer to the document node. This is a `GumboNode` of type
+   * `GUMBO_NODE_DOCUMENT` that contains the entire document as its child.
    */
   GumboNode* document;
 
   /**
-   * Pointer to the root node.  This the <html> tag that forms the root of the
-   * document.
+   * Pointer to the root node. This is the `<html>` tag that forms the
+   * root of the document.
    */
   GumboNode* root;
 
   /**
    * A list of errors that occurred during the parse.
    * NOTE: In version 1.0 of this library, the API for errors hasn't been fully
-   * fleshed out and may change in the future.  For this reason, the GumboError
-   * header isn't part of the public API.  Contact us if you need errors
+   * fleshed out and may change in the future. For this reason, the GumboError
+   * header isn't part of the public API. Contact us if you need errors
    * reported so we can work out something appropriate for your use-case.
    */
   GumboVector /* GumboError */ errors;
+
+  /**
+   * A status code indicating whether parsing finished successfully or was
+   * stopped mid-document due to exceptional circumstances.
+   */
+  GumboOutputStatus status;
 } GumboOutput;
 
 /**
- * Parses a buffer of UTF8 text into an GumboNode parse tree.  The buffer must
- * live at least as long as the parse tree, as some fields (eg. original_text)
- * point directly into the original buffer.
+ * Parses a buffer of UTF-8 text into a `GumboNode` parse tree. The
+ * buffer must live at least as long as the parse tree, as some fields
+ * (e.g. `original_text`) point directly into the original buffer.
  *
  * This doesn't support buffers longer than 4 gigabytes.
  */
 GumboOutput* gumbo_parse(const char* buffer);
 
 /**
- * Extended version of gumbo_parse that takes an explicit options structure,
- * buffer, and length.
+ * Extended version of `gumbo_parse` that takes an explicit options
+ * structure, buffer, and length.
  */
-GumboOutput* gumbo_parse_with_options(
-    const GumboOptions* options, const char* buffer, size_t buffer_length);
+GumboOutput* gumbo_parse_with_options (
+  const GumboOptions* options,
+  const char* buffer,
+  size_t buffer_length
+);
+
+/** Convert a `GumboOutputStatus` code into a readable description. */
+const char* gumbo_status_to_string(GumboOutputStatus status);
 
-/** Release the memory used for the parse tree & parse errors. */
-void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
+/** Release the memory used for the parse tree and parse errors. */
+void gumbo_destroy_output(GumboOutput* output);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // GUMBO_GUMBO_H_
+#endif // GUMBO_H
diff --git a/gumbo-parser/src/insertion_mode.h b/gumbo-parser/src/insertion_mode.h
index 45134c13..6cb1d341 100644
--- a/gumbo-parser/src/insertion_mode.h
+++ b/gumbo-parser/src/insertion_mode.h
@@ -1,29 +1,9 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-
 #ifndef GUMBO_INSERTION_MODE_H_
 #define GUMBO_INSERTION_MODE_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#insertion-mode
-// If new enum values are added, be sure to update the kTokenHandlers dispatch
-// table in parser.c.
+// https://html.spec.whatwg.org/multipage/parsing.html#insertion-mode
+// If new enum values are added, be sure to update the kTokenHandlers
+// dispatch table in parser.c.
 typedef enum {
   GUMBO_INSERTION_MODE_INITIAL,
   GUMBO_INSERTION_MODE_BEFORE_HTML,
@@ -50,8 +30,4 @@ typedef enum {
   GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET
 } GumboInsertionMode;
 
-#ifdef __cplusplus
-}  // extern C
-#endif
-
-#endif  // GUMBO_INSERTION_MODE_H_
+#endif // GUMBO_INSERTION_MODE_H_
diff --git a/gumbo-parser/src/macros.h b/gumbo-parser/src/macros.h
new file mode 100644
index 00000000..ccf8bb86
--- /dev/null
+++ b/gumbo-parser/src/macros.h
@@ -0,0 +1,91 @@
+#ifndef MACROS_H
+#define MACROS_H
+
+#if (!defined(__STDC_VERSION__) || !(__STDC_VERSION__ >= 199901L)) \
+    && !defined(_WIN32) && !defined(__cplusplus)
+# error C99 compiler required
+#endif
+
+#if defined(_WIN32)
+# define inline __inline
+# define __func__ __FUNCTION__
+#endif
+
+// Calculate the number of elements in an array.
+// The extra division on the third line is a trick to help prevent
+// passing a pointer to the first element of an array instead of a
+// reference to the array itself.
+#define ARRAY_COUNT(x) ( \
+    (sizeof(x) / sizeof((x)[0])) \
+    / ((size_t)(!(sizeof(x) % sizeof((x)[0])))) \
+)
+
+#ifdef NDEBUG
+    #define UNUSED_IF_NDEBUG(x) (void)(x)
+#else
+    #define UNUSED_IF_NDEBUG(x)
+#endif
+
+#ifdef __GNUC__
+    #define GNUC_AT_LEAST(major, minor) ( \
+        (__GNUC__ > major) \
+        || ((__GNUC__ == major) && (__GNUC_MINOR__ >= minor)) )
+#else
+    #define GNUC_AT_LEAST(major, minor) 0
+#endif
+
+#ifdef __has_attribute
+    #define HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+    #define HAS_ATTRIBUTE(x) 0
+#endif
+
+#if GNUC_AT_LEAST(3, 0) || HAS_ATTRIBUTE(unused) || defined(__TINYC__)
+    #define UNUSED __attribute__((__unused__))
+#else
+    #define UNUSED
+#endif
+
+#if GNUC_AT_LEAST(3, 0)
+    #define MALLOC __attribute__((__malloc__))
+    #define PRINTF(x) __attribute__((__format__(__printf__, (x), (x + 1))))
+    #define PURE __attribute__((__pure__))
+    #define CONST_FN __attribute__((__const__))
+#else
+    #define MALLOC
+    #define PRINTF(x)
+    #define PURE
+    #define CONST_FN
+#endif
+
+#define UNUSED_ARG(x) unused__ ## x UNUSED
+
+#if GNUC_AT_LEAST(3, 0) && defined(__OPTIMIZE__)
+    #define likely(x) __builtin_expect(!!(x), 1)
+    #define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+    #define likely(x) (x)
+    #define unlikely(x) (x)
+#endif
+
+#if GNUC_AT_LEAST(3, 3) || HAS_ATTRIBUTE(nonnull)
+    #define NONNULL_ARGS __attribute__((__nonnull__))
+#else
+    #define NONNULL_ARGS
+#endif
+
+#if GNUC_AT_LEAST(3, 4) || HAS_ATTRIBUTE(warn_unused_result)
+    #define WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
+#else
+    #define WARN_UNUSED_RESULT
+#endif
+
+#if GNUC_AT_LEAST(5, 0) || HAS_ATTRIBUTE(returns_nonnull)
+    #define RETURNS_NONNULL __attribute__((__returns_nonnull__))
+#else
+    #define RETURNS_NONNULL
+#endif
+
+#define XMALLOC MALLOC RETURNS_NONNULL
+
+#endif // ndef MACROS_H
diff --git a/gumbo-parser/src/parser.c b/gumbo-parser/src/parser.c
index dc692b3e..41dc1595 100644
--- a/gumbo-parser/src/parser.c
+++ b/gumbo-parser/src/parser.c
@@ -1,310 +1,181 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
+/*
+ Copyright 2017-2018 Craig Barnes.
+ Copyright 2010 Google Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
 
 #include <assert.h>
-#include <ctype.h>
 #include <stdarg.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
-#include <strings.h>
 
+#include "ascii.h"
 #include "attribute.h"
 #include "error.h"
 #include "gumbo.h"
 #include "insertion_mode.h"
+#include "macros.h"
 #include "parser.h"
+#include "replacement.h"
 #include "tokenizer.h"
 #include "tokenizer_states.h"
 #include "utf8.h"
 #include "util.h"
 #include "vector.h"
 
-#define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
-
-#define GUMBO_STRING(literal) \
-  { literal, sizeof(literal) - 1 }
-#define TERMINATOR \
-  { "", 0 }
-
-typedef char gumbo_tagset[GUMBO_TAG_LAST];
+typedef uint8_t TagSet[GUMBO_TAG_LAST + 1];
 #define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
 #define TAG_SVG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_SVG)
 #define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
 
-#define TAGSET_INCLUDES(tagset, namespace, tag) \
-  (tag < GUMBO_TAG_LAST && tagset[(int) tag] == (1 << (int) namespace))
-
-// selected forward declarations as it is getting hard to find
-// an appropriate order
-static bool node_html_tag_is(const GumboNode*, GumboTag);
-static GumboInsertionMode get_current_template_insertion_mode(
-    const GumboParser*);
-static bool handle_in_template(GumboParser*, GumboToken*);
-static void destroy_node(GumboParser*, GumboNode*);
-
-static void* malloc_wrapper(void* unused, size_t size) { return malloc(size); }
+static const GumboSourcePosition kGumboEmptySourcePosition = { // all fields zero
+  .line = 0,
+  .column = 0,
+  .offset = 0
+};
 
-static void free_wrapper(void* unused, void* ptr) { free(ptr); }
+const GumboOptions kGumboDefaultOptions = {
+  .tab_stop = 8,
+  .stop_on_first_error = false,
+  .max_errors = -1,
+  .fragment_context = GUMBO_TAG_LAST,
+  .fragment_namespace = GUMBO_NAMESPACE_HTML
+};
 
-const GumboOptions kGumboDefaultOptions = {&malloc_wrapper, &free_wrapper, NULL,
-    8, false, -1, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML};
+#define STRING(s) {.data = s, .length = sizeof(s) - 1}
+#define TERMINATOR {.data = "", .length = 0}
 
-static const GumboStringPiece kDoctypeHtml = GUMBO_STRING("html");
 static const GumboStringPiece kPublicIdHtml4_0 =
-    GUMBO_STRING("-//W3C//DTD HTML 4.0//EN");
+  STRING("-//W3C//DTD HTML 4.0//EN");
 static const GumboStringPiece kPublicIdHtml4_01 =
-    GUMBO_STRING("-//W3C//DTD HTML 4.01//EN");
+  STRING("-//W3C//DTD HTML 4.01//EN");
 static const GumboStringPiece kPublicIdXhtml1_0 =
-    GUMBO_STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
+  STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
 static const GumboStringPiece kPublicIdXhtml1_1 =
-    GUMBO_STRING("-//W3C//DTD XHTML 1.1//EN");
+  STRING("-//W3C//DTD XHTML 1.1//EN");
 static const GumboStringPiece kSystemIdRecHtml4_0 =
-    GUMBO_STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
+  STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
 static const GumboStringPiece kSystemIdHtml4 =
-    GUMBO_STRING("http://www.w3.org/TR/html4/strict.dtd");
+  STRING("http://www.w3.org/TR/html4/strict.dtd");
 static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
-    GUMBO_STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
+  STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
 static const GumboStringPiece kSystemIdXhtml1_1 =
-    GUMBO_STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
+  STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
 static const GumboStringPiece kSystemIdLegacyCompat =
-    GUMBO_STRING("about:legacy-compat");
+  STRING("about:legacy-compat");
 
 // The doctype arrays have an explicit terminator because we want to pass them
 // to a helper function, and passing them as a pointer discards sizeof
-// information.  The SVG arrays are used only by one-off functions, and so loops
+// information. The SVG arrays are used only by one-off functions, and so loops
 // over them use sizeof directly instead of a terminator.
 
 static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
-    GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
-    GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
-    GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
-    GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
-    GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
-    GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
-    GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
-    GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
-    GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
-    GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
-    GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
-    GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
-    GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
-    GUMBO_STRING("-//IETF//DTD HTML 3//"),
-    GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
-    GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
-    GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
-    GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
-    GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
-    GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
-    GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
-    GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
-    GUMBO_STRING("-//IETF//DTD HTML Strict//"),
-    GUMBO_STRING("-//IETF//DTD HTML//"),
-    GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
-    GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
-    GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
-    GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
-    GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
-    GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
-    GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
-    GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
-    GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
-    GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
-    GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
-    GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
-    GUMBO_STRING(
-        "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
-        "extensions to HTML 4.0//"),
-    GUMBO_STRING(
-        "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
-        "extensions to HTML 4.0//"),
-    GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
-    GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
-    GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
-    GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
-    GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
-    GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
-    GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
-    GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
-    GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
-    GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
-    GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
-    GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
-    GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
-    GUMBO_STRING("-//W3C//DTD W3 HTML//"),
-    GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
-    GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
-    GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"), TERMINATOR};
+  STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
+  STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
+  STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
+  STRING("-//IETF//DTD HTML 2.0 Level 1//"),
+  STRING("-//IETF//DTD HTML 2.0 Level 2//"),
+  STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
+  STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
+  STRING("-//IETF//DTD HTML 2.0 Strict//"),
+  STRING("-//IETF//DTD HTML 2.0//"),
+  STRING("-//IETF//DTD HTML 2.1E//"),
+  STRING("-//IETF//DTD HTML 3.0//"),
+  STRING("-//IETF//DTD HTML 3.2 Final//"),
+  STRING("-//IETF//DTD HTML 3.2//"),
+  STRING("-//IETF//DTD HTML 3//"),
+  STRING("-//IETF//DTD HTML Level 0//"),
+  STRING("-//IETF//DTD HTML Level 1//"),
+  STRING("-//IETF//DTD HTML Level 2//"),
+  STRING("-//IETF//DTD HTML Level 3//"),
+  STRING("-//IETF//DTD HTML Strict Level 0//"),
+  STRING("-//IETF//DTD HTML Strict Level 1//"),
+  STRING("-//IETF//DTD HTML Strict Level 2//"),
+  STRING("-//IETF//DTD HTML Strict Level 3//"),
+  STRING("-//IETF//DTD HTML Strict//"),
+  STRING("-//IETF//DTD HTML//"),
+  STRING("-//Metrius//DTD Metrius Presentational//"),
+  STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
+  STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
+  STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
+  STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
+  STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
+  STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
+  STRING("-//Netscape Comm. Corp.//DTD HTML//"),
+  STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
+  STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
+  STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
+  STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
+  STRING(
+    "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
+    "extensions to HTML 4.0//"),
+  STRING(
+    "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
+    "extensions to HTML 4.0//"),
+  STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
+  STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
+  STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
+  STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
+  STRING("-//W3C//DTD HTML 3 1995-03-24//"),
+  STRING("-//W3C//DTD HTML 3.2 Draft//"),
+  STRING("-//W3C//DTD HTML 3.2 Final//"),
+  STRING("-//W3C//DTD HTML 3.2//"),
+  STRING("-//W3C//DTD HTML 3.2S Draft//"),
+  STRING("-//W3C//DTD HTML 4.0 Frameset//"),
+  STRING("-//W3C//DTD HTML 4.0 Transitional//"),
+  STRING("-//W3C//DTD HTML Experimental 19960712//"),
+  STRING("-//W3C//DTD HTML Experimental 970421//"),
+  STRING("-//W3C//DTD W3 HTML//"),
+  STRING("-//W3O//DTD W3 HTML 3.0//"),
+  STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
+  STRING("-//WebTechs//DTD Mozilla HTML//"),
+  TERMINATOR
+};
 
 static const GumboStringPiece kQuirksModePublicIdExactMatches[] = {
-    GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
-    GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"), GUMBO_STRING("HTML"),
-    TERMINATOR};
+  STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
+  STRING("-/W3C/DTD HTML 4.0 Transitional/EN"),
+  STRING("HTML"),
+  TERMINATOR
+};
 
 static const GumboStringPiece kQuirksModeSystemIdExactMatches[] = {
-    GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
-    TERMINATOR};
+  STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
+  TERMINATOR
+};
 
 static const GumboStringPiece kLimitedQuirksPublicIdPrefixes[] = {
-    GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
-    GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"), TERMINATOR};
-
-static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
-    {GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
-        GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"), TERMINATOR};
-
-// Indexed by GumboNamespaceEnum; keep in sync with that.
-static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
-    "http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
-
-typedef struct _ReplacementEntry {
-  const GumboStringPiece from;
-  const GumboStringPiece to;
-} ReplacementEntry;
-
-#define REPLACEMENT_ENTRY(from, to) \
-  { GUMBO_STRING(from), GUMBO_STRING(to) }
-
-// Static data for SVG attribute replacements.
-// https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
-static const ReplacementEntry kSvgAttributeReplacements[] = {
-    REPLACEMENT_ENTRY("attributename", "attributeName"),
-    REPLACEMENT_ENTRY("attributetype", "attributeType"),
-    REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
-    REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
-    REPLACEMENT_ENTRY("calcmode", "calcMode"),
-    REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
-    // REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
-    // REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
-    REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
-    REPLACEMENT_ENTRY("edgemode", "edgeMode"),
-    // REPLACEMENT_ENTRY("externalresourcesrequired",
-    // "externalResourcesRequired"),
-    // REPLACEMENT_ENTRY("filterres", "filterRes"),
-    REPLACEMENT_ENTRY("filterunits", "filterUnits"),
-    REPLACEMENT_ENTRY("glyphref", "glyphRef"),
-    REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
-    REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
-    REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
-    REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
-    REPLACEMENT_ENTRY("keypoints", "keyPoints"),
-    REPLACEMENT_ENTRY("keysplines", "keySplines"),
-    REPLACEMENT_ENTRY("keytimes", "keyTimes"),
-    REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
-    REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
-    REPLACEMENT_ENTRY("markerheight", "markerHeight"),
-    REPLACEMENT_ENTRY("markerunits", "markerUnits"),
-    REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
-    REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
-    REPLACEMENT_ENTRY("maskunits", "maskUnits"),
-    REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
-    REPLACEMENT_ENTRY("pathlength", "pathLength"),
-    REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
-    REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
-    REPLACEMENT_ENTRY("patternunits", "patternUnits"),
-    REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
-    REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
-    REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
-    REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
-    REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
-    REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
-    REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
-    REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
-    REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
-    REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
-    REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
-    REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
-    REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
-    REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
-    REPLACEMENT_ENTRY("startoffset", "startOffset"),
-    REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
-    REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
-    REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
-    REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
-    REPLACEMENT_ENTRY("tablevalues", "tableValues"),
-    REPLACEMENT_ENTRY("targetx", "targetX"),
-    REPLACEMENT_ENTRY("targety", "targetY"),
-    REPLACEMENT_ENTRY("textlength", "textLength"),
-    REPLACEMENT_ENTRY("viewbox", "viewBox"),
-    REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
-    REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
-    REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
-    REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
+  STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
+  STRING("-//W3C//DTD XHTML 1.0 Transitional//"),
+  TERMINATOR
 };
 
-static const ReplacementEntry kSvgTagReplacements[] = {
-    REPLACEMENT_ENTRY("altglyph", "altGlyph"),
-    REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
-    REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
-    REPLACEMENT_ENTRY("animatecolor", "animateColor"),
-    REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
-    REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
-    REPLACEMENT_ENTRY("clippath", "clipPath"),
-    REPLACEMENT_ENTRY("feblend", "feBlend"),
-    REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
-    REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
-    REPLACEMENT_ENTRY("fecomposite", "feComposite"),
-    REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
-    REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
-    REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
-    REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
-    REPLACEMENT_ENTRY("feflood", "feFlood"),
-    REPLACEMENT_ENTRY("fefunca", "feFuncA"),
-    REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
-    REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
-    REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
-    REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
-    REPLACEMENT_ENTRY("feimage", "feImage"),
-    REPLACEMENT_ENTRY("femerge", "feMerge"),
-    REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
-    REPLACEMENT_ENTRY("femorphology", "feMorphology"),
-    REPLACEMENT_ENTRY("feoffset", "feOffset"),
-    REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
-    REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
-    REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
-    REPLACEMENT_ENTRY("fetile", "feTile"),
-    REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
-    REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
-    REPLACEMENT_ENTRY("glyphref", "glyphRef"),
-    REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
-    REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
-    REPLACEMENT_ENTRY("textpath", "textPath"),
+static const GumboStringPiece kSystemIdDependentPublicIdPrefixes[] = {
+  STRING("-//W3C//DTD HTML 4.01 Frameset//"),
+  STRING("-//W3C//DTD HTML 4.01 Transitional//"),
+  TERMINATOR
 };
 
-typedef struct _NamespacedAttributeReplacement {
-  const char* from;
-  const char* local_name;
-  const GumboAttributeNamespaceEnum attr_namespace;
-} NamespacedAttributeReplacement;
-
-static const NamespacedAttributeReplacement kForeignAttributeReplacements[] = {
-    {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
-    {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
-    {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
-    {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
-    {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
-    {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
-    {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
-    {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML},
-    {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
-    {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
-    {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
-    {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
+// Indexed by GumboNamespaceEnum; keep in sync with that.
+static const char* kLegalXmlns[] = {
+  "http://www.w3.org/1999/xhtml",
+  "http://www.w3.org/2000/svg",
+  "http://www.w3.org/1998/Math/MathML"
 };
 
-// The "scope marker" for the list of active formatting elements.  We use a
+// The "scope marker" for the list of active formatting elements. We use a
 // pointer to this as a generic marker element, since the particular element
 // scope doesn't matter.
 static const GumboNode kActiveFormattingScopeMarker;
@@ -315,15 +186,15 @@ static const bool kStartTag = true;
 static const bool kEndTag = false;
 
 // Because GumboStringPieces are immutable, we can't insert a character directly
-// into a text node.  Instead, we accumulate all pending characters here and
+// into a text node. Instead, we accumulate all pending characters here and
 // flush them out to a text node whenever a new element is inserted.
 //
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#insert-a-character
+// https://html.spec.whatwg.org/multipage/parsing.html#insert-a-character
 typedef struct _TextNodeBufferState {
   // The accumulated text to be inserted into the current text node.
   GumboStringBuffer _buffer;
 
-  // A pointer to the original text represented by this text node.  Note that
+  // A pointer to the original text represented by this text node. Note that
   // because of foster parenting and other strange DOM manipulations, this may
   // include other non-text HTML tags in it; it is defined as the span of
   // original text from the first character in this text node to the last
@@ -338,24 +209,24 @@ typedef struct _TextNodeBufferState {
 } TextNodeBufferState;
 
 typedef struct GumboInternalParserState {
-  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#insertion-mode
+  // https://html.spec.whatwg.org/multipage/parsing.html#insertion-mode
   GumboInsertionMode _insertion_mode;
 
   // Used for run_generic_parsing_algorithm, which needs to switch back to the
   // original insertion mode at its conclusion.
   GumboInsertionMode _original_insertion_mode;
 
-  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#the-stack-of-open-elements
+  // https://html.spec.whatwg.org/multipage/parsing.html#the-stack-of-open-elements
   GumboVector /*GumboNode*/ _open_elements;
 
-  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#the-list-of-active-formatting-elements
+  // https://html.spec.whatwg.org/multipage/parsing.html#the-list-of-active-formatting-elements
   GumboVector /*GumboNode*/ _active_formatting_elements;
 
   // The stack of template insertion modes.
-  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-insertion-mode
+  // https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode
   GumboVector /*InsertionMode*/ _template_insertion_modes;
 
-  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#the-element-pointers
+  // https://html.spec.whatwg.org/multipage/parsing.html#the-element-pointers
   GumboNode* _head_element;
   GumboNode* _form_element;
 
@@ -375,7 +246,7 @@ typedef struct GumboInternalParserState {
   bool _ignore_next_linefeed;
 
   // The flag for "whenever a node would be inserted into the current node, it
-  // must instead be foster parented".  This is used for misnested table
+  // must instead be foster parented". This is used for misnested table
   // content, which needs to be handled according to "in body" rules yet foster
   // parented outside of the table.
   // It would perhaps be more explicit to have this as a parameter to
@@ -392,7 +263,7 @@ typedef struct GumboInternalParserState {
 
   // The way that the spec is written, the </body> and </html> tags are *always*
   // implicit, because encountering one of those tokens merely switches the
-  // insertion mode out of "in body".  So we have individual state flags for
+  // insertion mode out of "in body". So we have individual state flags for
   // those end tags that are then inspected by pop_current_node when the <body>
   // and <html> nodes are popped to set the GUMBO_INSERTION_IMPLICIT_END_TAG
   // flag appropriately.
@@ -407,23 +278,31 @@ static bool token_has_attribute(const GumboToken* token, const char* name) {
 
 // Checks if the value of the specified attribute is a case-insensitive match
 // for the specified string.
-static bool attribute_matches(
-    const GumboVector* attributes, const char* name, const char* value) {
+static bool attribute_matches (
+  const GumboVector* attributes,
+  const char* name,
+  const char* value
+) {
   const GumboAttribute* attr = gumbo_get_attribute(attributes, name);
-  return attr ? strcasecmp(value, attr->value) == 0 : false;
+  return attr ? gumbo_ascii_strcasecmp(value, attr->value) == 0 : false;
 }
 
 // Checks if the value of the specified attribute is a case-sensitive match
 // for the specified string.
-static bool attribute_matches_case_sensitive(
-    const GumboVector* attributes, const char* name, const char* value) {
+static bool attribute_matches_case_sensitive (
+  const GumboVector* attributes,
+  const char* name,
+  const char* value
+) {
   const GumboAttribute* attr = gumbo_get_attribute(attributes, name);
   return attr ? strcmp(value, attr->value) == 0 : false;
 }
 
 // Checks if the specified attribute vectors are identical.
-static bool all_attributes_match(
-    const GumboVector* attr1, const GumboVector* attr2) {
+static bool all_attributes_match (
+  const GumboVector* attr1,
+  const GumboVector* attr2
+) {
   unsigned int num_unmatched_attr2_elements = attr2->length;
   for (unsigned int i = 0; i < attr1->length; ++i) {
     const GumboAttribute* attr = attr1->data[i];
@@ -441,8 +320,8 @@ static void set_frameset_not_ok(GumboParser* parser) {
   parser->_parser_state->_frameset_ok = false;
 }
 
-static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
-  GumboNode* node = gumbo_parser_allocate(parser, sizeof(GumboNode));
+static GumboNode* create_node(GumboNodeType type) {
+  GumboNode* node = gumbo_alloc(sizeof(GumboNode));
   node->parent = NULL;
   node->index_within_parent = -1;
   node->type = type;
@@ -450,10 +329,10 @@ static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
   return node;
 }
 
-static GumboNode* new_document_node(GumboParser* parser) {
-  GumboNode* document_node = create_node(parser, GUMBO_NODE_DOCUMENT);
+static GumboNode* new_document_node() {
+  GumboNode* document_node = create_node(GUMBO_NODE_DOCUMENT);
   document_node->parse_flags = GUMBO_INSERTION_BY_PARSER;
-  gumbo_vector_init(parser, 1, &document_node->v.document.children);
+  gumbo_vector_init(1, &document_node->v.document.children);
 
   // Must be initialized explicitly, as there's no guarantee that we'll see a
   // doc type token.
@@ -466,26 +345,26 @@ static GumboNode* new_document_node(GumboParser* parser) {
 }
 
 static void output_init(GumboParser* parser) {
-  GumboOutput* output = gumbo_parser_allocate(parser, sizeof(GumboOutput));
+  GumboOutput* output = gumbo_alloc(sizeof(GumboOutput));
   output->root = NULL;
-  output->document = new_document_node(parser);
+  output->document = new_document_node();
+  output->status = GUMBO_STATUS_OK;
   parser->_output = output;
   gumbo_init_errors(parser);
 }
 
 static void parser_state_init(GumboParser* parser) {
-  GumboParserState* parser_state =
-      gumbo_parser_allocate(parser, sizeof(GumboParserState));
+  GumboParserState* parser_state = gumbo_alloc(sizeof(GumboParserState));
   parser_state->_insertion_mode = GUMBO_INSERTION_MODE_INITIAL;
   parser_state->_reprocess_current_token = false;
   parser_state->_frameset_ok = true;
   parser_state->_ignore_next_linefeed = false;
   parser_state->_foster_parent_insertions = false;
   parser_state->_text_node._type = GUMBO_NODE_WHITESPACE;
-  gumbo_string_buffer_init(parser, &parser_state->_text_node._buffer);
-  gumbo_vector_init(parser, 10, &parser_state->_open_elements);
-  gumbo_vector_init(parser, 5, &parser_state->_active_formatting_elements);
-  gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
+  gumbo_string_buffer_init(&parser_state->_text_node._buffer);
+  gumbo_vector_init(10, &parser_state->_open_elements);
+  gumbo_vector_init(5, &parser_state->_active_formatting_elements);
+  gumbo_vector_init(5, &parser_state->_template_insertion_modes);
   parser_state->_head_element = NULL;
   parser_state->_form_element = NULL;
   parser_state->_fragment_ctx = NULL;
@@ -495,19 +374,94 @@ static void parser_state_init(GumboParser* parser) {
   parser->_parser_state = parser_state;
 }
 
+typedef void (*TreeTraversalCallback)(GumboNode* node);
+
+// Post-order walk of the subtree at node: children are visited before their
+// parent, so callback may free the node it is given. Iterative, not recursive.
+static void tree_traverse(GumboNode* node, TreeTraversalCallback callback) {
+  GumboNode* current_node = node;
+  unsigned int offset = 0; // index of the next child of current_node to enter
+tailcall:
+  switch (current_node->type) {
+    case GUMBO_NODE_DOCUMENT:
+    case GUMBO_NODE_TEMPLATE:
+    case GUMBO_NODE_ELEMENT: {
+      GumboVector* children = (current_node->type == GUMBO_NODE_DOCUMENT)
+        ? &current_node->v.document.children
+        : &current_node->v.element.children;
+      if (offset >= children->length) {
+        assert(offset == children->length);
+        break;
+      } else {
+        current_node = children->data[offset];
+        offset = 0;
+        goto tailcall;
+      }
+    }
+    case GUMBO_NODE_TEXT:
+    case GUMBO_NODE_CDATA:
+    case GUMBO_NODE_COMMENT:
+    case GUMBO_NODE_WHITESPACE:
+      assert(offset == 0);
+      break;
+  }
+
+  // Read everything we still need before callback, which may free the node.
+  offset = current_node->index_within_parent + 1;
+  GumboNode* next_node = current_node->parent;
+  const bool is_root = (current_node == node);
+  callback(current_node);
+  if (is_root) return;
+  current_node = next_node;
+  goto tailcall;
+}
+
+static void destroy_node_callback(GumboNode* node) { // frees one node's own storage
+  switch (node->type) {
+    case GUMBO_NODE_DOCUMENT: {
+      GumboDocument* doc = &node->v.document;
+      gumbo_free((void*) doc->children.data); // the child array only, not the child nodes
+      gumbo_free((void*) doc->name);
+      gumbo_free((void*) doc->public_identifier);
+      gumbo_free((void*) doc->system_identifier);
+    } break;
+    case GUMBO_NODE_TEMPLATE:
+    case GUMBO_NODE_ELEMENT:
+      for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
+        gumbo_destroy_attribute(node->v.element.attributes.data[i]);
+      }
+      gumbo_free(node->v.element.attributes.data);
+      gumbo_free(node->v.element.children.data); // child nodes are not freed here
+      if (node->v.element.tag == GUMBO_TAG_UNKNOWN) // NOTE(review): presumably only unknown tags own their name; confirm
+        gumbo_free((void *)node->v.element.name);
+      break;
+    case GUMBO_NODE_TEXT:
+    case GUMBO_NODE_CDATA:
+    case GUMBO_NODE_COMMENT:
+    case GUMBO_NODE_WHITESPACE:
+      gumbo_free((void*) node->v.text.text);
+      break;
+  }
+  gumbo_free(node); // finally release the node struct itself
+}
+
+static void destroy_node(GumboNode* node) { // frees node and every node beneath it
+  tree_traverse(node, &destroy_node_callback);
+}
+
 static void parser_state_destroy(GumboParser* parser) {
   GumboParserState* state = parser->_parser_state;
   if (state->_fragment_ctx) {
-    destroy_node(parser, state->_fragment_ctx);
+    destroy_node(state->_fragment_ctx);
   }
-  gumbo_vector_destroy(parser, &state->_active_formatting_elements);
-  gumbo_vector_destroy(parser, &state->_open_elements);
-  gumbo_vector_destroy(parser, &state->_template_insertion_modes);
-  gumbo_string_buffer_destroy(parser, &state->_text_node._buffer);
-  gumbo_parser_deallocate(parser, state);
+  gumbo_vector_destroy(&state->_active_formatting_elements);
+  gumbo_vector_destroy(&state->_open_elements);
+  gumbo_vector_destroy(&state->_template_insertion_modes);
+  gumbo_string_buffer_destroy(&state->_text_node._buffer);
+  gumbo_free(state);
 }
 
-static GumboNode* get_document_node(GumboParser* parser) {
+static GumboNode* get_document_node(const GumboParser* parser) {
   return parser->_output->document;
 }
 
@@ -517,8 +471,8 @@ static bool is_fragment_parser(const GumboParser* parser) {
 
 // Returns the node at the bottom of the stack of open elements, or NULL if no
 // elements have been added yet.
-static GumboNode* get_current_node(GumboParser* parser) {
-  GumboVector* open_elements = &parser->_parser_state->_open_elements;
+static GumboNode* get_current_node(const GumboParser* parser) {
+  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
   if (open_elements->length == 0) {
     assert(!parser->_output->root);
     return NULL;
@@ -528,8 +482,8 @@ static GumboNode* get_current_node(GumboParser* parser) {
   return open_elements->data[open_elements->length - 1];
 }
 
-static GumboNode* get_adjusted_current_node(GumboParser* parser) {
-  GumboParserState* state = parser->_parser_state;
+static GumboNode* get_adjusted_current_node(const GumboParser* parser) {
+  const GumboParserState* state = parser->_parser_state;
   if (state->_open_elements.length == 1 && state->_fragment_ctx) {
     return state->_fragment_ctx;
   }
@@ -537,15 +491,20 @@ static GumboNode* get_adjusted_current_node(GumboParser* parser) {
 }
 
 // Returns true if the given needle is in the given array of literal
-// GumboStringPieces.  If exact_match is true, this requires that they match
+// GumboStringPieces. If exact_match is true, this requires that they match
 // exactly; otherwise, this performs a prefix match to check if any of the
-// elements in haystack start with needle.  This always performs a
+// elements in haystack start with needle. This always performs a
 // case-insensitive match.
-static bool is_in_static_list(
-    const char* needle, const GumboStringPiece* haystack, bool exact_match) {
+static bool is_in_static_list (
+  const char* needle,
+  const GumboStringPiece* haystack,
+  bool exact_match
+) {
   for (unsigned int i = 0; haystack[i].length > 0; ++i) {
-    if ((exact_match && !strcmp(needle, haystack[i].data)) ||
-        (!exact_match && !strcasecmp(needle, haystack[i].data))) {
+    if (
+      (exact_match && !strcmp(needle, haystack[i].data))
+      || (!exact_match && !gumbo_ascii_strcasecmp(needle, haystack[i].data))
+    ) {
       return true;
     }
   }
@@ -556,13 +515,109 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
   parser->_parser_state->_insertion_mode = mode;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#reset-the-insertion-mode-appropriately
+static void push_template_insertion_mode (
+  GumboParser* parser,
+  GumboInsertionMode mode
+) {
+  gumbo_vector_add (
+    (void*) mode,
+    &parser->_parser_state->_template_insertion_modes
+  );
+}
+
+static void pop_template_insertion_mode(GumboParser* parser) {
+  gumbo_vector_pop(&parser->_parser_state->_template_insertion_modes);
+}
+
+// Returns the mode on top of the stack of template insertion modes (stored as
+// void* values), or GUMBO_INSERTION_MODE_INITIAL if that stack is empty.
+static GumboInsertionMode get_current_template_insertion_mode (
+  const GumboParser* parser
+) {
+  const GumboVector* modes = &parser->_parser_state->_template_insertion_modes;
+  if (modes->length == 0) {
+    return GUMBO_INSERTION_MODE_INITIAL;
+  }
+  return (GumboInsertionMode) modes->data[(modes->length - 1)];
+}
+
+// Returns true if the specified token is either a start or end tag
+// (specified by is_start) with one of the tag types in the TagSet.
+static bool tag_in (
+  const GumboToken* token,
+  bool is_start,
+  const TagSet* tags
+) {
+  GumboTag token_tag;
+  if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
+    token_tag = token->v.start_tag.tag;
+  } else if (!is_start && token->type == GUMBO_TOKEN_END_TAG) {
+    token_tag = token->v.end_tag.tag;
+  } else {
+    return false;
+  }
+  return (*tags)[(unsigned) token_tag] != 0u;
+}
+
+// Like tag_in, but for the single-tag case.
+static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
+  if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
+    return token->v.start_tag.tag == tag;
+  } else if (!is_start && token->type == GUMBO_TOKEN_END_TAG) {
+    return token->v.end_tag.tag == tag;
+  } else {
+    return false;
+  }
+}
+
+static inline bool tagset_includes (
+  const TagSet* tagset,
+  GumboNamespaceEnum ns,
+  GumboTag tag
+) {
+  return ((*tagset)[(unsigned) tag] & (1u << (unsigned) ns)) != 0u;
+}
+
+// Like tag_in, but checks for the tag of a node, rather than a token.
+static bool node_tag_in_set(const GumboNode* node, const TagSet* tags) {
+  assert(node != NULL);
+  if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
+    return false;
+  }
+  return tagset_includes (
+    tags,
+    node->v.element.tag_namespace,
+    node->v.element.tag
+  );
+}
+
+// Like node_tag_in_set, but for the single-tag case.
+static bool node_qualified_tag_is (
+  const GumboNode* node,
+  GumboNamespaceEnum ns,
+  GumboTag tag
+) {
+  assert(node);
+  return
+    (node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE)
+    && node->v.element.tag == tag
+    && node->v.element.tag_namespace == ns;
+}
+
+// Like node_tag_in_set, but for the single-tag case in the HTML namespace.
+static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
+  return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
+}
+
+// https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately
 // This is a helper function that returns the appropriate insertion mode instead
-// of setting it.  Returns GUMBO_INSERTION_MODE_INITIAL as a sentinel value to
+// of setting it. Returns GUMBO_INSERTION_MODE_INITIAL as a sentinel value to
 // indicate that there is no appropriate insertion mode, and the loop should
 // continue.
-static GumboInsertionMode get_appropriate_insertion_mode(
-    const GumboParser* parser, int index) {
+static GumboInsertionMode get_appropriate_insertion_mode (
+  const GumboParser* parser,
+  int index
+) {
   const GumboVector* open_elements = &parser->_parser_state->_open_elements;
   const GumboNode* node = open_elements->data[index];
   const bool is_last = index == 0;
@@ -572,10 +627,10 @@ static GumboInsertionMode get_appropriate_insertion_mode(
   }
 
   assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
-  if (node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML)
-    return is_last ?
-      GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
-  
+  if (node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML) {
+    return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
+  }
+
   switch (node->v.element.tag) {
     case GUMBO_TAG_SELECT: {
       if (is_last) {
@@ -619,8 +674,8 @@ static GumboInsertionMode get_appropriate_insertion_mode(
       return GUMBO_INSERTION_MODE_IN_FRAMESET;
     case GUMBO_TAG_HTML:
       return parser->_parser_state->_head_element
-                 ? GUMBO_INSERTION_MODE_AFTER_HEAD
-                 : GUMBO_INSERTION_MODE_BEFORE_HEAD;
+        ? GUMBO_INSERTION_MODE_AFTER_HEAD
+        : GUMBO_INSERTION_MODE_BEFORE_HEAD;
     default:
       break;
   }
@@ -642,8 +697,10 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
   assert(0);
 }
 
-static GumboError* parser_add_parse_error(
-    GumboParser* parser, const GumboToken* token) {
+static GumboError* parser_add_parse_error (
+  GumboParser* parser,
+  const GumboToken* token
+) {
   gumbo_debug("Adding parse error.\n");
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
@@ -658,144 +715,97 @@ static GumboError* parser_add_parse_error(
   if (token->type == GUMBO_TOKEN_START_TAG) {
     extra_data->input_tag = token->v.start_tag.tag;
   } else if (token->type == GUMBO_TOKEN_END_TAG) {
-    extra_data->input_tag = token->v.end_tag;
+    extra_data->input_tag = token->v.end_tag.tag;
   }
-  GumboParserState* state = parser->_parser_state;
+  const GumboParserState* state = parser->_parser_state;
   extra_data->parser_state = state->_insertion_mode;
-  gumbo_vector_init(
-      parser, state->_open_elements.length, &extra_data->tag_stack);
+  gumbo_vector_init(state->_open_elements.length, &extra_data->tag_stack);
   for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
     const GumboNode* node = state->_open_elements.data[i];
-    assert(
-        node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
-    gumbo_vector_add(
-        parser, (void*) node->v.element.tag, &extra_data->tag_stack);
+    assert (
+      node->type == GUMBO_NODE_ELEMENT
+      || node->type == GUMBO_NODE_TEMPLATE
+    );
+    gumbo_vector_add (
+      (void*) node->v.element.tag,
+      &extra_data->tag_stack
+    );
   }
   return error;
 }
 
-// Returns true if the specified token is either a start or end tag (specified
-// by is_start) with one of the tag types in the varargs list.  Terminate the
-// list with GUMBO_TAG_LAST; this functions as a sentinel since no portion of
-// the spec references tags that are not in the spec.
-static bool tag_in(
-    const GumboToken* token, bool is_start, const gumbo_tagset tags) {
-  GumboTag token_tag;
-  if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
-    token_tag = token->v.start_tag.tag;
-  } else if (!is_start && token->type == GUMBO_TOKEN_END_TAG) {
-    token_tag = token->v.end_tag;
-  } else {
-    return false;
-  }
-  return (token_tag < GUMBO_TAG_LAST && tags[(int) token_tag] != 0);
-}
-
-// Like tag_in, but for the single-tag case.
-static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
-  if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
-    return token->v.start_tag.tag == tag;
-  } else if (!is_start && token->type == GUMBO_TOKEN_END_TAG) {
-    return token->v.end_tag == tag;
-  } else {
-    return false;
-  }
+// https://html.spec.whatwg.org/multipage/parsing.html#mathml-text-integration-point
+static bool is_mathml_integration_point(const GumboNode* node) {
+  static const TagSet mathml_integration_point_tags = {
+    TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
+    TAG_MATHML(MS), TAG_MATHML(MTEXT)
+  };
+  return node_tag_in_set(node, &mathml_integration_point_tags);
 }
 
-// Like tag_in, but checks for the tag of a node, rather than a token.
-static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
-  assert(node != NULL);
-  if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
-    return false;
+// https://html.spec.whatwg.org/multipage/parsing.html#html-integration-point
+static bool is_html_integration_point(const GumboNode* node) {
+  static const TagSet html_integration_point_svg_tags = {
+      TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE)
+  };
+  if (node_tag_in_set(node, &html_integration_point_svg_tags)) {
+    return true;
   }
-  return TAGSET_INCLUDES(
-      tags, node->v.element.tag_namespace, node->v.element.tag);
-}
-
-// Like node_tag_in, but for the single-tag case.
-static bool node_qualified_tag_is(
-    const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
-  assert(node);
-  return (node->type == GUMBO_NODE_ELEMENT ||
-             node->type == GUMBO_NODE_TEMPLATE) &&
-         node->v.element.tag == tag && node->v.element.tag_namespace == ns;
-}
 
-// Like node_tag_in, but for the single-tag case in the HTML namespace
-static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
-  return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
-}
-
-static void push_template_insertion_mode(
-    GumboParser* parser, GumboInsertionMode mode) {
-  gumbo_vector_add(
-      parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
-}
-
-static void pop_template_insertion_mode(GumboParser* parser) {
-  gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
-}
-
-// Returns the current template insertion mode.  If the stack of template
-// insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
-static GumboInsertionMode get_current_template_insertion_mode(
-    const GumboParser* parser) {
-  GumboVector* template_insertion_modes =
-      &parser->_parser_state->_template_insertion_modes;
-  if (template_insertion_modes->length == 0) {
-    return GUMBO_INSERTION_MODE_INITIAL;
+  const bool is_mathml_annotation_xml_element = node_qualified_tag_is (
+    node,
+    GUMBO_NAMESPACE_MATHML,
+    GUMBO_TAG_ANNOTATION_XML
+  );
+  const GumboVector* attributes = &node->v.element.attributes;
+  if (
+    is_mathml_annotation_xml_element
+    && (
+      attribute_matches(attributes, "encoding", "text/html")
+      || attribute_matches(attributes, "encoding", "application/xhtml+xml")
+    )
+  ) {
+    return true;
   }
-  return (GumboInsertionMode)
-      template_insertion_modes->data[(template_insertion_modes->length - 1)];
-}
-
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
-static bool is_mathml_integration_point(const GumboNode* node) {
-  return node_tag_in_set(
-      node, (gumbo_tagset){TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
-                TAG_MATHML(MS), TAG_MATHML(MTEXT)});
-}
 
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#html-integration-point
-static bool is_html_integration_point(const GumboNode* node) {
-  return node_tag_in_set(node, (gumbo_tagset){TAG_SVG(FOREIGNOBJECT),
-                                   TAG_SVG(DESC), TAG_SVG(TITLE)}) ||
-         (node_qualified_tag_is(
-              node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
-             (attribute_matches(
-                  &node->v.element.attributes, "encoding", "text/html") ||
-                 attribute_matches(&node->v.element.attributes, "encoding",
-                     "application/xhtml+xml")));
+  return false;
 }
 
 // This represents a place to insert a node, consisting of a target parent and a
-// child index within that parent.  If the node should be inserted at the end of
+// child index within that parent. If the node should be inserted at the end of
 // the parent's child, index will be -1.
 typedef struct {
   GumboNode* target;
   int index;
 } InsertionLocation;
 
-InsertionLocation get_appropriate_insertion_location(
-    GumboParser* parser, GumboNode* override_target) {
+static InsertionLocation get_appropriate_insertion_location (
+  const GumboParser* parser,
+  GumboNode* override_target
+) {
   InsertionLocation retval = {override_target, -1};
   if (retval.target == NULL) {
     // No override target; default to the current node, but special-case the
     // root node since get_current_node() assumes the stack of open elements is
     // non-empty.
-    retval.target = parser->_output->root != NULL ? get_current_node(parser)
-                                                  : get_document_node(parser);
+    retval.target = (parser->_output->root != NULL)
+      ? get_current_node(parser)
+      : get_document_node(parser)
+    ;
   }
-  if (!parser->_parser_state->_foster_parent_insertions ||
-      !node_tag_in_set(retval.target, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
-                                          TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
+  if (
+    !parser->_parser_state->_foster_parent_insertions
+    || !node_tag_in_set(retval.target, &(const TagSet) {
+      TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)
+    })
+  ) {
     return retval;
   }
 
   // Foster-parenting case.
   int last_template_index = -1;
   int last_table_index = -1;
-  GumboVector* open_elements = &parser->_parser_state->_open_elements;
+  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
   for (unsigned int i = 0; i < open_elements->length; ++i) {
     if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
       last_template_index = i;
@@ -804,8 +814,10 @@ InsertionLocation get_appropriate_insertion_location(
       last_table_index = i;
     }
   }
-  if (last_template_index != -1 &&
-      (last_table_index == -1 || last_template_index > last_table_index)) {
+  if (
+    last_template_index != -1
+    && (last_table_index == -1 || last_template_index > last_table_index)
+  ) {
     retval.target = open_elements->data[last_template_index];
     return retval;
   }
@@ -813,7 +825,7 @@ InsertionLocation get_appropriate_insertion_location(
     retval.target = open_elements->data[0];
     return retval;
   }
-  GumboNode* last_table = open_elements->data[last_table_index];
+  const GumboNode* last_table = open_elements->data[last_table_index];
   if (last_table->parent != NULL) {
     retval.target = last_table->parent;
     retval.index = last_table->index_within_parent;
@@ -826,13 +838,14 @@ InsertionLocation get_appropriate_insertion_location(
 
 // Appends a node to the end of its parent, setting the "parent" and
 // "index_within_parent" fields appropriately.
-static void append_node(
-    GumboParser* parser, GumboNode* parent, GumboNode* node) {
+static void append_node(GumboNode* parent, GumboNode* node) {
   assert(node->parent == NULL);
-  assert(node->index_within_parent == -1);
+  assert(node->index_within_parent == (unsigned int) -1);
   GumboVector* children;
-  if (parent->type == GUMBO_NODE_ELEMENT ||
-      parent->type == GUMBO_NODE_TEMPLATE) {
+  if (
+    parent->type == GUMBO_NODE_ELEMENT
+    || parent->type == GUMBO_NODE_TEMPLATE
+  ) {
     children = &parent->v.element.children;
   } else {
     assert(parent->type == GUMBO_NODE_DOCUMENT);
@@ -840,23 +853,24 @@ static void append_node(
   }
   node->parent = parent;
   node->index_within_parent = children->length;
-  gumbo_vector_add(parser, (void*) node, children);
+  gumbo_vector_add((void*) node, children);
   assert(node->index_within_parent < children->length);
 }
 
 // Inserts a node at the specified InsertionLocation, updating the
 // "parent" and "index_within_parent" fields of it and all its siblings.
 // If the index of the location is -1, this calls append_node.
-static void insert_node(
-    GumboParser* parser, GumboNode* node, InsertionLocation location) {
+static void insert_node(GumboNode* node, InsertionLocation location) {
   assert(node->parent == NULL);
-  assert(node->index_within_parent == -1);
+  assert(node->index_within_parent == (unsigned int) -1);
   GumboNode* parent = location.target;
   int index = location.index;
   if (index != -1) {
     GumboVector* children = NULL;
-    if (parent->type == GUMBO_NODE_ELEMENT ||
-        parent->type == GUMBO_NODE_TEMPLATE) {
+    if (
+      parent->type == GUMBO_NODE_ELEMENT
+      || parent->type == GUMBO_NODE_TEMPLATE
+    ) {
       children = &parent->v.element.children;
     } else if (parent->type == GUMBO_NODE_DOCUMENT) {
       children = &parent->v.document.children;
@@ -869,7 +883,7 @@ static void insert_node(
     assert((unsigned int) index < children->length);
     node->parent = parent;
     node->index_within_parent = index;
-    gumbo_vector_insert_at(parser, (void*) node, index, children);
+    gumbo_vector_insert_at((void*) node, index, children);
     assert(node->index_within_parent < children->length);
     for (unsigned int i = index + 1; i < children->length; ++i) {
       GumboNode* sibling = children->data[i];
@@ -877,7 +891,7 @@ static void insert_node(
       assert(sibling->index_within_parent < children->length);
     }
   } else {
-    append_node(parser, parent, node);
+    append_node(parent, node);
   }
 }
 
@@ -888,42 +902,49 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
     return;
   }
 
-  assert(buffer_state->_type == GUMBO_NODE_WHITESPACE ||
-         buffer_state->_type == GUMBO_NODE_TEXT ||
-         buffer_state->_type == GUMBO_NODE_CDATA);
-  GumboNode* text_node = create_node(parser, buffer_state->_type);
+  assert (
+    buffer_state->_type == GUMBO_NODE_WHITESPACE
+    || buffer_state->_type == GUMBO_NODE_TEXT
+    || buffer_state->_type == GUMBO_NODE_CDATA
+  );
+  GumboNode* text_node = create_node(buffer_state->_type);
   GumboText* text_node_data = &text_node->v.text;
-  text_node_data->text =
-      gumbo_string_buffer_to_string(parser, &buffer_state->_buffer);
+  text_node_data->text = gumbo_string_buffer_to_string(&buffer_state->_buffer);
   text_node_data->original_text.data = buffer_state->_start_original_text;
   text_node_data->original_text.length =
       state->_current_token->original_text.data -
       buffer_state->_start_original_text;
   text_node_data->start_pos = buffer_state->_start_position;
 
-  gumbo_debug("Flushing text node buffer of %.*s.\n",
-      (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
+  gumbo_debug (
+    "Flushing text node buffer of %.*s.\n",
+    (int) buffer_state->_buffer.length,
+    buffer_state->_buffer.data
+  );
 
   InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
   if (location.target->type == GUMBO_NODE_DOCUMENT) {
     // The DOM does not allow Document nodes to have Text children, so per the
     // spec, they are dropped on the floor.
-    destroy_node(parser, text_node);
+    destroy_node(text_node);
   } else {
-    insert_node(parser, text_node, location);
+    insert_node(text_node, location);
   }
 
-  gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
+  gumbo_string_buffer_clear(&buffer_state->_buffer);
   buffer_state->_type = GUMBO_NODE_WHITESPACE;
   assert(buffer_state->_buffer.length == 0);
 }
 
-static void record_end_of_element(
-    GumboToken* current_token, GumboElement* element) {
+static void record_end_of_element (
+  const GumboToken* current_token,
+  GumboElement* element
+) {
   element->end_pos = current_token->position;
-  element->original_end_tag = current_token->type == GUMBO_TOKEN_END_TAG
-                                  ? current_token->original_text
-                                  : kGumboEmptyString;
+  element->original_end_tag =
+    (current_token->type == GUMBO_TOKEN_END_TAG)
+      ? current_token->original_text
+      : kGumboEmptyString;
 }
 
 static GumboNode* pop_current_node(GumboParser* parser) {
@@ -931,24 +952,36 @@ static GumboNode* pop_current_node(GumboParser* parser) {
   maybe_flush_text_node_buffer(parser);
   if (state->_open_elements.length > 0) {
     assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
-    gumbo_debug("Popping %s node.\n",
-        gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
+    gumbo_debug (
+      "Popping %s node.\n",
+      gumbo_normalized_tagname(get_current_node(parser)->v.element.tag)
+    );
   }
-  GumboNode* current_node = gumbo_vector_pop(parser, &state->_open_elements);
+  GumboNode* current_node = gumbo_vector_pop(&state->_open_elements);
   if (!current_node) {
     assert(state->_open_elements.length == 0);
     return NULL;
   }
-  assert(current_node->type == GUMBO_NODE_ELEMENT ||
-         current_node->type == GUMBO_NODE_TEMPLATE);
+  assert (
+    current_node->type == GUMBO_NODE_ELEMENT
+    || current_node->type == GUMBO_NODE_TEMPLATE
+  );
   bool is_closed_body_or_html_tag =
-      (node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
-          state->_closed_body_tag) ||
-      (node_html_tag_is(current_node, GUMBO_TAG_HTML) &&
-          state->_closed_html_tag);
-  if ((state->_current_token->type != GUMBO_TOKEN_END_TAG ||
-          !node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
-      !is_closed_body_or_html_tag) {
+    (
+      node_html_tag_is(current_node, GUMBO_TAG_BODY)
+      && state->_closed_body_tag
+    ) || (
+      node_html_tag_is(current_node, GUMBO_TAG_HTML)
+      && state->_closed_html_tag
+    )
+  ;
+  if (
+    (
+      state->_current_token->type != GUMBO_TOKEN_END_TAG
+      || !node_html_tag_is(current_node, state->_current_token->v.end_tag.tag)
+    )
+    && !is_closed_body_or_html_tag
+  ) {
     current_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
   }
   if (!is_closed_body_or_html_tag) {
@@ -957,76 +990,89 @@ static GumboNode* pop_current_node(GumboParser* parser) {
   return current_node;
 }
 
-static void append_comment_node(
-    GumboParser* parser, GumboNode* node, const GumboToken* token) {
+static void append_comment_node (
+  GumboParser* parser,
+  GumboNode* node,
+  const GumboToken* token
+) {
   maybe_flush_text_node_buffer(parser);
-  GumboNode* comment = create_node(parser, GUMBO_NODE_COMMENT);
+  GumboNode* comment = create_node(GUMBO_NODE_COMMENT);
   comment->type = GUMBO_NODE_COMMENT;
   comment->parse_flags = GUMBO_INSERTION_NORMAL;
   comment->v.text.text = token->v.text;
   comment->v.text.original_text = token->original_text;
   comment->v.text.start_pos = token->position;
-  append_node(parser, node, comment);
+  append_node(node, comment);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
+// https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-row-context
 static void clear_stack_to_table_row_context(GumboParser* parser) {
-  while (!node_tag_in_set(get_current_node(parser),
-             (gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
+  static const TagSet tags = {TAG(HTML), TAG(TR), TAG(TEMPLATE)};
+  while (!node_tag_in_set(get_current_node(parser), &tags)) {
     pop_current_node(parser);
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
+// https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-context
 static void clear_stack_to_table_context(GumboParser* parser) {
-  while (!node_tag_in_set(get_current_node(parser),
-             (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
+  static const TagSet tags = {TAG(HTML), TAG(TABLE), TAG(TEMPLATE)};
+  while (!node_tag_in_set(get_current_node(parser), &tags)) {
     pop_current_node(parser);
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
-void clear_stack_to_table_body_context(GumboParser* parser) {
-  while (!node_tag_in_set(get_current_node(parser),
-             (gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
-                 TAG(TEMPLATE)})) {
+// https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-body-context
+static void clear_stack_to_table_body_context(GumboParser* parser) {
+  static const TagSet tags = {
+    TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TEMPLATE)
+  };
+  while (!node_tag_in_set(get_current_node(parser), &tags)) {
     pop_current_node(parser);
   }
 }
 
 // Creates a parser-inserted element in the HTML namespace and returns it.
 static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
-  GumboNode* node = create_node(parser, GUMBO_NODE_ELEMENT);
+  GumboNode* node = create_node(GUMBO_NODE_ELEMENT);
   GumboElement* element = &node->v.element;
-  gumbo_vector_init(parser, 1, &element->children);
-  gumbo_vector_init(parser, 0, &element->attributes);
+  gumbo_vector_init(1, &element->children);
+  gumbo_vector_init(0, &element->attributes);
   element->tag = tag;
+  element->name = gumbo_normalized_tagname(tag);
   element->tag_namespace = GUMBO_NAMESPACE_HTML;
   element->original_tag = kGumboEmptyString;
   element->original_end_tag = kGumboEmptyString;
   element->start_pos = (parser->_parser_state->_current_token)
-                           ? parser->_parser_state->_current_token->position
-                           : kGumboEmptySourcePosition;
+    ? parser->_parser_state->_current_token->position
+    : kGumboEmptySourcePosition
+  ;
   element->end_pos = kGumboEmptySourcePosition;
   return node;
 }
 
 // Constructs an element from the given start tag token.
-static GumboNode* create_element_from_token(
-    GumboParser* parser, GumboToken* token, GumboNamespaceEnum tag_namespace) {
+static GumboNode* create_element_from_token (
+  GumboToken* token,
+  GumboNamespaceEnum tag_namespace
+) {
   assert(token->type == GUMBO_TOKEN_START_TAG);
   GumboTokenStartTag* start_tag = &token->v.start_tag;
 
-  GumboNodeType type = (tag_namespace == GUMBO_NAMESPACE_HTML &&
-                           start_tag->tag == GUMBO_TAG_TEMPLATE)
-                           ? GUMBO_NODE_TEMPLATE
-                           : GUMBO_NODE_ELEMENT;
+  GumboNodeType type =
+    (
+      tag_namespace == GUMBO_NAMESPACE_HTML
+      && start_tag->tag == GUMBO_TAG_TEMPLATE
+    )
+    ? GUMBO_NODE_TEMPLATE
+    : GUMBO_NODE_ELEMENT
+  ;
 
-  GumboNode* node = create_node(parser, type);
+  GumboNode* node = create_node(type);
   GumboElement* element = &node->v.element;
-  gumbo_vector_init(parser, 1, &element->children);
+  gumbo_vector_init(1, &element->children);
   element->attributes = start_tag->attributes;
   element->tag = start_tag->tag;
+  element->name = start_tag->name ? start_tag->name : gumbo_normalized_tagname(start_tag->tag);
   element->tag_namespace = tag_namespace;
 
   assert(token->original_text.length >= 2);
@@ -1037,93 +1083,125 @@ static GumboNode* create_element_from_token(
   element->original_end_tag = kGumboEmptyString;
   element->end_pos = kGumboEmptySourcePosition;
 
-  // The element takes ownership of the attributes from the token, so any
-  // allocated-memory fields should be nulled out.
+  // The element takes ownership of the attributes and name from the token, so
+  // any allocated-memory fields should be nulled out.
   start_tag->attributes = kGumboEmptyVector;
+  start_tag->name = NULL;
   return node;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#insert-an-html-element
-static void insert_element(GumboParser* parser, GumboNode* node,
-    bool is_reconstructing_formatting_elements) {
+// https://html.spec.whatwg.org/multipage/parsing.html#insert-an-html-element
+static void insert_element (
+  GumboParser* parser,
+  GumboNode* node,
+  bool is_reconstructing_formatting_elements
+) {
   GumboParserState* state = parser->_parser_state;
   // NOTE(jdtang): The text node buffer must always be flushed before inserting
   // a node, otherwise we're handling nodes in a different order than the spec
-  // mandated.  However, one clause of the spec (character tokens in the body)
+  // mandated. However, one clause of the spec (character tokens in the body)
   // requires that we reconstruct the active formatting elements *before* adding
   // the character, and reconstructing the active formatting elements may itself
   // result in the insertion of new elements (which should be pushed onto the
-  // stack of open elements before the buffer is flushed).  We solve this (for
+  // stack of open elements before the buffer is flushed). We solve this (for
   // the time being, the spec has been rewritten for <template> and the new
   // version may be simpler here) with a boolean flag to this method.
   if (!is_reconstructing_formatting_elements) {
     maybe_flush_text_node_buffer(parser);
   }
   InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
-  insert_node(parser, node, location);
-  gumbo_vector_add(parser, (void*) node, &state->_open_elements);
+  insert_node(node, location);
+  gumbo_vector_add((void*) node, &state->_open_elements);
 }
 
 // Convenience method that combines create_element_from_token and
 // insert_element, inserting the generated element directly into the current
-// node.  Returns the node inserted.
-static GumboNode* insert_element_from_token(
-    GumboParser* parser, GumboToken* token) {
-  GumboNode* element =
-      create_element_from_token(parser, token, GUMBO_NAMESPACE_HTML);
+// node. Returns the node inserted.
+static GumboNode* insert_element_from_token (
+  GumboParser* parser,
+  GumboToken* token
+) {
+  GumboNode* element = create_element_from_token(token, GUMBO_NAMESPACE_HTML);
   insert_element(parser, element, false);
-  gumbo_debug("Inserting <%s> element (@%x) from token.\n",
-      gumbo_normalized_tagname(element->v.element.tag), element);
+  gumbo_debug (
+    "Inserting <%s> element (@%p) from token.\n",
+    gumbo_normalized_tagname(element->v.element.tag),
+    (void*)element
+  );
   return element;
 }
 
 // Convenience method that combines create_element and insert_element, inserting
-// a parser-generated element of a specific tag type.  Returns the node
+// a parser-generated element of a specific tag type. Returns the node
 // inserted.
-static GumboNode* insert_element_of_tag_type(
-    GumboParser* parser, GumboTag tag, GumboParseFlags reason) {
+static GumboNode* insert_element_of_tag_type (
+  GumboParser* parser,
+  GumboTag tag,
+  GumboParseFlags reason
+) {
   GumboNode* element = create_element(parser, tag);
   element->parse_flags |= GUMBO_INSERTION_BY_PARSER | reason;
   insert_element(parser, element, false);
-  gumbo_debug("Inserting %s element (@%x) from tag type.\n",
-      gumbo_normalized_tagname(tag), element);
+  gumbo_debug (
+    "Inserting %s element (@%p) from tag type.\n",
+    gumbo_normalized_tagname(tag),
+    (void*)element
+  );
   return element;
 }
 
-// Convenience method for creating foreign namespaced element.  Returns the node
+// Convenience method for creating a foreign-namespaced element. Returns the node
 // inserted.
-static GumboNode* insert_foreign_element(
-    GumboParser* parser, GumboToken* token, GumboNamespaceEnum tag_namespace) {
+static GumboNode* insert_foreign_element (
+  GumboParser* parser,
+  GumboToken* token,
+  GumboNamespaceEnum tag_namespace
+) {
   assert(token->type == GUMBO_TOKEN_START_TAG);
-  GumboNode* element = create_element_from_token(parser, token, tag_namespace);
+  GumboNode* element = create_element_from_token(token, tag_namespace);
   insert_element(parser, element, false);
-  if (token_has_attribute(token, "xmlns") &&
-      !attribute_matches_case_sensitive(&token->v.start_tag.attributes, "xmlns",
-          kLegalXmlns[tag_namespace])) {
+  if ( // NOTE(review): token's attributes were moved into element above; xmlns checks look dead -- confirm
+    token_has_attribute(token, "xmlns")
+    && !attribute_matches_case_sensitive (
+      &token->v.start_tag.attributes,
+      "xmlns",
+      kLegalXmlns[tag_namespace]
+    )
+  ) {
     // TODO(jdtang): Since there're multiple possible error codes here, we
     // eventually need reason codes to differentiate them.
     parser_add_parse_error(parser, token);
   }
-  if (token_has_attribute(token, "xmlns:xlink") &&
-      !attribute_matches_case_sensitive(&token->v.start_tag.attributes,
-          "xmlns:xlink", "http://www.w3.org/1999/xlink")) {
+  if (
+    token_has_attribute(token, "xmlns:xlink")
+    && !attribute_matches_case_sensitive (
+      &token->v.start_tag.attributes,
+      "xmlns:xlink",
+      "http://www.w3.org/1999/xlink"
+    )
+  ) {
     parser_add_parse_error(parser, token);
   }
   return element;
 }
 
 static void insert_text_token(GumboParser* parser, GumboToken* token) {
-  assert(token->type == GUMBO_TOKEN_WHITESPACE ||
-         token->type == GUMBO_TOKEN_CHARACTER ||
-         token->type == GUMBO_TOKEN_NULL || token->type == GUMBO_TOKEN_CDATA);
+  assert (
+    token->type == GUMBO_TOKEN_WHITESPACE
+    || token->type == GUMBO_TOKEN_CHARACTER
+    || token->type == GUMBO_TOKEN_NULL
+    || token->type == GUMBO_TOKEN_CDATA
+  );
   TextNodeBufferState* buffer_state = &parser->_parser_state->_text_node;
   if (buffer_state->_buffer.length == 0) {
     // Initialize position fields.
     buffer_state->_start_original_text = token->original_text.data;
     buffer_state->_start_position = token->position;
   }
-  gumbo_string_buffer_append_codepoint(
-      parser, token->v.character, &buffer_state->_buffer);
+  gumbo_string_buffer_append_codepoint (
+    token->v.character,
+    &buffer_state->_buffer
+  );
   if (token->type == GUMBO_TOKEN_CHARACTER) {
     buffer_state->_type = GUMBO_NODE_TEXT;
   } else if (token->type == GUMBO_TOKEN_CDATA) {
@@ -1132,14 +1210,17 @@ static void insert_text_token(GumboParser* parser, GumboToken* token) {
   gumbo_debug("Inserting text token '%c'.\n", token->v.character);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generic-rcdata-element-parsing-algorithm
-static void run_generic_parsing_algorithm(
-    GumboParser* parser, GumboToken* token, GumboTokenizerEnum lexer_state) {
+// https://html.spec.whatwg.org/multipage/parsing.html#generic-rcdata-element-parsing-algorithm
+static void run_generic_parsing_algorithm (
+  GumboParser* parser,
+  GumboToken* token,
+  GumboTokenizerEnum lexer_state
+) {
   insert_element_from_token(parser, token);
   gumbo_tokenizer_set_state(parser, lexer_state);
-  parser->_parser_state->_original_insertion_mode =
-      parser->_parser_state->_insertion_mode;
-  parser->_parser_state->_insertion_mode = GUMBO_INSERTION_MODE_TEXT;
+  GumboParserState* parser_state = parser->_parser_state;
+  parser_state->_original_insertion_mode = parser_state->_insertion_mode;
+  parser_state->_insertion_mode = GUMBO_INSERTION_MODE_TEXT;
 }
 
 static void acknowledge_self_closing_tag(GumboParser* parser) {
@@ -1165,10 +1246,13 @@ static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
 
 // Counts the number of open formatting elements in the list of active
 // formatting elements (after the last active scope marker) that have a specific
-// tag.  If this is > 0, then earliest_matching_index will be filled in with the
+// tag. If this is > 0, then earliest_matching_index will be filled in with the
 // index of the first such element.
-static int count_formatting_elements_of_tag(GumboParser* parser,
-    const GumboNode* desired_node, int* earliest_matching_index) {
+static int count_formatting_elements_of_tag (
+  GumboParser* parser,
+  const GumboNode* desired_node,
+  int* earliest_matching_index
+) {
   const GumboElement* desired_element = &desired_node->v.element;
   GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
   int num_identical_elements = 0;
@@ -1178,10 +1262,10 @@ static int count_formatting_elements_of_tag(GumboParser* parser,
       break;
     }
     assert(node->type == GUMBO_NODE_ELEMENT);
-    if (node_qualified_tag_is(
-            node, desired_element->tag_namespace, desired_element->tag) &&
-        all_attributes_match(
-            &node->v.element.attributes, &desired_element->attributes)) {
+    if (
+      node_qualified_tag_is(node, desired_element->tag_namespace, desired_element->tag)
+      && all_attributes_match(&node->v.element.attributes, &desired_element->attributes)
+    ) {
       num_identical_elements++;
       *earliest_matching_index = i;
     }
@@ -1189,10 +1273,12 @@ static int count_formatting_elements_of_tag(GumboParser* parser,
   return num_identical_elements;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#reconstruct-the-active-formatting-elements
+// https://html.spec.whatwg.org/multipage/parsing.html#push-onto-the-list-of-active-formatting-elements
 static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
-  assert(node == &kActiveFormattingScopeMarker ||
-         node->type == GUMBO_NODE_ELEMENT);
+  assert (
+    node == &kActiveFormattingScopeMarker
+    || node->type == GUMBO_NODE_ELEMENT
+  );
   GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
   if (node == &kActiveFormattingScopeMarker) {
     gumbo_debug("Adding a scope marker.\n");
@@ -1202,21 +1288,26 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
 
   // Hunt for identical elements.
   int earliest_identical_element = elements->length;
-  int num_identical_elements = count_formatting_elements_of_tag(
-      parser, node, &earliest_identical_element);
+  int num_identical_elements = count_formatting_elements_of_tag (
+    parser,
+    node,
+    &earliest_identical_element
+  );
 
   // Noah's Ark clause: if there're at least 3, remove the earliest.
   if (num_identical_elements >= 3) {
-    gumbo_debug("Noah's ark clause: removing element at %d.\n",
-        earliest_identical_element);
-    gumbo_vector_remove_at(parser, earliest_identical_element, elements);
+    gumbo_debug (
+      "Noah's ark clause: removing element at %d.\n",
+      earliest_identical_element
+    );
+    gumbo_vector_remove_at(earliest_identical_element, elements);
   }
 
-  gumbo_vector_add(parser, (void*) node, elements);
+  gumbo_vector_add((void*) node, elements);
 }
 
-static bool is_open_element(GumboParser* parser, const GumboNode* node) {
-  GumboVector* open_elements = &parser->_parser_state->_open_elements;
+static bool is_open_element(const GumboParser* parser, const GumboNode* node) {
+  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
   for (unsigned int i = 0; i < open_elements->length; ++i) {
     if (open_elements->data[i] == node) {
       return true;
@@ -1225,13 +1316,15 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
   return false;
 }
 
-// Clones attributes, tags, etc. of a node, but does not copy the content.  The
+// Clones attributes, tags, etc. of a node, but does not copy the content. The
 // clone shares no structure with the original node: all owned strings and
 // values are fresh copies.
-GumboNode* clone_node(
-    GumboParser* parser, GumboNode* node, GumboParseFlags reason) {
+static GumboNode* clone_node (
+  GumboNode* node,
+  GumboParseFlags reason
+) {
   assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
-  GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
+  GumboNode* new_node = gumbo_alloc(sizeof(GumboNode));
   *new_node = *node;
   new_node->parent = NULL;
   new_node->index_within_parent = -1;
@@ -1240,26 +1333,25 @@ GumboNode* clone_node(
   new_node->parse_flags &= ~GUMBO_INSERTION_IMPLICIT_END_TAG;
   new_node->parse_flags |= reason | GUMBO_INSERTION_BY_PARSER;
   GumboElement* element = &new_node->v.element;
-  gumbo_vector_init(parser, 1, &element->children);
+  gumbo_vector_init(1, &element->children);
 
   const GumboVector* old_attributes = &node->v.element.attributes;
-  gumbo_vector_init(parser, old_attributes->length, &element->attributes);
+  gumbo_vector_init(old_attributes->length, &element->attributes);
   for (unsigned int i = 0; i < old_attributes->length; ++i) {
     const GumboAttribute* old_attr = old_attributes->data[i];
-    GumboAttribute* attr =
-        gumbo_parser_allocate(parser, sizeof(GumboAttribute));
+    GumboAttribute* attr = gumbo_alloc(sizeof(GumboAttribute));
     *attr = *old_attr;
-    attr->name = gumbo_copy_stringz(parser, old_attr->name);
-    attr->value = gumbo_copy_stringz(parser, old_attr->value);
-    gumbo_vector_add(parser, attr, &element->attributes);
+    attr->name = gumbo_strdup(old_attr->name);
+    attr->value = gumbo_strdup(old_attr->value);
+    gumbo_vector_add(attr, &element->attributes);
   }
   return new_node;
 }
 
 // "Reconstruct active formatting elements" part of the spec.
-// This implementation is based on the html5lib translation from the mess of
-// GOTOs in the spec to reasonably structured programming.
-// http://code.google.com/p/html5lib/source/browse/python/html5lib/treebuilders/_base.py
+// This implementation is based on the html5lib translation from the
+// mess of GOTOs in the spec to reasonably structured programming.
+// https://github.com/html5lib/html5lib-python/blob/master/html5lib/treebuilders/base.py
 static void reconstruct_active_formatting_elements(GumboParser* parser) {
   GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
   // Step 1
@@ -1270,8 +1362,10 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
   // Step 2 & 3
   unsigned int i = elements->length - 1;
   GumboNode* element = elements->data[i];
-  if (element == &kActiveFormattingScopeMarker ||
-      is_open_element(parser, element)) {
+  if (
+    element == &kActiveFormattingScopeMarker
+    || is_open_element(parser, element)
+  ) {
     return;
   }
 
@@ -1284,31 +1378,43 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
     }
     // Step 5
     element = elements->data[--i];
-  } while (element != &kActiveFormattingScopeMarker &&
-           !is_open_element(parser, element));
+  } while (
+    element != &kActiveFormattingScopeMarker
+    && !is_open_element(parser, element)
+  );
 
   ++i;
-  gumbo_debug("Reconstructing elements from %d on %s parent.\n", i,
-      gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
+  gumbo_debug (
+    "Reconstructing elements from %u on %s parent.\n",
+    i,
+    gumbo_normalized_tagname(get_current_node(parser)->v.element.tag)
+  );
   for (; i < elements->length; ++i) {
     // Step 7 & 8.
     assert(elements->length > 0);
     assert(i < elements->length);
     element = elements->data[i];
     assert(element != &kActiveFormattingScopeMarker);
-    GumboNode* clone = clone_node(
-        parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
+    GumboNode* clone = clone_node (
+      element,
+      GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT
+    );
     // Step 9.
     InsertionLocation location =
         get_appropriate_insertion_location(parser, NULL);
-    insert_node(parser, clone, location);
-    gumbo_vector_add(
-        parser, (void*) clone, &parser->_parser_state->_open_elements);
+    insert_node(clone, location);
+    gumbo_vector_add (
+      (void*) clone,
+      &parser->_parser_state->_open_elements
+    );
 
     // Step 10.
     elements->data[i] = clone;
-    gumbo_debug("Reconstructed %s element at %d.\n",
-        gumbo_normalized_tagname(clone->v.element.tag), i);
+    gumbo_debug (
+      "Reconstructed %s element at %u.\n",
+      gumbo_normalized_tagname(clone->v.element.tag),
+      i
+    );
   }
 }
 
@@ -1317,109 +1423,150 @@ static void clear_active_formatting_elements(GumboParser* parser) {
   int num_elements_cleared = 0;
   const GumboNode* node;
   do {
-    node = gumbo_vector_pop(parser, elements);
+    node = gumbo_vector_pop(elements);
     ++num_elements_cleared;
   } while (node && node != &kActiveFormattingScopeMarker);
-  gumbo_debug("Cleared %d elements from active formatting list.\n",
-      num_elements_cleared);
+  gumbo_debug (
+    "Cleared %d elements from active formatting list.\n",
+    num_elements_cleared
+  );
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-initial-insertion-mode
-static GumboQuirksModeEnum compute_quirks_mode(
-    const GumboTokenDocType* doctype) {
-  if (doctype->force_quirks || strcmp(doctype->name, kDoctypeHtml.data) ||
-      is_in_static_list(
-          doctype->public_identifier, kQuirksModePublicIdPrefixes, false) ||
-      is_in_static_list(
-          doctype->public_identifier, kQuirksModePublicIdExactMatches, true) ||
-      is_in_static_list(
-          doctype->system_identifier, kQuirksModeSystemIdExactMatches, true) ||
-      (is_in_static_list(doctype->public_identifier,
-           kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
-          !doctype->has_system_identifier)) {
+// https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode
+static GumboQuirksModeEnum compute_quirks_mode(const GumboTokenDocType* doctype) {
+  const char *const pubid = doctype->public_identifier;
+  const char *const sysid = doctype->system_identifier;
+
+  if (
+    doctype->force_quirks
+    || strcmp(doctype->name, "html")
+    || is_in_static_list(pubid, kQuirksModePublicIdPrefixes, false)
+    || is_in_static_list(pubid, kQuirksModePublicIdExactMatches, true)
+    || is_in_static_list(sysid, kQuirksModeSystemIdExactMatches, true)
+    || (
+      !doctype->has_system_identifier
+      && is_in_static_list(pubid, kSystemIdDependentPublicIdPrefixes, false)
+    )
+  ) {
     return GUMBO_DOCTYPE_QUIRKS;
-  } else if (is_in_static_list(doctype->public_identifier,
-                 kLimitedQuirksPublicIdPrefixes, false) ||
-             (is_in_static_list(doctype->public_identifier,
-                  kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
-                 doctype->has_system_identifier)) {
+  }
+
+  if (
+    is_in_static_list(pubid, kLimitedQuirksPublicIdPrefixes, false)
+    || (
+      doctype->has_system_identifier
+      && is_in_static_list(pubid, kSystemIdDependentPublicIdPrefixes, false)
+    )
+  ) {
     return GUMBO_DOCTYPE_LIMITED_QUIRKS;
   }
+
   return GUMBO_DOCTYPE_NO_QUIRKS;
 }
 
 // The following functions are all defined by the "has an element in __ scope"
 // sections of the HTML5 spec:
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-the-specific-scope
+// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
 // The basic idea behind them is that they check for an element of the given
 // qualified name, contained within a scope formed by a set of other qualified
-// names.  For example, "has an element in list scope" looks for an element of
+// names. For example, "has an element in list scope" looks for an element of
 // the given qualified name within the nearest enclosing <ol> or <ul>, along
 // with a bunch of generic element types that serve to "firewall" their content
 // from the rest of the document. Note that because of the way the spec is
 // written,
 // all elements are expected to be in the HTML namespace
-static bool has_an_element_in_specific_scope(GumboParser* parser,
-    int expected_size, const GumboTag* expected, bool negate,
-    const gumbo_tagset tags) {
-  GumboVector* open_elements = &parser->_parser_state->_open_elements;
+static bool has_an_element_in_specific_scope (
+  const GumboParser* parser,
+  int expected_size,
+  const GumboTag* expected,
+  bool negate,
+  const TagSet* tags
+) {
+  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
   for (int i = open_elements->length; --i >= 0;) {
     const GumboNode* node = open_elements->data[i];
-    if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
+    if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
       continue;
+    }
 
     GumboTag node_tag = node->v.element.tag;
     GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
     for (int j = 0; j < expected_size; ++j) {
-      if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
+      if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML) {
         return true;
+      }
     }
 
-    bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
-    if (negate != found) return false;
+    bool found = tagset_includes(tags, node_ns, node_tag);
+    if (negate != found) {
+      return false;
+    }
   }
   return false;
 }
 
 // Checks for the presence of an open element of the specified tag type.
-static bool has_open_element(GumboParser* parser, GumboTag tag) {
-  return has_an_element_in_specific_scope(
-      parser, 1, &tag, false, (gumbo_tagset){TAG(HTML)});
+static bool has_open_element(const GumboParser* parser, GumboTag tag) {
+  static const TagSet tags = {TAG(HTML)};
+  return has_an_element_in_specific_scope(parser, 1, &tag, false, &tags);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
-static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
-  return has_an_element_in_specific_scope(parser, 1, &tag, false,
-      (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
-          TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
-          TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
-          TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
-          TAG_SVG(TITLE)});
+// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-scope
+#define DEFAULT_SCOPE_TAGS \
+  TAG(APPLET), \
+  TAG(CAPTION), \
+  TAG(HTML), \
+  TAG(TABLE), \
+  TAG(TD), \
+  TAG(TH), \
+  TAG(MARQUEE), \
+  TAG(OBJECT), \
+  TAG(TEMPLATE), \
+  TAG_MATHML(MI), \
+  TAG_MATHML(MO), \
+  TAG_MATHML(MN), \
+  TAG_MATHML(MS), \
+  TAG_MATHML(MTEXT), \
+  TAG_MATHML(ANNOTATION_XML), \
+  TAG_SVG(FOREIGNOBJECT), \
+  TAG_SVG(DESC), \
+  TAG_SVG(TITLE)
+
+static const TagSet heading_tags = {
+  TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6)
+};
+
+static const TagSet td_th_tags = {
+  TAG(TD), TAG(TH)
+};
+
+static const TagSet dd_dt_tags = {
+  TAG(DD), TAG(DT)
+};
+
+// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-scope
+static bool has_an_element_in_scope(const GumboParser* parser, GumboTag tag) {
+  static const TagSet tags = {DEFAULT_SCOPE_TAGS};
+  return has_an_element_in_specific_scope(parser, 1, &tag, false, &tags);
 }
 
 // Like "has an element in scope", but for the specific case of looking for a
-// unique target node, not for any node with a given tag name.  This duplicates
+// unique target node, not for any node with a given tag name. This duplicates
 // much of the algorithm from has_an_element_in_specific_scope because the
 // predicate is different when checking for an exact node, and it's easier &
 // faster just to duplicate the code for this one case than to try and
 // parameterize it.
-static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
-  GumboVector* open_elements = &parser->_parser_state->_open_elements;
+static bool has_node_in_scope(const GumboParser* parser, const GumboNode* node) {
+  static const TagSet tags = {DEFAULT_SCOPE_TAGS};
+  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
   for (int i = open_elements->length; --i >= 0;) {
     const GumboNode* current = open_elements->data[i];
+    const GumboNodeType type = current->type;
     if (current == node) {
       return true;
-    }
-    if (current->type != GUMBO_NODE_ELEMENT &&
-        current->type != GUMBO_NODE_TEMPLATE) {
+    } else if (type != GUMBO_NODE_ELEMENT && type != GUMBO_NODE_TEMPLATE) {
       continue;
-    }
-    if (node_tag_in_set(current,
-            (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE),
-                TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
-                TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
-                TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
-                TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE)})) {
+    } else if (node_tag_in_set(current, &tags)) {
       return false;
     }
   }
@@ -1429,76 +1576,72 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
 
 // Like has_an_element_in_scope, but restricts the expected qualified name to a
 // range of possible qualified names instead of just a single one.
-static bool has_an_element_in_scope_with_tagname(
-    GumboParser* parser, int expected_len, const GumboTag expected[]) {
-  return has_an_element_in_specific_scope(parser, expected_len, expected, false,
-      (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
-          TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
-          TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
-          TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
-          TAG_SVG(TITLE)});
+static bool has_an_element_in_scope_with_tagname (
+  const GumboParser* parser,
+  int len,
+  const GumboTag expected[]
+) {
+  static const TagSet tags = {DEFAULT_SCOPE_TAGS};
+  return has_an_element_in_specific_scope(parser, len, expected, false, &tags);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
-static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
-  return has_an_element_in_specific_scope(parser, 1, &tag, false,
-      (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
-          TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
-          TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
-          TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
-          TAG_SVG(TITLE), TAG(OL), TAG(UL)});
+// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-list-item-scope
+static bool has_an_element_in_list_scope(const GumboParser* parser, GumboTag tag) {
+  static const TagSet tags = {DEFAULT_SCOPE_TAGS, TAG(OL), TAG(UL)};
+  return has_an_element_in_specific_scope(parser, 1, &tag, false, &tags);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
-static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
-  return has_an_element_in_specific_scope(parser, 1, &tag, false,
-      (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
-          TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
-          TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
-          TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
-          TAG_SVG(TITLE), TAG(BUTTON)});
+// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-button-scope
+static bool has_an_element_in_button_scope(const GumboParser* parser, GumboTag tag) {
+  static const TagSet tags = {DEFAULT_SCOPE_TAGS, TAG(BUTTON)};
+  return has_an_element_in_specific_scope(parser, 1, &tag, false, &tags);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
-static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
-  return has_an_element_in_specific_scope(parser, 1, &tag, false,
-      (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)});
+// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-table-scope
+static bool has_an_element_in_table_scope(const GumboParser* parser, GumboTag tag) {
+  static const TagSet tags = {TAG(HTML), TAG(TABLE), TAG(TEMPLATE)};
+  return has_an_element_in_specific_scope(parser, 1, &tag, false, &tags);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
-static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
-  return has_an_element_in_specific_scope(
-      parser, 1, &tag, true, (gumbo_tagset){TAG(OPTGROUP), TAG(OPTION)});
+// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-select-scope
+static bool has_an_element_in_select_scope(const GumboParser* parser, GumboTag tag) {
+  static const TagSet tags = {TAG(OPTGROUP), TAG(OPTION)};
+  return has_an_element_in_specific_scope(parser, 1, &tag, true, &tags);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
+// https://html.spec.whatwg.org/multipage/parsing.html#generate-implied-end-tags
 // "exception" is the "element to exclude from the process" listed in the spec.
 // Pass GUMBO_TAG_LAST to not exclude any of them.
 static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
-  for (; node_tag_in_set(get_current_node(parser),
-             (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTION),
-                 TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)}) &&
-         !node_html_tag_is(get_current_node(parser), exception);
-       pop_current_node(parser))
-    ;
+  static const TagSet tags = {
+    TAG(DD), TAG(DT), TAG(LI), TAG(OPTION), TAG(OPTGROUP),
+    TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)
+  };
+  while (
+    node_tag_in_set(get_current_node(parser), &tags)
+    && !node_html_tag_is(get_current_node(parser), exception)
+  ) {
+    pop_current_node(parser);
+  }
 }
 
 // This is the "generate all implied end tags thoroughly" clause of the spec.
-// https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
+// https://html.spec.whatwg.org/multipage/parsing.html#closing-elements-that-have-implied-end-tags
 static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
-  for (
-      ; node_tag_in_set(get_current_node(parser),
-          (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
-              TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC),
-              TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)});
-      pop_current_node(parser))
-    ;
+  static const TagSet tags = { // the spec's list, in the spec's order
+    TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
+    TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY),
+    TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
+  };
+  while (node_tag_in_set(get_current_node(parser), &tags)) {
+    pop_current_node(parser);
+  }
 }
 
 // This factors out the clauses relating to "act as if an end tag token with tag
-// name "table" had been seen.  Returns true if there's a table element in table
+// name "table" had been seen. Returns true if there's a table element in table
 // scope which was successfully closed, false if not and the token should be
-// ignored.  Does not add parse errors; callers should handle that.
+// ignored. Does not add parse errors; callers should handle that.
 static bool close_table(GumboParser* parser) {
   if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TABLE)) {
     return false;
@@ -1514,8 +1657,11 @@ static bool close_table(GumboParser* parser) {
 
 // This factors out the clauses relating to "act as if an end tag token with tag
 // name `cell_tag` had been seen".
-static bool close_table_cell(
-    GumboParser* parser, const GumboToken* token, GumboTag cell_tag) {
+static bool close_table_cell (
+  GumboParser* parser,
+  const GumboToken* token,
+  GumboTag cell_tag
+) {
   bool result = true;
   generate_implied_end_tags(parser, GUMBO_TAG_LAST);
   const GumboNode* node = get_current_node(parser);
@@ -1532,7 +1678,7 @@ static bool close_table_cell(
   return result;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#close-the-cell
+// https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell
 // This holds the logic to determine whether we should close a <td> or a <th>.
 static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
   if (has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
@@ -1545,7 +1691,7 @@ static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
 }
 
 // This factors out the "act as if an end tag of tag name 'select' had been
-// seen" clause of the spec, since it's referenced in several places.  It pops
+// seen" clause of the spec, since it's referenced in several places. It pops
 // all nodes from the stack until the current <select> has been closed, then
 // resets the insertion mode appropriately.
 static void close_current_select(GumboParser* parser) {
@@ -1557,45 +1703,60 @@ static void close_current_select(GumboParser* parser) {
 }
 
 // The list of nodes in the "special" category:
-// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
+// https://html.spec.whatwg.org/multipage/parsing.html#special
 static bool is_special_node(const GumboNode* node) {
   assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
-  return node_tag_in_set(node,
-      (gumbo_tagset){TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE),
-          TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
-          TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
-          TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR),
-          TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET),
-          TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME),
-          TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6),
-          TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME),
-          TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK), TAG(LISTING),
-          TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
-          TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P),
-          TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
-          TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
-          TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
-          TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
-
-          TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
-          TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
-
-          TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
+  return node_tag_in_set(node, &(const TagSet) {
+      TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE),
+      TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
+      TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
+      TAG(COLGROUP), TAG(DD), TAG(DETAILS), TAG(DIR),
+      TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET),
+      TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME),
+      TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6),
+      TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME),
+      TAG(IMG), TAG(INPUT), TAG(LI), TAG(LINK), TAG(LISTING),
+      TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
+      TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P),
+      TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
+      TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
+      TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
+      TAG(THEAD), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
+
+      TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
+      TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
+
+      TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
+
+      // This TagSet needs to include the "title" element in both the
+      // HTML and SVG namespaces. Using both TAG(TITLE) and TAG_SVG(TITLE)
+      // won't work, due to the simplistic way in which the TAG macros are
+      // implemented, so we do it like this instead:
+      [GUMBO_TAG_TITLE] =
+          (1 << GUMBO_NAMESPACE_HTML) |
+          (1 << GUMBO_NAMESPACE_SVG)
+    }
+  );
 }
 
 // Implicitly closes currently open elements until it reaches an element with
 // the
-// specified qualified name.  If the elements closed are in the set handled by
+// specified qualified name. If the elements closed are in the set handled by
 // generate_implied_end_tags, this is normal operation and this function returns
-// true.  Otherwise, a parse error is recorded and this function returns false.
-static bool implicitly_close_tags(GumboParser* parser, GumboToken* token,
-    GumboNamespaceEnum target_ns, GumboTag target) {
+// true. Otherwise, a parse error is recorded and this function returns false.
+static bool implicitly_close_tags (
+  GumboParser* parser,
+  GumboToken* token,
+  GumboNamespaceEnum target_ns,
+  GumboTag target
+) {
   bool result = true;
   generate_implied_end_tags(parser, target);
   if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
     parser_add_parse_error(parser, token);
     while (
-        !node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
+      !node_qualified_tag_is(get_current_node(parser), target_ns, target)
+    ) {
       pop_current_node(parser);
     }
     result = false;
@@ -1606,44 +1767,61 @@ static bool implicitly_close_tags(GumboParser* parser, GumboToken* token,
 }
 
 // If the stack of open elements has a <p> tag in button scope, this acts as if
-// a </p> tag was encountered, implicitly closing tags.  Returns false if a
-// parse error occurs.  This is a convenience function because this particular
+// a </p> tag was encountered, implicitly closing tags. Returns false if a
+// parse error occurs. This is a convenience function because this particular
 // clause appears several times in the spec.
-static bool maybe_implicitly_close_p_tag(
-    GumboParser* parser, GumboToken* token) {
+static bool maybe_implicitly_close_p_tag (
+  GumboParser* parser,
+  GumboToken* token
+) {
   if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
-    return implicitly_close_tags(
-        parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
+    return implicitly_close_tags (
+      parser,
+      token,
+      GUMBO_NAMESPACE_HTML,
+      GUMBO_TAG_P
+    );
   }
   return true;
 }
 
 // Convenience function to encapsulate the logic for closing <li> or <dd>/<dt>
-// tags.  Pass true to is_li for handling <li> tags, false for <dd> and <dt>.
-static void maybe_implicitly_close_list_tag(
-    GumboParser* parser, GumboToken* token, bool is_li) {
+// tags. Pass true to is_li for handling <li> tags, false for <dd> and <dt>.
+static void maybe_implicitly_close_list_tag (
+  GumboParser* parser,
+  GumboToken* token,
+  bool is_li
+) {
   GumboParserState* state = parser->_parser_state;
   state->_frameset_ok = false;
   for (int i = state->_open_elements.length; --i >= 0;) {
     const GumboNode* node = state->_open_elements.data[i];
-    bool is_list_tag =
-        is_li ? node_html_tag_is(node, GUMBO_TAG_LI)
-              : node_tag_in_set(node, (gumbo_tagset){TAG(DD), TAG(DT)});
+    bool is_list_tag = is_li
+      ? node_html_tag_is(node, GUMBO_TAG_LI)
+      : node_tag_in_set(node, &dd_dt_tags)
+    ;
     if (is_list_tag) {
-      implicitly_close_tags(
-          parser, token, node->v.element.tag_namespace, node->v.element.tag);
+      implicitly_close_tags (
+        parser,
+        token,
+        node->v.element.tag_namespace,
+        node->v.element.tag
+      );
       return;
     }
-    if (is_special_node(node) &&
-        !node_tag_in_set(
-            node, (gumbo_tagset){TAG(ADDRESS), TAG(DIV), TAG(P)})) {
+    if (
+      is_special_node(node)
+      && !node_tag_in_set(node, &(const TagSet){TAG(ADDRESS), TAG(DIV), TAG(P)})
+    ) {
       return;
     }
   }
 }
 
-static void merge_attributes(
-    GumboParser* parser, GumboToken* token, GumboNode* node) {
+static void merge_attributes (
+  GumboToken* token,
+  GumboNode* node
+) {
   assert(token->type == GUMBO_TOKEN_START_TAG);
   assert(node->type == GUMBO_NODE_ELEMENT);
   const GumboVector* token_attr = &token->v.start_tag.attributes;
@@ -1655,15 +1833,15 @@ static void merge_attributes(
       // Ownership of the attribute is transferred by this gumbo_vector_add,
       // so it has to be nulled out of the original token so it doesn't get
       // double-deleted.
-      gumbo_vector_add(parser, attr, node_attr);
+      gumbo_vector_add(attr, node_attr);
       token_attr->data[i] = NULL;
     }
   }
   // When attributes are merged, it means the token has been ignored and merged
-  // with another token, so we need to free its memory.  The attributes that are
+  // with another token, so we need to free its memory. The attributes that are
   // transferred need to be nulled-out in the vector above so that they aren't
   // double-deleted.
-  gumbo_token_destroy(parser, token);
+  gumbo_token_destroy(token);
 
 #ifndef NDEBUG
   // Mark this sentinel so the assertion in the main loop knows it's been
@@ -1673,80 +1851,107 @@ static void merge_attributes(
 }
 
 const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
-  for (size_t i = 0; i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry);
-       ++i) {
-    const ReplacementEntry* entry = &kSvgTagReplacements[i];
-    if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
-      return entry->to.data;
-    }
-  }
-  return NULL;
+  const StringReplacement *replacement = gumbo_get_svg_tag_replacement (
+    tag->data,
+    tag->length
+  );
+  return replacement ? replacement->to : NULL;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-foreign-attributes
+// https://html.spec.whatwg.org/multipage/parsing.html#adjust-foreign-attributes
 // This destructively modifies any matching attributes on the token and sets the
 // namespace appropriately.
-static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
+static void adjust_foreign_attributes(GumboToken* token) {
   assert(token->type == GUMBO_TOKEN_START_TAG);
   const GumboVector* attributes = &token->v.start_tag.attributes;
-  for (size_t i = 0; i < sizeof(kForeignAttributeReplacements) /
-                             sizeof(NamespacedAttributeReplacement);
-       ++i) {
-    const NamespacedAttributeReplacement* entry =
-        &kForeignAttributeReplacements[i];
-    GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from);
-    if (!attr) {
+  for (unsigned int i = 0, n = attributes->length; i < n; ++i) {
+    GumboAttribute* attr = attributes->data[i];
+    const ForeignAttrReplacement* entry = gumbo_get_foreign_attr_replacement (
+      attr->name,
+      strlen(attr->name)
+    );
+    if (!entry) {
       continue;
     }
-    gumbo_parser_deallocate(parser, (void*) attr->name);
+    gumbo_free((void*) attr->name);
     attr->attr_namespace = entry->attr_namespace;
-    attr->name = gumbo_copy_stringz(parser, entry->local_name);
+    attr->name = gumbo_strdup(entry->local_name);
+  }
+}
+
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inforeign
+// This adjusts svg tags.
+static void adjust_svg_tag(GumboToken* token) {
+  assert(token->type == GUMBO_TOKEN_START_TAG);
+  if (token->v.start_tag.tag == GUMBO_TAG_FOREIGNOBJECT) {
+    assert(token->v.start_tag.name == NULL);
+    token->v.start_tag.name = "foreignObject";
+  } else if (token->v.start_tag.tag == GUMBO_TAG_UNKNOWN) {
+    assert(token->v.start_tag.name);
+    const StringReplacement *replacement = gumbo_get_svg_tag_replacement(
+      token->v.start_tag.name,
+      strlen(token->v.start_tag.name)
+    );
+    if (replacement) {
+      // This cast is safe because we allocated this memory and we'll free it.
+      strcpy((char *)token->v.start_tag.name, replacement->to);
+    }
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#adjust-svg-attributes
+// https://html.spec.whatwg.org/multipage/parsing.html#adjust-svg-attributes
 // This destructively modifies any matching attributes on the token.
-static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
+static void adjust_svg_attributes(GumboToken* token) {
   assert(token->type == GUMBO_TOKEN_START_TAG);
   const GumboVector* attributes = &token->v.start_tag.attributes;
-  for (size_t i = 0;
-       i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
-    const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
-    GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
-    if (!attr) {
+  for (unsigned int i = 0, n = attributes->length; i < n; i++) {
+    GumboAttribute* attr = (GumboAttribute*) attributes->data[i];
+    const StringReplacement* replacement = gumbo_get_svg_attr_replacement (
+      attr->name,
+      attr->original_name.length
+    );
+    if (!replacement) {
       continue;
     }
-    gumbo_parser_deallocate(parser, (void*) attr->name);
-    attr->name = gumbo_copy_stringz(parser, entry->to.data);
+    gumbo_free((void*) attr->name);
+    attr->name = gumbo_strdup(replacement->to);
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#adjust-mathml-attributes
+// https://html.spec.whatwg.org/multipage/parsing.html#adjust-mathml-attributes
 // Note that this may destructively modify the token with the new attribute
 // value.
-static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
+static void adjust_mathml_attributes(GumboToken* token) {
   assert(token->type == GUMBO_TOKEN_START_TAG);
-  GumboAttribute* attr =
-      gumbo_get_attribute(&token->v.start_tag.attributes, "definitionurl");
+  GumboAttribute* attr = gumbo_get_attribute (
+    &token->v.start_tag.attributes,
+    "definitionurl"
+  );
   if (!attr) {
     return;
   }
-  gumbo_parser_deallocate(parser, (void*) attr->name);
-  attr->name = gumbo_copy_stringz(parser, "definitionURL");
+  gumbo_free((void*) attr->name);
+  attr->name = gumbo_strdup("definitionURL");
 }
 
-static bool doctype_matches(const GumboTokenDocType* doctype,
-    const GumboStringPiece* public_id, const GumboStringPiece* system_id,
-    bool allow_missing_system_id) {
-  return !strcmp(doctype->public_identifier, public_id->data) &&
-         (allow_missing_system_id || doctype->has_system_identifier) &&
-         !strcmp(doctype->system_identifier, system_id->data);
+static bool doctype_matches (
+  const GumboTokenDocType* doctype,
+  const GumboStringPiece* public_id,
+  const GumboStringPiece* system_id,
+  bool allow_missing_system_id
+) {
+  return
+    !strcmp(doctype->public_identifier, public_id->data)
+    && (allow_missing_system_id || doctype->has_system_identifier)
+    && !strcmp(doctype->system_identifier, system_id->data);
 }
 
-static bool maybe_add_doctype_error(
-    GumboParser* parser, const GumboToken* token) {
+static bool maybe_add_doctype_error (
+  GumboParser* parser,
+  const GumboToken* token
+) {
   const GumboTokenDocType* doctype = &token->v.doc_type;
-  bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
+  bool html_doctype = !strcmp(doctype->name, "html");
   if ((!html_doctype || doctype->has_public_identifier ||
           (doctype->has_system_identifier &&
               !strcmp(
@@ -1765,10 +1970,10 @@ static bool maybe_add_doctype_error(
   return true;
 }
 
-static void remove_from_parent(GumboParser* parser, GumboNode* node) {
+static void remove_from_parent(GumboNode* node) {
   if (!node->parent) {
     // The node may not have a parent if, for example, it is a newly-cloned copy
-    // of an active formatting element.  DOM manipulations continue with the
+    // of an active formatting element. DOM manipulations continue with the
     // orphaned fragment of the DOM tree until it's appended/foster-parented to
     // the common ancestor at the end of the adoption agency algorithm.
     return;
@@ -1778,7 +1983,7 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
   int index = gumbo_vector_index_of(children, node);
   assert(index != -1);
 
-  gumbo_vector_remove_at(parser, index, children);
+  gumbo_vector_remove_at(index, children);
   node->parent = NULL;
   node->index_within_parent = -1;
   for (unsigned int i = index; i < children->length; ++i) {
@@ -1787,18 +1992,25 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
+// https://html.spec.whatwg.org/multipage/parsing.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
 // Also described in the "in body" handling for end formatting tags.
-static bool adoption_agency_algorithm(
-    GumboParser* parser, GumboToken* token, GumboTag subject) {
+static bool adoption_agency_algorithm (
+  GumboParser* parser,
+  GumboToken* token,
+  GumboTag subject
+) {
   GumboParserState* state = parser->_parser_state;
   gumbo_debug("Entering adoption agency algorithm.\n");
   // Step 1.
   GumboNode* current_node = get_current_node(parser);
-  if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
-      current_node->v.element.tag == subject &&
-      gumbo_vector_index_of(
-          &state->_active_formatting_elements, current_node) == -1) {
+  if (
+    current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML
+    && current_node->v.element.tag == subject
+    && -1 == gumbo_vector_index_of (
+      &state->_active_formatting_elements,
+      current_node
+    )
+  ) {
     pop_current_node(parser);
     return false;
   }
@@ -1817,11 +2029,15 @@ static bool adoption_agency_algorithm(
       if (node_html_tag_is(current_node, subject)) {
         // Found it.
         formatting_node = current_node;
-        formatting_node_in_open_elements =
-            gumbo_vector_index_of(&state->_open_elements, formatting_node);
-        gumbo_debug("Formatting element of tag %s at %d.\n",
-            gumbo_normalized_tagname(subject),
-            formatting_node_in_open_elements);
+        formatting_node_in_open_elements = gumbo_vector_index_of (
+          &state->_open_elements,
+          formatting_node
+        );
+        gumbo_debug (
+          "Formatting element of tag %s at %d.\n",
+          gumbo_normalized_tagname(subject),
+          formatting_node_in_open_elements
+        );
         break;
       }
     }
@@ -1837,8 +2053,10 @@ static bool adoption_agency_algorithm(
     if (formatting_node_in_open_elements == -1) {
       gumbo_debug("Formatting node not on stack of open elements.\n");
       parser_add_parse_error(parser, token);
-      gumbo_vector_remove(
-          parser, formatting_node, &state->_active_formatting_elements);
+      gumbo_vector_remove (
+        formatting_node,
+        &state->_active_formatting_elements
+      );
       return false;
     }
 
@@ -1859,8 +2077,11 @@ static bool adoption_agency_algorithm(
 
     // Step 9 & 10
     GumboNode* furthest_block = NULL;
-    for (unsigned int j = formatting_node_in_open_elements;
-         j < state->_open_elements.length; ++j) {
+    for (
+      unsigned int j = formatting_node_in_open_elements;
+      j < state->_open_elements.length;
+      ++j
+    ) {
       assert(j > 0);
       GumboNode* current = state->_open_elements.data[j];
       if (is_special_node(current)) {
@@ -1876,8 +2097,10 @@ static bool adoption_agency_algorithm(
       }
       // And the formatting element itself.
       pop_current_node(parser);
-      gumbo_vector_remove(
-          parser, formatting_node, &state->_active_formatting_elements);
+      gumbo_vector_remove (
+        formatting_node,
+        &state->_active_formatting_elements
+      );
       return false;
     }
     assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
@@ -1886,18 +2109,20 @@ static bool adoption_agency_algorithm(
     // Step 11.
     // Elements may be moved and reparented by this algorithm, so
     // common_ancestor is not necessarily the same as formatting_node->parent.
-    GumboNode* common_ancestor =
-        state->_open_elements.data[gumbo_vector_index_of(&state->_open_elements,
-                                       formatting_node) -
-                                   1];
-    gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
-        gumbo_normalized_tagname(common_ancestor->v.element.tag),
-        gumbo_normalized_tagname(furthest_block->v.element.tag));
+    GumboNode* common_ancestor = state->_open_elements.data [
+      gumbo_vector_index_of(&state->_open_elements, formatting_node) - 1
+    ];
+    gumbo_debug (
+      "Common ancestor tag = %s, furthest block tag = %s.\n",
+      gumbo_normalized_tagname(common_ancestor->v.element.tag),
+      gumbo_normalized_tagname(furthest_block->v.element.tag)
+    );
 
     // Step 12.
-    int bookmark = gumbo_vector_index_of(
-                       &state->_active_formatting_elements, formatting_node) +
-                   1;
+    int bookmark = 1 + gumbo_vector_index_of (
+      &state->_active_formatting_elements,
+      formatting_node
+    );
     gumbo_debug("Bookmark at %d.\n", bookmark);
     // Step 13.
     GumboNode* node = furthest_block;
@@ -1912,8 +2137,11 @@ static bool adoption_agency_algorithm(
       ++j;
       // Step 13.3.
       int node_index = gumbo_vector_index_of(&state->_open_elements, node);
-      gumbo_debug(
-          "Current index: %d, last index: %d.\n", node_index, saved_node_index);
+      gumbo_debug (
+        "Current index: %d, last index: %d.\n",
+        node_index,
+        saved_node_index
+      );
       if (node_index == -1) {
         node_index = saved_node_index;
       }
@@ -1926,13 +2154,17 @@ static bool adoption_agency_algorithm(
         // Step 13.4.
         break;
       }
-      int formatting_index =
-          gumbo_vector_index_of(&state->_active_formatting_elements, node);
+      int formatting_index = gumbo_vector_index_of (
+        &state->_active_formatting_elements,
+        node
+      );
       if (j > 3 && formatting_index != -1) {
         // Step 13.5.
         gumbo_debug("Removing formatting element at %d.\n", formatting_index);
-        gumbo_vector_remove_at(
-            parser, formatting_index, &state->_active_formatting_elements);
+        gumbo_vector_remove_at (
+          formatting_index,
+          &state->_active_formatting_elements
+        );
         // Removing the element shifts all indices over by one, so we may need
         // to move the bookmark.
         if (formatting_index < bookmark) {
@@ -1943,13 +2175,13 @@ static bool adoption_agency_algorithm(
       }
       if (formatting_index == -1) {
         // Step 13.6.
-        gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
+        gumbo_vector_remove_at(node_index, &state->_open_elements);
         continue;
       }
       // Step 13.7.
       // "common ancestor as the intended parent" doesn't actually mean insert
       // it into the common ancestor; that happens below.
-      node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
+      node = clone_node(node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
       assert(formatting_index >= 0);
       state->_active_formatting_elements.data[formatting_index] = node;
       assert(node_index >= 0);
@@ -1962,35 +2194,42 @@ static bool adoption_agency_algorithm(
       }
       // Step 13.9.
       last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
-      remove_from_parent(parser, last_node);
-      append_node(parser, node, last_node);
+      remove_from_parent(last_node);
+      append_node(node, last_node);
       // Step 13.10.
       last_node = node;
     }  // Step 13.11.
 
     // Step 14.
-    gumbo_debug("Removing %s node from parent ",
-        gumbo_normalized_tagname(last_node->v.element.tag));
-    remove_from_parent(parser, last_node);
+    gumbo_debug (
+      "Removing %s node from parent ",
+      gumbo_normalized_tagname(last_node->v.element.tag)
+    );
+    remove_from_parent(last_node);
     last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
-    InsertionLocation location =
-        get_appropriate_insertion_location(parser, common_ancestor);
-    gumbo_debug("and inserting it into %s.\n",
-        gumbo_normalized_tagname(location.target->v.element.tag));
-    insert_node(parser, last_node, location);
+    InsertionLocation location = get_appropriate_insertion_location (
+      parser,
+      common_ancestor
+    );
+    gumbo_debug (
+      "and inserting it into %s.\n",
+      gumbo_normalized_tagname(location.target->v.element.tag)
+    );
+    insert_node(last_node, location);
 
     // Step 15.
-    GumboNode* new_formatting_node = clone_node(
-        parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
+    GumboNode* new_formatting_node = clone_node (
+      formatting_node,
+      GUMBO_INSERTION_ADOPTION_AGENCY_CLONED
+    );
     formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
 
-    // Step 16.  Instead of appending nodes one-by-one, we swap the children
+    // Step 16. Instead of appending nodes one-by-one, we swap the children
     // vector of furthest_block with the empty children of new_formatting_node,
-    // reducing memory traffic and allocations.  We still have to reset their
+    // reducing memory traffic and allocations. We still have to reset their
     // parent pointers, though.
     GumboVector temp = new_formatting_node->v.element.children;
-    new_formatting_node->v.element.children =
-        furthest_block->v.element.children;
+    new_formatting_node->v.element.children = furthest_block->v.element.children;
     furthest_block->v.element.children = temp;
 
     temp = new_formatting_node->v.element.children;
@@ -2000,36 +2239,49 @@ static bool adoption_agency_algorithm(
     }
 
     // Step 17.
-    append_node(parser, furthest_block, new_formatting_node);
+    append_node(furthest_block, new_formatting_node);
 
     // Step 18.
     // If the formatting node was before the bookmark, it may shift over all
     // indices after it, so we need to explicitly find the index and possibly
     // adjust the bookmark.
-    int formatting_node_index = gumbo_vector_index_of(
-        &state->_active_formatting_elements, formatting_node);
+    int formatting_node_index = gumbo_vector_index_of (
+      &state->_active_formatting_elements,
+      formatting_node
+    );
     assert(formatting_node_index != -1);
     if (formatting_node_index < bookmark) {
-      gumbo_debug(
-          "Formatting node at %d is before bookmark at %d; decrementing.\n",
-          formatting_node_index, bookmark);
+      gumbo_debug (
+        "Formatting node at %d is before bookmark at %d; decrementing.\n",
+        formatting_node_index, bookmark
+      );
       --bookmark;
     }
-    gumbo_vector_remove_at(
-        parser, formatting_node_index, &state->_active_formatting_elements);
+    gumbo_vector_remove_at (
+      formatting_node_index,
+      &state->_active_formatting_elements
+    );
     assert(bookmark >= 0);
     assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
-    gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
-        &state->_active_formatting_elements);
+    gumbo_vector_insert_at (
+      new_formatting_node,
+      bookmark,
+      &state->_active_formatting_elements
+    );
 
     // Step 19.
-    gumbo_vector_remove(parser, formatting_node, &state->_open_elements);
-    int insert_at =
-        gumbo_vector_index_of(&state->_open_elements, furthest_block) + 1;
+    gumbo_vector_remove(formatting_node, &state->_open_elements);
+    int insert_at = 1 + gumbo_vector_index_of (
+      &state->_open_elements,
+      furthest_block
+    );
     assert(insert_at >= 0);
     assert((unsigned int) insert_at <= state->_open_elements.length);
-    gumbo_vector_insert_at(
-        parser, new_formatting_node, insert_at, &state->_open_elements);
+    gumbo_vector_insert_at (
+      new_formatting_node,
+      insert_at,
+      &state->_open_elements
+    );
   }  // Step 20.
   return true;
 }
@@ -2041,25 +2293,31 @@ static void ignore_token(GumboParser* parser) {
   // element, but if no element is emitted (as happens in non-verbatim-mode
   // when a token is ignored), we need to free it here to prevent a memory
   // leak.
-  gumbo_token_destroy(parser, token);
+  gumbo_token_destroy(token);
 #ifndef NDEBUG
   if (token->type == GUMBO_TOKEN_START_TAG) {
     // Mark this sentinel so the assertion in the main loop knows it's been
     // destroyed.
     token->v.start_tag.attributes = kGumboEmptyVector;
+    token->v.start_tag.name = NULL;
   }
 #endif
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/the-end.html
+// https://html.spec.whatwg.org/multipage/parsing.html#the-end
 static void finish_parsing(GumboParser* parser) {
   gumbo_debug("Finishing parsing");
   maybe_flush_text_node_buffer(parser);
   GumboParserState* state = parser->_parser_state;
-  for (GumboNode* node = pop_current_node(parser); node;
-       node = pop_current_node(parser)) {
-    if ((node_html_tag_is(node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
-        (node_html_tag_is(node, GUMBO_TAG_HTML) && state->_closed_html_tag)) {
+  for (
+    GumboNode* node = pop_current_node(parser);
+    node;
+    node = pop_current_node(parser)
+  ) {
+    if (
+      (node_html_tag_is(node, GUMBO_TAG_BODY) && state->_closed_body_tag)
+      || (node_html_tag_is(node, GUMBO_TAG_HTML) && state->_closed_html_tag)
+    ) {
       continue;
     }
     node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
@@ -2092,7 +2350,7 @@ static bool handle_initial(GumboParser* parser, GumboToken* token) {
   return true;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-before-html-insertion-mode
+// https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
 static bool handle_before_html(GumboParser* parser, GumboToken* token) {
   if (token->type == GUMBO_TOKEN_DOCTYPE) {
     parser_add_parse_error(parser, token);
@@ -2109,15 +2367,19 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
     parser->_output->root = html_node;
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
     return true;
-  } else if (token->type == GUMBO_TOKEN_END_TAG &&
-             !tag_in(token, false,
-                 (gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
+  } else if (
+    token->type == GUMBO_TOKEN_END_TAG
+    && !tag_in(token, false, &(const TagSet){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
   } else {
-    GumboNode* html_node = insert_element_of_tag_type(
-        parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
+    GumboNode* html_node = insert_element_of_tag_type (
+      parser,
+      GUMBO_TAG_HTML,
+      GUMBO_INSERTION_IMPLIED
+    );
     assert(html_node);
     parser->_output->root = html_node;
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
@@ -2126,7 +2388,7 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-before-head-insertion-mode
+// https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
 static bool handle_before_head(GumboParser* parser, GumboToken* token) {
   if (token->type == GUMBO_TOKEN_DOCTYPE) {
     parser_add_parse_error(parser, token);
@@ -2143,15 +2405,19 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
     parser->_parser_state->_head_element = node;
     return true;
-  } else if (token->type == GUMBO_TOKEN_END_TAG &&
-             !tag_in(token, false,
-                 (gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
+  } else if (
+    token->type == GUMBO_TOKEN_END_TAG
+    && !tag_in(token, false, &(const TagSet){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
   } else {
-    GumboNode* node = insert_element_of_tag_type(
-        parser, GUMBO_TAG_HEAD, GUMBO_INSERTION_IMPLIED);
+    GumboNode* node = insert_element_of_tag_type (
+      parser,
+      GUMBO_TAG_HEAD,
+      GUMBO_INSERTION_IMPLIED
+    );
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
     parser->_parser_state->_head_element = node;
     parser->_parser_state->_reprocess_current_token = true;
@@ -2162,8 +2428,9 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
 // Forward declarations because of mutual dependencies.
 static bool handle_token(GumboParser* parser, GumboToken* token);
 static bool handle_in_body(GumboParser* parser, GumboToken* token);
+static bool handle_in_template(GumboParser* parser, GumboToken* token);
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inhead
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
 static bool handle_in_head(GumboParser* parser, GumboToken* token) {
   if (token->type == GUMBO_TOKEN_WHITESPACE) {
     insert_text_token(parser, token);
@@ -2177,9 +2444,11 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
     return true;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
     return handle_in_body(parser, token);
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
-                     TAG(MENUITEM), TAG(LINK)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(LINK)
+    })
+  ) {
     insert_element_from_token(parser, token);
     pop_current_node(parser);
     acknowledge_self_closing_tag(parser);
@@ -2189,15 +2458,16 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
     pop_current_node(parser);
     acknowledge_self_closing_tag(parser);
     // NOTE(jdtang): Gumbo handles only UTF-8, so the encoding clause of the
-    // spec doesn't apply.  If clients want to handle meta-tag re-encoding, they
+    // spec doesn't apply. If clients want to handle meta-tag re-encoding, they
     // should specifically look for that string in the document and re-encode it
     // before passing to Gumbo.
     return true;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
     run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
     return true;
-  } else if (tag_in(
-                 token, kStartTag, (gumbo_tagset){TAG(NOFRAMES), TAG(STYLE)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet){TAG(NOFRAMES), TAG(STYLE)})
+  ) {
     run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
     return true;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
@@ -2209,12 +2479,13 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
     return true;
   } else if (tag_is(token, kEndTag, GUMBO_TAG_HEAD)) {
     GumboNode* head = pop_current_node(parser);
-    AVOID_UNUSED_VARIABLE_WARNING(head);
+    UNUSED_IF_NDEBUG(head);
     assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
     return true;
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)})) {
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet){TAG(BODY), TAG(HTML), TAG(BR)})
+  ) {
     pop_current_node(parser);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
     parser->_parser_state->_reprocess_current_token = true;
@@ -2244,8 +2515,10 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
     pop_template_insertion_mode(parser);
     reset_insertion_mode_appropriately(parser);
     return success;
-  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
-             (token->type == GUMBO_TOKEN_END_TAG)) {
+  } else if (
+    tag_is(token, kStartTag, GUMBO_TAG_HEAD)
+    || (token->type == GUMBO_TOKEN_END_TAG)
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
@@ -2258,7 +2531,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
   return true;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inheadnoscript
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inheadnoscript
 static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
   if (token->type == GUMBO_TOKEN_DOCTYPE) {
     parser_add_parse_error(parser, token);
@@ -2268,19 +2541,25 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
   } else if (tag_is(token, kEndTag, GUMBO_TAG_NOSCRIPT)) {
     const GumboNode* node = pop_current_node(parser);
     assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
-    AVOID_UNUSED_VARIABLE_WARNING(node);
+    UNUSED_IF_NDEBUG(node);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
     return true;
-  } else if (token->type == GUMBO_TOKEN_WHITESPACE ||
-             token->type == GUMBO_TOKEN_COMMENT ||
-             tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
-                     TAG(META), TAG(NOFRAMES), TAG(STYLE)})) {
+  } else if (
+    token->type == GUMBO_TOKEN_WHITESPACE
+    || token->type == GUMBO_TOKEN_COMMENT
+    || tag_in (token, kStartTag, &(const TagSet) {
+      TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
+      TAG(META), TAG(NOFRAMES), TAG(STYLE)
+    })
+  ) {
     return handle_in_head(parser, token);
-  } else if (tag_in(
-                 token, kStartTag, (gumbo_tagset){TAG(HEAD), TAG(NOSCRIPT)}) ||
-             (token->type == GUMBO_TOKEN_END_TAG &&
-                 !tag_is(token, kEndTag, GUMBO_TAG_BR))) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet){TAG(HEAD), TAG(NOSCRIPT)})
+    || (
+      token->type == GUMBO_TOKEN_END_TAG
+      && !tag_is(token, kEndTag, GUMBO_TAG_BR)
+    )
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
@@ -2288,14 +2567,14 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
     parser_add_parse_error(parser, token);
     const GumboNode* node = pop_current_node(parser);
     assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
-    AVOID_UNUSED_VARIABLE_WARNING(node);
+    UNUSED_IF_NDEBUG(node);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
     parser->_parser_state->_reprocess_current_token = true;
     return false;
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-after-head-insertion-mode
+// https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
 static bool handle_after_head(GumboParser* parser, GumboToken* token) {
   GumboParserState* state = parser->_parser_state;
   if (token->type == GUMBO_TOKEN_WHITESPACE) {
@@ -2319,25 +2598,30 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
     insert_element_from_token(parser, token);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
     return true;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
-                     TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
-                     TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(LINK), TAG(META),
+      TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)
+    })
+  ) {
     parser_add_parse_error(parser, token);
     assert(state->_head_element != NULL);
     // This must be flushed before we push the head element on, as there may be
     // pending character tokens that should be attached to the root.
     maybe_flush_text_node_buffer(parser);
-    gumbo_vector_add(parser, state->_head_element, &state->_open_elements);
+    gumbo_vector_add(state->_head_element, &state->_open_elements);
     bool result = handle_in_head(parser, token);
-    gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
+    gumbo_vector_remove(state->_head_element, &state->_open_elements);
     return result;
   } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
     return handle_in_head(parser, token);
-  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
-             (token->type == GUMBO_TOKEN_END_TAG &&
-                 !tag_in(token, kEndTag,
-                     (gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)}))) {
+  } else if (
+    tag_is(token, kStartTag, GUMBO_TAG_HEAD)
+    || (
+      token->type == GUMBO_TOKEN_END_TAG
+      && !tag_in(token, kEndTag, &(const TagSet){TAG(BODY), TAG(HTML), TAG(BR)})
+    )
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
@@ -2349,40 +2633,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
   }
 }
 
-static void destroy_node(GumboParser* parser, GumboNode* node) {
-  switch (node->type) {
-    case GUMBO_NODE_DOCUMENT: {
-      GumboDocument* doc = &node->v.document;
-      for (unsigned int i = 0; i < doc->children.length; ++i) {
-        destroy_node(parser, doc->children.data[i]);
-      }
-      gumbo_parser_deallocate(parser, (void*) doc->children.data);
-      gumbo_parser_deallocate(parser, (void*) doc->name);
-      gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
-      gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
-    } break;
-    case GUMBO_NODE_TEMPLATE:
-    case GUMBO_NODE_ELEMENT:
-      for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
-        gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
-      }
-      gumbo_parser_deallocate(parser, node->v.element.attributes.data);
-      for (unsigned int i = 0; i < node->v.element.children.length; ++i) {
-        destroy_node(parser, node->v.element.children.data[i]);
-      }
-      gumbo_parser_deallocate(parser, node->v.element.children.data);
-      break;
-    case GUMBO_NODE_TEXT:
-    case GUMBO_NODE_CDATA:
-    case GUMBO_NODE_COMMENT:
-    case GUMBO_NODE_WHITESPACE:
-      gumbo_parser_deallocate(parser, (void*) node->v.text.text);
-      break;
-  }
-  gumbo_parser_deallocate(parser, node);
-}
-
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inbody
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
 static bool handle_in_body(GumboParser* parser, GumboToken* token) {
   GumboParserState* state = parser->_parser_state;
   assert(state->_open_elements.length > 0);
@@ -2394,8 +2645,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     reconstruct_active_formatting_elements(parser);
     insert_text_token(parser, token);
     return true;
-  } else if (token->type == GUMBO_TOKEN_CHARACTER ||
-             token->type == GUMBO_TOKEN_CDATA) {
+  } else if (
+    token->type == GUMBO_TOKEN_CHARACTER
+    || token->type == GUMBO_TOKEN_CDATA
+  ) {
     reconstruct_active_formatting_elements(parser);
     insert_text_token(parser, token);
     set_frameset_not_ok(parser);
@@ -2415,30 +2668,37 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     }
     assert(parser->_output->root != NULL);
     assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
-    merge_attributes(parser, token, parser->_output->root);
+    merge_attributes(token, parser->_output->root);
     return false;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
-                     TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES),
-                     TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
-             tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
+      TAG(META), TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE),
+      TAG(TITLE)
+    })
+    || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
+  ) {
     return handle_in_head(parser, token);
   } else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
     parser_add_parse_error(parser, token);
-    if (state->_open_elements.length < 2 ||
-        !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
-        has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
+    if (
+      state->_open_elements.length < 2
+      || !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)
+      || has_open_element(parser, GUMBO_TAG_TEMPLATE)
+    ) {
       ignore_token(parser);
       return false;
     }
     state->_frameset_ok = false;
-    merge_attributes(parser, token, state->_open_elements.data[1]);
+    merge_attributes(token, state->_open_elements.data[1]);
     return false;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
     parser_add_parse_error(parser, token);
-    if (state->_open_elements.length < 2 ||
-        !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
-        !state->_frameset_ok) {
+    if (
+      state->_open_elements.length < 2
+      || !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)
+      || !state->_frameset_ok
+    ) {
       ignore_token(parser);
       return false;
     }
@@ -2454,20 +2714,20 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     // Removing & destroying the body node is going to kill any nodes that have
     // been added to the list of active formatting elements, and so we should
     // clear it to prevent a use-after-free if the list of active formatting
-    // elements is reconstructed afterwards.  This may happen if whitespace
+    // elements is reconstructed afterwards. This may happen if whitespace
     // follows the </frameset>.
     clear_active_formatting_elements(parser);
 
-    // Remove the body node.  We may want to factor this out into a generic
+    // Remove the body node. We may want to factor this out into a generic
     // helper, but right now this is the only code that needs to do this.
     GumboVector* children = &parser->_output->root->v.element.children;
     for (unsigned int i = 0; i < children->length; ++i) {
       if (children->data[i] == body_node) {
-        gumbo_vector_remove_at(parser, i, children);
+        gumbo_vector_remove_at(i, children);
         break;
       }
     }
-    destroy_node(parser, body_node);
+    destroy_node(body_node);
 
     // Insert the <frameset>, and switch the insertion mode.
     insert_element_from_token(parser, token);
@@ -2475,10 +2735,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     return true;
   } else if (token->type == GUMBO_TOKEN_EOF) {
     for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
-      if (!node_tag_in_set(state->_open_elements.data[i],
-              (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY),
-                  TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY),
-                  TAG(HTML)})) {
+      if (
+        !node_tag_in_set(state->_open_elements.data[i], &(const TagSet) {
+          TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY), TAG(TD), TAG(TFOOT),
+          TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML)
+        })
+      ) {
         parser_add_parse_error(parser, token);
       }
     }
@@ -2487,7 +2749,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
       return handle_in_template(parser, token);
     }
     return true;
-  } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(HTML)})) {
+  } else if (tag_in(token, kEndTag, &(const TagSet){TAG(BODY), TAG(HTML)})) {
     if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
@@ -2495,11 +2757,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     }
     bool success = true;
     for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
-      if (!node_tag_in_set(state->_open_elements.data[i],
-              (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
-                  TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC),
-                  TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
-                  TAG(BODY), TAG(HTML)})) {
+      if (
+        !node_tag_in_set(state->_open_elements.data[i], &(const TagSet) {
+          TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P),
+          TAG(RB), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD),
+          TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML)
+        })
+      ) {
         parser_add_parse_error(parser, token);
         success = false;
         break;
@@ -2514,37 +2778,38 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
       record_end_of_element(state->_current_token, &body->v.element);
     }
     return success;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
-                     TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS), TAG(DIR),
-                     TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
-                     TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
-                     TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P),
-                     TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE), TAG(BLOCKQUOTE), TAG(CENTER),
+      TAG(DETAILS), TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
+      TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
+      TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P), TAG(SECTION),
+      TAG(SUMMARY), TAG(UL)
+    })
+  ) {
     bool result = maybe_implicitly_close_p_tag(parser, token);
     insert_element_from_token(parser, token);
     return result;
-  } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
-                                          TAG(H4), TAG(H5), TAG(H6)})) {
+  } else if (tag_in(token, kStartTag, &heading_tags)) {
     bool result = maybe_implicitly_close_p_tag(parser, token);
-    if (node_tag_in_set(
-            get_current_node(parser), (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
-                                          TAG(H4), TAG(H5), TAG(H6)})) {
+    if (node_tag_in_set(get_current_node(parser), &heading_tags)) {
       parser_add_parse_error(parser, token);
       pop_current_node(parser);
       result = false;
     }
     insert_element_from_token(parser, token);
     return result;
-  } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(PRE), TAG(LISTING)})) {
+  } else if (tag_in(token, kStartTag, &(const TagSet){TAG(PRE), TAG(LISTING)})) {
     bool result = maybe_implicitly_close_p_tag(parser, token);
     insert_element_from_token(parser, token);
     state->_ignore_next_linefeed = true;
     state->_frameset_ok = false;
     return result;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
-    if (state->_form_element != NULL &&
-        !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
+    if (
+      state->_form_element != NULL
+      && !has_open_element(parser, GUMBO_TAG_TEMPLATE)
+    ) {
       gumbo_debug("Ignoring nested form.\n");
       parser_add_parse_error(parser, token);
       ignore_token(parser);
@@ -2561,7 +2826,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     bool result = maybe_implicitly_close_p_tag(parser, token);
     insert_element_from_token(parser, token);
     return result;
-  } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
+  } else if (tag_in(token, kStartTag, &dd_dt_tags)) {
     maybe_implicitly_close_list_tag(parser, token, false);
     bool result = maybe_implicitly_close_p_tag(parser, token);
     insert_element_from_token(parser, token);
@@ -2574,8 +2839,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
   } else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
     if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
       parser_add_parse_error(parser, token);
-      implicitly_close_tags(
-          parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
+      implicitly_close_tags (
+        parser,
+        token,
+        GUMBO_NAMESPACE_HTML,
+        GUMBO_TAG_BUTTON
+      );
       state->_reprocess_current_token = true;
       return false;
     }
@@ -2583,21 +2852,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     insert_element_from_token(parser, token);
     state->_frameset_ok = false;
     return true;
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
-                     TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
-                     TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
-                     TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
-                     TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV),
-                     TAG(OL), TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
-    GumboTag tag = token->v.end_tag;
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet) {
+      TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE), TAG(BLOCKQUOTE), TAG(BUTTON),
+      TAG(CENTER), TAG(DETAILS), TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL),
+      TAG(FIELDSET), TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
+      TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV), TAG(OL),
+      TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)
+    })
+  ) {
+    GumboTag tag = token->v.end_tag.tag;
     if (!has_an_element_in_scope(parser, tag)) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
       return false;
     }
-    implicitly_close_tags(
-        parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
+    implicitly_close_tags (
+      parser,
+      token,
+      GUMBO_NAMESPACE_HTML,
+      token->v.end_tag.tag
+    );
     return true;
   } else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
     if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
@@ -2617,7 +2892,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
       return success;
     } else {
       bool result = true;
-      const GumboNode* node = state->_form_element;
+      GumboNode* node = state->_form_element;
       assert(!node || node->type == GUMBO_NODE_ELEMENT);
       state->_form_element = NULL;
       if (!node || !has_node_in_scope(parser, node)) {
@@ -2632,48 +2907,67 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
       if (get_current_node(parser) != node) {
         parser_add_parse_error(parser, token);
         result = false;
+      } else {
+        record_end_of_element(token, &node->v.element);
       }
 
       GumboVector* open_elements = &state->_open_elements;
       int index = gumbo_vector_index_of(open_elements, node);
       assert(index >= 0);
-      gumbo_vector_remove_at(parser, index, open_elements);
+      gumbo_vector_remove_at(index, open_elements);
       return result;
     }
   } else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
     if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
       parser_add_parse_error(parser, token);
       // reconstruct_active_formatting_elements(parser);
-      insert_element_of_tag_type(
-          parser, GUMBO_TAG_P, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
+      insert_element_of_tag_type (
+        parser,
+        GUMBO_TAG_P,
+        GUMBO_INSERTION_CONVERTED_FROM_END_TAG
+      );
       state->_reprocess_current_token = true;
       return false;
     }
-    return implicitly_close_tags(
-        parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
+    return implicitly_close_tags (
+      parser,
+      token,
+      GUMBO_NAMESPACE_HTML,
+      GUMBO_TAG_P
+    );
   } else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
     if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
       return false;
     }
-    return implicitly_close_tags(
-        parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
-  } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
+    return implicitly_close_tags (
+      parser,
+      token,
+      GUMBO_NAMESPACE_HTML,
+      GUMBO_TAG_LI
+    );
+  } else if (tag_in(token, kEndTag, &dd_dt_tags)) {
     assert(token->type == GUMBO_TOKEN_END_TAG);
-    GumboTag token_tag = token->v.end_tag;
+    GumboTag token_tag = token->v.end_tag.tag;
     if (!has_an_element_in_scope(parser, token_tag)) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
       return false;
     }
-    return implicitly_close_tags(
-        parser, token, GUMBO_NAMESPACE_HTML, token_tag);
-  } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
-                                        TAG(H4), TAG(H5), TAG(H6)})) {
-    if (!has_an_element_in_scope_with_tagname(
-            parser, 6, (GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
-                           GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
+    return implicitly_close_tags (
+      parser,
+      token,
+      GUMBO_NAMESPACE_HTML,
+      token_tag
+    );
+  } else if (tag_in(token, kEndTag, &heading_tags)) {
+    if (
+      !has_an_element_in_scope_with_tagname(parser, 6, (GumboTag[]) {
+        GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3, GUMBO_TAG_H4,
+        GUMBO_TAG_H5, GUMBO_TAG_H6
+      })
+    ) {
       // No heading open; ignore the token entirely.
       parser_add_parse_error(parser, token);
       ignore_token(parser);
@@ -2681,7 +2975,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     } else {
       generate_implied_end_tags(parser, GUMBO_TAG_LAST);
       const GumboNode* current_node = get_current_node(parser);
-      bool success = node_html_tag_is(current_node, token->v.end_tag);
+      bool success = node_html_tag_is(current_node, token->v.end_tag.tag);
       if (!success) {
         // There're children of the heading currently open; close them below and
         // record a parse error.
@@ -2691,9 +2985,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
       }
       do {
         current_node = pop_current_node(parser);
-      } while (!node_tag_in_set(
-                   current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
-                                     TAG(H4), TAG(H5), TAG(H6)}));
+      } while (!node_tag_in_set(current_node, &heading_tags));
       return success;
     }
   } else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
@@ -2706,22 +2998,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
       adoption_agency_algorithm(parser, token, GUMBO_TAG_A);
       // The adoption agency algorithm usually removes all instances of <a>
       // from the list of active formatting elements, but in case it doesn't,
-      // we're supposed to do this.  (The conditions where it might not are
+      // we're supposed to do this. (The conditions where it might not are
       // listed in the spec.)
       if (find_last_anchor_index(parser, &last_a)) {
-        void* last_element = gumbo_vector_remove_at(
-            parser, last_a, &state->_active_formatting_elements);
-        gumbo_vector_remove(parser, last_element, &state->_open_elements);
+        void* last_element = gumbo_vector_remove_at (
+          last_a,
+          &state->_active_formatting_elements
+        );
+        gumbo_vector_remove(last_element, &state->_open_elements);
       }
       success = false;
     }
     reconstruct_active_formatting_elements(parser);
     add_formatting_element(parser, insert_element_from_token(parser, token));
     return success;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT),
-                     TAG(I), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG),
-                     TAG(TT), TAG(U)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT), TAG(I), TAG(S),
+      TAG(SMALL), TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)
+    })
+  ) {
     reconstruct_active_formatting_elements(parser);
     add_formatting_element(parser, insert_element_from_token(parser, token));
     return true;
@@ -2737,21 +3033,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     insert_element_from_token(parser, token);
     add_formatting_element(parser, get_current_node(parser));
     return result;
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM),
-                     TAG(FONT), TAG(I), TAG(NOBR), TAG(S), TAG(SMALL),
-                     TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)})) {
-    return adoption_agency_algorithm(parser, token, token->v.end_tag);
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet) {
+      TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT), TAG(I),
+      TAG(NOBR), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG), TAG(TT),
+      TAG(U)
+    })
+  ) {
+    return adoption_agency_algorithm(parser, token, token->v.end_tag.tag);
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})
+  ) {
     reconstruct_active_formatting_elements(parser);
     insert_element_from_token(parser, token);
     add_formatting_element(parser, &kActiveFormattingScopeMarker);
     set_frameset_not_ok(parser);
     return true;
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
-    GumboTag token_tag = token->v.end_tag;
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})
+  ) {
+    GumboTag token_tag = token->v.end_tag.tag;
     if (!has_an_element_in_table_scope(parser, token_tag)) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
@@ -2761,17 +3062,22 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     clear_active_formatting_elements(parser);
     return true;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
-    if (get_document_node(parser)->v.document.doc_type_quirks_mode !=
-        GUMBO_DOCTYPE_QUIRKS) {
+    if (
+      get_document_node(parser)->v.document.doc_type_quirks_mode
+        != GUMBO_DOCTYPE_QUIRKS
+    ) {
       maybe_implicitly_close_p_tag(parser, token);
     }
     insert_element_from_token(parser, token);
     set_frameset_not_ok(parser);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
     return true;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG),
-                     TAG(IMAGE), TAG(KEYGEN), TAG(WBR)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG), TAG(IMAGE), TAG(KEYGEN),
+      TAG(WBR)
+    })
+  ) {
     bool success = true;
     if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
       success = false;
@@ -2801,8 +3107,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     pop_current_node(parser);
     acknowledge_self_closing_tag(parser);
     return true;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})
+  ) {
     insert_element_from_token(parser, token);
     pop_current_node(parser);
     acknowledge_self_closing_tag(parser);
@@ -2814,101 +3121,6 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     acknowledge_self_closing_tag(parser);
     set_frameset_not_ok(parser);
     return result;
-  } else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
-    parser_add_parse_error(parser, token);
-    if (parser->_parser_state->_form_element != NULL &&
-        !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
-      ignore_token(parser);
-      return false;
-    }
-    acknowledge_self_closing_tag(parser);
-    maybe_implicitly_close_p_tag(parser, token);
-    set_frameset_not_ok(parser);
-
-    GumboVector* token_attrs = &token->v.start_tag.attributes;
-    GumboAttribute* prompt_attr = gumbo_get_attribute(token_attrs, "prompt");
-    GumboAttribute* action_attr = gumbo_get_attribute(token_attrs, "action");
-    GumboAttribute* name_attr = gumbo_get_attribute(token_attrs, "name");
-
-    GumboNode* form = insert_element_of_tag_type(
-        parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
-    if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
-      parser->_parser_state->_form_element = form;
-    }
-    if (action_attr) {
-      gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
-    }
-    insert_element_of_tag_type(
-        parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
-    pop_current_node(parser);  // <hr>
-
-    insert_element_of_tag_type(
-        parser, GUMBO_TAG_LABEL, GUMBO_INSERTION_FROM_ISINDEX);
-    TextNodeBufferState* text_state = &parser->_parser_state->_text_node;
-    text_state->_start_original_text = token->original_text.data;
-    text_state->_start_position = token->position;
-    text_state->_type = GUMBO_NODE_TEXT;
-    if (prompt_attr) {
-      int prompt_attr_length = strlen(prompt_attr->value);
-      gumbo_string_buffer_destroy(parser, &text_state->_buffer);
-      text_state->_buffer.data = gumbo_copy_stringz(parser, prompt_attr->value);
-      text_state->_buffer.length = prompt_attr_length;
-      text_state->_buffer.capacity = prompt_attr_length + 1;
-      gumbo_destroy_attribute(parser, prompt_attr);
-    } else {
-      GumboStringPiece prompt_text =
-          GUMBO_STRING("This is a searchable index. Enter search keywords: ");
-      gumbo_string_buffer_append_string(
-          parser, &prompt_text, &text_state->_buffer);
-    }
-
-    GumboNode* input = insert_element_of_tag_type(
-        parser, GUMBO_TAG_INPUT, GUMBO_INSERTION_FROM_ISINDEX);
-    for (unsigned int i = 0; i < token_attrs->length; ++i) {
-      GumboAttribute* attr = token_attrs->data[i];
-      if (attr != prompt_attr && attr != action_attr && attr != name_attr) {
-        gumbo_vector_add(parser, attr, &input->v.element.attributes);
-      }
-      token_attrs->data[i] = NULL;
-    }
-
-    // All attributes have been successfully transferred and nulled out at this
-    // point, so the call to ignore_token will free the memory for it without
-    // touching the attributes.
-    ignore_token(parser);
-
-    // The name attribute, if present, should be destroyed since it's ignored
-    // when copying over.  The action attribute should be kept since it's moved
-    // to the form.
-    if (name_attr) {
-      gumbo_destroy_attribute(parser, name_attr);
-    }
-
-    GumboAttribute* name =
-        gumbo_parser_allocate(parser, sizeof(GumboAttribute));
-    GumboStringPiece name_str = GUMBO_STRING("name");
-    GumboStringPiece isindex_str = GUMBO_STRING("isindex");
-    name->attr_namespace = GUMBO_ATTR_NAMESPACE_NONE;
-    name->name = gumbo_copy_stringz(parser, "name");
-    name->value = gumbo_copy_stringz(parser, "isindex");
-    name->original_name = name_str;
-    name->original_value = isindex_str;
-    name->name_start = kGumboEmptySourcePosition;
-    name->name_end = kGumboEmptySourcePosition;
-    name->value_start = kGumboEmptySourcePosition;
-    name->value_end = kGumboEmptySourcePosition;
-    gumbo_vector_add(parser, name, &input->v.element.attributes);
-
-    pop_current_node(parser);  // <input>
-    pop_current_node(parser);  // <label>
-    insert_element_of_tag_type(
-        parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
-    pop_current_node(parser);  // <hr>
-    pop_current_node(parser);  // <form>
-    if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
-      parser->_parser_state->_form_element = NULL;
-    }
-    return false;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
     run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
     parser->_parser_state->_ignore_next_linefeed = true;
@@ -2932,37 +3144,45 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     insert_element_from_token(parser, token);
     set_frameset_not_ok(parser);
     GumboInsertionMode state = parser->_parser_state->_insertion_mode;
-    if (state == GUMBO_INSERTION_MODE_IN_TABLE ||
-        state == GUMBO_INSERTION_MODE_IN_CAPTION ||
-        state == GUMBO_INSERTION_MODE_IN_TABLE_BODY ||
-        state == GUMBO_INSERTION_MODE_IN_ROW ||
-        state == GUMBO_INSERTION_MODE_IN_CELL) {
+    if (
+      state == GUMBO_INSERTION_MODE_IN_TABLE
+      || state == GUMBO_INSERTION_MODE_IN_CAPTION
+      || state == GUMBO_INSERTION_MODE_IN_TABLE_BODY
+      || state == GUMBO_INSERTION_MODE_IN_ROW
+      || state == GUMBO_INSERTION_MODE_IN_CELL
+    ) {
       set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE);
     } else {
       set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
     }
     return true;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(OPTION), TAG(OPTGROUP)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet){TAG(OPTION), TAG(OPTGROUP)})
+  ) {
     if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
       pop_current_node(parser);
     }
     reconstruct_active_formatting_elements(parser);
     insert_element_from_token(parser, token);
     return true;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})
+  ) {
     bool success = true;
-    GumboTag exception =
-        tag_in(token, kStartTag, (gumbo_tagset){TAG(RT), TAG(RP)})
-            ? GUMBO_TAG_RTC
-            : GUMBO_TAG_LAST;
+    GumboTag exception = tag_in(token, kStartTag, &(const TagSet){TAG(RT), TAG(RP)})
+      ? GUMBO_TAG_RTC
+      : GUMBO_TAG_LAST
+    ;
     if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
       generate_implied_end_tags(parser, exception);
     }
-    if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
-        !(exception == GUMBO_TAG_LAST ||
-            node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
+    if (
+      !node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)
+      && !(
+        exception == GUMBO_TAG_LAST
+        || node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC)
+      )
+    ) {
       parser_add_parse_error(parser, token);
       success = false;
     }
@@ -2971,14 +3191,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
   } else if (tag_is(token, kEndTag, GUMBO_TAG_BR)) {
     parser_add_parse_error(parser, token);
     reconstruct_active_formatting_elements(parser);
-    insert_element_of_tag_type(
-        parser, GUMBO_TAG_BR, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
+    insert_element_of_tag_type (
+      parser,
+      GUMBO_TAG_BR,
+      GUMBO_INSERTION_CONVERTED_FROM_END_TAG
+    );
     pop_current_node(parser);
     return false;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_MATH)) {
     reconstruct_active_formatting_elements(parser);
-    adjust_mathml_attributes(parser, token);
-    adjust_foreign_attributes(parser, token);
+    adjust_mathml_attributes(token);
+    adjust_foreign_attributes(token);
     insert_foreign_element(parser, token, GUMBO_NAMESPACE_MATHML);
     if (token->v.start_tag.is_self_closing) {
       pop_current_node(parser);
@@ -2987,18 +3210,20 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     return true;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_SVG)) {
     reconstruct_active_formatting_elements(parser);
-    adjust_svg_attributes(parser, token);
-    adjust_foreign_attributes(parser, token);
+    adjust_svg_attributes(token);
+    adjust_foreign_attributes(token);
     insert_foreign_element(parser, token, GUMBO_NAMESPACE_SVG);
     if (token->v.start_tag.is_self_closing) {
       pop_current_node(parser);
       acknowledge_self_closing_tag(parser);
     }
     return true;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
-                     TAG(FRAME), TAG(HEAD), TAG(TBODY), TAG(TD), TAG(TFOOT),
-                     TAG(TH), TAG(THEAD), TAG(TR)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(FRAME), TAG(HEAD),
+      TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
+    })
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
@@ -3008,22 +3233,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
     return true;
   } else {
     assert(token->type == GUMBO_TOKEN_END_TAG);
-    GumboTag end_tag = token->v.end_tag;
+    GumboTag end_tag = token->v.end_tag.tag;
     assert(state->_open_elements.length > 0);
     assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
     // Walk up the stack of open elements until we find one that either:
     // a) Matches the tag name we saw
     // b) Is in the "special" category.
-    // If we see a), implicitly close everything up to and including it.  If we
+    // If we see a), implicitly close everything up to and including it. If we
     // see b), then record a parse error, don't close anything (except the
     // implied end tags) and ignore the end tag token.
     for (int i = state->_open_elements.length; --i >= 0;) {
       const GumboNode* node = state->_open_elements.data[i];
+      // XXX(sfc): This doesn't work for something like <body><foo></bar>,
+      // since foo and bar both have the same tag, GUMBO_TAG_UNKNOWN.
       if (node_html_tag_is(node, end_tag)) {
         generate_implied_end_tags(parser, end_tag);
         // TODO(jdtang): Do I need to add a parse error here?  The condition in
         // the spec seems like it's the inverse of the loop condition above, and
         // so would never fire.
+        // XXX(sfc): Yes, per the spec a parse error is needed here when node
+        // is not the current node. I think <div> is an example.
         while (node != pop_current_node(parser))
           ;  // Pop everything.
         return true;
@@ -3039,15 +3268,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incdata
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
 static bool handle_text(GumboParser* parser, GumboToken* token) {
-  if (token->type == GUMBO_TOKEN_CHARACTER ||
-      token->type == GUMBO_TOKEN_WHITESPACE) {
+  if (
+    token->type == GUMBO_TOKEN_CHARACTER
+    || token->type == GUMBO_TOKEN_WHITESPACE
+  ) {
     insert_text_token(parser, token);
   } else {
     // We provide only bare-bones script handling that doesn't involve any of
     // the parser-pause/already-started/script-nesting flags or re-entrant
-    // invocations of the tokenizer.  Because the intended usage of this library
+    // invocations of the tokenizer. Because the intended usage of this library
     // is mostly for templating, refactoring, and static-analysis libraries, we
     // provide the script body as a text-node child of the <script> element.
     // This behavior doesn't support document.write of partial HTML elements,
@@ -3062,13 +3293,15 @@ static bool handle_text(GumboParser* parser, GumboToken* token) {
   return true;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intable
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intable
 static bool handle_in_table(GumboParser* parser, GumboToken* token) {
   GumboParserState* state = parser->_parser_state;
-  if (token->type == GUMBO_TOKEN_CHARACTER ||
-      token->type == GUMBO_TOKEN_WHITESPACE) {
+  if (
+    token->type == GUMBO_TOKEN_CHARACTER
+    || token->type == GUMBO_TOKEN_WHITESPACE
+  ) {
     // The "pending table character tokens" list described in the spec is
-    // nothing more than the TextNodeBufferState.  We accumulate text tokens as
+    // nothing more than the TextNodeBufferState. We accumulate text tokens as
     // normal, except that when we go to flush them in the handle_in_table_text,
     // we set _foster_parent_insertions if there're non-whitespace characters in
     // the buffer.
@@ -3097,19 +3330,27 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
     return true;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
     clear_stack_to_table_context(parser);
-    insert_element_of_tag_type(
-        parser, GUMBO_TAG_COLGROUP, GUMBO_INSERTION_IMPLIED);
+    insert_element_of_tag_type (
+      parser,
+      GUMBO_TAG_COLGROUP,
+      GUMBO_INSERTION_IMPLIED
+    );
     parser->_parser_state->_reprocess_current_token = true;
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
     return true;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD),
-                     TAG(TH), TAG(TR)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD), TAG(TH), TAG(TR)
+    })
+  ) {
     clear_stack_to_table_context(parser);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
-    if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH), TAG(TR)})) {
-      insert_element_of_tag_type(
-          parser, GUMBO_TAG_TBODY, GUMBO_INSERTION_IMPLIED);
+    if (tag_in(token, kStartTag, &(const TagSet){TAG(TD), TAG(TH), TAG(TR)})) {
+      insert_element_of_tag_type (
+        parser,
+        GUMBO_TAG_TBODY,
+        GUMBO_INSERTION_IMPLIED
+      );
       state->_reprocess_current_token = true;
     } else {
       insert_element_from_token(parser, token);
@@ -3129,20 +3370,24 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
       return false;
     }
     return true;
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
-                     TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
-                     TAG(TH), TAG(THEAD), TAG(TR)})) {
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet) {
+      TAG(BODY), TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(HTML),
+      TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
+    })
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)}) ||
-             (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)})
+    || (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))
+  ) {
     return handle_in_head(parser, token);
-  } else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
-             attribute_matches(
-                 &token->v.start_tag.attributes, "type", "hidden")) {
+  } else if (
+    tag_is(token, kStartTag, GUMBO_TAG_INPUT)
+    && attribute_matches(&token->v.start_tag.attributes, "type", "hidden")
+  ) {
     parser_add_parse_error(parser, token);
     insert_element_from_token(parser, token);
     pop_current_node(parser);
@@ -3167,32 +3412,40 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intabletext
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intabletext
 static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
   if (token->type == GUMBO_TOKEN_NULL) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
-  } else if (token->type == GUMBO_TOKEN_CHARACTER ||
-             token->type == GUMBO_TOKEN_WHITESPACE) {
+  } else if (
+    token->type == GUMBO_TOKEN_CHARACTER
+    || token->type == GUMBO_TOKEN_WHITESPACE
+  ) {
     insert_text_token(parser, token);
     return true;
   } else {
     GumboParserState* state = parser->_parser_state;
     GumboStringBuffer* buffer = &state->_text_node._buffer;
-    // Can't use strspn for this because GumboStringBuffers are not
-    // null-terminated.
-    // Note that TextNodeBuffer may contain UTF-8 characters, but the presence
-    // of any one byte that is not whitespace means we flip the flag, so this
-    // loop is still valid.
-    for (unsigned int i = 0; i < buffer->length; ++i) {
-      if (!isspace((unsigned char) buffer->data[i]) ||
-          buffer->data[i] == '\v') {
+    const char* data = buffer->data;
+    // Note that TextNodeBuffer may contain UTF-8 characters, but the
+    // presence of any one byte that is not whitespace means we flip
+    // the flag, so this loop is still valid.
+    for (size_t i = 0, n = buffer->length; i < n; ++i) {
+      switch (data[i]) {
+      case '\t':
+      case '\n':
+      case '\f':
+      case '\r':
+      case ' ':
+        continue;
+      default:
         state->_foster_parent_insertions = true;
         reconstruct_active_formatting_elements(parser);
-        break;
+        goto loopbreak;
       }
     }
+    loopbreak:
     maybe_flush_text_node_buffer(parser);
     state->_foster_parent_insertions = false;
     state->_reprocess_current_token = true;
@@ -3201,7 +3454,7 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incaption
 static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
   if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
     if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
@@ -3220,11 +3473,13 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
       set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
       return result;
     }
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
-                     TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
-                     TAG(TR)}) ||
-             (tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(TBODY), TAG(TD),
+      TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
+    })
+    || (tag_is(token, kEndTag, GUMBO_TAG_TABLE))
+  ) {
     if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
@@ -3236,10 +3491,12 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
     parser->_parser_state->_reprocess_current_token = true;
     return true;
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML),
-                     TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
-                     TAG(TR)})) {
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet) {
+      TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD),
+      TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
+    })
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
@@ -3248,7 +3505,7 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incolgroup
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incolgroup
 static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
   if (token->type == GUMBO_TOKEN_WHITESPACE) {
     insert_text_token(parser, token);
@@ -3280,8 +3537,10 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
-  } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
-             tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
+  } else if (
+    tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)
+    || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
+  ) {
     return handle_in_head(parser, token);
   } else if (token->type == GUMBO_TOKEN_EOF) {
     return handle_in_body(parser, token);
@@ -3298,23 +3557,24 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intbody
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intbody
 static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
   if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
     clear_stack_to_table_body_context(parser);
     insert_element_from_token(parser, token);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
     return true;
-  } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
+  } else if (tag_in(token, kStartTag, &td_th_tags)) {
     parser_add_parse_error(parser, token);
     clear_stack_to_table_body_context(parser);
     insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
     parser->_parser_state->_reprocess_current_token = true;
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
     return false;
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
-    if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})
+  ) {
+    if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
       return false;
@@ -3323,13 +3583,20 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
     pop_current_node(parser);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
     return true;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
-                     TAG(TBODY), TAG(TFOOT), TAG(THEAD)}) ||
-             tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
-    if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
-            has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
-            has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(TBODY), TAG(TFOOT),
+      TAG(THEAD)
+    })
+    || tag_is(token, kEndTag, GUMBO_TAG_TABLE)
+  ) {
+    if (
+      !(
+        has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY)
+        || has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD)
+        || has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT)
+      )
+    ) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
       return false;
@@ -3339,9 +3606,12 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
     parser->_parser_state->_reprocess_current_token = true;
     return true;
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR),
-                     TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet) {
+      TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR), TAG(COLGROUP),
+      TAG(HTML), TAG(TD), TAG(TH)
+    })
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
@@ -3350,9 +3620,9 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intr
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intr
 static bool handle_in_row(GumboParser* parser, GumboToken* token) {
-  if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TH), TAG(TD)})) {
+  if (tag_in(token, kStartTag, &td_th_tags)) {
     clear_stack_to_table_row_context(parser);
     insert_element_from_token(parser, token);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
@@ -3369,10 +3639,13 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
       set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
       return true;
     }
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
-                     TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)}) ||
-             tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(TBODY), TAG(TFOOT),
+      TAG(THEAD), TAG(TR)
+    })
+    || tag_is(token, kEndTag, GUMBO_TAG_TABLE)
+  ) {
     if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
@@ -3384,10 +3657,13 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
       parser->_parser_state->_reprocess_current_token = true;
       return true;
     }
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
-    if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
-        (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet) {TAG(TBODY), TAG(TFOOT), TAG(THEAD)})
+  ) {
+    if (
+      !has_an_element_in_table_scope(parser, token->v.end_tag.tag)
+      || !has_an_element_in_table_scope(parser, GUMBO_TAG_TR)
+    ) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
       return false;
@@ -3398,9 +3674,12 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
       parser->_parser_state->_reprocess_current_token = true;
       return true;
     }
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
-                     TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet) {
+      TAG(BODY), TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(HTML),
+      TAG(TD), TAG(TH)
+    })
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
@@ -3409,23 +3688,27 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intd
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intd
 static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
-  if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
-    GumboTag token_tag = token->v.end_tag;
+  if (tag_in(token, kEndTag, &td_th_tags)) {
+    GumboTag token_tag = token->v.end_tag.tag;
     if (!has_an_element_in_table_scope(parser, token_tag)) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
       return false;
     }
     return close_table_cell(parser, token, token_tag);
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
-                     TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
-                     TAG(TR)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(TBODY), TAG(TD),
+      TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
+    })
+  ) {
     gumbo_debug("Handling <td> in cell.\n");
-    if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
-        !has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
+    if (
+      !has_an_element_in_table_scope(parser, GUMBO_TAG_TH)
+      && !has_an_element_in_table_scope(parser, GUMBO_TAG_TD)
+    ) {
       gumbo_debug("Bailing out because there's no <td> or <th> in scope.\n");
       parser_add_parse_error(parser, token);
       ignore_token(parser);
@@ -3433,14 +3716,20 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
     }
     parser->_parser_state->_reprocess_current_token = true;
     return close_current_cell(parser, token);
-  } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(CAPTION),
-                                        TAG(COL), TAG(COLGROUP), TAG(HTML)})) {
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet) {
+      TAG(BODY), TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(HTML)
+    })
+  ) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
-  } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
-                                        TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
-    if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
+  } else if (
+    tag_in(token, kEndTag, &(const TagSet) {
+      TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)
+    })
+  ) {
+    if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
       parser_add_parse_error(parser, token);
       ignore_token(parser);
       return false;
@@ -3452,14 +3741,16 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselect
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselect
 static bool handle_in_select(GumboParser* parser, GumboToken* token) {
   if (token->type == GUMBO_TOKEN_NULL) {
     parser_add_parse_error(parser, token);
     ignore_token(parser);
     return false;
-  } else if (token->type == GUMBO_TOKEN_CHARACTER ||
-             token->type == GUMBO_TOKEN_WHITESPACE) {
+  } else if (
+    token->type == GUMBO_TOKEN_CHARACTER
+    || token->type == GUMBO_TOKEN_WHITESPACE
+  ) {
     insert_text_token(parser, token);
     return true;
   } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
@@ -3488,9 +3779,13 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
     return true;
   } else if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
     GumboVector* open_elements = &parser->_parser_state->_open_elements;
-    if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
-        node_html_tag_is(open_elements->data[open_elements->length - 2],
-            GUMBO_TAG_OPTGROUP)) {
+    if (
+      node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)
+      && node_html_tag_is (
+        open_elements->data[open_elements->length - 2],
+        GUMBO_TAG_OPTGROUP
+      )
+    ) {
       pop_current_node(parser);
     }
     if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
@@ -3525,8 +3820,9 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
       close_current_select(parser);
     }
     return false;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})
+  ) {
     parser_add_parse_error(parser, token);
     if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
       ignore_token(parser);
@@ -3535,9 +3831,10 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
       parser->_parser_state->_reprocess_current_token = true;
     }
     return false;
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(SCRIPT), TAG(TEMPLATE)}) ||
-             tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet){TAG(SCRIPT), TAG(TEMPLATE)})
+    || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
+  ) {
     return handle_in_head(parser, token);
   } else if (token->type == GUMBO_TOKEN_EOF) {
     return handle_in_body(parser, token);
@@ -3548,20 +3845,20 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselectintable
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselectintable
 static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
-  if (tag_in(token, kStartTag,
-          (gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT),
-              TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
+  static const TagSet tags = {
+    TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
+    TAG(TR), TAG(TD), TAG(TH)
+  };
+  if (tag_in(token, kStartTag, &tags)) {
     parser_add_parse_error(parser, token);
     close_current_select(parser);
     parser->_parser_state->_reprocess_current_token = true;
     return false;
-  } else if (tag_in(token, kEndTag,
-                 (gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY),
-                     TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
+  } else if (tag_in(token, kEndTag, &tags)) {
     parser_add_parse_error(parser, token);
-    if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
+    if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
       ignore_token(parser);
       return false;
     } else {
@@ -3577,23 +3874,32 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intemplate
 static bool handle_in_template(GumboParser* parser, GumboToken* token) {
   GumboParserState* state = parser->_parser_state;
-  if (token->type == GUMBO_TOKEN_WHITESPACE ||
-      token->type == GUMBO_TOKEN_CHARACTER ||
-      token->type == GUMBO_TOKEN_COMMENT || token->type == GUMBO_TOKEN_NULL ||
-      token->type == GUMBO_TOKEN_DOCTYPE) {
-    return handle_in_body(parser, token);
-  } else if (tag_in(token, kStartTag,
-                 (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
-                     TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
-                     TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
-             tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
+  switch (token->type) {
+    case GUMBO_TOKEN_WHITESPACE:
+    case GUMBO_TOKEN_CHARACTER:
+    case GUMBO_TOKEN_COMMENT:
+    case GUMBO_TOKEN_NULL:
+    case GUMBO_TOKEN_DOCTYPE:
+      return handle_in_body(parser, token);
+    default:
+      break;
+  }
+  if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(LINK), TAG(META),
+      TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)
+    })
+    || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
+  ) {
     return handle_in_head(parser, token);
-  } else if (tag_in(
-                 token, kStartTag, (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP),
-                                       TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
+  } else if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(CAPTION), TAG(COLGROUP), TAG(TBODY), TAG(TFOOT), TAG(THEAD)
+    })
+  ) {
     pop_template_insertion_mode(parser);
     push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
@@ -3611,7 +3917,7 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
     state->_reprocess_current_token = true;
     return true;
-  } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
+  } else if (tag_in(token, kStartTag, &td_th_tags)) {
     pop_template_insertion_mode(parser);
     push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
@@ -3646,10 +3952,12 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterbody
 static bool handle_after_body(GumboParser* parser, GumboToken* token) {
-  if (token->type == GUMBO_TOKEN_WHITESPACE ||
-      tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
+  if (
+    token->type == GUMBO_TOKEN_WHITESPACE
+    || tag_is(token, kStartTag, GUMBO_TAG_HTML)
+  ) {
     return handle_in_body(parser, token);
   } else if (token->type == GUMBO_TOKEN_COMMENT) {
     GumboNode* html_node = parser->_output->root;
@@ -3670,8 +3978,10 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
     GumboNode* html = parser->_parser_state->_open_elements.data[0];
     assert(node_html_tag_is(html, GUMBO_TAG_HTML));
-    record_end_of_element(
-        parser->_parser_state->_current_token, &html->v.element);
+    record_end_of_element (
+      parser->_parser_state->_current_token,
+      &html->v.element
+    );
     return true;
   } else if (token->type == GUMBO_TOKEN_EOF) {
     return true;
@@ -3683,7 +3993,7 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inframeset
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inframeset
 static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
   if (token->type == GUMBO_TOKEN_WHITESPACE) {
     insert_text_token(parser, token);
@@ -3707,8 +4017,10 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
       return false;
     }
     pop_current_node(parser);
-    if (!is_fragment_parser(parser) &&
-        !node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
+    if (
+      !is_fragment_parser(parser)
+      && !node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)
+    ) {
       set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
     }
     return true;
@@ -3732,7 +4044,7 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterframeset
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterframeset
 static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
   if (token->type == GUMBO_TOKEN_WHITESPACE) {
     insert_text_token(parser, token);
@@ -3749,8 +4061,10 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
   } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
     GumboNode* html = parser->_parser_state->_open_elements.data[0];
     assert(node_html_tag_is(html, GUMBO_TAG_HTML));
-    record_end_of_element(
-        parser->_parser_state->_current_token, &html->v.element);
+    record_end_of_element (
+      parser->_parser_state->_current_token,
+      &html->v.element
+    );
     set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
     return true;
   } else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
@@ -3764,14 +4078,16 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-after-after-body-insertion-mode
+// https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-body-insertion-mode
 static bool handle_after_after_body(GumboParser* parser, GumboToken* token) {
   if (token->type == GUMBO_TOKEN_COMMENT) {
     append_comment_node(parser, get_document_node(parser), token);
     return true;
-  } else if (token->type == GUMBO_TOKEN_DOCTYPE ||
-             token->type == GUMBO_TOKEN_WHITESPACE ||
-             tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
+  } else if (
+    token->type == GUMBO_TOKEN_DOCTYPE
+    || token->type == GUMBO_TOKEN_WHITESPACE
+    || tag_is(token, kStartTag, GUMBO_TAG_HTML)
+  ) {
     return handle_in_body(parser, token);
   } else if (token->type == GUMBO_TOKEN_EOF) {
     return true;
@@ -3783,15 +4099,19 @@ static bool handle_after_after_body(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-after-after-frameset-insertion-mode
-static bool handle_after_after_frameset(
-    GumboParser* parser, GumboToken* token) {
+// https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-frameset-insertion-mode
+static bool handle_after_after_frameset (
+  GumboParser* parser,
+  GumboToken* token
+) {
   if (token->type == GUMBO_TOKEN_COMMENT) {
     append_comment_node(parser, get_document_node(parser), token);
     return true;
-  } else if (token->type == GUMBO_TOKEN_DOCTYPE ||
-             token->type == GUMBO_TOKEN_WHITESPACE ||
-             tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
+  } else if (
+    token->type == GUMBO_TOKEN_DOCTYPE
+    || token->type == GUMBO_TOKEN_WHITESPACE
+    || tag_is(token, kStartTag, GUMBO_TAG_HTML)
+  ) {
     return handle_in_body(parser, token);
   } else if (token->type == GUMBO_TOKEN_EOF) {
     return true;
@@ -3804,24 +4124,42 @@ static bool handle_after_after_frameset(
   }
 }
 
-// Function pointers for each insertion mode.  Keep in sync with
-// insertion_mode.h.
+// Token handlers indexed by GumboInsertionMode value; the order of entries
+// must be kept in sync with the modes declared in insertion_mode.h.
 typedef bool (*TokenHandler)(GumboParser* parser, GumboToken* token);
-static const TokenHandler kTokenHandlers[] = {handle_initial,
-    handle_before_html, handle_before_head, handle_in_head,
-    handle_in_head_noscript, handle_after_head, handle_in_body, handle_text,
-    handle_in_table, handle_in_table_text, handle_in_caption,
-    handle_in_column_group, handle_in_table_body, handle_in_row, handle_in_cell,
-    handle_in_select, handle_in_select_in_table, handle_in_template,
-    handle_after_body, handle_in_frameset, handle_after_frameset,
-    handle_after_after_body, handle_after_after_frameset};
+static const TokenHandler kTokenHandlers[] = {
+  handle_initial,
+  handle_before_html,
+  handle_before_head,
+  handle_in_head,
+  handle_in_head_noscript,
+  handle_after_head,
+  handle_in_body,
+  handle_text,
+  handle_in_table,
+  handle_in_table_text,
+  handle_in_caption,
+  handle_in_column_group,
+  handle_in_table_body,
+  handle_in_row,
+  handle_in_cell,
+  handle_in_select,
+  handle_in_select_in_table,
+  handle_in_template,
+  handle_after_body,
+  handle_in_frameset,
+  handle_after_frameset,
+  handle_after_after_body,
+  handle_after_after_frameset
+};
 
 static bool handle_html_content(GumboParser* parser, GumboToken* token) {
-  return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
-      parser, token);
+  const GumboInsertionMode mode = parser->_parser_state->_insertion_mode;
+  const TokenHandler handler = kTokenHandlers[mode];
+  return handler(parser, token);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inforeign
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inforeign
 static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
   gumbo_debug("Handling foreign content");
   switch (token->type) {
@@ -3850,19 +4188,25 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
       break;
   }
   // Order matters for these clauses.
-  if (tag_in(token, kStartTag,
-          (gumbo_tagset){TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
-              TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT),
-              TAG(EM), TAG(EMBED), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5),
-              TAG(H6), TAG(HEAD), TAG(HR), TAG(I), TAG(IMG), TAG(LI),
-              TAG(LISTING), TAG(MENU), TAG(META), TAG(NOBR), TAG(OL), TAG(P),
-              TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), TAG(SPAN), TAG(STRONG),
-              TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), TAG(TT), TAG(U),
-              TAG(UL), TAG(VAR)}) ||
-      (tag_is(token, kStartTag, GUMBO_TAG_FONT) &&
-          (token_has_attribute(token, "color") ||
-              token_has_attribute(token, "face") ||
-              token_has_attribute(token, "size")))) {
+  if (
+    tag_in(token, kStartTag, &(const TagSet) {
+      TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR), TAG(CENTER),
+      TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT), TAG(EM), TAG(EMBED),
+      TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6), TAG(HEAD),
+      TAG(HR), TAG(I), TAG(IMG), TAG(LI), TAG(LISTING), TAG(MENU), TAG(META),
+      TAG(NOBR), TAG(OL), TAG(P), TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL),
+      TAG(SPAN), TAG(STRONG), TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE),
+      TAG(TT), TAG(U), TAG(UL), TAG(VAR)
+    })
+    || (
+      tag_is(token, kStartTag, GUMBO_TAG_FONT)
+      && (
+        token_has_attribute(token, "color")
+        || token_has_attribute(token, "face")
+        || token_has_attribute(token, "size")
+      )
+    )
+  ) {
     /* Parse error */
     parser_add_parse_error(parser, token);
 
@@ -3874,10 +4218,13 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
     if (!is_fragment_parser(parser)) {
       do {
         pop_current_node(parser);
-      } while (!(is_mathml_integration_point(get_current_node(parser)) ||
-                   is_html_integration_point(get_current_node(parser)) ||
-                   get_current_node(parser)->v.element.tag_namespace ==
-                       GUMBO_NAMESPACE_HTML));
+      } while (
+        !(
+          is_mathml_integration_point(get_current_node(parser))
+          || is_html_integration_point(get_current_node(parser))
+          || get_current_node(parser)->v.element.tag_namespace == GUMBO_NAMESPACE_HTML
+        )
+      );
       parser->_parser_state->_reprocess_current_token = true;
       return false;
     }
@@ -3889,14 +4236,13 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
     const GumboNamespaceEnum current_namespace =
         get_adjusted_current_node(parser)->v.element.tag_namespace;
     if (current_namespace == GUMBO_NAMESPACE_MATHML) {
-      adjust_mathml_attributes(parser, token);
+      adjust_mathml_attributes(token);
     }
     if (current_namespace == GUMBO_NAMESPACE_SVG) {
-      // Tag adjustment is left to the gumbo_normalize_svg_tagname helper
-      // function.
-      adjust_svg_attributes(parser, token);
+      adjust_svg_tag(token);
+      adjust_svg_attributes(token);
     }
-    adjust_foreign_attributes(parser, token);
+    adjust_foreign_attributes(token);
     insert_foreign_element(parser, token, current_namespace);
     if (token->v.start_tag.is_self_closing) {
       pop_current_node(parser);
@@ -3909,6 +4255,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
     assert(token->type == GUMBO_TOKEN_END_TAG);
     GumboNode* node = get_current_node(parser);
     assert(node != NULL);
+    // XXX(sfc): This doesn't properly handle replacements.
     GumboStringPiece token_tagname = token->original_text;
     GumboStringPiece node_tagname = node->v.element.original_tag;
     gumbo_tag_from_original_text(&token_tagname);
@@ -3925,12 +4272,16 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
       // case we do nothing) or we find the element that we're about to
       // close (in which case we pop everything we've seen until that
       // point.)
-      gumbo_debug("Foreign %.*s node at %d.\n", node_tagname.length,
-          node_tagname.data, i);
+      gumbo_debug (
+        "Foreign %.*s node at %d.\n",
+        (int) node_tagname.length,
+        node_tagname.data,
+        i
+      );
       if (gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
         gumbo_debug("Matches.\n");
         while (pop_current_node(parser) != node) {
-          // Pop all the nodes below the current one.  Node is guaranteed to
+          // Pop all the nodes below the current one. Node is guaranteed to
           // be an element on the stack of open elements (set below), so
           // this loop is guaranteed to terminate.
         }
@@ -3954,10 +4305,12 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
+// https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
 static bool handle_token(GumboParser* parser, GumboToken* token) {
-  if (parser->_parser_state->_ignore_next_linefeed &&
-      token->type == GUMBO_TOKEN_WHITESPACE && token->v.character == '\n') {
+  if (
+    parser->_parser_state->_ignore_next_linefeed
+    && token->type == GUMBO_TOKEN_WHITESPACE && token->v.character == '\n'
+  ) {
     parser->_parser_state->_ignore_next_linefeed = false;
     ignore_token(parser);
     return true;
@@ -3976,11 +4329,16 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
   }
 
   const GumboNode* current_node = get_adjusted_current_node(parser);
-  assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT ||
-         current_node->type == GUMBO_NODE_TEMPLATE);
+  assert (
+    !current_node
+    || current_node->type == GUMBO_NODE_ELEMENT
+    || current_node->type == GUMBO_NODE_TEMPLATE
+  );
   if (current_node) {
-    gumbo_debug("Current node: <%s>.\n",
-        gumbo_normalized_tagname(current_node->v.element.tag));
+    gumbo_debug (
+      "Current node: <%s>.\n",
+      gumbo_normalized_tagname(current_node->v.element.tag)
+    );
   }
   if (!current_node ||
       current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML ||
@@ -3990,7 +4348,7 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
               token->type == GUMBO_TOKEN_NULL ||
               (token->type == GUMBO_TOKEN_START_TAG &&
                   !tag_in(token, kStartTag,
-                      (gumbo_tagset){TAG(MGLYPH), TAG(MALIGNMARK)})))) ||
+                      &(const TagSet){TAG(MGLYPH), TAG(MALIGNMARK)})))) ||
       (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML &&
           node_qualified_tag_is(
               current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
@@ -4007,8 +4365,11 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
   }
 }
 
-static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
-    GumboNamespaceEnum fragment_namespace) {
+static void fragment_parser_init (
+  GumboParser* parser,
+  GumboTag fragment_ctx,
+  GumboNamespaceEnum fragment_namespace
+) {
   GumboNode* root;
   assert(fragment_ctx != GUMBO_TAG_LAST);
 
@@ -4054,8 +4415,11 @@ static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
   }
 
   // 5. 6. 7.
-  root = insert_element_of_tag_type(
-      parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
+  root = insert_element_of_tag_type (
+    parser,
+    GUMBO_TAG_HTML,
+    GUMBO_INSERTION_IMPLIED
+  );
   parser->_output->root = root;
 
   // 8.
@@ -4068,12 +4432,18 @@ static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
 }
 
 GumboOutput* gumbo_parse(const char* buffer) {
-  return gumbo_parse_with_options(
-      &kGumboDefaultOptions, buffer, strlen(buffer));
+  return gumbo_parse_with_options (
+    &kGumboDefaultOptions,
+    buffer,
+    strlen(buffer)
+  );
 }
 
-GumboOutput* gumbo_parse_with_options(
-    const GumboOptions* options, const char* buffer, size_t length) {
+GumboOutput* gumbo_parse_with_options (
+  const GumboOptions* options,
+  const char* buffer,
+  size_t length
+) {
   GumboParser parser;
   parser._options = options;
   output_init(&parser);
@@ -4081,16 +4451,23 @@ GumboOutput* gumbo_parse_with_options(
   parser_state_init(&parser);
 
   if (options->fragment_context != GUMBO_TAG_LAST) {
-    fragment_parser_init(
-        &parser, options->fragment_context, options->fragment_namespace);
+    fragment_parser_init (
+      &parser,
+      options->fragment_context,
+      options->fragment_namespace
+    );
   }
 
   GumboParserState* state = parser._parser_state;
-  gumbo_debug("Parsing %.*s.\n", length, buffer);
+  gumbo_debug (
+    "Parsing %.*s.\n",
+    (int) length,
+    buffer
+  );
 
   // Sanity check so that infinite loops die with an assertion failure instead
   // of hanging the process before we ever get an error.
-  int loop_count = 0;
+  uint_fast32_t loop_count = 0;
 
   GumboToken token;
   bool has_error = false;
@@ -4100,21 +4477,27 @@ GumboOutput* gumbo_parse_with_options(
       state->_reprocess_current_token = false;
     } else {
       GumboNode* current_node = get_current_node(&parser);
-      gumbo_tokenizer_set_is_current_node_foreign(&parser,
-          current_node &&
-              current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
+      gumbo_tokenizer_set_is_current_node_foreign (
+        &parser,
+        current_node &&
+          current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML
+      );
       has_error = !gumbo_lex(&parser, &token) || has_error;
     }
+
     const char* token_type = "text";
     switch (token.type) {
       case GUMBO_TOKEN_DOCTYPE:
         token_type = "doctype";
         break;
       case GUMBO_TOKEN_START_TAG:
-        token_type = gumbo_normalized_tagname(token.v.start_tag.tag);
+        if (token.v.start_tag.tag == GUMBO_TAG_UNKNOWN)
+          token_type = token.v.start_tag.name;
+        else
+          token_type = gumbo_normalized_tagname(token.v.start_tag.tag);
         break;
       case GUMBO_TOKEN_END_TAG:
-        token_type = gumbo_normalized_tagname(token.v.end_tag);
+        token_type = gumbo_normalized_tagname(token.v.end_tag.tag);
         break;
       case GUMBO_TOKEN_COMMENT:
         token_type = "comment";
@@ -4122,47 +4505,70 @@ GumboOutput* gumbo_parse_with_options(
       default:
         break;
     }
-    gumbo_debug("Handling %s token @%d:%d in state %d.\n", (char*) token_type,
-        token.position.line, token.position.column, state->_insertion_mode);
+    gumbo_debug (
+      "Handling %s token @%zu:%zu in state %u.\n",
+      (char*) token_type,
+      token.position.line,
+      token.position.column,
+      state->_insertion_mode
+    );
 
     state->_current_token = &token;
-    state->_self_closing_flag_acknowledged =
-        !(token.type == GUMBO_TOKEN_START_TAG &&
-            token.v.start_tag.is_self_closing);
+    state->_self_closing_flag_acknowledged = false;
 
     has_error = !handle_token(&parser, &token) || has_error;
 
     // Check for memory leaks when ownership is transferred from start tag
     // tokens to nodes.
-    assert(state->_reprocess_current_token ||
-           token.type != GUMBO_TOKEN_START_TAG ||
-           token.v.start_tag.attributes.data == NULL);
-
-    if (!state->_self_closing_flag_acknowledged) {
-      GumboError* error = parser_add_parse_error(&parser, &token);
-      if (error) {
-        error->type = GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG;
+    assert (
+      state->_reprocess_current_token
+      || token.type != GUMBO_TOKEN_START_TAG
+      || (token.v.start_tag.attributes.data == NULL
+          && token.v.start_tag.name == NULL)
+    );
+
+    if (!state->_reprocess_current_token) {
+      if (token.type == GUMBO_TOKEN_START_TAG &&
+          token.v.start_tag.is_self_closing &&
+          !state->_self_closing_flag_acknowledged) {
+        GumboError* error = parser_add_parse_error(&parser, &token);
+        if (error)
+          error->type = GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG;
       }
+      if (token.type == GUMBO_TOKEN_END_TAG &&
+          token.v.end_tag.is_self_closing) {
+        GumboError* error = parser_add_parse_error(&parser, &token);
+        if (error)
+          error->type = GUMBO_ERR_SELF_CLOSING_END_TAG;
+      }
+    }
+
+    if (unlikely(parser._parser_state->_open_elements.length > 400)) {
+      parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
+      gumbo_debug("Tree depth limit exceeded.\n");
+      break;
     }
 
     ++loop_count;
-    assert(loop_count < 1000000000);
+    assert(loop_count < 1000000000UL);
 
-  } while ((token.type != GUMBO_TOKEN_EOF || state->_reprocess_current_token) &&
-           !(options->stop_on_first_error && has_error));
+  } while (
+    (token.type != GUMBO_TOKEN_EOF || state->_reprocess_current_token)
+    && !(options->stop_on_first_error && has_error)
+  );
 
   finish_parsing(&parser);
   // For API uniformity reasons, if the doctype still has nulls, convert them to
   // empty strings.
   GumboDocument* doc_type = &parser._output->document->v.document;
   if (doc_type->name == NULL) {
-    doc_type->name = gumbo_copy_stringz(&parser, "");
+    doc_type->name = gumbo_strdup("");
   }
   if (doc_type->public_identifier == NULL) {
-    doc_type->public_identifier = gumbo_copy_stringz(&parser, "");
+    doc_type->public_identifier = gumbo_strdup("");
   }
   if (doc_type->system_identifier == NULL) {
-    doc_type->system_identifier = gumbo_copy_stringz(&parser, "");
+    doc_type->system_identifier = gumbo_strdup("");
   }
 
   parser_state_destroy(&parser);
@@ -4170,23 +4576,28 @@ GumboOutput* gumbo_parse_with_options(
   return parser._output;
 }
 
-void gumbo_destroy_node(GumboOptions* options, GumboNode* node) {
-  // Need a dummy GumboParser because the allocator comes along with the
-  // options object.
-  GumboParser parser;
-  parser._options = options;
-  destroy_node(&parser, node);
+const char* gumbo_status_to_string(GumboOutputStatus status) {
+  switch (status) {
+    case GUMBO_STATUS_OK:
+      return "OK";
+    case GUMBO_STATUS_OUT_OF_MEMORY:
+      return "System allocator returned NULL during parsing";
+    case GUMBO_STATUS_TREE_TOO_DEEP:
+      return "Document tree depth limit exceeded";
+    default:
+      return "Unknown GumboOutputStatus value";
+  }
 }
 
-void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
-  // Need a dummy GumboParser because the allocator comes along with the
-  // options object.
-  GumboParser parser;
-  parser._options = options;
-  destroy_node(&parser, output->document);
+void gumbo_destroy_node(GumboNode* node) {
+  destroy_node(node);
+}
+
+void gumbo_destroy_output(GumboOutput* output) {
+  destroy_node(output->document);
   for (unsigned int i = 0; i < output->errors.length; ++i) {
-    gumbo_error_destroy(&parser, output->errors.data[i]);
+    gumbo_error_destroy(output->errors.data[i]);
   }
-  gumbo_vector_destroy(&parser, &output->errors);
-  gumbo_parser_deallocate(&parser, output);
+  gumbo_vector_destroy(&output->errors);
+  gumbo_free(output);
 }
diff --git a/gumbo-parser/src/parser.h b/gumbo-parser/src/parser.h
index 95019e3e..740559f7 100644
--- a/gumbo-parser/src/parser.h
+++ b/gumbo-parser/src/parser.h
@@ -1,22 +1,3 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// Contains the definition of the top-level GumboParser structure that's
-// threaded through basically every internal function in the library.
-
 #ifndef GUMBO_PARSER_H_
 #define GUMBO_PARSER_H_
 
@@ -24,13 +5,16 @@
 extern "C" {
 #endif
 
+// Contains the definition of the top-level GumboParser structure that's
+// threaded through basically every internal function in the library.
+
 struct GumboInternalParserState;
 struct GumboInternalOutput;
 struct GumboInternalOptions;
 struct GumboInternalTokenizerState;
 
 // An overarching struct that's threaded through (nearly) all functions in the
-// library, OOP-style.  This gives each function access to the options and
+// library, OOP-style. This gives each function access to the options and
 // output, along with any internal state needed for the parse.
 typedef struct GumboInternalParser {
   // Settings for this parse run.
@@ -40,12 +24,12 @@ typedef struct GumboInternalParser {
   struct GumboInternalOutput* _output;
 
   // The internal tokenizer state, defined as a pointer to avoid a cyclic
-  // dependency on html5tokenizer.h.  The main parse routine is responsible for
+  // dependency on html5tokenizer.h. The main parse routine is responsible for
   // initializing this on parse start, and destroying it on parse end.
   // End-users will never see a non-garbage value in this pointer.
   struct GumboInternalTokenizerState* _tokenizer_state;
 
-  // The internal parser state.  Initialized on parse start and destroyed on
+  // The internal parser state. Initialized on parse start and destroyed on
   // parse end; end-users will never see a non-garbage value in this pointer.
   struct GumboInternalParserState* _parser_state;
 } GumboParser;
diff --git a/gumbo-parser/src/replacement.h b/gumbo-parser/src/replacement.h
new file mode 100644
index 00000000..327264d4
--- /dev/null
+++ b/gumbo-parser/src/replacement.h
@@ -0,0 +1,33 @@
+#ifndef GUMBO_REPLACEMENT_H_
+#define GUMBO_REPLACEMENT_H_
+
+#include <stddef.h>
+#include "gumbo.h"
+
+typedef struct {
+  const char *const from; // lookup key (all-lowercase in the gperf SVG tables)
+  const char *const to; // paired spelling, e.g. "refX" for the key "refx"
+} StringReplacement;
+
+const StringReplacement *gumbo_get_svg_tag_replacement (
+  const char* str,
+  size_t len
+);
+
+const StringReplacement *gumbo_get_svg_attr_replacement (
+  const char* str,
+  size_t len
+);
+
+typedef struct {
+  const char *const from;
+  const char *const local_name;
+  const GumboAttributeNamespaceEnum attr_namespace;
+} ForeignAttrReplacement;
+
+const ForeignAttrReplacement *gumbo_get_foreign_attr_replacement (
+  const char* str,
+  size_t len
+);
+
+#endif // GUMBO_REPLACEMENT_H_
diff --git a/gumbo-parser/src/string_buffer.c b/gumbo-parser/src/string_buffer.c
index d9be2f6b..729ff815 100644
--- a/gumbo-parser/src/string_buffer.c
+++ b/gumbo-parser/src/string_buffer.c
@@ -1,67 +1,61 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
+/*
+ Copyright 2010 Google Inc.
 
-#include "string_buffer.h"
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
 
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
+    https://www.apache.org/licenses/LICENSE-2.0
 
-#include "string_piece.h"
-#include "util.h"
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
 
-struct GumboInternalParser;
+#include <string.h>
+#include "string_buffer.h"
+#include "util.h"
 
 // Size chosen via statistical analysis of ~60K websites.
 // 99% of text nodes and 98% of attribute names/values fit in this initial size.
 static const size_t kDefaultStringBufferSize = 5;
 
-static void maybe_resize_string_buffer(struct GumboInternalParser* parser,
-    size_t additional_chars, GumboStringBuffer* buffer) {
+static void maybe_resize_string_buffer (
+  size_t additional_chars,
+  GumboStringBuffer* buffer
+) {
   size_t new_length = buffer->length + additional_chars;
   size_t new_capacity = buffer->capacity;
   while (new_capacity < new_length) {
     new_capacity *= 2;
   }
   if (new_capacity != buffer->capacity) {
-    char* new_data = gumbo_parser_allocate(parser, new_capacity);
-    memcpy(new_data, buffer->data, buffer->length);
-    gumbo_parser_deallocate(parser, buffer->data);
-    buffer->data = new_data;
+    buffer->data = gumbo_realloc(buffer->data, new_capacity);
     buffer->capacity = new_capacity;
   }
 }
 
-void gumbo_string_buffer_init(
-    struct GumboInternalParser* parser, GumboStringBuffer* output) {
-  output->data = gumbo_parser_allocate(parser, kDefaultStringBufferSize);
+void gumbo_string_buffer_init(GumboStringBuffer* output) {
+  output->data = gumbo_alloc(kDefaultStringBufferSize);
   output->length = 0;
   output->capacity = kDefaultStringBufferSize;
 }
 
-void gumbo_string_buffer_reserve(struct GumboInternalParser* parser,
-    size_t min_capacity, GumboStringBuffer* output) {
-  maybe_resize_string_buffer(parser, min_capacity - output->length, output);
+void gumbo_string_buffer_reserve (
+  size_t min_capacity,
+  GumboStringBuffer* output
+) {
+  maybe_resize_string_buffer(min_capacity - output->length, output);
 }
 
-void gumbo_string_buffer_append_codepoint(
-    struct GumboInternalParser* parser, int c, GumboStringBuffer* output) {
+void gumbo_string_buffer_append_codepoint (
+  int c,
+  GumboStringBuffer* output
+) {
   // num_bytes is actually the number of continuation bytes, 1 less than the
-  // total number of bytes.  This is done to keep the loop below simple and
+  // total number of bytes. This is done to keep the loop below simple and
   // should probably change if we unroll it.
   int num_bytes, prefix;
   if (c <= 0x7f) {
@@ -77,34 +71,33 @@ void gumbo_string_buffer_append_codepoint(
     num_bytes = 3;
     prefix = 0xf0;
   }
-  maybe_resize_string_buffer(parser, num_bytes + 1, output);
+  maybe_resize_string_buffer(num_bytes + 1, output);
   output->data[output->length++] = prefix | (c >> (num_bytes * 6));
   for (int i = num_bytes - 1; i >= 0; --i) {
     output->data[output->length++] = 0x80 | (0x3f & (c >> (i * 6)));
   }
 }
 
-void gumbo_string_buffer_append_string(struct GumboInternalParser* parser,
-    GumboStringPiece* str, GumboStringBuffer* output) {
-  maybe_resize_string_buffer(parser, str->length, output);
+void gumbo_string_buffer_append_string (
+  GumboStringPiece* str,
+  GumboStringBuffer* output
+) {
+  maybe_resize_string_buffer(str->length, output);
   memcpy(output->data + output->length, str->data, str->length);
   output->length += str->length;
 }
 
-char* gumbo_string_buffer_to_string(
-    struct GumboInternalParser* parser, GumboStringBuffer* input) {
-  char* buffer = gumbo_parser_allocate(parser, input->length + 1);
+char* gumbo_string_buffer_to_string(const GumboStringBuffer* input) {
+  char* buffer = gumbo_alloc(input->length + 1);
   memcpy(buffer, input->data, input->length);
   buffer[input->length] = '\0';
   return buffer;
 }
 
-void gumbo_string_buffer_clear(
-    struct GumboInternalParser* parser, GumboStringBuffer* input) {
+void gumbo_string_buffer_clear(GumboStringBuffer* input) {
   input->length = 0;
 }
 
-void gumbo_string_buffer_destroy(
-    struct GumboInternalParser* parser, GumboStringBuffer* buffer) {
-  gumbo_parser_deallocate(parser, buffer->data);
+void gumbo_string_buffer_destroy(GumboStringBuffer* buffer) {
+  gumbo_free(buffer->data);
 }
diff --git a/gumbo-parser/src/string_buffer.h b/gumbo-parser/src/string_buffer.h
index ee7956ac..41cabd1b 100644
--- a/gumbo-parser/src/string_buffer.h
+++ b/gumbo-parser/src/string_buffer.h
@@ -1,19 +1,3 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
 #ifndef GUMBO_STRING_BUFFER_H_
 #define GUMBO_STRING_BUFFER_H_
 
@@ -26,18 +10,16 @@
 extern "C" {
 #endif
 
-struct GumboInternalParser;
-
-// A struct representing a mutable, growable string.  This consists of a
-// heap-allocated buffer that may grow (by doubling) as necessary.  When
+// A struct representing a mutable, growable string. This consists of a
+// heap-allocated buffer that may grow (by doubling) as necessary. When
 // converting to a string, this allocates a new buffer that is only as long as
-// it needs to be.  Note that the internal buffer here is *not* nul-terminated,
+// it needs to be. Note that the internal buffer here is *not* nul-terminated,
 // so be sure not to use ordinary string manipulation functions on it.
 typedef struct {
-  // A pointer to the beginning of the string.  NULL iff length == 0.
+  // A pointer to the beginning of the string. Allocated by init even when empty.
   char* data;
 
-  // The length of the string fragment, in bytes.  May be zero.
+  // The length of the string fragment, in bytes. May be zero.
   size_t length;
 
   // The capacity of the buffer, in bytes.
@@ -45,40 +27,42 @@ typedef struct {
 } GumboStringBuffer;
 
 // Initializes a new GumboStringBuffer.
-void gumbo_string_buffer_init(
-    struct GumboInternalParser* parser, GumboStringBuffer* output);
+void gumbo_string_buffer_init(GumboStringBuffer* output);
 
-// Ensures that the buffer contains at least a certain amount of space.  Most
+// Ensures that the buffer contains at least a certain amount of space. Most
 // useful with snprintf and the other length-delimited string functions, which
 // may want to write directly into the buffer.
-void gumbo_string_buffer_reserve(struct GumboInternalParser* parser,
-    size_t min_capacity, GumboStringBuffer* output);
+void gumbo_string_buffer_reserve (
+  size_t min_capacity,
+  GumboStringBuffer* output
+);
 
 // Appends a single Unicode codepoint onto the end of the GumboStringBuffer.
 // This is essentially a UTF-8 encoder, and may add 1-4 bytes depending on the
 // value of the codepoint.
-void gumbo_string_buffer_append_codepoint(
-    struct GumboInternalParser* parser, int c, GumboStringBuffer* output);
+void gumbo_string_buffer_append_codepoint (
+  int c,
+  GumboStringBuffer* output
+);
 
 // Appends a string onto the end of the GumboStringBuffer.
-void gumbo_string_buffer_append_string(struct GumboInternalParser* parser,
-    GumboStringPiece* str, GumboStringBuffer* output);
+void gumbo_string_buffer_append_string (
+  GumboStringPiece* str,
+  GumboStringBuffer* output
+);
 
-// Converts this string buffer to const char*, alloctaing a new buffer for it.
+// Copies this string buffer into a newly allocated, nul-terminated char*.
-char* gumbo_string_buffer_to_string(
-    struct GumboInternalParser* parser, GumboStringBuffer* input);
+char* gumbo_string_buffer_to_string(const GumboStringBuffer* input);
 
-// Reinitialize this string buffer.  This clears it by setting length=0.  It
+// Reinitialize this string buffer. This clears it by setting length=0. It
 // does not zero out the buffer itself.
-void gumbo_string_buffer_clear(
-    struct GumboInternalParser* parser, GumboStringBuffer* input);
+void gumbo_string_buffer_clear(GumboStringBuffer* input);
 
 // Deallocates this GumboStringBuffer.
-void gumbo_string_buffer_destroy(
-    struct GumboInternalParser* parser, GumboStringBuffer* buffer);
+void gumbo_string_buffer_destroy(GumboStringBuffer* buffer);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // GUMBO_STRING_BUFFER_H_
+#endif // GUMBO_STRING_BUFFER_H_
diff --git a/gumbo-parser/src/string_piece.c b/gumbo-parser/src/string_piece.c
index 8ad5b846..129c8e53 100644
--- a/gumbo-parser/src/string_piece.c
+++ b/gumbo-parser/src/string_piece.c
@@ -1,48 +1,44 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
+/*
+ Copyright 2018 Craig Barnes.
+ Copyright 2010 Google Inc.
 
-#include "string_piece.h"
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
 
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
-
-#include "util.h"
-
-struct GumboInternalParser;
+    https://www.apache.org/licenses/LICENSE-2.0
 
-const GumboStringPiece kGumboEmptyString = {NULL, 0};
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
 
-bool gumbo_string_equals(
-    const GumboStringPiece* str1, const GumboStringPiece* str2) {
-  return str1->length == str2->length &&
-         !memcmp(str1->data, str2->data, str1->length);
-}
-
-bool gumbo_string_equals_ignore_case(
-    const GumboStringPiece* str1, const GumboStringPiece* str2) {
-  return str1->length == str2->length &&
-         !strncasecmp(str1->data, str2->data, str1->length);
+#include <stddef.h>
+#include <string.h>
+#include "gumbo.h"
+#include "ascii.h"
+
+// Canonical empty string piece: no backing data and zero length.
+const GumboStringPiece kGumboEmptyString = {
+  .data = NULL, .length = 0
+};
+
+// Byte-wise equality. Empty pieces compare equal without touching data,
+// since data may be NULL (kGumboEmptyString) and memcmp(NULL, ...) is UB.
+bool gumbo_string_equals (
+  const GumboStringPiece* str1, const GumboStringPiece* str2
+) {
+  return str1->length == str2->length
+    && (str1->length == 0 || !memcmp(str1->data, str2->data, str1->length));
 }
 
-void gumbo_string_copy(struct GumboInternalParser* parser,
-    GumboStringPiece* dest, const GumboStringPiece* source) {
-  dest->length = source->length;
-  char* buffer = gumbo_parser_allocate(parser, source->length);
-  memcpy(buffer, source->data, source->length);
-  dest->data = buffer;
+// ASCII case-insensitive equality of two string pieces.
+bool gumbo_string_equals_ignore_case (
+  const GumboStringPiece* str1, const GumboStringPiece* str2
+) {
+  if (str1->length != str2->length) return false;
+  // Same length: equal exactly when the case-folded comparison yields 0.
+  return gumbo_ascii_strncasecmp(str1->data, str2->data, str1->length) == 0;
 }
diff --git a/gumbo-parser/src/string_piece.h b/gumbo-parser/src/string_piece.h
deleted file mode 100644
index 8c8188c5..00000000
--- a/gumbo-parser/src/string_piece.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-
-#ifndef GUMBO_STRING_PIECE_H_
-#define GUMBO_STRING_PIECE_H_
-
-#include "gumbo.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct GumboInternalParser;
-
-// Performs a deep-copy of an GumboStringPiece, allocating a fresh buffer in the
-// destination and copying over the characters from source.  Dest should be
-// empty, with no buffer allocated; otherwise, this leaks it.
-void gumbo_string_copy(struct GumboInternalParser* parser,
-    GumboStringPiece* dest, const GumboStringPiece* source);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // GUMBO_STRING_PIECE_H_
diff --git a/gumbo-parser/src/svg_attrs.c b/gumbo-parser/src/svg_attrs.c
new file mode 100644
index 00000000..76a44779
--- /dev/null
+++ b/gumbo-parser/src/svg_attrs.c
@@ -0,0 +1,174 @@
+/* ANSI-C code produced by gperf version 3.1 */
+/* Command-line: gperf -m100 lib/svg_attrs.gperf  */
+/* Computed positions: -k'1,10,$' */
+/* Filtered by: mk/gperf-filter.sed */
+
+#include "replacement.h"
+#include "macros.h"
+#include "ascii.h"
+#include <string.h>
+
+#define TOTAL_KEYWORDS 58
+#define MIN_WORD_LENGTH 4
+#define MAX_WORD_LENGTH 19
+#define MIN_HASH_VALUE 5
+#define MAX_HASH_VALUE 77
+/* maximum key range = 73, duplicates = 0 */
+
+
+
+static inline unsigned int
+hash (register const char *str, register size_t len)
+{
+  static const unsigned char asso_values[] =
+    {
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78,  5, 78, 39, 14,  1,
+      31, 31, 13, 13, 78, 78, 22, 25, 10,  2,
+       7, 78, 22,  0,  1,  3,  1, 78,  0, 36,
+      14, 17, 20, 78, 78, 78, 78,  5, 78, 39,
+      14,  1, 31, 31, 13, 13, 78, 78, 22, 25,
+      10,  2,  7, 78, 22,  0,  1,  3,  1, 78,
+       0, 36, 14, 17, 20, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78
+    };
+  register unsigned int hval = len;
+
+  switch (hval)
+    {
+      default:
+        hval += asso_values[(unsigned char)str[9]];
+      /*FALLTHROUGH*/
+      case 9:
+      case 8:
+      case 7:
+      case 6:
+      case 5:
+      case 4:
+      case 3:
+      case 2:
+      case 1:
+        hval += asso_values[(unsigned char)str[0]+2];
+        break;
+    }
+  return hval + asso_values[(unsigned char)str[len - 1]];
+}
+
+const StringReplacement *
+gumbo_get_svg_attr_replacement (register const char *str, register size_t len)
+{
+  static const unsigned char lengthtable[] =
+    {
+       0,  0,  0,  0,  0,  4,  0,  7,  7,  0,  8,  9, 10, 11,
+      11, 11, 11, 10, 16, 18, 16, 12, 16, 11, 13, 11, 12, 11,
+      16,  0, 17,  9,  9,  8,  9, 10, 13, 10, 12, 14,  8,  4,
+      12, 19,  7,  9, 12, 12, 11, 14, 10, 19,  8, 16, 13, 16,
+      16, 15, 10, 12,  0,  0, 13, 13, 13,  0,  0,  9, 16,  0,
+       0,  0,  0,  0,  0,  0,  0, 17
+    };
+  static const StringReplacement wordlist[] =
+    {
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {(char*)0,(char*)0},
+      {"refx", "refX"},
+      {(char*)0,(char*)0},
+      {"viewbox", "viewBox"},
+      {"targetx", "targetX"},
+      {(char*)0,(char*)0},
+      {"calcmode", "calcMode"},
+      {"maskunits", "maskUnits"},
+      {"viewtarget", "viewTarget"},
+      {"tablevalues", "tableValues"},
+      {"markerunits", "markerUnits"},
+      {"stitchtiles", "stitchTiles"},
+      {"startoffset", "startOffset"},
+      {"numoctaves", "numOctaves"},
+      {"requiredfeatures", "requiredFeatures"},
+      {"requiredextensions", "requiredExtensions"},
+      {"specularexponent", "specularExponent"},
+      {"surfacescale", "surfaceScale"},
+      {"specularconstant", "specularConstant"},
+      {"repeatcount", "repeatCount"},
+      {"clippathunits", "clipPathUnits"},
+      {"filterunits", "filterUnits"},
+      {"lengthadjust", "lengthAdjust"},
+      {"markerwidth", "markerWidth"},
+      {"maskcontentunits", "maskContentUnits"},
+      {(char*)0,(char*)0},
+      {"limitingconeangle", "limitingConeAngle"},
+      {"pointsatx", "pointsAtX"},
+      {"repeatdur", "repeatDur"},
+      {"keytimes", "keyTimes"},
+      {"keypoints", "keyPoints"},
+      {"keysplines", "keySplines"},
+      {"gradientunits", "gradientUnits"},
+      {"textlength", "textLength"},
+      {"stddeviation", "stdDeviation"},
+      {"primitiveunits", "primitiveUnits"},
+      {"edgemode", "edgeMode"},
+      {"refy", "refY"},
+      {"spreadmethod", "spreadMethod"},
+      {"preserveaspectratio", "preserveAspectRatio"},
+      {"targety", "targetY"},
+      {"pointsatz", "pointsAtZ"},
+      {"markerheight", "markerHeight"},
+      {"patternunits", "patternUnits"},
+      {"baseprofile", "baseProfile"},
+      {"systemlanguage", "systemLanguage"},
+      {"zoomandpan", "zoomAndPan"},
+      {"patterncontentunits", "patternContentUnits"},
+      {"glyphref", "glyphRef"},
+      {"xchannelselector", "xChannelSelector"},
+      {"attributetype", "attributeType"},
+      {"kernelunitlength", "kernelUnitLength"},
+      {"ychannelselector", "yChannelSelector"},
+      {"diffuseconstant", "diffuseConstant"},
+      {"pathlength", "pathLength"},
+      {"kernelmatrix", "kernelMatrix"},
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {"preservealpha", "preserveAlpha"},
+      {"attributename", "attributeName"},
+      {"basefrequency", "baseFrequency"},
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {"pointsaty", "pointsAtY"},
+      {"patterntransform", "patternTransform"},
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {"gradienttransform", "gradientTransform"}
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+      register unsigned int key = hash (str, len);
+
+      if (key <= MAX_HASH_VALUE)
+        if (len == lengthtable[key])
+          {
+            register const char *s = wordlist[key].from;
+
+            if (s && (((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gumbo_ascii_strncasecmp(str, s, len))
+              return &wordlist[key];
+          }
+    }
+  return 0;
+}
diff --git a/gumbo-parser/src/svg_attrs.gperf b/gumbo-parser/src/svg_attrs.gperf
new file mode 100644
index 00000000..ce9b2eb2
--- /dev/null
+++ b/gumbo-parser/src/svg_attrs.gperf
@@ -0,0 +1,77 @@
+%{
+#include "replacement.h"
+#include "macros.h"
+#include "ascii.h"
+%}
+
+%ignore-case
+%struct-type
+%omit-struct-type
+%compare-lengths
+%readonly-tables
+%null-strings
+%includes
+%define lookup-function-name gumbo_get_svg_attr_replacement
+%define slot-name from
+%define initializer-suffix ,(char*)0
+StringReplacement;
+
+%%
+"attributename", "attributeName"
+"attributetype", "attributeType"
+"basefrequency", "baseFrequency"
+"baseprofile", "baseProfile"
+"calcmode", "calcMode"
+"clippathunits", "clipPathUnits"
+"diffuseconstant", "diffuseConstant"
+"edgemode", "edgeMode"
+"filterunits", "filterUnits"
+"glyphref", "glyphRef"
+"gradienttransform", "gradientTransform"
+"gradientunits", "gradientUnits"
+"kernelmatrix", "kernelMatrix"
+"kernelunitlength", "kernelUnitLength"
+"keypoints", "keyPoints"
+"keysplines", "keySplines"
+"keytimes", "keyTimes"
+"lengthadjust", "lengthAdjust"
+"limitingconeangle", "limitingConeAngle"
+"markerheight", "markerHeight"
+"markerunits", "markerUnits"
+"markerwidth", "markerWidth"
+"maskcontentunits", "maskContentUnits"
+"maskunits", "maskUnits"
+"numoctaves", "numOctaves"
+"pathlength", "pathLength"
+"patterncontentunits", "patternContentUnits"
+"patterntransform", "patternTransform"
+"patternunits", "patternUnits"
+"pointsatx", "pointsAtX"
+"pointsaty", "pointsAtY"
+"pointsatz", "pointsAtZ"
+"preservealpha", "preserveAlpha"
+"preserveaspectratio", "preserveAspectRatio"
+"primitiveunits", "primitiveUnits"
+"refx", "refX"
+"refy", "refY"
+"repeatcount", "repeatCount"
+"repeatdur", "repeatDur"
+"requiredextensions", "requiredExtensions"
+"requiredfeatures", "requiredFeatures"
+"specularconstant", "specularConstant"
+"specularexponent", "specularExponent"
+"spreadmethod", "spreadMethod"
+"startoffset", "startOffset"
+"stddeviation", "stdDeviation"
+"stitchtiles", "stitchTiles"
+"surfacescale", "surfaceScale"
+"systemlanguage", "systemLanguage"
+"tablevalues", "tableValues"
+"targetx", "targetX"
+"targety", "targetY"
+"textlength", "textLength"
+"viewbox", "viewBox"
+"viewtarget", "viewTarget"
+"xchannelselector", "xChannelSelector"
+"ychannelselector", "yChannelSelector"
+"zoomandpan", "zoomAndPan"
diff --git a/gumbo-parser/src/svg_tags.c b/gumbo-parser/src/svg_tags.c
new file mode 100644
index 00000000..5d835454
--- /dev/null
+++ b/gumbo-parser/src/svg_tags.c
@@ -0,0 +1,137 @@
+/* ANSI-C code produced by gperf version 3.1 */
+/* Command-line: gperf -m100 lib/svg_tags.gperf  */
+/* Computed positions: -k'3,7' */
+/* Filtered by: mk/gperf-filter.sed */
+
+#include "replacement.h"
+#include "macros.h"
+#include "ascii.h"
+#include <string.h>
+
+#define TOTAL_KEYWORDS 36
+#define MIN_WORD_LENGTH 6
+#define MAX_WORD_LENGTH 19
+#define MIN_HASH_VALUE 6
+#define MAX_HASH_VALUE 42
+/* maximum key range = 37, duplicates = 0 */
+
+
+
+static inline unsigned int
+hash (register const char *str, register size_t len)
+{
+  static const unsigned char asso_values[] =
+    {
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 12,  2, 10, 22,
+       1, 28, 15,  1, 43, 43, 43,  0,  9, 26,
+       3, 17,  1, 11,  0, 22,  5, 43,  3,  2,
+      43, 43, 43, 43, 43, 43, 43, 43, 12,  2,
+      10, 22,  1, 28, 15,  1, 43, 43, 43,  0,
+       9, 26,  3, 17,  1, 11,  0, 22,  5, 43,
+       3,  2, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43
+    };
+  register unsigned int hval = len;
+
+  switch (hval)
+    {
+      default:
+        hval += asso_values[(unsigned char)str[6]+1];
+      /*FALLTHROUGH*/
+      case 6:
+      case 5:
+      case 4:
+      case 3:
+        hval += asso_values[(unsigned char)str[2]];
+        break;
+    }
+  return hval;
+}
+
+const StringReplacement *
+gumbo_get_svg_tag_replacement (register const char *str, register size_t len)
+{
+  static const unsigned char lengthtable[] =
+    {
+       0,  0,  0,  0,  0,  0,  6,  0,  7,  7,  7,  8, 11, 12,
+      12, 13, 11, 12, 16,  7,  7, 16, 11,  7, 19,  8, 13, 17,
+      11, 12,  7,  8, 17,  8, 18,  8, 14, 12, 14, 14, 13,  7,
+      14
+    };
+  static const StringReplacement wordlist[] =
+    {
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {(char*)0,(char*)0}, {(char*)0,(char*)0},
+      {"fetile", "feTile"},
+      {(char*)0,(char*)0},
+      {"femerge", "feMerge"},
+      {"feimage", "feImage"},
+      {"fefuncb", "feFuncB"},
+      {"glyphref", "glyphRef"},
+      {"femergenode", "feMergeNode"},
+      {"femorphology", "feMorphology"},
+      {"animatecolor", "animateColor"},
+      {"animatemotion", "animateMotion"},
+      {"fecomposite", "feComposite"},
+      {"feturbulence", "feTurbulence"},
+      {"animatetransform", "animateTransform"},
+      {"fefuncr", "feFuncR"},
+      {"fefunca", "feFuncA"},
+      {"feconvolvematrix", "feConvolveMatrix"},
+      {"fespotlight", "feSpotLight"},
+      {"fefuncg", "feFuncG"},
+      {"fecomponenttransfer", "feComponentTransfer"},
+      {"altglyph", "altGlyph"},
+      {"fecolormatrix", "feColorMatrix"},
+      {"fedisplacementmap", "feDisplacementMap"},
+      {"altglyphdef", "altGlyphDef"},
+      {"altglyphitem", "altGlyphItem"},
+      {"feflood", "feFlood"},
+      {"clippath", "clipPath"},
+      {"fediffuselighting", "feDiffuseLighting"},
+      {"textpath", "textPath"},
+      {"fespecularlighting", "feSpecularLighting"},
+      {"feoffset", "feOffset"},
+      {"fedistantlight", "feDistantLight"},
+      {"fepointlight", "fePointLight"},
+      {"lineargradient", "linearGradient"},
+      {"radialgradient", "radialGradient"},
+      {"foreignobject", "foreignObject"},
+      {"feblend", "feBlend"},
+      {"fegaussianblur", "feGaussianBlur"}
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+      register unsigned int key = hash (str, len);
+
+      if (key <= MAX_HASH_VALUE)
+        if (len == lengthtable[key])
+          {
+            register const char *s = wordlist[key].from;
+
+            if (s && (((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gumbo_ascii_strncasecmp(str, s, len))
+              return &wordlist[key];
+          }
+    }
+  return 0;
+}
diff --git a/gumbo-parser/src/svg_tags.gperf b/gumbo-parser/src/svg_tags.gperf
new file mode 100644
index 00000000..a3c05f71
--- /dev/null
+++ b/gumbo-parser/src/svg_tags.gperf
@@ -0,0 +1,55 @@
+%{
+#include "replacement.h"
+#include "macros.h"
+#include "ascii.h"
+%}
+
+%ignore-case
+%struct-type
+%omit-struct-type
+%compare-lengths
+%readonly-tables
+%null-strings
+%includes
+%define lookup-function-name gumbo_get_svg_tag_replacement
+%define slot-name from
+%define initializer-suffix ,(char*)0
+StringReplacement;
+
+%%
+"altglyph", "altGlyph"
+"altglyphdef", "altGlyphDef"
+"altglyphitem", "altGlyphItem"
+"animatecolor", "animateColor"
+"animatemotion", "animateMotion"
+"animatetransform", "animateTransform"
+"clippath", "clipPath"
+"feblend", "feBlend"
+"fecolormatrix", "feColorMatrix"
+"fecomponenttransfer", "feComponentTransfer"
+"fecomposite", "feComposite"
+"feconvolvematrix", "feConvolveMatrix"
+"fediffuselighting", "feDiffuseLighting"
+"fedisplacementmap", "feDisplacementMap"
+"fedistantlight", "feDistantLight"
+"feflood", "feFlood"
+"fefunca", "feFuncA"
+"fefuncb", "feFuncB"
+"fefuncg", "feFuncG"
+"fefuncr", "feFuncR"
+"fegaussianblur", "feGaussianBlur"
+"feimage", "feImage"
+"femerge", "feMerge"
+"femergenode", "feMergeNode"
+"femorphology", "feMorphology"
+"feoffset", "feOffset"
+"fepointlight", "fePointLight"
+"fespecularlighting", "feSpecularLighting"
+"fespotlight", "feSpotLight"
+"fetile", "feTile"
+"feturbulence", "feTurbulence"
+"foreignobject", "foreignObject"
+"glyphref", "glyphRef"
+"lineargradient", "linearGradient"
+"radialgradient", "radialGradient"
+"textpath", "textPath"
diff --git a/gumbo-parser/src/tag.c b/gumbo-parser/src/tag.c
index 08cb9238..3cae2d33 100644
--- a/gumbo-parser/src/tag.c
+++ b/gumbo-parser/src/tag.c
@@ -1,40 +1,187 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
+/*
+ Copyright 2011 Google Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
 
 #include "gumbo.h"
+#include "util.h"
+#include "tag_lookup.h"
 
 #include <assert.h>
-#include <ctype.h>
 #include <string.h>
 
-const char* kGumboTagNames[] = {
-#include "tag_strings.h"
-    "",  // TAG_UNKNOWN
-    "",  // TAG_LAST
-};
+static const char kGumboTagNames[GUMBO_TAG_LAST+1][15] = {  // one row per GumboTag; 15 = longest name "annotation-xml" (14 bytes) + NUL
+    [GUMBO_TAG_HTML] = "html",
+    [GUMBO_TAG_HEAD] = "head",
+    [GUMBO_TAG_TITLE] = "title",
+    [GUMBO_TAG_BASE] = "base",
+    [GUMBO_TAG_LINK] = "link",
+    [GUMBO_TAG_META] = "meta",
+    [GUMBO_TAG_STYLE] = "style",
+    [GUMBO_TAG_SCRIPT] = "script",
+    [GUMBO_TAG_NOSCRIPT] = "noscript",
+    [GUMBO_TAG_TEMPLATE] = "template",
+    [GUMBO_TAG_BODY] = "body",
+    [GUMBO_TAG_ARTICLE] = "article",
+    [GUMBO_TAG_SECTION] = "section",
+    [GUMBO_TAG_NAV] = "nav",
+    [GUMBO_TAG_ASIDE] = "aside",
+    [GUMBO_TAG_H1] = "h1",
+    [GUMBO_TAG_H2] = "h2",
+    [GUMBO_TAG_H3] = "h3",
+    [GUMBO_TAG_H4] = "h4",
+    [GUMBO_TAG_H5] = "h5",
+    [GUMBO_TAG_H6] = "h6",
+    [GUMBO_TAG_HGROUP] = "hgroup",
+    [GUMBO_TAG_HEADER] = "header",
+    [GUMBO_TAG_FOOTER] = "footer",
+    [GUMBO_TAG_ADDRESS] = "address",
+    [GUMBO_TAG_P] = "p",
+    [GUMBO_TAG_HR] = "hr",
+    [GUMBO_TAG_PRE] = "pre",
+    [GUMBO_TAG_BLOCKQUOTE] = "blockquote",
+    [GUMBO_TAG_OL] = "ol",
+    [GUMBO_TAG_UL] = "ul",
+    [GUMBO_TAG_LI] = "li",
+    [GUMBO_TAG_DL] = "dl",
+    [GUMBO_TAG_DT] = "dt",
+    [GUMBO_TAG_DD] = "dd",
+    [GUMBO_TAG_FIGURE] = "figure",
+    [GUMBO_TAG_FIGCAPTION] = "figcaption",
+    [GUMBO_TAG_MAIN] = "main",
+    [GUMBO_TAG_DIV] = "div",
+    [GUMBO_TAG_A] = "a",
+    [GUMBO_TAG_EM] = "em",
+    [GUMBO_TAG_STRONG] = "strong",
+    [GUMBO_TAG_SMALL] = "small",
+    [GUMBO_TAG_S] = "s",
+    [GUMBO_TAG_CITE] = "cite",
+    [GUMBO_TAG_Q] = "q",
+    [GUMBO_TAG_DFN] = "dfn",
+    [GUMBO_TAG_ABBR] = "abbr",
+    [GUMBO_TAG_DATA] = "data",
+    [GUMBO_TAG_TIME] = "time",
+    [GUMBO_TAG_CODE] = "code",
+    [GUMBO_TAG_VAR] = "var",
+    [GUMBO_TAG_SAMP] = "samp",
+    [GUMBO_TAG_KBD] = "kbd",
+    [GUMBO_TAG_SUB] = "sub",
+    [GUMBO_TAG_SUP] = "sup",
+    [GUMBO_TAG_I] = "i",
+    [GUMBO_TAG_B] = "b",
+    [GUMBO_TAG_U] = "u",
+    [GUMBO_TAG_MARK] = "mark",
+    [GUMBO_TAG_RUBY] = "ruby",
+    [GUMBO_TAG_RT] = "rt",
+    [GUMBO_TAG_RP] = "rp",
+    [GUMBO_TAG_BDI] = "bdi",
+    [GUMBO_TAG_BDO] = "bdo",
+    [GUMBO_TAG_SPAN] = "span",
+    [GUMBO_TAG_BR] = "br",
+    [GUMBO_TAG_WBR] = "wbr",
+    [GUMBO_TAG_INS] = "ins",
+    [GUMBO_TAG_DEL] = "del",
+    [GUMBO_TAG_IMAGE] = "image",
+    [GUMBO_TAG_IMG] = "img",
+    [GUMBO_TAG_IFRAME] = "iframe",
+    [GUMBO_TAG_EMBED] = "embed",
+    [GUMBO_TAG_OBJECT] = "object",
+    [GUMBO_TAG_PARAM] = "param",
+    [GUMBO_TAG_VIDEO] = "video",
+    [GUMBO_TAG_AUDIO] = "audio",
+    [GUMBO_TAG_SOURCE] = "source",
+    [GUMBO_TAG_TRACK] = "track",
+    [GUMBO_TAG_CANVAS] = "canvas",
+    [GUMBO_TAG_MAP] = "map",
+    [GUMBO_TAG_AREA] = "area",
+    [GUMBO_TAG_MATH] = "math",
+    [GUMBO_TAG_MI] = "mi",
+    [GUMBO_TAG_MO] = "mo",
+    [GUMBO_TAG_MN] = "mn",
+    [GUMBO_TAG_MS] = "ms",
+    [GUMBO_TAG_MTEXT] = "mtext",
+    [GUMBO_TAG_MGLYPH] = "mglyph",
+    [GUMBO_TAG_MALIGNMARK] = "malignmark",
+    [GUMBO_TAG_ANNOTATION_XML] = "annotation-xml",
+    [GUMBO_TAG_SVG] = "svg",
+    [GUMBO_TAG_FOREIGNOBJECT] = "foreignobject",
+    [GUMBO_TAG_DESC] = "desc",
+    [GUMBO_TAG_TABLE] = "table",
+    [GUMBO_TAG_CAPTION] = "caption",
+    [GUMBO_TAG_COLGROUP] = "colgroup",
+    [GUMBO_TAG_COL] = "col",
+    [GUMBO_TAG_TBODY] = "tbody",
+    [GUMBO_TAG_THEAD] = "thead",
+    [GUMBO_TAG_TFOOT] = "tfoot",
+    [GUMBO_TAG_TR] = "tr",
+    [GUMBO_TAG_TD] = "td",
+    [GUMBO_TAG_TH] = "th",
+    [GUMBO_TAG_FORM] = "form",
+    [GUMBO_TAG_FIELDSET] = "fieldset",
+    [GUMBO_TAG_LEGEND] = "legend",
+    [GUMBO_TAG_LABEL] = "label",
+    [GUMBO_TAG_INPUT] = "input",
+    [GUMBO_TAG_BUTTON] = "button",
+    [GUMBO_TAG_SELECT] = "select",
+    [GUMBO_TAG_DATALIST] = "datalist",
+    [GUMBO_TAG_OPTGROUP] = "optgroup",
+    [GUMBO_TAG_OPTION] = "option",
+    [GUMBO_TAG_TEXTAREA] = "textarea",
+    [GUMBO_TAG_KEYGEN] = "keygen",
+    [GUMBO_TAG_OUTPUT] = "output",
+    [GUMBO_TAG_PROGRESS] = "progress",
+    [GUMBO_TAG_METER] = "meter",
+    [GUMBO_TAG_DETAILS] = "details",
+    [GUMBO_TAG_SUMMARY] = "summary",
+    [GUMBO_TAG_MENU] = "menu",
+    [GUMBO_TAG_MENUITEM] = "menuitem",
+    [GUMBO_TAG_APPLET] = "applet",
+    [GUMBO_TAG_ACRONYM] = "acronym",
+    [GUMBO_TAG_BGSOUND] = "bgsound",
+    [GUMBO_TAG_DIR] = "dir",
+    [GUMBO_TAG_FRAME] = "frame",
+    [GUMBO_TAG_FRAMESET] = "frameset",
+    [GUMBO_TAG_NOFRAMES] = "noframes",
+    [GUMBO_TAG_LISTING] = "listing",
+    [GUMBO_TAG_XMP] = "xmp",
+    [GUMBO_TAG_NEXTID] = "nextid",
+    [GUMBO_TAG_NOEMBED] = "noembed",
+    [GUMBO_TAG_PLAINTEXT] = "plaintext",
+    [GUMBO_TAG_RB] = "rb",
+    [GUMBO_TAG_STRIKE] = "strike",
+    [GUMBO_TAG_BASEFONT] = "basefont",
+    [GUMBO_TAG_BIG] = "big",
+    [GUMBO_TAG_BLINK] = "blink",
+    [GUMBO_TAG_CENTER] = "center",
+    [GUMBO_TAG_FONT] = "font",
+    [GUMBO_TAG_MARQUEE] = "marquee",
+    [GUMBO_TAG_MULTICOL] = "multicol",
+    [GUMBO_TAG_NOBR] = "nobr",
+    [GUMBO_TAG_SPACER] = "spacer",
+    [GUMBO_TAG_TT] = "tt",
+    [GUMBO_TAG_RTC] = "rtc",
+    [GUMBO_TAG_DIALOG] = "dialog",
 
-static const unsigned char kGumboTagSizes[] = {
-#include "tag_sizes.h"
-    0,  // TAG_UNKNOWN
-    0,  // TAG_LAST
+    [GUMBO_TAG_UNKNOWN] = "",  // sentinel tags have no name; any row without an initializer is zero-filled and also reads as ""
+    [GUMBO_TAG_LAST] = "",
 };
 
 const char* gumbo_normalized_tagname(GumboTag tag) {
   assert(tag <= GUMBO_TAG_LAST);
-  return kGumboTagNames[tag];
+  const char *tagname = kGumboTagNames[tag];  // points at a char[15] row of the name table
+  assert(tagname);  // NOTE(review): cannot fail, a row address is never NULL; a tag missing from the table yields "" rather than tripping this
+  return tagname;
 }
 
 void gumbo_tag_from_original_text(GumboStringPiece* text) {
@@ -45,52 +192,31 @@ void gumbo_tag_from_original_text(GumboStringPiece* text) {
   assert(text->length >= 2);
   assert(text->data[0] == '<');
   assert(text->data[text->length - 1] == '>');
+
   if (text->data[1] == '/') {
-    // End tag.
+    // End tag
     assert(text->length >= 3);
     text->data += 2;  // Move past </
     text->length -= 3;
   } else {
-    // Start tag.
+    // Start tag
     text->data += 1;  // Move past <
     text->length -= 2;
-    // strnchr is apparently not a standard C library function, so I loop
-    // explicitly looking for whitespace or other illegal tag characters - as
-    // accepted by the Tag Name State
     for (const char* c = text->data; c != text->data + text->length; ++c) {
-      if (*c == '\t' || *c == '\n' || *c == '\f' || *c == ' ' || *c == '/') {
+      switch (*c) {
+      case '\t':
+      case '\n':
+      case '\f':
+      case ' ':
+      case '/':
         text->length = c - text->data;
-        break;
+        return;
       }
     }
   }
 }
 
-static int case_memcmp(const char* s1, const char* s2, unsigned int n) {
-  while (n--) {
-    unsigned char c1 = tolower(*s1++);
-    unsigned char c2 = tolower(*s2++);
-    if (c1 != c2) return (int) c1 - (int) c2;
-  }
-  return 0;
-}
-
-#include "tag_gperf.h"
-#define TAG_MAP_SIZE (sizeof(kGumboTagMap) / sizeof(kGumboTagMap[0]))
-
-GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length) {
-  if (length) {
-    unsigned int key = tag_hash(tagname, length);
-    if (key < TAG_MAP_SIZE) {
-      GumboTag tag = kGumboTagMap[key];
-      if (length == kGumboTagSizes[(int) tag] &&
-          !case_memcmp(tagname, kGumboTagNames[(int) tag], length))
-        return tag;
-    }
-  }
-  return GUMBO_TAG_UNKNOWN;
-}
-
-GumboTag gumbo_tag_enum(const char* tagname) {
-  return gumbo_tagn_enum(tagname, strlen(tagname));
+GumboTag gumbo_tagn_enum(const char *tagname, size_t tagname_length) {  // maps a tag name of explicit length to its GumboTag
+  const TagHashSlot *slot = gumbo_tag_lookup(tagname, tagname_length);  // null when the name matches no known tag
+  return slot ? slot->tag : GUMBO_TAG_UNKNOWN;
 }
diff --git a/gumbo-parser/src/tag.in b/gumbo-parser/src/tag.in
deleted file mode 100644
index 4c252648..00000000
--- a/gumbo-parser/src/tag.in
+++ /dev/null
@@ -1,150 +0,0 @@
-html
-head
-title
-base
-link
-meta
-style
-script
-noscript
-template
-body
-article
-section
-nav
-aside
-h1
-h2
-h3
-h4
-h5
-h6
-hgroup
-header
-footer
-address
-p
-hr
-pre
-blockquote
-ol
-ul
-li
-dl
-dt
-dd
-figure
-figcaption
-main
-div
-a
-em
-strong
-small
-s
-cite
-q
-dfn
-abbr
-data
-time
-code
-var
-samp
-kbd
-sub
-sup
-i
-b
-u
-mark
-ruby
-rt
-rp
-bdi
-bdo
-span
-br
-wbr
-ins
-del
-image
-img
-iframe
-embed
-object
-param
-video
-audio
-source
-track
-canvas
-map
-area
-math
-mi
-mo
-mn
-ms
-mtext
-mglyph
-malignmark
-annotation-xml
-svg
-foreignobject
-desc
-table
-caption
-colgroup
-col
-tbody
-thead
-tfoot
-tr
-td
-th
-form
-fieldset
-legend
-label
-input
-button
-select
-datalist
-optgroup
-option
-textarea
-keygen
-output
-progress
-meter
-details
-summary
-menu
-menuitem
-applet
-acronym
-bgsound
-dir
-frame
-frameset
-noframes
-isindex
-listing
-xmp
-nextid
-noembed
-plaintext
-rb
-strike
-basefont
-big
-blink
-center
-font
-marquee
-multicol
-nobr
-spacer
-tt
-rtc
diff --git a/gumbo-parser/src/tag_enum.h b/gumbo-parser/src/tag_enum.h
deleted file mode 100644
index 6d7aeb3d..00000000
--- a/gumbo-parser/src/tag_enum.h
+++ /dev/null
@@ -1,153 +0,0 @@
-// Generated via `gentags.py src/tag.in`.
-// Do not edit; edit src/tag.in instead.
-// clang-format off
-GUMBO_TAG_HTML,
-GUMBO_TAG_HEAD,
-GUMBO_TAG_TITLE,
-GUMBO_TAG_BASE,
-GUMBO_TAG_LINK,
-GUMBO_TAG_META,
-GUMBO_TAG_STYLE,
-GUMBO_TAG_SCRIPT,
-GUMBO_TAG_NOSCRIPT,
-GUMBO_TAG_TEMPLATE,
-GUMBO_TAG_BODY,
-GUMBO_TAG_ARTICLE,
-GUMBO_TAG_SECTION,
-GUMBO_TAG_NAV,
-GUMBO_TAG_ASIDE,
-GUMBO_TAG_H1,
-GUMBO_TAG_H2,
-GUMBO_TAG_H3,
-GUMBO_TAG_H4,
-GUMBO_TAG_H5,
-GUMBO_TAG_H6,
-GUMBO_TAG_HGROUP,
-GUMBO_TAG_HEADER,
-GUMBO_TAG_FOOTER,
-GUMBO_TAG_ADDRESS,
-GUMBO_TAG_P,
-GUMBO_TAG_HR,
-GUMBO_TAG_PRE,
-GUMBO_TAG_BLOCKQUOTE,
-GUMBO_TAG_OL,
-GUMBO_TAG_UL,
-GUMBO_TAG_LI,
-GUMBO_TAG_DL,
-GUMBO_TAG_DT,
-GUMBO_TAG_DD,
-GUMBO_TAG_FIGURE,
-GUMBO_TAG_FIGCAPTION,
-GUMBO_TAG_MAIN,
-GUMBO_TAG_DIV,
-GUMBO_TAG_A,
-GUMBO_TAG_EM,
-GUMBO_TAG_STRONG,
-GUMBO_TAG_SMALL,
-GUMBO_TAG_S,
-GUMBO_TAG_CITE,
-GUMBO_TAG_Q,
-GUMBO_TAG_DFN,
-GUMBO_TAG_ABBR,
-GUMBO_TAG_DATA,
-GUMBO_TAG_TIME,
-GUMBO_TAG_CODE,
-GUMBO_TAG_VAR,
-GUMBO_TAG_SAMP,
-GUMBO_TAG_KBD,
-GUMBO_TAG_SUB,
-GUMBO_TAG_SUP,
-GUMBO_TAG_I,
-GUMBO_TAG_B,
-GUMBO_TAG_U,
-GUMBO_TAG_MARK,
-GUMBO_TAG_RUBY,
-GUMBO_TAG_RT,
-GUMBO_TAG_RP,
-GUMBO_TAG_BDI,
-GUMBO_TAG_BDO,
-GUMBO_TAG_SPAN,
-GUMBO_TAG_BR,
-GUMBO_TAG_WBR,
-GUMBO_TAG_INS,
-GUMBO_TAG_DEL,
-GUMBO_TAG_IMAGE,
-GUMBO_TAG_IMG,
-GUMBO_TAG_IFRAME,
-GUMBO_TAG_EMBED,
-GUMBO_TAG_OBJECT,
-GUMBO_TAG_PARAM,
-GUMBO_TAG_VIDEO,
-GUMBO_TAG_AUDIO,
-GUMBO_TAG_SOURCE,
-GUMBO_TAG_TRACK,
-GUMBO_TAG_CANVAS,
-GUMBO_TAG_MAP,
-GUMBO_TAG_AREA,
-GUMBO_TAG_MATH,
-GUMBO_TAG_MI,
-GUMBO_TAG_MO,
-GUMBO_TAG_MN,
-GUMBO_TAG_MS,
-GUMBO_TAG_MTEXT,
-GUMBO_TAG_MGLYPH,
-GUMBO_TAG_MALIGNMARK,
-GUMBO_TAG_ANNOTATION_XML,
-GUMBO_TAG_SVG,
-GUMBO_TAG_FOREIGNOBJECT,
-GUMBO_TAG_DESC,
-GUMBO_TAG_TABLE,
-GUMBO_TAG_CAPTION,
-GUMBO_TAG_COLGROUP,
-GUMBO_TAG_COL,
-GUMBO_TAG_TBODY,
-GUMBO_TAG_THEAD,
-GUMBO_TAG_TFOOT,
-GUMBO_TAG_TR,
-GUMBO_TAG_TD,
-GUMBO_TAG_TH,
-GUMBO_TAG_FORM,
-GUMBO_TAG_FIELDSET,
-GUMBO_TAG_LEGEND,
-GUMBO_TAG_LABEL,
-GUMBO_TAG_INPUT,
-GUMBO_TAG_BUTTON,
-GUMBO_TAG_SELECT,
-GUMBO_TAG_DATALIST,
-GUMBO_TAG_OPTGROUP,
-GUMBO_TAG_OPTION,
-GUMBO_TAG_TEXTAREA,
-GUMBO_TAG_KEYGEN,
-GUMBO_TAG_OUTPUT,
-GUMBO_TAG_PROGRESS,
-GUMBO_TAG_METER,
-GUMBO_TAG_DETAILS,
-GUMBO_TAG_SUMMARY,
-GUMBO_TAG_MENU,
-GUMBO_TAG_MENUITEM,
-GUMBO_TAG_APPLET,
-GUMBO_TAG_ACRONYM,
-GUMBO_TAG_BGSOUND,
-GUMBO_TAG_DIR,
-GUMBO_TAG_FRAME,
-GUMBO_TAG_FRAMESET,
-GUMBO_TAG_NOFRAMES,
-GUMBO_TAG_ISINDEX,
-GUMBO_TAG_LISTING,
-GUMBO_TAG_XMP,
-GUMBO_TAG_NEXTID,
-GUMBO_TAG_NOEMBED,
-GUMBO_TAG_PLAINTEXT,
-GUMBO_TAG_RB,
-GUMBO_TAG_STRIKE,
-GUMBO_TAG_BASEFONT,
-GUMBO_TAG_BIG,
-GUMBO_TAG_BLINK,
-GUMBO_TAG_CENTER,
-GUMBO_TAG_FONT,
-GUMBO_TAG_MARQUEE,
-GUMBO_TAG_MULTICOL,
-GUMBO_TAG_NOBR,
-GUMBO_TAG_SPACER,
-GUMBO_TAG_TT,
-GUMBO_TAG_RTC,
diff --git a/gumbo-parser/src/tag_gperf.h b/gumbo-parser/src/tag_gperf.h
deleted file mode 100644
index 378eaf95..00000000
--- a/gumbo-parser/src/tag_gperf.h
+++ /dev/null
@@ -1,105 +0,0 @@
-static unsigned int tag_hash(
-    register const char *str, register unsigned int len) {
-  static unsigned short asso_values[] = {296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 6, 4, 3, 1, 1, 0,
-      1, 0, 0, 296, 296, 296, 296, 296, 296, 296, 22, 73, 151, 4, 13, 59, 65, 2,
-      69, 0, 134, 9, 16, 52, 55, 28, 101, 0, 1, 6, 63, 126, 104, 93, 124, 296,
-      296, 296, 296, 296, 296, 296, 22, 73, 151, 4, 13, 59, 65, 2, 69, 0, 134,
-      9, 16, 52, 55, 28, 101, 0, 1, 6, 63, 126, 104, 93, 124, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
-      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296};
-  register unsigned int hval = len;
-
-  switch (hval) {
-    default:
-      hval += asso_values[(unsigned char) str[1] + 3];
-    /*FALLTHROUGH*/
-    case 1:
-      hval += asso_values[(unsigned char) str[0]];
-      break;
-  }
-  return hval + asso_values[(unsigned char) str[len - 1]];
-}
-
-static const unsigned char kGumboTagMap[] = {GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_S, GUMBO_TAG_H6, GUMBO_TAG_H5, GUMBO_TAG_H4,
-    GUMBO_TAG_H3, GUMBO_TAG_SPACER, GUMBO_TAG_H2, GUMBO_TAG_HEADER,
-    GUMBO_TAG_H1, GUMBO_TAG_HEAD, GUMBO_TAG_LAST, GUMBO_TAG_DETAILS,
-    GUMBO_TAG_SELECT, GUMBO_TAG_DIR, GUMBO_TAG_LAST, GUMBO_TAG_DEL,
-    GUMBO_TAG_LAST, GUMBO_TAG_SOURCE, GUMBO_TAG_LEGEND, GUMBO_TAG_DATALIST,
-    GUMBO_TAG_METER, GUMBO_TAG_MGLYPH, GUMBO_TAG_LAST, GUMBO_TAG_MATH,
-    GUMBO_TAG_LABEL, GUMBO_TAG_TABLE, GUMBO_TAG_TEMPLATE, GUMBO_TAG_LAST,
-    GUMBO_TAG_RP, GUMBO_TAG_TIME, GUMBO_TAG_TITLE, GUMBO_TAG_DATA,
-    GUMBO_TAG_APPLET, GUMBO_TAG_HGROUP, GUMBO_TAG_SAMP, GUMBO_TAG_TEXTAREA,
-    GUMBO_TAG_ABBR, GUMBO_TAG_MARQUEE, GUMBO_TAG_LAST, GUMBO_TAG_MENUITEM,
-    GUMBO_TAG_SMALL, GUMBO_TAG_META, GUMBO_TAG_A, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_EMBED,
-    GUMBO_TAG_MAP, GUMBO_TAG_LAST, GUMBO_TAG_PARAM, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_NOBR, GUMBO_TAG_P, GUMBO_TAG_SPAN, GUMBO_TAG_EM,
-    GUMBO_TAG_LAST, GUMBO_TAG_NOFRAMES, GUMBO_TAG_SECTION, GUMBO_TAG_NOEMBED,
-    GUMBO_TAG_NEXTID, GUMBO_TAG_FOOTER, GUMBO_TAG_NOSCRIPT, GUMBO_TAG_HR,
-    GUMBO_TAG_LAST, GUMBO_TAG_FONT, GUMBO_TAG_DL, GUMBO_TAG_TR,
-    GUMBO_TAG_SCRIPT, GUMBO_TAG_MO, GUMBO_TAG_LAST, GUMBO_TAG_DD,
-    GUMBO_TAG_MAIN, GUMBO_TAG_TD, GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_FORM,
-    GUMBO_TAG_OBJECT, GUMBO_TAG_LAST, GUMBO_TAG_FIELDSET, GUMBO_TAG_LAST,
-    GUMBO_TAG_BGSOUND, GUMBO_TAG_MENU, GUMBO_TAG_TFOOT, GUMBO_TAG_FIGURE,
-    GUMBO_TAG_RB, GUMBO_TAG_LI, GUMBO_TAG_LISTING, GUMBO_TAG_BASEFONT,
-    GUMBO_TAG_OPTGROUP, GUMBO_TAG_LAST, GUMBO_TAG_BASE, GUMBO_TAG_ADDRESS,
-    GUMBO_TAG_MI, GUMBO_TAG_LAST, GUMBO_TAG_PLAINTEXT, GUMBO_TAG_LAST,
-    GUMBO_TAG_PROGRESS, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_ACRONYM, GUMBO_TAG_ARTICLE, GUMBO_TAG_LAST, GUMBO_TAG_PRE,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_AREA,
-    GUMBO_TAG_RT, GUMBO_TAG_LAST, GUMBO_TAG_OPTION, GUMBO_TAG_IMAGE,
-    GUMBO_TAG_DT, GUMBO_TAG_LAST, GUMBO_TAG_TT, GUMBO_TAG_HTML, GUMBO_TAG_WBR,
-    GUMBO_TAG_OL, GUMBO_TAG_LAST, GUMBO_TAG_STYLE, GUMBO_TAG_STRIKE,
-    GUMBO_TAG_SUP, GUMBO_TAG_MULTICOL, GUMBO_TAG_U, GUMBO_TAG_DFN, GUMBO_TAG_UL,
-    GUMBO_TAG_FIGCAPTION, GUMBO_TAG_MTEXT, GUMBO_TAG_LAST, GUMBO_TAG_VAR,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_FRAMESET, GUMBO_TAG_LAST,
-    GUMBO_TAG_BR, GUMBO_TAG_I, GUMBO_TAG_FRAME, GUMBO_TAG_LAST, GUMBO_TAG_DIV,
-    GUMBO_TAG_LAST, GUMBO_TAG_TH, GUMBO_TAG_MS, GUMBO_TAG_ANNOTATION_XML,
-    GUMBO_TAG_B, GUMBO_TAG_TBODY, GUMBO_TAG_THEAD, GUMBO_TAG_BIG,
-    GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_XMP, GUMBO_TAG_LAST, GUMBO_TAG_KBD,
-    GUMBO_TAG_LAST, GUMBO_TAG_LINK, GUMBO_TAG_IFRAME, GUMBO_TAG_MARK,
-    GUMBO_TAG_CENTER, GUMBO_TAG_OUTPUT, GUMBO_TAG_DESC, GUMBO_TAG_CANVAS,
-    GUMBO_TAG_COL, GUMBO_TAG_MALIGNMARK, GUMBO_TAG_IMG, GUMBO_TAG_ASIDE,
-    GUMBO_TAG_LAST, GUMBO_TAG_CODE, GUMBO_TAG_LAST, GUMBO_TAG_SUB, GUMBO_TAG_MN,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_INS, GUMBO_TAG_AUDIO,
-    GUMBO_TAG_STRONG, GUMBO_TAG_CITE, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_INPUT, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_NAV, GUMBO_TAG_LAST, GUMBO_TAG_COLGROUP,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_SVG, GUMBO_TAG_KEYGEN, GUMBO_TAG_VIDEO,
-    GUMBO_TAG_BDO, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_BODY, GUMBO_TAG_LAST, GUMBO_TAG_Q, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_TRACK,
-    GUMBO_TAG_LAST, GUMBO_TAG_BDI, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_CAPTION, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_RUBY, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BUTTON,
-    GUMBO_TAG_SUMMARY, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_RTC, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BLINK, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
-    GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_ISINDEX};
diff --git a/gumbo-parser/src/tag_lookup.c b/gumbo-parser/src/tag_lookup.c
new file mode 100644
index 00000000..59adf2ab
--- /dev/null
+++ b/gumbo-parser/src/tag_lookup.c
@@ -0,0 +1,382 @@
+/* ANSI-C code produced by gperf version 3.1 */
+/* Command-line: gperf -m100 lib/tag_lookup.gperf  */
+/* Computed positions: -k'1-2,$' */
+/* Filtered by: mk/gperf-filter.sed */
+
+#include "tag_lookup.h"
+#include "macros.h"
+#include "ascii.h"
+#include <string.h>
+
+#define TOTAL_KEYWORDS 150
+#define MIN_WORD_LENGTH 1
+#define MAX_WORD_LENGTH 14
+#define MIN_HASH_VALUE 9
+#define MAX_HASH_VALUE 271
+/* maximum key range = 263, duplicates = 0 */
+
+
+
+static inline unsigned int
+hash (register const char *str, register size_t len)  /* gperf hash over the first byte, second byte and last byte of str, plus len */
+{
+  static const unsigned short asso_values[] =  /* 259 entries: 256 byte values plus 3 for the "+3" offset below; 'A'-'Z' and 'a'-'z' carry equal weights */
+    {
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272,   9,
+        7,   6,   4,   4,   3,   4,   3,   3, 272, 272,
+      272, 272, 272, 272, 272,  70,  83, 152,   7,  16,
+       61,  98,   5,  76, 102, 126,  12,  19,  54,  54,
+       31,  97,   3,   4,   9,  33, 136, 113,  86,  15,
+      272, 272, 272, 272, 272, 272, 272,  70,  83, 152,
+        7,  16,  61,  98,   5,  76, 102, 126,  12,  19,
+       54,  54,  31,  97,   3,   4,   9,  33, 136, 113,
+       86,  15, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272, 272,
+      272, 272, 272, 272, 272, 272, 272, 272, 272
+    };
+  register unsigned int hval = len;  /* the length seeds the hash */
+
+  switch (hval)
+    {
+      default:
+        hval += asso_values[(unsigned char)str[1]+3];  /* second byte; reached for every len other than 1 */
+      /*FALLTHROUGH*/
+      case 1:
+        hval += asso_values[(unsigned char)str[0]];  /* first byte */
+        break;
+    }
+  return hval + asso_values[(unsigned char)str[len - 1]];  /* last byte; reads str[len - 1], so the caller must pass len >= 1 */
+}
+
+const TagHashSlot *
+gumbo_tag_lookup (register const char *str, register size_t len)  /* returns the slot whose key matches the len bytes at str, or 0 when none does */
+{
+  static const unsigned char lengthtable[] =  /* keyword length per hash value; 0 marks an unused slot */
+    {
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  2,
+       2,  2,  2,  6,  2,  6,  2,  4,  0,  7,  6,  3,  0,  3,
+       0,  6,  6,  8,  5,  0,  0,  4,  5,  5,  8,  0,  2,  4,
+       5,  2,  0,  5,  4,  2,  0,  7,  0,  8,  5,  0,  0,  0,
+       0,  0,  0,  5,  3,  4,  5,  1,  4,  0,  4,  1,  2,  8,
+       7,  7,  6,  6,  8,  2,  8,  4,  2,  0,  6,  0,  0,  3,
+       4,  6, 13,  4,  4,  6,  8,  0,  8,  4,  0,  6,  0,  8,
+       4,  5,  0,  2,  2,  9,  2,  4,  0,  8,  4,  2,  4,  8,
+       7,  0,  2,  5,  2,  0,  6,  0,  3,  2,  2,  6,  3,  8,
+       7,  2,  5,  7,  0,  2,  6,  2,  4,  3,  0, 10,  5,  6,
+       3,  1,  2,  0,  6,  0,  5,  5,  0,  3,  0,  3,  3,  1,
+       4,  6,  4,  7,  3,  0,  0,  2, 10, 10,  0,  0,  6,  1,
+       4,  6,  3,  0,  2,  5,  6,  4,  3,  4,  0,  7,  3,  0,
+       0,  0,  4,  0,  0,  5,  0,  0,  0,  6,  0, 14,  8,  1,
+       3,  0,  0,  7,  3,  0,  0,  0,  0,  0,  0,  5,  3,  0,
+       0,  0,  0,  0,  0,  5,  0,  0,  0,  0,  7,  6,  0,  0,
+       0,  0,  0,  5,  0,  6,  0,  0,  0,  0,  0,  0,  0,  0,
+       3,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  3,  0,  0,  0,  0,  0,
+       0,  0,  5,  0,  0,  3
+    };
+  static const TagHashSlot wordlist[] =  /* indexed by hash value; unused slots carry a null key and GUMBO_TAG_UNKNOWN */
+    {
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"s", GUMBO_TAG_S},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"h6", GUMBO_TAG_H6},
+      {"h5", GUMBO_TAG_H5},
+      {"h4", GUMBO_TAG_H4},
+      {"h3", GUMBO_TAG_H3},
+      {"spacer", GUMBO_TAG_SPACER},
+      {"h2", GUMBO_TAG_H2},
+      {"header", GUMBO_TAG_HEADER},
+      {"h1", GUMBO_TAG_H1},
+      {"head", GUMBO_TAG_HEAD},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"details", GUMBO_TAG_DETAILS},
+      {"select", GUMBO_TAG_SELECT},
+      {"dir", GUMBO_TAG_DIR},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"del", GUMBO_TAG_DEL},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"source", GUMBO_TAG_SOURCE},
+      {"legend", GUMBO_TAG_LEGEND},
+      {"datalist", GUMBO_TAG_DATALIST},
+      {"meter", GUMBO_TAG_METER},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"math", GUMBO_TAG_MATH},
+      {"label", GUMBO_TAG_LABEL},
+      {"table", GUMBO_TAG_TABLE},
+      {"template", GUMBO_TAG_TEMPLATE},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"rp", GUMBO_TAG_RP},
+      {"time", GUMBO_TAG_TIME},
+      {"title", GUMBO_TAG_TITLE},
+      {"hr", GUMBO_TAG_HR},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"tbody", GUMBO_TAG_TBODY},
+      {"samp", GUMBO_TAG_SAMP},
+      {"tr", GUMBO_TAG_TR},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"marquee", GUMBO_TAG_MARQUEE},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"menuitem", GUMBO_TAG_MENUITEM},
+      {"small", GUMBO_TAG_SMALL},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"embed", GUMBO_TAG_EMBED},
+      {"map", GUMBO_TAG_MAP},
+      {"menu", GUMBO_TAG_MENU},
+      {"param", GUMBO_TAG_PARAM},
+      {"p", GUMBO_TAG_P},
+      {"nobr", GUMBO_TAG_NOBR},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"span", GUMBO_TAG_SPAN},
+      {"u", GUMBO_TAG_U},
+      {"em", GUMBO_TAG_EM},
+      {"noframes", GUMBO_TAG_NOFRAMES},
+      {"section", GUMBO_TAG_SECTION},
+      {"noembed", GUMBO_TAG_NOEMBED},
+      {"nextid", GUMBO_TAG_NEXTID},
+      {"footer", GUMBO_TAG_FOOTER},
+      {"noscript", GUMBO_TAG_NOSCRIPT},
+      {"dl", GUMBO_TAG_DL},
+      {"progress", GUMBO_TAG_PROGRESS},
+      {"font", GUMBO_TAG_FONT},
+      {"mo", GUMBO_TAG_MO},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"script", GUMBO_TAG_SCRIPT},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"pre", GUMBO_TAG_PRE},
+      {"main", GUMBO_TAG_MAIN},
+      {"object", GUMBO_TAG_OBJECT},
+      {"foreignobject", GUMBO_TAG_FOREIGNOBJECT},
+      {"form", GUMBO_TAG_FORM},
+      {"data", GUMBO_TAG_DATA},
+      {"applet", GUMBO_TAG_APPLET},
+      {"fieldset", GUMBO_TAG_FIELDSET},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"textarea", GUMBO_TAG_TEXTAREA},
+      {"abbr", GUMBO_TAG_ABBR},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"figure", GUMBO_TAG_FIGURE},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"optgroup", GUMBO_TAG_OPTGROUP},
+      {"meta", GUMBO_TAG_META},
+      {"tfoot", GUMBO_TAG_TFOOT},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"ul", GUMBO_TAG_UL},
+      {"li", GUMBO_TAG_LI},
+      {"plaintext", GUMBO_TAG_PLAINTEXT},
+      {"rb", GUMBO_TAG_RB},
+      {"body", GUMBO_TAG_BODY},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"basefont", GUMBO_TAG_BASEFONT},
+      {"ruby", GUMBO_TAG_RUBY},
+      {"mi", GUMBO_TAG_MI},
+      {"base", GUMBO_TAG_BASE},
+      {"frameset", GUMBO_TAG_FRAMESET},
+      {"summary", GUMBO_TAG_SUMMARY},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"dd", GUMBO_TAG_DD},
+      {"frame", GUMBO_TAG_FRAME},
+      {"td", GUMBO_TAG_TD},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"option", GUMBO_TAG_OPTION},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"svg", GUMBO_TAG_SVG},
+      {"br", GUMBO_TAG_BR},
+      {"ol", GUMBO_TAG_OL},
+      {"dialog", GUMBO_TAG_DIALOG},
+      {"sup", GUMBO_TAG_SUP},
+      {"multicol", GUMBO_TAG_MULTICOL},
+      {"article", GUMBO_TAG_ARTICLE},
+      {"rt", GUMBO_TAG_RT},
+      {"image", GUMBO_TAG_IMAGE},
+      {"listing", GUMBO_TAG_LISTING},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"dt", GUMBO_TAG_DT},
+      {"mglyph", GUMBO_TAG_MGLYPH},
+      {"tt", GUMBO_TAG_TT},
+      {"html", GUMBO_TAG_HTML},
+      {"wbr", GUMBO_TAG_WBR},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"figcaption", GUMBO_TAG_FIGCAPTION},
+      {"style", GUMBO_TAG_STYLE},
+      {"strike", GUMBO_TAG_STRIKE},
+      {"dfn", GUMBO_TAG_DFN},
+      {"a", GUMBO_TAG_A},
+      {"th", GUMBO_TAG_TH},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"hgroup", GUMBO_TAG_HGROUP},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"mtext", GUMBO_TAG_MTEXT},
+      {"thead", GUMBO_TAG_THEAD},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"var", GUMBO_TAG_VAR},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"xmp", GUMBO_TAG_XMP},
+      {"kbd", GUMBO_TAG_KBD},
+      {"i", GUMBO_TAG_I},
+      {"link", GUMBO_TAG_LINK},
+      {"output", GUMBO_TAG_OUTPUT},
+      {"mark", GUMBO_TAG_MARK},
+      {"acronym", GUMBO_TAG_ACRONYM},
+      {"div", GUMBO_TAG_DIV},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"ms", GUMBO_TAG_MS},
+      {"malignmark", GUMBO_TAG_MALIGNMARK},
+      {"blockquote", GUMBO_TAG_BLOCKQUOTE},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"center", GUMBO_TAG_CENTER},
+      {"b", GUMBO_TAG_B},
+      {"desc", GUMBO_TAG_DESC},
+      {"canvas", GUMBO_TAG_CANVAS},
+      {"col", GUMBO_TAG_COL},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"mn", GUMBO_TAG_MN},
+      {"track", GUMBO_TAG_TRACK},
+      {"iframe", GUMBO_TAG_IFRAME},
+      {"code", GUMBO_TAG_CODE},
+      {"sub", GUMBO_TAG_SUB},
+      {"area", GUMBO_TAG_AREA},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"address", GUMBO_TAG_ADDRESS},
+      {"ins", GUMBO_TAG_INS},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"cite", GUMBO_TAG_CITE},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"input", GUMBO_TAG_INPUT},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"keygen", GUMBO_TAG_KEYGEN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"annotation-xml", GUMBO_TAG_ANNOTATION_XML},
+      {"colgroup", GUMBO_TAG_COLGROUP},
+      {"q", GUMBO_TAG_Q},
+      {"big", GUMBO_TAG_BIG},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"bgsound", GUMBO_TAG_BGSOUND},
+      {"nav", GUMBO_TAG_NAV},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"video", GUMBO_TAG_VIDEO},
+      {"img", GUMBO_TAG_IMG},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"audio", GUMBO_TAG_AUDIO},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"caption", GUMBO_TAG_CAPTION},
+      {"strong", GUMBO_TAG_STRONG},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"aside", GUMBO_TAG_ASIDE},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"button", GUMBO_TAG_BUTTON},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"bdo", GUMBO_TAG_BDO},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"bdi", GUMBO_TAG_BDI},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"blink", GUMBO_TAG_BLINK},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {(char*)0,GUMBO_TAG_UNKNOWN},
+      {"rtc", GUMBO_TAG_RTC}
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)  /* only lengths 1..14 can match; this also keeps hash() from reading str[len - 1] with len == 0 */
+    {
+      register unsigned int key = hash (str, len);
+
+      if (key <= MAX_HASH_VALUE)  /* stay inside lengthtable and wordlist */
+        if (len == lengthtable[key])  /* cheap length filter before comparing bytes */
+          {
+            register const char *s = wordlist[key].key;
+
+            if (s && (((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gumbo_ascii_strncasecmp(str, s, len))  /* first bytes may differ only in bit 0x20 (ASCII case); NOTE(review): gumbo_ascii_strncasecmp presumably compares len bytes ignoring ASCII case -- confirm in ascii.h */
+              return &wordlist[key];
+          }
+    }
+  return 0;  /* no keyword matched */
+}
diff --git a/gumbo-parser/src/tag_lookup.gperf b/gumbo-parser/src/tag_lookup.gperf
new file mode 100644
index 00000000..0f7deaaa
--- /dev/null
+++ b/gumbo-parser/src/tag_lookup.gperf
@@ -0,0 +1,169 @@
+%{
+#include "tag_lookup.h"
+#include "macros.h"
+#include "ascii.h"
+%}
+
+%ignore-case
+%struct-type
+%omit-struct-type
+%compare-lengths
+%readonly-tables
+%null-strings
+%includes
+%define lookup-function-name gumbo_tag_lookup
+%define slot-name key
+%define initializer-suffix ,GUMBO_TAG_UNKNOWN
+TagHashSlot;
+
+%%
+html, GUMBO_TAG_HTML
+head, GUMBO_TAG_HEAD
+title, GUMBO_TAG_TITLE
+base, GUMBO_TAG_BASE
+link, GUMBO_TAG_LINK
+meta, GUMBO_TAG_META
+style, GUMBO_TAG_STYLE
+script, GUMBO_TAG_SCRIPT
+noscript, GUMBO_TAG_NOSCRIPT
+template, GUMBO_TAG_TEMPLATE
+body, GUMBO_TAG_BODY
+article, GUMBO_TAG_ARTICLE
+section, GUMBO_TAG_SECTION
+nav, GUMBO_TAG_NAV
+aside, GUMBO_TAG_ASIDE
+h1, GUMBO_TAG_H1
+h2, GUMBO_TAG_H2
+h3, GUMBO_TAG_H3
+h4, GUMBO_TAG_H4
+h5, GUMBO_TAG_H5
+h6, GUMBO_TAG_H6
+hgroup, GUMBO_TAG_HGROUP
+header, GUMBO_TAG_HEADER
+footer, GUMBO_TAG_FOOTER
+address, GUMBO_TAG_ADDRESS
+p, GUMBO_TAG_P
+hr, GUMBO_TAG_HR
+pre, GUMBO_TAG_PRE
+blockquote, GUMBO_TAG_BLOCKQUOTE
+ol, GUMBO_TAG_OL
+ul, GUMBO_TAG_UL
+li, GUMBO_TAG_LI
+dl, GUMBO_TAG_DL
+dt, GUMBO_TAG_DT
+dd, GUMBO_TAG_DD
+figure, GUMBO_TAG_FIGURE
+figcaption, GUMBO_TAG_FIGCAPTION
+main, GUMBO_TAG_MAIN
+div, GUMBO_TAG_DIV
+a, GUMBO_TAG_A
+em, GUMBO_TAG_EM
+strong, GUMBO_TAG_STRONG
+small, GUMBO_TAG_SMALL
+s, GUMBO_TAG_S
+cite, GUMBO_TAG_CITE
+q, GUMBO_TAG_Q
+dfn, GUMBO_TAG_DFN
+abbr, GUMBO_TAG_ABBR
+data, GUMBO_TAG_DATA
+time, GUMBO_TAG_TIME
+code, GUMBO_TAG_CODE
+var, GUMBO_TAG_VAR
+samp, GUMBO_TAG_SAMP
+kbd, GUMBO_TAG_KBD
+sub, GUMBO_TAG_SUB
+sup, GUMBO_TAG_SUP
+i, GUMBO_TAG_I
+b, GUMBO_TAG_B
+u, GUMBO_TAG_U
+mark, GUMBO_TAG_MARK
+ruby, GUMBO_TAG_RUBY
+rt, GUMBO_TAG_RT
+rp, GUMBO_TAG_RP
+bdi, GUMBO_TAG_BDI
+bdo, GUMBO_TAG_BDO
+span, GUMBO_TAG_SPAN
+br, GUMBO_TAG_BR
+wbr, GUMBO_TAG_WBR
+ins, GUMBO_TAG_INS
+del, GUMBO_TAG_DEL
+image, GUMBO_TAG_IMAGE
+img, GUMBO_TAG_IMG
+iframe, GUMBO_TAG_IFRAME
+embed, GUMBO_TAG_EMBED
+object, GUMBO_TAG_OBJECT
+param, GUMBO_TAG_PARAM
+video, GUMBO_TAG_VIDEO
+audio, GUMBO_TAG_AUDIO
+source, GUMBO_TAG_SOURCE
+track, GUMBO_TAG_TRACK
+canvas, GUMBO_TAG_CANVAS
+map, GUMBO_TAG_MAP
+area, GUMBO_TAG_AREA
+math, GUMBO_TAG_MATH
+mi, GUMBO_TAG_MI
+mo, GUMBO_TAG_MO
+mn, GUMBO_TAG_MN
+ms, GUMBO_TAG_MS
+mtext, GUMBO_TAG_MTEXT
+mglyph, GUMBO_TAG_MGLYPH
+malignmark, GUMBO_TAG_MALIGNMARK
+annotation-xml, GUMBO_TAG_ANNOTATION_XML
+svg, GUMBO_TAG_SVG
+foreignobject, GUMBO_TAG_FOREIGNOBJECT
+desc, GUMBO_TAG_DESC
+table, GUMBO_TAG_TABLE
+caption, GUMBO_TAG_CAPTION
+colgroup, GUMBO_TAG_COLGROUP
+col, GUMBO_TAG_COL
+tbody, GUMBO_TAG_TBODY
+thead, GUMBO_TAG_THEAD
+tfoot, GUMBO_TAG_TFOOT
+tr, GUMBO_TAG_TR
+td, GUMBO_TAG_TD
+th, GUMBO_TAG_TH
+form, GUMBO_TAG_FORM
+fieldset, GUMBO_TAG_FIELDSET
+legend, GUMBO_TAG_LEGEND
+label, GUMBO_TAG_LABEL
+input, GUMBO_TAG_INPUT
+button, GUMBO_TAG_BUTTON
+select, GUMBO_TAG_SELECT
+datalist, GUMBO_TAG_DATALIST
+optgroup, GUMBO_TAG_OPTGROUP
+option, GUMBO_TAG_OPTION
+textarea, GUMBO_TAG_TEXTAREA
+keygen, GUMBO_TAG_KEYGEN
+output, GUMBO_TAG_OUTPUT
+progress, GUMBO_TAG_PROGRESS
+meter, GUMBO_TAG_METER
+details, GUMBO_TAG_DETAILS
+summary, GUMBO_TAG_SUMMARY
+menu, GUMBO_TAG_MENU
+menuitem, GUMBO_TAG_MENUITEM
+applet, GUMBO_TAG_APPLET
+acronym, GUMBO_TAG_ACRONYM
+bgsound, GUMBO_TAG_BGSOUND
+dir, GUMBO_TAG_DIR
+frame, GUMBO_TAG_FRAME
+frameset, GUMBO_TAG_FRAMESET
+noframes, GUMBO_TAG_NOFRAMES
+listing, GUMBO_TAG_LISTING
+xmp, GUMBO_TAG_XMP
+nextid, GUMBO_TAG_NEXTID
+noembed, GUMBO_TAG_NOEMBED
+plaintext, GUMBO_TAG_PLAINTEXT
+rb, GUMBO_TAG_RB
+strike, GUMBO_TAG_STRIKE
+basefont, GUMBO_TAG_BASEFONT
+big, GUMBO_TAG_BIG
+blink, GUMBO_TAG_BLINK
+center, GUMBO_TAG_CENTER
+font, GUMBO_TAG_FONT
+marquee, GUMBO_TAG_MARQUEE
+multicol, GUMBO_TAG_MULTICOL
+nobr, GUMBO_TAG_NOBR
+spacer, GUMBO_TAG_SPACER
+tt, GUMBO_TAG_TT
+rtc, GUMBO_TAG_RTC
+dialog, GUMBO_TAG_DIALOG
diff --git a/gumbo-parser/src/tag_lookup.h b/gumbo-parser/src/tag_lookup.h
new file mode 100644
index 00000000..fe8454aa
--- /dev/null
+++ b/gumbo-parser/src/tag_lookup.h
@@ -0,0 +1,13 @@
+#ifndef GUMBO_TAG_LOOKUP_H_
+#define GUMBO_TAG_LOOKUP_H_
+
+#include "gumbo.h"
+
+typedef struct {  // One slot of the gperf-generated tag lookup table.
+    const char *key;  // Tag name; a null pointer marks an unused slot.
+    const GumboTag tag;  // Tag for key; GUMBO_TAG_UNKNOWN in unused slots.
+} TagHashSlot;
+// Returns the slot whose key equals str[0..len), ignoring ASCII case, or NULL.
+const TagHashSlot *gumbo_tag_lookup(const char *str, size_t len);
+
+#endif // GUMBO_TAG_LOOKUP_H_
diff --git a/gumbo-parser/src/tag_sizes.h b/gumbo-parser/src/tag_sizes.h
deleted file mode 100644
index 7c92de07..00000000
--- a/gumbo-parser/src/tag_sizes.h
+++ /dev/null
@@ -1,4 +0,0 @@
-// Generated via `gentags.py src/tag.in`.
-// Do not edit; edit src/tag.in instead.
-// clang-format off
-4, 4, 5, 4, 4, 4, 5, 6, 8, 8, 4, 7, 7, 3, 5, 2, 2, 2, 2, 2, 2, 6, 6, 6, 7, 1, 2, 3, 10, 2, 2, 2, 2, 2, 2, 6, 10, 4, 3, 1, 2, 6, 5, 1, 4, 1, 3, 4, 4, 4, 4, 3, 4, 3, 3, 3, 1, 1, 1, 4, 4, 2, 2, 3, 3, 4, 2, 3, 3, 3, 5, 3, 6, 5, 6, 5, 5, 5, 6, 5, 6, 3, 4, 4, 2, 2, 2, 2, 5, 6, 10, 14, 3, 13, 4, 5, 7, 8, 3, 5, 5, 5, 2, 2, 2, 4, 8, 6, 5, 5, 6, 6, 8, 8, 6, 8, 6, 6, 8, 5, 7, 7, 4, 8, 6, 7, 7, 3, 5, 8, 8, 7, 7, 3, 6, 7, 9, 2, 6, 8, 3, 5, 6, 4, 7, 8, 4, 6, 2, 3, 
\ No newline at end of file
diff --git a/gumbo-parser/src/tag_strings.h b/gumbo-parser/src/tag_strings.h
deleted file mode 100644
index 6540e2e6..00000000
--- a/gumbo-parser/src/tag_strings.h
+++ /dev/null
@@ -1,153 +0,0 @@
-// Generated via `gentags.py src/tag.in`.
-// Do not edit; edit src/tag.in instead.
-// clang-format off
-"html",
-"head",
-"title",
-"base",
-"link",
-"meta",
-"style",
-"script",
-"noscript",
-"template",
-"body",
-"article",
-"section",
-"nav",
-"aside",
-"h1",
-"h2",
-"h3",
-"h4",
-"h5",
-"h6",
-"hgroup",
-"header",
-"footer",
-"address",
-"p",
-"hr",
-"pre",
-"blockquote",
-"ol",
-"ul",
-"li",
-"dl",
-"dt",
-"dd",
-"figure",
-"figcaption",
-"main",
-"div",
-"a",
-"em",
-"strong",
-"small",
-"s",
-"cite",
-"q",
-"dfn",
-"abbr",
-"data",
-"time",
-"code",
-"var",
-"samp",
-"kbd",
-"sub",
-"sup",
-"i",
-"b",
-"u",
-"mark",
-"ruby",
-"rt",
-"rp",
-"bdi",
-"bdo",
-"span",
-"br",
-"wbr",
-"ins",
-"del",
-"image",
-"img",
-"iframe",
-"embed",
-"object",
-"param",
-"video",
-"audio",
-"source",
-"track",
-"canvas",
-"map",
-"area",
-"math",
-"mi",
-"mo",
-"mn",
-"ms",
-"mtext",
-"mglyph",
-"malignmark",
-"annotation-xml",
-"svg",
-"foreignobject",
-"desc",
-"table",
-"caption",
-"colgroup",
-"col",
-"tbody",
-"thead",
-"tfoot",
-"tr",
-"td",
-"th",
-"form",
-"fieldset",
-"legend",
-"label",
-"input",
-"button",
-"select",
-"datalist",
-"optgroup",
-"option",
-"textarea",
-"keygen",
-"output",
-"progress",
-"meter",
-"details",
-"summary",
-"menu",
-"menuitem",
-"applet",
-"acronym",
-"bgsound",
-"dir",
-"frame",
-"frameset",
-"noframes",
-"isindex",
-"listing",
-"xmp",
-"nextid",
-"noembed",
-"plaintext",
-"rb",
-"strike",
-"basefont",
-"big",
-"blink",
-"center",
-"font",
-"marquee",
-"multicol",
-"nobr",
-"spacer",
-"tt",
-"rtc",
diff --git a/gumbo-parser/src/token_type.h b/gumbo-parser/src/token_type.h
index eeab5078..fdee13bb 100644
--- a/gumbo-parser/src/token_type.h
+++ b/gumbo-parser/src/token_type.h
@@ -1,26 +1,6 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-
 #ifndef GUMBO_TOKEN_TYPE_H_
 #define GUMBO_TOKEN_TYPE_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 // An enum representing the type of token.
 typedef enum {
   GUMBO_TOKEN_DOCTYPE,
@@ -34,8 +14,4 @@ typedef enum {
   GUMBO_TOKEN_EOF
 } GumboTokenType;
 
-#ifdef __cplusplus
-}  // extern C
-#endif
-
-#endif  // GUMBO_TOKEN_TYPE_H_
+#endif // GUMBO_TOKEN_TYPE_H_
diff --git a/gumbo-parser/src/tokenizer.c b/gumbo-parser/src/tokenizer.c
index 307589f9..26bade14 100644
--- a/gumbo-parser/src/tokenizer.c
+++ b/gumbo-parser/src/tokenizer.c
@@ -1,69 +1,68 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// Coding conventions specific to this file:
-//
-// 1. Functions that fill in a token should be named emit_*, and should be
-// followed immediately by a return from the tokenizer (true if no error
-// occurred, false if an error occurred).  Sometimes the emit functions
-// themselves return a boolean so that they can be combined with the return
-// statement; in this case, they should match this convention.
-// 2. Functions that shuffle data from temporaries to final API structures
-// should be named finish_*, and be called just before the tokenizer exits the
-// state that accumulates the temporary.
-// 3. All internal data structures should be kept in an initialized state from
-// tokenizer creation onwards, ready to accept input.  When a buffer's flushed
-// and reset, it should be deallocated and immediately reinitialized.
-// 4. Make sure there are appropriate break statements following each state.
-// 5. Assertions on the state of the temporary and tag buffers are usually a
-// good idea, and should go at the entry point of each state when added.
-// 6. Statement order within states goes:
-//    1. Add parse errors, if appropriate.
-//    2. Call finish_* functions to build up tag state.
-//    2. Switch to new state.  Set _reconsume flag if appropriate.
-//    3. Perform any other temporary buffer manipulation.
-//    4. Emit tokens
-//    5. Return/break.
-// This order ensures that we can verify that every emit is followed by a
-// return, ensures that the correct state is recorded with any parse errors, and
-// prevents parse error position from being messed up by possible mark/resets in
-// temporary buffer manipulation.
-
-#include "tokenizer.h"
+/*
+ Copyright 2010 Google Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+ Coding conventions specific to this file:
+
+ 1. Functions that fill in a token should be named emit_*, and should be
+    followed immediately by a return from the tokenizer (true if no error
+    occurred, false if an error occurred). Sometimes the emit functions
+    themselves return a boolean so that they can be combined with the return
+    statement; in this case, they should match this convention.
+ 2. Functions that shuffle data from temporaries to final API structures
+    should be named finish_*, and be called just before the tokenizer exits the
+    state that accumulates the temporary.
+ 3. All internal data structures should be kept in an initialized state from
+    tokenizer creation onwards, ready to accept input. When a buffer's flushed
+    and reset, it should be deallocated and immediately reinitialized.
+ 4. Make sure there are appropriate break statements following each state.
+ 5. Assertions on the state of the temporary and tag buffers are usually a
+    good idea, and should go at the entry point of each state when added.
+ 6. Statement order within states goes:
+    1. Add parse errors, if appropriate.
+    2. Call finish_* functions to build up tag state.
+    3. Switch to new state. Set _reconsume flag if appropriate.
+    4. Perform any other temporary buffer manipulation.
+    5. Emit tokens.
+    6. Return/break.
+    This order ensures that we can verify that every emit is followed by
+    a return, ensures that the correct state is recorded with any parse
+    errors, and prevents parse error position from being messed up by
+    possible mark/resets in temporary buffer manipulation.
+*/
 
 #include <assert.h>
-#include <stdbool.h>
 #include <string.h>
-
+#include "tokenizer.h"
+#include "ascii.h"
 #include "attribute.h"
 #include "char_ref.h"
 #include "error.h"
 #include "gumbo.h"
 #include "parser.h"
 #include "string_buffer.h"
-#include "string_piece.h"
 #include "token_type.h"
 #include "tokenizer_states.h"
 #include "utf8.h"
 #include "util.h"
 #include "vector.h"
 
-// Compared against _script_data_buffer to determine if we're in double-escaped
-// script mode.
-const GumboStringPiece kScriptTag = {"script", 6};
+// Compared against _script_data_buffer to determine if we're in
+// double-escaped script mode.
+static const GumboStringPiece kScriptTag = {.data = "script", .length = 6};
 
 // An enum for the return value of each individual state.
 typedef enum {
@@ -86,31 +85,35 @@ typedef struct GumboInternalTagState {
   // the buffer can be re-used for building up attributes.
   GumboTag _tag;
 
+  // The current tag name. It's set at the same time that _tag is set if _tag
+  // is set to GUMBO_TAG_UNKNOWN.
+  char *_name;
+
   // The starting location of the text in the buffer.
   GumboSourcePosition _start_pos;
 
-  // The current list of attributes.  This is copied (and ownership of its data
-  // transferred) to the GumboStartTag token upon completion of the tag.  New
+  // The current list of attributes. This is copied (and ownership of its data
+  // transferred) to the GumboStartTag token upon completion of the tag. New
   // attributes are added as soon as their attribute name state is complete, and
   // values are filled in by operating on _attributes.data[attributes.length-1].
   GumboVector /* GumboAttribute */ _attributes;
 
-  // If true, the next attribute value to be finished should be dropped.  This
+  // If true, the next attribute value to be finished should be dropped. This
   // happens if a duplicate attribute name is encountered - we want to consume
   // the attribute value, but shouldn't overwrite the existing value.
   bool _drop_next_attr_value;
 
   // The state that caused the tokenizer to switch into a character reference in
-  // attribute value state.  This is used to set the additional allowed
-  // character, and is switched back to on completion.  Initialized as the
+  // attribute value state. This is used to set the additional allowed
+  // character, and is switched back to on completion. Initialized as the
   // tokenizer enters the character reference state.
   GumboTokenizerEnum _attr_value_state;
 
-  // The last start tag to have been emitted by the tokenizer.  This is
+  // The last start tag to have been emitted by the tokenizer. This is
   // necessary to check for appropriate end tags.
   GumboTag _last_start_tag;
 
-  // If true, then this is a start tag.  If false, it's an end tag.  This is
+  // If true, then this is a start tag. If false, it's an end tag. This is
   // necessary to generate the appropriate token type at tag-closing time.
   bool _is_start_tag;
 
@@ -121,43 +124,43 @@ typedef struct GumboInternalTagState {
 // This is the main tokenizer state struct, containing all state used by in
 // tokenizing the input stream.
 typedef struct GumboInternalTokenizerState {
-  // The current lexer state.  Starts in GUMBO_LEX_DATA.
+  // The current lexer state. Starts in GUMBO_LEX_DATA.
   GumboTokenizerEnum _state;
 
   // A flag indicating whether the current input character needs to reconsumed
   // in another state, or whether the next input character should be read for
-  // the next iteration of the state loop.  This is set when the spec reads
+  // the next iteration of the state loop. This is set when the spec reads
   // "Reconsume the current input character in..."
   bool _reconsume_current_input;
 
-  // A flag indicating whether the current node is a foreign element.  This is
+  // A flag indicating whether the current node is a foreign element. This is
   // set by gumbo_tokenizer_set_is_current_node_foreign and checked in the
   // markup declaration state.
   bool _is_current_node_foreign;
 
-  // A flag indicating whether the tokenizer is in a CDATA section.  If so, then
+  // A flag indicating whether the tokenizer is in a CDATA section. If so, then
   // text tokens emitted will be GUMBO_TOKEN_CDATA.
   bool _is_in_cdata;
 
   // Certain states (notably character references) may emit two character tokens
-  // at once, but the contract for lex() fills in only one token at a time.  The
+  // at once, but the contract for lex() fills in only one token at a time. The
   // extra character is buffered here, and then this is checked on entry to
-  // lex().  If a character is stored here, it's immediately emitted and control
-  // returns from the lexer.  kGumboNoChar is used to represent 'no character
+  // lex(). If a character is stored here, it's immediately emitted and control
+  // returns from the lexer. kGumboNoChar is used to represent 'no character
   // stored.'
   //
   // Note that characters emitted through this mechanism will have their source
   // position marked as the character under the mark, i.e. multiple characters
-  // may be emitted with the same position.  This is desirable for character
-  // references, but unsuitable for many other cases.  Use the _temporary_buffer
+  // may be emitted with the same position. This is desirable for character
+  // references, but unsuitable for many other cases. Use the _temporary_buffer
   // mechanism if the buffered characters must have their original positions in
   // the document.
   int _buffered_emit_char;
 
   // A temporary buffer to accumulate characters, as described by the "temporary
-  // buffer" phrase in the tokenizer spec.  We use this in a somewhat unorthodox
+  // buffer" phrase in the tokenizer spec. We use this in a somewhat unorthodox
   // way: we record the specific character to go into the buffer, which may
-  // sometimes be a lowercased version of the actual input character.  However,
+  // sometimes be a lowercased version of the actual input character. However,
   // we *also* use utf8iterator_mark() to record the position at tag start.
   // When we start flushing the temporary buffer, we set _temporary_buffer_emit
   // to the start of it, and then increment it for each call to the tokenizer.
@@ -167,13 +170,13 @@ typedef struct GumboInternalTokenizerState {
   GumboStringBuffer _temporary_buffer;
 
   // The current cursor position we're emitting from within
-  // _temporary_buffer.data.  NULL whenever we're not flushing the buffer.
+  // _temporary_buffer.data. NULL whenever we're not flushing the buffer.
   const char* _temporary_buffer_emit;
 
   // The temporary buffer is also used by the spec to check whether we should
   // enter the script data double escaped state, but we can't use the same
   // buffer for both because we have to flush out "<s" as emits while still
-  // maintaining the context that will eventually become "script".  This is a
+  // maintaining the context that will eventually become "script". This is a
   // separate buffer that's used in place of the temporary buffer for states
   // that may enter the script data double escape start state.
   GumboStringBuffer _script_data_buffer;
@@ -189,7 +192,7 @@ typedef struct GumboInternalTokenizerState {
   // Current tag state.
   GumboTagState _tag_state;
 
-  // Doctype state.  We use the temporary buffer to accumulate characters (it's
+  // Doctype state. We use the temporary buffer to accumulate characters (it's
   // not used for anything else in the doctype states), and then freshly
   // allocate the strings in the doctype token, then copy it over on emit.
   GumboTokenDocType _doc_type_state;
@@ -199,8 +202,10 @@ typedef struct GumboInternalTokenizerState {
 } GumboTokenizerState;
 
 // Adds an ERR_UNEXPECTED_CODE_POINT parse error to the parser's error struct.
-static void tokenizer_add_parse_error(
-    GumboParser* parser, GumboErrorType type) {
+static void tokenizer_add_parse_error (
+  GumboParser* parser,
+  GumboErrorType type
+) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -309,14 +314,14 @@ static void tokenizer_add_parse_error(
 }
 
 static bool is_alpha(int c) {
-  // We don't use ISO C isupper/islower functions here because they
-  // depend upon the program's locale, while the behavior of the HTML5 spec is
-  // independent of which locale the program is run in.
-  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+  // We don't use the ISO C isalpha() function here because it depends
+  // on the current locale, whereas the behavior in the HTML5 spec is
+  // locale-independent.
+  return ((unsigned) c | 32) - 'a' < 26;
 }
 
 static int ensure_lowercase(int c) {
-  return c >= 'A' && c <= 'Z' ? c + 0x20 : c;
+  return gumbo_ascii_tolower(c);
 }
 
 static GumboTokenType get_char_token_type(bool is_in_cdata, int c) {
@@ -346,7 +351,7 @@ static GumboTokenType get_char_token_type(bool is_in_cdata, int c) {
 // text that will eventually be emitted, it needs to be called a couple of
 // states before the spec says "Set the temporary buffer to the empty string".
 // In general, this should be called whenever there's a transition to a
-// "less-than sign state".  The initial < and possibly / then need to be
+// "less-than sign state". The initial < and possibly / then need to be
 // appended to the temporary buffer, their presence needs to be accounted for in
 // states that compare the temporary buffer against a literal value, and
 // spec stanzas that say "emit a < and / character token along with a character
@@ -356,30 +361,40 @@ static void clear_temporary_buffer(GumboParser* parser) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   assert(!tokenizer->_temporary_buffer_emit);
   utf8iterator_mark(&tokenizer->_input);
-  gumbo_string_buffer_clear(parser, &tokenizer->_temporary_buffer);
+  gumbo_string_buffer_clear(&tokenizer->_temporary_buffer);
   // The temporary buffer and script data buffer are the same object in the
   // spec, so the script data buffer should be cleared as well.
-  gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
+  gumbo_string_buffer_clear(&tokenizer->_script_data_buffer);
 }
 
 // Appends a codepoint to the temporary buffer.
-static void append_char_to_temporary_buffer(
-    GumboParser* parser, int codepoint) {
-  gumbo_string_buffer_append_codepoint(
-      parser, codepoint, &parser->_tokenizer_state->_temporary_buffer);
+static void append_char_to_temporary_buffer (
+  GumboParser* parser,
+  int codepoint
+) {
+  gumbo_string_buffer_append_codepoint (
+   codepoint,
+   &parser->_tokenizer_state->_temporary_buffer
+  );
 }
 
-// Checks to see if the temporary buffer equals a certain string.
-// Make sure this remains side-effect free; it's used in assertions.
 #ifndef NDEBUG
-static bool temporary_buffer_equals(GumboParser* parser, const char* text) {
-  GumboStringBuffer* buffer = &parser->_tokenizer_state->_temporary_buffer;
-  // TODO(jdtang): See if the extra strlen is a performance problem, and replace
-  // it with an explicit sizeof(literal) if necessary.  I don't think it will
-  // be, as this is only used in a couple of rare states.
-  int text_len = strlen(text);
-  return text_len == buffer->length &&
-         memcmp(buffer->data, text, text_len) == 0;
+static bool temporary_buffer_equals__ (
+  const GumboParser* parser,
+  const char* text,
+  size_t text_len
+) {
+  const GumboStringBuffer* buf = &parser->_tokenizer_state->_temporary_buffer;
+  return
+    text_len == buf->length
+    && memcmp(buf->data, text, text_len) == 0;
+}
+
+#define temporary_buffer_equals(parser, text) \
+  temporary_buffer_equals__(parser, "" text, sizeof(text) - 1)
+
+static bool temporary_buffer_is_empty(const GumboParser* parser) {
+  return parser->_tokenizer_state->_temporary_buffer.length == 0;
 }
 #endif
 
@@ -387,9 +402,9 @@ static void doc_type_state_init(GumboParser* parser) {
   GumboTokenDocType* doc_type_state =
       &parser->_tokenizer_state->_doc_type_state;
   // We initialize these to NULL here so that we don't end up leaking memory if
-  // we never see a doctype token.  When we do see a doctype token, we reset
+  // we never see a doctype token. When we do see a doctype token, we reset
   // them to a freshly-allocated empty string so that we can present a uniform
-  // interface to client code and not make them check for null.  Ownership is
+  // interface to client code and not make them check for null. Ownership is
   // transferred to the doctype token when it's emitted.
   doc_type_state->name = NULL;
   doc_type_state->public_identifier = NULL;
@@ -408,7 +423,7 @@ static void reset_token_start_point(GumboTokenizerState* tokenizer) {
 }
 
 // Sets the tag buffer original text and start point to the current iterator
-// position.  This is necessary because attribute names & values may have
+// position. This is necessary because attribute names & values may have
 // whitespace preceeding them, and so we can't assume that the actual token
 // starting point was the end of the last tag buffer usage.
 static void reset_tag_buffer_start_point(GumboParser* parser) {
@@ -423,15 +438,14 @@ static void reset_tag_buffer_start_point(GumboParser* parser) {
 // and clears the temporary buffer.
 static void finish_temporary_buffer(GumboParser* parser, const char** output) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
-  *output =
-      gumbo_string_buffer_to_string(parser, &tokenizer->_temporary_buffer);
+  *output = gumbo_string_buffer_to_string(&tokenizer->_temporary_buffer);
   clear_temporary_buffer(parser);
 }
 
 // Advances the iterator past the end of the token, and then fills in the
-// relevant position fields.  It's assumed that after every emit, the tokenizer
+// relevant position fields. It's assumed that after every emit, the tokenizer
 // will immediately return (letting the tree-construction stage read the filled
-// in Token).  Thus, it's safe to advance the input stream here, since it will
+// in Token). Thus, it's safe to advance the input stream here, since it will
 // bypass the advance at the bottom of the state machine loop.
 //
 // Since this advances the iterator and resets the current input, make sure to
@@ -450,7 +464,7 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
   if (token->original_text.length > 0 &&
       token->original_text.data[token->original_text.length - 1] == '\r') {
     // The UTF8 iterator will ignore carriage returns in the input stream, which
-    // means that the next token may start one past a \r character.  The pointer
+    // means that the next token may start one past a \r character. The pointer
     // arithmetic above results in that \r being appended to the original text
     // of the preceding token, so we have to adjust its length here to chop the
     // \r off.
@@ -463,7 +477,7 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
 static void finish_doctype_public_id(GumboParser* parser) {
   GumboTokenDocType* doc_type_state =
       &parser->_tokenizer_state->_doc_type_state;
-  gumbo_parser_deallocate(parser, (void*) doc_type_state->public_identifier);
+  gumbo_free((void*) doc_type_state->public_identifier);
   finish_temporary_buffer(parser, &doc_type_state->public_identifier);
   doc_type_state->has_public_identifier = true;
 }
@@ -473,7 +487,7 @@ static void finish_doctype_public_id(GumboParser* parser) {
 static void finish_doctype_system_id(GumboParser* parser) {
   GumboTokenDocType* doc_type_state =
       &parser->_tokenizer_state->_doc_type_state;
-  gumbo_parser_deallocate(parser, (void*) doc_type_state->system_identifier);
+  gumbo_free((void*) doc_type_state->system_identifier);
   finish_temporary_buffer(parser, &doc_type_state->system_identifier);
   doc_type_state->has_system_identifier = true;
 }
@@ -495,7 +509,7 @@ static StateResult emit_replacement_char(
   return RETURN_ERROR;
 }
 
-// Writes an EOF character token.  Always returns RETURN_SUCCESS.
+// Writes an EOF character token. Always returns RETURN_SUCCESS.
 static StateResult emit_eof(GumboParser* parser, GumboToken* output) {
   emit_char(parser, -1, output);
   return RETURN_SUCCESS;
@@ -520,7 +534,9 @@ static void emit_doctype(GumboParser* parser, GumboToken* output) {
 // Debug-only function that explicitly sets the attribute vector data to NULL so
 // it can be asserted on tag creation, verifying that there are no memory leaks.
 static void mark_tag_state_as_empty(GumboTagState* tag_state) {
+  UNUSED_IF_NDEBUG(tag_state);
 #ifndef NDEBUG
+  tag_state->_name = NULL;
   tag_state->_attributes = kGumboEmptyVector;
 #endif
 }
@@ -532,6 +548,7 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
   if (tag_state->_is_start_tag) {
     output->type = GUMBO_TOKEN_START_TAG;
     output->v.start_tag.tag = tag_state->_tag;
+    output->v.start_tag.name = tag_state->_name;
     output->v.start_tag.attributes = tag_state->_attributes;
     output->v.start_tag.is_self_closing = tag_state->_is_self_closing;
     tag_state->_last_start_tag = tag_state->_tag;
@@ -540,23 +557,27 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
         "Emitted start tag %s.\n", gumbo_normalized_tagname(tag_state->_tag));
   } else {
     output->type = GUMBO_TOKEN_END_TAG;
-    output->v.end_tag = tag_state->_tag;
+    output->v.end_tag.tag = tag_state->_tag;
+    output->v.end_tag.is_self_closing = tag_state->_is_self_closing;
     // In end tags, ownership of the attributes vector is not transferred to the
     // token, but it's still initialized as normal, so it must be manually
-    // deallocated.  There may also be attributes to destroy, in certain broken
+    // deallocated. There may also be attributes to destroy, in certain broken
     // cases like </div</th> (the "th" is an attribute there).
     for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
-      gumbo_destroy_attribute(parser, tag_state->_attributes.data[i]);
+      gumbo_destroy_attribute(tag_state->_attributes.data[i]);
     }
-    gumbo_parser_deallocate(parser, tag_state->_attributes.data);
+    gumbo_free(tag_state->_attributes.data);
     mark_tag_state_as_empty(tag_state);
     gumbo_debug(
         "Emitted end tag %s.\n", gumbo_normalized_tagname(tag_state->_tag));
   }
-  gumbo_string_buffer_destroy(parser, &tag_state->_buffer);
+  gumbo_string_buffer_destroy(&tag_state->_buffer);
   finish_token(parser, output);
-  gumbo_debug("Original text = %.*s.\n", output->original_text.length,
-      output->original_text.data);
+  gumbo_debug (
+    "Original text = %.*s.\n",
+    (int) output->original_text.length,
+    output->original_text.data
+  );
   assert(output->original_text.length >= 2);
   assert(output->original_text.data[0] == '<');
   assert(output->original_text.data[output->original_text.length - 1] == '>');
@@ -570,26 +591,36 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
 static void abandon_current_tag(GumboParser* parser) {
   GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
   for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
-    gumbo_destroy_attribute(parser, tag_state->_attributes.data[i]);
+    gumbo_destroy_attribute(tag_state->_attributes.data[i]);
   }
-  gumbo_parser_deallocate(parser, tag_state->_attributes.data);
+  gumbo_free(tag_state->_attributes.data);
   mark_tag_state_as_empty(tag_state);
-  gumbo_string_buffer_destroy(parser, &tag_state->_buffer);
+  gumbo_string_buffer_destroy(&tag_state->_buffer);
   gumbo_debug("Abandoning current tag.\n");
 }
 
-// Wraps the consume_char_ref function to handle its output and make the
-// appropriate TokenizerState modifications.  Returns RETURN_ERROR if a parse
+// Wraps the gumbo_consume_char_ref function to handle its output and make the
+// appropriate TokenizerState modifications. Returns RETURN_ERROR if a parse
 // error occurred, RETURN_SUCCESS otherwise.
-static StateResult emit_char_ref(GumboParser* parser,
-    int additional_allowed_char, bool is_in_attribute, GumboToken* output) {
+static StateResult emit_char_ref (
+  GumboParser* parser,
+  int additional_allowed_char,
+  bool UNUSED_ARG(is_in_attribute),
+  GumboToken* output
+) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   OneOrTwoCodepoints char_ref;
-  bool status = consume_char_ref(
-      parser, &tokenizer->_input, additional_allowed_char, false, &char_ref);
+  bool status = gumbo_consume_char_ref (
+    parser,
+    &tokenizer->_input,
+    additional_allowed_char,
+    false,
+    &char_ref
+  );
   if (char_ref.first != kGumboNoChar) {
-    // consume_char_ref ends with the iterator pointing at the next character,
-    // so we need to be sure not advance it again before reading the next token.
+    // gumbo_consume_char_ref ends with the iterator pointing at the next
+    // character, so we need to be sure not to advance it again before
+    // reading the next token.
     tokenizer->_reconsume_current_input = true;
     emit_char(parser, char_ref.first, output);
     tokenizer->_buffered_emit_char = char_ref.second;
@@ -599,9 +630,9 @@ static StateResult emit_char_ref(GumboParser* parser,
   return status ? RETURN_SUCCESS : RETURN_ERROR;
 }
 
-// Emits a comment token.  Comments use the temporary buffer to accumulate their
+// Emits a comment token. Comments use the temporary buffer to accumulate their
 // data, and then it's copied over and released to the 'text' field of the
-// GumboToken union.  Always returns RETURN_SUCCESS.
+// GumboToken union. Always returns RETURN_SUCCESS.
 static StateResult emit_comment(GumboParser* parser, GumboToken* output) {
   output->type = GUMBO_TOKEN_COMMENT;
   finish_temporary_buffer(parser, &output->v.text);
@@ -626,11 +657,11 @@ static bool maybe_emit_from_temporary_buffer(
   }
 
   assert(*c == utf8iterator_current(&tokenizer->_input));
-  // emit_char also advances the input stream.  We need to do some juggling of
+  // emit_char also advances the input stream. We need to do some juggling of
   // the _reconsume_current_input flag to get the proper behavior when emitting
-  // previous tokens.  Basically, _reconsume_current_input should *never* be set
+  // previous tokens. Basically, _reconsume_current_input should *never* be set
   // when emitting anything from the temporary buffer, since those characters
-  // have already been advanced past.  However, it should be preserved so that
+  // have already been advanced past. However, it should be preserved so that
   // when the *next* character is encountered again, the tokenizer knows not to
   // advance past it.
   bool saved_reconsume_state = tokenizer->_reconsume_current_input;
@@ -644,7 +675,7 @@ static bool maybe_emit_from_temporary_buffer(
 // Sets up the tokenizer to begin flushing the temporary buffer.
 // This resets the input iterator stream to the start of the last tag, sets up
 // _temporary_buffer_emit, and then (if the temporary buffer is non-empty) emits
-// the first character in it.  It returns true if a character was emitted, false
+// the first character in it. It returns true if a character was emitted, false
 // otherwise.
 static bool emit_temporary_buffer(GumboParser* parser, GumboToken* output) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
@@ -654,32 +685,35 @@ static bool emit_temporary_buffer(GumboParser* parser, GumboToken* output) {
   return maybe_emit_from_temporary_buffer(parser, output);
 }
 
-// Appends a codepoint to the current tag buffer.  If
+// Appends a codepoint to the current tag buffer. If
 // reinitilize_position_on_first is set, this also initializes the tag buffer
 // start point; the only time you would *not* want to pass true for this
 // parameter is if you want the original_text to include character (like an
 // opening quote) that doesn't appear in the value.
-static void append_char_to_tag_buffer(
-    GumboParser* parser, int codepoint, bool reinitilize_position_on_first) {
+static void append_char_to_tag_buffer (
+  GumboParser* parser,
+  int codepoint,
+  bool reinitilize_position_on_first
+) {
   GumboStringBuffer* buffer = &parser->_tokenizer_state->_tag_state._buffer;
   if (buffer->length == 0 && reinitilize_position_on_first) {
     reset_tag_buffer_start_point(parser);
   }
-  gumbo_string_buffer_append_codepoint(parser, codepoint, buffer);
+  gumbo_string_buffer_append_codepoint(codepoint, buffer);
 }
 
-// (Re-)initialize the tag buffer.  This also resets the original_text pointer
+// (Re-)initialize the tag buffer. This also resets the original_text pointer
 // and _start_pos field to point to the current position.
 static void initialize_tag_buffer(GumboParser* parser) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
 
-  gumbo_string_buffer_init(parser, &tag_state->_buffer);
+  gumbo_string_buffer_init(&tag_state->_buffer);
   reset_tag_buffer_start_point(parser);
 }
 
 // Initializes the tag_state to start a new tag, keeping track of the opening
-// positions and original text.  Takes a boolean indicating whether this is a
+// positions and original text. Takes a boolean indicating whether this is a
 // start or end tag.
 static void start_new_tag(GumboParser* parser, bool is_start_tag) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
@@ -690,14 +724,15 @@ static void start_new_tag(GumboParser* parser, bool is_start_tag) {
   assert(is_alpha(c));
 
   initialize_tag_buffer(parser);
-  gumbo_string_buffer_append_codepoint(parser, c, &tag_state->_buffer);
+  gumbo_string_buffer_append_codepoint(c, &tag_state->_buffer);
 
+  assert(tag_state->_name == NULL);
   assert(tag_state->_attributes.data == NULL);
   // Initial size chosen by statistical analysis of a corpus of 60k webpages.
-  // 99.5% of elements have 0 attributes, 93% of the remainder have 1.  These
+  // 99.5% of elements have 0 attributes, 93% of the remainder have 1. These
   // numbers are a bit higher for more modern websites (eg. ~45% = 0, ~40% = 1
   // for the HTML5 Spec), but still have basically 99% of nodes with <= 2 attrs.
-  gumbo_vector_init(parser, 1, &tag_state->_attributes);
+  gumbo_vector_init(1, &tag_state->_attributes);
   tag_state->_drop_next_attr_value = false;
   tag_state->_is_start_tag = is_start_tag;
   tag_state->_is_self_closing = false;
@@ -708,7 +743,7 @@ static void start_new_tag(GumboParser* parser, bool is_start_tag) {
 static void copy_over_tag_buffer(GumboParser* parser, const char** output) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
-  *output = gumbo_string_buffer_to_string(parser, &tag_state->_buffer);
+  *output = gumbo_string_buffer_to_string(&tag_state->_buffer);
 }
 
 // Fills in:
@@ -717,9 +752,12 @@ static void copy_over_tag_buffer(GumboParser* parser, const char** output) {
 // * The start_pos GumboSourcePosition with the start position of the tag
 // buffer.
 // * The end_pos GumboSourcePosition with the current source position.
-static void copy_over_original_tag_text(GumboParser* parser,
-    GumboStringPiece* original_text, GumboSourcePosition* start_pos,
-    GumboSourcePosition* end_pos) {
+static void copy_over_original_tag_text (
+  GumboParser* parser,
+  GumboStringPiece* original_text,
+  GumboSourcePosition* start_pos,
+  GumboSourcePosition* end_pos
+) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
 
@@ -729,7 +767,7 @@ static void copy_over_original_tag_text(GumboParser* parser,
   if (original_text->data[original_text->length - 1] == '\r') {
     // Since \r is skipped by the UTF-8 iterator, it can sometimes end up
     // appended to the end of original text even when it's really the first part
-    // of the next character.  If we detect this situation, shrink the length of
+    // of the next character. If we detect this situation, shrink the length of
     // the original text by 1 to remove the carriage return.
     --original_text->length;
   }
@@ -739,8 +777,7 @@ static void copy_over_original_tag_text(GumboParser* parser,
 
 // Releases and then re-initializes the tag buffer.
 static void reinitialize_tag_buffer(GumboParser* parser) {
-  gumbo_parser_deallocate(
-      parser, parser->_tokenizer_state->_tag_state._buffer.data);
+  gumbo_free(parser->_tokenizer_state->_tag_state._buffer.data);
   initialize_tag_buffer(parser);
 }
 
@@ -750,14 +787,24 @@ static void finish_tag_name(GumboParser* parser) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
 
-  tag_state->_tag =
-      gumbo_tagn_enum(tag_state->_buffer.data, tag_state->_buffer.length);
+  const char *data = tag_state->_buffer.data;
+  size_t length = tag_state->_buffer.length;
+  tag_state->_tag = gumbo_tagn_enum(data, length);
+  if (tag_state->_tag == GUMBO_TAG_UNKNOWN) {
+    char *name = gumbo_alloc(length + 1);
+    memcpy(name, data, length);
+    name[length] = 0;
+    tag_state->_name = name;
+  }
   reinitialize_tag_buffer(parser);
 }
 
 // Adds an ERR_DUPLICATE_ATTR parse error to the parser's error struct.
-static void add_duplicate_attr_error(GumboParser* parser, const char* attr_name,
-    int original_index, int new_index) {
+static void add_duplicate_attr_error (
+  GumboParser* parser,
+  int original_index,
+  int new_index
+) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -773,11 +820,11 @@ static void add_duplicate_attr_error(GumboParser* parser, const char* attr_name,
 }
 
 // Creates a new attribute in the current tag, copying the current tag buffer to
-// the attribute's name.  The attribute's value starts out as the empty string
+// the attribute's name. The attribute's value starts out as the empty string
 // (following the "Boolean attributes" section of the spec) and is only
-// overwritten on finish_attribute_value().  If the attribute has already been
+// overwritten on finish_attribute_value(). If the attribute has already been
 // specified, the new attribute is dropped, a parse error is added, and the
-// function returns false.  Otherwise, this returns true.
+// function returns false. Otherwise, this returns true.
 static bool finish_attribute_name(GumboParser* parser) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
@@ -789,30 +836,43 @@ static bool finish_attribute_name(GumboParser* parser) {
   GumboVector* /* GumboAttribute* */ attributes = &tag_state->_attributes;
   for (unsigned int i = 0; i < attributes->length; ++i) {
     GumboAttribute* attr = attributes->data[i];
-    if (strlen(attr->name) == tag_state->_buffer.length &&
-        memcmp(attr->name, tag_state->_buffer.data,
-            tag_state->_buffer.length) == 0) {
+    if (
+      strlen(attr->name) == tag_state->_buffer.length
+      && 0 == memcmp (
+        attr->name,
+        tag_state->_buffer.data,
+        tag_state->_buffer.length
+      )
+    ) {
       // Identical attribute; bail.
-      add_duplicate_attr_error(parser, attr->name, i, attributes->length);
+      add_duplicate_attr_error(parser, i, attributes->length);
       tag_state->_drop_next_attr_value = true;
       return false;
     }
   }
 
-  GumboAttribute* attr = gumbo_parser_allocate(parser, sizeof(GumboAttribute));
+  GumboAttribute* attr = gumbo_alloc(sizeof(GumboAttribute));
   attr->attr_namespace = GUMBO_ATTR_NAMESPACE_NONE;
   copy_over_tag_buffer(parser, &attr->name);
-  copy_over_original_tag_text(
-      parser, &attr->original_name, &attr->name_start, &attr->name_end);
-  attr->value = gumbo_copy_stringz(parser, "");
-  copy_over_original_tag_text(
-      parser, &attr->original_value, &attr->name_start, &attr->name_end);
-  gumbo_vector_add(parser, attr, attributes);
+  copy_over_original_tag_text (
+    parser,
+    &attr->original_name,
+    &attr->name_start,
+    &attr->name_end
+  );
+  attr->value = gumbo_strdup("");
+  copy_over_original_tag_text (
+    parser,
+    &attr->original_value,
+    &attr->name_start,
+    &attr->name_end
+  );
+  gumbo_vector_add(attr, attributes);
   reinitialize_tag_buffer(parser);
   return true;
 }
 
-// Finishes an attribute value.  This sets the value of the most recently added
+// Finishes an attribute value. This sets the value of the most recently added
 // attribute to the current contents of the tag buffer.
 static void finish_attribute_value(GumboParser* parser) {
   GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
@@ -826,7 +886,7 @@ static void finish_attribute_value(GumboParser* parser) {
 
   GumboAttribute* attr =
       tag_state->_attributes.data[tag_state->_attributes.length - 1];
-  gumbo_parser_deallocate(parser, (void*) attr->value);
+  gumbo_free((void*) attr->value);
   copy_over_tag_buffer(parser, &attr->value);
   copy_over_original_tag_text(
       parser, &attr->original_value, &attr->value_start, &attr->value_end);
@@ -842,24 +902,27 @@ static bool is_appropriate_end_tag(GumboParser* parser) {
                                            tag_state->_buffer.length);
 }
 
-void gumbo_tokenizer_state_init(
-    GumboParser* parser, const char* text, size_t text_length) {
-  GumboTokenizerState* tokenizer =
-      gumbo_parser_allocate(parser, sizeof(GumboTokenizerState));
+void gumbo_tokenizer_state_init (
+  GumboParser* parser,
+  const char* text,
+  size_t text_length
+) {
+  GumboTokenizerState* tokenizer = gumbo_alloc(sizeof(GumboTokenizerState));
   parser->_tokenizer_state = tokenizer;
   gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
   tokenizer->_reconsume_current_input = false;
   tokenizer->_is_current_node_foreign = false;
   tokenizer->_is_in_cdata = false;
   tokenizer->_tag_state._last_start_tag = GUMBO_TAG_LAST;
+  tokenizer->_tag_state._name = NULL;
 
   tokenizer->_buffered_emit_char = kGumboNoChar;
-  gumbo_string_buffer_init(parser, &tokenizer->_temporary_buffer);
+  gumbo_string_buffer_init(&tokenizer->_temporary_buffer);
   tokenizer->_temporary_buffer_emit = NULL;
 
   mark_tag_state_as_empty(&tokenizer->_tag_state);
 
-  gumbo_string_buffer_init(parser, &tokenizer->_script_data_buffer);
+  gumbo_string_buffer_init(&tokenizer->_script_data_buffer);
   tokenizer->_token_start = text;
   utf8iterator_init(parser, text, text_length, &tokenizer->_input);
   utf8iterator_get_position(&tokenizer->_input, &tokenizer->_token_start_pos);
@@ -871,27 +934,37 @@ void gumbo_tokenizer_state_destroy(GumboParser* parser) {
   assert(tokenizer->_doc_type_state.name == NULL);
   assert(tokenizer->_doc_type_state.public_identifier == NULL);
   assert(tokenizer->_doc_type_state.system_identifier == NULL);
-  gumbo_string_buffer_destroy(parser, &tokenizer->_temporary_buffer);
-  gumbo_string_buffer_destroy(parser, &tokenizer->_script_data_buffer);
-  gumbo_parser_deallocate(parser, tokenizer);
+  gumbo_string_buffer_destroy(&tokenizer->_temporary_buffer);
+  gumbo_string_buffer_destroy(&tokenizer->_script_data_buffer);
+  assert(tokenizer->_tag_state._name == NULL);
+  assert(tokenizer->_tag_state._attributes.data == NULL);
+  gumbo_free(tokenizer);
 }
 
 void gumbo_tokenizer_set_state(GumboParser* parser, GumboTokenizerEnum state) {
   parser->_tokenizer_state->_state = state;
 }
 
-void gumbo_tokenizer_set_is_current_node_foreign(
-    GumboParser* parser, bool is_foreign) {
+void gumbo_tokenizer_set_is_current_node_foreign (
+  GumboParser* parser,
+  bool is_foreign
+) {
   if (is_foreign != parser->_tokenizer_state->_is_current_node_foreign) {
-    gumbo_debug("Toggling is_current_node_foreign to %s.\n",
-        is_foreign ? "true" : "false");
+    gumbo_debug (
+      "Toggling is_current_node_foreign to %s.\n",
+      is_foreign ? "true" : "false"
+    );
   }
   parser->_tokenizer_state->_is_current_node_foreign = is_foreign;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#data-state
-static StateResult handle_data_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#data-state
+static StateResult handle_data_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '&':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_CHAR_REF_IN_DATA);
@@ -914,16 +987,24 @@ static StateResult handle_data_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-data-state
-static StateResult handle_char_ref_in_data_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-in-data-state
+static StateResult handle_char_ref_in_data_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int UNUSED_ARG(c),
+  GumboToken* output
+) {
   gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
   return emit_char_ref(parser, ' ', false, output);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rcdata-state
-static StateResult handle_rcdata_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
+static StateResult handle_rcdata_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '&':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_CHAR_REF_IN_RCDATA);
@@ -943,16 +1024,24 @@ static StateResult handle_rcdata_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-rcdata-state
-static StateResult handle_char_ref_in_rcdata_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-in-rcdata-state
+static StateResult handle_char_ref_in_rcdata_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int UNUSED_ARG(c),
+  GumboToken* output
+) {
   gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
   return emit_char_ref(parser, ' ', false, output);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-state
-static StateResult handle_rawtext_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
+static StateResult handle_rawtext_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '<':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_LT);
@@ -968,9 +1057,13 @@ static StateResult handle_rawtext_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-state
-static StateResult handle_script_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
+static StateResult handle_script_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '<':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_LT);
@@ -986,9 +1079,13 @@ static StateResult handle_script_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#plaintext-state
-static StateResult handle_plaintext_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
+static StateResult handle_plaintext_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\0':
       return emit_replacement_char(parser, output);
@@ -999,9 +1096,13 @@ static StateResult handle_plaintext_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#tag-open-state
-static StateResult handle_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
+static StateResult handle_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "<"));
   switch (c) {
     case '!':
@@ -1032,9 +1133,13 @@ static StateResult handle_tag_open_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#end-tag-open-state
-static StateResult handle_end_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
+static StateResult handle_end_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "</"));
   switch (c) {
     case '>':
@@ -1059,9 +1164,13 @@ static StateResult handle_end_tag_open_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#tag-name-state
-static StateResult handle_tag_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
+static StateResult handle_tag_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1093,9 +1202,13 @@ static StateResult handle_tag_name_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-less-than-sign-state
-static StateResult handle_rcdata_lt_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-less-than-sign-state
+static StateResult handle_rcdata_lt_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "<"));
   if (c == '/') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA_END_TAG_OPEN);
@@ -1108,9 +1221,13 @@ static StateResult handle_rcdata_lt_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-end-tag-open-state
-static StateResult handle_rcdata_end_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-open-state
+static StateResult handle_rcdata_end_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "</"));
   if (is_alpha(c)) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA_END_TAG_NAME);
@@ -1124,9 +1241,14 @@ static StateResult handle_rcdata_end_tag_open_state(GumboParser* parser,
   return true;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-end-tag-name-state
-static StateResult handle_rcdata_end_tag_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
+static StateResult handle_rcdata_end_tag_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
+  UNUSED_IF_NDEBUG(tokenizer);
   assert(tokenizer->_temporary_buffer.length >= 2);
   if (is_alpha(c)) {
     append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1156,9 +1278,13 @@ static StateResult handle_rcdata_end_tag_name_state(GumboParser* parser,
   return emit_temporary_buffer(parser, output);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-less-than-sign-state
-static StateResult handle_rawtext_lt_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-less-than-sign-state
+static StateResult handle_rawtext_lt_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "<"));
   if (c == '/') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_END_TAG_OPEN);
@@ -1171,9 +1297,13 @@ static StateResult handle_rawtext_lt_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-end-tag-open-state
-static StateResult handle_rawtext_end_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-open-state
+static StateResult handle_rawtext_end_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "</"));
   if (is_alpha(c)) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_END_TAG_NAME);
@@ -1186,9 +1316,13 @@ static StateResult handle_rawtext_end_tag_open_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-end-tag-name-state
-static StateResult handle_rawtext_end_tag_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-name-state
+static StateResult handle_rawtext_end_tag_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(tokenizer->_temporary_buffer.length >= 2);
   gumbo_debug("Last end tag: %*s\n", (int) tokenizer->_tag_state._buffer.length,
       tokenizer->_tag_state._buffer.data);
@@ -1221,9 +1355,13 @@ static StateResult handle_rawtext_end_tag_name_state(GumboParser* parser,
   return emit_temporary_buffer(parser, output);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-less-than-sign-state
-static StateResult handle_script_lt_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state
+static StateResult handle_script_lt_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "<"));
   if (c == '/') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_END_TAG_OPEN);
@@ -1240,9 +1378,13 @@ static StateResult handle_script_lt_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-end-tag-open-state
-static StateResult handle_script_end_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-open-state
+static StateResult handle_script_end_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "</"));
   if (is_alpha(c)) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_END_TAG_NAME);
@@ -1255,9 +1397,14 @@ static StateResult handle_script_end_tag_open_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-end-tag-name-state
-static StateResult handle_script_end_tag_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
+static StateResult handle_script_end_tag_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
+  UNUSED_IF_NDEBUG(tokenizer);
   assert(tokenizer->_temporary_buffer.length >= 2);
   if (is_alpha(c)) {
     append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1287,9 +1434,13 @@ static StateResult handle_script_end_tag_name_state(GumboParser* parser,
   return emit_temporary_buffer(parser, output);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escape-start-state
-static StateResult handle_script_escaped_start_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-state
+static StateResult handle_script_escaped_start_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   if (c == '-') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_START_DASH);
     return emit_current_char(parser, output);
@@ -1300,9 +1451,13 @@ static StateResult handle_script_escaped_start_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escape-start-dash-state
-static StateResult handle_script_escaped_start_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-dash-state
+static StateResult handle_script_escaped_start_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   if (c == '-') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH);
     return emit_current_char(parser, output);
@@ -1313,9 +1468,13 @@ static StateResult handle_script_escaped_start_dash_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-state
-static StateResult handle_script_escaped_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
+static StateResult handle_script_escaped_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH);
@@ -1335,9 +1494,13 @@ static StateResult handle_script_escaped_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-dash-state
-static StateResult handle_script_escaped_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-state
+static StateResult handle_script_escaped_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH);
@@ -1360,9 +1523,13 @@ static StateResult handle_script_escaped_dash_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-dash-dash-state
-static StateResult handle_script_escaped_dash_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-dash-state
+static StateResult handle_script_escaped_dash_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       return emit_current_char(parser, output);
@@ -1387,9 +1554,13 @@ static StateResult handle_script_escaped_dash_dash_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-less-than-sign-state
-static StateResult handle_script_escaped_lt_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-less-than-sign-state
+static StateResult handle_script_escaped_lt_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "<"));
   assert(!tokenizer->_script_data_buffer.length);
   if (c == '/') {
@@ -1399,8 +1570,10 @@ static StateResult handle_script_escaped_lt_state(GumboParser* parser,
   } else if (is_alpha(c)) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_START);
     append_char_to_temporary_buffer(parser, c);
-    gumbo_string_buffer_append_codepoint(
-        parser, ensure_lowercase(c), &tokenizer->_script_data_buffer);
+    gumbo_string_buffer_append_codepoint (
+      ensure_lowercase(c),
+      &tokenizer->_script_data_buffer
+    );
     return emit_temporary_buffer(parser, output);
   } else {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED);
@@ -1408,9 +1581,13 @@ static StateResult handle_script_escaped_lt_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-end-tag-open-state
-static StateResult handle_script_escaped_end_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-open-state
+static StateResult handle_script_escaped_end_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "</"));
   if (is_alpha(c)) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME);
@@ -1423,9 +1600,14 @@ static StateResult handle_script_escaped_end_tag_open_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-end-tag-name-state
-static StateResult handle_script_escaped_end_tag_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
+static StateResult handle_script_escaped_end_tag_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
+  UNUSED_IF_NDEBUG(tokenizer);
   assert(tokenizer->_temporary_buffer.length >= 2);
   if (is_alpha(c)) {
     append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1455,9 +1637,13 @@ static StateResult handle_script_escaped_end_tag_name_state(GumboParser* parser,
   return emit_temporary_buffer(parser, output);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escape-start-state
-static StateResult handle_script_double_escaped_start_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state
+static StateResult handle_script_double_escaped_start_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1465,16 +1651,22 @@ static StateResult handle_script_double_escaped_start_state(GumboParser* parser,
     case ' ':
     case '/':
     case '>':
-      gumbo_tokenizer_set_state(
-          parser, gumbo_string_equals(&kScriptTag,
-                      (GumboStringPiece*) &tokenizer->_script_data_buffer)
-                      ? GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED
-                      : GUMBO_LEX_SCRIPT_ESCAPED);
+      gumbo_tokenizer_set_state (
+        parser,
+        gumbo_string_equals (
+          &kScriptTag,
+          (GumboStringPiece*) &tokenizer->_script_data_buffer
+        )
+        ? GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED
+        : GUMBO_LEX_SCRIPT_ESCAPED
+      );
       return emit_current_char(parser, output);
     default:
       if (is_alpha(c)) {
-        gumbo_string_buffer_append_codepoint(
-            parser, ensure_lowercase(c), &tokenizer->_script_data_buffer);
+        gumbo_string_buffer_append_codepoint (
+          ensure_lowercase(c),
+          &tokenizer->_script_data_buffer
+        );
         return emit_current_char(parser, output);
       } else {
         gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED);
@@ -1484,9 +1676,13 @@ static StateResult handle_script_double_escaped_start_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-state
-static StateResult handle_script_double_escaped_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
+static StateResult handle_script_double_escaped_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH);
@@ -1505,9 +1701,13 @@ static StateResult handle_script_double_escaped_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-dash-state
-static StateResult handle_script_double_escaped_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state
+static StateResult handle_script_double_escaped_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(
@@ -1529,10 +1729,13 @@ static StateResult handle_script_double_escaped_dash_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-dash-dash-state
-static StateResult handle_script_double_escaped_dash_dash_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state
+static StateResult handle_script_double_escaped_dash_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       return emit_current_char(parser, output);
@@ -1555,12 +1758,16 @@ static StateResult handle_script_double_escaped_dash_dash_state(
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-less-than-sign-state
-static StateResult handle_script_double_escaped_lt_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state
+static StateResult handle_script_double_escaped_lt_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   if (c == '/') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END);
-    gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
+    gumbo_string_buffer_clear(&tokenizer->_script_data_buffer);
     return emit_current_char(parser, output);
   } else {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
@@ -1569,9 +1776,13 @@ static StateResult handle_script_double_escaped_lt_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escape-end-state
-static StateResult handle_script_double_escaped_end_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state
+static StateResult handle_script_double_escaped_end_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1587,8 +1798,10 @@ static StateResult handle_script_double_escaped_end_state(GumboParser* parser,
       return emit_current_char(parser, output);
     default:
       if (is_alpha(c)) {
-        gumbo_string_buffer_append_codepoint(
-            parser, ensure_lowercase(c), &tokenizer->_script_data_buffer);
+        gumbo_string_buffer_append_codepoint (
+          ensure_lowercase(c),
+          &tokenizer->_script_data_buffer
+        );
         return emit_current_char(parser, output);
       } else {
         gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
@@ -1598,9 +1811,13 @@ static StateResult handle_script_double_escaped_end_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-attribute-name-state
-static StateResult handle_before_attr_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
+static StateResult handle_before_attr_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1636,9 +1853,13 @@ static StateResult handle_before_attr_name_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-name-state
-static StateResult handle_attr_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
+static StateResult handle_attr_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1679,9 +1900,13 @@ static StateResult handle_attr_name_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#after-attribute-name-state
-static StateResult handle_after_attr_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
+static StateResult handle_after_attr_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1719,9 +1944,13 @@ static StateResult handle_after_attr_name_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-attribute-value-state
-static StateResult handle_before_attr_value_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
+static StateResult handle_before_attr_value_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1768,9 +1997,13 @@ static StateResult handle_before_attr_value_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-double-quoted-state
-static StateResult handle_attr_value_double_quoted_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-double-quoted-state
+static StateResult handle_attr_value_double_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* UNUSED_ARG(output)
+) {
   switch (c) {
     case '"':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED);
@@ -1796,9 +2029,13 @@ static StateResult handle_attr_value_double_quoted_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-single-quoted-state
-static StateResult handle_attr_value_single_quoted_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-single-quoted-state
+static StateResult handle_attr_value_single_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* UNUSED_ARG(output)
+) {
   switch (c) {
     case '\'':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED);
@@ -1824,9 +2061,13 @@ static StateResult handle_attr_value_single_quoted_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-unquoted-state
-static StateResult handle_attr_value_unquoted_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-unquoted-state
+static StateResult handle_attr_value_unquoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1867,9 +2108,13 @@ static StateResult handle_attr_value_unquoted_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-attribute-value-state
-static StateResult handle_char_ref_in_attr_value_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-in-attribute-value-state
+static StateResult handle_char_ref_in_attr_value_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int UNUSED_ARG(c),
+  GumboToken* UNUSED_ARG(output)
+) {
   OneOrTwoCodepoints char_ref;
   int allowed_char;
   bool is_unquoted = false;
@@ -1893,9 +2138,15 @@ static StateResult handle_char_ref_in_attr_value_state(GumboParser* parser,
 
   // Ignore the status, since we don't have a convenient way of signalling that
   // a parser error has occurred when the error occurs in the middle of a
-  // multi-state token.  We'd need a flag inside the TokenizerState to do this,
+  // multi-state token. We'd need a flag inside the TokenizerState to do this,
   // but that's a low priority fix.
-  consume_char_ref(parser, &tokenizer->_input, allowed_char, true, &char_ref);
+  gumbo_consume_char_ref (
+    parser,
+    &tokenizer->_input,
+    allowed_char,
+    true,
+    &char_ref
+  );
   if (char_ref.first != kGumboNoChar) {
     tokenizer->_reconsume_current_input = true;
     append_char_to_tag_buffer(parser, char_ref.first, is_unquoted);
@@ -1909,9 +2160,13 @@ static StateResult handle_char_ref_in_attr_value_state(GumboParser* parser,
   return NEXT_CHAR;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#after-attribute-value-quoted-state
-static StateResult handle_after_attr_value_quoted_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-quoted-state
+static StateResult handle_after_attr_value_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   finish_attribute_value(parser);
   switch (c) {
     case '\t':
@@ -1940,9 +2195,13 @@ static StateResult handle_after_attr_value_quoted_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#self-closing-start-tag-state
-static StateResult handle_self_closing_start_tag_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#self-closing-start-tag-state
+static StateResult handle_self_closing_start_tag_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '>':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
@@ -1961,9 +2220,13 @@ static StateResult handle_self_closing_start_tag_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#bogus-comment-state
-static StateResult handle_bogus_comment_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
+static StateResult handle_bogus_comment_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   while (c != '>' && c != -1) {
     if (c == '\0') {
       tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
@@ -1977,29 +2240,48 @@ static StateResult handle_bogus_comment_state(GumboParser* parser,
   return emit_comment(parser, output);
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#markup-declaration-open-state
-static StateResult handle_markup_declaration_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
-  if (utf8iterator_maybe_consume_match(
-          &tokenizer->_input, "--", sizeof("--") - 1, true)) {
+// https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
+static StateResult handle_markup_declaration_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int UNUSED_ARG(c),
+  GumboToken* UNUSED_ARG(output)
+) {
+  if (
+    utf8iterator_maybe_consume_match (
+      &tokenizer->_input,
+      "--",
+      sizeof("--") - 1,
+      true
+    )
+  ) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START);
     tokenizer->_reconsume_current_input = true;
-  } else if (utf8iterator_maybe_consume_match(
-                 &tokenizer->_input, "DOCTYPE", sizeof("DOCTYPE") - 1, false)) {
+  } else if (
+    utf8iterator_maybe_consume_match (
+      &tokenizer->_input,
+      "DOCTYPE",
+      sizeof("DOCTYPE") - 1,
+      false
+    )
+  ) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_DOCTYPE);
     tokenizer->_reconsume_current_input = true;
     // If we get here, we know we'll eventually emit a doctype token, so now is
-    // the time to initialize the doctype strings.  (Not in doctype_state_init,
+    // the time to initialize the doctype strings. (Not in doctype_state_init,
     // since then they'll leak if ownership never gets transferred to the
     // doctype token.
-    tokenizer->_doc_type_state.name = gumbo_copy_stringz(parser, "");
-    tokenizer->_doc_type_state.public_identifier =
-        gumbo_copy_stringz(parser, "");
-    tokenizer->_doc_type_state.system_identifier =
-        gumbo_copy_stringz(parser, "");
-  } else if (tokenizer->_is_current_node_foreign &&
-             utf8iterator_maybe_consume_match(
-                 &tokenizer->_input, "[CDATA[", sizeof("[CDATA[") - 1, true)) {
+    tokenizer->_doc_type_state.name = gumbo_strdup("");
+    tokenizer->_doc_type_state.public_identifier = gumbo_strdup("");
+    tokenizer->_doc_type_state.system_identifier = gumbo_strdup("");
+  } else if (
+    tokenizer->_is_current_node_foreign
+    && utf8iterator_maybe_consume_match (
+      &tokenizer->_input,
+      "[CDATA[", sizeof("[CDATA[") - 1,
+      true
+    )
+  ) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_CDATA);
     tokenizer->_is_in_cdata = true;
     tokenizer->_reconsume_current_input = true;
@@ -2012,9 +2294,13 @@ static StateResult handle_markup_declaration_state(GumboParser* parser,
   return NEXT_CHAR;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-start-state
-static StateResult handle_comment_start_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
+static StateResult handle_comment_start_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START_DASH);
@@ -2041,9 +2327,13 @@ static StateResult handle_comment_start_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-start-dash-state
-static StateResult handle_comment_start_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-dash-state
+static StateResult handle_comment_start_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
@@ -2072,9 +2362,13 @@ static StateResult handle_comment_start_dash_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-state
-static StateResult handle_comment_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-state
+static StateResult handle_comment_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
@@ -2094,9 +2388,13 @@ static StateResult handle_comment_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-dash-state
-static StateResult handle_comment_end_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-dash-state
+static StateResult handle_comment_end_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
@@ -2120,9 +2418,13 @@ static StateResult handle_comment_end_dash_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-state
-static StateResult handle_comment_end_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
+static StateResult handle_comment_end_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '>':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
@@ -2159,9 +2461,13 @@ static StateResult handle_comment_end_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-bang-state
-static StateResult handle_comment_end_bang_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state
+static StateResult handle_comment_end_bang_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
@@ -2195,9 +2501,13 @@ static StateResult handle_comment_end_bang_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#doctype-state
-static StateResult handle_doctype_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-state
+static StateResult handle_doctype_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(!tokenizer->_temporary_buffer.length);
   switch (c) {
     case '\t':
@@ -2221,9 +2531,13 @@ static StateResult handle_doctype_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-doctype-name-state
-static StateResult handle_before_doctype_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-name-state
+static StateResult handle_before_doctype_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2256,21 +2570,25 @@ static StateResult handle_before_doctype_name_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#doctype-name-state
-static StateResult handle_doctype_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-name-state
+static StateResult handle_doctype_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
     case '\f':
     case ' ':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_NAME);
-      gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
+      gumbo_free((void*) tokenizer->_doc_type_state.name);
       finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
       return NEXT_CHAR;
     case '>':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
-      gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
+      gumbo_free((void*) tokenizer->_doc_type_state.name);
       finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
       emit_doctype(parser, output);
       return RETURN_SUCCESS;
@@ -2282,7 +2600,7 @@ static StateResult handle_doctype_name_state(GumboParser* parser,
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
       tokenizer->_doc_type_state.force_quirks = true;
-      gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
+      gumbo_free((void*) tokenizer->_doc_type_state.name);
       finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
       emit_doctype(parser, output);
       return RETURN_ERROR;
@@ -2294,9 +2612,13 @@ static StateResult handle_doctype_name_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-name-state
-static StateResult handle_after_doctype_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-name-state
+static StateResult handle_after_doctype_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2334,10 +2656,13 @@ static StateResult handle_after_doctype_name_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-public-keyword-state
-static StateResult handle_after_doctype_public_keyword_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-keyword-state
+static StateResult handle_after_doctype_public_keyword_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2347,13 +2672,13 @@ static StateResult handle_after_doctype_public_keyword_state(
       return NEXT_CHAR;
     case '"':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2378,9 +2703,13 @@ static StateResult handle_after_doctype_public_keyword_state(
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#before-doctype-public-identifier-state
-static StateResult handle_before_doctype_public_id_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-public-identifier-state
+static StateResult handle_before_doctype_public_id_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2388,12 +2717,12 @@ static StateResult handle_before_doctype_public_id_state(GumboParser* parser,
     case ' ':
       return NEXT_CHAR;
     case '"':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2418,10 +2747,13 @@ static StateResult handle_before_doctype_public_id_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-public-identifier-(double-quoted)-state
-static StateResult handle_doctype_public_id_double_quoted_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(double-quoted)-state
+static StateResult handle_doctype_public_id_double_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '"':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID);
@@ -2451,10 +2783,13 @@ static StateResult handle_doctype_public_id_double_quoted_state(
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-public-identifier-(single-quoted)-state
-static StateResult handle_doctype_public_id_single_quoted_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(single-quoted)-state
+static StateResult handle_doctype_public_id_single_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\'':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID);
@@ -2484,9 +2819,13 @@ static StateResult handle_doctype_public_id_single_quoted_state(
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-public-identifier-state
-static StateResult handle_after_doctype_public_id_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-identifier-state
+static StateResult handle_after_doctype_public_id_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2501,13 +2840,13 @@ static StateResult handle_after_doctype_public_id_state(GumboParser* parser,
       return RETURN_SUCCESS;
     case '"':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2526,10 +2865,13 @@ static StateResult handle_after_doctype_public_id_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#between-doctype-public-and-system-identifiers-state
-static StateResult handle_between_doctype_public_system_id_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#between-doctype-public-and-system-identifiers-state
+static StateResult handle_between_doctype_public_system_id_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2541,12 +2883,12 @@ static StateResult handle_between_doctype_public_system_id_state(
       emit_doctype(parser, output);
       return RETURN_SUCCESS;
     case '"':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2565,10 +2907,13 @@ static StateResult handle_between_doctype_public_system_id_state(
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-system-keyword-state
-static StateResult handle_after_doctype_system_keyword_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-keyword-state
+static StateResult handle_after_doctype_system_keyword_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2578,13 +2923,13 @@ static StateResult handle_after_doctype_system_keyword_state(
       return NEXT_CHAR;
     case '"':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2608,9 +2953,13 @@ static StateResult handle_after_doctype_system_keyword_state(
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#before-doctype-system-identifier-state
-static StateResult handle_before_doctype_system_id_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-system-identifier-state
+static StateResult handle_before_doctype_system_id_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2618,12 +2967,12 @@ static StateResult handle_before_doctype_system_id_state(GumboParser* parser,
     case ' ':
       return NEXT_CHAR;
     case '"':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2647,10 +2996,13 @@ static StateResult handle_before_doctype_system_id_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-system-identifier-(double-quoted)-state
-static StateResult handle_doctype_system_id_double_quoted_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(double-quoted)-state
+static StateResult handle_doctype_system_id_double_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '"':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID);
@@ -2680,10 +3032,13 @@ static StateResult handle_doctype_system_id_double_quoted_state(
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-system-identifier-(single-quoted)-state
-static StateResult handle_doctype_system_id_single_quoted_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(single-quoted)-state
+static StateResult handle_doctype_system_id_single_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\'':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID);
@@ -2713,9 +3068,13 @@ static StateResult handle_doctype_system_id_single_quoted_state(
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-system-identifier-state
-static StateResult handle_after_doctype_system_id_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-identifier-state
+static StateResult handle_after_doctype_system_id_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2739,9 +3098,13 @@ static StateResult handle_after_doctype_system_id_state(GumboParser* parser,
   }
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#bogus-doctype-state
-static StateResult handle_bogus_doctype_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#bogus-doctype-state
+static StateResult handle_bogus_doctype_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   if (c == '>' || c == -1) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
     emit_doctype(parser, output);
@@ -2750,9 +3113,13 @@ static StateResult handle_bogus_doctype_state(GumboParser* parser,
   return NEXT_CHAR;
 }
 
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#cdata-section-state
-static StateResult handle_cdata_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state
+static StateResult handle_cdata_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   if (c == -1 || utf8iterator_maybe_consume_match(
                      &tokenizer->_input, "]]>", sizeof("]]>") - 1, true)) {
     tokenizer->_reconsume_current_input = true;
@@ -2765,50 +3132,83 @@ static StateResult handle_cdata_state(GumboParser* parser,
   }
 }
 
-typedef StateResult (*GumboLexerStateFunction)(
-    GumboParser*, GumboTokenizerState*, int, GumboToken*);
-
-static GumboLexerStateFunction dispatch_table[] = {handle_data_state,
-    handle_char_ref_in_data_state, handle_rcdata_state,
-    handle_char_ref_in_rcdata_state, handle_rawtext_state, handle_script_state,
-    handle_plaintext_state, handle_tag_open_state, handle_end_tag_open_state,
-    handle_tag_name_state, handle_rcdata_lt_state,
-    handle_rcdata_end_tag_open_state, handle_rcdata_end_tag_name_state,
-    handle_rawtext_lt_state, handle_rawtext_end_tag_open_state,
-    handle_rawtext_end_tag_name_state, handle_script_lt_state,
-    handle_script_end_tag_open_state, handle_script_end_tag_name_state,
-    handle_script_escaped_start_state, handle_script_escaped_start_dash_state,
-    handle_script_escaped_state, handle_script_escaped_dash_state,
-    handle_script_escaped_dash_dash_state, handle_script_escaped_lt_state,
-    handle_script_escaped_end_tag_open_state,
-    handle_script_escaped_end_tag_name_state,
-    handle_script_double_escaped_start_state,
-    handle_script_double_escaped_state, handle_script_double_escaped_dash_state,
-    handle_script_double_escaped_dash_dash_state,
-    handle_script_double_escaped_lt_state,
-    handle_script_double_escaped_end_state, handle_before_attr_name_state,
-    handle_attr_name_state, handle_after_attr_name_state,
-    handle_before_attr_value_state, handle_attr_value_double_quoted_state,
-    handle_attr_value_single_quoted_state, handle_attr_value_unquoted_state,
-    handle_char_ref_in_attr_value_state, handle_after_attr_value_quoted_state,
-    handle_self_closing_start_tag_state, handle_bogus_comment_state,
-    handle_markup_declaration_state, handle_comment_start_state,
-    handle_comment_start_dash_state, handle_comment_state,
-    handle_comment_end_dash_state, handle_comment_end_state,
-    handle_comment_end_bang_state, handle_doctype_state,
-    handle_before_doctype_name_state, handle_doctype_name_state,
-    handle_after_doctype_name_state, handle_after_doctype_public_keyword_state,
-    handle_before_doctype_public_id_state,
-    handle_doctype_public_id_double_quoted_state,
-    handle_doctype_public_id_single_quoted_state,
-    handle_after_doctype_public_id_state,
-    handle_between_doctype_public_system_id_state,
-    handle_after_doctype_system_keyword_state,
-    handle_before_doctype_system_id_state,
-    handle_doctype_system_id_double_quoted_state,
-    handle_doctype_system_id_single_quoted_state,
-    handle_after_doctype_system_id_state, handle_bogus_doctype_state,
-    handle_cdata_state};
+typedef StateResult (*GumboLexerStateFunction) ( // Signature shared by every state handler stored in dispatch_table.
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer, // gumbo_lex passes parser->_tokenizer_state here.
+  int c, // gumbo_lex passes utf8iterator_current(&tokenizer->_input) here.
+  GumboToken* output
+);
+
+static GumboLexerStateFunction dispatch_table[] = { // Indexed by GumboTokenizerEnum in gumbo_lex; keep entries in enum order.
+  handle_data_state,
+  handle_char_ref_in_data_state,
+  handle_rcdata_state,
+  handle_char_ref_in_rcdata_state,
+  handle_rawtext_state,
+  handle_script_state,
+  handle_plaintext_state,
+  handle_tag_open_state,
+  handle_end_tag_open_state,
+  handle_tag_name_state,
+  handle_rcdata_lt_state,
+  handle_rcdata_end_tag_open_state,
+  handle_rcdata_end_tag_name_state,
+  handle_rawtext_lt_state,
+  handle_rawtext_end_tag_open_state,
+  handle_rawtext_end_tag_name_state,
+  handle_script_lt_state,
+  handle_script_end_tag_open_state,
+  handle_script_end_tag_name_state,
+  handle_script_escaped_start_state,
+  handle_script_escaped_start_dash_state,
+  handle_script_escaped_state,
+  handle_script_escaped_dash_state,
+  handle_script_escaped_dash_dash_state,
+  handle_script_escaped_lt_state,
+  handle_script_escaped_end_tag_open_state,
+  handle_script_escaped_end_tag_name_state,
+  handle_script_double_escaped_start_state,
+  handle_script_double_escaped_state,
+  handle_script_double_escaped_dash_state,
+  handle_script_double_escaped_dash_dash_state,
+  handle_script_double_escaped_lt_state,
+  handle_script_double_escaped_end_state,
+  handle_before_attr_name_state,
+  handle_attr_name_state,
+  handle_after_attr_name_state,
+  handle_before_attr_value_state,
+  handle_attr_value_double_quoted_state,
+  handle_attr_value_single_quoted_state,
+  handle_attr_value_unquoted_state,
+  handle_char_ref_in_attr_value_state,
+  handle_after_attr_value_quoted_state,
+  handle_self_closing_start_tag_state,
+  handle_bogus_comment_state,
+  handle_markup_declaration_state,
+  handle_comment_start_state,
+  handle_comment_start_dash_state,
+  handle_comment_state,
+  handle_comment_end_dash_state,
+  handle_comment_end_state,
+  handle_comment_end_bang_state,
+  handle_doctype_state,
+  handle_before_doctype_name_state,
+  handle_doctype_name_state,
+  handle_after_doctype_name_state,
+  handle_after_doctype_public_keyword_state,
+  handle_before_doctype_public_id_state,
+  handle_doctype_public_id_double_quoted_state,
+  handle_doctype_public_id_single_quoted_state,
+  handle_after_doctype_public_id_state,
+  handle_between_doctype_public_system_id_state,
+  handle_after_doctype_system_keyword_state,
+  handle_before_doctype_system_id_state,
+  handle_doctype_system_id_double_quoted_state,
+  handle_doctype_system_id_single_quoted_state,
+  handle_after_doctype_system_id_state,
+  handle_bogus_doctype_state,
+  handle_cdata_state
+};
 
 bool gumbo_lex(GumboParser* parser, GumboToken* output) {
   // Because of the spec requirements that...
@@ -2820,9 +3220,9 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
   // state.
   //
   // ...all state must be held in the GumboTokenizer struct instead of in local
-  // variables in this function.  That allows us to return from this method with
+  // variables in this function. That allows us to return from this method with
   // a token, and then immediately jump back to the same state with the same
-  // input if we need to return a different token.  The various emit_* functions
+  // input if we need to return a different token. The various emit_* functions
   // are responsible for changing state (eg. flushing the chardata buffer,
   // reading the next input character) to avoid an infinite loop.
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
@@ -2846,10 +3246,9 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
     assert(!tokenizer->_temporary_buffer_emit);
     assert(tokenizer->_buffered_emit_char == kGumboNoChar);
     int c = utf8iterator_current(&tokenizer->_input);
-    gumbo_debug(
-        "Lexing character '%c' (%d) in state %d.\n", c, c, tokenizer->_state);
-    StateResult result =
-        dispatch_table[tokenizer->_state](parser, tokenizer, c, output);
+    GumboTokenizerEnum state = tokenizer->_state;
+    gumbo_debug("Lexing character '%c' (%d) in state %u.\n", c, c, state);
+    StateResult result = dispatch_table[state](parser, tokenizer, c, output);
     // We need to clear reconsume_current_input before returning to prevent
     // certain infinite loop states.
     bool should_advance = !tokenizer->_reconsume_current_input;
@@ -2867,30 +3266,29 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
   }
 }
 
-void gumbo_token_destroy(GumboParser* parser, GumboToken* token) {
+void gumbo_token_destroy(GumboToken* token) {
   if (!token) return;
 
   switch (token->type) {
     case GUMBO_TOKEN_DOCTYPE:
-      gumbo_parser_deallocate(parser, (void*) token->v.doc_type.name);
-      gumbo_parser_deallocate(
-          parser, (void*) token->v.doc_type.public_identifier);
-      gumbo_parser_deallocate(
-          parser, (void*) token->v.doc_type.system_identifier);
+      gumbo_free((void*) token->v.doc_type.name);
+      gumbo_free((void*) token->v.doc_type.public_identifier);
+      gumbo_free((void*) token->v.doc_type.system_identifier);
       return;
     case GUMBO_TOKEN_START_TAG:
       for (unsigned int i = 0; i < token->v.start_tag.attributes.length; ++i) {
         GumboAttribute* attr = token->v.start_tag.attributes.data[i];
         if (attr) {
           // May have been nulled out if this token was merged with another.
-          gumbo_destroy_attribute(parser, attr);
+          gumbo_destroy_attribute(attr);
         }
       }
-      gumbo_parser_deallocate(
-          parser, (void*) token->v.start_tag.attributes.data);
+      gumbo_free((void*) token->v.start_tag.attributes.data);
+      if (token->v.start_tag.tag == GUMBO_TAG_UNKNOWN)
+        gumbo_free((void*) token->v.start_tag.name);
       return;
     case GUMBO_TOKEN_COMMENT:
-      gumbo_parser_deallocate(parser, (void*) token->v.text);
+      gumbo_free((void*) token->v.text);
       return;
     default:
       return;
diff --git a/gumbo-parser/src/tokenizer.h b/gumbo-parser/src/tokenizer.h
index 1e2a2ca7..b1f43a92 100644
--- a/gumbo-parser/src/tokenizer.h
+++ b/gumbo-parser/src/tokenizer.h
@@ -1,25 +1,9 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// This contains an implementation of a tokenizer for HTML5.  It consumes a
-// buffer of UTF-8 characters, and then emits a stream of tokens.
-
 #ifndef GUMBO_TOKENIZER_H_
 #define GUMBO_TOKENIZER_H_
 
+// This contains an implementation of a tokenizer for HTML5. It consumes a
+// buffer of UTF-8 characters, and then emits a stream of tokens.
+
 #include <stdbool.h>
 #include <stddef.h>
 
@@ -49,11 +33,18 @@ typedef struct GumboInternalTokenDocType {
 // Struct containing all information pertaining to start tag tokens.
 typedef struct GumboInternalTokenStartTag {
   GumboTag tag;
+  const char *name; // gumbo_token_destroy() frees this only when tag == GUMBO_TAG_UNKNOWN.
   GumboVector /* GumboAttribute */ attributes;
   bool is_self_closing;
 } GumboTokenStartTag;
 
-// A data structure representing a single token in the input stream.  This
+// Struct containing all information pertaining to end tag tokens.
+typedef struct GumboInternalTokenEndTag { // Stored in the GumboToken union as v.end_tag.
+  GumboTag tag;
+  bool is_self_closing; // NOTE(review): presumably set when the end tag is written with a trailing "/" - confirm in tokenizer.c.
+} GumboTokenEndTag;
+
+// A data structure representing a single token in the input stream. This
 // contains an enum for the type, the source position, a GumboStringPiece
 // pointing to the original text, and then a union for any parsed data.
 typedef struct GumboInternalToken {
@@ -63,7 +54,7 @@ typedef struct GumboInternalToken {
   union {
     GumboTokenDocType doc_type;
     GumboTokenStartTag start_tag;
-    GumboTag end_tag;
+    GumboTokenEndTag end_tag;
     const char* text;  // For comments.
     int character;     // For character, whitespace, null, and EOF tokens.
   } v;
@@ -71,28 +62,35 @@ typedef struct GumboInternalToken {
 
 // Initializes the tokenizer state within the GumboParser object, setting up a
 // parse of the specified text.
-void gumbo_tokenizer_state_init(
-    struct GumboInternalParser* parser, const char* text, size_t text_length);
+void gumbo_tokenizer_state_init (
+  struct GumboInternalParser* parser,
+  const char* text,
+  size_t text_length
+);
 
 // Destroys the tokenizer state within the GumboParser object, freeing any
 // dynamically-allocated structures within it.
 void gumbo_tokenizer_state_destroy(struct GumboInternalParser* parser);
 
-// Sets the tokenizer state to the specified value.  This is needed by some
+// Sets the tokenizer state to the specified value. This is needed by some
 // parser states, which alter the state of the tokenizer in response to tags
 // seen.
-void gumbo_tokenizer_set_state(
-    struct GumboInternalParser* parser, GumboTokenizerEnum state);
+void gumbo_tokenizer_set_state (
+  struct GumboInternalParser* parser,
+  GumboTokenizerEnum state
+);
 
-// Flags whether the current node is a foreign content element.  This is
+// Flags whether the current node is a foreign content element. This is
 // necessary for the markup declaration open state, where the tokenizer must be
 // aware of the state of the parser to properly tokenize bad comment tags.
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state
-void gumbo_tokenizer_set_is_current_node_foreign(
-    struct GumboInternalParser* parser, bool is_foreign);
+// https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
+void gumbo_tokenizer_set_is_current_node_foreign (
+  struct GumboInternalParser* parser,
+  bool is_foreign
+);
 
 // Lexes a single token from the specified buffer, filling the output with the
-// parsed GumboToken data structure.  Returns true for a successful
+// parsed GumboToken data structure. Returns true for a successful
 // tokenization, false if a parse error occurs.
 //
 // Example:
@@ -101,23 +99,22 @@ void gumbo_tokenizer_set_is_current_node_foreign(
 //   gumbo_tokenizer_state_init(&parser, text, strlen(text));
 //   while (gumbo_lex(&parser, &output)) {
 //     ...do stuff with output.
-//     gumbo_token_destroy(&parser, &token);
+//     gumbo_token_destroy(&output);
 //   }
 //   gumbo_tokenizer_state_destroy(&parser);
 bool gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
 
-// Frees the internally-allocated pointers within an GumboToken.  Note that this
+// Frees the internally-allocated pointers within a GumboToken. Note that this
 // doesn't free the token itself, since oftentimes it will be allocated on the
-// stack.  A simple call to free() (or GumboParser->deallocator, if
-// appropriate) can handle that.
+// stack.
 //
 // Note that if you are handing over ownership of the internal strings to some
 // other data structure - for example, a parse tree - these do not need to be
 // freed.
-void gumbo_token_destroy(struct GumboInternalParser* parser, GumboToken* token);
+void gumbo_token_destroy(GumboToken* token);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // GUMBO_TOKENIZER_H_
+#endif // GUMBO_TOKENIZER_H_
diff --git a/gumbo-parser/src/tokenizer_states.h b/gumbo-parser/src/tokenizer_states.h
index 80659f5f..cb3f4b24 100644
--- a/gumbo-parser/src/tokenizer_states.h
+++ b/gumbo-parser/src/tokenizer_states.h
@@ -1,32 +1,16 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// This contains the list of states used in the tokenizer.  Although at first
+#ifndef GUMBO_TOKENIZER_STATES_H_
+#define GUMBO_TOKENIZER_STATES_H_
+
+// This contains the list of states used in the tokenizer. Although at first
 // glance it seems like these could be kept internal to the tokenizer, several
 // of the actions in the parser require that it reach into the tokenizer and
-// reset the tokenizer state.  For that to work, it needs to have the
+// reset the tokenizer state. For that to work, it needs to have the
 // definitions of individual states available.
 //
 // This may also be useful for providing more detailed error messages for parse
 // errors, as we can match up states and inputs in a table without having to
 // clutter the tokenizer code with lots of precise error messages.
 
-#ifndef GUMBO_TOKENIZER_STATES_H_
-#define GUMBO_TOKENIZER_STATES_H_
-
 // The ordering of this enum is also used to build the dispatch table for the
 // tokenizer state machine, so if it is changed, be sure to update that too.
 typedef enum {
@@ -100,4 +84,4 @@ typedef enum {
   GUMBO_LEX_CDATA
 } GumboTokenizerEnum;
 
-#endif  // GUMBO_TOKENIZER_STATES_H_
+#endif // GUMBO_TOKENIZER_STATES_H_
diff --git a/gumbo-parser/src/utf8.c b/gumbo-parser/src/utf8.c
index fdd6f837..e1c34f3f 100644
--- a/gumbo-parser/src/utf8.c
+++ b/gumbo-parser/src/utf8.c
@@ -1,59 +1,53 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
+/*
+ Copyright 2018 Craig Barnes.
+ Copyright 2010 Google Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
 
 #include "utf8.h"
 
 #include <assert.h>
 #include <stdint.h>
 #include <string.h>
-#include <strings.h>  // For strncasecmp.
 
 #include "error.h"
 #include "gumbo.h"
 #include "parser.h"
-#include "util.h"
+#include "ascii.h"
 #include "vector.h"
 
 const int kUtf8ReplacementChar = 0xFFFD;
 
-// Reference material:
-// Wikipedia: http://en.wikipedia.org/wiki/UTF-8#Description
-// RFC 3629: http://tools.ietf.org/html/rfc3629
-// HTML5 Unicode handling:
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#preprocessing-the-input-stream
-//
-// This implementation is based on a DFA-based decoder by Bjoern Hoehrmann
-// <bjoern@hoehrmann.de>.  We wrap the inner table-based decoder routine in our
-// own handling for newlines, tabs, invalid continuation bytes, and other
-// conditions that the HTML5 spec fully specifies but normal UTF8 decoders do
-// not handle.
-// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.  Full text of
-// the license agreement and code follows.
+// References:
+// * https://tools.ietf.org/html/rfc3629
+// * https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
 
-// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
-
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to
-// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-// of the Software, and to permit persons to whom the Software is furnished to
-// do
-// so, subject to the following conditions:
+// The following code is a DFA-based UTF-8 decoder by Bjoern Hoehrmann.
+// We wrap the inner table-based decoder routine in our own handling for
+// newlines, tabs, invalid continuation bytes, and other conditions that
+// the HTML5 spec fully specifies but normal UTF-8 decoders do not handle.
+// See https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
 
+// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 
@@ -61,35 +55,33 @@ const int kUtf8ReplacementChar = 0xFFFD;
 #define UTF8_REJECT 12
 
 static const uint8_t utf8d[] = {
-    // The first part of the table maps bytes to character classes that
-    // to reduce the size of the transition table and create bitmasks.
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9,
-    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8,
-    8, 8, 8, 8, 8, 8,
-
-    // The second part is a transition table that maps a combination
-    // of a state of the automaton and a character class to a state.
-    0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12,
-    12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,
-    12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,
-    12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  // The first part of the table maps bytes to character classes
+  // to reduce the size of the transition table and create bitmasks.
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+
+  // The second part is a transition table that maps a combination
+  // of a state of the automaton and a character class to a state.
+   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+  12,36,12,12,12,12,12,12,12,12,12,12,
 };
 
-uint32_t static inline decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
+static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
   uint32_t type = utf8d[byte];
 
-  *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
-                                   : (0xff >> type) & (byte);
+  *codep =
+    (*state != UTF8_ACCEPT)
+      ? (byte & 0x3fu) | (*codep << 6)
+      : (0xff >> type) & (byte);
 
   *state = utf8d[256 + *state + type];
   return *state;
@@ -113,8 +105,8 @@ static void add_error(Utf8Iterator* iter, GumboErrorType type) {
   // At the point the error is recorded, the code point hasn't been computed
   // yet (and can't be, because it's invalid), so we need to build up the raw
   // hex value from the bytes under the cursor.
-  uint64_t code_point = 0;
-  for (int i = 0; i < iter->_width; ++i) {
+  uint32_t code_point = 0;
+  for (size_t i = 0; i < iter->_width; ++i) {
     code_point = (code_point << 8) | (unsigned char) iter->_start[i];
   }
   error->v.codepoint = code_point;
@@ -139,10 +131,10 @@ static void read_char(Utf8Iterator* iter) {
     if (state == UTF8_ACCEPT) {
       iter->_width = c - iter->_start + 1;
       // This is the special handling for carriage returns that is mandated by
-      // the HTML5 spec.  Since we're looking for particular 7-bit literal
+      // the HTML5 spec. Since we're looking for particular 7-bit literal
       // characters, we operate in terms of chars and only need a check for iter
       // overrun, instead of having to read in a full next code point.
-      // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream
+      // https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
       if (code_point == '\r') {
         assert(iter->_width == 1);
         const char* next = c + 1;
@@ -171,8 +163,8 @@ static void read_char(Utf8Iterator* iter) {
     }
   }
   // If we got here without exiting early, then we've reached the end of the
-  // iterator.  Add an error for truncated input, set the width to consume the
-  // rest of the iterator, and emit a replacement character.  The next time we
+  // iterator. Add an error for truncated input, set the width to consume the
+  // rest of the iterator, and emit a replacement character. The next time we
   // enter this method, it will detect that there's no input to consume and
   // output an EOF.
   iter->_current = kUtf8ReplacementChar;
@@ -196,13 +188,23 @@ static void update_position(Utf8Iterator* iter) {
 // Returns true if this Unicode code point is in the list of characters
 // forbidden by the HTML5 spec, such as undefined control chars.
 bool utf8_is_invalid_code_point(int c) {
-  return (c >= 0x1 && c <= 0x8) || c == 0xB || (c >= 0xE && c <= 0x1F) ||
-         (c >= 0x7F && c <= 0x9F) || (c >= 0xFDD0 && c <= 0xFDEF) ||
-         ((c & 0xFFFF) == 0xFFFE) || ((c & 0xFFFF) == 0xFFFF);
+  return
+    (c >= 0x1 && c <= 0x8)
+    || c == 0xB
+    || (c >= 0xE && c <= 0x1F)
+    || (c >= 0x7F && c <= 0x9F)
+    || (c >= 0xFDD0 && c <= 0xFDEF)
+    || ((c & 0xFFFF) == 0xFFFE)
+    || ((c & 0xFFFF) == 0xFFFF)
+  ;
 }
 
-void utf8iterator_init(GumboParser* parser, const char* source,
-    size_t source_length, Utf8Iterator* iter) {
+void utf8iterator_init (
+  GumboParser* parser,
+  const char* source,
+  size_t source_length,
+  Utf8Iterator* iter
+) {
   iter->_start = source;
   iter->_end = source + source_length;
   iter->_pos.line = 1;
@@ -220,10 +222,14 @@ void utf8iterator_next(Utf8Iterator* iter) {
   read_char(iter);
 }
 
-int utf8iterator_current(const Utf8Iterator* iter) { return iter->_current; }
+int utf8iterator_current(const Utf8Iterator* iter) {
+  return iter->_current;
+}
 
-void utf8iterator_get_position(
-    const Utf8Iterator* iter, GumboSourcePosition* output) {
+void utf8iterator_get_position (
+  const Utf8Iterator* iter,
+  GumboSourcePosition* output
+) {
   *output = iter->_pos;
 }
 
@@ -235,13 +241,22 @@ const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter) {
   return iter->_end;
 }
 
-bool utf8iterator_maybe_consume_match(Utf8Iterator* iter, const char* prefix,
-    size_t length, bool case_sensitive) {
-  bool matched = (iter->_start + length <= iter->_end) &&
-                 (case_sensitive ? !strncmp(iter->_start, prefix, length)
-                                 : !strncasecmp(iter->_start, prefix, length));
+bool utf8iterator_maybe_consume_match (
+  Utf8Iterator* iter,
+  const char* prefix,
+  size_t length,
+  bool case_sensitive
+) {
+  bool matched =
+    (iter->_start + length <= iter->_end)
+    && (
+      case_sensitive
+        ? !strncmp(iter->_start, prefix, length)
+        : !gumbo_ascii_strncasecmp(iter->_start, prefix, length)
+    )
+  ;
   if (matched) {
-    for (unsigned int i = 0; i < length; ++i) {
+    for (size_t i = 0; i < length; ++i) {
       utf8iterator_next(iter);
     }
     return true;
diff --git a/gumbo-parser/src/utf8.h b/gumbo-parser/src/utf8.h
index bd31a781..0c52e5fa 100644
--- a/gumbo-parser/src/utf8.h
+++ b/gumbo-parser/src/utf8.h
@@ -1,41 +1,26 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// This contains an implementation of a UTF8 iterator and decoder suitable for
-// an HTML5 parser.  This does a bit more than straight UTF-8 decoding.  The
+#ifndef GUMBO_UTF8_H_
+#define GUMBO_UTF8_H_
+
+// This contains an implementation of a UTF-8 iterator and decoder suitable for
+// an HTML5 parser. This does a bit more than straight UTF-8 decoding. The
 // HTML5 spec specifies that:
 // 1. Decoding errors are parse errors.
-// 2. Certain other codepoints (eg. control characters) are parse errors.
+// 2. Certain other codepoints (e.g. control characters) are parse errors.
 // 3. Carriage returns and CR/LF groups are converted to line feeds.
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#decoded-as-utf-8,-with-error-handling
+// https://encoding.spec.whatwg.org/#utf-8-decode
 //
-// Also, we want to keep track of source positions for error handling.  As a
+// Also, we want to keep track of source positions for error handling. As a
 // result, we fold all that functionality into this decoder, and can't use an
 // off-the-shelf library.
 //
 // This header is internal-only, which is why we prefix functions with only
 // utf8_ or utf8_iterator_ instead of gumbo_utf8_.
 
-#ifndef GUMBO_UTF8_H_
-#define GUMBO_UTF8_H_
-
 #include <stdbool.h>
 #include <stddef.h>
 
 #include "gumbo.h"
+#include "macros.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -51,7 +36,7 @@ typedef struct GumboInternalUtf8Iterator {
   // Points at the start of the code point most recently read into 'current'.
   const char* _start;
 
-  // Points at the mark.  The mark is initially set to the beginning of the
+  // Points at the mark. The mark is initially set to the beginning of the
   // input.
   const char* _mark;
 
@@ -62,7 +47,7 @@ typedef struct GumboInternalUtf8Iterator {
   int _current;
 
   // The width in bytes of the current code point.
-  int _width;
+  size_t _width;
 
   // The SourcePosition for the current location.
   GumboSourcePosition _pos;
@@ -77,12 +62,16 @@ typedef struct GumboInternalUtf8Iterator {
 
 // Returns true if this Unicode code point is in the list of characters
 // forbidden by the HTML5 spec, such as NUL bytes and undefined control chars.
-bool utf8_is_invalid_code_point(int c);
+bool utf8_is_invalid_code_point(int c) CONST_FN;
 
-// Initializes a new Utf8Iterator from the given byte buffer.  The source does
+// Initializes a new Utf8Iterator from the given byte buffer. The source does
 // not have to be NUL-terminated, but the length must be passed in explicitly.
-void utf8iterator_init(struct GumboInternalParser* parser, const char* source,
-    size_t source_length, Utf8Iterator* iter);
+void utf8iterator_init (
+  struct GumboInternalParser* parser,
+  const char* source,
+  size_t source_length,
+  Utf8Iterator* iter
+);
 
 // Advances the current position by one code point.
 void utf8iterator_next(Utf8Iterator* iter);
@@ -97,23 +86,27 @@ void utf8iterator_get_position(
 // Retrieves a character pointer to the start of the current character.
 const char* utf8iterator_get_char_pointer(const Utf8Iterator* iter);
 
-// Retrieves a character pointer to 1 past the end of the buffer.  This is
+// Retrieves a character pointer to 1 past the end of the buffer. This is
 // necessary for certain state machines and string comparisons that would like
 // to look directly for ASCII text in the buffer without going through the
 // decoder.
 const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter);
 
 // If the upcoming text in the buffer matches the specified prefix (which has
-// length 'length'), consume it and return true.  Otherwise, return false with
-// no other effects.  If the length of the string would overflow the buffer,
-// this returns false.  Note that prefix should not contain null bytes because
-// of the use of strncmp/strncasecmp internally.  All existing use-cases adhere
+// length 'length'), consume it and return true. Otherwise, return false with
+// no other effects. If the length of the string would overflow the buffer,
+// this returns false. Note that prefix should not contain null bytes because
+// strncmp/gumbo_ascii_strncasecmp are used internally. All current uses adhere
 // to this.
-bool utf8iterator_maybe_consume_match(
-    Utf8Iterator* iter, const char* prefix, size_t length, bool case_sensitive);
+bool utf8iterator_maybe_consume_match (
+  Utf8Iterator* iter,
+  const char* prefix,
+  size_t length,
+  bool case_sensitive
+);
 
 // "Marks" a particular location of interest in the input stream, so that it can
-// later be reset() to.  There's also the ability to record an error at the
+// later be reset() to. There's also the ability to record an error at the
 // point that was marked, as oftentimes that's more useful than the last
 // character before the error was detected.
 void utf8iterator_mark(Utf8Iterator* iter);
@@ -123,10 +116,13 @@ void utf8iterator_reset(Utf8Iterator* iter);
 
 // Sets the position and original text fields of an error to the value at the
 // mark.
-void utf8iterator_fill_error_at_mark(
-    Utf8Iterator* iter, struct GumboInternalError* error);
+void utf8iterator_fill_error_at_mark (
+  Utf8Iterator* iter,
+  struct GumboInternalError* error
+);
 
 #ifdef __cplusplus
 }
 #endif
-#endif  // GUMBO_UTF8_H_
+
+#endif // GUMBO_UTF8_H_
diff --git a/gumbo-parser/src/util.c b/gumbo-parser/src/util.c
index 5a24c115..5af20524 100644
--- a/gumbo-parser/src/util.c
+++ b/gumbo-parser/src/util.c
@@ -1,58 +1,68 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
+/*
+ Copyright 2017-2018 Craig Barnes.
+ Copyright 2010 Google Inc.
 
-#include "util.h"
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
 
-#include <assert.h>
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <strings.h>
-#include <stdarg.h>
-#include <stdio.h>
-
+#include "util.h"
 #include "gumbo.h"
-#include "parser.h"
 
-// TODO(jdtang): This should be elsewhere, but there's no .c file for
-// SourcePositions and yet the constant needs some linkage, so this is as good
-// as any.
-const GumboSourcePosition kGumboEmptySourcePosition = {0, 0, 0};
+void* gumbo_alloc(size_t size) {
+  void* ptr = malloc(size);
+  if (unlikely(ptr == NULL)) {
+    perror(__func__);
+    abort();
+  }
+  return ptr;
+}
 
-void* gumbo_parser_allocate(GumboParser* parser, size_t num_bytes) {
-  return parser->_options->allocator(parser->_options->userdata, num_bytes);
+void* gumbo_realloc(void* ptr, size_t size) {
+  ptr = realloc(ptr, size);
+  if (unlikely(ptr == NULL)) {
+    perror(__func__);
+    abort();
+  }
+  return ptr;
 }
 
-void gumbo_parser_deallocate(GumboParser* parser, void* ptr) {
-  parser->_options->deallocator(parser->_options->userdata, ptr);
+void gumbo_free(void* ptr) {
+  free(ptr);
 }
 
-char* gumbo_copy_stringz(GumboParser* parser, const char* str) {
-  char* buffer = gumbo_parser_allocate(parser, strlen(str) + 1);
-  strcpy(buffer, str);
-  return buffer;
+char* gumbo_strdup(const char* str) {
+  const size_t size = strlen(str) + 1;
+  // The strdup(3) function isn't available in strict "-std=c99" mode
+  // (it's part of POSIX, not C99), so use malloc(3) and memcpy(3)
+  // instead:
+  char* buffer = gumbo_alloc(size);
+  return memcpy(buffer, str, size);
 }
 
-// Debug function to trace operation of the parser.  Pass --copts=-DGUMBO_DEBUG
-// to use.
-void gumbo_debug(const char* format, ...) {
 #ifdef GUMBO_DEBUG
+#include <stdarg.h>
+// Debug function to trace operation of the parser
+// (define GUMBO_DEBUG to use).
+void gumbo_debug(const char* format, ...) {
   va_list args;
   va_start(args, format);
   vprintf(format, args);
   va_end(args);
   fflush(stdout);
-#endif
 }
+#else
+void gumbo_debug(const char* UNUSED_ARG(format), ...) {}
+#endif
diff --git a/gumbo-parser/src/util.h b/gumbo-parser/src/util.h
index 6ad65649..dfdf465b 100644
--- a/gumbo-parser/src/util.h
+++ b/gumbo-parser/src/util.h
@@ -1,60 +1,30 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// This contains some utility functions that didn't fit into any of the other
-// headers.
-
 #ifndef GUMBO_UTIL_H_
 #define GUMBO_UTIL_H_
-#ifdef _MSC_VER
-#define _CRT_SECURE_NO_WARNINGS
-#endif
+
 #include <stdbool.h>
 #include <stddef.h>
+#include "macros.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-// Forward declaration since it's passed into some of the functions in this
-// header.
-struct GumboInternalParser;
-
 // Utility function for allocating & copying a null-terminated string into a
-// freshly-allocated buffer.  This is necessary for proper memory management; we
+// freshly-allocated buffer. This is necessary for proper memory management; we
 // have the convention that all const char* in parse tree structures are
 // freshly-allocated, so if we didn't copy, we'd try to delete a literal string
 // when the parse tree is destroyed.
-char* gumbo_copy_stringz(struct GumboInternalParser* parser, const char* str);
-
-// Allocate a chunk of memory, using the allocator specified in the Parser's
-// config options.
-void* gumbo_parser_allocate(
-    struct GumboInternalParser* parser, size_t num_bytes);
+char* gumbo_strdup(const char* str) XMALLOC NONNULL_ARGS;
 
-// Deallocate a chunk of memory, using the deallocator specified in the Parser's
-// config options.
-void gumbo_parser_deallocate(struct GumboInternalParser* parser, void* ptr);
+void* gumbo_alloc(size_t size) XMALLOC;
+void* gumbo_realloc(void* ptr, size_t size) RETURNS_NONNULL;
+void gumbo_free(void* ptr);
 
-// Debug wrapper for printf, to make it easier to turn off debugging info when
-// required.
-void gumbo_debug(const char* format, ...);
+// Debug wrapper for printf
+void gumbo_debug(const char* format, ...) PRINTF(1);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // GUMBO_UTIL_H_
+#endif // GUMBO_UTIL_H_
diff --git a/gumbo-parser/src/vector.c b/gumbo-parser/src/vector.c
index 51758dfe..d4dfa2af 100644
--- a/gumbo-parser/src/vector.c
+++ b/gumbo-parser/src/vector.c
@@ -1,81 +1,70 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
+/*
+ Copyright 2018 Craig Barnes.
+ Copyright 2010 Google Inc.
 
-#include "vector.h"
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
 
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
-#include <strings.h>
-
+#include "vector.h"
 #include "util.h"
 
-struct GumboInternalParser;
-
-const GumboVector kGumboEmptyVector = {NULL, 0, 0};
+const GumboVector kGumboEmptyVector = { \
+  .data = NULL, \
+  .length = 0, \
+  .capacity = 0 \
+};
 
-void gumbo_vector_init(struct GumboInternalParser* parser,
-    size_t initial_capacity, GumboVector* vector) {
+void gumbo_vector_init(unsigned int initial_capacity, GumboVector* vector) {
   vector->length = 0;
   vector->capacity = initial_capacity;
   if (initial_capacity > 0) {
-    vector->data =
-        gumbo_parser_allocate(parser, sizeof(void*) * initial_capacity);
+    vector->data = gumbo_alloc(sizeof(void*) * initial_capacity);
   } else {
     vector->data = NULL;
   }
 }
 
-void gumbo_vector_destroy(
-    struct GumboInternalParser* parser, GumboVector* vector) {
+void gumbo_vector_destroy(GumboVector* vector) {
   if (vector->capacity > 0) {
-    gumbo_parser_deallocate(parser, vector->data);
+    gumbo_free(vector->data);
   }
 }
 
-static void enlarge_vector_if_full(
-    struct GumboInternalParser* parser, GumboVector* vector) {
+static void enlarge_vector_if_full(GumboVector* vector) {
   if (vector->length >= vector->capacity) {
     if (vector->capacity) {
-      size_t old_num_bytes = sizeof(void*) * vector->capacity;
       vector->capacity *= 2;
       size_t num_bytes = sizeof(void*) * vector->capacity;
-      void** temp = gumbo_parser_allocate(parser, num_bytes);
-      memcpy(temp, vector->data, old_num_bytes);
-      gumbo_parser_deallocate(parser, vector->data);
-      vector->data = temp;
+      vector->data = gumbo_realloc(vector->data, num_bytes);
     } else {
       // 0-capacity vector; no previous array to deallocate.
       vector->capacity = 2;
-      vector->data =
-          gumbo_parser_allocate(parser, sizeof(void*) * vector->capacity);
+      vector->data = gumbo_alloc(sizeof(void*) * vector->capacity);
     }
   }
 }
 
-void gumbo_vector_add(
-    struct GumboInternalParser* parser, void* element, GumboVector* vector) {
-  enlarge_vector_if_full(parser, vector);
+void gumbo_vector_add(void* element, GumboVector* vector) {
+  enlarge_vector_if_full(vector);
   assert(vector->data);
   assert(vector->length < vector->capacity);
   vector->data[vector->length++] = element;
 }
 
-void* gumbo_vector_pop(
-    struct GumboInternalParser* parser, GumboVector* vector) {
+void* gumbo_vector_pop(GumboVector* vector) {
   if (vector->length == 0) {
     return NULL;
   }
@@ -91,33 +80,38 @@ int gumbo_vector_index_of(GumboVector* vector, const void* element) {
   return -1;
 }
 
-void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
-    unsigned int index, GumboVector* vector) {
-  assert(index >= 0);
+void gumbo_vector_insert_at (
+  void* element,
+  unsigned int index,
+  GumboVector* vector
+) {
   assert(index <= vector->length);
-  enlarge_vector_if_full(parser, vector);
+  enlarge_vector_if_full(vector);
   ++vector->length;
-  memmove(&vector->data[index + 1], &vector->data[index],
-      sizeof(void*) * (vector->length - index - 1));
+  memmove (
+    &vector->data[index + 1],
+    &vector->data[index],
+    sizeof(void*) * (vector->length - index - 1)
+  );
   vector->data[index] = element;
 }
 
-void gumbo_vector_remove(
-    struct GumboInternalParser* parser, void* node, GumboVector* vector) {
+void gumbo_vector_remove(void* node, GumboVector* vector) {
   int index = gumbo_vector_index_of(vector, node);
   if (index == -1) {
     return;
   }
-  gumbo_vector_remove_at(parser, index, vector);
+  gumbo_vector_remove_at(index, vector);
 }
 
-void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
-    unsigned int index, GumboVector* vector) {
-  assert(index >= 0);
+void* gumbo_vector_remove_at(unsigned int index, GumboVector* vector) {
   assert(index < vector->length);
   void* result = vector->data[index];
-  memmove(&vector->data[index], &vector->data[index + 1],
-      sizeof(void*) * (vector->length - index - 1));
+  memmove (
+    &vector->data[index],
+    &vector->data[index + 1],
+    sizeof(void*) * (vector->length - index - 1)
+  );
   --vector->length;
   return result;
 }
diff --git a/gumbo-parser/src/vector.h b/gumbo-parser/src/vector.h
index 70fe6fa6..5e164de3 100644
--- a/gumbo-parser/src/vector.h
+++ b/gumbo-parser/src/vector.h
@@ -1,19 +1,3 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-
 #ifndef GUMBO_VECTOR_H_
 #define GUMBO_VECTOR_H_
 
@@ -23,45 +7,39 @@
 extern "C" {
 #endif
 
-// Forward declaration since it's passed into some of the functions in this
-// header.
-struct GumboInternalParser;
-
 // Initializes a new GumboVector with the specified initial capacity.
-void gumbo_vector_init(struct GumboInternalParser* parser,
-    size_t initial_capacity, GumboVector* vector);
+void gumbo_vector_init(unsigned int initial_capacity, GumboVector* vector);
 
-// Frees the memory used by an GumboVector.  Does not free the contained
+// Frees the memory used by a GumboVector. Does not free the contained
 // pointers.
-void gumbo_vector_destroy(
-    struct GumboInternalParser* parser, GumboVector* vector);
+void gumbo_vector_destroy(GumboVector* vector);
 
-// Adds a new element to an GumboVector.
-void gumbo_vector_add(
-    struct GumboInternalParser* parser, void* element, GumboVector* vector);
+// Adds a new element to a GumboVector.
+void gumbo_vector_add(void* element, GumboVector* vector);
 
 // Removes and returns the element most recently added to the GumboVector.
-// Ownership is transferred to caller.  Capacity is unchanged.  If the vector is
+// Ownership is transferred to caller. Capacity is unchanged. If the vector is
 // empty, NULL is returned.
-void* gumbo_vector_pop(struct GumboInternalParser* parser, GumboVector* vector);
+void* gumbo_vector_pop(GumboVector* vector);
 
-// Inserts an element at a specific index.  This is potentially O(N) time, but
+// Inserts an element at a specific index. This is potentially O(N) time, but
 // is necessary for some of the spec's behavior.
-void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
-    unsigned int index, GumboVector* vector);
+void gumbo_vector_insert_at (
+  void* element,
+  unsigned int index,
+  GumboVector* vector
+);
 
 // Removes an element from the vector, or does nothing if the element is not in
 // the vector.
-void gumbo_vector_remove(
-    struct GumboInternalParser* parser, void* element, GumboVector* vector);
+void gumbo_vector_remove(void* element, GumboVector* vector);
 
-// Removes and returns an element at a specific index.  Note that this is
+// Removes and returns an element at a specific index. Note that this is
 // potentially O(N) time and should be used sparingly.
-void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
-    unsigned int index, GumboVector* vector);
+void* gumbo_vector_remove_at(unsigned int index, GumboVector* vector);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // GUMBO_VECTOR_H_
+#endif // GUMBO_VECTOR_H_
diff --git a/gumbo-parser/test/attribute.cc b/gumbo-parser/test/attribute.cc
new file mode 100644
index 00000000..e55734aa
--- /dev/null
+++ b/gumbo-parser/test/attribute.cc
@@ -0,0 +1,49 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#include "attribute.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "gtest/gtest.h"
+#include "test_utils.h"
+#include "vector.h"
+
+namespace {
+
+class GumboAttributeTest : public GumboTest {
+ protected:
+  GumboAttributeTest() { gumbo_vector_init(2, &vector_); }
+
+  ~GumboAttributeTest() { gumbo_vector_destroy(&vector_); }
+
+  GumboVector vector_;
+};
+
+TEST_F(GumboAttributeTest, GetAttribute) {
+  GumboAttribute attr1;
+  GumboAttribute attr2;
+  attr1.name = "";
+  attr2.name = "foo";
+
+  gumbo_vector_add(&attr1, &vector_);
+  gumbo_vector_add(&attr2, &vector_);
+  EXPECT_EQ(&attr2, gumbo_get_attribute(&vector_, "foo"));
+  EXPECT_EQ(NULL, gumbo_get_attribute(&vector_, "bar"));
+}
+
+}  // namespace
diff --git a/gumbo-parser/test/char_ref.cc b/gumbo-parser/test/char_ref.cc
new file mode 100644
index 00000000..de3cf06a
--- /dev/null
+++ b/gumbo-parser/test/char_ref.cc
@@ -0,0 +1,172 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+// GUnit char_ref tests. These are quick smoke tests, mostly to identify
+// crashing bugs so that they can be fixed without having to debug
+// multi-language tests. As such, they focus on coverage rather than
+// completeness. For testing the full spec, use char_ref_py_tests, which share
+// their testdata with the Python html5lib library.
+
+#include "char_ref.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "gtest/gtest.h"
+#include "test_utils.h"
+#include "utf8.h"
+
+namespace {
+
+class CharRefTest : public GumboTest {  // Fixture around gumbo_consume_char_ref.
+ protected:
+  bool ConsumeCharRef(const char* input) {  // Shorthand: ' ', not in attribute.
+    return ConsumeCharRef(input, ' ', false);
+  }
+
+  bool ConsumeCharRef(
+      const char* input, int additional_allowed_char, bool is_in_attribute) {
+    text_ = input;  // text_/parser_ are not declared here; presumably GumboTest's.
+    utf8iterator_init(&parser_, input, strlen(input), &iter_);
+    bool result = gumbo_consume_char_ref(
+        &parser_, &iter_, additional_allowed_char, is_in_attribute, &output_);
+    fflush(stdout);
+    return result;  // gumbo_consume_char_ref's result, passed through unchanged.
+  }
+
+  Utf8Iterator iter_;  // Tests read its position after the consume call.
+  OneOrTwoCodepoints output_;  // Filled by the most recent consume call.
+};
+
+TEST_F(CharRefTest, Whitespace) {  // Input begins with a space, not '&'.
+  EXPECT_TRUE(ConsumeCharRef(" &nbsp;"));
+  EXPECT_EQ(kGumboNoChar, output_.first);  // nothing was decoded
+  EXPECT_EQ(kGumboNoChar, output_.second);
+}
+
+TEST_F(CharRefTest, NumericHex) {  // Hexadecimal reference with ';'.
+  EXPECT_TRUE(ConsumeCharRef("&#x12ab;"));
+  EXPECT_EQ(0x12ab, output_.first);  // the hex digits, as a code point
+  EXPECT_EQ(kGumboNoChar, output_.second);
+}
+
+TEST_F(CharRefTest, NumericDecimal) {  // Decimal reference with ';'.
+  EXPECT_TRUE(ConsumeCharRef("&#1234;"));
+  EXPECT_EQ(1234, output_.first);  // the decimal digits, as a code point
+  EXPECT_EQ(kGumboNoChar, output_.second);
+}
+
+TEST_F(CharRefTest, NumericInvalidDigit) {  // "&#" followed by a non-digit.
+  errors_are_expected_ = true;
+  EXPECT_FALSE(ConsumeCharRef("&#google"));
+  EXPECT_EQ(kGumboNoChar, output_.first);  // nothing was decoded
+  EXPECT_EQ(kGumboNoChar, output_.second);
+  EXPECT_EQ('&', utf8iterator_current(&iter_));  // iterator is back at '&'
+}
+
+TEST_F(CharRefTest, NumericNoSemicolon) {  // Digits without a closing ';'.
+  errors_are_expected_ = true;
+  EXPECT_FALSE(ConsumeCharRef("&#1234google"));
+  EXPECT_EQ(1234, output_.first);  // value is decoded even though call fails
+  EXPECT_EQ(kGumboNoChar, output_.second);
+  EXPECT_EQ('g', utf8iterator_current(&iter_));  // stops after the digits
+}
+
+TEST_F(CharRefTest, NumericReplacement) {  // 0x82 comes back as U+201A.
+  errors_are_expected_ = true;
+  EXPECT_FALSE(ConsumeCharRef("&#X82"));  // uppercase 'X', no ';'
+  // Low quotation mark character.
+  EXPECT_EQ(0x201A, output_.first);
+  EXPECT_EQ(kGumboNoChar, output_.second);
+}
+
+TEST_F(CharRefTest, NumericInvalid) {  // 0xDA00 comes back as U+FFFD.
+  errors_are_expected_ = true;
+  EXPECT_FALSE(ConsumeCharRef("&#xDA00"));
+  EXPECT_EQ(0xFFFD, output_.first);  // replacement character
+  EXPECT_EQ(kGumboNoChar, output_.second);
+}
+
+TEST_F(CharRefTest, NumericUtfInvalid) {  // 0x7: value kept, call returns false.
+  errors_are_expected_ = true;
+  EXPECT_FALSE(ConsumeCharRef("&#x007"));
+  EXPECT_EQ(0x7, output_.first);  // not substituted
+  EXPECT_EQ(kGumboNoChar, output_.second);
+}
+
+TEST_F(CharRefTest, NamedReplacement) {  // Named reference with ';'.
+  EXPECT_TRUE(ConsumeCharRef("&lt;"));
+  EXPECT_EQ('<', output_.first);  // single code point result
+  EXPECT_EQ(kGumboNoChar, output_.second);
+}
+
+TEST_F(CharRefTest, NamedReplacementNoSemicolon) {  // "&gt" lacking the ';'.
+  errors_are_expected_ = true;
+  EXPECT_FALSE(ConsumeCharRef("&gt"));
+  EXPECT_EQ('>', output_.first);  // still decoded although the call fails
+  EXPECT_EQ(kGumboNoChar, output_.second);
+}
+
+TEST_F(CharRefTest, NamedReplacementWithInvalidUtf8) {  // '&' + bytes C3 A5.
+  errors_are_expected_ = true;
+  EXPECT_TRUE(ConsumeCharRef("&\xc3\xa5"));  // NOTE(review): C3 A5 is valid UTF-8.
+  EXPECT_EQ(kGumboNoChar, output_.first);  // nothing was decoded
+  EXPECT_EQ(kGumboNoChar, output_.second);
+}
+
+TEST_F(CharRefTest, NamedReplacementInvalid) {  // "&google;" is rejected.
+  errors_are_expected_ = true;
+  EXPECT_FALSE(ConsumeCharRef("&google;"));
+  EXPECT_EQ(kGumboNoChar, output_.first);  // nothing was decoded
+  EXPECT_EQ(kGumboNoChar, output_.second);
+  EXPECT_EQ('&', utf8iterator_current(&iter_));  // iterator is back at '&'
+}
+
+// TEST_F(CharRefTest, NamedReplacementInvalidNoSemicolon) {
+//  EXPECT_FALSE(ConsumeCharRef("&google"));
+//  EXPECT_EQ(kGumboNoChar, output_.first);
+//  EXPECT_EQ(kGumboNoChar, output_.second);
+//  EXPECT_EQ('&', utf8iterator_current(&iter_));
+//}
+
+TEST_F(CharRefTest, AdditionalAllowedChar) {  // '&' then the allowed char.
+  EXPECT_TRUE(ConsumeCharRef("&\"", '"', false));  // '"' passed as allowed char
+  EXPECT_EQ(kGumboNoChar, output_.first);  // nothing was decoded
+  EXPECT_EQ(kGumboNoChar, output_.second);
+  EXPECT_EQ('&', utf8iterator_current(&iter_));  // iterator is back at '&'
+}
+
+TEST_F(CharRefTest, InAttribute) {  // "&noted" with is_in_attribute = true.
+  EXPECT_TRUE(ConsumeCharRef("&noted", ' ', true));
+  EXPECT_EQ(kGumboNoChar, output_.first);  // nothing was decoded
+  EXPECT_EQ(kGumboNoChar, output_.second);
+  EXPECT_EQ('&', utf8iterator_current(&iter_));  // iterator is back at '&'
+}
+
+TEST_F(CharRefTest, MultiChars) {  // One reference yielding two code points.
+  EXPECT_TRUE(ConsumeCharRef("&notindot;"));
+  EXPECT_EQ(0x22F5, output_.first);
+  EXPECT_EQ(0x0338, output_.second);  // second slot is populated here
+}
+
+TEST_F(CharRefTest, CharAfter) {  // Iterator position after a successful call.
+  EXPECT_TRUE(ConsumeCharRef("&lt;x"));
+  EXPECT_EQ('<', output_.first);
+  EXPECT_EQ(kGumboNoChar, output_.second);
+  EXPECT_EQ('x', utf8iterator_current(&iter_));  // just past the ';'
+}
+
+}  // namespace
diff --git a/gumbo-parser/test/parser.cc b/gumbo-parser/test/parser.cc
new file mode 100644
index 00000000..d85e286a
--- /dev/null
+++ b/gumbo-parser/test/parser.cc
@@ -0,0 +1,1996 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#include <string>
+#include "gumbo.h"
+#include "gtest/gtest.h"
+#include "test_utils.h"
+
+namespace {
+
+class GumboParserTest : public ::testing::Test {  // Parses input; owns output_.
+ protected:
+  GumboParserTest()
+    : options_(kGumboDefaultOptions), output_(NULL), root_(NULL)
+  {}
+
+  virtual ~GumboParserTest() {
+    if (output_) {  // NULL when no Parse* call was made.
+      gumbo_destroy_output(output_);
+    }
+  }
+
+  virtual void Parse(const char* input) {  // Length is taken with strlen().
+    if (output_) {  // Release the previous parse before replacing it.
+      gumbo_destroy_output(output_);
+    }
+
+    output_ = gumbo_parse_with_options(&options_, input, strlen(input));
+    // The naming inconsistency is because these tests were initially written
+    // when gumbo_parse returned the document element instead of an GumboOutput
+    // structure.
+    root_ = output_->document;
+  }
+
+  virtual void ParseFragment(
+      const char* input, GumboTag context, GumboNamespaceEnum context_ns) {
+    if (output_) {  // Release the previous parse before replacing it.
+      gumbo_destroy_output(output_);
+    }
+
+    options_.fragment_context = context;  // Not reset afterwards by this class.
+    options_.fragment_namespace = context_ns;
+    output_ = gumbo_parse_with_options(&options_, input, strlen(input));
+    root_ = output_->document;
+  }
+
+  virtual void Parse(const std::string& input) {
+    // This overload is so we can test/demonstrate that computing offsets from
+    // the .data() member of an STL string works properly.
+    if (output_) {  // Release the previous parse before replacing it.
+      gumbo_destroy_output(output_);
+    }
+
+    output_ = gumbo_parse_with_options(&options_, input.data(), input.length());
+    root_ = output_->document;  // The check below starts at output_->root instead.
+    SanityCheckPointers(input.data(), input.length(), output_->root, 1000);
+  }
+
+  GumboOptions options_;  // Starts as a copy of kGumboDefaultOptions.
+  GumboOutput* output_;  // Destroyed on each re-parse and in the destructor.
+  GumboNode* root_;  // output_->document of the most recent parse.
+};
+
+TEST_F(GumboParserTest, NullDocument) {  // Empty input still gives a document.
+  Parse("");
+  ASSERT_TRUE(root_);
+  ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type);
+  EXPECT_EQ(GUMBO_INSERTION_BY_PARSER, root_->parse_flags);
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);  // Helper defined outside this file.
+}
+
+TEST_F(GumboParserTest, ParseTwice) {  // The fixture supports a second parse.
+  Parse("");  // const char* overload
+  ASSERT_TRUE(root_);
+  ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type);
+
+  std::string second_input("");
+  Parse(second_input);  // std::string overload; destroys the first output
+  ASSERT_TRUE(root_);
+  ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type);
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+}
+
+TEST_F(GumboParserTest, OneChar) {  // "T" alone; <html> is parser-inserted.
+  std::string input("T");
+  Parse(input);
+  ASSERT_TRUE(root_);
+  ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type);
+  EXPECT_EQ(GUMBO_INSERTION_BY_PARSER, root_->parse_flags);
+  ASSERT_EQ(1, GetChildCount(root_));
+
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_TRUE(html->parse_flags & GUMBO_INSERTION_BY_PARSER);
+  EXPECT_TRUE(html->parse_flags & GUMBO_INSERTION_IMPLICIT_END_TAG);
+  EXPECT_TRUE(html->parse_flags & GUMBO_INSERTION_IMPLIED);
+  EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
+  ASSERT_EQ(2, GetChildCount(html));  // head and body
+
+  GumboNode* head = GetChild(html, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
+  EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag);
+  EXPECT_EQ(0, GetChildCount(head));
+
+  GumboNode* body = GetChild(html, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(body));
+  EXPECT_EQ(1, body->v.element.start_pos.line);
+  EXPECT_EQ(1, body->v.element.start_pos.column);
+  EXPECT_EQ(0, body->v.element.start_pos.offset);  // body starts at the 'T'
+  EXPECT_EQ(1, body->v.element.end_pos.line);
+  EXPECT_EQ(2, body->v.element.end_pos.column);
+  EXPECT_EQ(1, body->v.element.end_pos.offset);  // and ends right after it
+
+  GumboNode* text = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("T", text->v.text.text);
+  EXPECT_EQ(1, text->v.text.start_pos.line);
+  EXPECT_EQ(1, text->v.text.start_pos.column);
+  EXPECT_EQ(0, text->v.text.start_pos.offset);
+  EXPECT_EQ(input.data(), text->v.text.original_text.data);  // points into input
+  EXPECT_EQ(1, text->v.text.original_text.length);
+}
+
+TEST_F(GumboParserTest, TextOnly) {  // Bare text is the only child of <body>.
+  Parse("Test");
+  EXPECT_EQ(1, output_->errors.length);  // No doctype.
+  ASSERT_EQ(1, GetChildCount(root_));
+
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
+  ASSERT_EQ(2, GetChildCount(html));  // head and body
+
+  GumboNode* head = GetChild(html, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
+  EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag);
+  EXPECT_EQ(0, GetChildCount(head));
+
+  GumboNode* body = GetChild(html, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* text = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("Test", text->v.text.text);
+}
+
+TEST_F(GumboParserTest, SelfClosingTagError) {  // Error count for "<div/>".
+  Parse("<div/>");
+  // No DOCTYPE
+  // Tag cannot be self-closing
+  // EOF with div still open
+  EXPECT_EQ(3, output_->errors.length);  // one per reason listed above
+}
+
+TEST_F(GumboParserTest, SelfClosingTagWithComplexProcessing) {
+  Parse("<br/>");  // Self-closing <br>; it is the body child asserted below.
+  ASSERT_EQ(1, output_->errors.length); // No doctype.
+  ASSERT_EQ(1, GetChildCount(root_));
+
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
+  ASSERT_EQ(2, GetChildCount(html));  // head and body
+
+  GumboNode* head = GetChild(html, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
+  EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag);
+  EXPECT_EQ(0, GetChildCount(head));
+
+  GumboNode* body = GetChild(html, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(body));  // Needs non-empty input to hold.
+
+  GumboNode* br = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, br->type);
+  EXPECT_EQ(GUMBO_TAG_BR, br->v.element.tag);
+}
+
+TEST_F(GumboParserTest, UnexpectedEndBreak) {  // "</br>" yields a <br> element.
+  Parse("</br><div></div>");
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));  // br and div
+
+  GumboNode* br = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, br->type);
+  EXPECT_EQ(GUMBO_TAG_BR, br->v.element.tag);
+  ASSERT_EQ(0, GetChildCount(br));
+
+  GumboNode* div = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
+  EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag);
+  ASSERT_EQ(0, GetChildCount(div));
+}
+
+TEST_F(GumboParserTest, CaseSensitiveAttributes) {  // Value keeps its case.
+  Parse("<div class=CamelCase>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* div = GetChild(body, 0);
+  GumboVector* attributes = &div->v.element.attributes;
+  ASSERT_EQ(1, attributes->length);
+
+  GumboAttribute* clas = static_cast<GumboAttribute*>(attributes->data[0]);
+  EXPECT_EQ(GUMBO_ATTR_NAMESPACE_NONE, clas->attr_namespace);
+  EXPECT_STREQ("class", clas->name);
+  EXPECT_STREQ("CamelCase", clas->value);  // not lowercased
+}
+
+TEST_F(GumboParserTest, ExplicitHtmlStructure) {  // Fully spelled-out document.
+  Parse(
+      "<!doctype html>\n<html>"
+      "<head><title>Foo</title></head>\n"
+      "<body><div class=bar>Test</div></body></html>");
+  ASSERT_EQ(1, GetChildCount(root_));
+  EXPECT_EQ(0, output_->errors.length);  // no parse errors for this input
+
+  ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type);
+  EXPECT_STREQ("html", root_->v.document.name);
+  EXPECT_STREQ("", root_->v.document.public_identifier);
+  EXPECT_STREQ("", root_->v.document.system_identifier);
+
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, html->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
+  EXPECT_EQ(2, html->v.element.start_pos.line);
+  EXPECT_EQ(1, html->v.element.start_pos.column);
+  EXPECT_EQ(16, html->v.element.start_pos.offset);  // after "<!doctype html>\n"
+  EXPECT_EQ(3, html->v.element.end_pos.line);
+  EXPECT_EQ(39, html->v.element.end_pos.column);
+  EXPECT_EQ(92, html->v.element.end_pos.offset);
+  EXPECT_EQ("<html>", ToString(html->v.element.original_tag));
+  EXPECT_EQ("</html>", ToString(html->v.element.original_end_tag));
+  ASSERT_EQ(3, GetChildCount(html));  // head at 0, body at 2
+
+  GumboNode* head = GetChild(html, 0);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, head->parse_flags);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
+  EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag);
+  EXPECT_EQ(html, head->parent);
+  EXPECT_EQ(0, head->index_within_parent);
+  EXPECT_EQ(1, GetChildCount(head));
+
+  GumboNode* body = GetChild(html, 2);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, body->parse_flags);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
+  EXPECT_EQ(html, body->parent);
+  EXPECT_EQ(3, body->v.element.start_pos.line);
+  EXPECT_EQ(1, body->v.element.start_pos.column);
+  EXPECT_EQ(54, body->v.element.start_pos.offset);
+  EXPECT_EQ(3, body->v.element.end_pos.line);
+  EXPECT_EQ(32, body->v.element.end_pos.column);
+  EXPECT_EQ(85, body->v.element.end_pos.offset);
+  EXPECT_EQ("<body>", ToString(body->v.element.original_tag));
+  EXPECT_EQ("</body>", ToString(body->v.element.original_end_tag));
+  EXPECT_EQ(2, body->index_within_parent);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* div = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
+  EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag);
+  EXPECT_EQ(body, div->parent);
+  EXPECT_EQ(0, div->index_within_parent);
+  ASSERT_EQ(1, GetChildCount(div));
+
+  ASSERT_EQ(1, GetAttributeCount(div));
+  GumboAttribute* clas = GetAttribute(div, 0);
+  EXPECT_STREQ("class", clas->name);
+  EXPECT_EQ("class", ToString(clas->original_name));
+  EXPECT_STREQ("bar", clas->value);
+  EXPECT_EQ("bar", ToString(clas->original_value));  // unquoted in the input
+
+  GumboNode* text = GetChild(div, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("Test", text->v.text.text);
+}
+
+TEST_F(GumboParserTest, Whitespace) {  // Whitespace-only runs get their own type.
+  Parse("<ul>\n  <li>Text\n</ul>");
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* ul = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, ul->type);
+  EXPECT_EQ(GUMBO_TAG_UL, ul->v.element.tag);
+  ASSERT_EQ(2, GetChildCount(ul));  // whitespace node, then li
+
+  GumboNode* whitespace = GetChild(ul, 0);
+  ASSERT_EQ(GUMBO_NODE_WHITESPACE, whitespace->type);
+  EXPECT_STREQ("\n  ", whitespace->v.text.text);
+
+  GumboNode* li = GetChild(ul, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, li->type);
+  EXPECT_EQ(GUMBO_TAG_LI, li->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(li));
+
+  GumboNode* text = GetChild(li, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("Text\n", text->v.text.text);  // trailing newline stays in the text
+}
+
+TEST_F(GumboParserTest, DuplicateAttributes) {  // First of each duplicate wins.
+  std::string text("<input checked=\"false\" checked=true id=foo id='bar'>");
+  Parse(text);
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* input = GetChild(body, 0);
+  EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, input->parse_flags);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
+  EXPECT_EQ(GUMBO_TAG_INPUT, input->v.element.tag);
+  EXPECT_EQ(0, GetChildCount(input));
+  ASSERT_EQ(2, GetAttributeCount(input));  // four written, two kept
+
+  GumboAttribute* checked = GetAttribute(input, 0);
+  EXPECT_STREQ("checked", checked->name);
+  EXPECT_STREQ("false", checked->value);
+  EXPECT_EQ(1, checked->name_start.line);
+  EXPECT_EQ(8, checked->name_start.column);
+  EXPECT_EQ(15, checked->name_end.column);
+  EXPECT_EQ(16, checked->value_start.column);
+  EXPECT_EQ(23, checked->value_end.column);
+  EXPECT_EQ(7, checked->original_name.data - text.data());  // offset into text
+  EXPECT_EQ(7, checked->original_name.length);
+  EXPECT_EQ(15, checked->original_value.data - text.data());
+  EXPECT_EQ(7, checked->original_value.length);  // "false" plus both quotes
+
+  GumboAttribute* id = GetAttribute(input, 1);
+  EXPECT_STREQ("id", id->name);
+  EXPECT_STREQ("foo", id->value);
+
+  // TODO(jdtang): Run some assertions on the parse error that's added.
+}
+
+TEST_F(GumboParserTest, LinkTagsInHead) {  // <link>s and whitespace in <head>.
+  Parse(
+      "<html>\n"
+      "  <head>\n"
+      "    <title>Sample title></title>\n\n"
+      "    <link rel=stylesheet>\n"
+      "    <link rel=author>\n"
+      "  </head>\n"
+      "  <body>Foo</body>");
+  ASSERT_EQ(1, GetChildCount(root_));
+
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, html->parse_flags);  // no </html>
+  EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
+  ASSERT_EQ(3, GetChildCount(html));  // head at 0, body at 2
+
+  GumboNode* head = GetChild(html, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, head->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag);
+  EXPECT_EQ(7, GetChildCount(head));  // children 2..6 are inspected below
+
+  GumboNode* text1 = GetChild(head, 2);
+  ASSERT_EQ(GUMBO_NODE_WHITESPACE, text1->type);
+  EXPECT_STREQ("\n\n    ", text1->v.text.text);
+
+  GumboNode* link1 = GetChild(head, 3);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, link1->type);
+  EXPECT_EQ(GUMBO_TAG_LINK, link1->v.element.tag);
+  EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, link1->parse_flags);
+  EXPECT_EQ(0, GetChildCount(link1));
+
+  GumboNode* text2 = GetChild(head, 4);
+  ASSERT_EQ(GUMBO_NODE_WHITESPACE, text2->type);
+  EXPECT_STREQ("\n    ", text2->v.text.text);
+
+  GumboNode* link2 = GetChild(head, 5);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, link2->type);
+  EXPECT_EQ(GUMBO_TAG_LINK, link2->v.element.tag);
+  EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, link2->parse_flags);
+  EXPECT_EQ(0, GetChildCount(link2));
+
+  GumboNode* text3 = GetChild(head, 6);
+  ASSERT_EQ(GUMBO_NODE_WHITESPACE, text3->type);
+  EXPECT_STREQ("\n  ", text3->v.text.text);
+
+  GumboNode* body = GetChild(html, 2);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, body->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(body));
+}
+
+TEST_F(GumboParserTest, WhitespaceBeforeHtml) {  // "\n" before <html> adds no node.
+  Parse("<!doctype html>\n<html>Test</html>");
+  ASSERT_EQ(1, GetChildCount(root_));  // only <html>
+
+  GumboNode* body = GetChild(GetChild(root_, 0), 1);  // root -> html -> child 1
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body));
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* text = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("Test", text->v.text.text);
+}
+
+TEST_F(GumboParserTest, TextAfterHtml) {  // Text after </html> joins body text.
+  Parse("<html>Test</html> after doc");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body));
+  ASSERT_EQ(1, GetChildCount(body));  // a single merged text node
+
+  GumboNode* text = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("Test after doc", text->v.text.text);
+}
+
+TEST_F(GumboParserTest, WhitespaceInHead) {  // Spaces after <html> are dropped.
+  Parse("<html>  Test</html>");
+
+  GumboNode* html = GetChild(root_, 0);
+  EXPECT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_TAG_HTML, GetTag(html));
+  EXPECT_EQ(2, GetChildCount(html));  // head and body
+
+  GumboNode* head = GetChild(html, 0);
+  EXPECT_EQ(GUMBO_NODE_ELEMENT, head->type);
+  EXPECT_EQ(GUMBO_TAG_HEAD, GetTag(head));
+  EXPECT_EQ(0, GetChildCount(head));  // the spaces did not land in <head>
+
+  GumboNode* body = GetChild(html, 1);
+  EXPECT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body));
+
+  GumboNode* text = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("Test", text->v.text.text);  // nor in the body text
+}
+
+TEST_F(GumboParserTest, Doctype) {  // "<!doctype html>" gives no-quirks mode.
+  Parse("<!doctype html>Test");
+  GumboDocument* doc = &root_->v.document;
+  EXPECT_EQ(1, doc->children.length);  // the doctype itself is not a child
+  EXPECT_EQ(GUMBO_DOCTYPE_NO_QUIRKS, doc->doc_type_quirks_mode);
+
+  EXPECT_STREQ("html", doc->name);
+  EXPECT_STREQ("", doc->public_identifier);
+  EXPECT_STREQ("", doc->system_identifier);
+}
+
+TEST_F(GumboParserTest, InvalidDoctype) {  // A doctype after text is ignored.
+  Parse("Test<!doctype root_element SYSTEM \"DTD_location\">");
+
+  // Default doc token; the declared one is ignored.
+  GumboDocument* doc = &root_->v.document;
+  EXPECT_EQ(1, doc->children.length);
+  EXPECT_EQ(GUMBO_DOCTYPE_QUIRKS, doc->doc_type_quirks_mode);
+
+  EXPECT_STREQ("", doc->name);  // not "root_element"
+  EXPECT_STREQ("", doc->public_identifier);
+  EXPECT_STREQ("", doc->system_identifier);  // not "DTD_location"
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body));
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* text = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("Test", text->v.text.text);
+}
+
+TEST_F(GumboParserTest, SingleComment) {  // Lone comment is a document child.
+  Parse("<!-- comment -->");
+  GumboNode* comment = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type);
+  EXPECT_STREQ(" comment ", comment->v.text.text);  // delimiters are excluded
+}
+
+TEST_F(GumboParserTest, CommentInText) {  // Comment splits the body text in two.
+  Parse("Start <!-- comment --> end");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(3, GetChildCount(body));  // text, comment, text
+
+  GumboNode* start = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, start->type);
+  EXPECT_STREQ("Start ", start->v.text.text);
+
+  GumboNode* comment = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type);
+  EXPECT_EQ(body, comment->parent);
+  EXPECT_EQ(1, comment->index_within_parent);
+  EXPECT_STREQ(" comment ", comment->v.text.text);
+
+  GumboNode* end = GetChild(body, 2);
+  ASSERT_EQ(GUMBO_NODE_TEXT, end->type);
+  EXPECT_STREQ(" end", end->v.text.text);
+}
+
+TEST_F(GumboParserTest, CommentBeforeNode) {  // Comment ahead of any element.
+  Parse("<!--This is a comment-->\n<h1>hello world!</h1>");
+  GumboNode* comment = GetChild(root_, 0);  // comment is a document child
+  ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type);
+  EXPECT_STREQ("This is a comment", comment->v.text.text);
+  EXPECT_EQ(
+      "<!--This is a comment-->", ToString(comment->v.text.original_text));
+
+  // Newline is ignored per the rules for "initial" insertion mode.
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));  // only the h1
+
+  GumboNode* h1 = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, h1->type);
+  EXPECT_EQ(GUMBO_TAG_H1, h1->v.element.tag);
+}
+
+TEST_F(GumboParserTest, CommentInVerbatimMode) {  // Comment after </body>.
+  Parse("<body> <div id='onegoogle'>Text</div>  </body><!-- comment \n\n-->");
+
+  GumboNode* html = GetChild(root_, 0);
+  EXPECT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_TAG_HTML, GetTag(html));
+  EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLIED |
+                GUMBO_INSERTION_IMPLICIT_END_TAG,
+      html->parse_flags);
+  EXPECT_EQ(3, GetChildCount(html));  // body at 1, comment at 2
+
+  GumboNode* body = GetChild(html, 1);
+  EXPECT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body));
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, body->parse_flags);
+  EXPECT_EQ(3, GetChildCount(body));
+
+  GumboNode* comment = GetChild(html, 2);  // child of <html>, not of <body>
+  ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, comment->parse_flags);
+  EXPECT_STREQ(" comment \n\n", comment->v.text.text);  // newlines preserved
+}
+
+TEST_F(GumboParserTest, UnknownTag) {  // Unknown element => GUMBO_TAG_UNKNOWN.
+  Parse("<foo>12</FOO>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* foo = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, foo->type);
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, GetTag(foo));
+  EXPECT_EQ("<foo>", ToString(foo->v.element.original_tag));  // source text kept
+  // According to the spec, the misplaced end tag is ignored, and so we return
+  // an empty original_end_tag text. We may want to extend our error-reporting
+  // a bit so that we close off the tag that it *would have closed*, had the
+  // HTML been correct, along with a parse flag that says the end tag was in the
+  // wrong place.
+  EXPECT_EQ("", ToString(foo->v.element.original_end_tag));
+}
+
+TEST_F(GumboParserTest, UnknownTag2) {  // Unknown element nested between divs.
+  Parse("<div><sarcasm><div></div></sarcasm></div>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* div = GetChild(body, 0);
+  ASSERT_EQ(1, GetChildCount(div));
+  GumboNode* sarcasm = GetChild(div, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, sarcasm->type);
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, GetTag(sarcasm));
+  EXPECT_EQ("<sarcasm>", ToString(sarcasm->v.element.original_tag));
+  EXPECT_EQ("</sarcasm>", ToString(sarcasm->v.element.original_end_tag));  // kept
+}
+
+TEST_F(GumboParserTest, InvalidEndTag) {  // "</img>" adds nothing to the tree.
+  Parse("<a><img src=foo.jpg></img></a>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* a = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, a->type);
+  EXPECT_EQ(GUMBO_TAG_A, GetTag(a));
+  ASSERT_EQ(1, GetChildCount(a));  // only the img
+
+  GumboNode* img = GetChild(a, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, img->type);
+  EXPECT_EQ(GUMBO_TAG_IMG, GetTag(img));
+  ASSERT_EQ(0, GetChildCount(img));
+}
+
+TEST_F(GumboParserTest, Tables) {  // Table content, incl. misplaced elements.
+  Parse(
+      "<html><table>\n"
+      "  <tr><br /></invalid-tag>\n"
+      "    <th>One</th>\n"
+      "    <td>Two</td>\n"
+      "  </tr>\n"
+      "  <iframe></iframe>"
+      "</table><tr></tr><div></div></html>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(4, GetChildCount(body));  // br, iframe, table, div
+
+  GumboNode* br = GetChild(body, 0);  // The <br /> written inside <tr>.
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, br->type);
+  EXPECT_EQ(GUMBO_TAG_BR, GetTag(br));
+  EXPECT_EQ(body, br->parent);
+  EXPECT_EQ(0, br->index_within_parent);
+  ASSERT_EQ(0, GetChildCount(br));
+
+  GumboNode* iframe = GetChild(body, 1);  // Also written inside <table>.
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, iframe->type);
+  EXPECT_EQ(GUMBO_TAG_IFRAME, GetTag(iframe));
+  ASSERT_EQ(0, GetChildCount(iframe));
+
+  GumboNode* table = GetChild(body, 2);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
+  EXPECT_EQ(body, table->parent);
+  EXPECT_EQ(2, table->index_within_parent);
+  ASSERT_EQ(2, GetChildCount(table));  // whitespace, then tbody
+
+  GumboNode* table_text = GetChild(table, 0);
+  ASSERT_EQ(GUMBO_NODE_WHITESPACE, table_text->type);
+  EXPECT_STREQ("\n  ", table_text->v.text.text);
+
+  GumboNode* tbody = GetChild(table, 1);  // No <tbody> tag in the input.
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
+  EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody));
+  ASSERT_EQ(2, GetChildCount(tbody));
+  // Second node is whitespace.
+
+  GumboNode* tr = GetChild(tbody, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
+  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr));
+  ASSERT_EQ(5, GetChildCount(tr));  // Including whitespace.
+
+  GumboNode* tr_text = GetChild(tr, 0);
+  ASSERT_EQ(GUMBO_NODE_WHITESPACE, tr_text->type);
+  EXPECT_EQ(tr, tr_text->parent);
+  EXPECT_EQ(0, tr_text->index_within_parent);
+  EXPECT_STREQ("\n    ", tr_text->v.text.text);
+
+  GumboNode* th = GetChild(tr, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type);
+  EXPECT_EQ(GUMBO_TAG_TH, GetTag(th));
+  EXPECT_EQ(tr, th->parent);
+  EXPECT_EQ(1, th->index_within_parent);
+  ASSERT_EQ(1, GetChildCount(th));
+
+  GumboNode* th_text = GetChild(th, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, th_text->type);
+  EXPECT_STREQ("One", th_text->v.text.text);
+
+  GumboNode* td = GetChild(tr, 3);  // Index 2 is not inspected.
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type);
+  EXPECT_EQ(GUMBO_TAG_TD, GetTag(td));
+  ASSERT_EQ(1, GetChildCount(td));
+
+  GumboNode* td_text = GetChild(td, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, td_text->type);
+  EXPECT_STREQ("Two", td_text->v.text.text);
+
+  GumboNode* td2_text = GetChild(td, 0);  // NOTE(review): repeats td_text check.
+  ASSERT_EQ(GUMBO_NODE_TEXT, td2_text->type);
+  EXPECT_STREQ("Two", td2_text->v.text.text);
+
+  GumboNode* div = GetChild(body, 3);  // The stray <tr></tr> left no node.
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
+  EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
+  ASSERT_EQ(0, GetChildCount(div));
+}
+
+TEST_F(GumboParserTest, StartParagraphInTable) {  // <P> opened inside <table>.
+  Parse("<table><P></tr></td>foo</table>");  // <P> supplies the <p> asserted below
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));  // p, then table
+
+  GumboNode* paragraph = GetChild(body, 0);  // Ends up before the table.
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, paragraph->type);
+  EXPECT_EQ(GUMBO_TAG_P, GetTag(paragraph));
+  EXPECT_EQ(body, paragraph->parent);
+  EXPECT_EQ(0, paragraph->index_within_parent);
+  ASSERT_EQ(1, GetChildCount(paragraph));
+
+  GumboNode* text = GetChild(paragraph, 0);  // "foo" goes into the open <p>.
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("foo", text->v.text.text);
+
+  GumboNode* table = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
+  EXPECT_EQ(body, table->parent);
+  EXPECT_EQ(1, table->index_within_parent);
+  ASSERT_EQ(0, GetChildCount(table));  // nothing stayed inside the table
+}
+
+TEST_F(GumboParserTest, EndParagraphInTable) {  // Stray </p> inside <table>.
+  Parse("<table></p></table>");  // </p> with no open <p> creates an empty <p>
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));  // p, then table
+
+  GumboNode* paragraph = GetChild(body, 0);  // Ends up before the table.
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, paragraph->type);
+  EXPECT_EQ(GUMBO_TAG_P, GetTag(paragraph));
+  EXPECT_EQ(body, paragraph->parent);
+  EXPECT_EQ(0, paragraph->index_within_parent);
+  ASSERT_EQ(0, GetChildCount(paragraph));
+
+  GumboNode* table = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
+  EXPECT_EQ(body, table->parent);
+  EXPECT_EQ(1, table->index_within_parent);
+  ASSERT_EQ(0, GetChildCount(table));  // nothing stayed inside the table
+}
+
+TEST_F(GumboParserTest, UnknownTagInTable) {  // <foo> in <table> moves before it.
+  Parse("<table><foo>bar</table>");
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));  // foo, then table
+
+  GumboNode* foo = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, foo->type);
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, GetTag(foo));
+  EXPECT_EQ("<foo>", ToString(foo->v.element.original_tag));
+  EXPECT_EQ(body, foo->parent);
+  EXPECT_EQ(0, foo->index_within_parent);
+  ASSERT_EQ(1, GetChildCount(foo));
+
+  GumboNode* bar = GetChild(foo, 0);  // The text follows <foo> out of the table.
+  ASSERT_EQ(GUMBO_NODE_TEXT, bar->type);
+  EXPECT_STREQ("bar", bar->v.text.text);
+
+  GumboNode* table = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
+  EXPECT_EQ(body, table->parent);
+  EXPECT_EQ(1, table->index_within_parent);
+  ASSERT_EQ(0, GetChildCount(table));  // nothing stayed inside the table
+}
+
+TEST_F(GumboParserTest, UnclosedTableTags) {
+  Parse(
+      "<html><table>\n"
+      "  <tr>\n"
+      "    <td>One\n"
+      "    <td>Two\n"
+      "  <tr><td>Row2\n"
+      "  <tr><td>Row3\n"
+      "</table>\n"
+      "</html>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));
+  // Body is expected to hold the table plus a trailing "\n" whitespace node.
+  GumboNode* table = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
+  ASSERT_EQ(2, GetChildCount(table));
+
+  GumboNode* table_text = GetChild(table, 0);
+  ASSERT_EQ(GUMBO_NODE_WHITESPACE, table_text->type);
+  EXPECT_STREQ("\n  ", table_text->v.text.text);
+
+  GumboNode* tbody = GetChild(table, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
+  EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody));
+  ASSERT_EQ(3, GetChildCount(tbody));
+
+  GumboNode* tr = GetChild(tbody, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
+  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr));
+  ASSERT_EQ(3, GetChildCount(tr));
+
+  GumboNode* tr_text = GetChild(tr, 0);
+  ASSERT_EQ(GUMBO_NODE_WHITESPACE, tr_text->type);
+  EXPECT_STREQ("\n    ", tr_text->v.text.text);
+
+  GumboNode* td1 = GetChild(tr, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, td1->type);
+  EXPECT_EQ(GUMBO_TAG_TD, GetTag(td1));
+  ASSERT_EQ(1, GetChildCount(td1));
+
+  GumboNode* td1_text = GetChild(td1, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, td1_text->type);
+  EXPECT_STREQ("One\n    ", td1_text->v.text.text);
+
+  GumboNode* td2 = GetChild(tr, 2);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, td2->type);
+  EXPECT_EQ(GUMBO_TAG_TD, GetTag(td2));
+  ASSERT_EQ(1, GetChildCount(td2));
+
+  GumboNode* td2_text = GetChild(td2, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, td2_text->type);
+  EXPECT_STREQ("Two\n  ", td2_text->v.text.text);
+  // The second row is skipped; only the third row's child count is checked.
+  GumboNode* tr3 = GetChild(tbody, 2);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr3->type);
+  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr3));
+  ASSERT_EQ(1, GetChildCount(tr3));
+
+  GumboNode* body_text = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_WHITESPACE, body_text->type);
+  EXPECT_STREQ("\n", body_text->v.text.text);
+}
+
+TEST_F(GumboParserTest, MisnestedTable) {
+  // Expects body = [empty <div>, <table>]; table holds tbody > tr > empty td.
+  Parse("<table><tr><div><td></div></table>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));
+
+  GumboNode* div = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
+  EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
+  ASSERT_EQ(0, GetChildCount(div));
+
+  GumboNode* table = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
+  ASSERT_EQ(1, GetChildCount(table));
+
+  GumboNode* tbody = GetChild(table, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
+  EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody));
+  ASSERT_EQ(1, GetChildCount(tbody));
+
+  GumboNode* tr = GetChild(tbody, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
+  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr));
+  ASSERT_EQ(1, GetChildCount(tr));
+
+  GumboNode* td = GetChild(tr, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type);
+  EXPECT_EQ(GUMBO_TAG_TD, GetTag(td));
+  ASSERT_EQ(0, GetChildCount(td));
+}
+
+TEST_F(GumboParserTest, MisnestedTable2) {
+  // Expects the outer <td> to hold "Cell1", "Cell3" and then the inner table.
+  Parse("<table><td>Cell1<table><th>Cell2<tr>Cell3</table>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* table1 = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table1->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table1));
+  ASSERT_EQ(1, GetChildCount(table1));
+
+  GumboNode* tbody1 = GetChild(table1, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody1->type);
+  EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody1));
+  ASSERT_EQ(1, GetChildCount(tbody1));
+
+  GumboNode* tr1 = GetChild(tbody1, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr1->type);
+  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr1));
+  ASSERT_EQ(1, GetChildCount(tr1));
+
+  GumboNode* td1 = GetChild(tr1, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, td1->type);
+  EXPECT_EQ(GUMBO_TAG_TD, GetTag(td1));
+  ASSERT_EQ(3, GetChildCount(td1));
+
+  GumboNode* cell1 = GetChild(td1, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, cell1->type);
+  EXPECT_STREQ("Cell1", cell1->v.text.text);
+
+  // Foster-parented out of the inner <tr>
+  GumboNode* cell3 = GetChild(td1, 1);
+  ASSERT_EQ(GUMBO_NODE_TEXT, cell3->type);
+  EXPECT_STREQ("Cell3", cell3->v.text.text);
+
+  GumboNode* table2 = GetChild(td1, 2);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table2->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table2));
+  ASSERT_EQ(1, GetChildCount(table2));
+
+  GumboNode* tbody2 = GetChild(table2, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody2->type);
+  EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody2));
+  ASSERT_EQ(2, GetChildCount(tbody2));
+
+  GumboNode* tr2 = GetChild(tbody2, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr2->type);
+  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr2));
+  ASSERT_EQ(1, GetChildCount(tr2));
+
+  GumboNode* th = GetChild(tr2, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type);
+  EXPECT_EQ(GUMBO_TAG_TH, GetTag(th));
+  ASSERT_EQ(1, GetChildCount(th));
+
+  GumboNode* cell2 = GetChild(th, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, cell2->type);
+  EXPECT_STREQ("Cell2", cell2->v.text.text);
+
+  GumboNode* tr3 = GetChild(tbody2, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr3->type);
+  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr3));
+  ASSERT_EQ(0, GetChildCount(tr3));
+}
+
+TEST_F(GumboParserTest, Select) {
+  // Expects <select> with two <option> children, then an empty <div> sibling.
+  Parse("<select><option>One<option>Two</select><div></div>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));
+
+  GumboNode* select = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type);
+  EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select));
+  ASSERT_EQ(2, GetChildCount(select));
+
+  GumboNode* option1 = GetChild(select, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, option1->type);
+  EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option1));
+  ASSERT_EQ(1, GetChildCount(option1));
+
+  GumboNode* option2 = GetChild(select, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, option2->type);
+  EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option2));
+  ASSERT_EQ(1, GetChildCount(option2));
+
+  GumboNode* div = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
+  EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
+  ASSERT_EQ(0, GetChildCount(div));
+}
+
+TEST_F(GumboParserTest, ComplicatedSelect) {
+  Parse(
+      "<select><div class=foo></div><optgroup><option>Option"
+      "</option><input></optgroup></select>");
+  // Expects select > optgroup > option "Option"; <input> becomes a body child.
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));
+
+  GumboNode* select = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type);
+  EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select));
+  ASSERT_EQ(1, GetChildCount(select));
+
+  GumboNode* optgroup = GetChild(select, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, optgroup->type);
+  EXPECT_EQ(GUMBO_TAG_OPTGROUP, GetTag(optgroup));
+  ASSERT_EQ(1, GetChildCount(optgroup));
+
+  GumboNode* option = GetChild(optgroup, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, option->type);
+  EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option));
+  ASSERT_EQ(1, GetChildCount(option));
+
+  GumboNode* text = GetChild(option, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("Option", text->v.text.text);
+
+  GumboNode* input = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
+  EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
+  ASSERT_EQ(0, GetChildCount(input));
+}
+
+TEST_F(GumboParserTest, DoubleSelect) {
+  // Expects body = [empty <select>, empty <div>]; no nested select is created.
+  Parse("<select><select><div></div>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));
+
+  GumboNode* select = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type);
+  EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select));
+  ASSERT_EQ(0, GetChildCount(select));
+
+  GumboNode* div = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
+  EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
+  ASSERT_EQ(0, GetChildCount(div));
+}
+
+TEST_F(GumboParserTest, InputInSelect) {
+  // Expects body = [empty <select>, <input>, <div>]; <input> is not in select.
+  Parse("<select><input /><div></div>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(3, GetChildCount(body));
+
+  GumboNode* select = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type);
+  EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select));
+  ASSERT_EQ(0, GetChildCount(select));
+
+  GumboNode* input = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
+  EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
+  ASSERT_EQ(0, GetChildCount(input));
+
+  GumboNode* div = GetChild(body, 2);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
+  EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
+  ASSERT_EQ(0, GetChildCount(div));
+}
+
+TEST_F(GumboParserTest, SelectInTable) {
+  // Expects table > tbody > tr > td > select > option, with the option empty.
+  Parse("<table><td><select><option value=1></table>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* table = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
+  ASSERT_EQ(1, GetChildCount(table));
+
+  GumboNode* tbody = GetChild(table, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
+  EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody));
+  ASSERT_EQ(1, GetChildCount(tbody));
+
+  GumboNode* tr = GetChild(tbody, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
+  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr));
+  ASSERT_EQ(1, GetChildCount(tr));
+
+  GumboNode* td = GetChild(tr, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type);
+  EXPECT_EQ(GUMBO_TAG_TD, GetTag(td));
+  ASSERT_EQ(1, GetChildCount(td));
+
+  GumboNode* select = GetChild(td, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type);
+  EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select));
+  ASSERT_EQ(1, GetChildCount(select));
+
+  GumboNode* option = GetChild(select, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, option->type);
+  EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option));
+  ASSERT_EQ(0, GetChildCount(option));
+}
+
+TEST_F(GumboParserTest, ImplicitColgroup) {
+  // Expects both <col> elements wrapped in one <colgroup> under the <table>.
+  Parse("<table><col /><col /></table>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* table = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
+  ASSERT_EQ(1, GetChildCount(table));
+
+  GumboNode* colgroup = GetChild(table, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, colgroup->type);
+  EXPECT_EQ(GUMBO_TAG_COLGROUP, GetTag(colgroup));
+  ASSERT_EQ(2, GetChildCount(colgroup));
+
+  GumboNode* col1 = GetChild(colgroup, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, col1->type);
+  EXPECT_EQ(GUMBO_TAG_COL, GetTag(col1));
+  ASSERT_EQ(0, GetChildCount(col1));
+
+  GumboNode* col2 = GetChild(colgroup, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, col2->type);
+  EXPECT_EQ(GUMBO_TAG_COL, GetTag(col2));
+  ASSERT_EQ(0, GetChildCount(col2));
+}
+
+TEST_F(GumboParserTest, Form) {
+  // Expects body = [<form> containing one <input>, text "After form"].
+  Parse("<form><input type=hidden /></form>After form");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));
+
+  GumboNode* form = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
+  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
+  ASSERT_EQ(1, GetChildCount(form));
+
+  GumboNode* input = GetChild(form, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
+  EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
+  ASSERT_EQ(0, GetChildCount(input));
+
+  GumboNode* text = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("After form", text->v.text.text);
+}
+
+// See: https://github.com/google/gumbo-parser/issues/350
+TEST_F(GumboParserTest, FormEndPos) {
+  // The leading space puts <form> at offset 1; "</form>" begins at offset 28.
+  Parse(" <form><input type=hidden /></form>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* form = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
+  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
+  ASSERT_EQ(1, GetChildCount(form));
+
+  ASSERT_EQ(1, form->v.element.start_pos.offset);
+  ASSERT_EQ(28, form->v.element.end_pos.offset);
+}
+
+TEST_F(GumboParserTest, NestedForm) {
+  // Expects no nested form element: form = [label, input], then "After form".
+  Parse("<form><label>Label</label><form><input id=input2></form>After form");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));
+
+  GumboNode* form = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
+  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
+  ASSERT_EQ(2, GetChildCount(form));
+
+  GumboNode* label = GetChild(form, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, label->type);
+  EXPECT_EQ(GUMBO_TAG_LABEL, GetTag(label));
+  ASSERT_EQ(1, GetChildCount(label));
+
+  GumboNode* input = GetChild(form, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
+  EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
+  ASSERT_EQ(0, GetChildCount(input));
+
+  GumboNode* text = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("After form", text->v.text.text);
+}
+
+TEST_F(GumboParserTest, MisnestedFormInTable) {
+  // Parse of this is somewhat weird. The first <form> is opened outside the
+  // table, so when </form> checks to see if there's a form in scope, it stops
+  // at the <table> boundary and returns null. The form pointer is nulled out
+  // anyway, though, which means that the second form (parsed in the table body
+  // state) ends up creating an element. It's immediately popped off
+  // the stack, but the form element pointer remains set to that node (which is
+  // not on the stack of open elements). The final </form> tag triggers the
+  // "does not have node in scope" clause and is ignored. (Note that this is
+  // different from "has a form element in scope" - the first form is still in
+  // scope at that point, but the form pointer does not point to it.) Then the
+  // original <form> element is closed implicitly when the table cell is closed.
+  Parse(
+      "<table><tr><td>"
+      "<form><table><tr><td></td></tr></form>"
+      "<form></tr></table></form>"
+      "</td></tr></table");
+  // Expects form1 in the outer cell and empty form2 as 2nd child of inner tbody.
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* table1 = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table1->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table1));
+  ASSERT_EQ(1, GetChildCount(table1));
+
+  GumboNode* tbody1 = GetChild(table1, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody1->type);
+  EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody1));
+  ASSERT_EQ(1, GetChildCount(tbody1));
+
+  GumboNode* tr1 = GetChild(tbody1, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr1->type);
+  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr1));
+  ASSERT_EQ(1, GetChildCount(tr1));
+
+  GumboNode* td1 = GetChild(tr1, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, td1->type);
+  EXPECT_EQ(GUMBO_TAG_TD, GetTag(td1));
+  ASSERT_EQ(1, GetChildCount(td1));
+
+  GumboNode* form1 = GetChild(td1, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, form1->type);
+  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form1));
+  ASSERT_EQ(1, GetChildCount(form1));
+
+  GumboNode* table2 = GetChild(form1, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table2->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table2));
+  ASSERT_EQ(1, GetChildCount(table2));
+
+  GumboNode* tbody2 = GetChild(table2, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody2->type);
+  EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody2));
+  ASSERT_EQ(2, GetChildCount(tbody2));
+
+  GumboNode* tr2 = GetChild(tbody2, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr2->type);
+  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr2));
+  ASSERT_EQ(1, GetChildCount(tr2));
+
+  GumboNode* form2 = GetChild(tbody2, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, form2->type);
+  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form2));
+  ASSERT_EQ(0, GetChildCount(form2));
+}
+
+TEST_F(GumboParserTest, NestedRawtextTags) {
+  Parse(
+      "<noscript><noscript jstag=false>"
+      "<style>div{text-align:center}</style></noscript>");
+  // Expects head > noscript > style > text; only one <noscript> element exists.
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_TAG_HTML, GetTag(html));
+  EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
+                GUMBO_INSERTION_IMPLIED,
+      html->parse_flags);
+  ASSERT_EQ(2, GetChildCount(html));
+
+  GumboNode* head = GetChild(html, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
+  EXPECT_EQ(GUMBO_TAG_HEAD, GetTag(head));
+  EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
+                GUMBO_INSERTION_IMPLIED,
+      head->parse_flags);
+  ASSERT_EQ(1, GetChildCount(head));
+
+  GumboNode* noscript = GetChild(head, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, noscript->type);
+  EXPECT_EQ(GUMBO_TAG_NOSCRIPT, GetTag(noscript));
+  ASSERT_EQ(1, GetChildCount(noscript));
+
+  GumboNode* style = GetChild(noscript, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, style->type);
+  EXPECT_EQ(GUMBO_TAG_STYLE, GetTag(style));
+  ASSERT_EQ(1, GetChildCount(style));
+
+  GumboNode* text = GetChild(style, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("div{text-align:center}", text->v.text.text);
+}
+
+TEST_F(GumboParserTest, RawtextInBody) {
+  // Expects <noembed> as the only body child, carrying exactly one attribute.
+  Parse("<body><noembed jsif=false></noembed>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* noembed = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, noembed->type);
+  EXPECT_EQ(GUMBO_TAG_NOEMBED, GetTag(noembed));
+  EXPECT_EQ(1, GetAttributeCount(noembed));
+}
+
+TEST_F(GumboParserTest, MetaBeforeHead) {
+  Parse (
+    "<html><meta http-equiv='content-type' "
+    "content='text/html; charset=UTF-8' /><head></head>"
+  );
+  // GetAndAssertBody() is the only check performed on the resulting tree.
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  // Testing for a memory leak here, but
+  // TODO(jdtang): Flesh out structural asserts.
+}
+
+TEST_F(GumboParserTest, NoahsArkClause) {
+  Parse (
+    "<p><font size=4><font color=red><font size=4><font size=4>"
+    "<font size=4><font size=4><font size=4><font color=red><p>X"
+  );
+  // Expects body = [<p>, <p>]; checks the <font color=red> found under each.
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));
+
+  GumboNode* p1 = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, p1->type);
+  EXPECT_EQ(GUMBO_TAG_P, p1->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(p1));
+
+  GumboNode* size1 = GetChild(p1, 0);
+  GumboNode* red1 = GetChild(size1, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, red1->type);
+  EXPECT_EQ(GUMBO_TAG_FONT, red1->v.element.tag);
+  ASSERT_EQ(1, GetAttributeCount(red1));
+  GumboAttribute* red1_attr = GetAttribute(red1, 0);
+  EXPECT_STREQ("color", red1_attr->name);
+  EXPECT_STREQ("red", red1_attr->value);
+  ASSERT_EQ(1, GetChildCount(red1));
+
+  GumboNode* p2 = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, p2->type);
+  EXPECT_EQ(GUMBO_TAG_P, p2->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(p2));
+
+  GumboNode* red2 = GetChild(p2, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, red2->type);
+  EXPECT_EQ(GUMBO_TAG_FONT, red2->v.element.tag);
+  ASSERT_EQ(1, GetAttributeCount(red2));
+  GumboAttribute* red2_attr = GetAttribute(red2, 0);
+  EXPECT_STREQ("color", red2_attr->name);
+  EXPECT_STREQ("red", red2_attr->value);
+  ASSERT_EQ(1, GetChildCount(red2));
+}
+
+TEST_F(GumboParserTest, AdoptionAgency1) {
+  // https://html.spec.whatwg.org/multipage/parsing.html#misnested-tags:-b-i-/b-/i
+  Parse("<p>1<b>2<i>3</b>4</i>5</p>");
+  ASSERT_EQ(1, GetChildCount(root_));
+  // <html> and <body> carry the "implied" flags; <p> below is a normal insert.
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
+                GUMBO_INSERTION_IMPLIED,
+      html->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
+  ASSERT_EQ(2, GetChildCount(html));
+
+  GumboNode* body = GetChild(html, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
+                GUMBO_INSERTION_IMPLIED,
+      body->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* p = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, p->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, p->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_P, p->v.element.tag);
+  ASSERT_EQ(4, GetChildCount(p));
+
+  GumboNode* text1 = GetChild(p, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text1->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text1->parse_flags);
+  EXPECT_STREQ("1", text1->v.text.text);
+
+  GumboNode* b = GetChild(p, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, b->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, b->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_B, b->v.element.tag);
+  ASSERT_EQ(2, GetChildCount(b));
+
+  GumboNode* text2 = GetChild(b, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text2->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
+  EXPECT_STREQ("2", text2->v.text.text);
+
+  GumboNode* i = GetChild(b, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, i->type);
+  EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, i->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_I, i->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(i));
+
+  GumboNode* text3 = GetChild(i, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text3->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text3->parse_flags);
+  EXPECT_STREQ("3", text3->v.text.text);
+
+  GumboNode* i2 = GetChild(p, 2);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, i2->type);
+  EXPECT_EQ(GUMBO_INSERTION_BY_PARSER |
+                GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT,
+      i2->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_I, i2->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(i2));
+
+  GumboNode* text4 = GetChild(i2, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text4->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text4->parse_flags);
+  EXPECT_STREQ("4", text4->v.text.text);
+
+  GumboNode* text5 = GetChild(p, 3);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text5->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text5->parse_flags);
+  EXPECT_STREQ("5", text5->v.text.text);
+}
+
+TEST_F(GumboParserTest, AdoptionAgency2) {
+  // https://html.spec.whatwg.org/multipage/parsing.html#misnested-tags:-b-p-/b-/p
+  Parse("<b>1<p>2</b>3</p>");
+  ASSERT_EQ(1, GetChildCount(root_));
+  // Expects body = [<b> "1", <p> holding a cloned <b> "2" and then text "3"].
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
+                GUMBO_INSERTION_IMPLIED,
+      html->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
+  ASSERT_EQ(2, GetChildCount(html));
+
+  GumboNode* body = GetChild(html, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+  EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
+                GUMBO_INSERTION_IMPLIED,
+      body->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
+  ASSERT_EQ(2, GetChildCount(body));
+
+  GumboNode* b = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, b->type);
+  EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, b->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_B, b->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(b));
+
+  GumboNode* text1 = GetChild(b, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text1->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text1->parse_flags);
+  EXPECT_STREQ("1", text1->v.text.text);
+
+  GumboNode* p = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, p->type);
+  EXPECT_EQ(GUMBO_INSERTION_ADOPTION_AGENCY_MOVED, p->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_P, p->v.element.tag);
+  ASSERT_EQ(2, GetChildCount(p));
+
+  GumboNode* b2 = GetChild(p, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, b2->type);
+  EXPECT_EQ(GUMBO_INSERTION_ADOPTION_AGENCY_CLONED | GUMBO_INSERTION_BY_PARSER,
+      b2->parse_flags);
+  EXPECT_EQ(GUMBO_TAG_B, b2->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(b2));
+
+  GumboNode* text2 = GetChild(b2, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text2->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
+  EXPECT_STREQ("2", text2->v.text.text);
+
+  GumboNode* text3 = GetChild(p, 1);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text3->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text3->parse_flags);
+  EXPECT_STREQ("3", text3->v.text.text);
+}
+
+TEST_F(GumboParserTest, AdoptionAgency3) {
+  Parse("<div><a><b><u><i><code><div></a>");  // parse only; nothing is asserted
+}
+
+TEST_F(GumboParserTest, ImplicitlyCloseLists) {
+  Parse(
+      "<ul>\n"
+      "  <li>First\n"
+      "  <li>Second\n"
+      "</ul>");
+  // Expects ul = [whitespace, <li>, <li>]; each <li> has exactly one child.
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* ul = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, ul->type);
+  EXPECT_EQ(GUMBO_TAG_UL, GetTag(ul));
+  ASSERT_EQ(3, GetChildCount(ul));
+
+  GumboNode* text = GetChild(ul, 0);
+  ASSERT_EQ(GUMBO_NODE_WHITESPACE, text->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text->parse_flags);
+  EXPECT_STREQ("\n  ", text->v.text.text);
+
+  GumboNode* li1 = GetChild(ul, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, li1->type);
+  EXPECT_EQ(GUMBO_TAG_LI, GetTag(li1));
+  ASSERT_EQ(1, GetChildCount(li1));
+
+  GumboNode* li2 = GetChild(ul, 2);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, li2->type);
+  EXPECT_EQ(GUMBO_TAG_LI, GetTag(li2));
+  ASSERT_EQ(1, GetChildCount(li2));
+}
+
+TEST_F(GumboParserTest, CData) {
+  // Expects the CDATA section inside <svg> to become a GUMBO_NODE_CDATA node.
+  Parse("<svg><![CDATA[this is text]]></svg>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* svg = GetChild(body, 0);
+  ASSERT_EQ(1, GetChildCount(svg));
+
+  GumboNode* cdata = GetChild(svg, 0);
+  ASSERT_EQ(GUMBO_NODE_CDATA, cdata->type);
+  EXPECT_STREQ("this is text", cdata->v.text.text);
+}
+
+TEST_F(GumboParserTest, CDataUnsafe) {
+  // Can't use Parse() because of the strlen
+  output_ =
+      gumbo_parse_with_options(&options_, "<svg><![CDATA[\0filler\0text\0]]>",
+          sizeof("<svg><![CDATA[\0filler\0text\0]]>") - 1);
+  root_ = output_->document;
+  // Expects each NUL embedded in the CDATA section to come out as U+FFFD.
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* svg = GetChild(body, 0);
+  ASSERT_EQ(1, GetChildCount(svg));
+
+  GumboNode* cdata = GetChild(svg, 0);
+  ASSERT_EQ(GUMBO_NODE_CDATA, cdata->type);
+  // \xEF\xBF\xBD = unicode replacement char
+  EXPECT_STREQ(
+      "\xEF\xBF\xBD"
+      "filler\xEF\xBF\xBD"
+      "text\xEF\xBF\xBD",
+      cdata->v.text.text);
+}
+
+TEST_F(GumboParserTest, CDataInBody) {
+  // Expects "<![CDATA[...]]>" inside a <div> to come out as a comment node.
+  Parse("<div><![CDATA[this is text]]></div>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* div = GetChild(body, 0);
+  ASSERT_EQ(1, GetChildCount(div));
+
+  GumboNode* cdata = GetChild(div, 0);
+  ASSERT_EQ(GUMBO_NODE_COMMENT, cdata->type);
+  EXPECT_STREQ("[CDATA[this is text]]", cdata->v.text.text);
+}
+
+TEST_F(GumboParserTest, FormattingTagsInHeading) {
+  // Expects <b> to end with </h2> and a reconstructed <b> to wrap "text".
+  Parse("<h2>This is <b>old</h2>text");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));
+
+  GumboNode* h2 = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, h2->type);
+  EXPECT_EQ(GUMBO_TAG_H2, GetTag(h2));
+  ASSERT_EQ(2, GetChildCount(h2));
+
+  GumboNode* text1 = GetChild(h2, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text1->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text1->parse_flags);
+  EXPECT_STREQ("This is ", text1->v.text.text);
+
+  GumboNode* b = GetChild(h2, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, b->type);
+  EXPECT_EQ(GUMBO_TAG_B, GetTag(b));
+  EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, b->parse_flags);
+  ASSERT_EQ(1, GetChildCount(b));
+
+  GumboNode* text2 = GetChild(b, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text2->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
+  EXPECT_STREQ("old", text2->v.text.text);
+
+  GumboNode* b2 = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, b2->type);
+  EXPECT_EQ(GUMBO_TAG_B, GetTag(b2));
+  EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG | GUMBO_INSERTION_BY_PARSER |
+                GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT,
+      b2->parse_flags);
+  ASSERT_EQ(1, GetChildCount(b2));
+
+  GumboNode* text3 = GetChild(b2, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text3->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text3->parse_flags);
+  EXPECT_STREQ("text", text3->v.text.text);
+}
+
+TEST_F(GumboParserTest, ExtraReconstruction) {
+  // The trailing stray </p> is what produces the <p> asserted as body child 1.
+  Parse("<span><b></span></p>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(2, GetChildCount(body));
+
+  EXPECT_EQ(GUMBO_TAG_SPAN, GetTag(GetChild(body, 0)));
+  EXPECT_EQ(GUMBO_TAG_P, GetTag(GetChild(body, 1)));
+}
+
+TEST_F(GumboParserTest, LinkifiedHeading) {
+  // Expects li = [h3 > a, div]; the <a> and the <div> each have one child.
+  Parse("<li><h3><a href=#foo>Text</a></h3><div>Summary</div>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* li = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, li->type);
+  EXPECT_EQ(GUMBO_TAG_LI, GetTag(li));
+  ASSERT_EQ(2, GetChildCount(li));
+
+  GumboNode* h3 = GetChild(li, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, h3->type);
+  EXPECT_EQ(GUMBO_TAG_H3, GetTag(h3));
+  ASSERT_EQ(1, GetChildCount(h3));
+
+  GumboNode* anchor = GetChild(h3, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, anchor->type);
+  EXPECT_EQ(GUMBO_TAG_A, GetTag(anchor));
+  ASSERT_EQ(1, GetChildCount(anchor));
+
+  GumboNode* div = GetChild(li, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
+  EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
+  ASSERT_EQ(1, GetChildCount(div));
+}
+
+TEST_F(GumboParserTest, MisnestedHeading) {
+  Parse(
+      "<h1>"
+      "  <section>"
+      "    <h2>"
+      "      <dl><dt>List"
+      "    </h1>"
+      "  </section>"
+      "  Heading1"
+      "<h3>Heading3</h4>"
+      "After</h3> text");
+  // The parse of this is pretty weird: according to the spec, it should be:
+  // <html>
+  //   <head></head>
+  //   <body>
+  //     <h1>
+  //       <section>
+  //         <h2><dl><dt>List</dt></dl></h2>
+  //       </section>
+  //       Heading1
+  //     </h1>
+  //     <h3>Heading3</h3>
+  //     After text
+  //   </body>
+  // </html>
+  // Explanation:
+  // <html>, <head>, and <body> tags are implied. The opening <h1> and <section>
+  // tags function as expected. Because the current node is <section>, the <h2>
+  // does *not* close the existing <h1>, and then we enter a definition list.
+  // The closing </h1>, even though it's misnested, causes the <dt> to be closed
+  // implicitly, then also closes the <dl> and <h2> as a parse error. <h1> is
+  // still open, and so "Heading1" goes into it. Because the current node is a
+  // heading tag, <h3> closes it (as a parse error) and reopens a new <h3> node,
+  // which is closed by the </h4> tag. The remaining text goes straight into
+  // the <body>; since no heading is open, the </h3> tag is ignored and the
+  // second run is condensed into the first.
+  // TODO(jdtang): Make sure that parse_flags are set appropriately for this.
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(3, GetChildCount(body));
+
+  GumboNode* h1 = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, h1->type);
+  EXPECT_EQ(GUMBO_TAG_H1, GetTag(h1));
+  ASSERT_EQ(3, GetChildCount(h1));
+  // Child 0 (the first child) is whitespace, as for many of these nodes.
+
+  GumboNode* section = GetChild(h1, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, section->type);
+  EXPECT_EQ(GUMBO_TAG_SECTION, GetTag(section));
+  ASSERT_EQ(3, GetChildCount(section));
+
+  GumboNode* h2 = GetChild(section, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, h2->type);
+  EXPECT_EQ(GUMBO_TAG_H2, GetTag(h2));
+  ASSERT_EQ(2, GetChildCount(h2));
+
+  GumboNode* dl = GetChild(h2, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, dl->type);
+  EXPECT_EQ(GUMBO_TAG_DL, GetTag(dl));
+  ASSERT_EQ(1, GetChildCount(dl));
+
+  GumboNode* dt = GetChild(dl, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, dt->type);
+  EXPECT_EQ(GUMBO_TAG_DT, GetTag(dt));
+  ASSERT_EQ(1, GetChildCount(dt));
+
+  GumboNode* text1 = GetChild(dt, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text1->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text1->parse_flags);
+  EXPECT_STREQ("List    ", text1->v.text.text);
+
+  GumboNode* text2 = GetChild(h1, 2);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text2->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
+  EXPECT_STREQ("  Heading1", text2->v.text.text);
+
+  GumboNode* h3 = GetChild(body, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, h3->type);
+  EXPECT_EQ(GUMBO_TAG_H3, GetTag(h3));
+  EXPECT_EQ(1, GetChildCount(h3));
+
+  GumboNode* text3 = GetChild(h3, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text3->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text3->parse_flags);
+  EXPECT_STREQ("Heading3", text3->v.text.text);
+
+  GumboNode* text4 = GetChild(body, 2);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text4->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text4->parse_flags);
+  EXPECT_STREQ("After text", text4->v.text.text);
+}
+
+TEST_F(GumboParserTest, DoubleBody) {
+  Parse("<body class=first><body class=second id=merged>Text</body></body>");
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+  ASSERT_EQ(2, GetAttributeCount(body));
+
+  GumboAttribute* clas = GetAttribute(body, 0);
+  EXPECT_STREQ("class", clas->name);
+  EXPECT_STREQ("first", clas->value);
+
+  GumboAttribute* id = GetAttribute(body, 1);
+  EXPECT_STREQ("id", id->name);
+  EXPECT_STREQ("merged", id->value);
+
+  GumboNode* text = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_EQ(GUMBO_INSERTION_NORMAL, text->parse_flags);
+  EXPECT_STREQ("Text", text->v.text.text);
+}
+
+TEST_F(GumboParserTest, ThInMathMl) {
+  Parse("<math><th><mI><table></table><tr></table><div><tr>0");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* math = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, math->type);
+  EXPECT_EQ(GUMBO_TAG_MATH, math->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_MATHML, math->v.element.tag_namespace);
+  ASSERT_EQ(1, GetChildCount(math));
+
+  GumboNode* th = GetChild(math, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type);
+  EXPECT_EQ(GUMBO_TAG_TH, th->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_MATHML, th->v.element.tag_namespace);
+  ASSERT_EQ(1, GetChildCount(th));
+
+  GumboNode* mi = GetChild(th, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, mi->type);
+  EXPECT_EQ(GUMBO_TAG_MI, mi->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_MATHML, mi->v.element.tag_namespace);
+  ASSERT_EQ(2, GetChildCount(mi));
+
+  GumboNode* table = GetChild(mi, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, table->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_HTML, table->v.element.tag_namespace);
+  ASSERT_EQ(0, GetChildCount(table));
+
+  GumboNode* div = GetChild(mi, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
+  EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_HTML, div->v.element.tag_namespace);
+  ASSERT_EQ(1, GetChildCount(div));
+
+  GumboNode* text = GetChild(div, 0);
+  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+  EXPECT_STREQ("0", text->v.text.text);
+}
+
+TEST_F(GumboParserTest, TdInMathml) {
+  Parse("<table><th><math><td></tr>");
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* table = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+  EXPECT_EQ(GUMBO_TAG_TABLE, table->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_HTML, table->v.element.tag_namespace);
+  ASSERT_EQ(1, GetChildCount(table));
+
+  GumboNode* tbody = GetChild(table, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
+  EXPECT_EQ(GUMBO_TAG_TBODY, tbody->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_HTML, tbody->v.element.tag_namespace);
+  ASSERT_EQ(1, GetChildCount(tbody));
+
+  GumboNode* tr = GetChild(tbody, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
+  EXPECT_EQ(GUMBO_TAG_TR, tr->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_HTML, tr->v.element.tag_namespace);
+  ASSERT_EQ(1, GetChildCount(tr));
+
+  GumboNode* th = GetChild(tr, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type);
+  EXPECT_EQ(GUMBO_TAG_TH, th->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_HTML, th->v.element.tag_namespace);
+  ASSERT_EQ(1, GetChildCount(th));
+
+  GumboNode* math = GetChild(th, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, math->type);
+  EXPECT_EQ(GUMBO_TAG_MATH, math->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_MATHML, math->v.element.tag_namespace);
+  ASSERT_EQ(1, GetChildCount(math));
+
+  GumboNode* td = GetChild(math, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type);
+  EXPECT_EQ(GUMBO_TAG_TD, td->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_MATHML, td->v.element.tag_namespace);
+  ASSERT_EQ(0, GetChildCount(td));
+}
+
+TEST_F(GumboParserTest, SelectInForeignContent) {
+  Parse("<svg><select><foreignobject><select><select><select>");
+}
+
+TEST_F(GumboParserTest, TemplateInForeignContent) {
+  Parse("<template><svg><template>");
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  EXPECT_EQ(0, GetChildCount(body));
+
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(2, GetChildCount(html));
+
+  GumboNode* head = GetChild(html, 0);
+  ASSERT_EQ(1, GetChildCount(head));
+
+  GumboNode* template_node = GetChild(head, 0);
+  ASSERT_EQ(GUMBO_NODE_TEMPLATE, template_node->type);
+  EXPECT_EQ(GUMBO_TAG_TEMPLATE, template_node->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(template_node));
+
+  GumboNode* svg_node = GetChild(template_node, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, svg_node->type);
+  EXPECT_EQ(GUMBO_TAG_SVG, svg_node->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_SVG, svg_node->v.element.tag_namespace);
+  ASSERT_EQ(1, GetChildCount(svg_node));
+
+  GumboNode* svg_template = GetChild(svg_node, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, svg_template->type);
+  EXPECT_EQ(GUMBO_TAG_TEMPLATE, svg_template->v.element.tag);
+  EXPECT_EQ(GUMBO_NAMESPACE_SVG, svg_template->v.element.tag_namespace);
+  EXPECT_EQ(0, GetChildCount(svg_template));
+}
+
+TEST_F(GumboParserTest, TemplateNull) {
+  output_ = gumbo_parse_with_options(
+      &options_, "<template>\0", sizeof("<template>\0") - 1);
+  root_ = output_->document;
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  EXPECT_EQ(0, GetChildCount(body));
+
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(2, GetChildCount(html));
+
+  GumboNode* head = GetChild(html, 0);
+  ASSERT_EQ(1, GetChildCount(head));
+
+  GumboNode* template_node = GetChild(head, 0);
+  ASSERT_EQ(GUMBO_NODE_TEMPLATE, template_node->type);
+  EXPECT_EQ(GUMBO_TAG_TEMPLATE, template_node->v.element.tag);
+  ASSERT_EQ(0, GetChildCount(template_node));
+}
+
+TEST_F(GumboParserTest, FragmentWithNamespace) {
+  ParseFragment("<div></div>", GUMBO_TAG_TITLE, GUMBO_NAMESPACE_SVG);
+  // Counts are ASSERTed before GetChild() to avoid out-of-range indexing.
+  ASSERT_EQ(1, GetChildCount(root_));
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
+  ASSERT_EQ(1, GetChildCount(html));
+
+  GumboNode* div = GetChild(html, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
+  EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag);
+  EXPECT_EQ(0, GetChildCount(div));
+}
+
+TEST_F(GumboParserTest, FragmentWithTwoNodes) {
+  // The fragment holds the two siblings verified below: <h1> and <br>.
+  ParseFragment("<h1>Hi</h1><br>", GUMBO_TAG_BODY, GUMBO_NAMESPACE_HTML);
+  ASSERT_EQ(1, GetChildCount(root_));
+
+  GumboNode* html = GetChild(root_, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
+  ASSERT_EQ(2, GetChildCount(html));  // guards GetChild(html, 1) below
+
+  GumboNode* h1 = GetChild(html, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, h1->type);
+  EXPECT_EQ(GUMBO_TAG_H1, h1->v.element.tag);
+  EXPECT_EQ(1, GetChildCount(h1));
+
+  GumboNode* br = GetChild(html, 1);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, br->type);
+  EXPECT_EQ(GUMBO_TAG_BR, br->v.element.tag);
+  EXPECT_EQ(0, GetChildCount(br));
+}
+
+TEST_F(GumboParserTest, CrazyName) {
+  Parse("<body><WhatAcrazyNAME></WhatAcrazyNAME>");
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* node = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, node->type);
+  ASSERT_EQ(std::string("whatacrazyname"), node->v.element.name);
+}
+
+TEST_F(GumboParserTest, SVGForeignObjectName) {
+  Parse("<body><SVG><FOREIGNOBJECT></FOREIGNOBJECT></SVG>");
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* svg = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, svg->type);
+  ASSERT_EQ(GUMBO_TAG_SVG, svg->v.element.tag);
+  ASSERT_EQ(std::string("svg"), svg->v.element.name);
+
+  GumboNode* node = GetChild(svg, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, node->type);
+  ASSERT_EQ(GUMBO_TAG_FOREIGNOBJECT, node->v.element.tag);
+  ASSERT_EQ(std::string("foreignObject"), node->v.element.name);
+}
+
+TEST_F(GumboParserTest, NonSVGForeignObjectName) {
+  Parse("<body><MATH><FOREIGNOBJECT></FOREIGNOBJECT></math>");
+
+  GumboNode* body;
+  GetAndAssertBody(root_, &body);
+  ASSERT_EQ(1, GetChildCount(body));
+
+  GumboNode* math = GetChild(body, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, math->type);
+  ASSERT_EQ(GUMBO_TAG_MATH, math->v.element.tag);
+  ASSERT_EQ(std::string("math"), math->v.element.name);
+
+  GumboNode* node = GetChild(math, 0);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, node->type);
+  ASSERT_EQ(GUMBO_TAG_FOREIGNOBJECT, node->v.element.tag);
+  // Note the lowercase o compared to above
+  ASSERT_EQ(std::string("foreignobject"), node->v.element.name);
+}
+
+// TEST_F(GumboParserTest, UnknownElementMismatch) {
+//   // XXX: This should be a parse error, possibly two.
+//   Parse("<!DOCTYPE html><body><foo></bar>");
+//   EXPECT_TRUE(output_->errors.length != 0);
+// }
+
+}  // namespace
diff --git a/gumbo-parser/test/string_buffer.cc b/gumbo-parser/test/string_buffer.cc
new file mode 100644
index 00000000..01102985
--- /dev/null
+++ b/gumbo-parser/test/string_buffer.cc
@@ -0,0 +1,101 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#include "string_buffer.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "gtest/gtest.h"
+#include "test_utils.h"
+#include "util.h"
+
+namespace {
+
+#define INIT_GUMBO_STRING(varname, literal) \
+  GumboStringPiece varname = {literal, sizeof(literal) - 1}
+
+class GumboStringBufferTest : public GumboTest {
+ protected:
+  GumboStringBufferTest() { gumbo_string_buffer_init(&buffer_); }
+
+  ~GumboStringBufferTest() { gumbo_string_buffer_destroy(&buffer_); }
+
+  void NullTerminateBuffer() { buffer_.data[buffer_.length++] = 0; }
+
+  GumboStringBuffer buffer_;
+};
+
+TEST_F(GumboStringBufferTest, Reserve) {
+  gumbo_string_buffer_reserve(21, &buffer_);
+  EXPECT_EQ(40, buffer_.capacity);
+  strcpy(buffer_.data, "01234567890123456789");
+  buffer_.length = 20;
+  NullTerminateBuffer();
+  EXPECT_EQ(21, buffer_.length);
+  EXPECT_STREQ("01234567890123456789", buffer_.data);
+}
+
+TEST_F(GumboStringBufferTest, AppendString) {
+  INIT_GUMBO_STRING(str, "01234567");
+  gumbo_string_buffer_append_string(&str, &buffer_);
+  NullTerminateBuffer();
+  EXPECT_STREQ("01234567", buffer_.data);
+}
+
+TEST_F(GumboStringBufferTest, AppendStringWithResize) {
+  INIT_GUMBO_STRING(str, "01234567");
+  gumbo_string_buffer_append_string(&str, &buffer_);
+  gumbo_string_buffer_append_string(&str, &buffer_);
+  NullTerminateBuffer();
+  EXPECT_STREQ("0123456701234567", buffer_.data);
+}
+
+TEST_F(GumboStringBufferTest, AppendCodepoint_1Byte) {
+  gumbo_string_buffer_append_codepoint('a', &buffer_);
+  NullTerminateBuffer();
+  EXPECT_STREQ("a", buffer_.data);
+}
+
+TEST_F(GumboStringBufferTest, AppendCodepoint_2Bytes) {
+  gumbo_string_buffer_append_codepoint(0xE5, &buffer_);
+  NullTerminateBuffer();
+  EXPECT_STREQ("\xC3\xA5", buffer_.data);
+}
+
+TEST_F(GumboStringBufferTest, AppendCodepoint_3Bytes) {
+  gumbo_string_buffer_append_codepoint(0x39E7, &buffer_);
+  NullTerminateBuffer();
+  EXPECT_STREQ("\xE3\xA7\xA7", buffer_.data);
+}
+
+TEST_F(GumboStringBufferTest, AppendCodepoint_4Bytes) {
+  gumbo_string_buffer_append_codepoint(0x679E7, &buffer_);
+  NullTerminateBuffer();
+  EXPECT_STREQ("\xF1\xA7\xA7\xA7", buffer_.data);
+}
+
+TEST_F(GumboStringBufferTest, ToString) {
+  gumbo_string_buffer_reserve(8, &buffer_);
+  strcpy(buffer_.data, "012345");
+  buffer_.length = 7;
+
+  char* dest = gumbo_string_buffer_to_string(&buffer_);
+  EXPECT_STREQ("012345", dest);
+  gumbo_free(dest);
+}
+
+}  // namespace
diff --git a/gumbo-parser/test/string_piece.cc b/gumbo-parser/test/string_piece.cc
new file mode 100644
index 00000000..db2d5f3c
--- /dev/null
+++ b/gumbo-parser/test/string_piece.cc
@@ -0,0 +1,67 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#include "test_utils.h"
+
+namespace {
+
+typedef GumboTest GumboStringPieceTest;
+
+#define STRING(s) {"" s, sizeof(s) - 1}
+
+TEST_F(GumboStringPieceTest, Equal) {
+  const GumboStringPiece str1 = STRING("foo");
+  const GumboStringPiece str2 = STRING("foo");
+  EXPECT_TRUE(gumbo_string_equals(&str1, &str2));
+}
+
+TEST_F(GumboStringPieceTest, NotEqual_DifferingCase) {
+  const GumboStringPiece str1 = STRING("foo");
+  const GumboStringPiece str2 = STRING("Foo");
+  EXPECT_FALSE(gumbo_string_equals(&str1, &str2));
+}
+
+TEST_F(GumboStringPieceTest, NotEqual_Str1Shorter) {
+  const GumboStringPiece str1 = STRING("foo");
+  const GumboStringPiece str2 = STRING("foobar");
+  EXPECT_FALSE(gumbo_string_equals(&str1, &str2));
+}
+
+TEST_F(GumboStringPieceTest, NotEqual_Str2Shorter) {
+  const GumboStringPiece str1 = STRING("foobar");
+  const GumboStringPiece str2 = STRING("foo");
+  EXPECT_FALSE(gumbo_string_equals(&str1, &str2));
+}
+
+TEST_F(GumboStringPieceTest, NotEqual_DifferentText) {
+  const GumboStringPiece str1 = STRING("bar");
+  const GumboStringPiece str2 = STRING("foo");
+  EXPECT_FALSE(gumbo_string_equals(&str1, &str2));
+}
+
+TEST_F(GumboStringPieceTest, CaseEqual) {
+  const GumboStringPiece str1 = STRING("foo");
+  const GumboStringPiece str2 = STRING("fOO");
+  EXPECT_TRUE(gumbo_string_equals_ignore_case(&str1, &str2));
+}
+
+TEST_F(GumboStringPieceTest, CaseNotEqual_Str2Shorter) {
+  const GumboStringPiece str1 = STRING("foobar");
+  const GumboStringPiece str2 = STRING("foo");
+  EXPECT_FALSE(gumbo_string_equals_ignore_case(&str1, &str2));
+}
+
+}  // namespace
diff --git a/gumbo-parser/test/test_utils.cc b/gumbo-parser/test/test_utils.cc
new file mode 100644
index 00000000..b39bac7c
--- /dev/null
+++ b/gumbo-parser/test/test_utils.cc
@@ -0,0 +1,166 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#include "test_utils.h"
+
+#include "error.h"
+#include "util.h"
+
+int GetChildCount(GumboNode* node) {
+  // Documents keep their children in v.document; every other node passed here
+  // is read through v.element.
+  return node->type == GUMBO_NODE_DOCUMENT
+      ? node->v.document.children.length
+      : node->v.element.children.length;
+}
+
+GumboTag GetTag(GumboNode* node) { return node->v.element.tag; }
+
+GumboNode* GetChild(GumboNode* parent, int index) {
+  // Select the child array from the union member matching the node type.
+  void* const* children = parent->type == GUMBO_NODE_DOCUMENT
+      ? parent->v.document.children.data
+      : parent->v.element.children.data;
+  return static_cast<GumboNode*>(children[index]);
+}
+
+int GetAttributeCount(GumboNode* node) {
+  return node->v.element.attributes.length;
+}
+
+GumboAttribute* GetAttribute(GumboNode* node, int index) {
+  return static_cast<GumboAttribute*>(node->v.element.attributes.data[index]);
+}
+
+// Convenience function that asserts the basic document structure (a single
+// <html> element under `root`, containing <head> then <body>) and stores the
+// body node in `*body`; `*body` is NULL if no body element was reached.
+void GetAndAssertBody(GumboNode* root, GumboNode** body) {
+  *body = NULL;  // defined result even if an early ASSERT returns
+  GumboNode* html = NULL;
+  for (int i = 0; i < GetChildCount(root); ++i) {
+    GumboNode* child = GetChild(root, i);
+    if (child->type != GUMBO_NODE_ELEMENT) {
+      ASSERT_EQ(GUMBO_NODE_COMMENT, child->type);
+      continue;
+    }
+    ASSERT_TRUE(html == NULL);
+    html = child;
+  }
+  ASSERT_TRUE(html != NULL);
+  ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+  EXPECT_EQ(GUMBO_TAG_HTML, GetTag(html));
+
+  // Non-element children of <html> (comments, whitespace) are skipped. The
+  // first element is taken as <head> and the second as <body>; any further
+  // element fails the test.
+  GumboNode* head = NULL;
+  for (int i = 0; i < GetChildCount(html); ++i) {
+    GumboNode* child = GetChild(html, i);
+    if (child->type != GUMBO_NODE_ELEMENT) {
+      continue;
+    }
+
+    if (!head) {
+      head = child;
+      EXPECT_EQ(GUMBO_TAG_HEAD, GetTag(head));
+    } else if (!(*body)) {
+      *body = child;
+      EXPECT_EQ(GUMBO_TAG_BODY, GetTag(*body));
+    } else {
+      FAIL() << "More than two elements found inside <html>";
+    }
+  }
+  EXPECT_TRUE(head != NULL);
+  ASSERT_TRUE(*body != NULL);
+}
+
+// Recursively verifies that the source pointers and offsets recorded on `node`
+// and its descendants fall within `input` (`input_length` bytes) and that
+// parent/child links are consistent. Document nodes are skipped.
+void SanityCheckPointers(
+    const char* input, size_t input_length, const GumboNode* node, int depth) {
+  ASSERT_TRUE(node != NULL);
+  // There are some truly pathological HTML documents out there - the
+  // integration tests include one whose DOM "tree" is a linked list 27,500
+  // nodes deep - so recursion is capped to avoid blowing the stack. An
+  // iterative walk would gain little for the extra complexity.
+  if (node->type == GUMBO_NODE_DOCUMENT || depth > 400) {
+    // Don't sanity-check the document as well...we start with the root.
+    return;
+  }
+  if (node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE) {
+    // Templates keep their data in v.element too, so they share these checks.
+    const GumboElement* element = &node->v.element;
+    // original_tag / original_end_tag must fall within the original input.
+    if (element->original_tag.data && element->original_tag.length) {
+      EXPECT_GE(element->original_tag.data, input);
+      EXPECT_LT(element->original_tag.data, input + input_length);
+      EXPECT_LE(element->original_tag.length, input_length);
+    }
+    if (element->original_end_tag.data && element->original_end_tag.length) {
+      EXPECT_GE(element->original_end_tag.data, input);
+      EXPECT_LT(element->original_end_tag.data, input + input_length);
+      EXPECT_LE(element->original_end_tag.length, input_length);
+    }
+    EXPECT_GE(element->start_pos.offset, 0);
+    EXPECT_LE(element->start_pos.offset, input_length);
+    EXPECT_GE(element->end_pos.offset, 0);
+    EXPECT_LE(element->end_pos.offset, input_length);
+
+    const GumboVector* children = &element->children;
+    for (unsigned int i = 0; i < children->length; ++i) {
+      const GumboNode* child = static_cast<const GumboNode*>(children->data[i]);
+      // Checks on parent/child links.
+      ASSERT_TRUE(child != NULL);
+      EXPECT_EQ(node, child->parent);
+      EXPECT_EQ(i, child->index_within_parent);
+      SanityCheckPointers(input, input_length, child, depth + 1);
+    }
+  } else {
+    const GumboText* text = &node->v.text;
+    EXPECT_GE(text->original_text.data, input);
+    EXPECT_LT(text->original_text.data, input + input_length);
+    EXPECT_LE(text->original_text.length, input_length);
+    EXPECT_GE(text->start_pos.offset, 0);
+    EXPECT_LT(text->start_pos.offset, input_length);
+  }
+}
+
+GumboTest::GumboTest()
+    : options_(kGumboDefaultOptions), errors_are_expected_(false), text_("") {
+  options_.max_errors = 100;
+  parser_._options = &options_;
+  parser_._output = static_cast<GumboOutput*>(gumbo_alloc(sizeof(GumboOutput)));
+  gumbo_init_errors(&parser_);
+}
+
+GumboTest::~GumboTest() {
+  // TODO(jdtang): A googlemock matcher may be a more appropriate solution for
+  // this; we only want to pretty-print errors that are not an expected
+  // output of the test.
+  //
+  // At most the first recorded error is printed, and only when the test did
+  // not flag errors as expected.
+  if (!errors_are_expected_ && parser_._output->errors.length > 0) {
+    GumboError* first_error =
+        static_cast<GumboError*>(parser_._output->errors.data[0]);
+    gumbo_print_caret_diagnostic(first_error, text_, strlen(text_));
+  }
+  // Tear down the error list, then free the output allocated in the ctor.
+  gumbo_destroy_errors(&parser_);
+  gumbo_free(parser_._output);
+}
diff --git a/gumbo-parser/test/test_utils.h b/gumbo-parser/test/test_utils.h
new file mode 100644
index 00000000..620ba99c
--- /dev/null
+++ b/gumbo-parser/test/test_utils.h
@@ -0,0 +1,65 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#ifndef GUMBO_TEST_UTILS_H_
+#define GUMBO_TEST_UTILS_H_
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "gumbo.h"
+#include "parser.h"
+
+inline std::string ToString(const GumboStringPiece& str) {
+  return std::string(str.data, str.length);
+}
+
+int GetChildCount(GumboNode* node);
+GumboTag GetTag(GumboNode* node);
+GumboNode* GetChild(GumboNode* parent, int index);
+int GetAttributeCount(GumboNode* node);
+GumboAttribute* GetAttribute(GumboNode* node, int index);
+
+// Convenience function to do some basic assertions on the structure of the
+// document (nodes are elements, nodes have the right tags) and then return
+// the body node.
+void GetAndAssertBody(GumboNode* root, GumboNode** body);
+
+void SanityCheckPointers (
+  const char* input,
+  size_t input_length,
+  const GumboNode* node,
+  int depth
+);
+
+// Base class for Gumbo tests. This provides a GumboParser object that's
+// been initialized to sane values, as normally happens in the beginning of
+// gumbo_parse, and then a destructor that cleans up after it.
+class GumboTest : public ::testing::Test {
+ protected:
+  GumboTest();
+  virtual ~GumboTest();
+
+  GumboOptions options_;
+  GumboParser parser_;
+  bool errors_are_expected_;
+  const char* text_;
+};
+
+#endif  // GUMBO_TEST_UTILS_H_
diff --git a/gumbo-parser/test/tokenizer.cc b/gumbo-parser/test/tokenizer.cc
new file mode 100644
index 00000000..4760da83
--- /dev/null
+++ b/gumbo-parser/test/tokenizer.cc
@@ -0,0 +1,727 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#include "tokenizer.h"
+
+#include <stdio.h>
+
+#include "gtest/gtest.h"
+#include "test_utils.h"
+
+extern const char* kGumboTagNames[];
+
+namespace {
+
+// Tests for tokenizer.c
+class GumboTokenizerTest : public GumboTest {
+ protected:
+  GumboTokenizerTest() { gumbo_tokenizer_state_init(&parser_, "", 0); }
+
+  virtual ~GumboTokenizerTest() {
+    gumbo_tokenizer_state_destroy(&parser_);
+    gumbo_token_destroy(&token_);
+  }
+
+  void SetInput(const char* input) {
+    text_ = input;
+    gumbo_tokenizer_state_destroy(&parser_);
+    gumbo_tokenizer_state_init(&parser_, input, strlen(input));
+  }
+
+  void Advance(int num_tokens) {
+    for (int i = 0; i < num_tokens; ++i) {
+      EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+      gumbo_token_destroy(&token_);
+    }
+  }
+
+  GumboToken token_;
+};
+
+TEST(GumboTagEnumTest, TagEnumIncludesAllTags) {
+  EXPECT_EQ(0, GUMBO_TAG_HTML);
+  for (unsigned int i = 0; i < (unsigned int) GUMBO_TAG_UNKNOWN; i++) {
+    const char* tagname = gumbo_normalized_tagname((GumboTag)i);
+    EXPECT_FALSE(tagname == NULL);
+    EXPECT_FALSE(tagname[0] == '\0');
+    EXPECT_TRUE(strlen(tagname) < 15);
+  }
+  EXPECT_STREQ("", gumbo_normalized_tagname(GUMBO_TAG_UNKNOWN));
+  EXPECT_STREQ("html", gumbo_normalized_tagname(GUMBO_TAG_HTML));
+  EXPECT_STREQ("a", gumbo_normalized_tagname(GUMBO_TAG_A));
+  EXPECT_STREQ("dialog", gumbo_normalized_tagname(GUMBO_TAG_DIALOG));
+  EXPECT_STREQ("template", gumbo_normalized_tagname(GUMBO_TAG_TEMPLATE));
+}
+
+TEST(GumboTagEnumTest, TagLookupCaseSensitivity) {
+  EXPECT_EQ(GUMBO_TAG_HTML, gumbo_tagn_enum("HTML", 4));
+  EXPECT_EQ(GUMBO_TAG_BODY, gumbo_tagn_enum("boDy", 4));
+  EXPECT_EQ(GUMBO_TAG_A, gumbo_tagn_enum("A", 1));
+  EXPECT_EQ(GUMBO_TAG_A, gumbo_tagn_enum("a", 1));
+  EXPECT_EQ(GUMBO_TAG_TEMPLATE, gumbo_tagn_enum("Template", 8));
+  EXPECT_EQ(GUMBO_TAG_DIALOG, gumbo_tagn_enum("diAloG", 6));
+  EXPECT_EQ(GUMBO_TAG_ANNOTATION_XML, gumbo_tagn_enum("annotation-xml", 14));
+  EXPECT_EQ(GUMBO_TAG_ANNOTATION_XML, gumbo_tagn_enum("ANNOTATION-XML", 14));
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, gumbo_tagn_enum("ANNOTATION-XML-", 15));
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, gumbo_tagn_enum("ANNOTATION-XM", 13));
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, gumbo_tagn_enum("", 0));
+  EXPECT_EQ(GUMBO_TAG_B, gumbo_tagn_enum("b", 1));
+  EXPECT_EQ(GUMBO_TAG_I, gumbo_tagn_enum("i", 1));
+  EXPECT_EQ(GUMBO_TAG_U, gumbo_tagn_enum("u", 1));
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, gumbo_tagn_enum("x", 1));
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, gumbo_tagn_enum("c", 1));
+}
+
+TEST_F(GumboTokenizerTest, PartialTag) {
+  SetInput("<a");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_EOF, token_.type);
+}
+
+TEST_F(GumboTokenizerTest, PartialTagWithAttributes) {
+  SetInput("<a href=foo /");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_EOF, token_.type);
+}
+
+TEST_F(GumboTokenizerTest, LexCharToken) {
+  SetInput("a");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ(1, token_.position.column);
+  EXPECT_EQ(1, token_.position.line);
+  EXPECT_EQ(0, token_.position.offset);
+  EXPECT_EQ('a', *token_.original_text.data);
+  EXPECT_EQ(1, token_.original_text.length);
+  EXPECT_EQ('a', token_.v.character);
+
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_EOF, token_.type);
+  EXPECT_EQ(1, token_.position.offset);
+}
+
+TEST_F(GumboTokenizerTest, LexCharRef) {
+  SetInput("&nbsp; Text");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ(1, token_.position.column);
+  EXPECT_EQ(1, token_.position.line);
+  EXPECT_EQ(0, token_.position.offset);
+  EXPECT_EQ('&', *token_.original_text.data);
+  EXPECT_EQ(6, token_.original_text.length);
+  EXPECT_EQ(0xA0, token_.v.character);
+
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_WHITESPACE, token_.type);
+  EXPECT_EQ(' ', *token_.original_text.data);
+  EXPECT_EQ(' ', token_.v.character);
+}
+
+TEST_F(GumboTokenizerTest, LexCharRef_NotCharRef) {
+  SetInput("&xyz");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+  EXPECT_EQ('&', token_.v.character);
+
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ(1, token_.position.offset);
+  EXPECT_EQ('x', token_.v.character);
+}
+
+TEST_F(GumboTokenizerTest, LeadingWhitespace) {
+  SetInput(
+      "<div>\n"
+      "  <span class=foo>");
+  Advance(4);  // <div>, "\n" and the two indent spaces
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));  // <span>
+  ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type);  // guards the union read
+  GumboTokenStartTag* start_tag = &token_.v.start_tag;
+  EXPECT_EQ(GUMBO_TAG_SPAN, start_tag->tag);
+  EXPECT_EQ(2, token_.position.line);
+  EXPECT_EQ(3, token_.position.column);
+  ASSERT_EQ(1, start_tag->attributes.length);
+
+  GumboAttribute* clas =
+      static_cast<GumboAttribute*>(start_tag->attributes.data[0]);
+  EXPECT_STREQ("class", clas->name);
+  EXPECT_EQ("class", ToString(clas->original_name));
+  EXPECT_EQ(2, clas->name_start.line);
+  EXPECT_EQ(9, clas->name_start.column);
+  EXPECT_EQ(14, clas->name_end.column);
+  EXPECT_STREQ("foo", clas->value);
+  EXPECT_EQ("foo", ToString(clas->original_value));
+  EXPECT_EQ(15, clas->value_start.column);
+  EXPECT_EQ(18, clas->value_end.column);
+}
+
+TEST_F(GumboTokenizerTest, Doctype) {
+  SetInput("<!doctype html>");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_DOCTYPE, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+
+  GumboTokenDocType* doc_type = &token_.v.doc_type;
+  EXPECT_FALSE(doc_type->force_quirks);
+  EXPECT_FALSE(doc_type->has_public_identifier);
+  EXPECT_FALSE(doc_type->has_system_identifier);
+  EXPECT_STREQ("html", doc_type->name);
+  EXPECT_STREQ("", doc_type->public_identifier);
+  EXPECT_STREQ("", doc_type->system_identifier);
+}
+
+TEST_F(GumboTokenizerTest, DoctypePublic) {
+  SetInput(
+      "<!DOCTYPE html PUBLIC "
+      "\"-//W3C//DTD XHTML 1.0 Transitional//EN\" "
+      "'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_DOCTYPE, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+
+  GumboTokenDocType* doc_type = &token_.v.doc_type;
+  EXPECT_FALSE(doc_type->force_quirks);
+  EXPECT_TRUE(doc_type->has_public_identifier);
+  EXPECT_TRUE(doc_type->has_system_identifier);
+  EXPECT_STREQ("html", doc_type->name);
+  EXPECT_STREQ(
+      "-//W3C//DTD XHTML 1.0 Transitional//EN", doc_type->public_identifier);
+  EXPECT_STREQ("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd",
+      doc_type->system_identifier);
+}
+
+TEST_F(GumboTokenizerTest, DoctypeSystem) {
+  SetInput("<!DOCtype root_element SYSTEM \"DTD_location\">");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_DOCTYPE, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+
+  GumboTokenDocType* doc_type = &token_.v.doc_type;
+  EXPECT_FALSE(doc_type->force_quirks);
+  EXPECT_FALSE(doc_type->has_public_identifier);
+  EXPECT_TRUE(doc_type->has_system_identifier);
+  EXPECT_STREQ("root_element", doc_type->name);
+  EXPECT_STREQ("DTD_location", doc_type->system_identifier);
+}
+
+TEST_F(GumboTokenizerTest, DoctypeUnterminated) {
+  SetInput("<!DOCTYPE a PUBLIC''");
+  EXPECT_FALSE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_DOCTYPE, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+  // Hitting EOF before '>' is expected to fail the lex and force quirks mode.
+  GumboTokenDocType* doc_type = &token_.v.doc_type;
+  EXPECT_TRUE(doc_type->force_quirks);
+  EXPECT_TRUE(doc_type->has_public_identifier);
+  EXPECT_FALSE(doc_type->has_system_identifier);
+  EXPECT_STREQ("a", doc_type->name);
+  EXPECT_STREQ("", doc_type->system_identifier);
+}
+
+TEST_F(GumboTokenizerTest, RawtextEnd) {
+  SetInput("<title>x ignores <tag></title>");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.start_tag.tag);
+  // In RAWTEXT, "<tag>" is emitted as characters; only "</title>" ends it.
+  gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_RAWTEXT);
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('x', token_.v.character);
+  gumbo_token_destroy(&token_);
+  // Skip " ignores " (nine tokens, assuming Advance() lexes and discards).
+  Advance(9);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('<', token_.v.character);
+  gumbo_token_destroy(&token_);
+
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('t', token_.v.character);
+  gumbo_token_destroy(&token_);
+
+  Advance(3);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_END_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.end_tag.tag);
+}
+
+TEST_F(GumboTokenizerTest, RCDataEnd) {
+  SetInput("<title>x</title>");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.start_tag.tag);
+  // RCDATA content is emitted per character until the matching end tag.
+  gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_RCDATA);
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('x', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_END_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.end_tag.tag);
+}
+
+TEST_F(GumboTokenizerTest, ScriptEnd) {
+  SetInput("<script>x = '\"></';</script>");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_SCRIPT, token_.v.start_tag.tag);
+  // "</" followed by "'" is not an end tag, so it is emitted as characters.
+  gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT);
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('x', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  Advance(6);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('<', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('/', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('\'', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  Advance(1);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_END_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_SCRIPT, token_.v.end_tag.tag);
+}
+
+TEST_F(GumboTokenizerTest, ScriptEscapedEnd) {
+  SetInput("<title>x</title>");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.start_tag.tag);
+  // In SCRIPT_ESCAPED state, "</title>" is expected to lex as an end tag.
+  gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT_ESCAPED);
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('x', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_END_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.end_tag.tag);
+}
+
+TEST_F(GumboTokenizerTest, ScriptCommentEscaped) {
+  SetInput(
+      "<script><!-- var foo = x < 7 + '</div>-- <A href=\"foo\"></a>';\n"
+      "-->\n"
+      "</script>");
+  Advance(1);
+  gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT);
+  Advance(15);
+  // Inside "<!--", '<' and "</div>" must come out as individual characters.
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('x', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_WHITESPACE, token_.type);
+  EXPECT_EQ(' ', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('<', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_WHITESPACE, token_.type);
+  EXPECT_EQ(' ', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('7', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  Advance(4);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('<', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('/', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('d', token_.v.character);
+  gumbo_token_destroy(&token_);
+  Advance(25);
+}
+
+TEST_F(GumboTokenizerTest, ScriptEscapedEmbeddedLessThan) {
+  SetInput("<script>/*<![CDATA[*/ x<7 /*]]>*/</script>");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_SCRIPT, token_.v.start_tag.tag);
+  // A bare '<' in script data is a character token, not the start of a tag.
+  gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT);
+  gumbo_token_destroy(&token_);
+  Advance(14);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('x', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('<', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('7', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  Advance(8);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_END_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_SCRIPT, token_.v.end_tag.tag);
+}
+
+TEST_F(GumboTokenizerTest, ScriptHasTagEmbedded) {
+  SetInput("<script>var foo = '</div>';</script>");
+  Advance(1);
+  gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT);
+  Advance(11);
+  // "</div>" inside a script is not the matching end tag, so it stays text.
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('<', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('/', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('d', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('i', token_.v.character);
+}
+
+TEST_F(GumboTokenizerTest, ScriptDoubleEscaped) {
+  SetInput(
+      "<script><!--var foo = '<a href=\"foo\"></a>\n"
+      "<sCrIpt>i--<f</script>'-->;</script>");
+  Advance(1);
+  gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT);
+  Advance(34);
+  // After "<!--" + "<sCrIpt>", the inner "</script>" must not end the script.
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('<', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('s', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('C', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  Advance(20);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('-', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('-', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('>', token_.v.character);
+}
+
+TEST_F(GumboTokenizerTest, CData) {
+  // SetInput uses strlen and so can't handle nulls.
+  text_ = "<![CDATA[\0filler\0text\0]]>";
+  gumbo_tokenizer_state_destroy(&parser_);
+  gumbo_tokenizer_state_init(
+      &parser_, text_, sizeof("<![CDATA[\0filler\0text\0]]>") - 1);
+  gumbo_tokenizer_set_is_current_node_foreign(&parser_, true);
+  // Marked foreign, presumably because CDATA is only lexed in foreign content.
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_NULL, token_.type);
+  EXPECT_EQ(0, token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CDATA, token_.type);
+  EXPECT_EQ('f', token_.v.character);
+}
+
+TEST_F(GumboTokenizerTest, StyleHasTagEmbedded) {  // "<head>" stays text.
+  SetInput("<style>/* For <head> */</style>");
+  Advance(1);
+  gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_RCDATA);
+  Advance(7);
+  // NOTE(review): uses RCDATA although <style> is normally RAWTEXT; confirm.
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('<', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('h', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('e', token_.v.character);
+}
+
+TEST_F(GumboTokenizerTest, PreWithNewlines) {
+  SetInput("<!DOCTYPE html><pre>\r\na</pre>");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_DOCTYPE, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+  // "<!DOCTYPE html>" is 15 bytes, so "<pre>" must start at offset 15.
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  EXPECT_EQ("<pre>", ToString(token_.original_text));
+  EXPECT_EQ(15, token_.position.offset);
+}
+
+TEST_F(GumboTokenizerTest, SelfClosingStartTag) {
+  SetInput("<br />");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+  EXPECT_EQ("<br />", ToString(token_.original_text));
+  // The trailing "/" sets is_self_closing and adds no attribute.
+  GumboTokenStartTag* start_tag = &token_.v.start_tag;
+  EXPECT_EQ(GUMBO_TAG_BR, start_tag->tag);
+  EXPECT_EQ(0, start_tag->attributes.length);
+  EXPECT_TRUE(start_tag->is_self_closing);
+}
+
+TEST_F(GumboTokenizerTest, SelfClosingEndTag) {
+  SetInput("</p />");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_END_TAG, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+  EXPECT_EQ("</p />", ToString(token_.original_text));
+  // An end tag written with a trailing "/" also reports is_self_closing.
+  GumboTokenEndTag* end_tag = &token_.v.end_tag;
+  EXPECT_EQ(GUMBO_TAG_P, end_tag->tag);
+  EXPECT_TRUE(end_tag->is_self_closing);
+}
+
+TEST_F(GumboTokenizerTest, OpenTagWithAttributes) {
+  SetInput("<a href ='/search?q=foo&amp;hl=en'  id=link>");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  // value is entity-decoded and unquoted; original_value is the raw source.
+  GumboTokenStartTag* start_tag = &token_.v.start_tag;
+  EXPECT_EQ(GUMBO_TAG_A, start_tag->tag);
+  EXPECT_FALSE(start_tag->is_self_closing);
+  ASSERT_EQ(2, start_tag->attributes.length);
+
+  GumboAttribute* href =
+      static_cast<GumboAttribute*>(start_tag->attributes.data[0]);
+  EXPECT_STREQ("href", href->name);
+  EXPECT_EQ("href", ToString(href->original_name));
+  EXPECT_STREQ("/search?q=foo&hl=en", href->value);
+  EXPECT_EQ("'/search?q=foo&amp;hl=en'", ToString(href->original_value));
+
+  GumboAttribute* id =
+      static_cast<GumboAttribute*>(start_tag->attributes.data[1]);
+  EXPECT_STREQ("id", id->name);
+  EXPECT_EQ("id", ToString(id->original_name));
+  EXPECT_STREQ("link", id->value);
+  EXPECT_EQ("link", ToString(id->original_value));
+}
+
+TEST_F(GumboTokenizerTest, BogusComment1) {  // "<?...>" lexes as a comment.
+  SetInput("<?xml is bogus-comment>Text");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_COMMENT, token_.type);
+  EXPECT_STREQ("?xml is bogus-comment", token_.v.text);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ('T', token_.v.character);
+  // Parse errors are intended here (presumably checked by the fixture).
+  errors_are_expected_ = true;
+}
+
+TEST_F(GumboTokenizerTest, BogusComment2) {  // "</#" starts a bogus comment.
+  SetInput("</#bogus-comment");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_COMMENT, token_.type);
+  EXPECT_STREQ("#bogus-comment", token_.v.text);
+
+  gumbo_token_destroy(&token_);
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  EXPECT_EQ(GUMBO_TOKEN_EOF, token_.type);
+  errors_are_expected_ = true;
+}
+
+TEST_F(GumboTokenizerTest, MultilineAttribute) {
+  SetInput(
+      "<foo long_attr=\"SomeCode;\n"
+      "  calls_a_big_long_function();\n"
+      "  return true;\" />");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  // Newlines inside a quoted attribute value are kept verbatim.
+  GumboTokenStartTag* start_tag = &token_.v.start_tag;
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, start_tag->tag);
+  EXPECT_TRUE(start_tag->is_self_closing);
+  ASSERT_EQ(1, start_tag->attributes.length);
+
+  GumboAttribute* long_attr =
+      static_cast<GumboAttribute*>(start_tag->attributes.data[0]);
+  EXPECT_STREQ("long_attr", long_attr->name);
+  EXPECT_EQ("long_attr", ToString(long_attr->original_name));
+  EXPECT_STREQ(
+      "SomeCode;\n"
+      "  calls_a_big_long_function();\n"
+      "  return true;",
+      long_attr->value);
+}
+
+TEST_F(GumboTokenizerTest, DoubleAmpersand) {
+  SetInput("<span jsif=\"foo && bar\">");
+  EXPECT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  // "&&" is not a character reference and must pass through unchanged.
+  GumboTokenStartTag* start_tag = &token_.v.start_tag;
+  EXPECT_EQ(GUMBO_TAG_SPAN, start_tag->tag);
+  EXPECT_FALSE(start_tag->is_self_closing);
+  ASSERT_EQ(1, start_tag->attributes.length);
+
+  GumboAttribute* jsif =
+      static_cast<GumboAttribute*>(start_tag->attributes.data[0]);
+  EXPECT_STREQ("jsif", jsif->name);
+  EXPECT_EQ("jsif", ToString(jsif->original_name));
+  EXPECT_STREQ("foo && bar", jsif->value);
+  EXPECT_EQ("\"foo && bar\"", ToString(jsif->original_value));
+}
+
+TEST_F(GumboTokenizerTest, MatchedTagPair) {
+  SetInput("<div id=dash<-Dash data-test=\"bar\">a</div>");
+  ASSERT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+  // '<' stays in the unquoted value; columns are 1-based, offsets 0-based.
+  GumboTokenStartTag* start_tag = &token_.v.start_tag;
+  EXPECT_EQ(GUMBO_TAG_DIV, start_tag->tag);
+  EXPECT_FALSE(start_tag->is_self_closing);
+  ASSERT_EQ(2, start_tag->attributes.length);
+
+  GumboAttribute* id =
+      static_cast<GumboAttribute*>(start_tag->attributes.data[0]);
+  EXPECT_STREQ("id", id->name);
+  EXPECT_EQ("id", ToString(id->original_name));
+  EXPECT_EQ(1, id->name_start.line);
+  EXPECT_EQ(5, id->name_start.offset);
+  EXPECT_EQ(6, id->name_start.column);
+  EXPECT_EQ(8, id->name_end.column);
+  EXPECT_STREQ("dash<-Dash", id->value);
+  EXPECT_EQ("dash<-Dash", ToString(id->original_value));
+  EXPECT_EQ(9, id->value_start.column);
+  EXPECT_EQ(19, id->value_end.column);
+
+  GumboAttribute* data_attr =
+      static_cast<GumboAttribute*>(start_tag->attributes.data[1]);
+  EXPECT_STREQ("data-test", data_attr->name);
+  EXPECT_EQ("data-test", ToString(data_attr->original_name));
+  EXPECT_EQ(20, data_attr->name_start.column);
+  EXPECT_EQ(29, data_attr->name_end.column);
+  EXPECT_STREQ("bar", data_attr->value);
+  EXPECT_EQ("\"bar\"", ToString(data_attr->original_value));
+  EXPECT_EQ(30, data_attr->value_start.column);
+  EXPECT_EQ(35, data_attr->value_end.column);
+
+  gumbo_token_destroy(&token_);
+  ASSERT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_CHARACTER, token_.type);
+  EXPECT_EQ(35, token_.position.offset);
+  EXPECT_EQ('a', token_.v.character);
+
+  gumbo_token_destroy(&token_);
+  ASSERT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_END_TAG, token_.type);
+  EXPECT_EQ(GUMBO_TAG_DIV, token_.v.end_tag.tag);
+  errors_are_expected_ = true;
+}
+
+TEST_F(GumboTokenizerTest, BogusEndTag) {  // "</div</th>" is one end tag.
+  // According to the spec, the correct parse of this is an end tag token for
+  // "<div<>" (notice the ending bracket) with the attribute "th=th" (ignored
+  // because end tags don't take attributes), with the tokenizer passing through
+  // the self-closing tag state in the process.
+  SetInput("</div</th>");
+  ASSERT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_END_TAG, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, token_.v.end_tag.tag);
+  EXPECT_EQ("</div</th>", ToString(token_.original_text));
+  errors_are_expected_ = true;
+}
+
+TEST_F(GumboTokenizerTest, NullInTagNameState) {  // NUL becomes U+FFFD.
+  char input[] = { '<', 'x', 0, 'x', '>' };
+  text_ = input;
+  gumbo_tokenizer_state_destroy(&parser_);
+  gumbo_tokenizer_state_init(&parser_, input, sizeof input);
+  ASSERT_TRUE(gumbo_lex(&parser_, &token_));
+  ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type);
+  EXPECT_EQ(0, token_.position.offset);
+  EXPECT_EQ(GUMBO_TAG_UNKNOWN, token_.v.start_tag.tag);
+  EXPECT_EQ(std::string("x\xEF\xBF\xBDx"), token_.v.start_tag.name);
+  errors_are_expected_ = true;
+}
+}  // namespace
diff --git a/gumbo-parser/test/utf8.cc b/gumbo-parser/test/utf8.cc
new file mode 100644
index 00000000..6cc6a441
--- /dev/null
+++ b/gumbo-parser/test/utf8.cc
@@ -0,0 +1,598 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#include "utf8.h"
+
+#include <string.h>
+
+#include "gtest/gtest.h"
+#include "error.h"
+#include "gumbo.h"
+#include "test_utils.h"
+
+namespace {
+
+// Tests for utf8.c
+class Utf8Test : public GumboTest {
+ protected:
+  void Advance(int num_chars) {  // Calls utf8iterator_next num_chars times.
+    for (int i = 0; i < num_chars; ++i) {
+      utf8iterator_next(&input_);
+    }
+  }
+  // Points text_ and the iterator at a NUL-terminated string (uses strlen).
+  void ResetText(const char* text) {
+    text_ = text;
+    utf8iterator_init(&parser_, text, strlen(text), &input_);
+  }
+  // Returns errors.data[0]; only valid once at least one error was recorded.
+  GumboError* GetFirstError() {
+    return static_cast<GumboError*>(parser_._output->errors.data[0]);
+  }
+  // Number of errors recorded on the parser output so far.
+  int GetNumErrors() { return parser_._output->errors.length; }
+  // Iterator under test; (re)initialized by ResetText or utf8iterator_init.
+  Utf8Iterator input_;
+};
+
+TEST_F(Utf8Test, EmptyString) {  // Empty input is immediately at EOF (-1).
+  ResetText("");
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, GetPosition_EmptyString) {
+  ResetText("");
+  GumboSourcePosition pos;
+  // Even with no input, the position is line 1, column 1, offset 0.
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(1, pos.line);
+  EXPECT_EQ(1, pos.column);
+  EXPECT_EQ(0, pos.offset);
+}
+
+TEST_F(Utf8Test, Null) {
+  // Can't use ResetText, as the implicit strlen will choke on the null.
+  text_ = "\0f";
+  utf8iterator_init(&parser_, text_, 2, &input_);
+  // An embedded NUL byte is returned as code point 0, not treated as EOF.
+  EXPECT_EQ(0, utf8iterator_current(&input_));
+  EXPECT_EQ('\0', *utf8iterator_get_char_pointer(&input_));
+  utf8iterator_next(&input_);
+  EXPECT_EQ('f', utf8iterator_current(&input_));
+  EXPECT_EQ('f', *utf8iterator_get_char_pointer(&input_));
+}
+
+TEST_F(Utf8Test, OneByteChar) {  // Plain ASCII: no errors, then EOF.
+  ResetText("a");
+
+  EXPECT_EQ(0, GetNumErrors());
+  EXPECT_EQ('a', utf8iterator_current(&input_));
+  EXPECT_EQ('a', *utf8iterator_get_char_pointer(&input_));
+
+  GumboSourcePosition pos;
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(1, pos.line);
+  EXPECT_EQ(1, pos.column);
+  EXPECT_EQ(0, pos.offset);
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, ContinuationByte) {
+  ResetText("\x85");
+
+  EXPECT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+  EXPECT_EQ('\x85', *utf8iterator_get_char_pointer(&input_));
+
+  errors_are_expected_ = true;
+  GumboError* error = GetFirstError();
+  EXPECT_EQ(GUMBO_ERR_UTF8_INVALID, error->type);
+  EXPECT_EQ('\x85', *error->original_text);
+  EXPECT_EQ(0x85, error->v.codepoint);
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, MultipleContinuationBytes) {
+  ResetText("a\x85\xA0\xC2x\x9A");
+  EXPECT_EQ('a', utf8iterator_current(&input_));
+  // 0x85, 0xA0, 0xC2 (no continuation follows) and 0x9A each yield U+FFFD.
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ('x', utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+  // Stepping once more at EOF must not record a fifth error.
+  utf8iterator_next(&input_);
+  EXPECT_EQ(4, GetNumErrors());
+}
+
+TEST_F(Utf8Test, OverlongEncoding) {
+  // \xC0\x75 = 11000000 01110101.
+  ResetText("\xC0\x75");
+  // 0xC0 can only start an overlong sequence, so it is rejected by itself.
+  ASSERT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+  EXPECT_EQ('\xC0', *utf8iterator_get_char_pointer(&input_));
+
+  errors_are_expected_ = true;
+  GumboError* error = GetFirstError();
+  EXPECT_EQ(GUMBO_ERR_UTF8_INVALID, error->type);
+  EXPECT_EQ(1, error->position.line);
+  EXPECT_EQ(1, error->position.column);
+  EXPECT_EQ(0, error->position.offset);
+  EXPECT_EQ('\xC0', *error->original_text);
+  EXPECT_EQ(0xC0, error->v.codepoint);
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0x75, utf8iterator_current(&input_));
+  EXPECT_EQ('\x75', *utf8iterator_get_char_pointer(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, OverlongEncodingWithContinuationByte) {
+  // \xC0\x85 = 11000000 10000101.
+  ResetText("\xC0\x85");
+  // 0xC0 is rejected first; the orphaned 0x85 then yields a second U+FFFD.
+  ASSERT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+  EXPECT_EQ('\xC0', *utf8iterator_get_char_pointer(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  errors_are_expected_ = true;
+  GumboError* error = GetFirstError();
+  EXPECT_EQ(GUMBO_ERR_UTF8_INVALID, error->type);
+  EXPECT_EQ(1, error->position.line);
+  EXPECT_EQ(1, error->position.column);
+  EXPECT_EQ(0, error->position.offset);
+  EXPECT_EQ('\xC0', *error->original_text);
+  EXPECT_EQ(0xC0, error->v.codepoint);
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, TwoByteChar) {
+  // \xC3\xA5 = 11000011 10100101.
+  ResetText("\xC3\xA5o");
+
+  EXPECT_EQ(0, GetNumErrors());
+  // Codepoint = 000 11100101 = 0xE5.
+  EXPECT_EQ(0xE5, utf8iterator_current(&input_));
+  EXPECT_EQ('\xC3', *utf8iterator_get_char_pointer(&input_));
+
+  GumboSourcePosition pos;
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(1, pos.line);
+  EXPECT_EQ(1, pos.column);
+  EXPECT_EQ(0, pos.offset);
+  // One column per code point, but the offset advances by its two bytes.
+  utf8iterator_next(&input_);
+  EXPECT_EQ('o', utf8iterator_current(&input_));
+
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(1, pos.line);
+  EXPECT_EQ(2, pos.column);
+  EXPECT_EQ(2, pos.offset);
+}
+
+TEST_F(Utf8Test, TwoByteChar2) {  // U+00A5 followed directly by EOF.
+  // \xC2\xA5 = 11000010 10100101.
+  ResetText("\xC2\xA5");
+
+  EXPECT_EQ(0, GetNumErrors());
+  // Codepoint = 000 10100101 = 0xA5.
+  EXPECT_EQ(0xA5, utf8iterator_current(&input_));
+  EXPECT_EQ('\xC2', *utf8iterator_get_char_pointer(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, ThreeByteChar) {
+  // \xE3\xA7\xA7 = 11100011 10100111 10100111
+  ResetText("\xE3\xA7\xA7\xB0");
+
+  EXPECT_EQ(0, GetNumErrors());
+  // Codepoint = 00111001 11100111 = 0x39E7
+  EXPECT_EQ(0x39E7, utf8iterator_current(&input_));
+  EXPECT_EQ('\xE3', *utf8iterator_get_char_pointer(&input_));
+  // The stray continuation byte 0xB0 is reported only once it is reached.
+  utf8iterator_next(&input_);
+  EXPECT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+  EXPECT_EQ('\xB0', *utf8iterator_get_char_pointer(&input_));
+
+  GumboSourcePosition pos;
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(1, pos.line);
+  EXPECT_EQ(2, pos.column);
+  EXPECT_EQ(3, pos.offset);
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, FourByteChar) {  // Two-byte then four-byte sequence.
+  // \xC3\x9A = 11000011 10011010
+  // \xF1\xA7\xA7\xA7 = 11110001 10100111 10100111 10100111
+  ResetText("\xC3\x9A\xF1\xA7\xA7\xA7");
+
+  // Codepoint = 000 11011010 = 0xDA.
+  EXPECT_EQ(0xDA, utf8iterator_current(&input_));
+  EXPECT_EQ('\xC3', *utf8iterator_get_char_pointer(&input_));
+
+  utf8iterator_next(&input_);
+  // Codepoint = 00110 01111001 11100111 = 0x679E7.
+  EXPECT_EQ(0x679E7, utf8iterator_current(&input_));
+  EXPECT_EQ('\xF1', *utf8iterator_get_char_pointer(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, FourByteCharWithoutContinuationChars) {
+  // \xF1 announces four bytes, but only \xA7\xA7 follow before '-'.
+  ResetText("\xF1\xA7\xA7-");
+  // The incomplete sequence collapses into a single U+FFFD and one error.
+  EXPECT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+  EXPECT_EQ('\xF1', *utf8iterator_get_char_pointer(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ('-', utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, FiveByteCharIsError) {
+  ResetText("\xF6\xA7\xA7\xA7\xA7x");
+  // 0xF6 is rejected, and each of the four 0xA7 bytes takes its own step.
+  EXPECT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+  utf8iterator_next(&input_);
+  EXPECT_EQ('x', utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, SixByteCharIsError) {
+  ResetText("\xF8\xA7\xA7\xA7\xA7\xA7x");
+  // 0xF8 is rejected, and each of the five 0xA7 bytes takes its own step.
+  EXPECT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+  utf8iterator_next(&input_);
+  EXPECT_EQ('x', utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, SevenByteCharIsError) {
+  ResetText("\xFC\xA7\xA7\xA7\xA7\xA7\xA7x");
+  // 0xFC is rejected, and each of the six 0xA7 bytes takes its own step.
+  EXPECT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+  utf8iterator_next(&input_);
+  EXPECT_EQ('x', utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, 0xFFIsError) {  // 0xFF never appears in UTF-8.
+  ResetText("\xFFx");
+
+  EXPECT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ('x', utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, InvalidControlCharIsError) {
+  ResetText("\x1Bx");
+  // ESC (0x1B) is well-formed UTF-8 but is still replaced and reported.
+  EXPECT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ('x', utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, TruncatedInput) {  // Input ends mid-sequence.
+  ResetText("\xF1\xA7");
+
+  ASSERT_EQ(1, GetNumErrors());
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+  // Safe to read errors.data[0]: the ASSERT above stops the test otherwise.
+  errors_are_expected_ = true;
+  GumboError* error = GetFirstError();
+  EXPECT_EQ(GUMBO_ERR_UTF8_TRUNCATED, error->type);
+  EXPECT_EQ(1, error->position.line);
+  EXPECT_EQ(1, error->position.column);
+  EXPECT_EQ(0, error->position.offset);
+  EXPECT_EQ('\xF1', *error->original_text);
+  EXPECT_EQ(0xF1A7, error->v.codepoint);
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, Html5SpecExample) {
+  // This example has since been removed from the spec, and the spec has been
+  // changed to reference the Unicode Standard 6.2, 5.22 "Best practices for
+  // U+FFFD substitution."
+  ResetText("\x41\x98\xBA\x42\xE2\x98\x43\xE2\x98\xBA\xE2\x98");
+  // Expected: A, U+FFFD, U+FFFD, B, U+FFFD, C, U+263A, U+FFFD (truncated).
+  EXPECT_EQ('A', utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ('B', utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ('C', utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  // \xE2\x98\xBA = 11100010 10011000 10111010
+  // Codepoint = 00100110 00111010 = 0x263A
+  EXPECT_EQ(0x263A, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(0xFFFD, utf8iterator_current(&input_));
+  utf8iterator_next(&input_);
+}
+
+TEST_F(Utf8Test, MultipleEOFReads) {  // Reading past EOF stays at EOF.
+  ResetText("a");
+  Advance(2);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+
+  utf8iterator_next(&input_);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, AsciiOnly) {
+  ResetText("hello");
+  Advance(4);
+  // After four steps the iterator sits on 'o': column 5, offset 4.
+  EXPECT_EQ('o', utf8iterator_current(&input_));
+  EXPECT_EQ('o', *utf8iterator_get_char_pointer(&input_));
+
+  GumboSourcePosition pos;
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(1, pos.line);
+  EXPECT_EQ(5, pos.column);
+  EXPECT_EQ(4, pos.offset);
+
+  Advance(1);
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, NewlinePosition) {  // '\n' ends line 1; 'n' opens line 2.
+  ResetText("a\nnewline");
+  Advance(1);
+
+  // Newline itself should register as being at the end of a line.
+  GumboSourcePosition pos;
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(1, pos.line);
+  EXPECT_EQ(2, pos.column);
+  EXPECT_EQ(1, pos.offset);
+
+  // The next character should be at the next line.
+  Advance(1);
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(2, pos.line);
+  EXPECT_EQ(1, pos.column);
+  EXPECT_EQ(2, pos.offset);
+}
+
+TEST_F(Utf8Test, TabPositionFreshTabstop) {
+  ResetText("a\n\ttab");
+  Advance(sizeof("a\n\t") - 1);
+  // A tab at the start of a line puts the next character at column 8.
+  GumboSourcePosition pos;
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(2, pos.line);
+  EXPECT_EQ(8, pos.column);
+  EXPECT_EQ(3, pos.offset);
+}
+
+TEST_F(Utf8Test, TabPositionMidTabstop) {
+  ResetText("a tab\tinline");
+  Advance(sizeof("a tab\t") - 1);
+  // A tab reached mid-line still lands the next character on column 8.
+  GumboSourcePosition pos;
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(1, pos.line);
+  EXPECT_EQ(8, pos.column);
+  EXPECT_EQ(6, pos.offset);
+}
+
+TEST_F(Utf8Test, ConfigurableTabstop) {
+  options_.tab_stop = 4;
+  ResetText("a\n\ttab");
+  Advance(sizeof("a\n\t") - 1);
+  // With tab_stop = 4, the same leading tab now lands on column 4.
+  GumboSourcePosition pos;
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(2, pos.line);
+  EXPECT_EQ(4, pos.column);
+  EXPECT_EQ(3, pos.offset);
+}
+
+TEST_F(Utf8Test, CRLF) {
+  ResetText("Windows\r\nlinefeeds");
+  Advance(sizeof("Windows") - 1);
+  // "\r\n" is presented as a single '\n' code point.
+  EXPECT_EQ('\n', utf8iterator_current(&input_));
+  EXPECT_EQ('\n', *utf8iterator_get_char_pointer(&input_));
+
+  GumboSourcePosition pos;
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(1, pos.line);
+  // The carriage return should be ignored in column calculations, treating the
+  // CRLF combination as one character.
+  EXPECT_EQ(8, pos.column);
+  // However, it should not be ignored in computing offsets, which are often
+  // used by other tools to index into the original buffer. We don't expect
+  // other unicode-aware tools to have the same \r\n handling as HTML5.
+  EXPECT_EQ(8, pos.offset);
+}
+
+TEST_F(Utf8Test, CarriageReturn) {
+  ResetText("Mac\rlinefeeds");
+  Advance(sizeof("Mac") - 1);
+  // A lone '\r' is reported as '\n' and starts a new line.
+  EXPECT_EQ('\n', utf8iterator_current(&input_));
+  // We don't change the original pointer, which is part of the const input
+  // buffer. original_text pointers will see a carriage return as originally
+  // written.
+  EXPECT_EQ('\r', *utf8iterator_get_char_pointer(&input_));
+
+  GumboSourcePosition pos;
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(1, pos.line);
+  EXPECT_EQ(4, pos.column);
+  EXPECT_EQ(3, pos.offset);
+
+  Advance(1);
+  EXPECT_EQ('l', utf8iterator_current(&input_));
+  EXPECT_EQ('l', *utf8iterator_get_char_pointer(&input_));
+
+  utf8iterator_get_position(&input_, &pos);
+  EXPECT_EQ(2, pos.line);
+  EXPECT_EQ(1, pos.column);
+  EXPECT_EQ(4, pos.offset);
+}
+
+TEST_F(Utf8Test, Matches) {  // A match consumes the matched text.
+  ResetText("\xC2\xA5goobar");
+  Advance(1);
+  EXPECT_TRUE(utf8iterator_maybe_consume_match(&input_, "goo", 3, true));
+  EXPECT_EQ('b', utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, MatchesOverflow) {  // Pattern longer than input: no-op.
+  ResetText("goo");
+  EXPECT_FALSE(utf8iterator_maybe_consume_match(&input_, "goobar", 6, true));
+  EXPECT_EQ('g', utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, MatchesEof) {  // A match may consume up to EOF.
+  ResetText("goo");
+  EXPECT_TRUE(utf8iterator_maybe_consume_match(&input_, "goo", 3, true));
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, MatchesCaseSensitivity) {  // Case must match exactly.
+  ResetText("gooBAR");
+  EXPECT_FALSE(utf8iterator_maybe_consume_match(&input_, "goobar", 6, true));
+  EXPECT_EQ('g', utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, MatchesCaseInsensitive) {  // Last arg false ignores case.
+  ResetText("gooBAR");
+  EXPECT_TRUE(utf8iterator_maybe_consume_match(&input_, "goobar", 6, false));
+  EXPECT_EQ(-1, utf8iterator_current(&input_));
+}
+
+TEST_F(Utf8Test, MatchFollowedByNullByte) {
+  // Can't use ResetText, as the implicit strlen will choke on the null.
+  text_ = "CDATA\0f";
+  utf8iterator_init(&parser_, text_, 7, &input_);
+  // A NUL right after the match must be read as code point 0, not as EOF.
+  EXPECT_TRUE(utf8iterator_maybe_consume_match(
+      &input_, "cdata", sizeof("cdata") - 1, false));
+
+  EXPECT_EQ(0, utf8iterator_current(&input_));
+  EXPECT_EQ('\0', *utf8iterator_get_char_pointer(&input_));
+  utf8iterator_next(&input_);
+  EXPECT_EQ('f', utf8iterator_current(&input_));
+  EXPECT_EQ('f', *utf8iterator_get_char_pointer(&input_));
+}
+
+TEST_F(Utf8Test, MarkReset) {  // mark() + reset() rewind the iterator.
+  ResetText("this is a test");
+  Advance(5);
+  EXPECT_EQ('i', utf8iterator_current(&input_));
+  utf8iterator_mark(&input_);
+
+  Advance(3);
+  EXPECT_EQ('a', utf8iterator_current(&input_));
+  // The error is filled from the mark, not from the current position.
+  GumboError error;
+  utf8iterator_fill_error_at_mark(&input_, &error);
+  EXPECT_EQ('i', *error.original_text);
+  EXPECT_EQ(1, error.position.line);
+  EXPECT_EQ(6, error.position.column);
+  EXPECT_EQ(5, error.position.offset);
+  // reset() rewinds the iterator itself back to the mark.
+  utf8iterator_reset(&input_);
+  EXPECT_EQ('i', utf8iterator_current(&input_));
+  EXPECT_EQ('i', *utf8iterator_get_char_pointer(&input_));
+  // The iterator's own position must match the mark after the reset.
+  GumboSourcePosition position;
+  utf8iterator_get_position(&input_, &position);
+  EXPECT_EQ(1, position.line);
+  EXPECT_EQ(6, position.column);
+  EXPECT_EQ(5, position.offset);
+}
+
+}  // namespace
diff --git a/gumbo-parser/test/vector.cc b/gumbo-parser/test/vector.cc
new file mode 100644
index 00000000..33b15c36
--- /dev/null
+++ b/gumbo-parser/test/vector.cc
@@ -0,0 +1,129 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+
+#include <stddef.h>
+#include "vector.h"
+#include "gtest/gtest.h"
+#include "test_utils.h"
+
+namespace {
+
+class GumboVectorTest : public GumboTest {  // fixture for the vector tests
+ protected:
+  GumboVectorTest(): one_(1), two_(2), three_(3) {
+    gumbo_vector_init(2, &vector_);  // every test starts with capacity 2
+  }
+
+  ~GumboVectorTest() { gumbo_vector_destroy(&vector_); }
+
+  GumboVector vector_;  // vector under test
+
+  // Dummy ints whose addresses the tests store in the vector as elements.
+  int one_;
+  int two_;
+  int three_;
+};
+
+TEST_F(GumboVectorTest, Init) {
+  EXPECT_EQ(0, vector_.length);    // freshly initialized: no elements
+  EXPECT_EQ(2, vector_.capacity);  // the capacity the fixture asked for
+}
+
+TEST_F(GumboVectorTest, InitZeroCapacity) {
+  gumbo_vector_destroy(&vector_);  // discard the fixture's capacity-2 vector
+  gumbo_vector_init(0, &vector_);
+
+  gumbo_vector_add(&one_, &vector_);  // must work despite zero capacity
+  EXPECT_EQ(1, vector_.length);
+  EXPECT_EQ(1, *(static_cast<int*>(vector_.data[0])));
+}
+
+TEST_F(GumboVectorTest, Add) {
+  gumbo_vector_add(&one_, &vector_);
+  EXPECT_EQ(1, vector_.length);
+  EXPECT_EQ(1, *(static_cast<int*>(vector_.data[0])));
+  EXPECT_EQ(0, gumbo_vector_index_of(&vector_, &one_));   // stored at index 0
+  EXPECT_EQ(-1, gumbo_vector_index_of(&vector_, &two_));  // -1: never added
+}
+
+TEST_F(GumboVectorTest, AddMultiple) {
+  gumbo_vector_add(&one_, &vector_);
+  gumbo_vector_add(&two_, &vector_);
+  EXPECT_EQ(2, vector_.length);
+  EXPECT_EQ(2, *(static_cast<int*>(vector_.data[1])));   // second add -> slot 1
+  EXPECT_EQ(1, gumbo_vector_index_of(&vector_, &two_));  // looked up by address
+}
+
+TEST_F(GumboVectorTest, Realloc) {
+  gumbo_vector_add(&one_, &vector_);
+  gumbo_vector_add(&two_, &vector_);
+  gumbo_vector_add(&three_, &vector_);  // one more than the initial capacity
+  EXPECT_EQ(3, vector_.length);
+  EXPECT_EQ(4, vector_.capacity);  // capacity went from 2 to 4
+  EXPECT_EQ(3, *(static_cast<int*>(vector_.data[2])));
+}
+
+TEST_F(GumboVectorTest, Pop) {
+  // Popping the only element hands it back and leaves the vector empty.
+  gumbo_vector_add(&one_, &vector_);
+  EXPECT_EQ(1, *static_cast<int*>(gumbo_vector_pop(&vector_)));
+  EXPECT_EQ(0, vector_.length);
+}
+
+TEST_F(GumboVectorTest, PopEmpty) {  // popping an empty vector yields NULL
+  EXPECT_TRUE(gumbo_vector_pop(&vector_) == NULL);  // not EXPECT_EQ(NULL, p)
+}
+
+TEST_F(GumboVectorTest, InsertAtFirst) {
+  // Inserting at index 0 places the new element at the front.
+  gumbo_vector_add(&one_, &vector_);
+  gumbo_vector_add(&two_, &vector_);
+  gumbo_vector_insert_at(&three_, 0, &vector_);
+  EXPECT_EQ(3, vector_.length);
+  EXPECT_EQ(3, *static_cast<int*>(vector_.data[0]));
+}
+
+TEST_F(GumboVectorTest, InsertAtLast) {
+  // Inserting at index == length places the new element at the end.
+  gumbo_vector_add(&one_, &vector_);
+  gumbo_vector_add(&two_, &vector_);
+  gumbo_vector_insert_at(&three_, 2, &vector_);
+  EXPECT_EQ(3, vector_.length);
+  EXPECT_EQ(3, *static_cast<int*>(vector_.data[2]));
+}
+
+TEST_F(GumboVectorTest, Remove) {
+  // Removing the middle element moves the last one down to index 1.
+  gumbo_vector_add(&one_, &vector_);
+  gumbo_vector_add(&two_, &vector_);
+  gumbo_vector_add(&three_, &vector_);
+  gumbo_vector_remove(&two_, &vector_);
+  EXPECT_EQ(2, vector_.length);
+  EXPECT_EQ(3, *static_cast<int*>(vector_.data[1]));
+}
+
+TEST_F(GumboVectorTest, RemoveAt) {
+  // Removing index 1 returns that element and moves the last one down.
+  gumbo_vector_add(&one_, &vector_);
+  gumbo_vector_add(&two_, &vector_);
+  gumbo_vector_add(&three_, &vector_);
+  int* removed = static_cast<int*>(gumbo_vector_remove_at(1, &vector_));
+  EXPECT_EQ(2, *removed);
+  EXPECT_EQ(2, vector_.length);
+  EXPECT_EQ(3, *static_cast<int*>(vector_.data[1]));
+}
+
+}  // namespace
diff --git a/gumbo-parser/visualc/include/strings.h b/gumbo-parser/visualc/include/strings.h
deleted file mode 100644
index 59750dec..00000000
--- a/gumbo-parser/visualc/include/strings.h
+++ /dev/null
@@ -1,4 +0,0 @@
-/*Dummy file to satisfy source file dependencies on Windows platform*/
-#define strcasecmp _stricmp
-#define strncasecmp _strnicmp
-#define inline __inline
diff --git a/lib/nokogumbo.rb b/lib/nokogumbo.rb
index 3b457ed5..a88e4460 100644
--- a/lib/nokogumbo.rb
+++ b/lib/nokogumbo.rb
@@ -170,4 +170,22 @@ def self.reencode(body, content_type=nil)
       body.encode(Encoding::UTF_8)
     end
   end
+
+  # Monkey patch
+  module XML
+    class Node
+      # Adds +node+ as a child, then removes every namespaced attribute of
+      # +node+ and sets it again via Node#[]= using its name and value.
+      # Background: HTML attribute names may contain colons, but Node#[]=
+      # treats such a name as a prefixed QName and creates a namespaced
+      # attribute; for xml:lang libxml2 will even create the xml namespace.
+      def add_child_node_and_reparent_attrs(node)
+        add_child_node(node)
+        node.attribute_nodes.select(&:namespace).each do |namespaced_attr|
+          namespaced_attr.remove
+          node[namespaced_attr.name] = namespaced_attr.value
+        end
+      end
+    end
+  end
 end
diff --git a/test/test_tree-construction.rb b/test/test_tree-construction.rb
new file mode 100644
index 00000000..a89f99bf
--- /dev/null
+++ b/test/test_tree-construction.rb
@@ -0,0 +1,250 @@
+# encoding: utf-8
+require 'nokogumbo'
+require 'minitest/autorun'
+
+# class TestTreeConstructionBase < Minitest::Test
+#   def fragment(s)
+#     Nokogiri::HTML5.fragment(s, context, max_parse_errors: 100)
+#   end
+# 
+#   def parse(s)
+#     Nokogiri::HTML5.parse(s, max_parse_errors: 100)
+#   end
+# end
+
+def parse_test(test_data) # Parses the text of one test case into a Hash.
+  test = { script: :both } # :script stays :both unless a #script-on/#script-off line is found
+  #index = test_data.start_with?("#errors\n") ? 0 : test_data.index("\n#errors\n")
+  index = /(?:^#errors\n|\n#errors\n)/ =~ test_data # offset at which the input data ends
+  abort "Expected #errors in\n#{test_data}" if index.nil?
+  skip_amount = $~[0].length # length of the matched "#errors" marker, newlines included
+  # Omit the final new line
+  test[:data] = test_data[0...index]
+
+  # Process the rest line by line
+  lines = test_data[index+skip_amount..-1].split("\n")
+  index = lines.find_index do |line| # first section header that follows the error list
+    line == '#document-fragment' ||
+      line == '#document' ||
+      line == '#script-off' ||
+      line == '#script-on'
+  end
+  abort 'Expected #document' if index.nil?
+  test[:errors] = lines[0...index]
+    .map { |line| line.chomp }
+    .keep_if { |line| line != '#new-errors' } # the marker line itself is not an error
+
+  if lines[index] == '#document-fragment'
+    test[:context] = lines[index+1].chomp.split(' ', 2) # next line, split at its first space
+    index += 2
+  end
+  abort "failed to find fragment: #{index}: #{lines[index]}" if test_data.include?("#document-fragment") && test[:context].nil?
+
+  if lines[index] =~ /#script-(on|off)/
+    test[:script] = $~[1].to_sym # :on or :off
+    index += 1
+  end
+
+  abort "Expected #document, got #{lines[index]}" unless lines[index] == '#document'
+  index += 1
+
+  document = {
+    type: :document,
+    children: []
+  }
+  open_nodes = [document] # open_nodes[d] is the parent for a line at nesting depth d
+  # puts "Processing document:"
+  # lines[index..-1].each { |line| puts line }
+  while index < lines.length
+    abort "Expected '| ' but got #{lines[index]}" unless /^\| ( *)([^ ].*$)/ =~ lines[index]
+    depth = $~[1].length # number of spaces after "| "; two per nesting level
+    if depth.odd?
+      abort "Invalid nesting depth"
+    else
+      depth = depth / 2
+    end
+    abort "Too deep" if depth >= open_nodes.length
+
+    node = {}
+    node_text = $~[2]
+    if node_text[0] == '"' # text node
+      if node_text == '"' || node_text[-1] != '"' # closing quote is on a later line
+        loop do
+          index += 1
+          node_text << "\n" + lines[index] # NOTE(review): lines[index] is nil (TypeError) if no closing quote follows
+          break if node_text[-1] == '"'
+        end
+      end
+      node[:type] = :text
+      node[:contents] = node_text[1..-2] # strip the surrounding quotes
+    elsif /^<!DOCTYPE ([^ >]*)(?: "([^"]*)" "(.*)")?>$/ =~ node_text
+      node[:type] = :doctype
+      node[:name] = $~[1]
+      node[:public_id] = $~[2].nil? || $~[2].empty? ? nil : $~[2] # missing or empty ids become nil
+      node[:system_id] = $~[3].nil? || $~[3].empty? ? nil : $~[3]
+    elsif /^<!-- (.*) -->$/ =~ node_text
+      node[:type] = :comment
+      node[:contents] = $~[1]
+    elsif /^<(svg |math )?(.+)>$/ =~ node_text
+      node[:type] = :element
+      node[:ns] = $~[1].nil? ? nil : $~[1].rstrip # "svg", "math" or nil
+      node[:tag] = $~[2]
+      node[:attributes] = []
+      node[:children] = []
+    elsif /^([^ ]+ )?([^=]+)="(.*)"$/ =~ node_text
+      node[:type] = :attribute
+      node[:ns] = $~[1].nil? ? nil : $~[1].rstrip # optional prefix before the attribute name
+      node[:name] = $~[2]
+      node[:value] = $~[3]
+    elsif node_text == 'content'
+      node[:type] = :template
+    else
+      abort "Unexpected node_text: #{node_text}"
+    end
+
+    if node[:type] == :attribute
+      abort "depth #{depth} != #{open_nodes.length}" unless depth == open_nodes.length - 1 # must sit under the innermost open node
+      abort "type :#{open_nodes[-1][:type]} != :element" unless open_nodes[-1][:type] == :element
+      abort "element has children" unless open_nodes[-1][:children].empty?
+      open_nodes[-1][:attributes] << node
+    elsif node[:type] == :template
+      abort "depth #{depth} != #{open_nodes.length}" unless depth == open_nodes.length - 1
+      abort "type :#{open_nodes[-1][:type]} != :element" unless open_nodes[-1][:type] == :element
+      abort "tag :#{open_nodes[-1][:tag]} != template" unless open_nodes[-1][:tag] == 'template'
+      abort "template has children before the 'content'" unless open_nodes[-1][:children].empty?
+      # Hack. We want the children of this template node to be reparented as
+      # children of the template element.
+      # XXX: Template contents are _not_ supposed to be children of the
+      # template, but we currently mishandle this.
+      open_nodes << open_nodes[-1]
+    else
+      open_nodes[depth][:children] << node
+      open_nodes[depth+1..-1] = [] # close everything deeper than the new node's parent
+      if node[:type] == :element
+        open_nodes << node # only elements can receive attributes/children on later lines
+      end
+    end
+    index += 1
+  end
+  test[:document] = document
+  test # keys set above: :script, :data, :errors, :document, plus :context for fragment tests
+end
+
+class TestTreeConstructionBase < Minitest::Test
+  # Compares +act+ against +exp+, switching to assert_nil when the expected
+  # value is nil and to assert_equal otherwise.
+  def assert_equal_or_nil(exp, act)
+    return assert_nil(act) if exp.nil?
+
+    assert_equal(exp, act)
+  end
+
+  # Checks one expected node Hash (as built by parse_test) against the
+  # Nokogiri node +ng_node+. Children are only counted here; descending into
+  # them is left to the caller.
+  def compare_nodes(node, ng_node)
+    case ng_node.type
+    when Nokogiri::XML::Node::ELEMENT_NODE
+      assert_equal node[:type], :element
+      # XXX: HTML doesn't serialize namespaces and nokogumbo doesn't attach
+      # them to elements, so node[:ns] is not compared with the namespace prefix.
+      assert_equal node[:tag], ng_node.name
+      actual_attrs = ng_node.attributes
+      assert_equal node[:attributes].length, actual_attrs.length
+      node[:attributes].each do |expected|
+        key = expected[:ns] ? "#{expected[:ns]}:#{expected[:name]}" : expected[:name]
+        # Looked up in the attributes hash: get_attribute does not work with 'xml:lang'.
+        found = actual_attrs[key]
+        actual_value = found && found.value
+        # Putting #{ng_node} into the message reportedly changes the tree, so it is left out.
+        refute actual_value.nil?, "Couldn't find attribute '#{key}'"
+        assert_equal expected[:value], actual_value
+      end
+      assert_equal node[:children].length, ng_node.children.length,
+        "Element <#{node[:tag]}> has wrong number of children: #{ng_node.children.map(&:name)}"
+    when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
+      # CDATA is kept in the parsed tree, whereas the tests describe it as text.
+      assert_equal node[:type], :text
+      assert_equal node[:contents], ng_node.content
+    when Nokogiri::XML::Node::COMMENT_NODE
+      assert_equal node[:type], :comment
+      assert_equal node[:contents], ng_node.content
+    when Nokogiri::XML::Node::HTML_DOCUMENT_NODE
+      assert_equal node[:type], :document
+      assert_equal node[:children].length, ng_node.children.length
+    when Nokogiri::XML::Node::DTD_NODE
+      assert_equal node[:type], :doctype
+      assert_equal node[:name], ng_node.name
+      assert_equal_or_nil node[:public_id], ng_node.external_id
+      assert_equal_or_nil node[:system_id], ng_node.system_id
+    else
+      flunk "Unknown node type #{ng_node.type} (expected #{node[:type]})"
+    end
+  end
+
+  # Parses @test[:data] and walks the expected and actual trees in lockstep,
+  # depth first, calling compare_nodes on each pair of nodes.
+  def run_test
+    skip "Scripting tests not supported" if @test[:script] == :on
+    skip "Fragment tests not supported" unless @test[:context].nil?
+    error_limit = @test[:errors].length + 1
+    doc = Nokogiri::HTML5.parse(@test[:data], max_parse_errors: error_limit)
+    # The number of reported errors (doc.errors) is not compared yet.
+
+    # Parallel stacks: expected node, actual node, index of the next child.
+    expected_stack = [@test[:document]]
+    actual_stack = [doc]
+    cursors = [0]
+    compare_nodes(expected_stack.first, doc)
+    until cursors.empty?
+      expected_parent = expected_stack.last
+      position = cursors.last
+      if position == expected_parent[:children].length
+        # Every child of this node has been visited; go back up one level.
+        [expected_stack, actual_stack, cursors].each(&:pop)
+        next
+      end
+      expected_child = expected_parent[:children][position]
+      actual_child = actual_stack.last.children[position]
+      compare_nodes(expected_child, actual_child)
+      cursors[-1] = position + 1
+      next unless expected_child.key?(:children)
+      expected_stack.push(expected_child)
+      actual_stack.push(actual_child)
+      cursors.push(0)
+    end
+  end
+end
+
+# Define one Minitest class per .dat file, with one test method per test case.
+tc_path = File.expand_path('../html5lib-tests/tree-construction', __FILE__)
+Dir[File.join(tc_path, '*.dat')].each do |path|
+  # foo_bar.dat becomes TestTreeConstructionFooBar.
+  test_name = "TestTreeConstruction" + File.basename(path, '.dat')
+    .split(/[_-]/)
+    .map { |s| s.capitalize }
+    .join('')
+  tests = []
+  # Read as UTF-8 explicitly; otherwise the encoding depends on the locale.
+  File.open(path, "r:UTF-8") do |f|
+    # Records are separated by a blank line followed by a "#data" line.
+    f.each("\n\n#data\n") do |test_data|
+      test_data = test_data[6..-1] if test_data.start_with?("#data\n")
+      test_data = test_data[0..-9] if test_data.end_with?("\n\n#data\n")
+      tests << parse_test(test_data)
+    end
+  end
+
+  klass = Class.new(TestTreeConstructionBase) do
+    tests.each_with_index do |test, index|
+      define_method "test_#{index}".to_sym do
+        @test = test
+        @index = index
+        run_test
+      end
+    end
+  end
+  Object.const_set test_name, klass
+end
+
+# vim: set sw=2 sts=2 ts=8 et: