From 98244cc1e9c9df53b4c9dd02faa1327fddca9fd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com>
Date: Tue, 29 Oct 2024 15:48:59 +0100
Subject: [PATCH 1/6] [Data Liberation] Merge both XML processors into a single
 WP_XML_Processor

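Fold the former WP_XML_Tag_Processor base class into WP_XML_Processor so
that a single class contains the whole XML processing API.

* Move the tag processor's properties, documentation, and scanning logic
  into WP_XML_Processor.php and delete WP_XML_Tag_Processor.php.
* Merge the tag processor tests into WPXMLProcessorTests.php and delete
  WPXMLTagProcessorTests.php.
* Remove the matching require_once from bootstrap.php and the test file
  entry from phpunit.xml.
* WP_XML_Processor no longer extends WP_XML_Tag_Processor and no longer
  declares that it implements WP_Stream_Processor.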
---
 .../playground/data-liberation/bootstrap.php  |    1 -
 .../playground/data-liberation/phpunit.xml    |    1 -
 .../src/xml-api/WP_XML_Processor.php          | 2827 +++++++++++++++-
 .../src/xml-api/WP_XML_Tag_Processor.php      | 2837 -----------------
 .../tests/WPXMLProcessorTests.php             | 1386 +++++++-
 .../tests/WPXMLTagProcessorTests.php          | 1426 ---------
 6 files changed, 4049 insertions(+), 4429 deletions(-)
 delete mode 100644 packages/playground/data-liberation/src/xml-api/WP_XML_Tag_Processor.php
 delete mode 100644 packages/playground/data-liberation/tests/WPXMLTagProcessorTests.php
diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php
index 9ab7a0dd86..fb4bbd45d6 100644
--- a/packages/playground/data-liberation/bootstrap.php
+++ b/packages/playground/data-liberation/bootstrap.php
@@ -30,7 +30,6 @@
 require_once __DIR__ . '/src/WP_URL.php';
 
 require_once __DIR__ . '/src/xml-api/WP_XML_Decoder.php';
-require_once __DIR__ . '/src/xml-api/WP_XML_Tag_Processor.php';
 require_once __DIR__ . '/src/xml-api/WP_XML_Processor.php';
 require_once __DIR__ . '/src/WP_WXR_URL_Rewrite_Processor.php';
 
diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml
index a3e66da030..50553590e5 100644
--- a/packages/playground/data-liberation/phpunit.xml
+++ b/packages/playground/data-liberation/phpunit.xml
@@ -9,7 +9,6 @@
       <file>tests/WPBlockMarkupUrlProcessorTests.php</file>
       <file>tests/URLParserWHATWGComplianceTests.php</file>
       <file>tests/WPXMLProcessorTests.php</file>
-      <file>tests/WPXMLTagProcessorTests.php</file>
       <file>tests/UrldecodeNTests.php</file>
     </testsuite>
   </testsuites>
diff --git a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
index bef650e33c..beffee8e2b 100644
--- a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
+++ b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
@@ -2,15 +2,589 @@
 /**
  * XML API: WP_XML_Processor class
  *
+ * Scans through an XML document to find specific tags, then
+ * transforms those tags by adding, removing, or updating the
+ * values of the XML attributes within that tag (opener).
+ *
+ * It implements a subset of the XML 1.0 specification (https://www.w3.org/TR/xml/)
+ * and supports XML documents with the following characteristics:
+ *
+ * * XML 1.0
+ * * Well-formed
+ * * UTF-8 encoded
+ * * Not standalone (so can use external entities)
+ * * No DTD, DOCTYPE, ATTLIST, ENTITY, or conditional sections
+ *
+ * ### Possible future direction for this module
+ *
+ * The final goal is to support both 1.0 and 1.1 depending on the
+ * initial processing instruction (<?xml version="1.0" ?>). We're
+ * starting with 1.0, however, because that's what most WXR
+ * files declare.
+ *
+ * ## Future work
+ *
+ * @TODO: Skip over the following syntax elements:
+ *        * <!DOCTYPE, see https://www.w3.org/TR/xml/#sec-prolog-dtd
+ *        * <!ATTLIST, see https://www.w3.org/TR/xml/#attdecls
+ *        * <!ENTITY, see https://www.w3.org/TR/xml/#sec-entity-decl
+ *        * <!NOTATION, see https://www.w3.org/TR/xml/#sec-entity-decl
+ *        * Conditional sections, see https://www.w3.org/TR/xml/#sec-condition-sect
+ *
+ * @TODO Explore declaring elements as PCdata directly in the XML document,
+ *       for example as follows:
+ *
+ *       <!ELEMENT p (#PCDATA|emph)* >
+ *
+ *       or
+ *
+ *       <!DOCTYPE test [
+ *           <!ELEMENT test (#PCDATA) >
+ *           <!ENTITY % xx '&#37;zz;'>
+ *           <!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >' >
+ *           %xx;
+ *       ]>
+ *
+ * @TODO: Support XML 1.1.
  * @package WordPress
  * @subpackage HTML-API
  * @since WP_VERSION
  */
 
 /**
+ * Core class used to modify attributes in an XML document for tags matching a query.
+ *
+ * ## Usage
+ *
+ * Use of this class requires three steps:
+ *
+ *  1. Create a new class instance with your input XML document.
+ *  2. Find the tag(s) you are looking for.
+ *  3. Request changes to the attributes in those tag(s).
+ *
+ * Example:
+ *
+ *     $tags = new WP_XML_Processor( $xml );
+ *     if ( $tags->next_tag( 'wp:option' ) ) {
+ *         $tags->set_attribute( 'selected', 'yes' );
+ *     }
+ *
+ * ### Finding tags
+ *
+ * The `next_tag()` function moves the internal cursor through
+ * your input XML document until it finds a tag meeting any of
+ * the supplied restrictions in the optional query argument. If
+ * no argument is provided then it will find the next XML tag,
+ * regardless of what kind it is.
+ *
+ * If you want to _find whatever the next tag is_:
+ *
+ *     $tags->next_tag();
+ *
+ * | Goal                                                      | Query                                                                           |
+ * |-----------------------------------------------------------|---------------------------------------------------------------------------------|
+ * | Find any tag.                                             | `$tags->next_tag();`                                                            |
+ * | Find next image tag.                                      | `$tags->next_tag( array( 'tag_name' => 'wp:image' ) );`                              |
+ * | Find next image tag (without passing the array).          | `$tags->next_tag( 'wp:image' );`                                                     |
+ *
+ * If a tag was found meeting your criteria then `next_tag()`
+ * will return `true` and you can proceed to modify it. If it
+ * returns `false`, however, it failed to find the tag and
+ * moved the cursor to the end of the file.
+ *
+ * Once the cursor reaches the end of the file the processor
+ * is done and if you want to reach an earlier tag you will
+ * need to recreate the processor and start over, as it's
+ * unable to back up or move in reverse.
+ *
+ * See the section on bookmarks for an exception to this
+ * no-backing-up rule.
+ *
+ * #### Custom queries
+ *
+ * Sometimes it's necessary to further inspect an XML tag than
+ * the query syntax here permits. In these cases one may further
+ * inspect the search results using the read-only functions
+ * provided by the processor or external state or variables.
+ *
+ * Example:
+ *
+ *     // Paint up to the first five `wp:musician` or `wp:actor` tags marked with the "jazzy" style.
+ *     $remaining_count = 5;
+ *     while ( $remaining_count > 0 && $tags->next_tag() ) {
+ *         if (
+ *              ( 'wp:musician' === $tags->get_tag() || 'wp:actor' === $tags->get_tag() ) &&
+ *              'jazzy' === $tags->get_attribute( 'data-style' )
+ *         ) {
+ *             $tags->set_attribute( 'wp:theme-style', 'theme-style-everest-jazz' );
+ *             $remaining_count--;
+ *         }
+ *     }
+ *
+ * `get_attribute()` will return `null` if the attribute wasn't present
+ * on the tag when it was called. It may return `""` (the empty string)
+ * in cases where the attribute was present but its value was empty.
+ * For boolean attributes, those whose name is present but no value is
+ * given, it will return `true` (the only way to set `false` for an
+ * attribute is to remove it).
+ *
+ * #### When matching fails
+ *
+ * When `next_tag()` returns `false` it could mean different things:
+ *
+ *  - The requested tag wasn't found in the input document.
+ *  - The input document ended in the middle of an XML syntax element.
+ *
+ * When a document ends in the middle of a syntax element it will pause
+ * the processor. This is to make it possible in the future to extend the
+ * input document and proceed - an important requirement for chunked
+ * streaming parsing of a document.
+ *
+ * Example:
+ *
+ *     $processor = new WP_XML_Processor( 'This <wp:content is="a" partial="token' );
+ *     false === $processor->next_tag();
+ *
+ * If a special element (see next section) is encountered but no closing tag
+ * is found it will count as an incomplete tag. The parser will pause as if
+ * the opening tag were incomplete.
+ *
+ * Example:
+ *
+ *     $processor = new WP_XML_Processor( '<style>// there could be more styling to come' );
+ *     false === $processor->next_tag();
+ *
+ *     $processor = new WP_XML_Processor( '<style>// this is everything</style><wp:content>' );
+ *     true === $processor->next_tag( 'wp:content' );
+ *
+ * #### Special elements
+ *
+ * All XML elements are handled in the same way, except when you mark
+ * them as PCdata elements. These are special because their content
+ * is treated as text, even if it looks like XML tags.
+ *
+ * Example:
+ *
+ *    $processor = new WP_XML_Processor( '<root><wp:post-content>Text inside</input></wp:post-content></root>' );
+ *    $processor->declare_element_as_pcdata('wp:post-content');
+ *    $processor->next_tag('wp:post-content');
+ *    $processor->next_token();
+ *    echo $processor->get_modifiable_text(); // Text inside</input>
+ *
+ * ### Modifying XML attributes for a found tag
+ *
+ * Once you've found the start of an opening tag you can modify
+ * any number of the attributes on that tag. You can set a new
+ * value for an attribute, remove the entire attribute, or do
+ * nothing and move on to the next opening tag.
+ *
+ * Example:
+ *
+ *     if ( $tags->next_tag( 'wp:user-group' ) ) {
+ *         $tags->set_attribute( 'name', 'Content editors' );
+ *         $tags->remove_attribute( 'data-test-id' );
+ *     }
+ *
+ * If `set_attribute()` is called for an existing attribute it will
+ * overwrite the existing value. Similarly, calling `remove_attribute()`
+ * for a non-existing attribute has no effect on the document. Both
+ * of these methods are safe to call without knowing if a given attribute
+ * exists beforehand.
+ *
+ * ### Bookmarks
+ *
+ * While scanning through the input XML document it's possible to set
+ * a named bookmark when a particular tag is found. Later on, after
+ * continuing to scan other tags, it's possible to `seek` to one of
+ * the set bookmarks and then proceed again from that point forward.
+ *
+ * Because bookmarks create processing overhead one should avoid
+ * creating too many of them. As a rule, create only bookmarks
+ * of known string literal names; avoid creating "mark_{$index}"
+ * and so on. It's fine from a performance standpoint to create a
+ * bookmark and update it frequently, such as within a loop.
+ *
+ *     $total_todos = 0;
+ *     while ( $p->next_tag( array( 'tag_name' => 'wp:todo-list' ) ) ) {
+ *         $p->set_bookmark( 'list-start' );
+ *         while ( $p->next_tag( array( 'tag_closers' => 'visit' ) ) ) {
+ *             if ( 'wp:todo' === $p->get_tag() && $p->is_tag_closer() ) {
+ *                 $p->set_bookmark( 'list-end' );
+ *                 $p->seek( 'list-start' );
+ *                 $p->set_attribute( 'data-contained-todos', (string) $total_todos );
+ *                 $total_todos = 0;
+ *                 $p->seek( 'list-end' );
+ *                 break;
+ *             }
+ *
+ *             if ( 'wp:todo-item' === $p->get_tag() && ! $p->is_tag_closer() ) {
+ *                 $total_todos++;
+ *             }
+ *         }
+ *     }
+ *
+ * ## Tokens and finer-grained processing.
+ *
+ * It's possible to scan through every lexical token in the
+ * XML document using the `next_token()` function. This
+ * alternative form takes no argument and provides no built-in
+ * query syntax.
+ *
+ * Example:
+ *
+ *      $title = '(untitled)';
+ *      $text  = '';
+ *      while ( $processor->next_token() ) {
+ *          switch ( $processor->get_token_name() ) {
+ *              case '#text':
+ *                  $text .= $processor->get_modifiable_text();
+ *                  break;
+ *
+ *              case 'wp:new-line':
+ *                  $text .= "\n";
+ *                  break;
+ *
+ *              case 'wp:title':
+ *                  $title = $processor->get_modifiable_text();
+ *                  break;
+ *          }
+ *      }
+ *      return trim( "# {$title}\n\n{$text}" );
+ *
+ * ### Tokens and _modifiable text_.
+ *
+ * #### Other tokens with modifiable text.
+ *
+ * There are also non-elements which are void/self-closing in nature and contain
+ * modifiable text that is part of that individual syntax token itself.
+ *
+ *  - `#text` nodes, whose entire token _is_ the modifiable text.
+ *  - XML comments and tokens that become comments due to some syntax error. The
+ *    text for these tokens is the portion of the comment inside of the syntax.
+ *    E.g. for `<!-- comment -->` the text is `" comment "` (note the spaces are included).
+ *  - `CDATA` sections, whose text is the content inside of the section itself. E.g. for
+ *    `<![CDATA[some content]]>` the text is `"some content"`.
+ *  - XML Processing instruction nodes like `<?xml __( "Like" ); ?>` (with restrictions [1]).
+ *
+ * [1]: XML requires "xml" as a processing instruction name. The Tag Processor captures the entire
+ *      processing instruction as a single token up to the closing `?>`.
+ *
+ * ## Design and limitations
+ *
+ * The Tag Processor is designed to linearly scan XML documents and tokenize
+ * XML tags and their attributes. It's designed to do this as efficiently as
+ * possible without compromising parsing integrity. Therefore it will be
+ * slower than some methods of modifying XML, such as those incorporating
+ * over-simplified PCRE patterns, but will not introduce the defects and
+ * failures that those methods bring in, which lead to broken page renders
+ * and often to security vulnerabilities. On the other hand, it will be faster
+ * than full-blown XML parsers such as DOMDocument and use considerably
+ * less memory. It requires a negligible memory overhead, enough to consider
+ * it a zero-overhead system.
+ *
+ * The performance characteristics are maintained by avoiding tree construction.
+ *
+ * The Tag Processor checks the most important aspects of XML integrity as it scans
+ * through the document. It verifies that a single root element exists, that there are
+ * no unclosed tags, and that each opener tag has a corresponding closer. It also
+ * ensures no duplicate attributes exist on a single tag.
+ *
+ * At the same time, the Tag Processor also skips expensive validation of XML entities
+ * in the document. The Tag Processor will initially pass through the invalid entity references
+ * and only fail when the developer attempts to read their value. If that doesn't happen,
+ * the invalid values will be left untouched in the final document.
+ *
+ * Most operations within the Tag Processor are designed to minimize the difference
+ * between an input and output document for any given change. For example, the
+ * `set_attribute` and `remove_attribute` methods preserve whitespace and the attribute
+ * ordering within the element definition. An exception to this rule is that all attribute
+ * updates store their values as double-quoted strings, meaning that attributes on input with
+ * single-quoted or unquoted values will appear in the output with double-quotes.
+ *
+ * ### Text Encoding
+ *
+ * The Tag Processor assumes that the input XML document is encoded with a
+ * UTF-8 encoding and will refuse to process documents that declare other encodings.
+ *
  * @since WP_VERSION
  */
-class WP_XML_Processor extends WP_XML_Tag_Processor implements WP_Stream_Processor {
+class WP_XML_Processor {
+	/**
+	 * The maximum number of bookmarks allowed to exist at
+	 * any given time.
+	 *
+	 * @since WP_VERSION
+	 * @var int
+	 *
+	 * @see WP_XML_Processor::set_bookmark()
+	 */
+	const MAX_BOOKMARKS = 10;
+
+	/**
+	 * Maximum number of times seek() can be called.
+	 * Prevents accidental infinite loops.
+	 *
+	 * @since WP_VERSION
+	 * @var int
+	 *
+	 * @see WP_XML_Processor::seek()
+	 */
+	const MAX_SEEK_OPS = 1000;
+
+	/**
+	 * The XML document to parse.
+	 *
+	 * @since WP_VERSION
+	 * @var string
+	 */
+	public $xml;
+	
+	/**
+	 * Specifies mode of operation of the parser at any given time.
+	 *
+	 * | State           | Meaning                                                                |
+	 * | ----------------|------------------------------------------------------------------------|
+	 * | *Ready*           | The parser is ready to run.                                          |
+	 * | *Complete*        | There is nothing left to parse.                                      |
+	 * | *Incomplete*      | The XML ended in the middle of a token; nothing more can be parsed.  |
+	 * | *Matched tag*     | Found an XML tag; it's possible to modify its attributes.            |
+	 * | *Text node*       | Found a #text node; this is plaintext and modifiable.                |
+	 * | *CDATA node*      | Found a CDATA section; this is modifiable.                           |
+	 * | *PI node*         | Found a processing instruction; this is modifiable.                  |
+	 * | *XML declaration* | Found an XML declaration; this is modifiable.                        |
+	 * | *Comment*         | Found a comment or bogus comment; this is modifiable.                |
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @see WP_XML_Processor::STATE_READY
+	 * @see WP_XML_Processor::STATE_COMPLETE
+	 * @see WP_XML_Processor::STATE_INCOMPLETE_INPUT
+	 * @see WP_XML_Processor::STATE_MATCHED_TAG
+	 * @see WP_XML_Processor::STATE_TEXT_NODE
+	 * @see WP_XML_Processor::STATE_CDATA_NODE
+	 * @see WP_XML_Processor::STATE_PI_NODE
+	 * @see WP_XML_Processor::STATE_XML_DECLARATION
+	 * @see WP_XML_Processor::STATE_COMMENT
+	 *
+	 * @var string
+	 */
+	protected $parser_state = self::STATE_READY;
+
+	/**
+	 * Whether we stopped at an incomplete text node.
+	 *
+	 * If we are before the last tag in the document, every text
+	 * node is incomplete until we find the next tag. However,
+	 * if we are after the last tag, an incomplete all-whitespace
+	 * node may either mean we're at the end of the document or
+	 * that we're still waiting for more data.
+	 *
+	 * This flag allows us to differentiate between these two
+	 * cases in context-aware APIs such as WP_XML_Processor.
+	 *
+	 * @var bool
+	 */
+	protected $is_incomplete_text_node = false;
+
+	/**
+	 * How many bytes from the original XML document have been read and parsed.
+	 *
+	 * This value points to the latest byte offset in the input document which
+	 * has been already parsed. It is the internal cursor for the Tag Processor
+	 * and updates while scanning through the XML tokens.
+	 *
+	 * @since WP_VERSION
+	 * @var int
+	 */
+	public $bytes_already_parsed = 0;
+
+	/**
+	 * Byte offset in input document where current token starts.
+	 *
+	 * Example:
+	 *
+	 *     <wp:content id="test">...
+	 *     01234
+	 *     - token starts at 0
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @var int|null
+	 */
+	protected $token_starts_at;
+
+	/**
+	 * Byte length of current token.
+	 *
+	 * Example:
+	 *
+	 *     <wp:content id="test">...
+	 *     0123456789012345678901
+	 *     - token length is 22 - 0 = 22
+	 *
+	 *     a <!-- comment --> is a token.
+	 *     0123456789 123456789 123456789
+	 *     - token length is 18 - 2 = 16
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @var int|null
+	 */
+	private $token_length;
+
+	/**
+	 * Byte offset in input document where current tag name starts.
+	 *
+	 * Example:
+	 *
+	 *     <wp:content id="test">...
+	 *     01234
+	 *      - tag name starts at 1
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @var int|null
+	 */
+	private $tag_name_starts_at;
+
+	/**
+	 * Byte length of current tag name.
+	 *
+	 * Example:
+	 *
+	 *     <wp:content id="test">...
+	 *     01234567890
+	 *      ---------- tag name length is 10
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @var int|null
+	 */
+	private $tag_name_length;
+
+	/**
+	 * Byte offset into input document where current modifiable text starts.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @var int
+	 */
+	private $text_starts_at;
+
+	/**
+	 * Byte length of modifiable text.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @var int
+	 */
+	private $text_length;
+
+	/**
+	 * Whether the current tag is an opening tag, e.g. <wp:content>, or a closing tag, e.g. </wp:content>.
+	 *
+	 * @var bool
+	 */
+	private $is_closing_tag;
+
+	/**
+	 * Stores an explanation for why something failed, if it did.
+	 *
+	 * @see self::get_last_error
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @var string|null
+	 */
+	protected $last_error = null;
+
+	/**
+	 * Lazily-built index of attributes found within an XML tag, keyed by the attribute name.
+	 *
+	 * Example:
+	 *
+	 *     // Supposing the parser is working through this content
+	 *     // and stops after recognizing the `id` attribute.
+	 *     // <wp:content id="test-4" class=outline title="data:text/plain;base64=asdk3nk1j3fo8">
+	 *     //                 ^ parsing will continue from this point.
+	 *     $this->attributes = array(
+	 *         'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false )
+	 *     );
+	 *
+	 *     // When picking up parsing again, or when asking to find the
+	 *     // `class` attribute we will continue and add to this array.
+	 *     $this->attributes = array(
+	 *         'id'    => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ),
+	 *         'class' => new WP_HTML_Attribute_Token( 'class', 23, 7, 17, 13, false )
+	 *     );
+	 *
+	 * @since WP_VERSION
+	 * @var WP_HTML_Attribute_Token[]
+	 */
+	private $attributes = array();
+
+	/**
+	 * Tracks a semantic location in the original XML which
+	 * shifts with updates as they are applied to the document.
+	 *
+	 * @since WP_VERSION
+	 * @var WP_HTML_Span[]
+	 */
+	protected $bookmarks = array();
+
+	/**
+	 * Lexical replacements to apply to input XML document.
+	 *
+	 * "Lexical" in this class refers to the part of this class which
+	 * operates on pure text _as text_ and not as XML. There's a line
+	 * between the public interface, with XML-semantic methods like
+	 * `set_attribute` and `add_class`, and an internal state that tracks
+	 * text offsets in the input document.
+	 *
+	 * When higher-level XML methods are called, those have to transform their
+	 * operations (such as setting an attribute's value) into text diffing
+	 * operations (such as replacing the sub-string from indices A to B with
+	 * some given new string). These text-diffing operations are the lexical
+	 * updates.
+	 *
+	 * As new higher-level methods are added they need to collapse their
+	 * operations into these lower-level lexical updates since that's the
+	 * Tag Processor's internal language of change. Any code which creates
+	 * these lexical updates must ensure that they do not cross XML syntax
+	 * boundaries, however, so these should never be exposed outside of this
+	 * class or any classes which intentionally expand its functionality.
+	 *
+	 * These are enqueued while editing the document instead of being immediately
+	 * applied to avoid processing overhead, string allocations, and string
+	 * copies when applying many updates to a single document.
+	 *
+	 * Example:
+	 *
+	 *     // Replace an attribute stored with a new value, indices
+	 *     // sourced from the lazily-parsed XML recognizer.
+	 *     $start  = $attributes['src']->start;
+	 *     $length = $attributes['src']->length;
+	 *     $modifications[] = new WP_HTML_Text_Replacement( $start, $length, $new_value );
+	 *
+	 *     // Correspondingly, something like this will appear in this array.
+	 *     $lexical_updates = array(
+	 *         WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' )
+	 *     );
+	 *
+	 * @since WP_VERSION
+	 * @var WP_HTML_Text_Replacement[]
+	 */
+	protected $lexical_updates = array();
+
+	/**
+	 * Tracks and limits `seek()` calls to prevent accidental infinite loops.
+	 *
+	 * @since WP_VERSION
+	 * @var int
+	 *
+	 * @see WP_XML_Processor::seek()
+	 */
+	protected $seek_count = 0;
 
 	/**
 	 * Indicates the current parsing stage.
@@ -32,9 +606,9 @@ class WP_XML_Processor extends WP_XML_Tag_Processor implements WP_Stream_Process
 	 * | *Element*       | The parser is parsing the root element.                             |
 	 * | *Misc*          | The parser is parsing miscellaneous content.                        |
 	 *
-	 * @see WP_XML_Tag_Processor::IN_PROLOG_CONTEXT
-	 * @see WP_XML_Tag_Processor::IN_ELEMENT_CONTEXT
-	 * @see WP_XML_Tag_Processor::IN_MISC_CONTEXT
+	 * @see WP_XML_Processor::IN_PROLOG_CONTEXT
+	 * @see WP_XML_Processor::IN_ELEMENT_CONTEXT
+	 * @see WP_XML_Processor::IN_MISC_CONTEXT
 	 *
 	 * @since WP_VERSION
 	 * @var bool
@@ -50,6 +624,21 @@ class WP_XML_Processor extends WP_XML_Tag_Processor implements WP_Stream_Process
 	 */
 	public $stack_of_open_elements = array();
 
+	public $had_previous_chunks = false;
+
+	/**
+	 * Constructor.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param string $xml XML to process.
+	 */
+	public function __construct( $xml, $breadcrumbs = array(), $parser_context = self::IN_PROLOG_CONTEXT ) {
+		$this->xml = $xml;
+		$this->stack_of_open_elements = $breadcrumbs;
+		$this->parser_context         = $parser_context;
+	}
+
 	public static function stream( $node_visitor_callback ) {
 		$xml_processor = new WP_XML_Processor( '', array(), WP_XML_Processor::IN_PROLOG_CONTEXT );
 		return new ProcessorByteStream(
@@ -90,14 +679,13 @@ function ( $state ) use ( $xml_processor, $node_visitor_callback ) {
 		);
 	}
 
-
 	public function pause() {
 		return array(
 			'xml' => $this->xml,
 			// @TODO: Include all the information below in the bookmark:
 			'bytes_already_parsed' => $this->token_starts_at,
 			'breadcrumbs' => $this->get_breadcrumbs(),
-			'parser_context' => $this->get_parser_context(),
+			'parser_context' => $this->parser_context,
 			'stack_of_open_elements' => $this->stack_of_open_elements,
 		);
 	}
@@ -127,7 +715,7 @@ public function flush_processed_xml() {
 		$unprocessed_xml = $this->get_unprocessed_xml();
 
 		$breadcrumbs    = $this->get_breadcrumbs();
-		$parser_context = $this->get_parser_context();
+		$parser_context = $this->parser_context;
 
 		$this->reset_state();
 
@@ -138,116 +726,199 @@ public function flush_processed_xml() {
 
 		return $processed_xml;
 	}
-
-	/**
-	 * Constructor.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param string $xml XML to process.
-	 */
-	public function __construct( $xml, $breadcrumbs = array(), $parser_context = self::IN_PROLOG_CONTEXT ) {
-		parent::__construct( $xml );
-		$this->stack_of_open_elements = $breadcrumbs;
-		$this->parser_context         = $parser_context;
-	}
-
-	public function get_parser_context() {
-		return $this->parser_context;
-	}
-
+ 
 	/**
-	 * Finds the next element matching the $query.
+	 * Internal method which finds the next token in the XML document.
 	 *
-	 * This doesn't currently have a way to represent non-tags and doesn't process
-	 * semantic rules for text nodes. For access to the raw tokens consider using
-	 * WP_XML_Tag_Processor instead.
+	 * This method is a protected internal function which implements the logic for
+	 * finding the next token in a document. It exists so that the parser can update
+	 * its state without affecting the location of the cursor in the document and
+	 * without triggering subclass methods for things like `next_token()`, e.g. when
+	 * applying patches before searching for the next token.
 	 *
-	 * @since WP_VERSION
+	 * @since WP_VERSION
 	 *
-	 * @param array|string|null $query {
-	 *     Optional. Which tag name to find, having which class, etc. Default is to find any tag.
+	 * @access private
 	 *
-	 *     @type string|null $tag_name     Which tag to find, or `null` for "any tag."
-	 *     @type int|null    $match_offset Find the Nth tag matching all search criteria.
-	 *                                     1 for "first" tag, 3 for "third," etc.
-	 *                                     Defaults to first tag.
-	 *     @type string[]    $breadcrumbs  DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`.
-	 *                                     May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`.
-	 * }
-	 * @return bool Whether a tag was matched.
+	 * @return bool Whether a token was parsed.
 	 */
-	public function next_tag( $query = null ) {
-		if ( null === $query ) {
-			while ( $this->step() ) {
-				if ( '#tag' !== $this->get_token_type() ) {
-					continue;
-				}
-
-				if ( ! $this->is_tag_closer() ) {
-					return true;
-				}
-			}
+	protected function base_class_next_token() {
+		$was_at = $this->bytes_already_parsed;
+		$this->after_tag();
 
+		// Don't proceed if there's nothing more to scan.
+		if (
+			self::STATE_COMPLETE === $this->parser_state ||
+			self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+			null !== $this->last_error
+		) {
 			return false;
 		}
 
-		if ( is_string( $query ) ) {
-			$query = array( 'breadcrumbs' => array( $query ) );
+		/*
+		 * The next step in the parsing loop determines the parsing state;
+		 * clear it so that state doesn't linger from the previous step.
+		 */
+		$this->parser_state = self::STATE_READY;
+
+		if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
+			$this->parser_state = self::STATE_COMPLETE;
+			return false;
 		}
 
-		if ( ! is_array( $query ) ) {
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'Please pass a query array to this function.' ),
-				'WP_VERSION'
-			);
+		// Find the next tag if it exists.
+		if ( false === $this->parse_next_tag() ) {
+			if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
+				$this->bytes_already_parsed = $was_at;
+			}
+
 			return false;
 		}
 
-		if ( ! ( array_key_exists( 'breadcrumbs', $query ) && is_array( $query['breadcrumbs'] ) ) ) {
-			while ( $this->step() ) {
-				if ( '#tag' !== $this->get_token_type() ) {
-					continue;
-				}
+		if ( null !== $this->last_error ) {
+			return false;
+		}
 
-				if ( ! $this->is_tag_closer() ) {
-					return true;
-				}
+		/*
+		 * For legacy reasons the rest of this function handles tags and their
+		 * attributes. If the processor has reached the end of the document
+		 * or if it matched any other token then it should return here to avoid
+		 * attempting to process tag-specific syntax.
+		 */
+		if (
+			self::STATE_INCOMPLETE_INPUT !== $this->parser_state &&
+			self::STATE_COMPLETE !== $this->parser_state &&
+			self::STATE_MATCHED_TAG !== $this->parser_state
+		) {
+			return true;
+		}
+
+		if ( $this->is_closing_tag ) {
+			$this->skip_whitespace();
+		} else {
+			// Parse all of its attributes.
+			while ( $this->parse_next_attribute() ) {
+				continue;
 			}
+		}
 
+		if ( null !== $this->last_error ) {
 			return false;
 		}
 
-		if ( isset( $query['tag_closers'] ) && 'visit' === $query['tag_closers'] ) {
+		// Ensure that the tag closes before the end of the document.
+		if (
+			self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+			$this->bytes_already_parsed >= strlen( $this->xml )
+		) {
+			// Does this appropriately clear state (parsed attributes)?
+			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->bytes_already_parsed = $was_at;
+
+			return false;
+		}
+
+		$tag_ends_at = strpos( $this->xml, '>', $this->bytes_already_parsed );
+		if ( false === $tag_ends_at ) {
+			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->bytes_already_parsed = $was_at;
+
+			return false;
+		}
+
+		if ( $this->is_closing_tag && $tag_ends_at !== $this->bytes_already_parsed ) {
+			$this->last_error = self::ERROR_SYNTAX;
 			_doing_it_wrong(
 				__METHOD__,
-				__( 'Cannot visit tag closers in XML Processor.' ),
+				__( 'Invalid closing tag encountered.' ),
 				'WP_VERSION'
 			);
 			return false;
 		}
 
-		$breadcrumbs  = $query['breadcrumbs'];
-		$match_offset = isset( $query['match_offset'] ) ? (int) $query['match_offset'] : 1;
+		$this->parser_state         = self::STATE_MATCHED_TAG;
+		$this->bytes_already_parsed = $tag_ends_at + 1;
+		$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;
 
-		while ( $match_offset > 0 && $this->step() ) {
-			if ( '#tag' !== $this->get_token_type() ) {
-				continue;
-			}
+		/*
+		 * If we are in a PCData element, everything until the closer
+		 * is considered text.
+		 */
+		if ( ! $this->is_pcdata_element() ) {
+			return true;
+		}
 
-			if ( $this->matches_breadcrumbs( $breadcrumbs ) && 0 === --$match_offset ) {
-				return true;
-			}
+		/*
+		 * Preserve the opening tag pointers, as these will be overwritten
+		 * when finding the closing tag. They will be reset after finding
+		 * the closing tag to point to the opening of the special atomic
+		 * tag sequence.
+		 */
+		$tag_name_starts_at = $this->tag_name_starts_at;
+		$tag_name_length    = $this->tag_name_length;
+		$tag_ends_at        = $this->token_starts_at + $this->token_length;
+		$attributes         = $this->attributes;
+
+		$found_closer = $this->skip_pcdata( $this->get_tag() );
+
+		// Closer not found, the document is incomplete.
+		if ( false === $found_closer ) {
+			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->bytes_already_parsed = $was_at;
+			return false;
 		}
 
-		return false;
+		/*
+		 * The values here look like they reference the opening tag but they reference
+		 * the closing tag instead. This is why the opening tag values were stored
+		 * above in a variable. It reads confusingly here, but that's because the
+		 * functions that skip the contents have moved all the internal cursors past
+		 * the inner content of the tag.
+		 */
+		$this->token_starts_at    = $was_at;
+		$this->token_length       = $this->bytes_already_parsed - $this->token_starts_at;
+		$this->text_starts_at     = $tag_ends_at;
+		$this->text_length        = $this->tag_name_starts_at - $this->text_starts_at;
+		$this->tag_name_starts_at = $tag_name_starts_at;
+		$this->tag_name_length    = $tag_name_length;
+		$this->attributes         = $attributes;
+
+		return true;
+	}
+
+	/**
+	 * Whether the processor paused because the input XML document ended
+	 * in the middle of a syntax element, such as in the middle of a tag.
+	 *
+	 * Example:
+	 *
+	 *     $processor = new WP_XML_Processor( '<input type="text" value="Th' );
+	 *     false      === $processor->next_tag();
+	 *     true       === $processor->is_paused_at_incomplete_input();
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @return bool Whether the parse paused at the start of an incomplete token.
+	 */
+	public function is_paused_at_incomplete_input(): bool {
+		return self::STATE_INCOMPLETE_INPUT === $this->parser_state;
+	}
+
+	/**
+	 * Whether the processor finished processing.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @return bool Whether the processor finished processing.
+	 */
+	public function is_finished(): bool {
+		return self::STATE_COMPLETE === $this->parser_state;
 	}
 
-	/*
+	/**
 	 * Sets a bookmark in the XML document.
 	 *
-	 * Bookmarks represent specific places or tokens in the HTML
+	 * Bookmarks represent specific places or tokens in the XML
 	 * document, such as a tag opener or closer. When applying
 	 * edits to a document, such as setting an attribute, the
 	 * text offsets of that token may shift; the bookmark is
@@ -267,7 +938,7 @@ public function next_tag( $query = null ) {
 	 *                              \-|-- it shifts with edits
 	 *
 	 * Bookmarks provide the ability to seek to a previously-scanned
-	 * place in the HTML document. This avoids the need to re-scan
+	 * place in the XML document. This avoids the need to re-scan
 	 * the entire document.
 	 *
 	 * Example:
@@ -276,7 +947,7 @@ public function next_tag( $query = null ) {
 	 *                                 ^^^^
 	 *                                 want to note this last item
 	 *
-	 *     $p = new WP_HTML_Tag_Processor( $html );
+	 *     $p = new WP_XML_Processor( $xml );
 	 *     $in_list = false;
 	 *     while ( $p->next_tag( array( 'tag_closers' => $in_list ? 'visit' : 'skip' ) ) ) {
 	 *         if ( 'UL' === $p->get_tag() ) {
@@ -299,106 +970,1758 @@ public function next_tag( $query = null ) {
 	 *         }
 	 *     }
 	 *
-	 * Bookmarks intentionally hide the internal string offsets
-	 * to which they refer. They are maintained internally as
-	 * updates are applied to the HTML document and therefore
-	 * retain their "position" - the location to which they
-	 * originally pointed. The inability to use bookmarks with
-	 * functions like `substr` is therefore intentional to guard
-	 * against accidentally breaking the HTML.
+	 * Bookmarks intentionally hide the internal string offsets
+	 * to which they refer. They are maintained internally as
+	 * updates are applied to the XML document and therefore
+	 * retain their "position" - the location to which they
+	 * originally pointed. The inability to use bookmarks with
+	 * functions like `substr` is therefore intentional to guard
+	 * against accidentally breaking the XML.
+	 *
+	 * Because bookmarks allocate memory and require processing
+	 * for every applied update, they are limited and require
+	 * a name. They should not be created with programmatically-made
+	 * names, such as "li_{$index}" with some loop. As a general
+	 * rule they should only be created with string-literal names
+	 * like "start-of-section" or "last-paragraph".
+	 *
+	 * Bookmarks are a powerful tool to enable complicated behavior.
+	 * Consider double-checking that you need this tool if you are
+	 * reaching for it, as inappropriate use could lead to broken
+	 * XML structure or unwanted processing overhead.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param string $name Identifies this particular bookmark.
+	 * @return bool Whether the bookmark was successfully created.
+	 */
+	public function set_bookmark( $name ) {
+		// A bookmark needs a concrete token to point at; these states have none.
+		$tokenless_states = array( self::STATE_COMPLETE, self::STATE_INCOMPLETE_INPUT );
+		if ( in_array( $this->parser_state, $tokenless_states, true ) ) {
+			return false;
+		}
+
+		$is_new_name   = ! array_key_exists( $name, $this->bookmarks );
+		$limit_reached = count( $this->bookmarks ) >= static::MAX_BOOKMARKS;
+		if ( $is_new_name && $limit_reached ) {
+			_doing_it_wrong(
+				__METHOD__,
+				__( 'Too many bookmarks: cannot create any more.' ),
+				'WP_VERSION'
+			);
+			return false;
+		}
+
+		$this->bookmarks[ $name ] = new WP_HTML_Span( $this->token_starts_at, $this->token_length );
+
+		return true;
+	}
+
+
+	/**
+	 * Releases a bookmark once it is no longer needed.
+	 *
+	 * Every bookmark held by the processor has to be adjusted whenever
+	 * updates are applied, so dropping unused ones avoids that work.
+	 *
+	 * @param string $name Name of the bookmark to remove.
+	 * @return bool Whether the bookmark already existed before removal.
+	 */
+	public function release_bookmark( $name ) {
+		$was_set = array_key_exists( $name, $this->bookmarks );
+
+		if ( $was_set ) {
+			unset( $this->bookmarks[ $name ] );
+		}
+
+		return $was_set;
+	}
+
+	/**
+	 * Moves the cursor past the closing tag that ends a PCDATA element.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @see https://www.w3.org/TR/xml/#sec-mixed-content
+	 *
+	 * @param string $tag_name The tag name which will close the PCDATA region.
+	 * @return bool Whether the closing tag was found and the cursor moved past it.
+	 */
+	private function skip_pcdata( $tag_name ) {
+		$document      = $this->xml;
+		$doc_length    = strlen( $document );
+		$closer_length = 2 + strlen( $tag_name );
+
+		$cursor = $this->bytes_already_parsed;
+		while ( false !== $cursor && $cursor < $doc_length ) {
+			$cursor                   = strpos( $document, '</' . $tag_name, $cursor );
+			$this->tag_name_starts_at = $cursor;
+
+			// Without another candidate closer the region cannot be terminated.
+			if ( false === $cursor ) {
+				return false;
+			}
+
+			$cursor                    += $closer_length;
+			$cursor                    += strspn( $document, " \t\f\r\n", $cursor );
+			$this->bytes_already_parsed = $cursor;
+
+			/*
+			 * Only a ">" may follow the name and optional whitespace. Anything
+			 * else means the match was merely the prefix of a longer name; for
+			 * example, "</wp:contentrug" must not close "wp:content". In that
+			 * case the search resumes from the current position.
+			 */
+			if ( $cursor >= $doc_length ) {
+				return false;
+			}
+			if ( '>' === $document[ $cursor ] ) {
+				$this->bytes_already_parsed = $cursor + 1;
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Returns the last error, if any.
+	 *
+	 * Various situations lead to parsing failure but this class will
+	 * return `false` in all those cases. To determine why something
+	 * failed it's possible to request the last error. This can be
+	 * helpful to know to distinguish whether a given tag couldn't
+	 * be found or if content in the document caused the processor
+	 * to give up and abort processing.
+	 *
+	 * Example:
+	 *
+	 *     $processor = WP_XML_Processor::create_fragment( '<wp:content invalid-attr></wp:content>' );
+	 *     false === $processor->next_tag();
+	 *     WP_XML_Processor::ERROR_SYNTAX === $processor->get_last_error();
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @see self::ERROR_UNSUPPORTED
+	 * @see self::ERROR_EXCEEDED_MAX_BOOKMARKS
+	 *
+	 * @return string|null The stored error value, or null when no error has been recorded.
+	 */
+	public function get_last_error(): ?string {
+		return $this->last_error;
+	}
+
+	/**
+	 * Tag names declared as PCDATA elements.
+	 *
+	 * PCDATA elements are elements in which everything is treated as
+	 * text, even syntax that may look like other elements, closers,
+	 * processing instructions, etc.
+	 *
+	 * Example:
+	 *
+	 *     <root>
+	 *         <my-pcdata>
+	 *             This text contains syntax that seems
+	 *             like XML nodes:
+	 *
+	 *             <input />
+	 *             </seemingly invalid element --/>
+	 *             <!-- is this a comment? -->
+	 *             <?xml version="1.0" ?>
+	 *
+	 *             &amp;&lt;&gt;&quot;&apos;
+	 *
+	 *             But! It's all treated as text.
+	 *         </my-pcdata>
+	 *     </root>
+	 *
+	 * @var array Keys are the declared element names; every value is `true`.
+	 */
+	private $pcdata_elements = array();
+
+	/**
+	 * Declares an element as PCDATA.
+	 *
+	 * PCDATA elements are elements in which everything is treated as
+	 * text, even syntax that may look like other elements, closers,
+	 * processing instructions, etc.
+	 *
+	 * For example:
+	 *
+	 *      $processor = new WP_XML_Processor(
+	 *      <<<XML
+	 *          <root>
+	 *              <my-pcdata>
+	 *                  This text uses syntax that may seem
+	 *                  like XML nodes:
+	 *
+	 *                  <input />
+	 *                  </seemingly invalid element --/>
+	 *                  <!-- is this a comment? -->
+	 *                  <?xml version="1.0" ?>
+	 *
+	 *                  &amp;&lt;&gt;&quot;&apos;
+	 *
+	 *                  But! It's all treated as text.
+	 *              </my-pcdata>
+	 *          </root>
+	 *      XML
+	 *      );
+	 *
+	 *      $processor->declare_element_as_pcdata( 'my-pcdata' );
+	 *      $processor->next_tag( 'my-pcdata' );
+	 *      $processor->next_token();
+	 *
+	 *      // Returns everything inside the <my-pcdata>
+	 *      // element as text:
+	 *      $processor->get_modifiable_text();
+	 *
+	 * @param string $element_name The name of the element to declare as PCDATA; stored as a key of the PCDATA list.
+	 * @return void
+	 */
+	public function declare_element_as_pcdata( $element_name ) {
+		$this->pcdata_elements[ $element_name ] = true;
+	}
+
+	/**
+	 * Indicates if the currently matched tag was declared as a PCDATA element.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @return bool Whether the name returned by get_tag() is a key of the declared PCDATA elements.
+	 */
+	public function is_pcdata_element() {
+		return array_key_exists( $this->get_tag(), $this->pcdata_elements );
+	}
+
+
+	/**
+	 * Finds the next element matching the $query.
+	 *
+	 * Only tag openers are matched: tag closers and non-tag tokens such as
+	 * text or comments are stepped over. For access to every token consider
+	 * using next_token() instead.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param array|string|null $query {
+	 *     Optional. Which tag to find. Default is to find any tag. A string is used as a one-element `breadcrumbs` list.
+	 *
+	 *     @type string|null $tag_name     Not consulted by this method; name the tag through `breadcrumbs` or a string query.
+	 *     @type int|null    $match_offset Find the Nth tag matching all search criteria.
+	 *                                     1 for "first" tag, 3 for "third," etc.
+	 *                                     Defaults to first tag.
+	 *     @type string[]    $breadcrumbs  DOM sub-path at which element is found, e.g. `array( 'rss', 'channel' )`.
+	 *                                     May also contain the wildcard `*` which matches a single element, e.g. `array( 'channel', '*' )`.
+	 * }
+	 * @return bool Whether a tag was matched.
+	 */
+	public function next_tag( $query = null ) {
+		if ( null === $query ) {
+			while ( $this->step() ) {
+				if ( '#tag' !== $this->get_token_type() ) {
+					continue;
+				}
+
+				if ( ! $this->is_tag_closer() ) {
+					return true;
+				}
+			}
+
+			return false;
+		}
+
+		if ( is_string( $query ) ) {
+			$query = array( 'breadcrumbs' => array( $query ) );
+		}
+
+		if ( ! is_array( $query ) ) {
+			_doing_it_wrong(
+				__METHOD__,
+				__( 'Please pass a query array to this function.' ),
+				'WP_VERSION'
+			);
+			return false;
+		}
+
+		if ( ! ( array_key_exists( 'breadcrumbs', $query ) && is_array( $query['breadcrumbs'] ) ) ) {
+			while ( $this->step() ) {
+				if ( '#tag' !== $this->get_token_type() ) {
+					continue;
+				}
+
+				if ( ! $this->is_tag_closer() ) {
+					return true;
+				}
+			}
+
+			return false;
+		}
+
+		if ( isset( $query['tag_closers'] ) && 'visit' === $query['tag_closers'] ) {
+			_doing_it_wrong(
+				__METHOD__,
+				__( 'Cannot visit tag closers in XML Processor.' ),
+				'WP_VERSION'
+			);
+			return false;
+		}
+
+		$breadcrumbs  = $query['breadcrumbs'];
+		$match_offset = isset( $query['match_offset'] ) ? (int) $query['match_offset'] : 1;
+
+		while ( $match_offset > 0 && $this->step() ) {
+			// As in the searches above, only tag openers may match; closers are never visited.
+			if ( '#tag' !== $this->get_token_type() || $this->is_tag_closer() ) {
+				continue;
+			}
+			if ( $this->matches_breadcrumbs( $breadcrumbs ) && 0 === --$match_offset ) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Parses the next token: a tag, or one of the non-tag nodes.
+	 *
+	 * For a tag this only consumes the opening `<`, the potential closer
+	 * `/`, and the tag name; attributes and the closing `>` are left for
+	 * other methods. Text nodes, comments, CDATA sections, the XML
+	 * declaration, and processing instructions are consumed in full.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @return bool Whether a token was found before the end of the document.
+	 */
+	private function parse_next_tag() {
+		$this->after_tag();
+
+		$xml        = $this->xml;
+		$doc_length = strlen( $xml );
+		$was_at     = $this->bytes_already_parsed;
+		$at         = $was_at;
+
+		while ( false !== $at && $at < $doc_length ) {
+			$at = strpos( $xml, '<', $at );
+
+			/*
+			 * There may be no text nodes outside of elements.
+			 * If this character sequence was encountered outside of
+			 * the root element, it is a syntax error. WP_XML_Processor
+			 * does not have that context – it is up to the API consumer,
+			 * such as WP_Tag_Processor, to handle this scenario.
+			 */
+			if ( false === $at ) {
+				$this->parser_state            = self::STATE_INCOMPLETE_INPUT;
+				$this->is_incomplete_text_node = true;
+				$this->text_starts_at          = $was_at;
+				$this->text_length             = $doc_length - $was_at;
+				return false;
+			}
+
+			if ( $at > $was_at ) {
+				$this->parser_state         = self::STATE_TEXT_NODE;
+				$this->token_starts_at      = $was_at;
+				$this->token_length         = $at - $was_at;
+				$this->text_starts_at       = $was_at;
+				$this->text_length          = $this->token_length;
+				$this->bytes_already_parsed = $at;
+
+				return true;
+			}
+
+			$this->token_starts_at = $at;
+
+			if ( $at + 1 < $doc_length && '/' === $this->xml[ $at + 1 ] ) {
+				$this->is_closing_tag = true;
+				++$at;
+			} else {
+				$this->is_closing_tag = false;
+			}
+
+			if ( $at + 1 >= $doc_length ) {
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+				return false;
+			}
+
+			/*
+			 * XML tag names are defined by the same `Name` grammar rule as attribute
+			 * names.
+			 *
+			 * Reference:
+			 * * https://www.w3.org/TR/xml/#NT-STag
+			 * * https://www.w3.org/TR/xml/#NT-Name
+			 */
+			$tag_name_length = $this->parse_name( $at + 1 );
+			if ( $tag_name_length > 0 ) {
+				++$at;
+				$this->parser_state         = self::STATE_MATCHED_TAG;
+				$this->tag_name_starts_at   = $at;
+				$this->tag_name_length      = $tag_name_length;
+				$this->token_length         = $this->tag_name_length;
+				$this->bytes_already_parsed = $at + $this->tag_name_length;
+
+				return true;
+			}
+
+			/*
+			 * Abort if no tag is found before the end of
+			 * the document. There is nothing left to parse.
+			 */
+			if ( $at + 1 >= $doc_length ) {
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+
+				return false;
+			}
+
+			/*
+			 * `<!` indicates one of a few possible constructs:
+			 */
+			if ( ! $this->is_closing_tag && '!' === $xml[ $at + 1 ] ) {
+				/*
+				 * `<!--` mark a beginning of a comment.
+				 * https://www.w3.org/TR/xml/#sec-comments
+				 */
+				if (
+					$doc_length > $at + 3 &&
+					'-' === $xml[ $at + 2 ] &&
+					'-' === $xml[ $at + 3 ]
+				) {
+					$closer_at = $at + 4;
+					// If it's not possible to close the comment then there is nothing more to scan.
+					if ( $doc_length <= $closer_at ) {
+						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+
+						return false;
+					}
+
+					/*
+					 * Comments may only be closed by a --> sequence.
+					 */
+					--$closer_at; // Pre-increment inside condition below reduces risk of accidental infinite looping.
+					while ( ++$closer_at < $doc_length ) {
+						$closer_at = strpos( $xml, '--', $closer_at );
+						if ( false === $closer_at || $closer_at + 2 === $doc_length ) {
+							$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+							return false;
+						}
+
+						/*
+						 * The string " -- " (double-hyphen) must not occur within comments
+						 * See https://www.w3.org/TR/xml/#sec-comments
+						 */
+						if ( '>' !== $xml[ $closer_at + 2 ] ) {
+							$this->last_error = self::ERROR_SYNTAX;
+							_doing_it_wrong(
+								__METHOD__,
+								__( 'Invalid comment syntax encountered.' ),
+								'WP_VERSION'
+							);
+							return false;
+						}
+
+						$this->parser_state         = self::STATE_COMMENT;
+						$this->token_length         = $closer_at + 3 - $this->token_starts_at;
+						$this->text_starts_at       = $this->token_starts_at + 4;
+						$this->text_length          = $closer_at - $this->text_starts_at;
+						$this->bytes_already_parsed = $closer_at + 3;
+						return true;
+					}
+				}
+
+				/*
+				 * Identify CDATA sections.
+				 *
+				 * Within a CDATA section, everything until the ]]> string is treated
+				 * as data, not markup. Left angle brackets and ampersands may occur in
+				 * their literal form; they need not (and cannot) be escaped using "&lt;"
+				 * and "&amp;". CDATA sections cannot nest.
+				 *
+				 * See https://www.w3.org/TR/xml11.xml/#sec-cdata-sect
+				 */
+				if (
+					! $this->is_closing_tag &&
+					$doc_length > $this->token_starts_at + 8 &&
+					'[' === $xml[ $this->token_starts_at + 2 ] &&
+					'C' === $xml[ $this->token_starts_at + 3 ] &&
+					'D' === $xml[ $this->token_starts_at + 4 ] &&
+					'A' === $xml[ $this->token_starts_at + 5 ] &&
+					'T' === $xml[ $this->token_starts_at + 6 ] &&
+					'A' === $xml[ $this->token_starts_at + 7 ] &&
+					'[' === $xml[ $this->token_starts_at + 8 ]
+				) {
+					$closer_at = strpos( $xml, ']]>', $at + 1 );
+					if ( false === $closer_at ) {
+						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+						return false;
+					}
+
+					// The token ends after all three bytes of the `]]>` closer, where the cursor lands.
+					$this->parser_state         = self::STATE_CDATA_NODE;
+					$this->token_length         = $closer_at + 3 - $this->token_starts_at;
+					$this->text_starts_at       = $this->token_starts_at + 9;
+					$this->text_length          = $closer_at - $this->text_starts_at;
+					$this->bytes_already_parsed = $closer_at + 3;
+					return true;
+				}
+
+				/*
+				 * Anything else here is either unsupported at this point or invalid
+				 * syntax. See the class-level @TODO annotations for more information.
+				 */
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+
+				return false;
+			}
+
+			/*
+			 * An `<?xml` token at the beginning of the document marks a start of an
+			 * xml declaration.
+			 * See https://www.w3.org/TR/xml/#sec-prolog-dtd
+			 */
+			if (
+				0 === $at &&
+				$doc_length > $at + 4 &&
+				! $this->had_previous_chunks &&
+				! $this->is_closing_tag &&
+				'?' === $xml[ $at + 1 ] &&
+				'x' === $xml[ $at + 2 ] &&
+				'm' === $xml[ $at + 3 ] &&
+				'l' === $xml[ $at + 4 ]
+			) {
+				// Setting the parser state early for the get_attribute() calls later in this branch.
+				$this->parser_state = self::STATE_XML_DECLARATION;
+
+				$at += 5;
+
+				// Skip whitespace.
+				$at += strspn( $this->xml, " \t\f\r\n", $at );
+
+				$this->bytes_already_parsed = $at;
+
+				/*
+				 * Reuse parse_next_attribute() to parse the XML declaration attributes.
+				 * Technically, only "version", "encoding", and "standalone" are accepted
+				 * and, unlike regular tag attributes, their values can contain any character
+				 * other than the opening quote. However, the "<" and "&" characters are very
+				 * unlikely to be encountered and cause trouble, so this code path liberally
+				 * does not provide a dedicated parsing logic.
+				 */
+				while ( false !== $this->parse_next_attribute() ) {
+					$this->skip_whitespace();
+					// Parse until the XML declaration closer.
+					if ( $this->bytes_already_parsed < $doc_length && '?' === $xml[ $this->bytes_already_parsed ] ) {
+						break;
+					}
+				}
+				// Stop on a syntax error, and also when the input ended inside the declaration.
+				if ( null !== $this->last_error || self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
+					return false;
+				}
+
+				foreach ( $this->attributes as $name => $attribute ) {
+					if ( 'version' !== $name && 'encoding' !== $name && 'standalone' !== $name ) {
+						$this->last_error = self::ERROR_SYNTAX;
+						_doing_it_wrong(
+							__METHOD__,
+							__( 'Invalid attribute found in XML declaration.' ),
+							'WP_VERSION'
+						);
+						return false;
+					}
+				}
+
+				if ( '1.0' !== $this->get_attribute( 'version' ) ) {
+					$this->last_error = self::ERROR_UNSUPPORTED;
+					_doing_it_wrong(
+						__METHOD__,
+						__( 'Unsupported XML version declared' ),
+						'WP_VERSION'
+					);
+					return false;
+				}
+
+				/*
+				 * Only UTF-8 encoded, standalone documents are accepted below. Standalone XML
+				 * documents have no external dependencies, including entities like `&nbsp;`.
+				 *
+				 * See https://www.w3.org/TR/xml/#sec-predefined-ent.
+				 */
+				if ( null !== $this->get_attribute( 'encoding' )
+					&& 'UTF-8' !== strtoupper( $this->get_attribute( 'encoding' ) )
+				) {
+					$this->last_error = self::ERROR_UNSUPPORTED;
+					_doing_it_wrong(
+						__METHOD__,
+						__( 'Unsupported XML encoding declared, only UTF-8 is supported.' ),
+						'WP_VERSION'
+					);
+					return false;
+				}
+				if ( null !== $this->get_attribute( 'standalone' )
+					&& 'YES' !== strtoupper( $this->get_attribute( 'standalone' ) )
+				) {
+					$this->last_error = self::ERROR_UNSUPPORTED;
+					_doing_it_wrong(
+						__METHOD__,
+						__( 'Non-standalone XML documents are not supported.' ),
+						'WP_VERSION'
+					);
+					return false;
+				}
+
+				$at = $this->bytes_already_parsed;
+
+				// Skip whitespace.
+				$at += strspn( $this->xml, " \t\f\r\n", $at );
+
+				// Consume the closer.
+				if ( ! (
+					$at + 2 <= $doc_length &&
+					'?' === $xml[ $at ] &&
+					'>' === $xml[ $at + 1 ]
+				) ) {
+					$this->last_error = self::ERROR_SYNTAX;
+					_doing_it_wrong(
+						__METHOD__,
+						__( 'XML declaration closer not found.' ),
+						'WP_VERSION'
+					);
+					return false;
+				}
+
+				$this->token_length         = $at + 2 - $this->token_starts_at;
+				$this->text_starts_at       = $this->token_starts_at + 2;
+				$this->text_length          = $at - $this->text_starts_at;
+				$this->bytes_already_parsed = $at + 2;
+				$this->parser_state         = self::STATE_XML_DECLARATION;
+
+				return true;
+			}
+
+			/*
+			 * `<?` denotes a processing instruction.
+			 * See https://www.w3.org/TR/xml/#sec-pi
+			 */
+			if (
+				! $this->is_closing_tag &&
+				'?' === $xml[ $at + 1 ]
+			) {
+				if ( $at + 4 >= $doc_length ) {
+					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+
+					return false;
+				}
+
+				if ( ! (
+					( 'x' === $xml[ $at + 2 ] || 'X' === $xml[ $at + 2 ] ) &&
+					( 'm' === $xml[ $at + 3 ] || 'M' === $xml[ $at + 3 ] ) &&
+					( 'l' === $xml[ $at + 4 ] || 'L' === $xml[ $at + 4 ] )
+				) ) {
+					_doing_it_wrong(
+						__METHOD__,
+						__( 'Invalid processing instruction target.' ),
+						'WP_VERSION'
+					);
+					return false;
+				}
+
+				$at += 5;
+
+				// Skip whitespace.
+				$at += strspn( $xml, " \t\f\r\n", $at );
+
+				/*
+				 * Find the closer.
+				 *
+				 * We could, at this point, only consume the bytes allowed by the specification, that is:
+				 *
+				 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
+				 *
+				 * However, that would require running a slow regular-expression engine for, seemingly,
+				 * little benefit. For now, we are going to pretend that all bytes are allowed until the
+				 * closing ?> is found. Some failures may pass unnoticed. That may not be a problem in practice,
+				 * but if it is then this code path will require a stricter implementation.
+				 */
+				$closer_at = strpos( $xml, '?>', $at );
+				if ( false === $closer_at ) {
+					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+					return false;
+				}
+
+				// The token ends after both bytes of the `?>` closer, where the cursor lands.
+				$this->parser_state         = self::STATE_PI_NODE;
+				$this->token_length         = $closer_at + 2 - $this->token_starts_at;
+				$this->text_starts_at       = $this->token_starts_at + 5;
+				$this->text_length          = $closer_at - $this->text_starts_at;
+				$this->bytes_already_parsed = $closer_at + 2;
+
+				return true;
+			}
+
+			++$at;
+		}
+
+		return false;
+	}
+
+	/**
+	 * Parses the next attribute and, outside of tag closers, records it in the attribute list.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @return bool Whether an attribute was parsed; false at the tag end, on incomplete input, or on a syntax error.
+	 */
+	private function parse_next_attribute() {
+		// Skip whitespace and slashes.
+		$this->bytes_already_parsed += strspn( $this->xml, " \t\f\r\n/", $this->bytes_already_parsed );
+		if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
+			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			return false;
+		}
+
+		// No more attributes to parse.
+		if ( '>' === $this->xml[ $this->bytes_already_parsed ] ) {
+			return false;
+		}
+
+		$attribute_start       = $this->bytes_already_parsed;
+		$attribute_name_length = $this->parse_name( $this->bytes_already_parsed );
+		if ( 0 === $attribute_name_length ) {
+			$this->last_error = self::ERROR_SYNTAX;
+			_doing_it_wrong(
+				__METHOD__,
+				__( 'Invalid attribute name encountered.' ),
+				'WP_VERSION'
+			);
+			return false;
+		}
+		$this->bytes_already_parsed += $attribute_name_length;
+		$attribute_name              = substr( $this->xml, $attribute_start, $attribute_name_length );
+		$this->skip_whitespace();
+
+		// Step over the byte after the name, presumed to be "=" (NOTE(review): it is not verified), then parse the value.
+		++$this->bytes_already_parsed;
+		$this->skip_whitespace();
+		if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
+			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			return false;
+		}
+		switch ( $this->xml[ $this->bytes_already_parsed ] ) {
+			case "'":
+			case '"':
+				$quote       = $this->xml[ $this->bytes_already_parsed ];
+				$value_start = $this->bytes_already_parsed + 1;
+				/*
+				 * XML attributes cannot contain the characters "<" or "&".
+				 *
+				 * This only checks for "<" because it's reasonably fast.
+				 * Ampersands are actually allowed when used as the start of an
+				 * entity reference, but enforcing that would require an expensive
+				 * and complex check. It doesn't seem to be worth it.
+				 *
+				 * @TODO: Discuss enforcing or abandoning the ampersand rule
+				 *        and document the rationale.
+				 */
+				$value_length  = strcspn( $this->xml, "<$quote", $value_start );
+				$attribute_end = $value_start + $value_length + 1;
+
+				if ( $attribute_end - 1 >= strlen( $this->xml ) ) {
+					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+					return false;
+				}
+
+				if ( $this->xml[ $attribute_end - 1 ] !== $quote ) {
+					$this->last_error = self::ERROR_SYNTAX;
+					_doing_it_wrong(
+						__METHOD__,
+						__( 'A disallowed character encountered in an attribute value (either < or &).' ),
+						'WP_VERSION'
+					);
+					return false;
+				}
+				$this->bytes_already_parsed = $attribute_end;
+				break;
+
+			default:
+				$this->last_error = self::ERROR_SYNTAX;
+				_doing_it_wrong(
+					__METHOD__,
+					__( 'Unquoted attribute value encountered.' ),
+					'WP_VERSION'
+				);
+				return false;
+		}
+
+		if ( $attribute_end >= strlen( $this->xml ) ) {
+			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			return false;
+		}
+
+		if ( $this->is_closing_tag ) {
+			return true;
+		}
+
+		if ( array_key_exists( $attribute_name, $this->attributes ) ) {
+			$this->last_error = self::ERROR_SYNTAX;
+			_doing_it_wrong(
+				__METHOD__,
+				__( 'Duplicate attribute found in an XML tag.' ),
+				'WP_VERSION'
+			);
+			return false;
+		}
+
+		$this->attributes[ $attribute_name ] = new WP_HTML_Attribute_Token(
+			$attribute_name,
+			$value_start,
+			$value_length,
+			$attribute_start,
+			$attribute_end - $attribute_start,
+			false
+		);
+
+		return true;
+	}
+
+	/**
+	 * Moves the internal cursor past any run of space, tab, form feed, CR, or LF bytes found right at it.
+	 *
+	 * @since WP_VERSION
+	 */
+	private function skip_whitespace() {
+		$this->bytes_already_parsed += strspn( $this->xml, " \t\f\r\n", $this->bytes_already_parsed );
+	}
+
+	// Describes the first character of the attribute name:
+	// NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+	// See https://www.w3.org/TR/xml/#NT-Name
+	const NAME_START_CHAR_PATTERN = ':a-z_A-Z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}';
+	const NAME_CHAR_PATTERN       = '\-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}:a-z_A-Z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}';
+
+	/**
+	 * Measures the XML `Name` that starts at the given byte offset.
+	 *
+	 * The first character and the rest of the name are matched in a single
+	 * anchored pass over the document, so that multi-byte UTF-8 characters
+	 * are recognized as the first character as well as in later positions.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param int $offset Byte offset in the document at which the name should start.
+	 * @return int Byte length of the name, or 0 when no valid name starts at $offset.
+	 */
+	private function parse_name( $offset ) {
+		$name_pattern = '~\G[' . self::NAME_START_CHAR_PATTERN . '][' . self::NAME_CHAR_PATTERN . ']*~Ssu';
+
+		/*
+		 * preg_match() does not return 1 when nothing matches or when it fails,
+		 * e.g. on malformed UTF-8; every such outcome means "no name here".
+		 */
+		if ( 1 !== preg_match( $name_pattern, $this->xml, $matches, 0, $offset ) ) {
+			return 0;
+		}
+
+		return strlen( $matches[0] );
+	}
+
+	/**
+	 * Applies attribute updates and cleans up once a tag is fully parsed.
+	 *
+	 * @since WP_VERSION
+	 */
+	private function after_tag() {
+		/*
+		 * Purge updates if there are too many. The actual count isn't
+		 * scientific, but a few values from 100 to a few thousand were
+		 * tested to find a practically-useful limit.
+		 *
+		 * If the update queue grows too big, then the processor
+		 * will spend more time iterating through them and lose the
+		 * efficiency gains of deferring applying them.
+		 */
+		if ( 1000 < count( $this->lexical_updates ) ) {
+			$this->get_updated_xml();
+		}
+
+		foreach ( $this->lexical_updates as $name => $update ) {
+			/*
+			 * Any updates appearing after the cursor should be applied
+			 * before proceeding, otherwise they may be overlooked.
+			 */
+			if ( $update->start >= $this->bytes_already_parsed ) {
+				$this->get_updated_xml();
+				break;
+			}
+
+			if ( is_int( $name ) ) {
+				continue;
+			}
+
+			$this->lexical_updates[] = $update;
+			unset( $this->lexical_updates[ $name ] );
+		}
+
+		$this->is_incomplete_text_node = false;
+		$this->token_starts_at         = null;
+		$this->token_length            = null;
+		$this->tag_name_starts_at      = null;
+		$this->tag_name_length         = null;
+		$this->text_starts_at          = 0;
+		$this->text_length             = 0;
+		$this->is_closing_tag          = null;
+		$this->attributes              = array();
+	}
+
+	protected function reset_state() {
+		$this->xml                     = ''; // The document text itself is discarded as well.
+		$this->parser_state            = self::STATE_READY;
+		$this->is_incomplete_text_node = false;
+		$this->bytes_already_parsed    = 0;
+		$this->token_starts_at         = null;
+		$this->token_length            = null;
+		$this->tag_name_starts_at      = null;
+		$this->tag_name_length         = null;
+		$this->text_starts_at          = 0;
+		$this->text_length             = 0;
+		$this->is_closing_tag          = null;
+		$this->last_error              = null;
+		$this->attributes              = array();
+		$this->bookmarks               = array(); // Every bookmark is dropped.
+		$this->lexical_updates         = array(); // Pending updates are dropped without being applied.
+		$this->seek_count              = 0;
+		$this->had_previous_chunks     = false;
+	}
+
+	/**
+	 * Applies the queued lexical updates to the XML document, shifting the cursor and bookmarks to match.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param int $shift_this_point Accumulate and return shift for this position.
+	 * @return int How many bytes the given pointer moved in response to the updates.
+	 */
+	private function apply_attributes_updates( $shift_this_point = 0 ) {
+		if ( ! count( $this->lexical_updates ) ) {
+			return 0;
+		}
+
+		$accumulated_shift_for_given_point = 0;
+
+		/*
+		 * Attribute updates can be enqueued in any order but updates
+		 * to the document must occur in lexical order; that is, each
+		 * replacement must be made before all others which follow it
+		 * at later string indices in the input document.
+		 *
+		 * Sorting avoids making out-of-order replacements which
+		 * can lead to mangled output, partially-duplicated
+		 * attributes, and overwritten attributes.
+		 */
+		usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) );
+
+		$bytes_already_copied = 0;
+		$output_buffer        = '';
+		foreach ( $this->lexical_updates as $diff ) {
+			$shift = strlen( $diff->text ) - $diff->length;
+
+			// Adjust the cursor position by however much an update affects it.
+			if ( $diff->start < $this->bytes_already_parsed ) {
+				$this->bytes_already_parsed += $shift;
+			}
+
+			// Accumulate shift of the given pointer within this function call.
+			if ( $diff->start <= $shift_this_point ) {
+				$accumulated_shift_for_given_point += $shift;
+			}
+
+			$output_buffer       .= substr( $this->xml, $bytes_already_copied, $diff->start - $bytes_already_copied );
+			$output_buffer       .= $diff->text;
+			$bytes_already_copied = $diff->start + $diff->length;
+		}
+
+		$this->xml = $output_buffer . substr( $this->xml, $bytes_already_copied );
+
+		/*
+		 * Adjust bookmark locations to account for how the text
+		 * replacements adjust offsets in the input document.
+		 */
+		foreach ( $this->bookmarks as $bookmark_name => $bookmark ) {
+			$bookmark_end = $bookmark->start + $bookmark->length;
+
+			/*
+			 * Each lexical update which appears before the bookmark's endpoints
+			 * might shift the offsets for those endpoints. Loop through each change
+			 * and accumulate the total shift for each bookmark, then apply that
+			 * shift after tallying the full delta. A bookmark lying inside a
+			 * replaced span is released instead of being shifted.
+			 */
+			$head_delta = 0;
+			$tail_delta = 0;
+
+			foreach ( $this->lexical_updates as $diff ) {
+				$diff_end = $diff->start + $diff->length;
+
+				if ( $bookmark->start < $diff->start && $bookmark_end < $diff->start ) {
+					break;
+				}
+
+				if ( $bookmark->start >= $diff->start && $bookmark_end < $diff_end ) {
+					$this->release_bookmark( $bookmark_name );
+					continue 2;
+				}
+
+				$delta = strlen( $diff->text ) - $diff->length;
+
+				if ( $bookmark->start >= $diff->start ) {
+					$head_delta += $delta;
+				}
+
+				if ( $bookmark_end >= $diff_end ) {
+					$tail_delta += $delta;
+				}
+			}
+
+			$bookmark->start  += $head_delta;
+			$bookmark->length += $tail_delta - $head_delta;
+		}
+
+		$this->lexical_updates = array();
+
+		return $accumulated_shift_for_given_point;
+	}
+
+	/**
+	 * Checks whether a bookmark with the given name exists.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param string $bookmark_name Name to identify a bookmark that potentially exists.
+	 * @return bool Whether that bookmark exists.
+	 */
+	public function has_bookmark( $bookmark_name ) {
+		return array_key_exists( $bookmark_name, $this->bookmarks );
+	}
+
+	/**
+	 * Moves the internal cursor to the location recorded by a given bookmark.
+	 *
+	 * Pending updates are flushed into the document first. The cursor is then
+	 * placed directly at the bookmark's start offset and the token found at
+	 * that offset is parsed again.
+	 *
+	 * In order to prevent accidental infinite loops, the number of seek() calls
+	 * is capped at MAX_SEEK_OPS; any call beyond that limit fails.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param string $bookmark_name Jump to the place in the document identified by this bookmark name.
+	 * @return bool Whether the internal cursor was successfully moved to the bookmark's location.
+	 */
+	public function seek( $bookmark_name ) {
+		// Only bookmarks that are still registered can be sought.
+		$is_known_bookmark = array_key_exists( $bookmark_name, $this->bookmarks );
+		if ( ! $is_known_bookmark ) {
+			_doing_it_wrong( __METHOD__, __( 'Unknown bookmark name.' ), 'WP_VERSION' );
+			return false;
+		}
+
+		/*
+		 * Every call that names a known bookmark counts towards the limit,
+		 * including the calls that end up being rejected here.
+		 */
+		$this->seek_count += 1;
+		if ( $this->seek_count > static::MAX_SEEK_OPS ) {
+			$too_many_seeks = __( 'Too many calls to seek() - this can lead to performance issues.' );
+			_doing_it_wrong( __METHOD__, $too_many_seeks, 'WP_VERSION' );
+			return false;
+		}
+
+		// Apply pending edits before reading the bookmark's offset.
+		$this->get_updated_xml();
+
+		// Restart tokenization at the bookmark and consume the token found there.
+		$this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start;
+		$this->parser_state         = self::STATE_READY;
+		return $this->base_class_next_token();
+	}
+
+	/**
+	 * Compare two WP_HTML_Text_Replacement objects.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param WP_HTML_Text_Replacement $a First attribute update.
+	 * @param WP_HTML_Text_Replacement $b Second attribute update.
+	 * @return int Comparison value for string order.
+	 */
+	private static function sort_start_ascending( $a, $b ) {
+		// Replacements that start earlier in the document sort first.
+		$start_order = $a->start - $b->start;
+		if ( 0 !== $start_order ) {
+			return $start_order;
+		}
+
+		// Same start: order by the replacement text when both have one.
+		$text_order = 0;
+		if ( isset( $a->text, $b->text ) ) {
+			$text_order = strcmp( $a->text, $b->text );
+		}
+
+		// Same start and same text should not happen for distinct updates;
+		// if it does, only the replaced lengths are left to compare.
+		return 0 !== $text_order ? $text_order : $a->length - $b->length;
+	}
+
+	/**
+	 * Return the enqueued value for a given attribute, if one exists.
+	 *
+	 * Enqueued updates can take different data types:
+	 *  - If an update is enqueued without a value (name only), the return will be `true`.
+	 *  - If an update is otherwise enqueued, the return will be the decoded string value of that update.
+	 *  - If an attribute is enqueued to be removed, the return will be `null` to indicate that.
+	 *  - If no updates are enqueued, the return will be `false` to differentiate from "removed."
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param string $comparable_name The attribute name, spelled exactly as it was when the update was enqueued.
+	 * @return string|boolean|null Value of enqueued update if present, otherwise false.
+	 */
+	private function get_enqueued_attribute_value( $comparable_name ) {
+		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
+			return false;
+		}
+
+		if ( ! isset( $this->lexical_updates[ $comparable_name ] ) ) {
+			return false;
+		}
+
+		$enqueued_text = $this->lexical_updates[ $comparable_name ]->text;
+
+		// Removed attributes erase the entire span.
+		if ( '' === $enqueued_text ) {
+			return null;
+		}
+
+		/*
+		 * An update that consists of the attribute name alone, without a value,
+		 * is reported as `true`.
+		 *
+		 * Detect this based on the absence of the `=`, which _must_ exist in any
+		 * update containing a value. Updates enqueued by `set_attribute()` always
+		 * contain one, since it writes `name="value"`; this branch is kept for
+		 * updates of any other shape.
+		 */
+		$equals_at = strpos( $enqueued_text, '=' );
+		if ( false === $equals_at ) {
+			return true;
+		}
+
+		/*
+		 * Finally, a normal update's value will appear after the `=` and
+		 * be double-quoted, as performed incidentally by `set_attribute`.
+		 *
+		 * e.g. `type="text"`
+		 *           ¹²    ³
+		 *        1. Equals is here.
+		 *        2. Double-quoting starts one after the equals sign.
+		 *        3. Double-quoting ends at the last character in the update.
+		 */
+		$enqueued_value = substr( $enqueued_text, $equals_at + 2, -1 );
+
+		/*
+		 * `set_attribute()` entity-encodes the value before enqueueing it, and
+		 * `get_attribute()` decodes the values it reads from the document. Decode
+		 * the enqueued value too, so that a value reads back the same before and
+		 * after the update is applied:
+		 *
+		 *     $p->set_attribute( 'title', 'Q&A' );
+		 *     'Q&A' === $p->get_attribute( 'title' ); // Not 'Q&amp;A'.
+		 *
+		 * Should decoding fail, the raw text is returned: `null` and `false` already
+		 * carry other meanings for the callers of this method.
+		 */
+		$decoded_value = WP_XML_Decoder::decode( $enqueued_value );
+
+		return isset( $decoded_value ) ? $decoded_value : $enqueued_value;
+	}
+
+	/**
+	 * Returns the decoded value of a requested attribute of the matched tag or XML declaration.
+	 *
+	 * Example:
+	 *
+	 *     $p = new WP_XML_Processor( '<wp:content class="test" data-test-id="14">Test</wp:content>' );
+	 *     $p->next_tag() === true;
+	 *     $p->get_attribute( 'data-test-id' ) === '14';
+	 *     $p->get_attribute( 'class' ) === 'test';
+	 *     $p->get_attribute( 'aria-label' ) === null;
+	 *
+	 *     $p->next_tag() === false;
+	 *     $p->get_attribute( 'class' ) === null;
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param string $name Name of attribute whose value is requested.
+	 * @return string|true|null|false Value of attribute, `null` if not available, `false` if the value is invalid.
+	 */
+	public function get_attribute( $name ) {
+		// Attributes can only be read while matched on a tag or on the XML declaration.
+		$can_have_attributes = (
+			self::STATE_MATCHED_TAG === $this->parser_state ||
+			self::STATE_XML_DECLARATION === $this->parser_state
+		);
+		if ( ! $can_have_attributes ) {
+			return null;
+		}
+
+		// An update that is enqueued but not yet applied wins over the document text.
+		$pending_value = $this->get_enqueued_attribute_value( $name );
+		if ( false !== $pending_value ) {
+			return $pending_value;
+		}
+
+		if ( ! isset( $this->attributes[ $name ] ) ) {
+			return null;
+		}
+
+		// Slice the raw value out of the document and decode it.
+		$token         = $this->attributes[ $name ];
+		$decoded_value = WP_XML_Decoder::decode(
+			substr( $this->xml, $token->value_starts_at, $token->value_length )
+		);
+		if ( isset( $decoded_value ) ) {
+			return $decoded_value;
+		}
+
+		/*
+		 * The decoder produced no value: the attribute value is invalid, which is a fatal error.
+		 */
+		$this->last_error = self::ERROR_SYNTAX;
+		_doing_it_wrong(
+			__METHOD__,
+			__( 'Invalid attribute value encountered.' ),
+			'WP_VERSION'
+		);
+		return false;
+	}
+
+	/**
+	 * Gets names of all attributes matching a given prefix in the current tag.
+	 *
+	 * Note that matching is case-sensitive. This is in accordance with the spec.
+	 *
+	 * Example:
+	 *
+	 *     $p = new WP_XML_Processor( '<wp:content data-ENABLED="1" class="test" DATA-test-id="14">Test</wp:content>' );
+	 *     $p->next_tag( array( 'class_name' => 'test' ) ) === true;
+	 *     $p->get_attribute_names_with_prefix( 'data-' ) === array( 'data-ENABLED' );
+	 *     $p->get_attribute_names_with_prefix( 'DATA-' ) === array( 'DATA-test-id' );
+	 *     $p->get_attribute_names_with_prefix( 'DAta-' ) === array();
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param string $prefix Prefix of requested attribute names.
+	 * @return array|null List of attribute names, or `null` when no tag opener is matched.
+	 */
+	public function get_attribute_names_with_prefix( $prefix ) {
+		// Attribute names are only reported while matched on a tag opener.
+		$is_tag_opener = self::STATE_MATCHED_TAG === $this->parser_state && ! $this->is_closing_tag;
+		if ( ! $is_tag_opener ) {
+			return null;
+		}
+		// Case-sensitive prefix match; the result is re-indexed into a plain list.
+		return array_values(
+			array_filter(
+				array_keys( $this->attributes ),
+				static function ( $attr_name ) use ( $prefix ) {
+					return str_starts_with( $attr_name, $prefix );
+				}
+			)
+		);
+	}
+
+	/**
+	 * Returns the name of the matched tag exactly as it is written in the document.
+	 *
+	 * Example:
+	 *
+	 *     $p = new WP_XML_Processor( '<wp:content class="test">Test</wp:content>' );
+	 *     $p->next_tag() === true;
+	 *     $p->get_tag() === 'wp:content';
+	 *
+	 *     $p->next_tag() === false;
+	 *     $p->get_tag() === null;
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @return string|null Name of currently matched tag in input XML, or `null` if none found.
+	 */
+	public function get_tag() {
+		/*
+		 * A name is reported only while the processor is matched on a tag
+		 * and the start of that tag's name has been recorded.
+		 */
+		$is_on_tag = self::STATE_MATCHED_TAG === $this->parser_state;
+		if ( ! $is_on_tag || null === $this->tag_name_starts_at ) {
+			return null;
+		}
+
+		// The name is copied verbatim; no case folding is applied.
+		return substr( $this->xml, $this->tag_name_starts_at, $this->tag_name_length );
+	}
+
+	/**
+	 * Indicates if the currently matched tag is an empty element tag.
+	 *
+	 * XML tags ending with a solidus ("/") are parsed as empty elements. They have no
+	 * content and no matching closer is expected.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @return bool Whether the currently matched tag is an empty element tag.
+	 */
+	public function is_empty_element() {
+		/*
+		 * An empty element tag is marked by the solidus at the _end_ of the tag,
+		 * not by one at its beginning.
+		 *
+		 * Example:
+		 *
+		 *     <figure />
+		 *             ^ the byte inspected: the one right before the closing ">".
+		 */
+		return (
+			self::STATE_MATCHED_TAG === $this->parser_state &&
+			'/' === $this->xml[ $this->token_starts_at + $this->token_length - 2 ]
+		);
+	}
+
+	/**
+	 * Indicates if the current tag token is a tag closer.
+	 *
+	 * Example:
+	 *
+	 *     $p = new WP_XML_Processor( '<wp:content></wp:content>' );
+	 *     $p->next_tag( array( 'tag_name' => 'wp:content', 'tag_closers' => 'visit' ) );
+	 *     $p->is_tag_closer() === false;
+	 *
+	 *     $p->next_tag( array( 'tag_name' => 'wp:content', 'tag_closers' => 'visit' ) );
+	 *     $p->is_tag_closer() === true;
 	 *
-	 * Because bookmarks allocate memory and require processing
-	 * for every applied update, they are limited and require
-	 * a name. They should not be created with programmatically-made
-	 * names, such as "li_{$index}" with some loop. As a general
-	 * rule they should only be created with string-literal names
-	 * like "start-of-section" or "last-paragraph".
+	 * @since WP_VERSION
 	 *
-	 * Bookmarks are a powerful tool to enable complicated behavior.
-	 * Consider double-checking that you need this tool if you are
-	 * reaching for it, as inappropriate use could lead to broken
-	 * HTML structure or unwanted processing overhead.
+	 * @return bool Whether the current tag is a tag closer.
+	 */
+	public function is_tag_closer() {
+		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
+			return false;
+		}
+		return (bool) $this->is_closing_tag;
+	}
+
+	/**
+	 * Indicates the kind of matched token, if any.
+	 *
+	 * This differs from `get_token_name()` in that it always
+	 * returns a static string indicating the type, whereas
+	 * `get_token_name()` may return values derived from the
+	 * token itself, such as a tag name or processing
+	 * instruction tag.
+	 *
+	 * Possible values:
+	 *  - `#tag` when matched on a tag.
+	 *  - `#text` when matched on a text node.
+	 *  - `#cdata-section` when matched on a CDATA node.
+	 *  - `#comment` when matched on a comment.
+	 *  - `#xml-declaration` and `#processing-instructions` when matched on the XML declaration or a processing instruction.
 	 *
 	 * @since WP_VERSION
 	 *
-	 * @param string $bookmark_name Identifies this particular bookmark.
-	 * @return bool Whether the bookmark was successfully created.
+	 * @return string|null What kind of token is matched, or null.
 	 */
-	public function set_bookmark( $bookmark_name ) {
-		return parent::set_bookmark( "_{$bookmark_name}" );
+	public function get_token_type() {
+		// Every tag reports the same static type, whatever its name is.
+		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
+			return '#tag';
+		}
+
+		// For all other tokens the type and the name are the same value.
+		return $this->get_token_name();
 	}
 
 	/**
-	 * Moves the internal cursor in the HTML Processor to a given bookmark's location.
+	 * Returns the node name represented by the token.
 	 *
-	 * Be careful! Seeking backwards to a previous location resets the parser to the
-	 * start of the document and reparses the entire contents up until it finds the
-	 * sought-after bookmarked location.
+	 * This matches the DOM API value `nodeName`. Some values
+	 * are static, such as `#text` for a text node, while others
+	 * are dynamically generated from the token itself.
 	 *
-	 * In order to prevent accidental infinite loops, there's a
-	 * maximum limit on the number of times seek() can be called.
+	 * Dynamic names:
+	 *  - The tag name, exactly as written in the document, for tag matches.
 	 *
-	 * @throws Exception When unable to allocate a bookmark for the next token in the input HTML document.
+	 * Note that if the Tag Processor is not matched on a token
+	 * then this function will return `null`, either because it
+	 * hasn't yet found a token or because it reached the end
+	 * of the document without matching a token.
 	 *
 	 * @since WP_VERSION
 	 *
-	 * @param string $bookmark_name Jump to the place in the document identified by this bookmark name.
-	 * @return bool Whether the internal cursor was successfully moved to the bookmark's location.
+	 * @return string|null Name of the matched token.
 	 */
-	public function seek( $bookmark_name ) {
-		// Flush any pending updates to the document before beginning.
-		$this->get_updated_xml();
-		return parent::seek( "_{$bookmark_name}" );
+	public function get_token_name() {
+		// Tags are the only tokens whose name is derived from the document.
+		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
+			return $this->get_tag();
+		}
+
+		// Every other token kind maps onto a fixed node name.
+		$static_names = array(
+			self::STATE_TEXT_NODE       => '#text',
+			self::STATE_CDATA_NODE      => '#cdata-section',
+			self::STATE_XML_DECLARATION => '#xml-declaration',
+			self::STATE_PI_NODE         => '#processing-instructions',
+			self::STATE_COMMENT         => '#comment',
+		);
+
+		// Any other parser state has no token name.
+		if ( isset( $static_names[ $this->parser_state ] ) ) {
+			return $static_names[ $this->parser_state ];
+		}
+		return null;
 	}
 
 	/**
-	 * Removes a bookmark that is no longer needed.
+	 * Returns the modifiable text for a matched token, or an empty string.
 	 *
-	 * Releasing a bookmark frees up the small
-	 * performance overhead it requires.
+	 * Modifiable text is text content that may be read and changed without
+	 * changing the XML structure of the document around it. This includes
+	 * the contents of `#text` nodes in the XML as well as the inner
+	 * contents of XML comments, Processing Instructions, and others, even
+	 * though these nodes aren't part of a parsed DOM tree. They also contain
+	 * the contents of SCRIPT and STYLE tags, of TEXTAREA tags, and of any
+	 * other section in an XML document which cannot contain XML markup (DATA).
+	 *
+	 * If a token has no modifiable text then an empty string is returned to
+	 * avoid needless crashing or type errors. An empty string does not mean
+	 * that a token has modifiable text, and a token with modifiable text may
+	 * have an empty string (e.g. a comment with no contents).
 	 *
 	 * @since WP_VERSION
 	 *
-	 * @param string $bookmark_name Name of the bookmark to remove.
-	 * @return bool Whether the bookmark already existed before removal.
+	 * @return string
+	 */
+	public function get_modifiable_text() {
+		if ( null === $this->text_starts_at ) {
+			return '';
+		}
+
+		$text = substr( $this->xml, $this->text_starts_at, $this->text_length );
+
+		/*
+		 * > the XML processor must behave as if it normalized all line breaks in external parsed
+		 * > entities (including the document entity) on input, before parsing, by translating both
+		 * > the two-character sequence #xD #xA and any #xD that is not followed by #xA to a single
+		 * > #xA character.
+		 *
+		 * See https://www.w3.org/TR/xml/#sec-line-ends
+		 */
+		$text = str_replace( array( "\r\n", "\r" ), "\n", $text );
+
+		// Comment data, CDATA sections, and PCData tags contents are not decoded any further.
+		if (
+			self::STATE_CDATA_NODE === $this->parser_state ||
+			self::STATE_COMMENT === $this->parser_state ||
+			$this->is_pcdata_element()
+		) {
+			return $text;
+		}
+
+		$decoded = WP_XML_Decoder::decode( $text );
+		if ( ! isset( $decoded ) ) {
+			/*
+			 * Text that cannot be decoded is a fatal syntax error. It is recorded
+			 * and reported through _doing_it_wrong() only: nothing is printed
+			 * here, since this method must not write to the caller's output.
+			 *
+			 * @see WP_XML_Decoder::decode()
+			 */
+			$this->last_error = self::ERROR_SYNTAX;
+			_doing_it_wrong(
+				__METHOD__,
+				__( 'Invalid text content encountered.' ),
+				'WP_VERSION'
+			);
+			return false;
+		}
+
+		return $decoded;
+	}
+
+	/**
+	 * Replaces the text of the matched text node, comment, or CDATA section.
+	 * @param string $new_value New text, as `get_modifiable_text()` would return it.
+	 * @return bool Whether the update was enqueued.
+	 */
+	public function set_modifiable_text( $new_value ) {
+		switch ( $this->parser_state ) {
+			case self::STATE_TEXT_NODE:
+				$encoded = htmlspecialchars( $new_value, ENT_XML1, 'UTF-8' );
+				break;
+			case self::STATE_COMMENT:
+				// Comments are read back verbatim, so write them verbatim. `--` would end one early.
+				if ( false !== strpos( $new_value . '-', '--' ) ) {
+					_doing_it_wrong( __METHOD__, __( 'Comment text cannot contain "--" or end with "-".' ), 'WP_VERSION' );
+					return false;
+				}
+				$encoded = $new_value;
+				break;
+			case self::STATE_CDATA_NODE:
+				// CDATA is read back verbatim too: split `]]>` over two sections instead of encoding it.
+				$encoded = str_replace( ']]>', ']]]]><![CDATA[>', $new_value );
+				break;
+			default:
+				_doing_it_wrong( __METHOD__, __( 'Cannot set text content on a non-text node.' ), 'WP_VERSION' );
+				return false;
+		}
+		$this->lexical_updates[] = new WP_HTML_Text_Replacement( $this->text_starts_at, $this->text_length, $encoded );
+		return true;
+	}
+
+	/**
+	 * Updates or creates a new attribute on the currently matched tag with the passed value.
+	 *
+	 * XML has no boolean attributes, so only string values are accepted and any
+	 * other type is rejected. The value is entity-encoded, double quotes included,
+	 * so that it cannot terminate the double-quoted attribute it is written into.
+	 *
+	 * The attribute name is written as given; it is not validated or encoded.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @param string $name  The attribute name to target.
+	 * @param string $value The new attribute value.
+	 * @return bool Whether an attribute value was set.
 	 */
-	public function release_bookmark( $bookmark_name ) {
-		return parent::release_bookmark( "_{$bookmark_name}" );
+	public function set_attribute( $name, $value ) {
+		if ( ! is_string( $value ) ) {
+			_doing_it_wrong(
+				__METHOD__,
+				__( 'Non-string attribute values cannot be passed to set_attribute().' ),
+				'WP_VERSION'
+			);
+			return false;
+		}
+		if (
+			self::STATE_MATCHED_TAG !== $this->parser_state ||
+			$this->is_closing_tag
+		) {
+			return false;
+		}
+
+		// ENT_COMPAT is required here: with ENT_XML1 alone `"` is left as is, and the
+		// value could then break out of the double-quoted attribute built below.
+		$value             = htmlspecialchars( $value, ENT_XML1 | ENT_COMPAT, 'UTF-8' );
+		$updated_attribute = "{$name}=\"{$value}\"";
+
+		/*
+		 * > An attribute name must not appear more than once
+		 * > in the same start-tag or empty-element tag.
+		 *     - XML 1.0 spec
+		 *
+		 * @see https://www.w3.org/TR/xml/#sec-starttags
+		 */
+		if ( isset( $this->attributes[ $name ] ) ) {
+			/*
+			 * Update an existing attribute.
+			 *
+			 * Example – set attribute id to "new" in <wp:content id="initial_id" />:
+			 *     <wp:content id="initial_id"/>
+			 *                 ^-------------^
+			 *                 start         end
+			 *     replacement: `id="new"`
+			 *
+			 *     Result: <wp:content id="new"/>
+			 */
+			$existing_attribute             = $this->attributes[ $name ];
+			$this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
+				$existing_attribute->start,
+				$existing_attribute->length,
+				$updated_attribute
+			);
+		} else {
+			/*
+			 * Create a new attribute at the tag's name end.
+			 *
+			 * Example – add attribute id="new" to <wp:content />:
+			 *     <wp:content/>
+			 *                ^
+			 *                start and end
+			 *     replacement: ` id="new"`
+			 *
+			 *     Result: <wp:content id="new"/>
+			 */
+			$this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
+				$this->tag_name_starts_at + $this->tag_name_length,
+				0,
+				' ' . $updated_attribute
+			);
+		}
+
+		return true;
 	}
 
 	/**
-	 * Checks whether a bookmark with the given name exists.
+	 * Remove an attribute from the currently-matched tag.
 	 *
-	 * @since 6.5.0
+	 * @since WP_VERSION
 	 *
-	 * @param string $bookmark_name Name to identify a bookmark that potentially exists.
-	 * @return bool Whether that bookmark exists.
+	 * @param string $name The attribute name to remove.
+	 * @return bool Whether an attribute was removed.
 	 */
-	public function has_bookmark( $bookmark_name ) {
-		return parent::has_bookmark( "_{$bookmark_name}" );
+	public function remove_attribute( $name ) {
+		// Only tag openers carry attributes that could be removed.
+		$is_tag_opener = self::STATE_MATCHED_TAG === $this->parser_state && ! $this->is_closing_tag;
+		if ( ! $is_tag_opener ) {
+			return false;
+		}
+
+		if ( isset( $this->attributes[ $name ] ) ) {
+			/*
+			 * The attribute exists in the input document: enqueue the
+			 * replacement of its entire span with an empty string.
+			 *
+			 * Example – remove the attribute id from <wp:content id="initial_id"/>:
+			 *
+			 *     <wp:content id="initial_id"/>
+			 *                 ^-------------^
+			 *                 start         end
+			 *     replacement: ``
+			 *
+			 *     Result: <wp:content />
+			 */
+			$existing_attribute = $this->attributes[ $name ];
+
+			$this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
+				$existing_attribute->start,
+				$existing_attribute->length,
+				''
+			);
+
+			return true;
+		}
+
+		/*
+		 * The attribute isn't part of the input document. It may still have been
+		 * enqueued by an earlier `set_attribute()` call; dropping that pending
+		 * update is all there is to do. Nothing was removed from the document
+		 * itself, hence the `false`. Unsetting a missing key is a no-op, so no
+		 * existence check is needed.
+		 */
+		unset( $this->lexical_updates[ $name ] );
+		return false;
 	}
 
 	/**
-	 * Low-level token iteration is not available in WP_XML_Processor
-	 * as it could lead to undefined behaviors.
+	 * Returns the string representation of the XML Tag Processor.
+	 *
+	 * @since WP_VERSION
 	 *
-	 * @use WP_XML_Processor::next_tag() instead.
+	 * @see WP_XML_Processor::get_updated_xml()
 	 *
-	 * @return false
+	 * @return string The processed XML.
 	 */
-	public function next_token() {
-		return $this->step();
+	public function __toString() {
+		return $this->get_updated_xml();
 	}
 
 	/**
-	 * Steps through the XML document and stop at the next tag, if any.
+	 * Returns the string representation of the XML Tag Processor.
 	 *
 	 * @since WP_VERSION
 	 *
-	 * @param string $node_to_process Whether to parse the next node or reprocess the current node.
-	 * @return bool Whether a tag was matched.
+	 * @return string The processed XML.
+	 */
+	public function get_updated_xml() {
+		$requires_no_updating = 0 === count( $this->lexical_updates );
+
+		/*
+		 * When no updates are enqueued the document is already up to date:
+		 * return it as is and skip the work of applying and reparsing.
+		 */
+		if ( $requires_no_updating ) {
+			return $this->xml;
+		}
+
+		/*
+		 * Keep track of the position right before the current token. This will
+		 * be necessary for reparsing the current token after updating the XML.
+		 */
+		$before_current_tag = $this->token_starts_at;
+
+		/*
+		 * 1. Apply the enqueued edits and shift the remembered position by however much they moved it.
+		 */
+		$before_current_tag += $this->apply_attributes_updates( $before_current_tag );
+
+		/*
+		 * 2. Rewind to before the current token and reparse to get updated attributes.
+		 *
+		 * The cursor is placed at the start of the token the processor was matched
+		 * on when this method was called, as if that token had not been consumed
+		 * yet, and the token is then parsed once more from the updated document.
+		 * Additional calls to modify the attributes will apply at this same
+		 * location.
+		 *
+		 * In order to avoid issues with subclasses that might add behaviors to
+		 * `next_tag()`, the internal `base_class_next_token()` method is called
+		 * here instead.
+		 *
+		 * It's important to note that the processor does not advance through the
+		 * document here: it rewinds only to the beginning of this very token
+		 * before reprocessing it and its attributes.
+		 *
+		 *     <p>Previous XML<em>More XML</em></p>
+		 *                    ↑ the cursor is placed here when matched on `<em>`
+		 */
+		$this->bytes_already_parsed = $before_current_tag;
+		$this->base_class_next_token();
+
+		return $this->xml;
+	}
+
+	/**
+	 * Finds the next token in the XML document.
+	 *
+	 * An XML document can be viewed as a stream of tokens,
+	 * where tokens are things like XML tags, XML comments,
+	 * text nodes, etc. This method finds the next token in
+	 * the XML document and returns whether it found one.
+	 *
+	 * If it starts parsing a token and reaches the end of the
+	 * document then it will seek to the start of the last
+	 * token and pause, returning `false` to indicate that it
+	 * failed to find a complete token.
+	 *
+	 * Possible token types, based on the XML specification:
+	 *
+	 *  - an XML tag, whether opening, closing, or void.
+	 *  - a text node - the plaintext inside tags.
+	 *  - an XML comment.
+	 *  - a processing instruction, e.g. `<?xml version="1.0" ?>`.
+	 *
+	 * The Tag Processor currently only supports the tag token.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 *
+	 * @return bool Whether a token was parsed.
 	 */
-	private function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
+	public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		// Refuse to proceed if there was a previous error.
 		if ( null !== $this->last_error ) {
 			return false;
@@ -406,8 +2729,8 @@ private function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
 
 		// Finish stepping when there are no more tokens in the document.
 		if (
-			WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
-			WP_XML_Tag_Processor::STATE_COMPLETE === $this->parser_state
+			WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+			WP_XML_Processor::STATE_COMPLETE === $this->parser_state
 		) {
 			return false;
 		}
@@ -416,42 +2739,49 @@ private function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
 			if ( $this->is_empty_element() ) {
 				$this->pop_open_element();
 			}
-			$this->base_class_next_token();
 		}
 
-		static $i = 0;
 		switch ( $this->parser_context ) {
 			case self::IN_PROLOG_CONTEXT:
-				return $this->step_in_prolog();
+				return $this->step_in_prolog( $node_to_process );
 			case self::IN_ELEMENT_CONTEXT:
-				return $this->step_in_element();
+				return $this->step_in_element( $node_to_process );
 			case self::IN_MISC_CONTEXT:
-				return $this->step_in_misc();
+				return $this->step_in_misc( $node_to_process );
 			default:
 				$this->last_error = self::ERROR_UNSUPPORTED;
 				return false;
 		}
 	}
 
+	public function next_token( $node_to_process = self::PROCESS_NEXT_NODE ) {
+		return $this->step( $node_to_process );
+	}
+
 	/**
 	 * Parses the next node in the 'prolog' part of the XML document.
 	 *
 	 * @since WP_VERSION
 	 *
 	 * @see https://www.w3.org/TR/xml/#NT-document.
-	 * @see WP_XML_Tag_Processor::step
+	 * @see WP_XML_Processor::step
 	 *
 	 * @return bool Whether a node was found.
 	 */
-	private function step_in_prolog() {
+	private function step_in_prolog( $node_to_process = self::PROCESS_NEXT_NODE ) {
+		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
+			if ( false === $this->base_class_next_token() ) {
+				return false;
+			}
+		}
 		// XML requires a root element. If we've reached the end of data in the prolog stage,
 		// before finding a root element, then the document is incomplete.
-		if ( WP_XML_Tag_Processor::STATE_COMPLETE === $this->parser_state ) {
+		if ( WP_XML_Processor::STATE_COMPLETE === $this->parser_state ) {
 			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 			return false;
 		}
 		// Do not step if we paused due to an incomplete input.
-		if ( WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
+		if ( WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
 			return false;
 		}
 		switch ( $this->get_token_type() ) {
@@ -484,11 +2814,16 @@ private function step_in_prolog() {
 	 * @since WP_VERSION
 	 *
 	 * @see https://www.w3.org/TR/xml/#NT-document.
-	 * @see WP_XML_Tag_Processor::step
+	 * @see WP_XML_Processor::step
 	 *
 	 * @return bool Whether a node was found.
 	 */
-	private function step_in_element() {
+	private function step_in_element( $node_to_process = self::PROCESS_NEXT_NODE ) {
+		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
+			if ( false === $this->base_class_next_token() ) {
+				return false;
+			}
+		}
 		// An XML document isn't complete until the root element is closed.
 		if ( self::STATE_COMPLETE === $this->parser_state &&
 			count( $this->stack_of_open_elements ) > 0
@@ -497,7 +2832,7 @@ private function step_in_element() {
 			return false;
 		}
 		// Do not step if we paused due to an incomplete input.
-		if ( WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
+		if ( WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
 			return false;
 		}
 
@@ -541,15 +2876,22 @@ private function step_in_element() {
 	 * @since WP_VERSION
 	 *
 	 * @see https://www.w3.org/TR/xml/#NT-document.
-	 * @see WP_XML_Tag_Processor::step
+	 * @see WP_XML_Processor::step
 	 *
 	 * @return bool Whether a node was found.
 	 */
-	private function step_in_misc() {
+	private function step_in_misc( $node_to_process = self::PROCESS_NEXT_NODE ) {
+		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
+			/**
+			 * Don't short-circuit on failure yet! In misc stage,
+			 * we're still expecting text nodes and comments – even
+			 * if there are no more tags left in the document.
+			 */
+			$this->base_class_next_token();
+		}
 		if ( self::STATE_COMPLETE === $this->parser_state ) {
 			return true;
 		}
-
 		switch ( $this->get_token_type() ) {
 			case '#comment':
 			case '#processing-instructions':
@@ -572,7 +2914,7 @@ private function step_in_misc() {
 				 * once more data becomes available in.
 				 */
 				if (
-					WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state &&
+					WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state &&
 					$this->is_incomplete_text_node
 				) {
 					$text = $this->get_modifiable_text();
@@ -622,7 +2964,7 @@ public function get_breadcrumbs() {
 	 *
 	 * Example:
 	 *
-	 *     $processor = new WP_XML_Tag_Processor( '<root><wp:post><content><image /></content></wp:post></root>' );
+	 *     $processor = new WP_XML_Processor( '<root><wp:post><content><image /></content></wp:post></root>' );
 	 *     $processor->next_tag( 'img' );
 	 *     true  === $processor->matches_breadcrumbs( array( 'content', 'image' ) );
 	 *     true  === $processor->matches_breadcrumbs( array( 'wp:post', 'content', 'image' ) );
@@ -711,6 +3053,173 @@ private function last_open_element() {
 		return end( $this->stack_of_open_elements );
 	}
 
+	/**
+	 * @TODO Decide whether this method should even exist.
+	 */
+	protected function get_processed_xml() {
+		// Flush updates
+		$this->get_updated_xml();
+		return substr( $this->xml, 0, $this->bytes_already_parsed );
+	}
+
+	/**
+	 * @TODO Decide whether this method should even exist.
+	 */
+	protected function get_unprocessed_xml() {
+		// Flush updates
+		$this->get_updated_xml();
+		return substr( $this->xml, $this->bytes_already_parsed );
+	}
+
+
+	/**
+	 * Parser Ready State.
+	 *
+	 * Indicates that the parser is ready to run and waiting for a state transition.
+	 * It may not have started yet, or it may have just finished parsing a token and
+	 * is ready to find the next one.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 */
+	const STATE_READY = 'STATE_READY';
+
+	/**
+	 * Parser Complete State.
+	 *
+	 * Indicates that the parser has reached the end of the document and there is
+	 * nothing left to scan. It finished parsing the last token completely.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 */
+	const STATE_COMPLETE = 'STATE_COMPLETE';
+
+	/**
+	 * Parser Incomplete Input State.
+	 *
+	 * Indicates that the parser has reached the end of the document before finishing
+	 * a token. It started parsing a token but there is a possibility that the input
+	 * XML document was truncated in the middle of a token.
+	 *
+	 * The parser is reset at the start of the incomplete token and has paused. There
+	 * is nothing more that can be scanned unless provided a more complete document.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 */
+	const STATE_INCOMPLETE_INPUT = 'STATE_INCOMPLETE_INPUT';
+
+	/**
+	 * Parser Invalid Document State.
+	 *
+	 * Indicates that the parsed XML document contains malformed input and cannot be parsed.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 */
+	const STATE_INVALID_DOCUMENT = 'STATE_INVALID_DOCUMENT';
+
+	/**
+	 * Parser Matched Tag State.
+	 *
+	 * Indicates that the parser has found an XML tag and it's possible to get
+	 * the tag name and read or modify its attributes (if it's not a closing tag).
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 */
+	const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG';
+
+	/**
+	 * Parser Text Node State.
+	 *
+	 * Indicates that the parser has found a text node and it's possible
+	 * to read and modify that text.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 */
+	const STATE_TEXT_NODE = 'STATE_TEXT_NODE';
+
+	/**
+	 * Parser CDATA Node State.
+	 *
+	 * Indicates that the parser has found a CDATA node and it's possible
+	 * to read and modify its modifiable text. Note that, unlike in HTML
+	 * where CDATA sections only exist in foreign content (SVG and MathML),
+	 * in XML they may occur anywhere character data may occur.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 */
+	const STATE_CDATA_NODE = 'STATE_CDATA_NODE';
+
+	/**
+	 * Indicates that the parser has found an XML processing instruction.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 */
+	const STATE_PI_NODE = 'STATE_PI_NODE';
+
+	/**
+	 * Indicates that the parser has found an XML declaration.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 */
+	const STATE_XML_DECLARATION = 'STATE_XML_DECLARATION';
+
+	/**
+	 * Indicates that the parser has found an XML comment and it's
+	 * possible to read and modify its modifiable text.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @access private
+	 */
+	const STATE_COMMENT = 'STATE_COMMENT';
+
+	/**
+	 * Indicates that the parser encountered invalid syntax and has bailed.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @var string
+	 */
+	const ERROR_SYNTAX = 'syntax';
+
+	/**
+	 * Indicates that the provided XML document contains a declaration that is
+	 * unsupported by the parser.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @var string
+	 */
+	const ERROR_UNSUPPORTED = 'unsupported';
+
+	/**
+	 * Indicates that the parser encountered more XML tokens than it
+	 * was able to process and has bailed.
+	 *
+	 * @since WP_VERSION
+	 *
+	 * @var string
+	 */
+	const ERROR_EXCEEDED_MAX_BOOKMARKS = 'exceeded-max-bookmarks';
+
+
 	/**
 	 * Indicates that we're parsing the `prolog` part of the XML
 	 * document.
diff --git a/packages/playground/data-liberation/src/xml-api/WP_XML_Tag_Processor.php b/packages/playground/data-liberation/src/xml-api/WP_XML_Tag_Processor.php
deleted file mode 100644
index 26ac382e2e..0000000000
--- a/packages/playground/data-liberation/src/xml-api/WP_XML_Tag_Processor.php
+++ /dev/null
@@ -1,2837 +0,0 @@
-<?php
-/**
- * XML API: WP_XML_Tag_Processor class
- *
- * Scans through an XML document to find specific tags, then
- * transforms those tags by adding, removing, or updating the
- * values of the XML attributes within that tag (opener).
- *
- * It implements a subset of the XML 1.0 specification (https://www.w3.org/TR/xml/)
- * and supports XML documents with the following characteristics:
- *
- * * XML 1.0
- * * Well-formed
- * * UTF-8 encoded
- * * Not standalone (so can use external entities)
- * * No DTD, DOCTYPE, ATTLIST, ENTITY, or conditional sections
- *
- * ### Possible future direction for this module
- *
- * The final goal is to support both 1.0 and 1.1 depending on the
- * initial processing instruction (<?xml version="1.0" ?>). We're
- * starting with 1.0, however, because most that's what most WXR
- * files declare.
- *
- * ## Future work
- *
- * @TODO: Skip over the following syntax elements:
- *        * <!DOCTYPE, see https://www.w3.org/TR/xml/#sec-prolog-dtd
- *        * <!ATTLIST, see https://www.w3.org/TR/xml/#attdecls
- *        * <!ENTITY, see https://www.w3.org/TR/xml/#sec-entity-decl
- *        * <!NOTATION, see https://www.w3.org/TR/xml/#sec-entity-decl
- *        * Conditional sections, see https://www.w3.org/TR/xml/#sec-condition-sect
- *
- * @TODO Explore declaring elements as PCdata directly in the XML document,
- *       for example as follows:
- *
- *       <!ELEMENT p (#PCDATA|emph)* >
- *
- *       or
- *
- *       <!DOCTYPE test [
- *           <!ELEMENT test (#PCDATA) >
- *           <!ENTITY % xx '&#37;zz;'>
- *           <!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >' >
- *           %xx;
- *       ]>
- *
- * @TODO: Support XML 1.1.
- * @package WordPress
- * @subpackage HTML-API
- * @since WP_VERSION
- */
-
-/**
- * Core class used to modify attributes in an XML document for tags matching a query.
- *
- * ## Usage
- *
- * Use of this class requires three steps:
- *
- *  1. Create a new class instance with your input XML document.
- *  2. Find the tag(s) you are looking for.
- *  3. Request changes to the attributes in those tag(s).
- *
- * Example:
- *
- *     $tags = new WP_XML_Tag_Processor( $xml );
- *     if ( $tags->next_tag( 'wp:option' ) ) {
- *         $tags->set_attribute( 'selected', 'yes' );
- *     }
- *
- * ### Finding tags
- *
- * The `next_tag()` function moves the internal cursor through
- * your input XML document until it finds a tag meeting any of
- * the supplied restrictions in the optional query argument. If
- * no argument is provided then it will find the next XML tag,
- * regardless of what kind it is.
- *
- * If you want to _find whatever the next tag is_:
- *
- *     $tags->next_tag();
- *
- * | Goal                                                      | Query                                                                           |
- * |-----------------------------------------------------------|---------------------------------------------------------------------------------|
- * | Find any tag.                                             | `$tags->next_tag();`                                                            |
- * | Find next image tag.                                      | `$tags->next_tag( array( 'tag_name' => 'wp:image' ) );`                              |
- * | Find next image tag (without passing the array).          | `$tags->next_tag( 'wp:image' );`                                                     |
- *
- * If a tag was found meeting your criteria then `next_tag()`
- * will return `true` and you can proceed to modify it. If it
- * returns `false`, however, it failed to find the tag and
- * moved the cursor to the end of the file.
- *
- * Once the cursor reaches the end of the file the processor
- * is done and if you want to reach an earlier tag you will
- * need to recreate the processor and start over, as it's
- * unable to back up or move in reverse.
- *
- * See the section on bookmarks for an exception to this
- * no-backing-up rule.
- *
- * #### Custom queries
- *
- * Sometimes it's necessary to further inspect an XML tag than
- * the query syntax here permits. In these cases one may further
- * inspect the search results using the read-only functions
- * provided by the processor or external state or variables.
- *
- * Example:
- *
- *     // Paint up to the first five `wp:musician` or `wp:actor` tags marked with the "jazzy" style.
- *     $remaining_count = 5;
- *     while ( $remaining_count > 0 && $tags->next_tag() ) {
- *         if (
- *              ( 'wp:musician' === $tags->get_tag() || 'wp:actor' === $tags->get_tag() ) &&
- *              'jazzy' === $tags->get_attribute( 'data-style' )
- *         ) {
- *             $tags->set_attribute( 'wp:theme-style', 'theme-style-everest-jazz' );
- *             $remaining_count--;
- *         }
- *     }
- *
- * `get_attribute()` will return `null` if the attribute wasn't present
- * on the tag when it was called. It may return `""` (the empty string)
- * in cases where the attribute was present but its value was empty.
- * For boolean attributes, those whose name is present but no value is
- * given, it will return `true` (the only way to set `false` for an
- * attribute is to remove it).
- *
- * #### When matching fails
- *
- * When `next_tag()` returns `false` it could mean different things:
- *
- *  - The requested tag wasn't found in the input document.
- *  - The input document ended in the middle of an XML syntax element.
- *
- * When a document ends in the middle of a syntax element it will pause
- * the processor. This is to make it possible in the future to extend the
- * input document and proceed - an important requirement for chunked
- * streaming parsing of a document.
- *
- * Example:
- *
- *     $processor = new WP_XML_Tag_Processor( 'This <wp:content is="a" partial="token' );
- *     false === $processor->next_tag();
- *
- * If a special element (see next section) is encountered but no closing tag
- * is found it will count as an incomplete tag. The parser will pause as if
- * the opening tag were incomplete.
- *
- * Example:
- *
- *     $processor = new WP_XML_Tag_Processor( '<style>// there could be more styling to come' );
- *     false === $processor->next_tag();
- *
- *     $processor = new WP_XML_Tag_Processor( '<style>// this is everything</style><wp:content>' );
- *     true === $processor->next_tag( 'DIV' );
- *
- * #### Special elements
- *
- * All XML elements are handled in the same way, except when you mark
- * them as PCdata elements. These are special because their contents
- * is treated as text, even if it looks like XML tags.
- *
- * Example:
- *
- *    $processor = new WP_XML_Tag_Processor( '<root><wp:post-content>Text inside</input></wp:post-content><</root>' );
- *    $processor->declare_element_as_pcdata('wp:post-content');
- *    $processor->next_tag('wp:post-content');
- *    $processor->next_token();
- *    echo $processor->get_modifiable_text(); // Text inside</input>
- *
- * ### Modifying XML attributes for a found tag
- *
- * Once you've found the start of an opening tag you can modify
- * any number of the attributes on that tag. You can set a new
- * value for an attribute, remove the entire attribute, or do
- * nothing and move on to the next opening tag.
- *
- * Example:
- *
- *     if ( $tags->next_tag( 'wp:user-group' ) ) {
- *         $tags->set_attribute( 'name', 'Content editors' );
- *         $tags->remove_attribute( 'data-test-id' );
- *     }
- *
- * If `set_attribute()` is called for an existing attribute it will
- * overwrite the existing value. Similarly, calling `remove_attribute()`
- * for a non-existing attribute has no effect on the document. Both
- * of these methods are safe to call without knowing if a given attribute
- * exists beforehand.
- *
- * ### Bookmarks
- *
- * While scanning through the input XML document it's possible to set
- * a named bookmark when a particular tag is found. Later on, after
- * continuing to scan other tags, it's possible to `seek` to one of
- * the set bookmarks and then proceed again from that point forward.
- *
- * Because bookmarks create processing overhead one should avoid
- * creating too many of them. As a rule, create only bookmarks
- * of known string literal names; avoid creating "mark_{$index}"
- * and so on. It's fine from a performance standpoint to create a
- * bookmark and update it frequently, such as within a loop.
- *
- *     $total_todos = 0;
- *     while ( $p->next_tag( array( 'tag_name' => 'wp:todo-list' ) ) ) {
- *         $p->set_bookmark( 'list-start' );
- *         while ( $p->next_tag( array( 'tag_closers' => 'visit' ) ) ) {
- *             if ( 'wp:todo' === $p->get_tag() && $p->is_tag_closer() ) {
- *                 $p->set_bookmark( 'list-end' );
- *                 $p->seek( 'list-start' );
- *                 $p->set_attribute( 'data-contained-todos', (string) $total_todos );
- *                 $total_todos = 0;
- *                 $p->seek( 'list-end' );
- *                 break;
- *             }
- *
- *             if ( 'wp:todo-item' === $p->get_tag() && ! $p->is_tag_closer() ) {
- *                 $total_todos++;
- *             }
- *         }
- *     }
- *
- * ## Tokens and finer-grained processing.
- *
- * It's possible to scan through every lexical token in the
- * XML document using the `next_token()` function. This
- * alternative form takes no argument and provides no built-in
- * query syntax.
- *
- * Example:
- *
- *      $title = '(untitled)';
- *      $text  = '';
- *      while ( $processor->next_token() ) {
- *          switch ( $processor->get_token_name() ) {
- *              case '#text':
- *                  $text .= $processor->get_modifiable_text();
- *                  break;
- *
- *              case 'wp:new-line':
- *                  $text .= "\n";
- *                  break;
- *
- *              case 'wp:title':
- *                  $title = $processor->get_modifiable_text();
- *                  break;
- *          }
- *      }
- *      return trim( "# {$title}\n\n{$text}" );
- *
- * ### Tokens and _modifiable text_.
- *
- * #### Other tokens with modifiable text.
- *
- * There are also non-elements which are void/self-closing in nature and contain
- * modifiable text that is part of that individual syntax token itself.
- *
- *  - `#text` nodes, whose entire token _is_ the modifiable text.
- *  - XML comments and tokens that become comments due to some syntax error. The
- *    text for these tokens is the portion of the comment inside of the syntax.
- *    E.g. for `<!-- comment -->` the text is `" comment "` (note the spaces are included).
- *  - `CDATA` sections, whose text is the content inside of the section itself. E.g. for
- *    `<![CDATA[some content]]>` the text is `"some content"`.
- *  - XML Processing instruction nodes like `<?xml __( "Like" ); ?>` (with restrictions [1]).
- *
- * [1]: XML requires "xml" as a processing instruction name. The Tag Processor captures the entire
- *      processing instruction as a single token up to the closing `?>`.
- *
- * ## Design and limitations
- *
- * The Tag Processor is designed to linearly scan XML documents and tokenize
- * XML tags and their attributes. It's designed to do this as efficiently as
- * possible without compromising parsing integrity. Therefore it will be
- * slower than some methods of modifying XML, such as those incorporating
- * over-simplified PCRE patterns, but will not introduce the defects and
- * failures that those methods bring in, which lead to broken page renders
- * and often to security vulnerabilities. On the other hand, it will be faster
- * than full-blown XML parsers such as DOMDocument and use considerably
- * less memory. It requires a negligible memory overhead, enough to consider
- * it a zero-overhead system.
- *
- * The performance characteristics are maintained by avoiding tree construction.
- *
- * The Tag Processor's checks the most important aspects of XML integrity as it scans
- * through the document. It verifies that a single root element exists, that are
- * no unclosed tags, and that each opener tag has a corresponding closer. It also
- * ensures no duplicate attributes exist on a single tag.
- *
- * At the same time, The Tag Processor also skips expensive validation of XML entities
- * in the document. The Tag Processor will initially pass through the invalid entity references
- * and only fail when the developer attempts to read their value. If that doesn't happen,
- * the invalid values will be left untouched in the final document.
- *
- * Most operations within the Tag Processor are designed to minimize the difference
- * between an input and output document for any given change. For example, the
- * `set_attribure` and `remove_attribute` methods preserve whitespace and the attribute
- * ordering within the element definition. An exception to this rule is that all attribute
- * updates store their values as double-quoted strings, meaning that attributes on input with
- * single-quoted or unquoted values will appear in the output with double-quotes.
- *
- * ### Text Encoding
- *
- * The Tag Processor assumes that the input XML document is encoded with a
- * UTF-8 encoding and will refuse to process documents that declare other encodings.
- *
- * @since WP_VERSION
- */
-class WP_XML_Tag_Processor {
-	/**
-	 * The maximum number of bookmarks allowed to exist at
-	 * any given time.
-	 *
-	 * @since WP_VERSION
-	 * @var int
-	 *
-	 * @see WP_XML_Tag_Processor::set_bookmark()
-	 */
-	const MAX_BOOKMARKS = 10;
-
-	/**
-	 * Maximum number of times seek() can be called.
-	 * Prevents accidental infinite loops.
-	 *
-	 * @since WP_VERSION
-	 * @var int
-	 *
-	 * @see WP_XML_Tag_Processor::seek()
-	 */
-	const MAX_SEEK_OPS = 1000;
-
-	/**
-	 * The XML document to parse.
-	 *
-	 * @since WP_VERSION
-	 * @var string
-	 */
-	public $xml;
-
-	/**
-	 * The last query passed to next_tag().
-	 *
-	 * @since WP_VERSION
-	 * @var array|null
-	 */
-	private $last_query;
-
-	/**
-	 * The tag name this processor currently scans for.
-	 *
-	 * @since WP_VERSION
-	 * @var string|null
-	 */
-	private $sought_tag_name;
-
-	/**
-	 * The match offset this processor currently scans for.
-	 *
-	 * @since WP_VERSION
-	 * @var int|null
-	 */
-	private $sought_match_offset;
-
-	/**
-	 * Whether to visit tag closers, e.g. </wp:content>, when walking an input document.
-	 *
-	 * @since WP_VERSION
-	 * @var bool
-	 */
-	private $stop_on_tag_closers;
-
-	/**
-	 * Specifies mode of operation of the parser at any given time.
-	 *
-	 * | State           | Meaning                                                                |
-	 * | ----------------|------------------------------------------------------------------------|
-	 * | *Ready*           | The parser is ready to run.                                          |
-	 * | *Complete*        | There is nothing left to parse.                                      |
-	 * | *Incomplete*      | The XML ended in the middle of a token; nothing more can be parsed.  |
-	 * | *Matched tag*     | Found an XML tag; it's possible to modify its attributes.            |
-	 * | *Text node*       | Found a #text node; this is plaintext and modifiable.                |
-	 * | *CDATA node*      | Found a CDATA section; this is modifiable.                           |
-	 * | *PI node*         | Found a processing instruction; this is modifiable.                  |
-	 * | *XML declaration* | Found an XML declaration; this is modifiable.                        |
-	 * | *Comment*         | Found a comment or bogus comment; this is modifiable.                |
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @see WP_XML_Tag_Processor::STATE_READY
-	 * @see WP_XML_Tag_Processor::STATE_COMPLETE
-	 * @see WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT
-	 * @see WP_XML_Tag_Processor::STATE_MATCHED_TAG
-	 * @see WP_XML_Tag_Processor::STATE_TEXT_NODE
-	 * @see WP_XML_Tag_Processor::STATE_CDATA_NODE
-	 * @see WP_XML_Tag_Processor::STATE_PI_NODE
-	 * @see WP_XML_Tag_Processor::STATE_XML_DECLARATION
-	 * @see WP_XML_Tag_Processor::STATE_COMMENT
-	 *
-	 * @var string
-	 */
-	protected $parser_state = self::STATE_READY;
-
-	/**
-	 * Whether we stopped at an incomplete text node.
-	 *
-	 * If we are before the last tag in the document, every text
-	 * node is incomplete until we find the next tag. However,
-	 * if we are after the last tag, an incomplete all-whitespace
-	 * node may either mean we're the end of the document or
-	 * that we're still waiting for more data/
-	 *
-	 * This flag allows us to differentiate between these two
-	 * cases in context-aware APIs such as WP_XML_Processor.
-	 *
-	 * @var bool
-	 */
-	protected $is_incomplete_text_node = false;
-
-	/**
-	 * How many bytes from the original XML document have been read and parsed.
-	 *
-	 * This value points to the latest byte offset in the input document which
-	 * has been already parsed. It is the internal cursor for the Tag Processor
-	 * and updates while scanning through the XML tokens.
-	 *
-	 * @since WP_VERSION
-	 * @var int
-	 */
-	public $bytes_already_parsed = 0;
-
-	/**
-	 * Byte offset in input document where current token starts.
-	 *
-	 * Example:
-	 *
-	 *     <wp:content id="test">...
-	 *     01234
-	 *     - token starts at 0
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @var int|null
-	 */
-	protected $token_starts_at;
-
-	/**
-	 * Byte length of current token.
-	 *
-	 * Example:
-	 *
-	 *     <wp:content id="test">...
-	 *     012345678901234
-	 *     - token length is 14 - 0 = 14
-	 *
-	 *     a <!-- comment --> is a token.
-	 *     0123456789 123456789 123456789
-	 *     - token length is 17 - 2 = 15
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @var int|null
-	 */
-	private $token_length;
-
-	/**
-	 * Byte offset in input document where current tag name starts.
-	 *
-	 * Example:
-	 *
-	 *     <wp:content id="test">...
-	 *     01234
-	 *      - tag name starts at 1
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @var int|null
-	 */
-	private $tag_name_starts_at;
-
-	/**
-	 * Byte length of current tag name.
-	 *
-	 * Example:
-	 *
-	 *     <wp:content id="test">...
-	 *     01234
-	 *      --- tag name length is 3
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @var int|null
-	 */
-	private $tag_name_length;
-
-	/**
-	 * Byte offset into input document where current modifiable text starts.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @var int
-	 */
-	private $text_starts_at;
-
-	/**
-	 * Byte length of modifiable text.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @var string
-	 */
-	private $text_length;
-
-	/**
-	 * Whether the current tag is an opening tag, e.g. <wp:content>, or a closing tag, e.g. </wp:content>.
-	 *
-	 * @var bool
-	 */
-	private $is_closing_tag;
-
-	/**
-	 * Stores an explanation for why something failed, if it did.
-	 *
-	 * @see self::get_last_error
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @var string|null
-	 */
-	protected $last_error = null;
-
-	/**
-	 * Lazily-built index of attributes found within an XML tag, keyed by the attribute name.
-	 *
-	 * Example:
-	 *
-	 *     // Supposing the parser is working through this content
-	 *     // and stops after recognizing the `id` attribute.
-	 *     // <wp:content id="test-4" class=outline title="data:text/plain;base64=asdk3nk1j3fo8">
-	 *     //                 ^ parsing will continue from this point.
-	 *     $this->attributes = array(
-	 *         'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false )
-	 *     );
-	 *
-	 *     // When picking up parsing again, or when asking to find the
-	 *     // `class` attribute we will continue and add to this array.
-	 *     $this->attributes = array(
-	 *         'id'    => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ),
-	 *         'class' => new WP_HTML_Attribute_Token( 'class', 23, 7, 17, 13, false )
-	 *     );
-	 *
-	 * @since WP_VERSION
-	 * @var WP_HTML_Attribute_Token[]
-	 */
-	private $attributes = array();
-
-	/**
-	 * Tracks a semantic location in the original XML which
-	 * shifts with updates as they are applied to the document.
-	 *
-	 * @since WP_VERSION
-	 * @var WP_HTML_Span[]
-	 */
-	protected $bookmarks = array();
-
-	/**
-	 * Lexical replacements to apply to input XML document.
-	 *
-	 * "Lexical" in this class refers to the part of this class which
-	 * operates on pure text _as text_ and not as XML. There's a line
-	 * between the public interface, with XML-semantic methods like
-	 * `set_attribute` and `add_class`, and an internal state that tracks
-	 * text offsets in the input document.
-	 *
-	 * When higher-level XML methods are called, those have to transform their
-	 * operations (such as setting an attribute's value) into text diffing
-	 * operations (such as replacing the sub-string from indices A to B with
-	 * some given new string). These text-diffing operations are the lexical
-	 * updates.
-	 *
-	 * As new higher-level methods are added they need to collapse their
-	 * operations into these lower-level lexical updates since that's the
-	 * Tag Processor's internal language of change. Any code which creates
-	 * these lexical updates must ensure that they do not cross XML syntax
-	 * boundaries, however, so these should never be exposed outside of this
-	 * class or any classes which intentionally expand its functionality.
-	 *
-	 * These are enqueued while editing the document instead of being immediately
-	 * applied to avoid processing overhead, string allocations, and string
-	 * copies when applying many updates to a single document.
-	 *
-	 * Example:
-	 *
-	 *     // Replace an attribute stored with a new value, indices
-	 *     // sourced from the lazily-parsed XML recognizer.
-	 *     $start  = $attributes['src']->start;
-	 *     $length = $attributes['src']->length;
-	 *     $modifications[] = new WP_HTML_Text_Replacement( $start, $length, $new_value );
-	 *
-	 *     // Correspondingly, something like this will appear in this array.
-	 *     $lexical_updates = array(
-	 *         WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' )
-	 *     );
-	 *
-	 * @since WP_VERSION
-	 * @var WP_HTML_Text_Replacement[]
-	 */
-	protected $lexical_updates = array();
-
-	/**
-	 * Tracks and limits `seek()` calls to prevent accidental infinite loops.
-	 *
-	 * @since WP_VERSION
-	 * @var int
-	 *
-	 * @see WP_XML_Tag_Processor::seek()
-	 */
-	protected $seek_count = 0;
-
-	public $had_previous_chunks = false;
-
-	/**
-	 * Constructor.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param string $xml XML to process.
-	 */
-	public function __construct( $xml ) {
-		$this->xml = $xml;
-	}
-
-	/**
-	 * Finds the next element matching the $query.
-	 *
-	 * This doesn't currently have a way to represent non-tags and doesn't process
-	 * semantic rules for text nodes.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param array|string|null $query {
-	 *     Optional. Which element name to find. Default is to find any tag.
-	 *
-	 *     @type string|null $tag_name     Which tag to find, or `null` for "any tag."
-	 *     @type int|null    $match_offset Find the Nth tag matching all search criteria.
-	 *                                     1 for "first" tag, 3 for "third," etc.
-	 *                                     Defaults to first tag.
-	 *     @type string|null $tag_closers  "visit" or "skip": whether to stop on tag closers, e.g. </wp:content>.
-	 * }
-	 * @return bool Whether a tag was matched.
-	 */
-	public function next_tag( $query = null ) {
-		$this->parse_query( $query );
-		$already_found = 0;
-
-		do {
-			if ( false === $this->base_class_next_token() ) {
-				return false;
-			}
-
-			if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
-				continue;
-			}
-
-			if ( $this->matches() ) {
-				++$already_found;
-			}
-		} while ( $already_found < $this->sought_match_offset );
-
-		return true;
-	}
-
-	/**
-	 * Finds the next token in the XML document.
-	 *
-	 * An XML document can be viewed as a stream of tokens,
-	 * where tokens are things like XML tags, XML comments,
-	 * text nodes, etc. This method finds the next token in
-	 * the XML document and returns whether it found one.
-	 *
-	 * If it starts parsing a token and reaches the end of the
-	 * document then it will seek to the start of the last
-	 * token and pause, returning `false` to indicate that it
-	 * failed to find a complete token.
-	 *
-	 * Possible token types, based on the XML specification:
-	 *
-	 *  - an XML tag, whether opening, closing, or void.
-	 *  - a text node - the plaintext inside tags.
-	 *  - an XML comment.
-	 *  - a processing instruction, e.g. `<?xml version="1.0" ?>`.
-	 *
-	 * The Tag Processor currently only supports the tag token.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 *
-	 * @return bool Whether a token was parsed.
-	 */
-	public function next_token() {
-		return $this->base_class_next_token();
-	}
-
-	/**
-	 * Internal method which finds the next token in the HTML document.
-	 *
-	 * This method is a protected internal function which implements the logic for
-	 * finding the next token in a document. It exists so that the parser can update
-	 * its state without affecting the location of the cursor in the document and
-	 * without triggering subclass methods for things like `next_token()`, e.g. when
-	 * applying patches before searching for the next token.
-	 *
-	 * @since 6.5.0
-	 *
-	 * @access private
-	 *
-	 * @return bool Whether a token was parsed.
-	 */
-	protected function base_class_next_token() {
-		$was_at = $this->bytes_already_parsed;
-		$this->after_tag();
-
-		// Don't proceed if there's nothing more to scan.
-		if (
-			self::STATE_COMPLETE === $this->parser_state ||
-			self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
-			null !== $this->last_error
-		) {
-			return false;
-		}
-
-		/*
-		 * The next step in the parsing loop determines the parsing state;
-		 * clear it so that state doesn't linger from the previous step.
-		 */
-		$this->parser_state = self::STATE_READY;
-
-		if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
-			$this->parser_state = self::STATE_COMPLETE;
-			return false;
-		}
-
-		// Find the next tag if it exists.
-		if ( false === $this->parse_next_tag() ) {
-			if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
-				$this->bytes_already_parsed = $was_at;
-			}
-
-			return false;
-		}
-
-		if ( null !== $this->last_error ) {
-			return false;
-		}
-
-		/*
-		 * For legacy reasons the rest of this function handles tags and their
-		 * attributes. If the processor has reached the end of the document
-		 * or if it matched any other token then it should return here to avoid
-		 * attempting to process tag-specific syntax.
-		 */
-		if (
-			self::STATE_INCOMPLETE_INPUT !== $this->parser_state &&
-			self::STATE_COMPLETE !== $this->parser_state &&
-			self::STATE_MATCHED_TAG !== $this->parser_state
-		) {
-			return true;
-		}
-
-		if ( $this->is_closing_tag ) {
-			$this->skip_whitespace();
-		} else {
-			// Parse all of its attributes.
-			while ( $this->parse_next_attribute() ) {
-				continue;
-			}
-		}
-
-		if ( null !== $this->last_error ) {
-			return false;
-		}
-
-		// Ensure that the tag closes before the end of the document.
-		if (
-			self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
-			$this->bytes_already_parsed >= strlen( $this->xml )
-		) {
-			// Does this appropriately clear state (parsed attributes)?
-			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-			$this->bytes_already_parsed = $was_at;
-
-			return false;
-		}
-
-		$tag_ends_at = strpos( $this->xml, '>', $this->bytes_already_parsed );
-		if ( false === $tag_ends_at ) {
-			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-			$this->bytes_already_parsed = $was_at;
-
-			return false;
-		}
-
-		if ( $this->is_closing_tag && $tag_ends_at !== $this->bytes_already_parsed ) {
-			$this->last_error = self::ERROR_SYNTAX;
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'Invalid closing tag encountered.' ),
-				'WP_VERSION'
-			);
-			return false;
-		}
-
-		$this->parser_state         = self::STATE_MATCHED_TAG;
-		$this->bytes_already_parsed = $tag_ends_at + 1;
-		$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;
-
-		/*
-		 * If we are in a PCData element, everything until the closer
-		 * is considered text.
-		 */
-		if ( ! $this->is_pcdata_element() ) {
-			return true;
-		}
-
-		/*
-		 * Preserve the opening tag pointers, as these will be overwritten
-		 * when finding the closing tag. They will be reset after finding
-		 * the closing to tag to point to the opening of the special atomic
-		 * tag sequence.
-		 */
-		$tag_name_starts_at = $this->tag_name_starts_at;
-		$tag_name_length    = $this->tag_name_length;
-		$tag_ends_at        = $this->token_starts_at + $this->token_length;
-		$attributes         = $this->attributes;
-
-		$found_closer = $this->skip_pcdata( $this->get_tag() );
-
-		// Closer not found, the document is incomplete.
-		if ( false === $found_closer ) {
-			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-			$this->bytes_already_parsed = $was_at;
-			return false;
-		}
-
-		/*
-		 * The values here look like they reference the opening tag but they reference
-		 * the closing tag instead. This is why the opening tag values were stored
-		 * above in a variable. It reads confusingly here, but that's because the
-		 * functions that skip the contents have moved all the internal cursors past
-		 * the inner content of the tag.
-		 */
-		$this->token_starts_at    = $was_at;
-		$this->token_length       = $this->bytes_already_parsed - $this->token_starts_at;
-		$this->text_starts_at     = $tag_ends_at;
-		$this->text_length        = $this->tag_name_starts_at - $this->text_starts_at;
-		$this->tag_name_starts_at = $tag_name_starts_at;
-		$this->tag_name_length    = $tag_name_length;
-		$this->attributes         = $attributes;
-
-		return true;
-	}
-
-	/**
-	 * Whether the processor paused because the input XML document ended
-	 * in the middle of a syntax element, such as in the middle of a tag.
-	 *
-	 * Example:
-	 *
-	 *     $processor = new WP_XML_Tag_Processor( '<input type="text" value="Th' );
-	 *     false      === $processor->get_next_tag();
-	 *     true       === $processor->is_paused_at_incomplete_token();
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return bool Whether the parse paused at the start of an incomplete token.
-	 */
-	public function is_paused_at_incomplete_input(): bool {
-		return self::STATE_INCOMPLETE_INPUT === $this->parser_state;
-	}
-
-	/**
-	 * Whether the processor finished processing.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return bool Whether the processor finished processing.
-	 */
-	public function is_finished(): bool {
-		return self::STATE_COMPLETE === $this->parser_state;
-	}
-
-	/**
-	 * Sets a bookmark in the XML document.
-	 *
-	 * Bookmarks represent specific places or tokens in the XML
-	 * document, such as a tag opener or closer. When applying
-	 * edits to a document, such as setting an attribute, the
-	 * text offsets of that token may shift; the bookmark is
-	 * kept updated with those shifts and remains stable unless
-	 * the entire span of text in which the token sits is removed.
-	 *
-	 * Release bookmarks when they are no longer needed.
-	 *
-	 * Example:
-	 *
-	 *     <main><h2>Surprising fact you may not know!</h2></main>
-	 *           ^  ^
-	 *            \-|-- this `H2` opener bookmark tracks the token
-	 *
-	 *     <main class="clickbait"><h2>Surprising fact you may no…
-	 *                             ^  ^
-	 *                              \-|-- it shifts with edits
-	 *
-	 * Bookmarks provide the ability to seek to a previously-scanned
-	 * place in the XML document. This avoids the need to re-scan
-	 * the entire document.
-	 *
-	 * Example:
-	 *
-	 *     <ul><li>One</li><li>Two</li><li>Three</li></ul>
-	 *                                 ^^^^
-	 *                                 want to note this last item
-	 *
-	 *     $p = new WP_XML_Tag_Processor( $xml );
-	 *     $in_list = false;
-	 *     while ( $p->next_tag( array( 'tag_closers' => $in_list ? 'visit' : 'skip' ) ) ) {
-	 *         if ( 'UL' === $p->get_tag() ) {
-	 *             if ( $p->is_tag_closer() ) {
-	 *                 $in_list = false;
-	 *                 $p->set_bookmark( 'resume' );
-	 *                 if ( $p->seek( 'last-li' ) ) {
-	 *                     $p->add_class( 'last-li' );
-	 *                 }
-	 *                 $p->seek( 'resume' );
-	 *                 $p->release_bookmark( 'last-li' );
-	 *                 $p->release_bookmark( 'resume' );
-	 *             } else {
-	 *                 $in_list = true;
-	 *             }
-	 *         }
-	 *
-	 *         if ( 'LI' === $p->get_tag() ) {
-	 *             $p->set_bookmark( 'last-li' );
-	 *         }
-	 *     }
-	 *
-	 * Bookmarks intentionally hide the internal string offsets
-	 * to which they refer. They are maintained internally as
-	 * updates are applied to the XML document and therefore
-	 * retain their "position" - the location to which they
-	 * originally pointed. The inability to use bookmarks with
-	 * functions like `substr` is therefore intentional to guard
-	 * against accidentally breaking the XML.
-	 *
-	 * Because bookmarks allocate memory and require processing
-	 * for every applied update, they are limited and require
-	 * a name. They should not be created with programmatically-made
-	 * names, such as "li_{$index}" with some loop. As a general
-	 * rule they should only be created with string-literal names
-	 * like "start-of-section" or "last-paragraph".
-	 *
-	 * Bookmarks are a powerful tool to enable complicated behavior.
-	 * Consider double-checking that you need this tool if you are
-	 * reaching for it, as inappropriate use could lead to broken
-	 * XML structure or unwanted processing overhead.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param string $name Identifies this particular bookmark.
-	 * @return bool Whether the bookmark was successfully created.
-	 */
-	public function set_bookmark( $name ) {
-		// It only makes sense to set a bookmark if the parser has paused on a concrete token.
-		if (
-			self::STATE_COMPLETE === $this->parser_state ||
-			self::STATE_INCOMPLETE_INPUT === $this->parser_state
-		) {
-			return false;
-		}
-
-		if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= static::MAX_BOOKMARKS ) {
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'Too many bookmarks: cannot create any more.' ),
-				'WP_VERSION'
-			);
-			return false;
-		}
-
-		$this->bookmarks[ $name ] = new WP_HTML_Span( $this->token_starts_at, $this->token_length );
-
-		return true;
-	}
-
-
-	/**
-	 * Removes a bookmark that is no longer needed.
-	 *
-	 * Releasing a bookmark frees up the small
-	 * performance overhead it requires.
-	 *
-	 * @param string $name Name of the bookmark to remove.
-	 * @return bool Whether the bookmark already existed before removal.
-	 */
-	public function release_bookmark( $name ) {
-		if ( ! array_key_exists( $name, $this->bookmarks ) ) {
-			return false;
-		}
-
-		unset( $this->bookmarks[ $name ] );
-
-		return true;
-	}
-
-	/**
-	 * Skips contents of PCDATA element.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @see https://www.w3.org/TR/xml/#sec-mixed-content
-	 *
-	 * @param string $tag_name The tag name which will close the PCDATA region.
-	 * @return false|int Byte offset of the closing tag, or false if not found.
-	 */
-	private function skip_pcdata( $tag_name ) {
-		$xml        = $this->xml;
-		$doc_length = strlen( $xml );
-		$tag_length = strlen( $tag_name );
-
-		$at = $this->bytes_already_parsed;
-		while ( false !== $at && $at < $doc_length ) {
-			$at                       = strpos( $this->xml, '</' . $tag_name, $at );
-			$this->tag_name_starts_at = $at;
-
-			// Fail if there is no possible tag closer.
-			if ( false === $at ) {
-				return false;
-			}
-
-			$at                        += 2 + $tag_length;
-			$at                        += strspn( $this->xml, " \t\f\r\n", $at );
-			$this->bytes_already_parsed = $at;
-
-			/*
-			 * Ensure that the tag name terminates to avoid matching on
-			 * substrings of a longer tag name. For example, the sequence
-			 * "</wp:contentrug" should not match for "</wp:content" even
-			 * though "wp:content" is found within the text.
-			 */
-			if ( $at >= strlen( $xml ) ) {
-				return false;
-			}
-			if ( '>' === $xml[ $at ] ) {
-				$this->bytes_already_parsed = $at + 1;
-				return true;
-			}
-		}
-
-		return false;
-	}
-
-	/**
-	 * Returns the last error, if any.
-	 *
-	 * Various situations lead to parsing failure but this class will
-	 * return `false` in all those cases. To determine why something
-	 * failed it's possible to request the last error. This can be
-	 * helpful to know to distinguish whether a given tag couldn't
-	 * be found or if content in the document caused the processor
-	 * to give up and abort processing.
-	 *
-	 * Example
-	 *
-	 *     $processor = WP_XML_Tag_Processor::create_fragment( '<wp:content invalid-attr></wp:content>' );
-	 *     false === $processor->next_tag();
-	 *     WP_XML_Tag_Processor::ERROR_SYNTAX === $processor->get_last_error();
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @see self::ERROR_UNSUPPORTED
-	 * @see self::ERROR_EXCEEDED_MAX_BOOKMARKS
-	 *
-	 * @return string|null The last error, if one exists, otherwise null.
-	 */
-	public function get_last_error(): ?string {
-		return $this->last_error;
-	}
-
-	/**
-	 * Tag names declared as PCDATA elements.
-	 *
-	 * PCDATA elements are elements in which everything is treated as
-	 * text, even syntax that may look like other elements, closers,
-	 * processing instructions, etc.
-	 *
-	 * Example:
-	 *
-	 *     <root>
-	 *         <my-pcdata>
-	 *             This text contains syntax that seems
-	 *             like XML nodes:
-	 *
-	 *             <input />
-	 *             </seemingly invalid element --/>
-	 *             <!-- is this a comment? -->
-	 *             <?xml version="1.0" ?>
-	 *
-	 *             &amp;&lt;&gt;&quot;&apos;
-	 *
-	 *             But! It's all treated as text.
-	 *         </my-pcdata>
-	 *    </root>
-	 *
-	 * @var array
-	 */
-	private $pcdata_elements = array();
-
-	/**
-	 * Declares an element as PCDATA.
-	 *
-	 * PCDATA elements are elements in which everything is treated as
-	 * text, even syntax that may look like other elements, closers,
-	 * processing instructions, etc.
-	 *
-	 * For example:
-	 *
-	 *      $processor = new WP_XML_Tag_Processor(
-	 *      <<<XML
-	 *          <root>
-	 *              <my-pcdata>
-	 *                  This text uses syntax that may seem
-	 *                  like XML nodes:
-	 *
-	 *                  <input />
-	 *                  </seemingly invalid element --/>
-	 *                  <!-- is this a comment? -->
-	 *                  <?xml version="1.0" ?>
-	 *
-	 *                  &amp;&lt;&gt;&quot;&apos;
-	 *
-	 *                  But! It's all treated as text.
-	 *              </my-pcdata>
-	 *         </root>
-	 *      XML
-	 *      );
-	 *
-	 *      $processor->declare_element_as_pcdata('my-pcdata');
-	 *      $processor->next_tag('my-pcdata');
-	 *      $processor->next_token();
-	 *
-	 *      // Returns everything inside the <my-pcdata>
-	 *      // element as text:
-	 *      $processor->get_modifiable_text();
-	 *
-	 * @param string $element_name The name of the element to declare as PCDATA.
-	 * @return void
-	 */
-	public function declare_element_as_pcdata( $element_name ) {
-		$this->pcdata_elements[ $element_name ] = true;
-	}
-
-	/**
-	 * Indicates if the currently matched tag is a PCDATA element.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return bool Whether the currently matched tag is a PCDATA element.
-	 */
-	public function is_pcdata_element() {
-		return array_key_exists( $this->get_tag(), $this->pcdata_elements );
-	}
-
-	/**
-	 * Parses the next tag.
-	 *
-	 * This will find and start parsing the next tag, including
-	 * the opening `<`, the potential closer `/`, and the tag
-	 * name. It does not parse the attributes or scan to the
-	 * closing `>`; these are left for other methods.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return bool Whether a tag was found before the end of the document.
-	 */
-	private function parse_next_tag() {
-		$this->after_tag();
-
-		$xml        = $this->xml;
-		$doc_length = strlen( $xml );
-		$was_at     = $this->bytes_already_parsed;
-		$at         = $was_at;
-
-		while ( false !== $at && $at < $doc_length ) {
-			$at = strpos( $xml, '<', $at );
-
-			/*
-			 * There may be no text nodes outside of elements.
-			 * If this character sequence was encountered outside of
-			 * the root element, it is a syntax error. WP_XML_Tag_Processor
-			 * does not have that context – it is up to the API consumer,
-			 * such as WP_Tag_Processor, to handle this scenario.
-			 */
-			if ( false === $at ) {
-				$this->parser_state            = self::STATE_INCOMPLETE_INPUT;
-				$this->is_incomplete_text_node = true;
-				$this->text_starts_at          = $was_at;
-				$this->text_length             = $doc_length - $was_at;
-				return false;
-			}
-
-			if ( $at > $was_at ) {
-				$this->parser_state         = self::STATE_TEXT_NODE;
-				$this->token_starts_at      = $was_at;
-				$this->token_length         = $at - $was_at;
-				$this->text_starts_at       = $was_at;
-				$this->text_length          = $this->token_length;
-				$this->bytes_already_parsed = $at;
-
-				return true;
-			}
-
-			$this->token_starts_at = $at;
-
-			if ( $at + 1 < $doc_length && '/' === $this->xml[ $at + 1 ] ) {
-				$this->is_closing_tag = true;
-				++$at;
-			} else {
-				$this->is_closing_tag = false;
-			}
-
-			if ( $at + 1 >= $doc_length ) {
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-				return false;
-			}
-
-			/*
-			 * XML tag names are defined by the same `Name` grammar rule as attribute
-			 * names.
-			 *
-			 * Reference:
-			 * * https://www.w3.org/TR/xml/#NT-STag
-			 * * https://www.w3.org/TR/xml/#NT-Name
-			 */
-			$tag_name_length = $this->parse_name( $at + 1 );
-			if ( $tag_name_length > 0 ) {
-				++$at;
-				$this->parser_state         = self::STATE_MATCHED_TAG;
-				$this->tag_name_starts_at   = $at;
-				$this->tag_name_length      = $tag_name_length;
-				$this->token_length         = $this->tag_name_length;
-				$this->bytes_already_parsed = $at + $this->tag_name_length;
-
-				return true;
-			}
-
-			/*
-			 * Abort if no tag is found before the end of
-			 * the document. There is nothing left to parse.
-			 */
-			if ( $at + 1 >= $doc_length ) {
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-
-				return false;
-			}
-
-			/*
-			 * `<!` indicates one of a few possible constructs:
-			 */
-			if ( ! $this->is_closing_tag && '!' === $xml[ $at + 1 ] ) {
-				/*
-				 * `<!--` mark a beginning of a comment.
-				 * https://www.w3.org/TR/xml/#sec-comments
-				 */
-				if (
-					$doc_length > $at + 3 &&
-					'-' === $xml[ $at + 2 ] &&
-					'-' === $xml[ $at + 3 ]
-				) {
-					$closer_at = $at + 4;
-					// If it's not possible to close the comment then there is nothing more to scan.
-					if ( $doc_length <= $closer_at ) {
-						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-
-						return false;
-					}
-
-					/*
-					 * Comments may only be closed by a --> sequence.
-					 */
-					--$closer_at; // Pre-increment inside condition below reduces risk of accidental infinite looping.
-					while ( ++$closer_at < $doc_length ) {
-						$closer_at = strpos( $xml, '--', $closer_at );
-						if ( false === $closer_at || $closer_at + 2 === $doc_length ) {
-							$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-							return false;
-						}
-
-						/*
-						 * The string " -- " (double-hyphen) must not occur within comments
-						 * See https://www.w3.org/TR/xml/#sec-comments
-						 */
-						if ( '>' !== $xml[ $closer_at + 2 ] ) {
-							$this->last_error = self::ERROR_SYNTAX;
-							_doing_it_wrong(
-								__METHOD__,
-								__( 'Invalid comment syntax encountered.' ),
-								'WP_VERSION'
-							);
-							return false;
-						}
-
-						$this->parser_state         = self::STATE_COMMENT;
-						$this->token_length         = $closer_at + 3 - $this->token_starts_at;
-						$this->text_starts_at       = $this->token_starts_at + 4;
-						$this->text_length          = $closer_at - $this->text_starts_at;
-						$this->bytes_already_parsed = $closer_at + 3;
-						return true;
-					}
-				}
-
-				/*
-				 * Identify CDATA sections.
-				 *
-				 * Within a CDATA section, everything until the ]]> string is treated
-				 * as data, not markup. Left angle brackets and ampersands may occur in
-				 * their literal form; they need not (and cannot) be escaped using "&lt;"
-				 * and "&amp;". CDATA sections cannot nest.
-				 *
-				 * See https://www.w3.org/TR/xml11.xml/#sec-cdata-sect
-				 */
-				if (
-					! $this->is_closing_tag &&
-					$doc_length > $this->token_starts_at + 8 &&
-					'[' === $xml[ $this->token_starts_at + 2 ] &&
-					'C' === $xml[ $this->token_starts_at + 3 ] &&
-					'D' === $xml[ $this->token_starts_at + 4 ] &&
-					'A' === $xml[ $this->token_starts_at + 5 ] &&
-					'T' === $xml[ $this->token_starts_at + 6 ] &&
-					'A' === $xml[ $this->token_starts_at + 7 ] &&
-					'[' === $xml[ $this->token_starts_at + 8 ]
-				) {
-					$closer_at = strpos( $xml, ']]>', $at + 1 );
-					if ( false === $closer_at ) {
-						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-
-						return false;
-					}
-
-					$this->parser_state         = self::STATE_CDATA_NODE;
-					$this->token_length         = $closer_at + 1 - $this->token_starts_at;
-					$this->text_starts_at       = $this->token_starts_at + 9;
-					$this->text_length          = $closer_at - $this->text_starts_at;
-					$this->bytes_already_parsed = $closer_at + 3;
-					return true;
-				}
-
-				/*
-				 * Anything else here is either unsupported at this point or invalid
-				 * syntax. See the class-level @TODO annotations for more information.
-				 */
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-
-				return false;
-			}
-
-			/*
-			 * An `<?xml` token at the beginning of the document marks a start of an
-			 * xml declaration.
-			 * See https://www.w3.org/TR/xml/#sec-prolog-dtd
-			 */
-			if (
-				0 === $at &&
-				! $this->had_previous_chunks &&
-				! $this->is_closing_tag &&
-				'?' === $xml[ $at + 1 ] &&
-				'x' === $xml[ $at + 2 ] &&
-				'm' === $xml[ $at + 3 ] &&
-				'l' === $xml[ $at + 4 ]
-			) {
-				// Setting the parser state early for the get_attribute() calls later in this
-				// branch.
-				$this->parser_state = self::STATE_XML_DECLARATION;
-
-				$at += 5;
-
-				// Skip whitespace.
-				$at += strspn( $this->xml, " \t\f\r\n", $at );
-
-				$this->bytes_already_parsed = $at;
-
-				/*
-				 * Reuse parse_next_attribute() to parse the XML declaration attributes.
-				 * Technically, only "version", "encoding", and "standalone" are accepted
-				 * and, unlike regular tag attributes, their values can contain any character
-				 * other than the opening quote. However, the "<" and "&" characters are very
-				 * unlikely to be encountered and cause trouble, so this code path liberally
-				 * does not provide a dedicated parsing logic.
-				 */
-				while ( false !== $this->parse_next_attribute() ) {
-					$this->skip_whitespace();
-					// Parse until the XML declaration closer.
-					if ( '?' === $xml[ $this->bytes_already_parsed ] ) {
-						break;
-					}
-				}
-
-				if ( null !== $this->last_error ) {
-					return false;
-				}
-
-				foreach ( $this->attributes as $name => $attribute ) {
-					if ( 'version' !== $name && 'encoding' !== $name && 'standalone' !== $name ) {
-						$this->last_error = self::ERROR_SYNTAX;
-						_doing_it_wrong(
-							__METHOD__,
-							__( 'Invalid attribute found in XML declaration.' ),
-							'WP_VERSION'
-						);
-						return false;
-					}
-				}
-
-				if ( '1.0' !== $this->get_attribute( 'version' ) ) {
-					$this->last_error = self::ERROR_UNSUPPORTED;
-					_doing_it_wrong(
-						__METHOD__,
-						__( 'Unsupported XML version declared' ),
-						'WP_VERSION'
-					);
-					return false;
-				}
-
-				/**
-				 * Standalone XML documents have no external dependencies,
-				 * including predefined entities like `&nbsp;` and `&copy;`.
-				 *
-				 * See https://www.w3.org/TR/xml/#sec-predefined-ent.
-				 */
-				if ( null !== $this->get_attribute( 'encoding' )
-					&& 'UTF-8' !== strtoupper( $this->get_attribute( 'encoding' ) )
-				) {
-					$this->last_error = self::ERROR_UNSUPPORTED;
-					_doing_it_wrong(
-						__METHOD__,
-						__( 'Unsupported XML encoding declared, only UTF-8 is supported.' ),
-						'WP_VERSION'
-					);
-					return false;
-				}
-				if ( null !== $this->get_attribute( 'standalone' )
-					&& 'YES' !== strtoupper( $this->get_attribute( 'standalone' ) )
-				) {
-					$this->last_error = self::ERROR_UNSUPPORTED;
-					_doing_it_wrong(
-						__METHOD__,
-						__( 'Standalone XML documents are not supported.' ),
-						'WP_VERSION'
-					);
-					return false;
-				}
-
-				$at = $this->bytes_already_parsed;
-
-				// Skip whitespace.
-				$at += strspn( $this->xml, " \t\f\r\n", $at );
-
-				// Consume the closer.
-				if ( ! (
-					$at + 2 <= $doc_length &&
-					'?' === $xml[ $at ] &&
-					'>' === $xml[ $at + 1 ]
-				) ) {
-					$this->last_error = self::ERROR_SYNTAX;
-					_doing_it_wrong(
-						__METHOD__,
-						__( 'XML declaration closer not found.' ),
-						'WP_VERSION'
-					);
-					return false;
-				}
-
-				$this->token_length         = $at + 2 - $this->token_starts_at;
-				$this->text_starts_at       = $this->token_starts_at + 2;
-				$this->text_length          = $at - $this->text_starts_at;
-				$this->bytes_already_parsed = $at + 2;
-				$this->parser_state         = self::STATE_XML_DECLARATION;
-
-				return true;
-			}
-
-			/*
-			 * `<?` denotes a processing instruction.
-			 * See https://www.w3.org/TR/xml/#sec-pi
-			 */
-			if (
-				! $this->is_closing_tag &&
-				'?' === $xml[ $at + 1 ]
-			) {
-				if ( $at + 4 >= $doc_length ) {
-					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-
-					return false;
-				}
-
-				if ( ! (
-					( 'x' === $xml[ $at + 2 ] || 'X' === $xml[ $at + 2 ] ) &&
-					( 'm' === $xml[ $at + 3 ] || 'M' === $xml[ $at + 3 ] ) &&
-					( 'l' === $xml[ $at + 4 ] || 'L' === $xml[ $at + 4 ] )
-				) ) {
-					_doing_it_wrong(
-						__METHOD__,
-						__( 'Invalid processing instruction target.' ),
-						'WP_VERSION'
-					);
-					return false;
-				}
-
-				$at += 5;
-
-				// Skip whitespace.
-				$this->skip_whitespace();
-
-				/*
-				 * Find the closer.
-				 *
-				 * We could, at this point, only consume the bytes allowed by the specification, that is:
-				 *
-				 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
-				 *
-				 * However, that would require running a slow regular-expression engine for, seemingly,
-				 * little benefit. For now, we are going to pretend that all bytes are allowed until the
-				 * closing ?> is found. Some failures may pass unnoticed. That may not be a problem in practice,
-				 * but if it is then this code path will require a stricter implementation.
-				 */
-				$closer_at = strpos( $xml, '?>', $at );
-				if ( false === $closer_at ) {
-					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-
-					return false;
-				}
-
-				$this->parser_state         = self::STATE_PI_NODE;
-				$this->token_length         = $closer_at + 5 - $this->token_starts_at;
-				$this->text_starts_at       = $this->token_starts_at + 5;
-				$this->text_length          = $closer_at - $this->text_starts_at;
-				$this->bytes_already_parsed = $closer_at + 2;
-
-				return true;
-			}
-
-			++$at;
-		}
-
-		return false;
-	}
-
-	/**
-	 * Parses the next attribute.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return bool Whether an attribute was found before the end of the document.
-	 */
-	private function parse_next_attribute() {
-		// Skip whitespace and slashes.
-		$this->bytes_already_parsed += strspn( $this->xml, " \t\f\r\n/", $this->bytes_already_parsed );
-		if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-			return false;
-		}
-
-		// No more attributes to parse.
-		if ( '>' === $this->xml[ $this->bytes_already_parsed ] ) {
-			return false;
-		}
-
-		$attribute_start       = $this->bytes_already_parsed;
-		$attribute_name_length = $this->parse_name( $this->bytes_already_parsed );
-		if ( 0 === $attribute_name_length ) {
-			$this->last_error = self::ERROR_SYNTAX;
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'Invalid attribute name encountered.' ),
-				'WP_VERSION'
-			);
-		}
-		$this->bytes_already_parsed += $attribute_name_length;
-		$attribute_name              = substr( $this->xml, $attribute_start, $attribute_name_length );
-		$this->skip_whitespace();
-
-		// Parse attribute value.
-		++$this->bytes_already_parsed;
-		$this->skip_whitespace();
-		if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-			return false;
-		}
-		switch ( $this->xml[ $this->bytes_already_parsed ] ) {
-			case "'":
-			case '"':
-				$quote       = $this->xml[ $this->bytes_already_parsed ];
-				$value_start = $this->bytes_already_parsed + 1;
-				/**
-				 * XML attributes cannot contain the characters "<" or "&".
-				 *
-				 * This only checks for "<" because it's reasonably fast.
-				 * Ampersands are actually allowed when used as the start
-				 * of an entity reference, but enforcing that would require
-				 * an expensive and complex check. It doesn't seem to be
-				 * worth it.
-				 *
-				 * @TODO: Discuss enforcing or abandoning the ampersand rule
-				 *        and document the rationale.
-				 */
-				$value_length  = strcspn( $this->xml, "<$quote", $value_start );
-				$attribute_end = $value_start + $value_length + 1;
-
-				if ( $attribute_end - 1 >= strlen( $this->xml ) ) {
-					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-
-					return false;
-				}
-
-				if ( $this->xml[ $attribute_end - 1 ] !== $quote ) {
-					$this->last_error = self::ERROR_SYNTAX;
-					_doing_it_wrong(
-						__METHOD__,
-						__( 'A disallowed character encountered in an attribute value (either < or &).' ),
-						'WP_VERSION'
-					);
-				}
-				$this->bytes_already_parsed = $attribute_end;
-				break;
-
-			default:
-				$this->last_error = self::ERROR_SYNTAX;
-				_doing_it_wrong(
-					__METHOD__,
-					__( 'Unquoted attribute value encountered.' ),
-					'WP_VERSION'
-				);
-				return false;
-		}
-
-		if ( $attribute_end >= strlen( $this->xml ) ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-			return false;
-		}
-
-		if ( $this->is_closing_tag ) {
-			return true;
-		}
-
-		if ( array_key_exists( $attribute_name, $this->attributes ) ) {
-			$this->last_error = self::ERROR_SYNTAX;
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'Duplicate attribute found in an XML tag.' ),
-				'WP_VERSION'
-			);
-			return false;
-		}
-
-		$this->attributes[ $attribute_name ] = new WP_HTML_Attribute_Token(
-			$attribute_name,
-			$value_start,
-			$value_length,
-			$attribute_start,
-			$attribute_end - $attribute_start,
-			false
-		);
-
-		return true;
-	}
-
-	/**
-	 * Move the internal cursor past any immediate successive whitespace.
-	 *
-	 * @since WP_VERSION
-	 */
-	private function skip_whitespace() {
-		$this->bytes_already_parsed += strspn( $this->xml, " \t\f\r\n", $this->bytes_already_parsed );
-	}
-
-	// Describes the first character of the attribute name:
-	// NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
-	// See https://www.w3.org/TR/xml/#NT-Name
-	const NAME_START_CHAR_PATTERN = ':a-z_A-Z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}';
-	const NAME_CHAR_PATTERN       = '\-\.0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}:a-z_A-Z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}';
-	private function parse_name( $offset ) {
-		if ( 1 !== preg_match(
-			'~[' . self::NAME_START_CHAR_PATTERN . ']~Ssu',
-			$this->xml[ $offset ],
-			$matches
-		) ) {
-			return 0;
-		}
-
-		$name_length = 1;
-
-		// Consume the rest of the name
-		preg_match(
-			'~\G([' . self::NAME_CHAR_PATTERN . ']+)~Ssu',
-			$this->xml,
-			$matches,
-			0,
-			$offset + 1
-		);
-
-		if ( is_array( $matches ) && count( $matches ) > 0 ) {
-			$name_length += strlen( $matches[0] );
-		}
-
-		return $name_length;
-	}
-
-	/**
-	 * Applies attribute updates and cleans up once a tag is fully parsed.
-	 *
-	 * @since WP_VERSION
-	 */
-	private function after_tag() {
-		/*
-		 * Purge updates if there are too many. The actual count isn't
-		 * scientific, but a few values from 100 to a few thousand were
-		 * tests to find a practically-useful limit.
-		 *
-		 * If the update queue grows too big, then the Tag Processor
-		 * will spend more time iterating through them and lose the
-		 * efficiency gains of deferring applying them.
-		 */
-		if ( 1000 < count( $this->lexical_updates ) ) {
-			$this->get_updated_xml();
-		}
-
-		foreach ( $this->lexical_updates as $name => $update ) {
-			/*
-			 * Any updates appearing after the cursor should be applied
-			 * before proceeding, otherwise they may be overlooked.
-			 */
-			if ( $update->start >= $this->bytes_already_parsed ) {
-				$this->get_updated_xml();
-				break;
-			}
-
-			if ( is_int( $name ) ) {
-				continue;
-			}
-
-			$this->lexical_updates[] = $update;
-			unset( $this->lexical_updates[ $name ] );
-		}
-
-		$this->is_incomplete_text_node = false;
-		$this->token_starts_at         = null;
-		$this->token_length            = null;
-		$this->tag_name_starts_at      = null;
-		$this->tag_name_length         = null;
-		$this->text_starts_at          = 0;
-		$this->text_length             = 0;
-		$this->is_closing_tag          = null;
-		$this->attributes              = array();
-	}
-
-	protected function reset_state() {
-		$this->xml                     = '';
-		$this->last_query              = null;
-		$this->sought_tag_name         = null;
-		$this->sought_match_offset     = 0;
-		$this->stop_on_tag_closers     = false;
-		$this->parser_state            = self::STATE_READY;
-		$this->is_incomplete_text_node = false;
-		$this->bytes_already_parsed    = 0;
-		$this->token_starts_at         = null;
-		$this->token_length            = null;
-		$this->tag_name_starts_at      = null;
-		$this->tag_name_length         = null;
-		$this->text_starts_at          = 0;
-		$this->text_length             = 0;
-		$this->is_closing_tag          = null;
-		$this->last_error              = null;
-		$this->attributes              = array();
-		$this->bookmarks               = array();
-		$this->lexical_updates         = array();
-		$this->seek_count              = 0;
-		$this->had_previous_chunks     = false;
-	}
-
-	/**
-	 * Applies attribute updates to XML document.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param int $shift_this_point Accumulate and return shift for this position.
-	 * @return int How many bytes the given pointer moved in response to the updates.
-	 */
-	private function apply_attributes_updates( $shift_this_point = 0 ) {
-		if ( ! count( $this->lexical_updates ) ) {
-			return 0;
-		}
-
-		$accumulated_shift_for_given_point = 0;
-
-		/*
-		 * Attribute updates can be enqueued in any order but updates
-		 * to the document must occur in lexical order; that is, each
-		 * replacement must be made before all others which follow it
-		 * at later string indices in the input document.
-		 *
-		 * Sorting avoid making out-of-order replacements which
-		 * can lead to mangled output, partially-duplicated
-		 * attributes, and overwritten attributes.
-		 */
-		usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) );
-
-		$bytes_already_copied = 0;
-		$output_buffer        = '';
-		foreach ( $this->lexical_updates as $diff ) {
-			$shift = strlen( $diff->text ) - $diff->length;
-
-			// Adjust the cursor position by however much an update affects it.
-			if ( $diff->start < $this->bytes_already_parsed ) {
-				$this->bytes_already_parsed += $shift;
-			}
-
-			// Accumulate shift of the given pointer within this function call.
-			if ( $diff->start <= $shift_this_point ) {
-				$accumulated_shift_for_given_point += $shift;
-			}
-
-			$output_buffer       .= substr( $this->xml, $bytes_already_copied, $diff->start - $bytes_already_copied );
-			$output_buffer       .= $diff->text;
-			$bytes_already_copied = $diff->start + $diff->length;
-		}
-
-		$this->xml = $output_buffer . substr( $this->xml, $bytes_already_copied );
-
-		/*
-		 * Adjust bookmark locations to account for how the text
-		 * replacements adjust offsets in the input document.
-		 */
-		foreach ( $this->bookmarks as $bookmark_name => $bookmark ) {
-			$bookmark_end = $bookmark->start + $bookmark->length;
-
-			/*
-			 * Each lexical update which appears before the bookmark's endpoints
-			 * might shift the offsets for those endpoints. Loop through each change
-			 * and accumulate the total shift for each bookmark, then apply that
-			 * shift after tallying the full delta.
-			 */
-			$head_delta = 0;
-			$tail_delta = 0;
-
-			foreach ( $this->lexical_updates as $diff ) {
-				$diff_end = $diff->start + $diff->length;
-
-				if ( $bookmark->start < $diff->start && $bookmark_end < $diff->start ) {
-					break;
-				}
-
-				if ( $bookmark->start >= $diff->start && $bookmark_end < $diff_end ) {
-					$this->release_bookmark( $bookmark_name );
-					continue 2;
-				}
-
-				$delta = strlen( $diff->text ) - $diff->length;
-
-				if ( $bookmark->start >= $diff->start ) {
-					$head_delta += $delta;
-				}
-
-				if ( $bookmark_end >= $diff_end ) {
-					$tail_delta += $delta;
-				}
-			}
-
-			$bookmark->start  += $head_delta;
-			$bookmark->length += $tail_delta - $head_delta;
-		}
-
-		$this->lexical_updates = array();
-
-		return $accumulated_shift_for_given_point;
-	}
-
-	/**
-	 * Checks whether a bookmark with the given name exists.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param string $bookmark_name Name to identify a bookmark that potentially exists.
-	 * @return bool Whether that bookmark exists.
-	 */
-	public function has_bookmark( $bookmark_name ) {
-		return array_key_exists( $bookmark_name, $this->bookmarks );
-	}
-
-	public function get_processed_xml() {
-		// Flush updates
-		$this->get_updated_xml();
-		return substr( $this->xml, 0, $this->bytes_already_parsed );
-	}
-
-	public function get_unprocessed_xml() {
-		// Flush updates
-		$this->get_updated_xml();
-		return substr( $this->xml, $this->bytes_already_parsed );
-	}
-
-
-	/**
-	 * Move the internal cursor in the Tag Processor to a given bookmark's location.
-	 *
-	 * In order to prevent accidental infinite loops, there's a
-	 * maximum limit on the number of times seek() can be called.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param string $bookmark_name Jump to the place in the document identified by this bookmark name.
-	 * @return bool Whether the internal cursor was successfully moved to the bookmark's location.
-	 */
-	public function seek( $bookmark_name ) {
-		if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) {
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'Unknown bookmark name.' ),
-				'WP_VERSION'
-			);
-			return false;
-		}
-
-		if ( ++$this->seek_count > static::MAX_SEEK_OPS ) {
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'Too many calls to seek() - this can lead to performance issues.' ),
-				'WP_VERSION'
-			);
-			return false;
-		}
-
-		// Flush out any pending updates to the document.
-		$this->get_updated_xml();
-
-		// Point this tag processor before the sought tag opener and consume it.
-		$this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start;
-		$this->parser_state         = self::STATE_READY;
-		return $this->base_class_next_token();
-	}
-
-	/**
-	 * Compare two WP_HTML_Text_Replacement objects.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param WP_HTML_Text_Replacement $a First attribute update.
-	 * @param WP_HTML_Text_Replacement $b Second attribute update.
-	 * @return int Comparison value for string order.
-	 */
-	private static function sort_start_ascending( $a, $b ) {
-		$by_start = $a->start - $b->start;
-		if ( 0 !== $by_start ) {
-			return $by_start;
-		}
-
-		$by_text = isset( $a->text, $b->text ) ? strcmp( $a->text, $b->text ) : 0;
-		if ( 0 !== $by_text ) {
-			return $by_text;
-		}
-
-		/*
-		 * This code should be unreachable, because it implies the two replacements
-		 * start at the same location and contain the same text.
-		 */
-		return $a->length - $b->length;
-	}
-
-	/**
-	 * Return the enqueued value for a given attribute, if one exists.
-	 *
-	 * Enqueued updates can take different data types:
-	 *  - If an update is enqueued and is boolean, the return will be `true`
-	 *  - If an update is otherwise enqueued, the return will be the string value of that update.
-	 *  - If an attribute is enqueued to be removed, the return will be `null` to indicate that.
-	 *  - If no updates are enqueued, the return will be `false` to differentiate from "removed."
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param string $comparable_name The attribute name in its comparable form.
-	 * @return string|boolean|null Value of enqueued update if present, otherwise false.
-	 */
-	private function get_enqueued_attribute_value( $comparable_name ) {
-		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
-			return false;
-		}
-
-		if ( ! isset( $this->lexical_updates[ $comparable_name ] ) ) {
-			return false;
-		}
-
-		$enqueued_text = $this->lexical_updates[ $comparable_name ]->text;
-
-		// Removed attributes erase the entire span.
-		if ( '' === $enqueued_text ) {
-			return null;
-		}
-
-		/*
-		 * Boolean attribute updates are just the attribute name without a corresponding value.
-		 *
-		 * This value might differ from the given comparable name in that there could be leading
-		 * or trailing whitespace, and that the casing follows the name given in `set_attribute`.
-		 *
-		 * Example:
-		 *
-		 *     $p->set_attribute( 'data-TEST-id', 'update' );
-		 *     'update' === $p->get_enqueued_attribute_value( 'data-test-id' );
-		 *
-		 * Detect this difference based on the absence of the `=`, which _must_ exist in any
-		 * attribute containing a value, e.g. `<input type="text" enabled />`.
-		 *                                            ¹           ²
-		 *                                       1. Attribute with a string value.
-		 *                                       2. Boolean attribute whose value is `true`.
-		 */
-		$equals_at = strpos( $enqueued_text, '=' );
-		if ( false === $equals_at ) {
-			return true;
-		}
-
-		/*
-		 * Finally, a normal update's value will appear after the `=` and
-		 * be double-quoted, as performed incidentally by `set_attribute`.
-		 *
-		 * e.g. `type="text"`
-		 *           ¹²    ³
-		 *        1. Equals is here.
-		 *        2. Double-quoting starts one after the equals sign.
-		 *        3. Double-quoting ends at the last character in the update.
-		 */
-		$enqueued_value = substr( $enqueued_text, $equals_at + 2, -1 );
-		/*
-		 * We're deliberately not decoding entities in attribute values:
-		 *
-		 *     Attribute values must not contain direct or indirect entity references to external entities.
-		 *
-		 * See https://www.w3.org/TR/xml/#sec-starttags.
-		 */
-		return $enqueued_value;
-	}
-
-	/**
-	 * Returns the value of a requested attribute from a matched tag opener if that attribute exists.
-	 *
-	 * Example:
-	 *
-	 *     $p = new WP_XML_Tag_Processor( '<wp:content enabled class="test" data-test-id="14">Test</wp:content>' );
-	 *     $p->next_tag( array( 'class_name' => 'test' ) ) === true;
-	 *     $p->get_attribute( 'data-test-id' ) === '14';
-	 *     $p->get_attribute( 'enabled' ) === true;
-	 *     $p->get_attribute( 'aria-label' ) === null;
-	 *
-	 *     $p->next_tag() === false;
-	 *     $p->get_attribute( 'class' ) === null;
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param string $name Name of attribute whose value is requested.
-	 * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`.
-	 */
-	public function get_attribute( $name ) {
-		if (
-			self::STATE_MATCHED_TAG !== $this->parser_state &&
-			self::STATE_XML_DECLARATION !== $this->parser_state
-		) {
-			return null;
-		}
-
-		// Return any enqueued attribute value updates if they exist.
-		$enqueued_value = $this->get_enqueued_attribute_value( $name );
-		if ( false !== $enqueued_value ) {
-			return $enqueued_value;
-		}
-
-		if ( ! isset( $this->attributes[ $name ] ) ) {
-			return null;
-		}
-
-		$attribute = $this->attributes[ $name ];
-		$raw_value = substr( $this->xml, $attribute->value_starts_at, $attribute->value_length );
-
-		$decoded = WP_XML_Decoder::decode( $raw_value );
-		if ( ! isset( $decoded ) ) {
-			/**
-			 * If the attribute contained an invalid value, it's
-			 * a fatal error.
-			 *
-			 * @see WP_XML_Decoder::decode()
-			 */
-			$this->last_error = self::ERROR_SYNTAX;
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'Invalid attribute value encountered.' ),
-				'WP_VERSION'
-			);
-			return false;
-		}
-
-		return $decoded;
-	}
-
-	/**
-	 * Gets names of all attributes matching a given prefix in the current tag.
-	 *
-	 * Note that matching is case-sensitive. This is in accordance with the spec.
-	 *
-	 * Example:
-	 *
-	 *     $p = new WP_XML_Tag_Processor( '<wp:content data-ENABLED="1" class="test" DATA-test-id="14">Test</wp:content>' );
-	 *     $p->next_tag( array( 'class_name' => 'test' ) ) === true;
-	 *     $p->get_attribute_names_with_prefix( 'data-' ) === array( 'data-ENABLED' );
-	 *     $p->get_attribute_names_with_prefix( 'DATA-' ) === array( 'DATA-test-id' );
-	 *     $p->get_attribute_names_with_prefix( 'DAta-' ) === array();
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param string $prefix Prefix of requested attribute names.
-	 * @return array|null List of attribute names, or `null` when no tag opener is matched.
-	 */
-	public function get_attribute_names_with_prefix( $prefix ) {
-		if (
-			self::STATE_MATCHED_TAG !== $this->parser_state ||
-			$this->is_closing_tag
-		) {
-			return null;
-		}
-
-		$matches = array();
-		foreach ( array_keys( $this->attributes ) as $attr_name ) {
-			if ( str_starts_with( $attr_name, $prefix ) ) {
-				$matches[] = $attr_name;
-			}
-		}
-		return $matches;
-	}
-
-	/**
-	 * Returns the uppercase name of the matched tag.
-	 *
-	 * Example:
-	 *
-	 *     $p = new WP_XML_Tag_Processor( '<wp:content class="test">Test</wp:content>' );
-	 *     $p->next_tag() === true;
-	 *     $p->get_tag() === 'DIV';
-	 *
-	 *     $p->next_tag() === false;
-	 *     $p->get_tag() === null;
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return string|null Name of currently matched tag in input XML, or `null` if none found.
-	 */
-	public function get_tag() {
-		if ( null === $this->tag_name_starts_at ) {
-			return null;
-		}
-
-		$tag_name = substr( $this->xml, $this->tag_name_starts_at, $this->tag_name_length );
-
-		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
-			return $tag_name;
-		}
-
-		return null;
-	}
-
-	/**
-	 * Indicates if the currently matched tag is an empty element tag.
-	 *
-	 * XML tags ending with a solidus ("/") are parsed as empty elements. They have no
-	 * content and no matching closer is expected.
-
-	 * @since WP_VERSION
-	 *
-	 * @return bool Whether the currently matched tag is an empty element tag.
-	 */
-	public function is_empty_element() {
-		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
-			return false;
-		}
-
-		/*
-		 * An empty element tag is defined by the solidus at the _end_ of the tag, not the beginning.
-		 *
-		 * Example:
-		 *
-		 *     <figure />
-		 *             ^ this appears one character before the end of the closing ">".
-		 */
-		return '/' === $this->xml[ $this->token_starts_at + $this->token_length - 2 ];
-	}
-
-	/**
-	 * Indicates if the current tag token is a tag closer.
-	 *
-	 * Example:
-	 *
-	 *     $p = new WP_XML_Tag_Processor( '<wp:content></wp:content>' );
-	 *     $p->next_tag( array( 'tag_name' => 'wp:content', 'tag_closers' => 'visit' ) );
-	 *     $p->is_tag_closer() === false;
-	 *
-	 *     $p->next_tag( array( 'tag_name' => 'wp:content', 'tag_closers' => 'visit' ) );
-	 *     $p->is_tag_closer() === true;
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return bool Whether the current tag is a tag closer.
-	 */
-	public function is_tag_closer() {
-		return (
-			self::STATE_MATCHED_TAG === $this->parser_state &&
-			$this->is_closing_tag
-		);
-	}
-
-	/**
-	 * Indicates the kind of matched token, if any.
-	 *
-	 * This differs from `get_token_name()` in that it always
-	 * returns a static string indicating the type, whereas
-	 * `get_token_name()` may return values derived from the
-	 * token itself, such as a tag name or processing
-	 * instruction tag.
-	 *
-	 * Possible values:
-	 *  - `#tag` when matched on a tag.
-	 *  - `#text` when matched on a text node.
-	 *  - `#cdata-section` when matched on a CDATA node.
-	 *  - `#comment` when matched on a comment.
-	 *  - `#presumptuous-tag` when matched on an empty tag closer.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return string|null What kind of token is matched, or null.
-	 */
-	public function get_token_type() {
-		switch ( $this->parser_state ) {
-			case self::STATE_MATCHED_TAG:
-				return '#tag';
-
-			default:
-				return $this->get_token_name();
-		}
-	}
-
-	/**
-	 * Returns the node name represented by the token.
-	 *
-	 * This matches the DOM API value `nodeName`. Some values
-	 * are static, such as `#text` for a text node, while others
-	 * are dynamically generated from the token itself.
-	 *
-	 * Dynamic names:
-	 *  - Uppercase tag name for tag matches.
-	 *
-	 * Note that if the Tag Processor is not matched on a token
-	 * then this function will return `null`, either because it
-	 * hasn't yet found a token or because it reached the end
-	 * of the document without matching a token.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return string|null Name of the matched token.
-	 */
-	public function get_token_name() {
-		switch ( $this->parser_state ) {
-			case self::STATE_MATCHED_TAG:
-				return $this->get_tag();
-
-			case self::STATE_TEXT_NODE:
-				return '#text';
-
-			case self::STATE_CDATA_NODE:
-				return '#cdata-section';
-
-			case self::STATE_XML_DECLARATION:
-				return '#xml-declaration';
-
-			case self::STATE_PI_NODE:
-				return '#processing-instructions';
-
-			case self::STATE_COMMENT:
-				return '#comment';
-		}
-	}
-
-	/**
-	 * Returns the modifiable text for a matched token, or an empty string.
-	 *
-	 * Modifiable text is text content that may be read and changed without
-	 * changing the XML structure of the document around it. This includes
-	 * the contents of `#text` nodes in the XML as well as the inner
-	 * contents of XML comments, Processing Instructions, and others, even
-	 * though these nodes aren't part of a parsed DOM tree. They also contain
-	 * the contents of SCRIPT and STYLE tags, of TEXTAREA tags, and of any
-	 * other section in an XML document which cannot contain XML markup (DATA).
-	 *
-	 * If a token has no modifiable text then an empty string is returned to
-	 * avoid needless crashing or type errors. An empty string does not mean
-	 * that a token has modifiable text, and a token with modifiable text may
-	 * have an empty string (e.g. a comment with no contents).
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return string
-	 */
-	public function get_modifiable_text() {
-		if ( null === $this->text_starts_at ) {
-			return '';
-		}
-
-		$text = substr( $this->xml, $this->text_starts_at, $this->text_length );
-
-		/*
-		 * > the XML processor must behave as if it normalized all line breaks in external parsed
-		 * > entities (including the document entity) on input, before parsing, by translating both
-		 * > the two-character sequence #xD #xA and any #xD that is not followed by #xA to a single
-		 * > #xA character.
-		 *
-		 * See https://www.w3.org/TR/xml/#sec-line-ends
-		 */
-		$text = str_replace( array( "\r\n", "\r" ), "\n", $text );
-
-		// Comment data, CDATA sections, and PCData tags contents are not decoded any further.
-		if (
-			self::STATE_CDATA_NODE === $this->parser_state ||
-			self::STATE_COMMENT === $this->parser_state ||
-			$this->is_pcdata_element()
-		) {
-			return $text;
-		}
-
-		$decoded = WP_XML_Decoder::decode( $text );
-		if ( ! isset( $decoded ) ) {
-			/**
-			 * If the attribute contained an invalid value, it's
-			 * a fatal error.
-			 *
-			 * @see WP_XML_Decoder::decode()
-			 */
-
-			$this->last_error = self::ERROR_SYNTAX;
-			var_dump( $text );
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'Invalid text content encountered.' ),
-				'WP_VERSION'
-			);
-			return false;
-		}
-		return $decoded;
-	}
-
-	public function set_modifiable_text( $new_value ) {
-		switch ( $this->parser_state ) {
-			case self::STATE_TEXT_NODE:
-			case self::STATE_COMMENT:
-				$this->lexical_updates[] = new WP_HTML_Text_Replacement(
-					$this->text_starts_at,
-					$this->text_length,
-					// @TODO This is naive, let's rethink this.
-					htmlspecialchars( $new_value, ENT_XML1, 'UTF-8' )
-				);
-				return true;
-
-			case self::STATE_CDATA_NODE:
-				$this->lexical_updates[] = new WP_HTML_Text_Replacement(
-					$this->text_starts_at,
-					$this->text_length,
-					// @TODO This is naive, let's rethink this.
-					str_replace( ']]>', ']]&gt;', $new_value )
-				);
-				return true;
-			default:
-				_doing_it_wrong(
-					__METHOD__,
-					__( 'Cannot set text content on a non-text node.' ),
-					'WP_VERSION'
-				);
-				return false;
-		}
-	}
-
-	/**
-	 * Updates or creates a new attribute on the currently matched tag with the passed value.
-	 *
-	 * For boolean attributes special handling is provided:
-	 *  - When `true` is passed as the value, then only the attribute name is added to the tag.
-	 *  - When `false` is passed, the attribute gets removed if it existed before.
-	 *
-	 * For string attributes, the value is escaped using the `esc_attr` function.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param string      $name  The attribute name to target.
-	 * @param string|bool $value The new attribute value.
-	 * @return bool Whether an attribute value was set.
-	 */
-	public function set_attribute( $name, $value ) {
-		if ( ! is_string( $value ) ) {
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'Non-string attribute values cannot be passed to set_attribute().' ),
-				'WP_VERSION'
-			);
-			return false;
-		}
-		if (
-			self::STATE_MATCHED_TAG !== $this->parser_state ||
-			$this->is_closing_tag
-		) {
-			return false;
-		}
-
-		$value             = htmlspecialchars( $value, ENT_XML1, 'UTF-8' );
-		$updated_attribute = "{$name}=\"{$value}\"";
-
-		/*
-		 * > An attribute name must not appear more than once
-		 * > in the same start-tag or empty-element tag.
-		 *     - XML 1.0 spec
-		 *
-		 * @see https://www.w3.org/TR/xml/#sec-starttags
-		 */
-		if ( isset( $this->attributes[ $name ] ) ) {
-			/*
-			 * Update an existing attribute.
-			 *
-			 * Example – set attribute id to "new" in <wp:content id="initial_id" />:
-			 *
-			 *     <wp:content id="initial_id"/>
-			 *          ^-------------^
-			 *          start         end
-			 *     replacement: `id="new"`
-			 *
-			 *     Result: <wp:content id="new"/>
-			 */
-			$existing_attribute             = $this->attributes[ $name ];
-			$this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
-				$existing_attribute->start,
-				$existing_attribute->length,
-				$updated_attribute
-			);
-		} else {
-			/*
-			 * Create a new attribute at the tag's name end.
-			 *
-			 * Example – add attribute id="new" to <wp:content />:
-			 *
-			 *     <wp:content/>
-			 *         ^
-			 *         start and end
-			 *     replacement: ` id="new"`
-			 *
-			 *     Result: <wp:content id="new"/>
-			 */
-			$this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
-				$this->tag_name_starts_at + $this->tag_name_length,
-				0,
-				' ' . $updated_attribute
-			);
-		}
-
-		return true;
-	}
-
-	/**
-	 * Remove an attribute from the currently-matched tag.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param string $name The attribute name to remove.
-	 * @return bool Whether an attribute was removed.
-	 */
-	public function remove_attribute( $name ) {
-		if (
-			self::STATE_MATCHED_TAG !== $this->parser_state ||
-			$this->is_closing_tag
-		) {
-			return false;
-		}
-
-		/*
-		 * If updating an attribute that didn't exist in the input
-		 * document, then remove the enqueued update and move on.
-		 *
-		 * For example, this might occur when calling `remove_attribute()`
-		 * after calling `set_attribute()` for the same attribute
-		 * and when that attribute wasn't originally present.
-		 */
-		if ( ! isset( $this->attributes[ $name ] ) ) {
-			if ( isset( $this->lexical_updates[ $name ] ) ) {
-				unset( $this->lexical_updates[ $name ] );
-			}
-			return false;
-		}
-
-		/*
-		 * Removes an existing tag attribute.
-		 *
-		 * Example – remove the attribute id from <wp:content id="main"/>:
-		 *    <wp:content id="initial_id"/>
-		 *         ^-------------^
-		 *         start         end
-		 *    replacement: ``
-		 *
-		 *    Result: <wp:content />
-		 */
-		$this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
-			$this->attributes[ $name ]->start,
-			$this->attributes[ $name ]->length,
-			''
-		);
-
-		return true;
-	}
-
-	/**
-	 * Returns the string representation of the XML Tag Processor.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @see WP_XML_Tag_Processor::get_updated_xml()
-	 *
-	 * @return string The processed XML.
-	 */
-	public function __toString() {
-		return $this->get_updated_xml();
-	}
-
-	/**
-	 * Returns the string representation of the XML Tag Processor.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return string The processed XML.
-	 */
-	public function get_updated_xml() {
-		$requires_no_updating = 0 === count( $this->lexical_updates );
-
-		/*
-		 * When there is nothing more to update and nothing has already been
-		 * updated, return the original document and avoid a string copy.
-		 */
-		if ( $requires_no_updating ) {
-			return $this->xml;
-		}
-
-		/*
-		 * Keep track of the position right before the current tag. This will
-		 * be necessary for reparsing the current tag after updating the XML.
-		 */
-		$before_current_tag = $this->token_starts_at;
-
-		/*
-		 * 1. Apply the enqueued edits and update all the pointers to reflect those changes.
-		 */
-		$before_current_tag += $this->apply_attributes_updates( $before_current_tag );
-
-		/*
-		 * 2. Rewind to before the current tag and reparse to get updated attributes.
-		 *
-		 * At this point the internal cursor points to the end of the tag name.
-		 * Rewind before the tag name starts so that it's as if the cursor didn't
-		 * move; a call to `next_tag()` will reparse the recently-updated attributes
-		 * and additional calls to modify the attributes will apply at this same
-		 * location, but in order to avoid issues with subclasses that might add
-		 * behaviors to `next_tag()`, the internal methods should be called here
-		 * instead.
-		 *
-		 * It's important to note that in this specific place there will be no change
-		 * because the processor was already at a tag when this was called and it's
-		 * rewinding only to the beginning of this very tag before reprocessing it
-		 * and its attributes.
-		 *
-		 * <p>Previous XML<em>More XML</em></p>
-		 *                 ↑  │ back up by the length of the tag name plus the opening <
-		 *                 └←─┘ back up by strlen("em") + 1 ==> 3
-		 */
-		$this->bytes_already_parsed = $before_current_tag;
-		$this->base_class_next_token();
-
-		return $this->xml;
-	}
-
-	/**
-	 * Parses tag query input into internal search criteria.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @param array|string|null $query {
-	 *     Optional. Which tag name to find, having which class, etc. Default is to find any tag.
-	 *
-	 *     @type string|null $tag_name     Which tag to find, or `null` for "any tag."
-	 *     @type int|null    $match_offset Find the Nth tag matching all search criteria.
-	 *                                     1 for "first" tag, 3 for "third," etc.
-	 *                                     Defaults to first tag.
-	 *     @type string      $tag_closers  "visit" or "skip": whether to stop on tag closers, e.g. </wp:content>.
-	 * }
-	 */
-	private function parse_query( $query ) {
-		if ( null !== $query && $query === $this->last_query ) {
-			return;
-		}
-
-		$this->last_query          = $query;
-		$this->sought_tag_name     = null;
-		$this->sought_match_offset = 1;
-		$this->stop_on_tag_closers = false;
-
-		// A single string value means "find the tag of this name".
-		if ( is_string( $query ) ) {
-			$this->sought_tag_name = $query;
-			return;
-		}
-
-		// An empty query parameter applies no restrictions on the search.
-		if ( null === $query ) {
-			return;
-		}
-
-		// If not using the string interface, an associative array is required.
-		if ( ! is_array( $query ) ) {
-			_doing_it_wrong(
-				__METHOD__,
-				__( 'The query argument must be an array or a tag name.' ),
-				'WP_VERSION'
-			);
-			return;
-		}
-
-		if ( isset( $query['tag_name'] ) && is_string( $query['tag_name'] ) ) {
-			$this->sought_tag_name = $query['tag_name'];
-		}
-
-		if ( isset( $query['match_offset'] ) && is_int( $query['match_offset'] ) && 0 < $query['match_offset'] ) {
-			$this->sought_match_offset = $query['match_offset'];
-		}
-
-		if ( isset( $query['tag_closers'] ) ) {
-			$this->stop_on_tag_closers = 'visit' === $query['tag_closers'];
-		}
-	}
-
-
-	/**
-	 * Checks whether a given tag and its attributes match the search criteria.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @return bool Whether the given tag and its attribute match the search criteria.
-	 */
-	private function matches() {
-		if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) {
-			return false;
-		}
-
-		// Does the tag name match the requested tag name in a case-insensitive manner?
-		if ( null !== $this->sought_tag_name ) {
-			/*
-			 * String (byte) length lookup is fast. If they aren't the
-			 * same length then they can't be the same string values.
-			 */
-			if ( strlen( $this->sought_tag_name ) !== $this->tag_name_length ) {
-				return false;
-			}
-
-			/*
-			 * Check each character to determine if they are the same.
-			 */
-			for ( $i = 0; $i < $this->tag_name_length; $i++ ) {
-				if ( $this->xml[ $this->tag_name_starts_at + $i ] !== $this->sought_tag_name[ $i ] ) {
-					return false;
-				}
-			}
-		}
-
-		return true;
-	}
-
-	/**
-	 * Parser Ready State.
-	 *
-	 * Indicates that the parser is ready to run and waiting for a state transition.
-	 * It may not have started yet, or it may have just finished parsing a token and
-	 * is ready to find the next one.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 */
-	const STATE_READY = 'STATE_READY';
-
-	/**
-	 * Parser Complete State.
-	 *
-	 * Indicates that the parser has reached the end of the document and there is
-	 * nothing left to scan. It finished parsing the last token completely.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 */
-	const STATE_COMPLETE = 'STATE_COMPLETE';
-
-	/**
-	 * Parser Incomplete Input State.
-	 *
-	 * Indicates that the parser has reached the end of the document before finishing
-	 * a token. It started parsing a token but there is a possibility that the input
-	 * XML document was truncated in the middle of a token.
-	 *
-	 * The parser is reset at the start of the incomplete token and has paused. There
-	 * is nothing more than can be scanned unless provided a more complete document.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 */
-	const STATE_INCOMPLETE_INPUT = 'STATE_INCOMPLETE_INPUT';
-
-	/**
-	 * Parser Invalid Input State.
-	 *
-	 * Indicates that the parsed xml document contains malformed input and cannot be parsed.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 */
-	const STATE_INVALID_DOCUMENT = 'STATE_INVALID_DOCUMENT';
-
-	/**
-	 * Parser Matched Tag State.
-	 *
-	 * Indicates that the parser has found an XML tag and it's possible to get
-	 * the tag name and read or modify its attributes (if it's not a closing tag).
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 */
-	const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG';
-
-	/**
-	 * Parser Text Node State.
-	 *
-	 * Indicates that the parser has found a text node and it's possible
-	 * to read and modify that text.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 */
-	const STATE_TEXT_NODE = 'STATE_TEXT_NODE';
-
-	/**
-	 * Parser CDATA Node State.
-	 *
-	 * Indicates that the parser has found a CDATA node and it's possible
-	 * to read and modify its modifiable text. Note that in XML there are
-	 * no CDATA nodes outside of foreign content (SVG and MathML). Outside
-	 * of foreign content, they are treated as XML comments.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 */
-	const STATE_CDATA_NODE = 'STATE_CDATA_NODE';
-
-	/**
-	 * Indicates that the parser has found an XML processing instruction.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 */
-	const STATE_PI_NODE = 'STATE_PI_NODE';
-
-	/**
-	 * Indicates that the parser has found an XML declaration
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 */
-	const STATE_XML_DECLARATION = 'STATE_XML_DECLARATION';
-
-	/**
-	 * Indicates that the parser has found an XML comment and it's
-	 * possible to read and modify its modifiable text.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @access private
-	 */
-	const STATE_COMMENT = 'STATE_COMMENT';
-
-	/**
-	 * Indicates that the parser encountered unsupported syntax and has bailed.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @var string
-	 */
-	const ERROR_SYNTAX = 'syntax';
-
-	/**
-	 * Indicates that the provided XML document contains a declaration that is
-	 * unsupported by the parser.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @var string
-	 */
-	const ERROR_UNSUPPORTED = 'unsupported';
-
-	/**
-	 * Indicates that the parser encountered more XML tokens than it
-	 * was able to process and has bailed.
-	 *
-	 * @since WP_VERSION
-	 *
-	 * @var string
-	 */
-	const ERROR_EXCEEDED_MAX_BOOKMARKS = 'exceeded-max-bookmarks';
-}
diff --git a/packages/playground/data-liberation/tests/WPXMLProcessorTests.php b/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
index 276eb7c311..0c5ffede64 100644
--- a/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
+++ b/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
@@ -5,7 +5,6 @@
  * @package WordPress
  * @subpackage XML-API
  */
-
 use PHPUnit\Framework\TestCase;
 
 /**
@@ -14,6 +13,1383 @@
  * @coversDefaultClass WP_XML_Processor
  */
 class WPXMLProcessorTests extends TestCase {
+	const XML_SIMPLE       = '<wp:content id="first"><wp:text id="second">Text</wp:text></wp:content>';
+	const XML_WITH_CLASSES = '<wp:content wp:post-type="main with-border" id="first"><wp:text wp:post-type="not-main bold with-border" id="second">Text</wp:text></wp:content>';
+	const XML_MALFORMED    = '<wp:content><wp:text wp:post-type="d-md-none" Notifications</wp:text><wp:text wp:post-type="d-none d-md-inline">Back to notifications</wp:text></wp:content>';
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_tag
+	 */
+	public function test_get_tag_returns_null_before_finding_tags() {
+		$processor = new WP_XML_Processor( '<wp:content>Test</wp:content>' );
+
+		$this->assertNull( $processor->get_tag(), 'Calling get_tag() without selecting a tag did not return null' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_tag
+	 */
+	public function test_get_tag_returns_null_when_not_in_open_tag() {
+		$processor = new WP_XML_Processor( '<wp:content>Test</wp:content>' );
+
+		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+		$this->assertNull( $processor->get_tag(), 'Accessing a non-existing tag did not return null' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_tag
+	 */
+	public function test_get_tag_returns_open_tag_name() {
+		$processor = new WP_XML_Processor( '<wp:content>Test</wp:content>' );
+
+		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
+		$this->assertSame( 'wp:content', $processor->get_tag(), 'Accessing an existing tag name did not return "wp:content"' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::is_empty_element
+	 *
+	 * @dataProvider data_is_empty_element
+	 *
+	 * @param string $xml Input XML whose first tag might contain the self-closing flag `/`.
+	 * @param bool $flag_is_set Whether the input XML's first tag contains the self-closing flag.
+	 */
+	public function test_is_empty_element_matches_input_xml( $xml, $flag_is_set ) {
+		$processor = new WP_XML_Processor( $xml );
+		$processor->next_tag( array( 'tag_closers' => 'visit' ) );
+
+		if ( $flag_is_set ) {
+			$this->assertTrue( $processor->is_empty_element(), 'Did not find the empty element tag when it was present.' );
+		} else {
+			$this->assertFalse( $processor->is_empty_element(), 'Found the empty element tag when it was absent.' );
+		}
+	}
+
+	/**
+	 * Data provider. XML tags which might have a self-closing flag, and an indicator if they do.
+	 *
+	 * @return array[]
+	 */
+	public static function data_is_empty_element() {
+		return array(
+			// Namespace-prefixed tag name, with and without the trailing "/" of an empty-element tag.
+			'Self-closing flag on non-void XML element'    => array( '<wp:content />', true ),
+			'No self-closing flag on non-void XML element' => array( '<wp:content>', false ),
+			// Un-prefixed tag name. NOTE(review): "void" in these labels is an HTML notion, presumably inherited from the HTML processor tests.
+			'Self-closing flag on void XML element'        => array( '<photo />', true ),
+			'No self-closing flag on void XML element'     => array( '<photo>', false ),
+			'Self-closing flag on void XML element without spacing' => array( '<photo/>', true ),
+			// A tag closer is expected to never report the empty-element flag.
+			'No self-closing flag on tag closer'           => array( '</textarea>', false ),
+			// NOTE(review): "foreign element" is likewise HTML terminology; the expectations match the other tag names above.
+			'Self-closing flag on a foreign element'       => array( '<circle />', true ),
+			'No self-closing flag on a foreign element'    => array( '<circle>', false ),
+			// These involve syntax peculiarities.
+			'Self-closing flag after extra spaces'         => array( '<wp:content      />', true ),
+			'Self-closing flag after quoted attribute'     => array( '<wp:content id="test"/>', true ),
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_returns_null_when_not_in_open_tag() {
+		$processor = new WP_XML_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
+
+		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+		$this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a non-existing tag did not return null' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_returns_null_when_in_closing_tag() {
+		$processor = new WP_XML_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
+
+		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
+		$this->assertTrue( $processor->next_token(), 'Querying a text node did not return true' );
+		$this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not return true' );
+		$this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a closing tag did not return null' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_returns_null_when_attribute_missing() {
+		$processor = new WP_XML_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
+
+		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
+		$this->assertNull( $processor->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @expectedIncorrectUsage WP_XML_Processor::base_class_next_token
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_attributes_are_rejected_in_tag_closers() {
+		$processor = new WP_XML_Processor( '<wp:content>Test</wp:content wp:post-type="test">' );
+
+		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
+		$this->assertTrue( $processor->next_token(), 'Querying a text node did not return true.' );
+		$this->assertFalse( $processor->next_token(), 'Querying an existing but invalid closing tag did not return false.' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_returns_attribute_value() {
+		$processor = new WP_XML_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
+
+		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
+		$this->assertSame( 'test', $processor->get_attribute( 'wp:post-type' ), 'Accessing a wp:post-type="test" attribute value did not return "test"' );
+	}
+
+	/**
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_parsing_stops_on_malformed_attribute_value_no_value() {
+		$processor = new WP_XML_Processor( '<wp:content enabled wp:post-type="test">Test</wp:content>' );
+
+		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
+	}
+
+	/**
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_parsing_stops_on_malformed_attribute_value_no_quotes() {
+		$processor = new WP_XML_Processor( '<wp:content enabled=1 wp:post-type="test">Test</wp:content>' );
+
+		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
+	}
+
+	/**
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::get_attribute
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_malformed_attribute_value_containing_ampersand_is_treated_as_plaintext() {
+		$processor = new WP_XML_Processor( '<wp:content enabled="WordPress & WordPress">Test</wp:content>' );
+
+		$this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' );
+		$this->assertSame( 'WordPress & WordPress', $processor->get_attribute( 'enabled' ), 'A bare ampersand in an attribute value was not returned verbatim' );
+	}
+
+	/**
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::get_attribute
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_malformed_attribute_value_containing_entity_without_semicolon_is_treated_as_plaintext() {
+		$processor = new WP_XML_Processor( '<wp:content enabled="&#x94">Test</wp:content>' );
+
+		$this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' );
+		$this->assertSame( '&#x94', $processor->get_attribute( 'enabled' ), 'An entity without a terminating semicolon was not returned verbatim' );
+	}
+
+	/**
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_parsing_stops_on_malformed_attribute_value_contains_lt_character() {
+		$processor = new WP_XML_Processor( '<wp:content enabled="I love <3 this">Test</wp:content>' );
+
+		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
+	}
+
+	/**
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_parsing_stops_on_malformed_tags_duplicate_attributes() {
+		$processor = new WP_XML_Processor( '<wp:content id="update-me" id="ignored-id"><wp:text id="second">Text</wp:text></wp:content>' );
+
+		$this->assertFalse( $processor->next_tag() );
+	}
+
+	/**
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_parsing_stops_on_malformed_attribute_name_contains_slash() {
+		$processor = new WP_XML_Processor( '<wp:content a/b="test">Test</wp:content>' );
+
+		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_modifiable_text
+	 */
+	public function test_get_modifiable_text_returns_a_decoded_value() {
+		$processor = new WP_XML_Processor( '<root>&#x201C;&#x1f604;&#x201D;</root>' );
+
+		$processor->next_tag( 'root' );
+		$processor->next_token();
+
+		$this->assertEquals(
+			'“😄”',
+			$processor->get_modifiable_text(),
+			'Reading an encoded text did not decode it.'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_returns_a_decoded_value() {
+		$processor = new WP_XML_Processor( '<root encoded-data="&#x201C;&#x1f604;&#x201D;"></root>' );
+
+		$this->assertTrue( $processor->next_tag( 'root' ), 'Querying a tag did not return true' );
+		$this->assertEquals(
+			'“😄”',
+			$processor->get_attribute( 'encoded-data' ),
+			'Reading an encoded attribute did not decode it.'
+		);
+	}
+
+	/**
+	 * Ensures attribute lookups only match the exact casing of the name.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_is_case_sensitive() {
+		$processor = new WP_XML_Processor( '<wp:content DATA-enabled="true">Test</wp:content>' );
+		$processor->next_tag();
+
+		$this->assertSame(
+			'true',
+			$processor->get_attribute( 'DATA-enabled' ),
+			'Accessing an attribute by a same-cased name did not return its value'
+		);
+
+		$this->assertNull(
+			$processor->get_attribute( 'data-enabled' ),
+			'Accessing an attribute by a differently-cased name did return its value'
+		);
+	}
+
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::remove_attribute
+	 */
+	public function test_remove_attribute_is_case_sensitive() {
+		$processor = new WP_XML_Processor( '<wp:content DATA-enabled="true">Test</wp:content>' );
+		$processor->next_tag();
+		$processor->remove_attribute( 'data-enabled' );
+
+		$this->assertSame( '<wp:content DATA-enabled="true">Test</wp:content>', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did remove the attribute' );
+
+		$processor->remove_attribute( 'DATA-enabled' );
+
+		$this->assertSame( '<wp:content >Test</wp:content>', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did not remove the attribute' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_set_attribute_is_case_sensitive() {
+		$processor = new WP_XML_Processor( '<wp:content DATA-enabled="true">Test</wp:content>' );
+		$processor->next_tag();
+		$processor->set_attribute( 'data-enabled', 'abc' );
+
+		$this->assertSame( '<wp:content data-enabled="abc" DATA-enabled="true">Test</wp:content>', $processor->get_updated_xml(), 'A set_attribute call with a differently-cased name did not add a new attribute alongside the existing one' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
+	 */
+	public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() {
+		$processor = new WP_XML_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
+		$this->assertNull(
+			$processor->get_attribute_names_with_prefix( 'data-' ),
+			'Accessing attributes by their prefix did not return null when no tag was selected'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
+	 */
+	public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() {
+		$processor = new WP_XML_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
+		$processor->next_tag( 'p' );
+		$this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a non-existing tag did not return null' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
+	 */
+	public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() {
+		$processor = new WP_XML_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
+		$processor->next_tag( 'wp:content' );
+		$processor->next_tag( array( 'tag_closers' => 'visit' ) );
+
+		$this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a closing tag did not return null' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
+	 */
+	public function test_get_attribute_names_with_prefix_returns_empty_array_when_no_attributes_present() {
+		$processor = new WP_XML_Processor( '<wp:content>Test</wp:content>' );
+		$processor->next_tag( 'wp:content' );
+
+		$this->assertSame( array(), $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing the attributes on a tag without any did not return an empty array' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
+	 */
+	public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_original_case() {
+		$processor = new WP_XML_Processor( '<wp:content DATA-enabled="yes" wp:post-type="test" data-test-ID="14">Test</wp:content>' );
+		$processor->next_tag();
+
+		$this->assertSame(
+			array( 'data-test-ID' ),
+			$processor->get_attribute_names_with_prefix( 'data-' ),
+			'Accessing attributes by their prefix did not return only the case-sensitive matches in their original case'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
+	 */
+	public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() {
+		$processor = new WP_XML_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
+		$processor->next_tag();
+		$processor->set_attribute( 'data-test-id', '14' );
+
+		$this->assertSame(
+			'<wp:content data-test-id="14" data-foo="bar">Test</wp:content>',
+			$processor->get_updated_xml(),
+			"Updated XML doesn't include attribute added via set_attribute"
+		);
+		$this->assertSame(
+			array( 'data-test-id', 'data-foo' ),
+			$processor->get_attribute_names_with_prefix( 'data-' ),
+			"Accessing attribute names doesn't find attribute added via set_attribute"
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::__toString
+	 */
+	public function test_to_string_returns_updated_xml() {
+		$processor = new WP_XML_Processor( '<line id="remove" /><wp:content enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>' );
+		$processor->next_tag();
+		$processor->remove_attribute( 'id' );
+
+		$processor->next_tag();
+		$processor->set_attribute( 'id', 'wp:content-id-1' );
+
+		$this->assertSame(
+			$processor->get_updated_xml(),
+			(string) $processor,
+			'get_updated_xml() returned a different value than __toString()'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_updated_xml
+	 */
+	public function test_get_updated_xml_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() {
+		$processor = new WP_XML_Processor( '<line id="remove" /><wp:content enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>' );
+		$processor->next_tag();
+		$processor->remove_attribute( 'id' );
+
+		$processor->next_tag();
+		$processor->set_attribute( 'id', 'wp:content-id-1' );
+
+		$this->assertSame(
+			'<line  /><wp:content id="wp:content-id-1" enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>',
+			$processor->get_updated_xml(),
+			'Calling get_updated_xml after updating the attributes of the second tag returned different XML than expected'
+		);
+
+		$processor->set_attribute( 'id', 'wp:content-id-2' );
+
+		$this->assertSame(
+			'<line  /><wp:content id="wp:content-id-2" enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>',
+			$processor->get_updated_xml(),
+			'Calling get_updated_xml after updating the attributes of the second tag for the second time returned different XML than expected'
+		);
+
+		$processor->next_tag();
+		$processor->remove_attribute( 'id' );
+
+		$this->assertSame(
+			'<line  /><wp:content id="wp:content-id-2" enabled="yes" wp:post-type="test">Test</wp:content><wp:text ></wp:text>',
+			$processor->get_updated_xml(),
+			'Calling get_updated_xml after removing the id attribute of the third tag returned different XML than expected'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_updated_xml
+	 */
+	public function test_get_updated_xml_without_updating_any_attributes_returns_the_original_xml() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+
+		$this->assertSame(
+			self::XML_SIMPLE,
+			$processor->get_updated_xml(),
+			'Calling get_updated_xml() without performing any updates did not return the initial XML snippet'
+		);
+	}
+
+	/**
+	 * Ensures that when seeking to an earlier spot in the document that
+	 * all previously-enqueued updates are applied as they ought to be.
+	 *
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+	 */
+	public function test_get_updated_xml_applies_updates_to_content_after_seeking_to_before_parsed_bytes() {
+		$processor = new WP_XML_Processor( '<wp:content><photo hidden></wp:content>' );
+
+		$processor->next_tag();
+		$processor->set_attribute( 'wonky', 'true' );
+		$processor->next_tag();
+		$processor->set_bookmark( 'here' );
+
+		$processor->next_tag( array( 'tag_closers' => 'visit' ) );
+		$processor->seek( 'here' );
+
+		$this->assertSame( '<wp:content wonky="true"><photo hidden></wp:content>', $processor->get_updated_xml() );
+	}
+
+	public function test_declare_element_as_pcdata() {
+		$text      = '
+			This text contains syntax that may seem
+			like XML nodes:
+
+			<input />
+			</seemingly invalid element --/>
+			<!-- is this a comment? -->
+			<?xml version="1.0" ?>
+
+			&amp;&lt;&gt;&quot;&apos;
+
+			But! It is all treated as text.
+		';
+		$processor = new WP_XML_Processor(
+			"<root><my-pcdata>$text</my-pcdata></root>"
+		);
+		$processor->declare_element_as_pcdata( 'my-pcdata' );
+		$processor->next_tag( 'my-pcdata' );
+
+		$this->assertEquals(
+			$text,
+			$processor->get_modifiable_text(),
+			'get_modifiable_text() did not return the expected text'
+		);
+	}
+
+	/**
+	 * Ensures that bookmarks start and length correctly describe a given token in XML.
+	 *
+	 * @ticket 61365
+	 *
+	 * @dataProvider data_xml_nth_token_substring
+	 *
+	 * @param string $xml            Input XML.
+	 * @param int    $match_nth_token Which token to inspect from input XML.
+	 * @param string $expected_match  Expected full raw token bookmark should capture.
+	 */
+	public function test_token_bookmark_span( string $xml, int $match_nth_token, string $expected_match ) {
+		$processor = new class( $xml ) extends WP_XML_Processor {
+			/**
+			 * Returns the raw span of XML for the currently-matched
+			 * token, or null if not paused on any token.
+			 *
+			 * @return string|null Raw XML content of currently-matched token,
+			 *                     otherwise `null` if not matched.
+			 */
+			public function get_raw_token() {
+				if (
+					WP_XML_Processor::STATE_READY === $this->parser_state ||
+					WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+					WP_XML_Processor::STATE_COMPLETE === $this->parser_state
+				) {
+					return null;
+				}
+
+				$this->set_bookmark( 'mark' );
+				$mark = $this->bookmarks['mark'];
+
+				return substr( $this->xml, $mark->start, $mark->length );
+			}
+		};
+
+		for ( $i = 0; $i < $match_nth_token; $i++ ) {
+			$processor->next_token();
+		}
+
+		$raw_token = $processor->get_raw_token();
+		$this->assertIsString(
+			$raw_token,
+			"Failed to find raw token at position {$match_nth_token}: check test data provider."
+		);
+
+		$this->assertSame(
+			$expected_match,
+			$raw_token,
+			'Bookmarked wrong span of text for full matched token.'
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array
+	 */
+	public static function data_xml_nth_token_substring() {
+		return array(
+			// Tags.
+			'DIV start tag'                 => array( '<wp:content>', 1, '<wp:content>' ),
+			'DIV start tag with attributes' => array( '<wp:content wp:post-type="x" disabled="yes">', 1, '<wp:content wp:post-type="x" disabled="yes">' ),
+			'Nested DIV'                    => array( '<wp:content><wp:content b="yes">', 2, '<wp:content b="yes">' ),
+			'Sibling DIV'                   => array( '<wp:content></wp:content><wp:content b="yes">', 3, '<wp:content b="yes">' ),
+			'DIV before text'               => array( '<wp:content> text', 1, '<wp:content>' ),
+			'DIV after comment'             => array( '<root><!-- comment --><wp:content>', 3, '<wp:content>' ),
+			'DIV before comment'            => array( '<wp:content><!-- c --> ', 1, '<wp:content>' ),
+			'Start "self-closing" tag'      => array( '<wp:content />', 1, '<wp:content />' ),
+			'Void tag'                      => array( '<photo src="img.png">', 1, '<photo src="img.png">' ),
+			'Void tag w/self-closing flag'  => array( '<photo src="img.png" />', 1, '<photo src="img.png" />' ),
+			'Void tag inside DIV'           => array( '<wp:content><photo src="img.png"></wp:content>', 2, '<photo src="img.png">' ),
+
+			// Text.
+			'Text'                          => array( 'Just text</data>', 1, 'Just text' ),
+			'Text in DIV'                   => array( '<wp:content>Text<wp:content>', 2, 'Text' ),
+			'Text before DIV'               => array( 'Text<wp:content>', 1, 'Text' ),
+			'Text after comment'            => array( '<!-- comment -->Text<!-- c -->', 2, 'Text' ),
+			'Text before comment'           => array( 'Text<!-- c --> ', 1, 'Text' ),
+
+			// Comments.
+			'Comment'                       => array( '<!-- comment -->', 1, '<!-- comment -->' ),
+			'Comment in DIV'                => array( '<wp:content><!-- comment --><wp:content>', 2, '<!-- comment -->' ),
+			'Comment before DIV'            => array( '<!-- comment --><wp:content>', 1, '<!-- comment -->' ),
+			'Comment after DIV'             => array( '<wp:content></wp:content><!-- comment -->', 3, '<!-- comment -->' ),
+			'Comment after comment'         => array( '<!-- comment --><!-- comment -->', 2, '<!-- comment -->' ),
+			'Comment before comment'        => array( '<!-- comment --><!-- c --> ', 1, '<!-- comment -->' ),
+			'Empty comment'                 => array( '<!---->', 1, '<!---->' ),
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_next_tag_with_no_arguments_should_find_the_next_existing_tag() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+
+		$this->assertTrue( $processor->next_tag(), 'Querying an existing tag did not return true' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_next_tag_should_return_false_for_a_non_existing_tag() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+
+		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_modifiable_text
+	 */
+	public function test_normalizes_carriage_returns_in_text_nodes() {
+		$processor = new WP_XML_Processor(
+			"<wp:content>We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns</wp:content>"
+		);
+		$processor->next_tag();
+		$processor->next_token();
+		$this->assertEquals(
+			"We are\nnormalizing\n\nthe\n\n\n\ncarriage returns",
+			$processor->get_modifiable_text(),
+			'get_modifiable_text() did not normalize the carriage return characters'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_modifiable_text
+	 */
+	public function test_normalizes_carriage_returns_in_cdata() {
+		$processor = new WP_XML_Processor(
+			"<wp:content><![CDATA[We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns]]>"
+		);
+		$processor->next_tag();
+		$processor->next_token();
+		$this->assertEquals(
+			"We are\nnormalizing\n\nthe\n\n\n\ncarriage returns",
+			$processor->get_modifiable_text(),
+			'get_modifiable_text() did not normalize the carriage return characters'
+		);
+	}
+
+	/**
+	 * Ensures that a breadcrumbs query in next_tag() does not match a tag closer.
+	 *
+	 * @ticket 61365
+	 * @covers WP_XML_Processor::next_tag
+	 * @covers WP_XML_Processor::is_tag_closer
+	 */
+	public function test_next_tag_should_not_stop_on_closers() {
+		$processor = new WP_XML_Processor( '<wp:content><photo /></wp:content>' );
+
+		$this->assertTrue( $processor->next_tag( array( 'breadcrumbs' => array( 'wp:content' ) ) ), 'Did not find desired tag opener' );
+		$this->assertFalse( $processor->next_tag( array( 'breadcrumbs' => array( 'wp:content' ) ) ), 'Visited an unwanted tag, a tag closer' );
+	}
+
+	/**
+	 * Verifies that updates to a document before calls to `get_updated_xml()` don't
+	 * lead to the XML Processor jumping to the wrong tag after the updates.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_updated_xml
+	 */
+	public function test_internal_pointer_returns_to_original_spot_after_inserting_content_before_cursor() {
+		$tags = new WP_XML_Processor( '<root><wp:content>outside</wp:content><section><wp:content><photo>inside</wp:content></section></root>' );
+
+		$tags->next_tag();
+		$tags->next_tag();
+		$tags->set_attribute( 'wp:post-type', 'foo' );
+		$tags->next_tag( 'section' );
+
+		// Return to this spot after moving ahead.
+		$tags->set_bookmark( 'here' );
+
+		// Move ahead.
+		$tags->next_tag( 'photo' );
+		$tags->seek( 'here' );
+		$this->assertSame( '<root><wp:content wp:post-type="foo">outside</wp:content><section><wp:content><photo>inside</wp:content></section></root>', $tags->get_updated_xml() );
+		$this->assertSame( 'section', $tags->get_tag() );
+		$this->assertFalse( $tags->is_tag_closer() );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+
+		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+		$this->assertFalse( $processor->next_tag( 'wp:content' ), 'Querying a non-existing tag did not return false' );
+
+		$processor->set_attribute( 'id', 'primary' );
+
+		$this->assertSame(
+			self::XML_SIMPLE,
+			$processor->get_updated_xml(),
+			'Calling get_updated_xml after updating a non-existing tag returned an XML that was different from the original XML'
+		);
+	}
+
+	/**
+	 * Ensures attribute updates attempted on a tag closer are rejected.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 * @covers WP_XML_Processor::remove_attribute
+	 */
+	public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() {
+		$processor = new WP_XML_Processor( '<wp:content id="3"></wp:content>' );
+		$processor->next_token();
+		$this->assertFalse( $processor->is_tag_closer(), 'Skipped tag opener' );
+
+		$processor->next_token();
+		$this->assertTrue( $processor->is_tag_closer(), 'Skipped tag closer' );
+		$this->assertFalse( $processor->set_attribute( 'id', 'test' ), "Allowed setting an attribute on a tag closer when it shouldn't have" );
+		$this->assertFalse( $processor->remove_attribute( 'invalid-id' ), "Allowed removing an attribute on a tag closer when it shouldn't have" );
+		$this->assertSame(
+			'<wp:content id="3"></wp:content>',
+			$processor->get_updated_xml(),
+			'Calling get_updated_xml after attempting attribute updates on a tag closer returned an XML that was different from the original XML'
+		);
+	}
+
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor->next_tag();
+		$processor->set_attribute( 'test-attribute', 'test-value' );
+
+		$this->assertSame(
+			'<wp:content test-attribute="test-value" id="first"><wp:text id="second">Text</wp:text></wp:content>',
+			$processor->get_updated_xml(),
+			'Updated XML does not include attribute added via set_attribute()'
+		);
+		$this->assertSame(
+			'test-value',
+			$processor->get_attribute( 'test-attribute' ),
+			'get_attribute() (called after get_updated_xml()) did not return attribute added via set_attribute()'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_returns_updated_values_before_they_are_applied() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor->next_tag();
+		$processor->set_attribute( 'test-attribute', 'test-value' );
+
+		$this->assertSame(
+			'test-value',
+			$processor->get_attribute( 'test-attribute' ),
+			'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()'
+		);
+		$this->assertSame(
+			'<wp:content test-attribute="test-value" id="first"><wp:text id="second">Text</wp:text></wp:content>',
+			$processor->get_updated_xml(),
+			'Updated XML does not include attribute added via set_attribute()'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor->next_tag();
+		$processor->set_attribute( 'test-ATTribute', 'test-value' );
+
+		$this->assertSame(
+			'test-value',
+			$processor->get_attribute( 'test-ATTribute' ),
+			'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()'
+		);
+		$this->assertSame(
+			'<wp:content test-ATTribute="test-value" id="first"><wp:text id="second">Text</wp:text></wp:content>',
+			$processor->get_updated_xml(),
+			'Updated XML does not include attribute added via set_attribute()'
+		);
+	}
+
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor->next_tag();
+		$processor->remove_attribute( 'id' );
+
+		$this->assertNull(
+			$processor->get_attribute( 'id' ),
+			'get_attribute() (called before get_updated_xml()) returned attribute that was removed by remove_attribute()'
+		);
+		$this->assertSame(
+			'<wp:content ><wp:text id="second">Text</wp:text></wp:content>',
+			$processor->get_updated_xml(),
+			'Updated XML includes attribute that was removed by remove_attribute()'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor->next_tag();
+		$processor->set_attribute( 'test-attribute', 'test-value' );
+		$processor->remove_attribute( 'test-attribute' );
+
+		$this->assertNull(
+			$processor->get_attribute( 'test-attribute' ),
+			'get_attribute() (called before get_updated_xml()) returned attribute that was added via set_attribute() and then removed by remove_attribute()'
+		);
+		$this->assertSame(
+			self::XML_SIMPLE,
+			$processor->get_updated_xml(),
+			'Updated XML includes attribute that was added via set_attribute() and then removed by remove_attribute()'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor->next_tag();
+		$processor->set_attribute( 'id', 'test-value' );
+		$processor->remove_attribute( 'id' );
+
+		$this->assertNull(
+			$processor->get_attribute( 'id' ),
+			'get_attribute() (called before get_updated_xml()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()'
+		);
+		$this->assertSame(
+			'<wp:content ><wp:text id="second">Text</wp:text></wp:content>',
+			$processor->get_updated_xml(),
+			'Updated XML includes attribute that was overwritten by set_attribute() and then removed by remove_attribute()'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor->next_tag();
+		$processor->set_attribute( 'id', 'new-id' );
+		$this->assertSame(
+			'<wp:content id="new-id"><wp:text id="second">Text</wp:text></wp:content>',
+			$processor->get_updated_xml(),
+			'Existing attribute was not updated'
+		);
+	}
+
+	/**
+	 * Ensures that when setting an attribute multiple times that only
+	 * one update flushes out into the updated XML.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_set_attribute_with_case_variants_updates_only_the_original_first_copy() {
+		$processor = new WP_XML_Processor( '<wp:content data-enabled="5">' );
+		$processor->next_tag();
+		$processor->set_attribute( 'data-enabled', 'canary1' );
+		$processor->set_attribute( 'data-enabled', 'canary2' );
+		$processor->set_attribute( 'data-enabled', 'canary3' );
+
+		$this->assertSame( '<wp:content data-enabled="canary3">', strtolower( $processor->get_updated_xml() ) );
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		while ( $processor->next_tag() ) {
+			$processor->set_attribute( 'data-foo', 'bar' );
+		}
+
+		$this->assertSame(
+			'<wp:content data-foo="bar" id="first"><wp:text data-foo="bar" id="second">Text</wp:text></wp:content>',
+			$processor->get_updated_xml(),
+			'Not all tags were updated when looping with next_tag() and set_attribute()'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::remove_attribute
+	 */
+	public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor->next_tag();
+		$processor->remove_attribute( 'id' );
+
+		$this->assertSame(
+			'<wp:content ><wp:text id="second">Text</wp:text></wp:content>',
+			$processor->get_updated_xml(),
+			'Attribute was not removed'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::remove_attribute
+	 */
+	public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() {
+		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor->next_tag();
+		$processor->remove_attribute( 'no-such-attribute' );
+
+		$this->assertSame(
+			self::XML_SIMPLE,
+			$processor->get_updated_xml(),
+			'Content was changed when attempting to remove an attribute that did not exist'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_correctly_parses_xml_attributes_wrapped_in_single_quotation_marks() {
+		$processor = new WP_XML_Processor(
+			'<wp:content id=\'first\'><wp:text id=\'second\'>Text</wp:text></wp:content>'
+		);
+		$processor->next_tag(
+			array(
+				'breadcrumbs' => array( 'wp:content' ),
+				'id'          => 'first',
+			)
+		);
+		$processor->remove_attribute( 'id' );
+		$processor->next_tag(
+			array(
+				'breadcrumbs' => array( 'wp:text' ),
+				'id'          => 'second',
+			)
+		);
+		$processor->set_attribute( 'id', 'single-quote' );
+		$this->assertSame(
+			'<wp:content ><wp:text id="single-quote">Text</wp:text></wp:content>',
+			$processor->get_updated_xml(),
+			'Did not remove single-quoted attribute'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+	 * @expectedIncorrectUsage WP_XML_Processor::set_attribute
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_setting_an_attribute_to_false_is_rejected() {
+		$processor = new WP_XML_Processor(
+			'<form action="/action_page.php"><input checked type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>'
+		);
+		$processor->next_tag( 'input' );
+		$this->assertFalse(
+			$processor->set_attribute( 'checked', false ),
+			'Accepted a boolean attribute value.'
+		);
+	}
+
+	/**
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::set_attribute
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() {
+		$xml_input = '<form action="/action_page.php"><input type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>';
+		$processor = new WP_XML_Processor( $xml_input );
+		$processor->next_tag( 'input' );
+		$processor->set_attribute( 'checked', false );
+		$this->assertSame(
+			$xml_input,
+			$processor->get_updated_xml(),
+			'Changed the markup unexpectedly when setting a non-existing attribute to false'
+		);
+	}
+
+	/**
+	 * Ensures that unclosed and invalid comments trigger warnings or errors.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 * @covers WP_XML_Processor::is_paused_at_incomplete_input
+	 *
+	 * @dataProvider data_xml_with_unclosed_comments
+	 *
+	 * @param string $xml_ending_before_comment_close XML with opened comments that aren't closed.
+	 */
+	public function test_documents_may_end_with_unclosed_comment( $xml_ending_before_comment_close ) {
+		$processor = new WP_XML_Processor( $xml_ending_before_comment_close );
+
+		$this->assertFalse(
+			$processor->next_tag(),
+			"Should not have found any tag, but found {$processor->get_tag()}."
+		);
+
+		$this->assertTrue(
+			$processor->is_paused_at_incomplete_input(),
+			"Should have indicated that the parser found an incomplete token but didn't."
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_xml_with_unclosed_comments() {
+		return array(
+			'Shortest open valid comment' => array( '<!--' ),
+			'Basic truncated comment'     => array( '<!-- this ends --' ),
+		);
+	}
+
+	/**
+	 * Ensures that the processor doesn't attempt to match an incomplete token.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 * @covers WP_XML_Processor::is_paused_at_incomplete_input
+	 *
+	 * @dataProvider data_incomplete_syntax_elements
+	 *
+	 * @param string $incomplete_xml XML text containing some kind of incomplete syntax.
+	 */
+	public function test_next_tag_returns_false_for_incomplete_syntax_elements( $incomplete_xml ) {
+		$processor = new WP_XML_Processor( $incomplete_xml );
+
+		$processor->next_tag();
+		$this->assertFalse(
+			$processor->next_tag(),
+			"Shouldn't have found any tags but found {$processor->get_tag()}."
+		);
+
+		$this->assertTrue(
+			$processor->is_paused_at_incomplete_input(),
+			"Should have indicated that the parser found an incomplete token but didn't."
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_incomplete_syntax_elements() {
+		return array(
+			'Incomplete tag name'                         => array( '<root><swit' ),
+			'Incomplete tag (no attributes)'              => array( '<root><wp:content' ),
+			'Incomplete tag (attributes)'                 => array( '<root><wp:content inert="yes" title="test"' ),
+			'Incomplete attribute (before =)'             => array( '<root><button disabled' ),
+			'Incomplete attribute (before ")'             => array( '<root><button disabled=' ),
+			'Incomplete attribute (before closing quote)' => array( '<root><button disabled="value started' ),
+			'Incomplete attribute (single quoted)'        => array( "<root><li wp:post-type='just-another class" ),
+			'Incomplete attribute (double quoted)'        => array( '<root><iframe src="https://www.example.com/embed/abcdef' ),
+			'Incomplete comment (normative)'              => array( '<root><!-- without end' ),
+			'Incomplete comment (missing --)'             => array( '<root><!-- without end --' ),
+			'Incomplete CDATA'                            => array( '<root><![CDATA[something inside of here needs to get out' ),
+			'Partial CDATA'                               => array( '<root><![CDA' ),
+			'Partially closed CDATA]'                     => array( '<root><![CDATA[cannot escape]' ),
+		);
+	}
+
+	/**
+	 * Ensures that the processor doesn't attempt to match an incomplete text node.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_token
+	 * @covers WP_XML_Processor::is_paused_at_incomplete_input
+	 *
+	 * @dataProvider data_incomplete_text_nodes
+	 * @param string $incomplete_xml XML text containing some kind of incomplete syntax.
+	 * @param int    $node_at        Number of tokens that must be found before next_token() is expected to fail.
+	 */
+	public function test_next_tag_returns_false_for_incomplete_text_nodes( $incomplete_xml, $node_at = 1 ) {
+		$processor = new WP_XML_Processor( $incomplete_xml );
+
+		for ( $i = 0; $i < $node_at; $i++ ) {
+			$this->assertTrue(
+				$processor->next_token(),
+				"Failed to find text node {$i} in incomplete XML."
+			);
+		}
+
+		$this->assertFalse(
+			$processor->next_token(),
+			"Shouldn't have found any more text nodes but found '{$processor->get_modifiable_text()}'."
+		);
+
+		$this->assertTrue(
+			$processor->is_paused_at_incomplete_input(),
+			"Should have indicated that the parser found an incomplete token but didn't."
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_incomplete_text_nodes() {
+		return array(
+			'Incomplete text node after a tag'   => array( '<data>This is a text node', 1 ),
+			'Incomplete text node after (CDATA)' => array( '<data>This is a text node<![CDATA[ and this is a second text node ]]> and this is the third text node.', 3 ),
+		);
+	}
+
+	/**
+	 * The string "--" (double-hyphen) must not occur within comments.
+	 *
+	 * @expectedIncorrectUsage WP_XML_Processor::parse_next_tag
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_rejects_malformed_comments() {
+		$processor = new WP_XML_Processor( '<!-- comment -- oh, I did not close it after the initial double dash -->' );
+		$this->assertFalse( $processor->next_token(), 'Did not reject a malformed XML comment.' );
+	}
+
+	/**
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_handles_malformed_taglike_open_short_xml() {
+		$processor = new WP_XML_Processor( '<' );
+		$result    = $processor->next_tag();
+		$this->assertFalse( $result, 'Did not handle "<" xml properly.' );
+	}
+
+	/**
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_handles_malformed_taglike_close_short_xml() {
+		$processor = new WP_XML_Processor( '</ ' );
+		$result    = $processor->next_tag();
+		$this->assertFalse( $result, 'Did not handle "</ " xml properly.' );
+	}
+
+	/**
+	 * @expectedIncorrectUsage WP_XML_Processor::base_class_next_token
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_rejects_empty_element_that_is_also_a_closer() {
+		$processor = new WP_XML_Processor( '</wp:content/> ' );
+		$result    = $processor->next_tag();
+		$this->assertFalse( $result, 'Did not handle "</wp:content/>" xml properly.' );
+	}
+
+	/**
+	 * Ensures that non-tag syntax starting with `<` is rejected.
+	 *
+	 * @ticket 61365
+	 */
+	public function test_single_text_node_with_taglike_text() {
+		$processor = new WP_XML_Processor( '<root>This is a text node< /A>' );
+		$this->assertTrue( $processor->next_token(), 'A root node was not found.' );
+		$this->assertTrue( $processor->next_token(), 'A valid text node was not found.' );
+		$this->assertEquals( 'This is a text node', $processor->get_modifiable_text(), 'The contents of a valid text node were not correctly captured.' );
+		$this->assertFalse( $processor->next_tag(), 'A malformed XML markup was not rejected.' );
+	}
+
+	/**
+	 * Ensures that the contents of a CDATA section are exposed as a text node.
+	 *
+	 * @ticket 61365
+	 */
+	public function test_parses_CDATA() {
+		$processor = new WP_XML_Processor( '<root><![CDATA[This is a CDATA text node.]]></root>' );
+
+		// Step past the opening <root> tag so the next token is the CDATA section.
+		$processor->next_tag();
+		$this->assertTrue( $processor->next_token(), 'The first text node was not found.' );
+
+		$this->assertEquals( 'This is a CDATA text node.', $processor->get_modifiable_text(), 'The contents of a CDATA text node were not correctly captured.' );
+	}
+
+	/**
+	 * @ticket 61365
+	 */
+	public function test_yields_CDATA_a_separate_text_node() {
+		$processor = new WP_XML_Processor( '<root>This is the first text node <![CDATA[ and this is a second text node ]]> and this is the third text node.</root>' );
+		// The first token is the opening <root> tag; the text nodes under test follow it.
+		$processor->next_token();
+		$this->assertTrue( $processor->next_token(), 'The first text node was not found.' );
+		$this->assertEquals(
+			'This is the first text node ',
+			$processor->get_modifiable_text(),
+			'The contents of a valid text node were not correctly captured.'
+		);
+
+		$this->assertTrue( $processor->next_token(), 'The CDATA text node was not found.' );
+		$this->assertEquals(
+			' and this is a second text node ',
+			$processor->get_modifiable_text(),
+			'The contents of a CDATA text node were not correctly captured.'
+		);
+
+		$this->assertTrue( $processor->next_token(), 'The text node was not found.' );
+		$this->assertEquals(
+			' and this is the third text node.',
+			$processor->get_modifiable_text(),
+			'The contents of a valid text node were not correctly captured.'
+		);
+	}
+
+	/**
+	 *
+	 * @ticket 61365
+	 */
+	public function test_xml_declaration() {
+		$processor = new WP_XML_Processor( '<?xml version="1.0" encoding="UTF-8" ?>' );
+		$this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
+		$this->assertEquals(
+			'#xml-declaration',
+			$processor->get_token_type(),
+			'The XML declaration was not correctly identified.'
+		);
+		$this->assertEquals( '1.0', $processor->get_attribute( 'version' ), 'The version attribute was not correctly captured.' );
+		$this->assertEquals( 'UTF-8', $processor->get_attribute( 'encoding' ), 'The encoding attribute was not correctly captured.' );
+	}
+
+	/**
+	 *
+	 * @ticket 61365
+	 */
+	public function test_xml_declaration_with_single_quotes() {
+		$processor = new WP_XML_Processor( "<?xml version='1.0' encoding='UTF-8' ?>" );
+		$this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
+		$this->assertEquals(
+			'#xml-declaration',
+			$processor->get_token_type(),
+			'The XML declaration was not correctly identified.'
+		);
+		$this->assertEquals( '1.0', $processor->get_attribute( 'version' ), 'The version attribute was not correctly captured.' );
+		$this->assertEquals( 'UTF-8', $processor->get_attribute( 'encoding' ), 'The encoding attribute was not correctly captured.' );
+	}
+
+	/**
+	 *
+	 * @ticket 61365
+	 */
+	public function test_processor_instructions() {
+		$processor = new WP_XML_Processor(
+			// The first <?xml tag is an xml declaration.
+			'<?xml version="1.0" encoding="UTF-8" ?>' .
+			// The second <?xml tag is a processing instruction.
+			'<?xml stylesheet type="text/xsl" href="style.xsl" ?>'
+		);
+		$this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
+		$this->assertTrue( $processor->next_token(), 'The processing instruction was not found.' );
+		$this->assertEquals(
+			'#processing-instructions',
+			$processor->get_token_type(),
+			'The processing instruction was not correctly identified.'
+		);
+		$this->assertEquals( ' stylesheet type="text/xsl" href="style.xsl" ', $processor->get_modifiable_text(), 'The modifiable text was not correctly captured.' );
+	}
+
+	/**
+	 * Ensures that updates which are enqueued in front of the cursor
+	 * are applied before moving forward in the document.
+	 *
+	 * @ticket 61365
+	 */
+	public function test_applies_updates_before_proceeding() {
+		$xml = '<root><wp:content><photo/></wp:content><wp:content><photo/></wp:content></root>';
+
+		$subclass = new class( $xml ) extends WP_XML_Processor {
+			/**
+			 * Inserts raw text after the current token.
+			 *
+			 * @param string $new_xml Raw text to insert.
+			 */
+			public function insert_after( $new_xml ) {
+				$this->set_bookmark( 'here' );
+				$this->lexical_updates[] = new WP_HTML_Text_Replacement(
+					$this->bookmarks['here']->start + $this->bookmarks['here']->length,
+					0,
+					$new_xml
+				);
+			}
+		};
+
+		$subclass->next_tag( 'photo' );
+		$subclass->insert_after( '<p>snow-capped</p>' );
+
+		$subclass->next_tag();
+		$this->assertSame(
+			'p',
+			$subclass->get_tag(),
+			'Should have matched inserted XML as next tag.'
+		);
+
+		$subclass->next_tag( 'photo' );
+		$subclass->set_attribute( 'alt', 'mountain' );
+
+		$this->assertSame(
+			'<root><wp:content><photo/><p>snow-capped</p></wp:content><wp:content><photo alt="mountain"/></wp:content></root>',
+			$subclass->get_updated_xml(),
+			'Should have properly applied the update from in front of the cursor.'
+		);
+	}
+
 
 	/**
 	 * @ticket 61365
@@ -134,7 +1510,7 @@ public function test_no_text_allowed_after_root_element() {
 		$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
 		$this->assertFalse( $processor->next_tag(), 'Found a non-existent tag.' );
 		$this->assertEquals(
-			WP_XML_Tag_Processor::ERROR_SYNTAX,
+			WP_XML_Processor::ERROR_SYNTAX,
 			$processor->get_last_error(),
 			'Did not run into a parse error after the root element'
 		);
@@ -183,7 +1559,7 @@ public function test_elements_not_allowed_after_root_element() {
 		$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
 		$this->assertFalse( $processor->next_tag(), 'Fount an illegal tag.' );
 		$this->assertEquals(
-			WP_XML_Tag_Processor::ERROR_SYNTAX,
+			WP_XML_Processor::ERROR_SYNTAX,
 			$processor->get_last_error(),
 			'Did not run into a parse error after the root element'
 		);
@@ -212,7 +1588,7 @@ public function test_cdata_not_allowed_after_root_element() {
 		$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
 		$this->assertFalse( $processor->next_tag(), 'Did not reject a comment node after the root element' );
 		$this->assertEquals(
-			WP_XML_Tag_Processor::ERROR_SYNTAX,
+			WP_XML_Processor::ERROR_SYNTAX,
 			$processor->get_last_error(),
 			'Did not run into a parse error after the root element'
 		);
@@ -249,4 +1625,4 @@ public function test_unclosed_root_yields_incomplete_input() {
 		}
 		$this->assertTrue( $processor->is_paused_at_incomplete_input(), 'Did not indicate that the XML input was incomplete.' );
 	}
-}
+}
\ No newline at end of file
diff --git a/packages/playground/data-liberation/tests/WPXMLTagProcessorTests.php b/packages/playground/data-liberation/tests/WPXMLTagProcessorTests.php
deleted file mode 100644
index c336371ec8..0000000000
--- a/packages/playground/data-liberation/tests/WPXMLTagProcessorTests.php
+++ /dev/null
@@ -1,1426 +0,0 @@
-<?php
-/**
- * Unit tests covering WP_XML_Tag_Processor functionality.
- *
- * @package WordPress
- * @subpackage XML-API
- */
-use PHPUnit\Framework\TestCase;
-
-/**
- * @group xml-api
- *
- * @coversDefaultClass WP_XML_Tag_Processor
- */
-class WPXMLTagProcessorTests extends TestCase {
-	const XML_SIMPLE       = '<wp:content id="first"><wp:text id="second">Text</wp:text></wp:content>';
-	const XML_WITH_CLASSES = '<wp:content wp:post-type="main with-border" id="first"><wp:text wp:post-type="not-main bold with-border" id="second">Text</wp:text></wp:content>';
-	const XML_MALFORMED    = '<wp:content><wp:text wp:post-type="d-md-none" Notifications</wp:text><wp:text wp:post-type="d-none d-md-inline">Back to notifications</wp:text></wp:content>';
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_tag
-	 */
-	public function test_get_tag_returns_null_before_finding_tags() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content>Test</wp:content>' );
-
-		$this->assertNull( $processor->get_tag(), 'Calling get_tag() without selecting a tag did not return null' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_tag
-	 */
-	public function test_get_tag_returns_null_when_not_in_open_tag() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content>Test</wp:content>' );
-
-		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
-		$this->assertNull( $processor->get_tag(), 'Accessing a non-existing tag did not return null' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_tag
-	 */
-	public function test_get_tag_returns_open_tag_name() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content>Test</wp:content>' );
-
-		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
-		$this->assertSame( 'wp:content', $processor->get_tag(), 'Accessing an existing tag name did not return "div"' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::is_empty_element
-	 *
-	 * @dataProvider data_is_empty_element
-	 *
-	 * @param string $xml Input XML whose first tag might contain the self-closing flag `/`.
-	 * @param bool $flag_is_set Whether the input XML's first tag contains the self-closing flag.
-	 */
-	public function test_is_empty_element_matches_input_xml( $xml, $flag_is_set ) {
-		$processor = new WP_XML_Tag_Processor( $xml );
-		$processor->next_tag( array( 'tag_closers' => 'visit' ) );
-
-		if ( $flag_is_set ) {
-			$this->assertTrue( $processor->is_empty_element(), 'Did not find the empty element tag when it was present.' );
-		} else {
-			$this->assertFalse( $processor->is_empty_element(), 'Found the empty element tag when it was absent.' );
-		}
-	}
-
-	/**
-	 * Data provider. XML tags which might have a self-closing flag, and an indicator if they do.
-	 *
-	 * @return array[]
-	 */
-	public static function data_is_empty_element() {
-		return array(
-			// These should not have a self-closer, and will leave an element un-closed if it's assumed they are self-closing.
-			'Self-closing flag on non-void XML element'    => array( '<wp:content />', true ),
-			'No self-closing flag on non-void XML element' => array( '<wp:content>', false ),
-			// These should not have a self-closer, but are benign when used because the elements are void.
-			'Self-closing flag on void XML element'        => array( '<photo />', true ),
-			'No self-closing flag on void XML element'     => array( '<photo>', false ),
-			'Self-closing flag on void XML element without spacing' => array( '<photo/>', true ),
-			// These should not have a self-closer, but as part of a tag closer they are entirely ignored.
-			'No self-closing flag on tag closer'           => array( '</textarea>', false ),
-			// These can and should have self-closers, and will leave an element un-closed if it's assumed they aren't self-closing.
-			'Self-closing flag on a foreign element'       => array( '<circle />', true ),
-			'No self-closing flag on a foreign element'    => array( '<circle>', false ),
-			// These involve syntax peculiarities.
-			'Self-closing flag after extra spaces'         => array( '<wp:content      />', true ),
-			'Self-closing flag after quoted attribute'     => array( '<wp:content id="test"/>', true ),
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_attribute_returns_null_when_not_in_open_tag() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
-
-		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
-		$this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a non-existing tag did not return null' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_attribute_returns_null_when_in_closing_tag() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
-
-		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
-		$this->assertTrue( $processor->next_tag( array( 'tag_closers' => 'visit' ) ), 'Querying an existing closing tag did not return true' );
-		$this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a closing tag did not return null' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_attribute_returns_null_when_attribute_missing() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
-
-		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
-		$this->assertNull( $processor->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::base_class_next_token
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_attributes_are_rejected_in_tag_closers() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content>Test</wp:content wp:post-type="test">' );
-
-		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
-		$this->assertFalse( $processor->next_tag( array( 'tag_closers' => 'visit' ) ), 'Querying an existing but invalid closing tag did not return false.' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_attribute_returns_attribute_value() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
-
-		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
-		$this->assertSame( 'test', $processor->get_attribute( 'wp:post-type' ), 'Accessing a wp:post-type="test" attribute value did not return "test"' );
-	}
-
-	/**
-	 * @ticket 61365
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_parsing_stops_on_malformed_attribute_value_no_value() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content enabled wp:post-type="test">Test</wp:content>' );
-
-		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
-	}
-
-	/**
-	 * @ticket 61365
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_parsing_stops_on_malformed_attribute_value_no_quotes() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content enabled=1 wp:post-type="test">Test</wp:content>' );
-
-		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
-	}
-
-	/**
-	 * @ticket 61365
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::get_attribute
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_malformed_attribute_value_containing_ampersand_is_treated_as_plaintext() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content enabled="WordPress & WordPress">Test</wp:content>' );
-
-		$this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' );
-        $this->assertEquals('WordPress & WordPress', $processor->get_attribute('enabled'));
-	}
-
-	/**
-	 * @ticket 61365
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::get_attribute
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_malformed_attribute_value_containing_entity_without_semicolon_is_treated_as_plaintext() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content enabled="&#x94">Test</wp:content>' );
-
-		$this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' );
-		$this->assertEquals('&#x94', $processor->get_attribute('enabled'));
-	}
-
-	/**
-	 * @ticket 61365
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_parsing_stops_on_malformed_attribute_value_contains_lt_character() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content enabled="I love <3 this">Test</wp:content>' );
-
-		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
-	}
-
-	/**
-	 * @ticket 61365
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_parsing_stops_on_malformed_tags_duplicate_attributes() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content id="update-me" id="ignored-id"><wp:text id="second">Text</wp:text></wp:content>' );
-
-		$this->assertFalse( $processor->next_tag() );
-	}
-
-	/**
-	 * @ticket 61365
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_parsing_stops_on_malformed_attribute_name_contains_slash() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content a/b="test">Test</wp:content>' );
-
-		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_modifiable_text_returns_a_decoded_value() {
-		$processor = new WP_XML_Tag_Processor( '<root>&#x201C;&#x1f604;&#x201D;</root>' );
-
-		$processor->next_tag( 'root' );
-		$processor->next_token();
-
-		$this->assertEquals(
-			'“😄”',
-			$processor->get_modifiable_text(),
-			'Reading an encoded text did not decode it.'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_attribute_returns_a_decoded_value() {
-		$processor = new WP_XML_Tag_Processor( '<root encoded-data="&#x201C;&#x1f604;&#x201D;"></root>' );
-
-		$this->assertTrue( $processor->next_tag( 'root' ), 'Querying a tag did not return true' );
-		$this->assertEquals(
-			'“😄”',
-			$processor->get_attribute( 'encoded-data' ),
-			'Reading an encoded attribute did not decode it.'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 *
-	 * @param string $attribute_name Name of data-enabled attribute with case variations.
-	 */
-	public function test_get_attribute_is_case_sensitive() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content DATA-enabled="true">Test</wp:content>' );
-		$processor->next_tag();
-
-		$this->assertEquals(
-			'true',
-			$processor->get_attribute( 'DATA-enabled' ),
-			'Accessing an attribute by a same-cased name did return not its value'
-		);
-
-		$this->assertNull(
-			$processor->get_attribute( 'data-enabled' ),
-			'Accessing an attribute by a differently-cased name did return its value'
-		);
-	}
-
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::remove_attribute
-	 */
-	public function test_remove_attribute_is_case_sensitive() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content DATA-enabled="true">Test</wp:content>' );
-		$processor->next_tag();
-		$processor->remove_attribute( 'data-enabled' );
-
-		$this->assertSame( '<wp:content DATA-enabled="true">Test</wp:content>', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did remove the attribute' );
-
-		$processor->remove_attribute( 'DATA-enabled' );
-
-		$this->assertSame( '<wp:content >Test</wp:content>', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did not remove the attribute' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::set_attribute
-	 */
-	public function test_set_attribute_is_case_sensitive() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content DATA-enabled="true">Test</wp:content>' );
-		$processor->next_tag();
-		$processor->set_attribute( 'data-enabled', 'abc' );
-
-		$this->assertSame( '<wp:content data-enabled="abc" DATA-enabled="true">Test</wp:content>', $processor->get_updated_xml(), 'A case-insensitive set_attribute call did not update the existing attribute' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
-	 */
-	public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
-		$this->assertNull(
-			$processor->get_attribute_names_with_prefix( 'data-' ),
-			'Accessing attributes by their prefix did not return null when no tag was selected'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
-	 */
-	public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
-		$processor->next_tag( 'p' );
-		$this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a non-existing tag did not return null' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
-	 */
-	public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
-		$processor->next_tag( 'wp:content' );
-		$processor->next_tag( array( 'tag_closers' => 'visit' ) );
-
-		$this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a closing tag did not return null' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
-	 */
-	public function test_get_attribute_names_with_prefix_returns_empty_array_when_no_attributes_present() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content>Test</wp:content>' );
-		$processor->next_tag( 'wp:content' );
-
-		$this->assertSame( array(), $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing the attributes on a tag without any did not return an empty array' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
-	 */
-	public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_original_case() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content DATA-enabled="yes" wp:post-type="test" data-test-ID="14">Test</wp:content>' );
-		$processor->next_tag();
-
-		$this->assertSame(
-			array( 'data-test-ID' ),
-			$processor->get_attribute_names_with_prefix( 'data-' ),
-			'Accessing attributes by their prefix did not return their lowercase names'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
-	 */
-	public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
-		$processor->next_tag();
-		$processor->set_attribute( 'data-test-id', '14' );
-
-		$this->assertSame(
-			'<wp:content data-test-id="14" data-foo="bar">Test</wp:content>',
-			$processor->get_updated_xml(),
-			"Updated XML doesn't include attribute added via set_attribute"
-		);
-		$this->assertSame(
-			array( 'data-test-id', 'data-foo' ),
-			$processor->get_attribute_names_with_prefix( 'data-' ),
-			"Accessing attribute names doesn't find attribute added via set_attribute"
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::__toString
-	 */
-	public function test_to_string_returns_updated_xml() {
-		$processor = new WP_XML_Tag_Processor( '<line id="remove" /><wp:content enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>' );
-		$processor->next_tag();
-		$processor->remove_attribute( 'id' );
-
-		$processor->next_tag();
-		$processor->set_attribute( 'id', 'wp:content-id-1' );
-
-		$this->assertSame(
-			$processor->get_updated_xml(),
-			(string) $processor,
-			'get_updated_xml() returned a different value than __toString()'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_updated_xml
-	 */
-	public function test_get_updated_xml_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() {
-		$processor = new WP_XML_Tag_Processor( '<line id="remove" /><wp:content enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>' );
-		$processor->next_tag();
-		$processor->remove_attribute( 'id' );
-
-		$processor->next_tag();
-		$processor->set_attribute( 'id', 'wp:content-id-1' );
-
-		$this->assertSame(
-			'<line  /><wp:content id="wp:content-id-1" enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>',
-			$processor->get_updated_xml(),
-			'Calling get_updated_xml after updating the attributes of the second tag returned different XML than expected'
-		);
-
-		$processor->set_attribute( 'id', 'wp:content-id-2' );
-
-		$this->assertSame(
-			'<line  /><wp:content id="wp:content-id-2" enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>',
-			$processor->get_updated_xml(),
-			'Calling get_updated_xml after updating the attributes of the second tag for the second time returned different XML than expected'
-		);
-
-		$processor->next_tag();
-		$processor->remove_attribute( 'id' );
-
-		$this->assertSame(
-			'<line  /><wp:content id="wp:content-id-2" enabled="yes" wp:post-type="test">Test</wp:content><wp:text ></wp:text>',
-			$processor->get_updated_xml(),
-			'Calling get_updated_xml after removing the id attribute of the third tag returned different XML than expected'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_updated_xml
-	 */
-	public function test_get_updated_xml_without_updating_any_attributes_returns_the_original_xml() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-
-		$this->assertSame(
-			self::XML_SIMPLE,
-			$processor->get_updated_xml(),
-			'Casting WP_XML_Tag_Processor to a string without performing any updates did not return the initial XML snippet'
-		);
-	}
-
-	/**
-	 * Ensures that when seeking to an earlier spot in the document that
-	 * all previously-enqueued updates are applied as they ought to be.
-	 *
-	 * @ticket 61365
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
-	 */
-	public function test_get_updated_xml_applies_updates_to_content_after_seeking_to_before_parsed_bytes() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content><photo hidden></wp:content>' );
-
-		$processor->next_tag();
-		$processor->set_attribute( 'wonky', 'true' );
-		$processor->next_tag();
-		$processor->set_bookmark( 'here' );
-
-		$processor->next_tag( array( 'tag_closers' => 'visit' ) );
-		$processor->seek( 'here' );
-
-		$this->assertSame( '<wp:content wonky="true"><photo hidden></wp:content>', $processor->get_updated_xml() );
-	}
-
-	public function test_declare_element_as_pcdata() {
-		$text      = '
-			This text contains syntax that may seem
-			like XML nodes:
-
-			<input />
-			</seemingly invalid element --/>
-			<!-- is this a comment? -->
-			<?xml version="1.0" ?>
-
-			&amp;&lt;&gt;&quot;&apos;
-
-			But! It is all treated as text.
-		';
-		$processor = new WP_XML_Tag_Processor(
-			"<root><my-pcdata>$text</my-pcdata></root>"
-		);
-		$processor->declare_element_as_pcdata( 'my-pcdata' );
-		$processor->next_tag( 'my-pcdata' );
-
-		$this->assertEquals(
-			$text,
-			$processor->get_modifiable_text(),
-			'get_modifiable_text() did not return the expected text'
-		);
-	}
-
-	/**
-	 * Ensures that bookmarks start and length correctly describe a given token in XML.
-	 *
-	 * @ticket 61365
-	 *
-	 * @dataProvider data_xml_nth_token_substring
-	 *
-	 * @param string $xml            Input XML.
-	 * @param int    $match_nth_token Which token to inspect from input XML.
-	 * @param string $expected_match  Expected full raw token bookmark should capture.
-	 */
-	public function test_token_bookmark_span( string $xml, int $match_nth_token, string $expected_match ) {
-		$processor = new class( $xml ) extends WP_XML_Tag_Processor {
-			/**
-			 * Returns the raw span of XML for the currently-matched
-			 * token, or null if not paused on any token.
-			 *
-			 * @return string|null Raw XML content of currently-matched token,
-			 *                     otherwise `null` if not matched.
-			 */
-			public function get_raw_token() {
-				if (
-					WP_XML_Tag_Processor::STATE_READY === $this->parser_state ||
-					WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
-					WP_XML_Tag_Processor::STATE_COMPLETE === $this->parser_state
-				) {
-					return null;
-				}
-
-				$this->set_bookmark( 'mark' );
-				$mark = $this->bookmarks['mark'];
-
-				return substr( $this->xml, $mark->start, $mark->length );
-			}
-		};
-
-		for ( $i = 0; $i < $match_nth_token; $i++ ) {
-			$processor->next_token();
-		}
-
-		$raw_token = $processor->get_raw_token();
-		$this->assertIsString(
-			$raw_token,
-			"Failed to find raw token at position {$match_nth_token}: check test data provider."
-		);
-
-		$this->assertSame(
-			$expected_match,
-			$raw_token,
-			'Bookmarked wrong span of text for full matched token.'
-		);
-	}
-
-	/**
-	 * Data provider.
-	 *
-	 * @return array
-	 */
-	public static function data_xml_nth_token_substring() {
-		return array(
-			// Tags.
-			'DIV start tag'                 => array( '<wp:content>', 1, '<wp:content>' ),
-			'DIV start tag with attributes' => array( '<wp:content wp:post-type="x" disabled="yes">', 1, '<wp:content wp:post-type="x" disabled="yes">' ),
-			'Nested DIV'                    => array( '<wp:content><wp:content b="yes">', 2, '<wp:content b="yes">' ),
-			'Sibling DIV'                   => array( '<wp:content></wp:content><wp:content b="yes">', 3, '<wp:content b="yes">' ),
-			'DIV after text'                => array( 'text <wp:content>', 2, '<wp:content>' ),
-			'DIV before text'               => array( '<wp:content> text', 1, '<wp:content>' ),
-			'DIV after comment'             => array( '<root><!-- comment --><wp:content>', 3, '<wp:content>' ),
-			'DIV before comment'            => array( '<wp:content><!-- c --> ', 1, '<wp:content>' ),
-			'Start "self-closing" tag'      => array( '<wp:content />', 1, '<wp:content />' ),
-			'Void tag'                      => array( '<photo src="img.png">', 1, '<photo src="img.png">' ),
-			'Void tag w/self-closing flag'  => array( '<photo src="img.png" />', 1, '<photo src="img.png" />' ),
-			'Void tag inside DIV'           => array( '<wp:content><photo src="img.png"></wp:content>', 2, '<photo src="img.png">' ),
-
-			// Text.
-			'Text'                          => array( 'Just text</data>', 1, 'Just text' ),
-			'Text in DIV'                   => array( '<wp:content>Text<wp:content>', 2, 'Text' ),
-			'Text before DIV'               => array( 'Text<wp:content>', 1, 'Text' ),
-			'Text after comment'            => array( '<!-- comment -->Text<!-- c -->', 2, 'Text' ),
-			'Text before comment'           => array( 'Text<!-- c --> ', 1, 'Text' ),
-
-			// Comments.
-			'Comment'                       => array( '<!-- comment -->', 1, '<!-- comment -->' ),
-			'Comment in DIV'                => array( '<wp:content><!-- comment --><wp:content>', 2, '<!-- comment -->' ),
-			'Comment before DIV'            => array( '<!-- comment --><wp:content>', 1, '<!-- comment -->' ),
-			'Comment after DIV'             => array( '<wp:content></wp:content><!-- comment -->', 3, '<!-- comment -->' ),
-			'Comment after comment'         => array( '<!-- comment --><!-- comment -->', 2, '<!-- comment -->' ),
-			'Comment before comment'        => array( '<!-- comment --><!-- c --> ', 1, '<!-- comment -->' ),
-			'Empty comment'                 => array( '<!---->', 1, '<!---->' ),
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 */
-	public function test_next_tag_with_no_arguments_should_find_the_next_existing_tag() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-
-		$this->assertTrue( $processor->next_tag(), 'Querying an existing tag did not return true' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 */
-	public function test_next_tag_should_return_false_for_a_non_existing_tag() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-
-		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_modifiable_text
-	 */
-	public function test_normalizes_carriage_returns_in_text_nodes() {
-		$processor = new WP_XML_Tag_Processor(
-			"<wp:content>We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns</wp:content>"
-		);
-		$processor->next_tag();
-		$processor->next_token();
-		$this->assertEquals(
-			"We are\nnormalizing\n\nthe\n\n\n\ncarriage returns",
-			$processor->get_modifiable_text(),
-			'get_raw_token() did not normalize the carriage return characters'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_modifiable_text
-	 */
-	public function test_normalizes_carriage_returns_in_cdata() {
-		$processor = new WP_XML_Tag_Processor(
-			"<wp:content><![CDATA[We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns]]>"
-		);
-		$processor->next_tag();
-		$processor->next_token();
-		$this->assertEquals(
-			"We are\nnormalizing\n\nthe\n\n\n\ncarriage returns",
-			$processor->get_modifiable_text(),
-			'get_raw_token() did not normalize the carriage return characters'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 * @covers WP_XML_Tag_Processor::is_tag_closer
-	 */
-	public function test_next_tag_should_stop_on_closers_only_when_requested() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content><photo /></wp:content>' );
-
-		$this->assertTrue( $processor->next_tag( array( 'tag_name' => 'wp:content' ) ), 'Did not find desired tag opener' );
-		$this->assertFalse( $processor->next_tag( array( 'tag_name' => 'wp:content' ) ), 'Visited an unwanted tag, a tag closer' );
-
-		$processor = new WP_XML_Tag_Processor( '<wp:content><photo /></wp:content>' );
-		$processor->next_tag(
-			array(
-				'tag_name'    => 'wp:content',
-				'tag_closers' => 'visit',
-			)
-		);
-
-		$this->assertFalse( $processor->is_tag_closer(), 'Indicated a tag opener is a tag closer' );
-		$this->assertTrue(
-			$processor->next_tag(
-				array(
-					'tag_name'    => 'wp:content',
-					'tag_closers' => 'visit',
-				)
-			),
-			'Did not stop at desired tag closer'
-		);
-		$this->assertTrue( $processor->is_tag_closer(), 'Indicated a tag closer is a tag opener' );
-
-		$processor = new WP_XML_Tag_Processor( '<wp:content>' );
-		$this->assertTrue( $processor->next_tag( array( 'tag_closers' => 'visit' ) ), "Did not find a tag opener when tag_closers was set to 'visit'" );
-		$this->assertFalse( $processor->next_tag( array( 'tag_closers' => 'visit' ) ), "Found a closer where there wasn't one" );
-	}
-
-	/**
-	 * Verifies that updates to a document before calls to `get_updated_xml()` don't
-	 * lead to the Tag Processor jumping to the wrong tag after the updates.
-	 *
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_updated_xml
-	 */
-	public function test_internal_pointer_returns_to_original_spot_after_inserting_content_before_cursor() {
-		$tags = new WP_XML_Tag_Processor( '<root><wp:content>outside</wp:content><section><wp:content><photo>inside</wp:content></section></root>' );
-
-		$tags->next_tag();
-		$tags->next_tag();
-		$tags->set_attribute( 'wp:post-type', 'foo' );
-		$tags->next_tag( 'section' );
-
-		// Return to this spot after moving ahead.
-		$tags->set_bookmark( 'here' );
-
-		// Move ahead.
-		$tags->next_tag( 'photo' );
-		$tags->seek( 'here' );
-		$this->assertSame( '<root><wp:content wp:post-type="foo">outside</wp:content><section><wp:content><photo>inside</wp:content></section></root>', $tags->get_updated_xml() );
-		$this->assertSame( 'section', $tags->get_tag() );
-		$this->assertFalse( $tags->is_tag_closer() );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::set_attribute
-	 */
-	public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-
-		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
-		$this->assertFalse( $processor->next_tag( 'wp:content' ), 'Querying a non-existing tag did not return false' );
-
-		$processor->set_attribute( 'id', 'primary' );
-
-		$this->assertSame(
-			self::XML_SIMPLE,
-			$processor->get_updated_xml(),
-			'Calling get_updated_xml after updating a non-existing tag returned an XML that was different from the original XML'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::set_attribute
-	 * @covers WP_XML_Tag_Processor::remove_attribute
-	 * @covers WP_XML_Tag_Processor::add_class
-	 * @covers WP_XML_Tag_Processor::remove_class
-	 */
-	public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content id="3"></wp:content>' );
-		$processor->next_tag(
-			array(
-				'tag_name'    => 'wp:content',
-				'tag_closers' => 'visit',
-			)
-		);
-
-		$this->assertFalse( $processor->is_tag_closer(), 'Skipped tag opener' );
-
-		$processor->next_tag(
-			array(
-				'tag_name'    => 'wp:content',
-				'tag_closers' => 'visit',
-			)
-		);
-
-		$this->assertTrue( $processor->is_tag_closer(), 'Skipped tag closer' );
-		$this->assertFalse( $processor->set_attribute( 'id', 'test' ), "Allowed setting an attribute on a tag closer when it shouldn't have" );
-		$this->assertFalse( $processor->remove_attribute( 'invalid-id' ), "Allowed removing an attribute on a tag closer when it shouldn't have" );
-		$this->assertSame(
-			'<wp:content id="3"></wp:content>',
-			$processor->get_updated_xml(),
-			'Calling get_updated_xml after updating a non-existing tag returned an XML that was different from the original XML'
-		);
-	}
-
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::set_attribute
-	 */
-	public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-		$processor->next_tag();
-		$processor->set_attribute( 'test-attribute', 'test-value' );
-
-		$this->assertSame(
-			'<wp:content test-attribute="test-value" id="first"><wp:text id="second">Text</wp:text></wp:content>',
-			$processor->get_updated_xml(),
-			'Updated XML does not include attribute added via set_attribute()'
-		);
-		$this->assertSame(
-			'test-value',
-			$processor->get_attribute( 'test-attribute' ),
-			'get_attribute() (called after get_updated_xml()) did not return attribute added via set_attribute()'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_attribute_returns_updated_values_before_they_are_applied() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-		$processor->next_tag();
-		$processor->set_attribute( 'test-attribute', 'test-value' );
-
-		$this->assertSame(
-			'test-value',
-			$processor->get_attribute( 'test-attribute' ),
-			'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()'
-		);
-		$this->assertSame(
-			'<wp:content test-attribute="test-value" id="first"><wp:text id="second">Text</wp:text></wp:content>',
-			$processor->get_updated_xml(),
-			'Updated XML does not include attribute added via set_attribute()'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-		$processor->next_tag();
-		$processor->set_attribute( 'test-ATTribute', 'test-value' );
-
-		$this->assertSame(
-			'test-value',
-			$processor->get_attribute( 'test-ATTribute' ),
-			'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()'
-		);
-		$this->assertSame(
-			'<wp:content test-ATTribute="test-value" id="first"><wp:text id="second">Text</wp:text></wp:content>',
-			$processor->get_updated_xml(),
-			'Updated XML does not include attribute added via set_attribute()'
-		);
-	}
-
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-		$processor->next_tag();
-		$processor->remove_attribute( 'id' );
-
-		$this->assertNull(
-			$processor->get_attribute( 'id' ),
-			'get_attribute() (called before get_updated_xml()) returned attribute that was removed by remove_attribute()'
-		);
-		$this->assertSame(
-			'<wp:content ><wp:text id="second">Text</wp:text></wp:content>',
-			$processor->get_updated_xml(),
-			'Updated XML includes attribute that was removed by remove_attribute()'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-		$processor->next_tag();
-		$processor->set_attribute( 'test-attribute', 'test-value' );
-		$processor->remove_attribute( 'test-attribute' );
-
-		$this->assertNull(
-			$processor->get_attribute( 'test-attribute' ),
-			'get_attribute() (called before get_updated_xml()) returned attribute that was added via set_attribute() and then removed by remove_attribute()'
-		);
-		$this->assertSame(
-			self::XML_SIMPLE,
-			$processor->get_updated_xml(),
-			'Updated XML includes attribute that was added via set_attribute() and then removed by remove_attribute()'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::get_attribute
-	 */
-	public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-		$processor->next_tag();
-		$processor->set_attribute( 'id', 'test-value' );
-		$processor->remove_attribute( 'id' );
-
-		$this->assertNull(
-			$processor->get_attribute( 'id' ),
-			'get_attribute() (called before get_updated_xml()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()'
-		);
-		$this->assertSame(
-			'<wp:content ><wp:text id="second">Text</wp:text></wp:content>',
-			$processor->get_updated_xml(),
-			'Updated XML includes attribute that was overwritten by set_attribute() and then removed by remove_attribute()'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::set_attribute
-	 */
-	public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-		$processor->next_tag();
-		$processor->set_attribute( 'id', 'new-id' );
-		$this->assertSame(
-			'<wp:content id="new-id"><wp:text id="second">Text</wp:text></wp:content>',
-			$processor->get_updated_xml(),
-			'Existing attribute was not updated'
-		);
-	}
-
-	/**
-	 * Ensures that when setting an attribute multiple times that only
-	 * one update flushes out into the updated XML.
-	 *
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::set_attribute
-	 */
-	public function test_set_attribute_with_case_variants_updates_only_the_original_first_copy() {
-		$processor = new WP_XML_Tag_Processor( '<wp:content data-enabled="5">' );
-		$processor->next_tag();
-		$processor->set_attribute( 'data-enabled', 'canary1' );
-		$processor->set_attribute( 'data-enabled', 'canary2' );
-		$processor->set_attribute( 'data-enabled', 'canary3' );
-
-		$this->assertSame( '<wp:content data-enabled="canary3">', strtolower( $processor->get_updated_xml() ) );
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 * @covers WP_XML_Tag_Processor::set_attribute
-	 */
-	public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-		while ( $processor->next_tag() ) {
-			$processor->set_attribute( 'data-foo', 'bar' );
-		}
-
-		$this->assertSame(
-			'<wp:content data-foo="bar" id="first"><wp:text data-foo="bar" id="second">Text</wp:text></wp:content>',
-			$processor->get_updated_xml(),
-			'Not all tags were updated when looping with next_tag() and set_attribute()'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::remove_attribute
-	 */
-	public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-		$processor->next_tag();
-		$processor->remove_attribute( 'id' );
-
-		$this->assertSame(
-			'<wp:content ><wp:text id="second">Text</wp:text></wp:content>',
-			$processor->get_updated_xml(),
-			'Attribute was not removed'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::remove_attribute
-	 */
-	public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() {
-		$processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-		$processor->next_tag();
-		$processor->remove_attribute( 'no-such-attribute' );
-
-		$this->assertSame(
-			self::XML_SIMPLE,
-			$processor->get_updated_xml(),
-			'Content was changed when attempting to remove an attribute that did not exist'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 */
-	public function test_correctly_parses_xml_attributes_wrapped_in_single_quotation_marks() {
-		$processor = new WP_XML_Tag_Processor(
-			'<wp:content id=\'first\'><wp:text id=\'second\'>Text</wp:text></wp:content>'
-		);
-		$processor->next_tag(
-			array(
-				'tag_name' => 'wp:content',
-				'id'       => 'first',
-			)
-		);
-		$processor->remove_attribute( 'id' );
-		$processor->next_tag(
-			array(
-				'tag_name' => 'wp:text',
-				'id'       => 'second',
-			)
-		);
-		$processor->set_attribute( 'id', 'single-quote' );
-		$this->assertSame(
-			'<wp:content ><wp:text id="single-quote">Text</wp:text></wp:content>',
-			$processor->get_updated_xml(),
-			'Did not remove single-quoted attribute'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::set_attribute
-	 *
-	 * @covers WP_XML_Tag_Processor::set_attribute
-	 */
-	public function test_setting_an_attribute_to_false_is_rejected() {
-		$processor = new WP_XML_Tag_Processor(
-			'<form action="/action_page.php"><input checked type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>'
-		);
-		$processor->next_tag( 'input' );
-		$this->assertFalse(
-			$processor->set_attribute( 'checked', false ),
-			'Accepted a boolean attribute name.'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::set_attribute
-	 *
-	 * @covers WP_XML_Tag_Processor::set_attribute
-	 */
-	public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() {
-		$xml_input = '<form action="/action_page.php"><input type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>';
-		$processor = new WP_XML_Tag_Processor( $xml_input );
-		$processor->next_tag( 'input' );
-		$processor->set_attribute( 'checked', false );
-		$this->assertSame(
-			$xml_input,
-			$processor->get_updated_xml(),
-			'Changed the markup unexpectedly when setting a non-existing attribute to false'
-		);
-	}
-
-	/**
-	 * Ensures that unclosed and invalid comments trigger warnings or errors.
-	 *
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 * @covers WP_XML_Tag_Processor::paused_at_incomplete_token
-	 *
-	 * @dataProvider data_xml_with_unclosed_comments
-	 *
-	 * @param string $xml_ending_before_comment_close XML with opened comments that aren't closed.
-	 */
-	public function test_documents_may_end_with_unclosed_comment( $xml_ending_before_comment_close ) {
-		$processor = new WP_XML_Tag_Processor( $xml_ending_before_comment_close );
-
-		$this->assertFalse(
-			$processor->next_tag(),
-			"Should not have found any tag, but found {$processor->get_tag()}."
-		);
-
-		$this->assertTrue(
-			$processor->is_paused_at_incomplete_input(),
-			"Should have indicated that the parser found an incomplete token but didn't."
-		);
-	}
-
-	/**
-	 * Data provider.
-	 *
-	 * @return array[]
-	 */
-	public static function data_xml_with_unclosed_comments() {
-		return array(
-			'Shortest open valid comment' => array( '<!--' ),
-			'Basic truncated comment'     => array( '<!-- this ends --' ),
-		);
-	}
-
-	/**
-	 * Ensures that the processor doesn't attempt to match an incomplete token.
-	 *
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 * @covers WP_XML_Tag_Processor::paused_at_incomplete_token
-	 *
-	 * @dataProvider data_incomplete_syntax_elements
-	 *
-	 * @param string $incomplete_xml XML text containing some kind of incomplete syntax.
-	 */
-	public function test_next_tag_returns_false_for_incomplete_syntax_elements( $incomplete_xml ) {
-		$processor = new WP_XML_Tag_Processor( $incomplete_xml );
-
-		$this->assertFalse(
-			$processor->next_tag(),
-			"Shouldn't have found any tags but found {$processor->get_tag()}."
-		);
-
-		$this->assertTrue(
-			$processor->is_paused_at_incomplete_input(),
-			"Should have indicated that the parser found an incomplete token but didn't."
-		);
-	}
-
-	/**
-	 * Data provider.
-	 *
-	 * @return array[]
-	 */
-	public static function data_incomplete_syntax_elements() {
-		return array(
-			'Incomplete tag name'                         => array( '<swit' ),
-			'Incomplete tag (no attributes)'              => array( '<wp:content' ),
-			'Incomplete tag (attributes)'                 => array( '<wp:content inert="yes" title="test"' ),
-			'Incomplete attribute (before =)'             => array( '<button disabled' ),
-			'Incomplete attribute (before ")'             => array( '<button disabled=' ),
-			'Incomplete attribute (before closing quote)' => array( '<button disabled="value started' ),
-			'Incomplete attribute (single quoted)'        => array( "<li wp:post-type='just-another class" ),
-			'Incomplete attribute (double quoted)'        => array( '<iframe src="https://www.example.com/embed/abcdef' ),
-			'Incomplete comment (normative)'              => array( '<!-- without end' ),
-			'Incomplete comment (missing --)'             => array( '<!-- without end --' ),
-			'Incomplete CDATA'                            => array( '<![CDATA[something inside of here needs to get out' ),
-			'Partial CDATA'                               => array( '<![CDA' ),
-			'Partially closed CDATA]'                     => array( '<![CDATA[cannot escape]' ),
-		);
-	}
-
-	/**
-	 * Ensures that the processor doesn't attempt to match an incomplete text node.
-	 *
-	 * @ticket 61365
-	 *
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 * @covers WP_XML_Tag_Processor::paused_at_incomplete_token
-	 *
-	 * @dataProvider data_incomplete_text_nodes
-	 *
-	 * @param string $incomplete_xml XML text containing some kind of incomplete syntax.
-	 */
-	public function test_next_tag_returns_false_for_incomplete_text_nodes( $incomplete_xml, $node_at = 1 ) {
-		$processor = new WP_XML_Tag_Processor( $incomplete_xml );
-
-		for ( $i = 0; $i < $node_at - 1; $i++ ) {
-			$this->assertTrue(
-				$processor->next_token(),
-				"Failed to find text node {$i} in incomplete XML."
-			);
-		}
-
-		$this->assertFalse(
-			$processor->next_token(),
-			"Shouldn't have found any more text nodes but found {$processor->get_modifiable_text()}."
-		);
-
-		$this->assertTrue(
-			$processor->is_paused_at_incomplete_input(),
-			"Should have indicated that the parser found an incomplete token but didn't."
-		);
-	}
-
-	/**
-	 * Data provider.
-	 *
-	 * @return array[]
-	 */
-	public static function data_incomplete_text_nodes() {
-		return array(
-			'Incomplete text node'               => array( 'This is a text node' ),
-			'Incomplete text node after a tag'   => array( '<data>This is a text node', 2 ),
-			'Incomplete text node after (CDATA)' => array( 'This is a text node<![CDATA[ and this is a second text node ]]> and this is the third text node.', 3 ),
-		);
-	}
-
-	/**
-	 * The string " -- " (double-hyphen) must not occur within comments.
-	 *
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_tag
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 */
-	public function test_rejects_malformed_comments() {
-		$processor = new WP_XML_Tag_Processor( '<!-- comment -- oh, I did not close it after the initial double dash -->' );
-		$this->assertFalse( $processor->next_token(), 'Did not reject a malformed XML comment.' );
-	}
-
-	/**
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 */
-	public function test_handles_malformed_taglike_open_short_xml() {
-		$processor = new WP_XML_Tag_Processor( '<' );
-		$result    = $processor->next_tag();
-		$this->assertFalse( $result, 'Did not handle "<" xml properly.' );
-	}
-
-	/**
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 */
-	public function test_handles_malformed_taglike_close_short_xml() {
-		$processor = new WP_XML_Tag_Processor( '</ ' );
-		$result    = $processor->next_tag();
-		$this->assertFalse( $result, 'Did not handle "</ " xml properly.' );
-	}
-
-	/**
-	 * @expectedIncorrectUsage WP_XML_Tag_Processor::base_class_next_token
-	 * @covers WP_XML_Tag_Processor::next_tag
-	 */
-	public function test_rejects_empty_element_that_is_also_a_closer() {
-		$processor = new WP_XML_Tag_Processor( '</wp:content/> ' );
-		$result    = $processor->next_tag();
-		$this->assertFalse( $result, 'Did not handle "</wp:content/>" xml properly.' );
-	}
-
-	/**
-	 * Ensures that non-tag syntax starting with `<` is rejected.
-	 *
-	 * @ticket 61365
-	 */
-	public function test_single_text_node_with_taglike_text() {
-		$processor = new WP_XML_Tag_Processor( 'This is a text node< /A>' );
-		$this->assertTrue( $processor->next_token(), 'A valid text node was not found.' );
-		$this->assertEquals( 'This is a text node', $processor->get_modifiable_text(), 'The contents of a valid text node were not correctly captured.' );
-		$this->assertFalse( $processor->next_tag(), 'A malformed XML markup was not rejected.' );
-	}
-
-	/**
-	 * Ensures that non-tag syntax starting with `<` is rejected.
-	 *
-	 * @ticket 61365
-	 */
-	public function test_parses_CDATA() {
-		$processor = new WP_XML_Tag_Processor( '<root><![CDATA[This is a CDATA text node.]]></root>' );
-		$processor->next_tag();
-		$this->assertTrue( $processor->next_token(), 'The first text node was not found.' );      $this->assertEquals(
-			'This is a CDATA text node.',
-			$processor->get_modifiable_text(),
-			'The contents of a a CDATA text node were not correctly captured.'
-		);
-	}
-
-	/**
-	 * @ticket 61365
-	 */
-	public function test_yields_CDATA_a_separate_text_node() {
-		$processor = new WP_XML_Tag_Processor( '<root>This is the first text node <![CDATA[ and this is a second text node ]]> and this is the third text node.</root>' );
-
-		$processor->next_token();
-		$this->assertTrue( $processor->next_token(), 'The first text node was not found.' );
-		$this->assertEquals(
-			'This is the first text node ',
-			$processor->get_modifiable_text(),
-			'The contents of a valid text node were not correctly captured.'
-		);
-
-		$this->assertTrue( $processor->next_token(), 'The CDATA text node was not found.' );
-		$this->assertEquals(
-			' and this is a second text node ',
-			$processor->get_modifiable_text(),
-			'The contents of a a CDATA text node were not correctly captured.'
-		);
-
-		$this->assertTrue( $processor->next_token(), 'The text node was not found.' );
-		$this->assertEquals(
-			' and this is the third text node.',
-			$processor->get_modifiable_text(),
-			'The contents of a valid text node were not correctly captured.'
-		);
-	}
-
-	/**
-	 *
-	 * @ticket 61365
-	 */
-	public function test_xml_declaration() {
-		$processor = new WP_XML_Tag_Processor( '<?xml version="1.0" encoding="UTF-8" ?>' );
-		$this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
-		$this->assertEquals(
-			'#xml-declaration',
-			$processor->get_token_type(),
-			'The XML declaration was not correctly identified.'
-		);
-		$this->assertEquals( '1.0', $processor->get_attribute( 'version' ), 'The version attribute was not correctly captured.' );
-		$this->assertEquals( 'UTF-8', $processor->get_attribute( 'encoding' ), 'The encoding attribute was not correctly captured.' );
-	}
-
-	/**
-	 *
-	 * @ticket 61365
-	 */
-	public function test_xml_declaration_with_single_quotes() {
-		$processor = new WP_XML_Tag_Processor( "<?xml version='1.0' encoding='UTF-8' ?>" );
-		$this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
-		$this->assertEquals(
-			'#xml-declaration',
-			$processor->get_token_type(),
-			'The XML declaration was not correctly identified.'
-		);
-		$this->assertEquals( '1.0', $processor->get_attribute( 'version' ), 'The version attribute was not correctly captured.' );
-		$this->assertEquals( 'UTF-8', $processor->get_attribute( 'encoding' ), 'The encoding attribute was not correctly captured.' );
-	}
-
-	/**
-	 *
-	 * @ticket 61365
-	 */
-	public function test_processor_instructions() {
-		$processor = new WP_XML_Tag_Processor(
-			// The first <?xml tag is an xml declaration.
-			'<?xml version="1.0" encoding="UTF-8" ?>' .
-			// The second <?xml tag is a processing instruction.
-			'<?xml stylesheet type="text/xsl" href="style.xsl" ?>'
-		);
-		$this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
-		$this->assertTrue( $processor->next_token(), 'The processing instruction was not found.' );
-		$this->assertEquals(
-			'#processing-instructions',
-			$processor->get_token_type(),
-			'The processing instruction was not correctly identified.'
-		);
-		$this->assertEquals( ' stylesheet type="text/xsl" href="style.xsl" ', $processor->get_modifiable_text(), 'The modifiable text was not correctly captured.' );
-	}
-
-	/**
-	 * Ensures that updates which are enqueued in front of the cursor
-	 * are applied before moving forward in the document.
-	 *
-	 * @ticket 61365
-	 */
-	public function test_applies_updates_before_proceeding() {
-		$xml = '<root><wp:content><photo/></wp:content><wp:content><photo/></wp:content></root>';
-
-		$subclass = new class( $xml ) extends WP_XML_Tag_Processor {
-			/**
-			 * Inserts raw text after the current token.
-			 *
-			 * @param string $new_xml Raw text to insert.
-			 */
-			public function insert_after( $new_xml ) {
-				$this->set_bookmark( 'here' );
-				$this->lexical_updates[] = new WP_HTML_Text_Replacement(
-					$this->bookmarks['here']->start + $this->bookmarks['here']->length,
-					0,
-					$new_xml
-				);
-			}
-		};
-
-		$subclass->next_tag( 'photo' );
-		$subclass->insert_after( '<p>snow-capped</p>' );
-
-		$subclass->next_tag();
-		$this->assertSame(
-			'p',
-			$subclass->get_tag(),
-			'Should have matched inserted XML as next tag.'
-		);
-
-		$subclass->next_tag( 'photo' );
-		$subclass->set_attribute( 'alt', 'mountain' );
-
-		$this->assertSame(
-			'<root><wp:content><photo/><p>snow-capped</p></wp:content><wp:content><photo alt="mountain"/></wp:content></root>',
-			$subclass->get_updated_xml(),
-			'Should have properly applied the update from in front of the cursor.'
-		);
-	}
-}
\ No newline at end of file

From 986c066f5b10a620675e4927e82af056ad62bddd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com>
Date: Tue, 29 Oct 2024 18:41:47 +0100
Subject: [PATCH 2/6] Introduce input_finished() method to distinguish between
 "waiting for more input" state and "parse error" state

---
 .../playground/data-liberation/bootstrap.php  |   4 +-
 .../src/stream-api/WP_Stream_Processor.php    |   1 +
 .../src/xml-api/WP_XML_Processor.php          | 242 +++++++++++-------
 .../tests/WPXMLProcessorTests.php             | 168 ++++++------
 4 files changed, 245 insertions(+), 170 deletions(-)

diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php
index fb4bbd45d6..9c15be7f13 100644
--- a/packages/playground/data-liberation/bootstrap.php
+++ b/packages/playground/data-liberation/bootstrap.php
@@ -37,8 +37,8 @@
 
 
 // Polyfill WordPress core functions
-function _doing_it_wrong() {
-
+function _doing_it_wrong($method, $message, $version) {
+	// var_dump($method, $message, $version);
 }
 
 function __($input) {
diff --git a/packages/playground/data-liberation/src/stream-api/WP_Stream_Processor.php b/packages/playground/data-liberation/src/stream-api/WP_Stream_Processor.php
index a954a31cf0..d07b092120 100644
--- a/packages/playground/data-liberation/src/stream-api/WP_Stream_Processor.php
+++ b/packages/playground/data-liberation/src/stream-api/WP_Stream_Processor.php
@@ -2,6 +2,7 @@
 
 interface WP_Stream_Processor {
 	public function append_bytes( string $bytes );
+	public function input_finished(): void;
 	public function is_finished(): bool;
 	public function is_paused_at_incomplete_input(): bool;
 	public function get_last_error(): ?string;
diff --git a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
index beffee8e2b..2efeb73893 100644
--- a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
+++ b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
@@ -22,7 +22,10 @@
  * starting with 1.0, however, because most that's what most WXR
  * files declare.
  *
- * ## Future work
+ * @TODO: Add input_finished() or similar method to tell the processor there 
+ *        will not be any more input. This will enable distinguishing
+ *        between incomplete tokens and syntax errors and will make
+ *        the step() logic more reliable.
  *
  * @TODO: Skip over the following syntax elements:
  *        * <!DOCTYPE, see https://www.w3.org/TR/xml/#sec-prolog-dtd
@@ -386,6 +389,13 @@ class WP_XML_Processor {
 	 */
 	protected $is_incomplete_text_node = false;
 
+	/**
+	 * Whether more input bytes are still expected (false once input_finished() is called).
+	 *
+	 * @var bool
+	 */
+	protected $expecting_more_input = true;
+
 	/**
 	 * How many bytes from the original XML document have been read and parsed.
 	 *
@@ -626,17 +636,42 @@ class WP_XML_Processor {
 
 	public $had_previous_chunks = false;
 
+	/**
+	 * Creates a processor for a complete XML document held in a single string.
+	 */
+	public static function from_string( $xml, $known_definite_encoding = 'UTF-8' ) {
+		if ( 'UTF-8' !== $known_definite_encoding ) {
+			return null;
+		}
+
+		$processor = new WP_XML_Processor( $xml );
+		$processor->input_finished();
+		return $processor;
+	}
+
+	public static function from_stream( $xml, $known_definite_encoding = 'UTF-8' ) {
+		if ( 'UTF-8' !== $known_definite_encoding ) {
+			return null;
+		}
+		return new WP_XML_Processor( $xml );
+	}
+
 	/**
 	 * Constructor.
 	 *
-	 * @since WP_VERSION
+	 * Do not use this method. Use the static creator methods instead.
+	 *
+	 * @access private
+	 *
+	 * @since 6.4.0
 	 *
-	 * @param string $xml XML to process.
+	 * @see WP_XML_Processor::from_string()
+	 * @see WP_XML_Processor::from_stream()
+	 *
+	 * @param string      $xml            XML to process.
 	 */
-	public function __construct( $xml, $breadcrumbs = array(), $parser_context = self::IN_PROLOG_CONTEXT ) {
+	protected function __construct( $xml ) {
 		$this->xml = $xml;
-		$this->stack_of_open_elements = $breadcrumbs;
-		$this->parser_context         = $parser_context;
 	}
 
 	public static function stream( $node_visitor_callback ) {
@@ -659,13 +694,14 @@ function ( $state ) use ( $xml_processor, $node_visitor_callback ) {
 				if ( $tokens_found > 0 ) {
 					$buffer .= $xml_processor->flush_processed_xml();
 				} elseif (
-				$tokens_found === 0 &&
-				! $xml_processor->is_paused_at_incomplete_input() &&
-				$xml_processor->get_current_depth() === 0
+					$tokens_found === 0 &&
+					! $xml_processor->is_paused_at_incomplete_input() &&
+					$xml_processor->get_current_depth() === 0
 				) {
 					// We've reached the end of the document, let's finish up.
 					// @TODO: Fix this so it doesn't return the entire XML
-					$buffer .= $xml_processor->get_unprocessed_xml();
+					$buffer .= $xml_processor->flush_processed_xml();
+					$buffer .= $xml_processor->get_updated_xml();
 					$state->finish();
 				}
 
@@ -706,13 +742,33 @@ public function resume( $paused ) {
 	 */
 	public function append_bytes( string $next_chunk ) {
-		$this->xml .= $next_chunk;
+		if ( ! $this->expecting_more_input ) {
+			_doing_it_wrong(
+				__METHOD__,
+				__( 'Cannot append bytes after the last input chunk was provided and input_finished() was called.' ),
+				'WP_VERSION'
+			);
+			return false;
+		}
+		$this->xml .= $next_chunk; // Buffer the chunk only while input is still open.
+		return true;
+	}
+
+	/**
+	 * Indicates that all the XML document bytes have been provided.
+	 * 
+	 * After calling this method, the processor will emit errors where
+	 * previously it would have entered the STATE_INCOMPLETE_INPUT state.
+	 */
+	public function input_finished() {
+		$this->expecting_more_input = false;
 	}
 
 	public function flush_processed_xml() {
+		// Flush updates
 		$this->get_updated_xml();
 
-		$processed_xml   = $this->get_processed_xml();
-		$unprocessed_xml = $this->get_unprocessed_xml();
+		$processed_xml   = substr( $this->xml, 0, $this->bytes_already_parsed );
+		$unprocessed_xml = substr( $this->xml, $this->bytes_already_parsed );
 
 		$breadcrumbs    = $this->get_breadcrumbs();
 		$parser_context = $this->parser_context;
@@ -762,7 +818,11 @@ protected function base_class_next_token() {
 		$this->parser_state = self::STATE_READY;
 
 		if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
-			$this->parser_state = self::STATE_COMPLETE;
+			if ( $this->expecting_more_input ) {
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			} else {
+				$this->parser_state = self::STATE_COMPLETE;
+			}
 			return false;
 		}
 
@@ -1307,20 +1367,8 @@ private function parse_next_tag() {
 
 		while ( false !== $at && $at < $doc_length ) {
 			$at = strpos( $xml, '<', $at );
-
-			/*
-			 * There may be no text nodes outside of elements.
-			 * If this character sequence was encountered outside of
-			 * the root element, it is a syntax error. WP_XML_Processor
-			 * does not have that context – it is up to the API consumer,
-			 * such as WP_Tag_Processor, to handle this scenario.
-			 */
 			if ( false === $at ) {
-				$this->parser_state            = self::STATE_INCOMPLETE_INPUT;
-				$this->is_incomplete_text_node = true;
-				$this->text_starts_at          = $was_at;
-				$this->text_length             = $doc_length - $was_at;
-				return false;
+				break;
 			}
 
 			if ( $at > $was_at ) {
@@ -1666,6 +1714,26 @@ private function parse_next_tag() {
 			++$at;
 		}
 
+		// There are no more tag openers and we're not expecting more data –
+		// this must be a trailing text node.
+		if ( ! $this->expecting_more_input ) {
+			$this->parser_state         = self::STATE_TEXT_NODE;
+			$this->token_starts_at      = $was_at;
+			$this->token_length         = $doc_length - $was_at;
+			$this->text_starts_at       = $was_at;
+			$this->text_length          = $doc_length - $was_at;
+			$this->bytes_already_parsed = $doc_length;
+			return true;
+		}
+
+		/*
+		 * This does not imply an incomplete parse; it indicates that there
+		 * can be nothing left in the document other than a #text node.
+		 */
+		$this->parser_state            = self::STATE_INCOMPLETE_INPUT;
+		$this->is_incomplete_text_node = true;
+		$this->text_starts_at          = $was_at;
+		$this->text_length             = $doc_length - $was_at;
 		return false;
 	}
 
@@ -2450,7 +2518,6 @@ public function get_modifiable_text() {
 			 */
 
 			$this->last_error = self::ERROR_SYNTAX;
-			var_dump( $text );
 			_doing_it_wrong(
 				__METHOD__,
 				__( 'Invalid text content encountered.' ),
@@ -2694,34 +2761,21 @@ public function get_updated_xml() {
 	}
 
 	/**
-	 * Finds the next token in the XML document.
-	 *
-	 * An XML document can be viewed as a stream of tokens,
-	 * where tokens are things like XML tags, XML comments,
-	 * text nodes, etc. This method finds the next token in
-	 * the XML document and returns whether it found one.
-	 *
-	 * If it starts parsing a token and reaches the end of the
-	 * document then it will seek to the start of the last
-	 * token and pause, returning `false` to indicate that it
-	 * failed to find a complete token.
-	 *
-	 * Possible token types, based on the XML specification:
-	 *
-	 *  - an XML tag, whether opening, closing, or void.
-	 *  - a text node - the plaintext inside tags.
-	 *  - an XML comment.
-	 *  - a processing instruction, e.g. `<?xml version="1.0" ?>`.
-	 *
-	 * The Tag Processor currently only supports the tag token.
+	 * Moves the internal cursor to the next token in the XML document
+	 * according to the XML specification.
+	 * 
+	 * It considers the current XML context (prolog, element, or misc)
+	 * and only expects the nodes that are allowed in that context.
 	 *
 	 * @since WP_VERSION
 	 *
 	 * @access private
 	 *
+	 * @param int $node_to_process Whether to process the next node or 
+	 *            reprocess the current node, e.g. using another parser context.
 	 * @return bool Whether a token was parsed.
 	 */
-	public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
+	private function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		// Refuse to proceed if there was a previous error.
 		if ( null !== $this->last_error ) {
 			return false;
@@ -2754,6 +2808,30 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		}
 	}
 
+	/**
+	 * Finds the next token in the XML document.
+	 *
+	 * An XML document can be viewed as a stream of tokens,
+	 * where tokens are things like XML tags, XML comments,
+	 * text nodes, etc. This method finds the next token in
+	 * the XML document and returns whether it found one.
+	 *
+	 * If it starts parsing a token and reaches the end of the
+	 * document then it will seek to the start of the last
+	 * token and pause, returning `false` to indicate that it
+	 * failed to find a complete token.
+	 *
+	 * Possible token types, based on the XML specification:
+	 *
+	 *  - an XML tag
+	 *  - a text node - the plaintext inside tags.
+	 *  - a CData section
+	 *  - an XML comment.
+	 *  - a DOCTYPE declaration.
+	 *  - a processing instruction, e.g. `<?xml mode="WordPress" ?>`.
+	 *
+	 * @return bool Whether a token was parsed.
+	 */
 	public function next_token( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		return $this->step( $node_to_process );
 	}
@@ -2770,10 +2848,17 @@ public function next_token( $node_to_process = self::PROCESS_NEXT_NODE ) {
 	 */
 	private function step_in_prolog( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
-			if ( false === $this->base_class_next_token() ) {
+			$has_next_node = $this->base_class_next_token();
+			if (
+				false === $has_next_node &&
+				! $this->expecting_more_input
+			) {
+				$this->last_error = self::ERROR_SYNTAX;
+				_doing_it_wrong( __METHOD__, 'The root element was not found.', 'WP_VERSION' );
 				return false;
 			}
 		}
+
 		// XML requires a root element. If we've reached the end of data in the prolog stage,
 		// before finding a root element, then the document is incomplete.
 		if ( WP_XML_Processor::STATE_COMPLETE === $this->parser_state ) {
@@ -2820,17 +2905,17 @@ private function step_in_prolog( $node_to_process = self::PROCESS_NEXT_NODE ) {
 	 */
 	private function step_in_element( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
-			if ( false === $this->base_class_next_token() ) {
+			$has_next_node = $this->base_class_next_token();
+			if (
+				false === $has_next_node &&
+				! $this->expecting_more_input
+			) {
+				$this->last_error = self::ERROR_SYNTAX;
+				_doing_it_wrong( __METHOD__, 'A tag was not closed.', 'WP_VERSION' );
 				return false;
 			}
 		}
-		// An XML document isn't complete until the root element is closed.
-		if ( self::STATE_COMPLETE === $this->parser_state &&
-			count( $this->stack_of_open_elements ) > 0
-		) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-			return false;
-		}
+
 		// Do not step if we paused due to an incomplete input.
 		if ( WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
 			return false;
@@ -2882,13 +2967,17 @@ private function step_in_element( $node_to_process = self::PROCESS_NEXT_NODE ) {
 	 */
 	private function step_in_misc( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
-			/**
-			 * Don't short-circuit on failure yet! In misc stage,
-			 * we're still expecting text nodes and comments – even
-			 * if there are no more tags left in the document.
-			 */
-			$this->base_class_next_token();
+			$has_next_node = $this->base_class_next_token();
+			if (
+				false === $has_next_node &&
+				! $this->expecting_more_input
+			) {
+				// Parsing is complete.
+				$this->parser_state = self::STATE_COMPLETE;
+				return true;
+			}
 		}
+
 		if ( self::STATE_COMPLETE === $this->parser_state ) {
 			return true;
 		}
@@ -2901,7 +2990,7 @@ private function step_in_misc( $node_to_process = self::PROCESS_NEXT_NODE ) {
 				$whitespaces = strspn( $text, " \t\n\r" );
 				if ( strlen( $text ) !== $whitespaces ) {
 					$this->last_error = self::ERROR_SYNTAX;
-					_doing_it_wrong( __METHOD__, 'Unexpected token type in prolog stage.', 'WP_VERSION' );
+					_doing_it_wrong( __METHOD__, 'Unexpected token type "' . $this->get_token_type() . '" in misc stage.', 'WP_VERSION' );
 					return false;
 				}
 				return $this->step();
@@ -2927,7 +3016,7 @@ private function step_in_misc( $node_to_process = self::PROCESS_NEXT_NODE ) {
 				}
 
 				$this->last_error = self::ERROR_SYNTAX;
-				_doing_it_wrong( __METHOD__, 'Unexpected token type in misc stage.', 'WP_VERSION' );
+				_doing_it_wrong( __METHOD__, 'Unexpected token type "' . $this->get_token_type() . '" in misc stage.', 'WP_VERSION' );
 				return false;
 		}
 	}
@@ -3049,29 +3138,6 @@ private function push_open_element( $tag_name ) {
 		);
 	}
 
-	private function last_open_element() {
-		return end( $this->stack_of_open_elements );
-	}
-
-	/**
-	 * @TODO Decide whether this method should even exist.
-	 */
-	protected function get_processed_xml() {
-		// Flush updates
-		$this->get_updated_xml();
-		return substr( $this->xml, 0, $this->bytes_already_parsed );
-	}
-
-	/**
-	 * @TODO Decide whether this method should even exist.
-	 */
-	protected function get_unprocessed_xml() {
-		// Flush updates
-		$this->get_updated_xml();
-		return substr( $this->xml, $this->bytes_already_parsed );
-	}
-
-
 	/**
 	 * Parser Ready State.
 	 *
diff --git a/packages/playground/data-liberation/tests/WPXMLProcessorTests.php b/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
index 0c5ffede64..441aeff1a5 100644
--- a/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
+++ b/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
@@ -23,7 +23,7 @@ class WPXMLProcessorTests extends TestCase {
 	 * @covers WP_XML_Processor::get_tag
 	 */
 	public function test_get_tag_returns_null_before_finding_tags() {
-		$processor = new WP_XML_Processor( '<wp:content>Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content>Test</wp:content>' );
 
 		$this->assertNull( $processor->get_tag(), 'Calling get_tag() without selecting a tag did not return null' );
 	}
@@ -34,7 +34,7 @@ public function test_get_tag_returns_null_before_finding_tags() {
 	 * @covers WP_XML_Processor::get_tag
 	 */
 	public function test_get_tag_returns_null_when_not_in_open_tag() {
-		$processor = new WP_XML_Processor( '<wp:content>Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content>Test</wp:content>' );
 
 		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
 		$this->assertNull( $processor->get_tag(), 'Accessing a non-existing tag did not return null' );
@@ -46,7 +46,7 @@ public function test_get_tag_returns_null_when_not_in_open_tag() {
 	 * @covers WP_XML_Processor::get_tag
 	 */
 	public function test_get_tag_returns_open_tag_name() {
-		$processor = new WP_XML_Processor( '<wp:content>Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content>Test</wp:content>' );
 
 		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
 		$this->assertSame( 'wp:content', $processor->get_tag(), 'Accessing an existing tag name did not return "div"' );
@@ -63,7 +63,7 @@ public function test_get_tag_returns_open_tag_name() {
 	 * @param bool $flag_is_set Whether the input XML's first tag contains the self-closing flag.
 	 */
 	public function test_is_empty_element_matches_input_xml( $xml, $flag_is_set ) {
-		$processor = new WP_XML_Processor( $xml );
+		$processor = WP_XML_Processor::from_string( $xml );
 		$processor->next_tag( array( 'tag_closers' => 'visit' ) );
 
 		if ( $flag_is_set ) {
@@ -104,7 +104,7 @@ public static function data_is_empty_element() {
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_attribute_returns_null_when_not_in_open_tag() {
-		$processor = new WP_XML_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content wp:post-type="test">Test</wp:content>' );
 
 		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
 		$this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a non-existing tag did not return null' );
@@ -116,7 +116,7 @@ public function test_get_attribute_returns_null_when_not_in_open_tag() {
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_attribute_returns_null_when_in_closing_tag() {
-		$processor = new WP_XML_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content wp:post-type="test">Test</wp:content>' );
 
 		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
 		$this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not return true' );
@@ -130,7 +130,7 @@ public function test_get_attribute_returns_null_when_in_closing_tag() {
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_attribute_returns_null_when_attribute_missing() {
-		$processor = new WP_XML_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content wp:post-type="test">Test</wp:content>' );
 
 		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
 		$this->assertNull( $processor->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' );
@@ -143,7 +143,7 @@ public function test_get_attribute_returns_null_when_attribute_missing() {
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_attributes_are_rejected_in_tag_closers() {
-		$processor = new WP_XML_Processor( '<wp:content>Test</wp:content wp:post-type="test">' );
+		$processor = WP_XML_Processor::from_string( '<wp:content>Test</wp:content wp:post-type="test">' );
 
 		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
 		$this->assertTrue( $processor->next_token(), 'Querying a text node did not return true.' );
@@ -156,7 +156,7 @@ public function test_attributes_are_rejected_in_tag_closers() {
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_attribute_returns_attribute_value() {
-		$processor = new WP_XML_Processor( '<wp:content wp:post-type="test">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content wp:post-type="test">Test</wp:content>' );
 
 		$this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
 		$this->assertSame( 'test', $processor->get_attribute( 'wp:post-type' ), 'Accessing a wp:post-type="test" attribute value did not return "test"' );
@@ -169,7 +169,7 @@ public function test_get_attribute_returns_attribute_value() {
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_parsing_stops_on_malformed_attribute_value_no_value() {
-		$processor = new WP_XML_Processor( '<wp:content enabled wp:post-type="test">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content enabled wp:post-type="test">Test</wp:content>' );
 
 		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
 	}
@@ -181,7 +181,7 @@ public function test_parsing_stops_on_malformed_attribute_value_no_value() {
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_parsing_stops_on_malformed_attribute_value_no_quotes() {
-		$processor = new WP_XML_Processor( '<wp:content enabled=1 wp:post-type="test">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content enabled=1 wp:post-type="test">Test</wp:content>' );
 
 		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
 	}
@@ -193,7 +193,7 @@ public function test_parsing_stops_on_malformed_attribute_value_no_quotes() {
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_malformed_attribute_value_containing_ampersand_is_treated_as_plaintext() {
-		$processor = new WP_XML_Processor( '<wp:content enabled="WordPress & WordPress">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content enabled="WordPress & WordPress">Test</wp:content>' );
 
 		$this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' );
         $this->assertEquals('WordPress & WordPress', $processor->get_attribute('enabled'));
@@ -206,7 +206,7 @@ public function test_malformed_attribute_value_containing_ampersand_is_treated_a
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_malformed_attribute_value_containing_entity_without_semicolon_is_treated_as_plaintext() {
-		$processor = new WP_XML_Processor( '<wp:content enabled="&#x94">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content enabled="&#x94">Test</wp:content>' );
 
 		$this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' );
 		$this->assertEquals('&#x94', $processor->get_attribute('enabled'));
@@ -219,7 +219,7 @@ public function test_malformed_attribute_value_containing_entity_without_semicol
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_parsing_stops_on_malformed_attribute_value_contains_lt_character() {
-		$processor = new WP_XML_Processor( '<wp:content enabled="I love <3 this">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content enabled="I love <3 this">Test</wp:content>' );
 
 		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
 	}
@@ -231,7 +231,7 @@ public function test_parsing_stops_on_malformed_attribute_value_contains_lt_char
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_parsing_stops_on_malformed_tags_duplicate_attributes() {
-		$processor = new WP_XML_Processor( '<wp:content id="update-me" id="ignored-id"><wp:text id="second">Text</wp:text></wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content id="update-me" id="ignored-id"><wp:text id="second">Text</wp:text></wp:content>' );
 
 		$this->assertFalse( $processor->next_tag() );
 	}
@@ -243,7 +243,7 @@ public function test_parsing_stops_on_malformed_tags_duplicate_attributes() {
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_parsing_stops_on_malformed_attribute_name_contains_slash() {
-		$processor = new WP_XML_Processor( '<wp:content a/b="test">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content a/b="test">Test</wp:content>' );
 
 		$this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
 	}
@@ -254,7 +254,7 @@ public function test_parsing_stops_on_malformed_attribute_name_contains_slash()
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_modifiable_text_returns_a_decoded_value() {
-		$processor = new WP_XML_Processor( '<root>&#x201C;&#x1f604;&#x201D;</root>' );
+		$processor = WP_XML_Processor::from_string( '<root>&#x201C;&#x1f604;&#x201D;</root>' );
 
 		$processor->next_tag( 'root' );
 		$processor->next_token();
@@ -272,7 +272,7 @@ public function test_get_modifiable_text_returns_a_decoded_value() {
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_attribute_returns_a_decoded_value() {
-		$processor = new WP_XML_Processor( '<root encoded-data="&#x201C;&#x1f604;&#x201D;"></root>' );
+		$processor = WP_XML_Processor::from_string( '<root encoded-data="&#x201C;&#x1f604;&#x201D;"></root>' );
 
 		$this->assertTrue( $processor->next_tag( 'root' ), 'Querying a tag did not return true' );
 		$this->assertEquals(
@@ -290,7 +290,7 @@ public function test_get_attribute_returns_a_decoded_value() {
 	 * @param string $attribute_name Name of data-enabled attribute with case variations.
 	 */
 	public function test_get_attribute_is_case_sensitive() {
-		$processor = new WP_XML_Processor( '<wp:content DATA-enabled="true">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content DATA-enabled="true">Test</wp:content>' );
 		$processor->next_tag();
 
 		$this->assertEquals(
@@ -312,7 +312,7 @@ public function test_get_attribute_is_case_sensitive() {
 	 * @covers WP_XML_Processor::remove_attribute
 	 */
 	public function test_remove_attribute_is_case_sensitive() {
-		$processor = new WP_XML_Processor( '<wp:content DATA-enabled="true">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content DATA-enabled="true">Test</wp:content>' );
 		$processor->next_tag();
 		$processor->remove_attribute( 'data-enabled' );
 
@@ -329,7 +329,7 @@ public function test_remove_attribute_is_case_sensitive() {
 	 * @covers WP_XML_Processor::set_attribute
 	 */
 	public function test_set_attribute_is_case_sensitive() {
-		$processor = new WP_XML_Processor( '<wp:content DATA-enabled="true">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content DATA-enabled="true">Test</wp:content>' );
 		$processor->next_tag();
 		$processor->set_attribute( 'data-enabled', 'abc' );
 
@@ -342,7 +342,7 @@ public function test_set_attribute_is_case_sensitive() {
 	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
 	 */
 	public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() {
-		$processor = new WP_XML_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content data-foo="bar">Test</wp:content>' );
 		$this->assertNull(
 			$processor->get_attribute_names_with_prefix( 'data-' ),
 			'Accessing attributes by their prefix did not return null when no tag was selected'
@@ -355,7 +355,7 @@ public function test_get_attribute_names_with_prefix_returns_null_before_finding
 	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
 	 */
 	public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() {
-		$processor = new WP_XML_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content data-foo="bar">Test</wp:content>' );
 		$processor->next_tag( 'p' );
 		$this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a non-existing tag did not return null' );
 	}
@@ -366,7 +366,7 @@ public function test_get_attribute_names_with_prefix_returns_null_when_not_in_op
 	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
 	 */
 	public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() {
-		$processor = new WP_XML_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content data-foo="bar">Test</wp:content>' );
 		$processor->next_tag( 'wp:content' );
 		$processor->next_tag( array( 'tag_closers' => 'visit' ) );
 
@@ -379,7 +379,7 @@ public function test_get_attribute_names_with_prefix_returns_null_when_in_closin
 	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
 	 */
 	public function test_get_attribute_names_with_prefix_returns_empty_array_when_no_attributes_present() {
-		$processor = new WP_XML_Processor( '<wp:content>Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content>Test</wp:content>' );
 		$processor->next_tag( 'wp:content' );
 
 		$this->assertSame( array(), $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing the attributes on a tag without any did not return an empty array' );
@@ -391,7 +391,7 @@ public function test_get_attribute_names_with_prefix_returns_empty_array_when_no
 	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
 	 */
 	public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_original_case() {
-		$processor = new WP_XML_Processor( '<wp:content DATA-enabled="yes" wp:post-type="test" data-test-ID="14">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content DATA-enabled="yes" wp:post-type="test" data-test-ID="14">Test</wp:content>' );
 		$processor->next_tag();
 
 		$this->assertSame(
@@ -407,7 +407,7 @@ public function test_get_attribute_names_with_prefix_returns_matching_attribute_
 	 * @covers WP_XML_Processor::get_attribute_names_with_prefix
 	 */
 	public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() {
-		$processor = new WP_XML_Processor( '<wp:content data-foo="bar">Test</wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content data-foo="bar">Test</wp:content>' );
 		$processor->next_tag();
 		$processor->set_attribute( 'data-test-id', '14' );
 
@@ -429,7 +429,7 @@ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_
 	 * @covers WP_XML_Processor::__toString
 	 */
 	public function test_to_string_returns_updated_xml() {
-		$processor = new WP_XML_Processor( '<line id="remove" /><wp:content enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>' );
+		$processor = WP_XML_Processor::from_string( '<line id="remove" /><wp:content enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>' );
 		$processor->next_tag();
 		$processor->remove_attribute( 'id' );
 
@@ -449,7 +449,7 @@ public function test_to_string_returns_updated_xml() {
 	 * @covers WP_XML_Processor::get_updated_xml
 	 */
 	public function test_get_updated_xml_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() {
-		$processor = new WP_XML_Processor( '<line id="remove" /><wp:content enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>' );
+		$processor = WP_XML_Processor::from_string( '<line id="remove" /><wp:content enabled="yes" wp:post-type="test">Test</wp:content><wp:text id="span-id"></wp:text>' );
 		$processor->next_tag();
 		$processor->remove_attribute( 'id' );
 
@@ -486,7 +486,7 @@ public function test_get_updated_xml_applies_the_updates_so_far_and_keeps_the_pr
 	 * @covers WP_XML_Processor::get_updated_xml
 	 */
 	public function test_get_updated_xml_without_updating_any_attributes_returns_the_original_xml() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 
 		$this->assertSame(
 			self::XML_SIMPLE,
@@ -503,7 +503,7 @@ public function test_get_updated_xml_without_updating_any_attributes_returns_the
 	 * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
 	 */
 	public function test_get_updated_xml_applies_updates_to_content_after_seeking_to_before_parsed_bytes() {
-		$processor = new WP_XML_Processor( '<wp:content><photo hidden></wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content><photo hidden></wp:content>' );
 
 		$processor->next_tag();
 		$processor->set_attribute( 'wonky', 'true' );
@@ -530,7 +530,7 @@ public function test_declare_element_as_pcdata() {
 
 			But! It is all treated as text.
 		';
-		$processor = new WP_XML_Processor(
+		$processor = WP_XML_Processor::from_string(
 			"<root><my-pcdata>$text</my-pcdata></root>"
 		);
 		$processor->declare_element_as_pcdata( 'my-pcdata' );
@@ -556,6 +556,10 @@ public function test_declare_element_as_pcdata() {
 	 */
 	public function test_token_bookmark_span( string $xml, int $match_nth_token, string $expected_match ) {
 		$processor = new class( $xml ) extends WP_XML_Processor {
+			public function __construct( $xml ) { // Public pass-through so `new class( $xml )` can build the subclass; presumably the parent constructor is no longer public now that callers use from_string() — TODO confirm.
+				parent::__construct( $xml );
+			}
+
 			/**
 			 * Returns the raw span of XML for the currently-matched
 			 * token, or null if not paused on any token.
@@ -640,7 +644,7 @@ public static function data_xml_nth_token_substring() {
 	 * @covers WP_XML_Processor::next_tag
 	 */
 	public function test_next_tag_with_no_arguments_should_find_the_next_existing_tag() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 
 		$this->assertTrue( $processor->next_tag(), 'Querying an existing tag did not return true' );
 	}
@@ -651,7 +655,7 @@ public function test_next_tag_with_no_arguments_should_find_the_next_existing_ta
 	 * @covers WP_XML_Processor::next_tag
 	 */
 	public function test_next_tag_should_return_false_for_a_non_existing_tag() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 
 		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
 	}
@@ -662,7 +666,7 @@ public function test_next_tag_should_return_false_for_a_non_existing_tag() {
 	 * @covers WP_XML_Processor::get_modifiable_text
 	 */
 	public function test_normalizes_carriage_returns_in_text_nodes() {
-		$processor = new WP_XML_Processor(
+		$processor = WP_XML_Processor::from_string(
 			"<wp:content>We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns</wp:content>"
 		);
 		$processor->next_tag();
@@ -680,7 +684,7 @@ public function test_normalizes_carriage_returns_in_text_nodes() {
 	 * @covers WP_XML_Processor::get_modifiable_text
 	 */
 	public function test_normalizes_carriage_returns_in_cdata() {
-		$processor = new WP_XML_Processor(
+		$processor = WP_XML_Processor::from_string(
 			"<wp:content><![CDATA[We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns]]>"
 		);
 		$processor->next_tag();
@@ -700,7 +704,7 @@ public function test_normalizes_carriage_returns_in_cdata() {
 	 * @covers WP_XML_Processor::is_tag_closer
 	 */
 	public function test_next_tag_should_not_stop_on_closers() {
-		$processor = new WP_XML_Processor( '<wp:content><photo /></wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content><photo /></wp:content>' );
 
 		$this->assertTrue( $processor->next_tag( array( 'breadcrumbs' => array( 'wp:content' ) ) ), 'Did not find desired tag opener' );
 		$this->assertFalse( $processor->next_tag( array( 'breadcrumbs' => array( 'wp:content' ) ) ), 'Visited an unwanted tag, a tag closer' );
@@ -715,7 +719,7 @@ public function test_next_tag_should_not_stop_on_closers() {
 	 * @covers WP_XML_Processor::get_updated_xml
 	 */
 	public function test_internal_pointer_returns_to_original_spot_after_inserting_content_before_cursor() {
-		$tags = new WP_XML_Processor( '<root><wp:content>outside</wp:content><section><wp:content><photo>inside</wp:content></section></root>' );
+		$tags = WP_XML_Processor::from_string( '<root><wp:content>outside</wp:content><section><wp:content><photo>inside</wp:content></section></root>' );
 
 		$tags->next_tag();
 		$tags->next_tag();
@@ -739,7 +743,7 @@ public function test_internal_pointer_returns_to_original_spot_after_inserting_c
 	 * @covers WP_XML_Processor::set_attribute
 	 */
 	public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 
 		$this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
 		$this->assertFalse( $processor->next_tag( 'wp:content' ), 'Querying a non-existing tag did not return false' );
@@ -762,7 +766,7 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar
 	 * @covers WP_XML_Processor::remove_class
 	 */
 	public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() {
-		$processor = new WP_XML_Processor( '<wp:content id="3"></wp:content>' );
+		$processor = WP_XML_Processor::from_string( '<wp:content id="3"></wp:content>' );
 		$processor->next_token();
 		$this->assertFalse( $processor->is_tag_closer(), 'Skipped tag opener' );
 
@@ -784,7 +788,7 @@ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() {
 	 * @covers WP_XML_Processor::set_attribute
 	 */
 	public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 		$processor->next_tag();
 		$processor->set_attribute( 'test-attribute', 'test-value' );
 
@@ -806,7 +810,7 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_attribute_returns_updated_values_before_they_are_applied() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 		$processor->next_tag();
 		$processor->set_attribute( 'test-attribute', 'test-value' );
 
@@ -828,7 +832,7 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 		$processor->next_tag();
 		$processor->set_attribute( 'test-ATTribute', 'test-value' );
 
@@ -851,7 +855,7 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 		$processor->next_tag();
 		$processor->remove_attribute( 'id' );
 
@@ -872,7 +876,7 @@ public function test_get_attribute_reflects_removed_attribute_before_it_is_appli
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 		$processor->next_tag();
 		$processor->set_attribute( 'test-attribute', 'test-value' );
 		$processor->remove_attribute( 'test-attribute' );
@@ -894,7 +898,7 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut
 	 * @covers WP_XML_Processor::get_attribute
 	 */
 	public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 		$processor->next_tag();
 		$processor->set_attribute( 'id', 'test-value' );
 		$processor->remove_attribute( 'id' );
@@ -916,7 +920,7 @@ public function test_get_attribute_reflects_setting_and_then_removing_an_existin
 	 * @covers WP_XML_Processor::set_attribute
 	 */
 	public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 		$processor->next_tag();
 		$processor->set_attribute( 'id', 'new-id' );
 		$this->assertSame(
@@ -935,7 +939,7 @@ public function test_set_attribute_with_an_existing_attribute_name_updates_its_v
 	 * @covers WP_XML_Processor::set_attribute
 	 */
 	public function test_set_attribute_with_case_variants_updates_only_the_original_first_copy() {
-		$processor = new WP_XML_Processor( '<wp:content data-enabled="5">' );
+		$processor = WP_XML_Processor::from_string( '<wp:content data-enabled="5">' );
 		$processor->next_tag();
 		$processor->set_attribute( 'data-enabled', 'canary1' );
 		$processor->set_attribute( 'data-enabled', 'canary2' );
@@ -951,7 +955,7 @@ public function test_set_attribute_with_case_variants_updates_only_the_original_
 	 * @covers WP_XML_Processor::set_attribute
 	 */
 	public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 		while ( $processor->next_tag() ) {
 			$processor->set_attribute( 'data-foo', 'bar' );
 		}
@@ -969,7 +973,7 @@ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the
 	 * @covers WP_XML_Processor::remove_attribute
 	 */
 	public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 		$processor->next_tag();
 		$processor->remove_attribute( 'id' );
 
@@ -986,7 +990,7 @@ public function test_remove_attribute_with_an_existing_attribute_name_removes_it
 	 * @covers WP_XML_Processor::remove_attribute
 	 */
 	public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() {
-		$processor = new WP_XML_Processor( self::XML_SIMPLE );
+		$processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
 		$processor->next_tag();
 		$processor->remove_attribute( 'no-such-attribute' );
 
@@ -1003,7 +1007,7 @@ public function test_remove_attribute_with_a_non_existing_attribute_name_does_no
 	 * @covers WP_XML_Processor::next_tag
 	 */
 	public function test_correctly_parses_xml_attributes_wrapped_in_single_quotation_marks() {
-		$processor = new WP_XML_Processor(
+		$processor = WP_XML_Processor::from_string(
 			'<wp:content id=\'first\'><wp:text id=\'second\'>Text</wp:text></wp:content>'
 		);
 		$processor->next_tag(
@@ -1035,7 +1039,7 @@ public function test_correctly_parses_xml_attributes_wrapped_in_single_quotation
 	 * @covers WP_XML_Processor::set_attribute
 	 */
 	public function test_setting_an_attribute_to_false_is_rejected() {
-		$processor = new WP_XML_Processor(
+		$processor = WP_XML_Processor::from_string(
 			'<form action="/action_page.php"><input checked type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>'
 		);
 		$processor->next_tag( 'input' );
@@ -1053,7 +1057,7 @@ public function test_setting_an_attribute_to_false_is_rejected() {
 	 */
 	public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() {
 		$xml_input = '<form action="/action_page.php"><input type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>';
-		$processor = new WP_XML_Processor( $xml_input );
+		$processor = WP_XML_Processor::from_string( $xml_input );
 		$processor->next_tag( 'input' );
 		$processor->set_attribute( 'checked', false );
 		$this->assertSame(
@@ -1076,7 +1080,7 @@ public function test_setting_a_missing_attribute_to_false_does_not_change_the_ma
 	 * @param string $xml_ending_before_comment_close XML with opened comments that aren't closed.
 	 */
 	public function test_documents_may_end_with_unclosed_comment( $xml_ending_before_comment_close ) {
-		$processor = new WP_XML_Processor( $xml_ending_before_comment_close );
+		$processor = WP_XML_Processor::from_string( $xml_ending_before_comment_close );
 
 		$this->assertFalse(
 			$processor->next_tag(),
@@ -1114,7 +1118,7 @@ public static function data_xml_with_unclosed_comments() {
 	 * @param string $incomplete_xml XML text containing some kind of incomplete syntax.
 	 */
 	public function test_next_tag_returns_false_for_incomplete_syntax_elements( $incomplete_xml ) {
-		$processor = new WP_XML_Processor( $incomplete_xml );
+		$processor = WP_XML_Processor::from_string( $incomplete_xml );
 
 		$processor->next_tag();
 		$this->assertFalse(
@@ -1164,7 +1168,7 @@ public static function data_incomplete_syntax_elements() {
 	 * @param string $incomplete_xml XML text containing some kind of incomplete syntax.
 	 */
 	public function test_next_tag_returns_false_for_incomplete_text_nodes( $incomplete_xml, $node_at = 1 ) {
-		$processor = new WP_XML_Processor( $incomplete_xml );
+		$processor = WP_XML_Processor::from_stream( $incomplete_xml );
 
 		for ( $i = 0; $i < $node_at; $i++ ) {
 			$this->assertTrue(
@@ -1203,7 +1207,7 @@ public static function data_incomplete_text_nodes() {
 	 * @covers WP_XML_Processor::next_tag
 	 */
 	public function test_rejects_malformed_comments() {
-		$processor = new WP_XML_Processor( '<!-- comment -- oh, I did not close it after the initial double dash -->' );
+		$processor = WP_XML_Processor::from_string( '<!-- comment -- oh, I did not close it after the initial double dash -->' );
 		$this->assertFalse( $processor->next_token(), 'Did not reject a malformed XML comment.' );
 	}
 
@@ -1211,7 +1215,7 @@ public function test_rejects_malformed_comments() {
 	 * @covers WP_XML_Processor::next_tag
 	 */
 	public function test_handles_malformed_taglike_open_short_xml() {
-		$processor = new WP_XML_Processor( '<' );
+		$processor = WP_XML_Processor::from_string( '<' );
 		$result    = $processor->next_tag();
 		$this->assertFalse( $result, 'Did not handle "<" xml properly.' );
 	}
@@ -1220,7 +1224,7 @@ public function test_handles_malformed_taglike_open_short_xml() {
 	 * @covers WP_XML_Processor::next_tag
 	 */
 	public function test_handles_malformed_taglike_close_short_xml() {
-		$processor = new WP_XML_Processor( '</ ' );
+		$processor = WP_XML_Processor::from_string( '</ ' );
 		$result    = $processor->next_tag();
 		$this->assertFalse( $result, 'Did not handle "</ " xml properly.' );
 	}
@@ -1230,7 +1234,7 @@ public function test_handles_malformed_taglike_close_short_xml() {
 	 * @covers WP_XML_Processor::next_tag
 	 */
 	public function test_rejects_empty_element_that_is_also_a_closer() {
-		$processor = new WP_XML_Processor( '</wp:content/> ' );
+		$processor = WP_XML_Processor::from_string( '</wp:content/> ' );
 		$result    = $processor->next_tag();
 		$this->assertFalse( $result, 'Did not handle "</wp:content/>" xml properly.' );
 	}
@@ -1241,7 +1245,7 @@ public function test_rejects_empty_element_that_is_also_a_closer() {
 	 * @ticket 61365
 	 */
 	public function test_single_text_node_with_taglike_text() {
-		$processor = new WP_XML_Processor( '<root>This is a text node< /A>' );
+		$processor = WP_XML_Processor::from_string( '<root>This is a text node< /A>' );
 		$this->assertTrue( $processor->next_token(), 'A root node was not found.' );
 		$this->assertTrue( $processor->next_token(), 'A valid text node was not found.' );
 		$this->assertEquals( 'This is a text node', $processor->get_modifiable_text(), 'The contents of a valid text node were not correctly captured.' );
@@ -1254,7 +1258,7 @@ public function test_single_text_node_with_taglike_text() {
 	 * @ticket 61365
 	 */
 	public function test_parses_CDATA() {
-		$processor = new WP_XML_Processor( '<root><![CDATA[This is a CDATA text node.]]></root>' );
+		$processor = WP_XML_Processor::from_string( '<root><![CDATA[This is a CDATA text node.]]></root>' );
 		$processor->next_tag();
 		$this->assertTrue( $processor->next_token(), 'The first text node was not found.' );      $this->assertEquals(
 			'This is a CDATA text node.',
@@ -1267,7 +1271,7 @@ public function test_parses_CDATA() {
 	 * @ticket 61365
 	 */
 	public function test_yields_CDATA_a_separate_text_node() {
-		$processor = new WP_XML_Processor( '<root>This is the first text node <![CDATA[ and this is a second text node ]]> and this is the third text node.</root>' );
+		$processor = WP_XML_Processor::from_string( '<root>This is the first text node <![CDATA[ and this is a second text node ]]> and this is the third text node.</root>' );
 
 		$processor->next_token();
 		$this->assertTrue( $processor->next_token(), 'The first text node was not found.' );
@@ -1297,7 +1301,7 @@ public function test_yields_CDATA_a_separate_text_node() {
 	 * @ticket 61365
 	 */
 	public function test_xml_declaration() {
-		$processor = new WP_XML_Processor( '<?xml version="1.0" encoding="UTF-8" ?>' );
+		$processor = WP_XML_Processor::from_string( '<?xml version="1.0" encoding="UTF-8" ?>' );
 		$this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
 		$this->assertEquals(
 			'#xml-declaration',
@@ -1313,7 +1317,7 @@ public function test_xml_declaration() {
 	 * @ticket 61365
 	 */
 	public function test_xml_declaration_with_single_quotes() {
-		$processor = new WP_XML_Processor( "<?xml version='1.0' encoding='UTF-8' ?>" );
+		$processor = WP_XML_Processor::from_string( "<?xml version='1.0' encoding='UTF-8' ?>" );
 		$this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
 		$this->assertEquals(
 			'#xml-declaration',
@@ -1329,7 +1333,7 @@ public function test_xml_declaration_with_single_quotes() {
 	 * @ticket 61365
 	 */
 	public function test_processor_instructions() {
-		$processor = new WP_XML_Processor(
+		$processor = WP_XML_Processor::from_string(
 			// The first <?xml tag is an xml declaration.
 			'<?xml version="1.0" encoding="UTF-8" ?>' .
 			// The second <?xml tag is a processing instruction.
@@ -1355,6 +1359,10 @@ public function test_applies_updates_before_proceeding() {
 		$xml = '<root><wp:content><photo/></wp:content><wp:content><photo/></wp:content></root>';
 
 		$subclass = new class( $xml ) extends WP_XML_Processor {
+			public function __construct( $xml ) { // Public pass-through so `new class( $xml )` can build the subclass; presumably the parent constructor is no longer public now that callers use from_string() — TODO confirm.
+				parent::__construct( $xml );
+			}
+
 			/**
 			 * Inserts raw text after the current token.
 			 *
@@ -1398,7 +1406,7 @@ public function insert_after( $new_xml ) {
 	 * @covers WP_XML_Processor::get_breadcrumbs
 	 */
 	public function test_get_breadcrumbs() {
-		$processor = new WP_XML_Processor(
+		$processor = WP_XML_Processor::from_string(
 			'<wp:content>
 				<wp:text>
 					<photo />
@@ -1436,7 +1444,7 @@ public function test_get_breadcrumbs() {
 	 */
 	public function test_matches_breadcrumbs() {
 		// Initialize the WP_XML_Processor with the given XML string
-		$processor = new WP_XML_Processor( '<root><wp:post><content><image /></content></wp:post></root>' );
+		$processor = WP_XML_Processor::from_string( '<root><wp:post><content><image /></content></wp:post></root>' );
 
 		// Move to the next element with tag name 'img'
 		$processor->next_tag( 'image' );
@@ -1455,7 +1463,7 @@ public function test_matches_breadcrumbs() {
 	 */
 	public function test_next_tag_by_breadcrumbs() {
 		// Initialize the WP_XML_Processor with the given XML string
-		$processor = new WP_XML_Processor( '<root><wp:post><content><image /></content></wp:post></root>' );
+		$processor = WP_XML_Processor::from_string( '<root><wp:post><content><image /></content></wp:post></root>' );
 
 		// Move to the next element with tag name 'img'
 		$processor->next_tag(
@@ -1474,7 +1482,7 @@ public function test_next_tag_by_breadcrumbs() {
 	 */
 	public function test_get_current_depth() {
 		// Initialize the WP_XML_Processor with the given XML string
-		$processor = new WP_XML_Processor( '<?xml version="1.0" ?><root><wp:text><post /></wp:text><image /></root>' );
+		$processor = WP_XML_Processor::from_string( '<?xml version="1.0" ?><root><wp:text><post /></wp:text><image /></root>' );
 
 		// Assert that the initial depth is 0
 		$this->assertEquals( 0, $processor->get_current_depth() );
@@ -1506,7 +1514,7 @@ public function test_get_current_depth() {
 	 * @expectedIncorrectUsage WP_XML_Processor::step_in_misc
 	 */
 	public function test_no_text_allowed_after_root_element() {
-		$processor = new WP_XML_Processor( '<root></root>text' );
+		$processor = WP_XML_Processor::from_string( '<root></root>text' );
 		$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
 		$this->assertFalse( $processor->next_tag(), 'Found a non-existent tag.' );
 		$this->assertEquals(
@@ -1520,7 +1528,7 @@ public function test_no_text_allowed_after_root_element() {
 	 * @ticket 61365
 	 */
 	public function test_whitespace_text_allowed_after_root_element() {
-		$processor = new WP_XML_Processor( '<root></root>   ' );
+		$processor = WP_XML_Processor::from_string( '<root></root>   ' );
 		$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
 		$this->assertFalse( $processor->next_tag(), 'Found a non-existent tag.' );
 		$this->assertNull( $processor->get_last_error(), 'Ran into a parse error after the root element' );
@@ -1530,7 +1538,7 @@ public function test_whitespace_text_allowed_after_root_element() {
 	 * @ticket 61365
 	 */
 	public function test_processing_directives_allowed_after_root_element() {
-		$processor = new WP_XML_Processor( '<root></root><?xml processing directive! ?>' );
+		$processor = WP_XML_Processor::from_string( '<root></root><?xml processing directive! ?>' );
 		$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
 		$this->assertFalse( $processor->next_tag(), 'Found a non-existent tag.' );
 		$this->assertNull( $processor->get_last_error(), 'Ran into a parse error after the root element' );
@@ -1540,7 +1548,7 @@ public function test_processing_directives_allowed_after_root_element() {
 	 * @ticket 61365
 	 */
 	public function test_mixed_misc_grammar_allowed_after_root_element() {
-		$processor = new WP_XML_Processor( '<root></root>   <?xml hey ?> <!-- comment --> <?xml another pi ?> <!-- more comments! -->' );
+		$processor = WP_XML_Processor::from_string( '<root></root>   <?xml hey ?> <!-- comment --> <?xml another pi ?> <!-- more comments! -->' );
 
 		$processor->next_tag();
 		$this->assertEquals( 'root', $processor->get_tag(), 'Did not find a tag.' );
@@ -1555,7 +1563,7 @@ public function test_mixed_misc_grammar_allowed_after_root_element() {
 	 * @expectedIncorrectUsage WP_XML_Processor::step_in_misc
 	 */
 	public function test_elements_not_allowed_after_root_element() {
-		$processor = new WP_XML_Processor( '<root></root><another-root>' );
+		$processor = WP_XML_Processor::from_string( '<root></root><another-root>' );
 		$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
 		$this->assertFalse( $processor->next_tag(), 'Fount an illegal tag.' );
 		$this->assertEquals(
@@ -1571,7 +1579,7 @@ public function test_elements_not_allowed_after_root_element() {
 	 * @return void
 	 */
 	public function test_comments_allowed_after_root_element() {
-		$processor = new WP_XML_Processor( '<root></root><!-- comment -->' );
+		$processor = WP_XML_Processor::from_string( '<root></root><!-- comment -->' );
 		$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
 		$this->assertFalse( $processor->next_tag(), 'Found an element node after the root element' );
 		$this->assertNull( $processor->get_last_error(), 'Ran into a parse error after the root element' );
@@ -1584,7 +1592,7 @@ public function test_comments_allowed_after_root_element() {
 	 * @return void
 	 */
 	public function test_cdata_not_allowed_after_root_element() {
-		$processor = new WP_XML_Processor( '<root></root><![CDATA[ cdata ]]>' );
+		$processor = WP_XML_Processor::from_string( '<root></root><![CDATA[ cdata ]]>' );
 		$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
 		$this->assertFalse( $processor->next_tag(), 'Did not reject a comment node after the root element' );
 		$this->assertEquals(
@@ -1600,7 +1608,7 @@ public function test_cdata_not_allowed_after_root_element() {
 	 * @covers WP_XML_Processor::next_tag
 	 */
 	public function test_detects_invalid_document_no_root_tag() {
-		$processor = new WP_XML_Processor(
+		$processor = WP_XML_Processor::from_stream(
 			'<?xml version="1.0" encoding="UTF-8" ?>
 			 <!-- comment no root tag -->'
 		);
@@ -1614,7 +1622,7 @@ public function test_detects_invalid_document_no_root_tag() {
 	 * @covers WP_XML_Processor::next_tag
 	 */
 	public function test_unclosed_root_yields_incomplete_input() {
-		$processor = new WP_XML_Processor(
+		$processor = WP_XML_Processor::from_stream(
 			'<root inert="yes" title="test">
 				<child></child>
 				<?xml directive ?>

From 2480acd3e22e838ebaa6ae0ebb9e8b165799e515 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com>
Date: Tue, 29 Oct 2024 18:49:31 +0100
Subject: [PATCH 3/6] Only enter incomplete_input state when streaming

---
 .../src/xml-api/WP_XML_Processor.php          | 53 +++++++++++--------
 .../tests/WPXMLProcessorTests.php             | 48 ++++++++++++++++-
 2 files changed, 77 insertions(+), 24 deletions(-)

diff --git a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
index 2efeb73893..c335954b46 100644
--- a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
+++ b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
@@ -21,11 +21,9 @@
  * initial processing instruction (<?xml version="1.0" ?>). We're
  * starting with 1.0, however, because most that's what most WXR
  * files declare.
- *
- * @TODO: Add input_finished() or similar method to tell the processor there 
- *        will not be any more input. This will enable distinguishing
- *        between incomplete tokens and syntax errors and will make
- *        the step() logic more reliable.
+ * 
+ * @TODO: Track specific error states, expose informative messages, line
+ *        numbers, indexes, and other debugging info.
  *
  * @TODO: Skip over the following syntax elements:
  *        * <!DOCTYPE, see https://www.w3.org/TR/xml/#sec-prolog-dtd
@@ -872,7 +870,7 @@ protected function base_class_next_token() {
 			$this->bytes_already_parsed >= strlen( $this->xml )
 		) {
 			// Does this appropriately clear state (parsed attributes)?
-			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->set_incomplete_input_or_parse_error();
 			$this->bytes_already_parsed = $was_at;
 
 			return false;
@@ -880,7 +878,7 @@ protected function base_class_next_token() {
 
 		$tag_ends_at = strpos( $this->xml, '>', $this->bytes_already_parsed );
 		if ( false === $tag_ends_at ) {
-			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->set_incomplete_input_or_parse_error();
 			$this->bytes_already_parsed = $was_at;
 
 			return false;
@@ -923,7 +921,7 @@ protected function base_class_next_token() {
 
 		// Closer not found, the document is incomplete.
 		if ( false === $found_closer ) {
-			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->set_incomplete_input_or_parse_error();
 			$this->bytes_already_parsed = $was_at;
 			return false;
 		}
@@ -1392,7 +1390,7 @@ private function parse_next_tag() {
 			}
 
 			if ( $at + 1 >= $doc_length ) {
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+				$this->set_incomplete_input_or_parse_error();
 				return false;
 			}
 
@@ -1421,7 +1419,7 @@ private function parse_next_tag() {
 			 * the document. There is nothing left to parse.
 			 */
 			if ( $at + 1 >= $doc_length ) {
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+				$this->set_incomplete_input_or_parse_error();
 
 				return false;
 			}
@@ -1442,7 +1440,7 @@ private function parse_next_tag() {
 					$closer_at = $at + 4;
 					// If it's not possible to close the comment then there is nothing more to scan.
 					if ( $doc_length <= $closer_at ) {
-						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+						$this->set_incomplete_input_or_parse_error();
 
 						return false;
 					}
@@ -1454,7 +1452,7 @@ private function parse_next_tag() {
 					while ( ++$closer_at < $doc_length ) {
 						$closer_at = strpos( $xml, '--', $closer_at );
 						if ( false === $closer_at || $closer_at + 2 === $doc_length ) {
-							$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+							$this->set_incomplete_input_or_parse_error();
 							return false;
 						}
 
@@ -1504,7 +1502,7 @@ private function parse_next_tag() {
 				) {
 					$closer_at = strpos( $xml, ']]>', $at + 1 );
 					if ( false === $closer_at ) {
-						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+						$this->set_incomplete_input_or_parse_error();
 
 						return false;
 					}
@@ -1521,7 +1519,7 @@ private function parse_next_tag() {
 				 * Anything else here is either unsupported at this point or invalid
 				 * syntax. See the class-level @TODO annotations for more information.
 				 */
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+				$this->set_incomplete_input_or_parse_error();
 
 				return false;
 			}
@@ -1660,7 +1658,7 @@ private function parse_next_tag() {
 				'?' === $xml[ $at + 1 ]
 			) {
 				if ( $at + 4 >= $doc_length ) {
-					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+					$this->set_incomplete_input_or_parse_error();
 
 					return false;
 				}
@@ -1697,7 +1695,7 @@ private function parse_next_tag() {
 				 */
 				$closer_at = strpos( $xml, '?>', $at );
 				if ( false === $closer_at ) {
-					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+					$this->set_incomplete_input_or_parse_error();
 
 					return false;
 				}
@@ -1730,7 +1728,7 @@ private function parse_next_tag() {
 		 * This does not imply an incomplete parse; it indicates that there
 		 * can be nothing left in the document other than a #text node.
 		 */
-		$this->parser_state            = self::STATE_INCOMPLETE_INPUT;
+		$this->set_incomplete_input_or_parse_error();
 		$this->is_incomplete_text_node = true;
 		$this->text_starts_at          = $was_at;
 		$this->text_length             = $doc_length - $was_at;
@@ -1748,7 +1746,7 @@ private function parse_next_attribute() {
 		// Skip whitespace and slashes.
 		$this->bytes_already_parsed += strspn( $this->xml, " \t\f\r\n/", $this->bytes_already_parsed );
 		if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			$this->set_incomplete_input_or_parse_error();
 			return false;
 		}
 
@@ -1775,7 +1773,7 @@ private function parse_next_attribute() {
 		++$this->bytes_already_parsed;
 		$this->skip_whitespace();
 		if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			$this->set_incomplete_input_or_parse_error();
 			return false;
 		}
 		switch ( $this->xml[ $this->bytes_already_parsed ] ) {
@@ -1799,7 +1797,7 @@ private function parse_next_attribute() {
 				$attribute_end = $value_start + $value_length + 1;
 
 				if ( $attribute_end - 1 >= strlen( $this->xml ) ) {
-					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+					$this->set_incomplete_input_or_parse_error();
 
 					return false;
 				}
@@ -1826,7 +1824,7 @@ private function parse_next_attribute() {
 		}
 
 		if ( $attribute_end >= strlen( $this->xml ) ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			$this->set_incomplete_input_or_parse_error();
 			return false;
 		}
 
@@ -2862,7 +2860,7 @@ private function step_in_prolog( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		// XML requires a root element. If we've reached the end of data in the prolog stage,
 		// before finding a root element, then the document is incomplete.
 		if ( WP_XML_Processor::STATE_COMPLETE === $this->parser_state ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			$this->set_incomplete_input_or_parse_error();
 			return false;
 		}
 		// Do not step if we paused due to an incomplete input.
@@ -3138,6 +3136,17 @@ private function push_open_element( $tag_name ) {
 		);
 	}
 
+	private function set_incomplete_input_or_parse_error() {
+		if($this->expecting_more_input) {
+			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+		} else {
+			$this->parser_state = self::STATE_INVALID_DOCUMENT;
+			$this->last_error = self::ERROR_SYNTAX;
+			// @TODO: Add a more specific error message.
+			_doing_it_wrong( __METHOD__, 'Unexpected syntax encountered.', 'WP_VERSION' );
+		}
+	}
+
 	/**
 	 * Parser Ready State.
 	 *
diff --git a/packages/playground/data-liberation/tests/WPXMLProcessorTests.php b/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
index 441aeff1a5..974038ce73 100644
--- a/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
+++ b/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
@@ -1080,7 +1080,7 @@ public function test_setting_a_missing_attribute_to_false_does_not_change_the_ma
 	 * @param string $xml_ending_before_comment_close XML with opened comments that aren't closed.
 	 */
 	public function test_documents_may_end_with_unclosed_comment( $xml_ending_before_comment_close ) {
-		$processor = WP_XML_Processor::from_string( $xml_ending_before_comment_close );
+		$processor = WP_XML_Processor::from_stream( $xml_ending_before_comment_close );
 
 		$this->assertFalse(
 			$processor->next_tag(),
@@ -1105,6 +1105,50 @@ public static function data_xml_with_unclosed_comments() {
 		);
 	}
 
+	/**
+	 * Ensures that partial syntax triggers warnings or errors.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 * @covers WP_XML_Processor::is_paused_at_incomplete_input
+	 *
+	 * @dataProvider data_partial_syntax
+	 *
+	 * @param string $xml_ending_before_comment_close XML with partial syntax.
+	 */
+	public function test_partial_syntax_triggers_parse_error_when_streaming_is_not_used( $xml_ending_before_comment_close ) {
+		$processor = WP_XML_Processor::from_string( $xml_ending_before_comment_close );
+
+		$this->assertFalse(
+			$processor->next_tag(),
+			"Should not have found any tag, but found {$processor->get_tag()}."
+		);
+
+		$this->assertFalse(
+			$processor->is_paused_at_incomplete_input(),
+			"Should not have indicated that the parser found an incomplete token but it did."
+		);
+
+		$this->assertNotEmpty(
+			$processor->get_last_error(),
+			"Should have errors but didn't."
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_partial_syntax() {
+		return array(
+			'Incomplete tag name' => array( '<swit' ),
+			'Shortest open valid comment' => array( '<!--' ),
+			'Basic truncated comment'     => array( '<!-- this ends --' ),
+		);
+	}
+
 	/**
 	 * Ensures that the processor doesn't attempt to match an incomplete token.
 	 *
@@ -1118,7 +1162,7 @@ public static function data_xml_with_unclosed_comments() {
 	 * @param string $incomplete_xml XML text containing some kind of incomplete syntax.
 	 */
 	public function test_next_tag_returns_false_for_incomplete_syntax_elements( $incomplete_xml ) {
-		$processor = WP_XML_Processor::from_string( $incomplete_xml );
+		$processor = WP_XML_Processor::from_stream( $incomplete_xml );
 
 		$processor->next_tag();
 		$this->assertFalse(

From bb865d3dd0e0502e15ca8b25f524f363c62a14b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com>
Date: Tue, 29 Oct 2024 21:13:36 +0100
Subject: [PATCH 4/6] =?UTF-8?q?Fix=20XML=20parsing=20bugs=20=E2=80=93=20mo?=
 =?UTF-8?q?st=20prominently,=20token=5Fstarts=5Fat=20variable=20getting=20?=
 =?UTF-8?q?out=20of=20sync=20with=20text=5Fstarts=5Fat?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../playground/data-liberation/bootstrap.php  |   1 -
 .../src/WP_WXR_URL_Rewrite_Processor.php      |   4 +-
 .../src/xml-api/WP_XML_Processor.php          | 161 ++++++++----------
 .../tests/WPWXRURLRewriterTests.php           |   2 +-
 .../tests/fixtures/wxr-simple.xml             |   3 +-
 5 files changed, 73 insertions(+), 98 deletions(-)

diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php
index 9c15be7f13..cc1ce63fd7 100644
--- a/packages/playground/data-liberation/bootstrap.php
+++ b/packages/playground/data-liberation/bootstrap.php
@@ -38,7 +38,6 @@
 
 // Polyfill WordPress core functions
 function _doing_it_wrong($method, $message, $version) {
-	// var_dump($method, $message, $version);
 }
 
 function __($input) {
diff --git a/packages/playground/data-liberation/src/WP_WXR_URL_Rewrite_Processor.php b/packages/playground/data-liberation/src/WP_WXR_URL_Rewrite_Processor.php
index 34caa67513..2b6bb5b343 100644
--- a/packages/playground/data-liberation/src/WP_WXR_URL_Rewrite_Processor.php
+++ b/packages/playground/data-liberation/src/WP_WXR_URL_Rewrite_Processor.php
@@ -3,8 +3,8 @@
 class WP_WXR_URL_Rewrite_Processor {
 
 
-	public static function stream( $current_site_url, $new_site_url ) {
-		return WP_XML_Processor::stream(
+	public static function create_stream_processor( $current_site_url, $new_site_url ) {
+		return WP_XML_Processor::create_stream_processor(
 			function ( $processor ) use ( $current_site_url, $new_site_url ) {
 				if ( static::is_wxr_content_node( $processor ) ) {
 					$text         = $processor->get_modifiable_text();
diff --git a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
index c335954b46..36990b805b 100644
--- a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
+++ b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
@@ -371,22 +371,6 @@ class WP_XML_Processor {
 	 */
 	protected $parser_state = self::STATE_READY;
 
-	/**
-	 * Whether we stopped at an incomplete text node.
-	 *
-	 * If we are before the last tag in the document, every text
-	 * node is incomplete until we find the next tag. However,
-	 * if we are after the last tag, an incomplete all-whitespace
-	 * node may either mean we're the end of the document or
-	 * that we're still waiting for more data/
-	 *
-	 * This flag allows us to differentiate between these two
-	 * cases in context-aware APIs such as WP_XML_Processor.
-	 *
-	 * @var bool
-	 */
-	protected $is_incomplete_text_node = false;
-
 	/**
 	 * Whether the input has been finished.
 	 *
@@ -672,12 +656,11 @@ protected function __construct( $xml ) {
 		$this->xml = $xml;
 	}
 
-	public static function stream( $node_visitor_callback ) {
-		$xml_processor = new WP_XML_Processor( '', array(), WP_XML_Processor::IN_PROLOG_CONTEXT );
+	public static function create_stream_processor( $node_visitor_callback ) {
+		$xml_processor = WP_XML_Processor::from_stream( '' );
 		return new ProcessorByteStream(
 			$xml_processor,
 			function ( $state ) use ( $xml_processor, $node_visitor_callback ) {
-				$buffer = $xml_processor->flush_processed_xml();
 
 				$new_bytes = $state->consume_input_bytes();
 				if ( null !== $new_bytes ) {
@@ -689,6 +672,7 @@ function ( $state ) use ( $xml_processor, $node_visitor_callback ) {
 					$node_visitor_callback( $xml_processor );
 				}
 
+				$buffer = '';
 				if ( $tokens_found > 0 ) {
 					$buffer .= $xml_processor->flush_processed_xml();
 				} elseif (
@@ -696,8 +680,6 @@ function ( $state ) use ( $xml_processor, $node_visitor_callback ) {
 					! $xml_processor->is_paused_at_incomplete_input() &&
 					$xml_processor->get_current_depth() === 0
 				) {
-					// We've reached the end of the document, let's finish up.
-					// @TODO: Fix this so it doesn't return the entire XML
 					$buffer .= $xml_processor->flush_processed_xml();
 					$buffer .= $xml_processor->get_updated_xml();
 					$state->finish();
@@ -713,6 +695,9 @@ function ( $state ) use ( $xml_processor, $node_visitor_callback ) {
 		);
 	}
 
+	/*
+	@TODO: implement these methods for re-entrancy
+
 	public function pause() {
 		return array(
 			'xml' => $this->xml,
@@ -729,8 +714,9 @@ public function resume( $paused ) {
 		$this->stack_of_open_elements = $paused['stack_of_open_elements'];
 		$this->parser_context         = $paused['parser_context'];
 		$this->bytes_already_parsed   = $paused['bytes_already_parsed'];
-		$this->base_class_next_token();
+		$this->parse_next_token();
 	}
+	*/
 
 	/**
 	 * Wipes out the processed XML and appends the next chunk of XML to
@@ -739,8 +725,7 @@ public function resume( $paused ) {
 	 * @param string $next_chunk XML to append.
 	 */
 	public function append_bytes( string $next_chunk ) {
-		$this->xml .= $next_chunk;
-		if ( $this->expecting_more_input ) {
+		if ( ! $this->expecting_more_input ) {
 			_doing_it_wrong(
 				__METHOD__,
 				__( 'Cannot append bytes after the last input chunk was provided and input_finished() was called.' ),
@@ -748,6 +733,11 @@ public function append_bytes( string $next_chunk ) {
 			);
 			return false;
 		}
+		$this->xml .= $next_chunk;
+		$this->had_previous_chunks = true;
+		if($this->parser_state === self::STATE_INCOMPLETE_INPUT) {
+			$this->parser_state = self::STATE_READY;
+		}
 		return true;
 	}
 
@@ -796,7 +786,7 @@ public function flush_processed_xml() {
 	 *
 	 * @return bool Whether a token was parsed.
 	 */
-	protected function base_class_next_token() {
+	protected function parse_next_token() {
 		$was_at = $this->bytes_already_parsed;
 		$this->after_tag();
 
@@ -1729,7 +1719,8 @@ private function parse_next_tag() {
 		 * can be nothing left in the document other than a #text node.
 		 */
 		$this->set_incomplete_input_or_parse_error();
-		$this->is_incomplete_text_node = true;
+		$this->token_starts_at         = $was_at;
+		$this->token_length            = $doc_length - $was_at;
 		$this->text_starts_at          = $was_at;
 		$this->text_length             = $doc_length - $was_at;
 		return false;
@@ -1932,13 +1923,12 @@ private function after_tag() {
 			unset( $this->lexical_updates[ $name ] );
 		}
 
-		$this->is_incomplete_text_node = false;
 		$this->token_starts_at         = null;
 		$this->token_length            = null;
 		$this->tag_name_starts_at      = null;
 		$this->tag_name_length         = null;
-		$this->text_starts_at          = 0;
-		$this->text_length             = 0;
+		$this->text_starts_at          = null;
+		$this->text_length             = null;
 		$this->is_closing_tag          = null;
 		$this->attributes              = array();
 	}
@@ -1946,14 +1936,13 @@ private function after_tag() {
 	protected function reset_state() {
 		$this->xml                     = '';
 		$this->parser_state            = self::STATE_READY;
-		$this->is_incomplete_text_node = false;
 		$this->bytes_already_parsed    = 0;
 		$this->token_starts_at         = null;
 		$this->token_length            = null;
 		$this->tag_name_starts_at      = null;
 		$this->tag_name_length         = null;
-		$this->text_starts_at          = 0;
-		$this->text_length             = 0;
+		$this->text_starts_at          = null;
+		$this->text_length             = null;
 		$this->is_closing_tag          = null;
 		$this->last_error              = null;
 		$this->attributes              = array();
@@ -1964,14 +1953,14 @@ protected function reset_state() {
 	}
 
 	/**
-	 * Applies attribute updates to XML document.
+	 * Applies lexical updates to XML document.
 	 *
 	 * @since WP_VERSION
 	 *
 	 * @param int $shift_this_point Accumulate and return shift for this position.
 	 * @return int How many bytes the given pointer moved in response to the updates.
 	 */
-	private function apply_attributes_updates( $shift_this_point = 0 ) {
+	private function apply_lexical_updates( $shift_this_point = 0 ) {
 		if ( ! count( $this->lexical_updates ) ) {
 			return 0;
 		}
@@ -2112,7 +2101,7 @@ public function seek( $bookmark_name ) {
 		// Point this tag processor before the sought tag opener and consume it.
 		$this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start;
 		$this->parser_state         = self::STATE_READY;
-		return $this->base_class_next_token();
+		return $this->parse_next_token();
 	}
 
 	/**
@@ -2722,15 +2711,15 @@ public function get_updated_xml() {
 		}
 
 		/*
-		 * Keep track of the position right before the current tag. This will
-		 * be necessary for reparsing the current tag after updating the XML.
+		 * Keep track of the position right before the current token. This will
+		 * be necessary for reparsing the current token after updating the XML.
 		 */
-		$before_current_tag = $this->token_starts_at;
+		$before_current_token = $this->token_starts_at ?? 0;
 
 		/*
 		 * 1. Apply the enqueued edits and update all the pointers to reflect those changes.
 		 */
-		$before_current_tag += $this->apply_attributes_updates( $before_current_tag );
+		$before_current_token += $this->apply_lexical_updates( $before_current_token );
 
 		/*
 		 * 2. Rewind to before the current tag and reparse to get updated attributes.
@@ -2752,12 +2741,40 @@ public function get_updated_xml() {
 		 *                 ↑  │ back up by the length of the tag name plus the opening <
 		 *                 └←─┘ back up by strlen("em") + 1 ==> 3
 		 */
-		$this->bytes_already_parsed = $before_current_tag;
-		$this->base_class_next_token();
+		$this->bytes_already_parsed = $before_current_token;
+		$this->parse_next_token();
 
 		return $this->xml;
 	}
 
+	/**
+	 * Finds the next token in the XML document.
+	 *
+	 * An XML document can be viewed as a stream of tokens,
+	 * where tokens are things like XML tags, XML comments,
+	 * text nodes, etc. This method finds the next token in
+	 * the XML document and returns whether it found one.
+	 *
+	 * If it starts parsing a token and reaches the end of the
+	 * document then it will seek to the start of the last
+	 * token and pause, returning `false` to indicate that it
+	 * failed to find a complete token.
+	 *
+	 * Possible token types, based on the XML specification:
+	 *
+	 *  - an XML tag
+	 *  - a text node - the plaintext inside tags.
+	 *  - a CData section
+	 *  - an XML comment.
+	 *  - a DOCTYPE declaration.
+	 *  - a processing instruction, e.g. `<?xml mode="WordPress" ?>`.
+	 *
+	 * @return bool Whether a token was parsed.
+	 */
+	public function next_token() {
+		return $this->step();
+	}
+
 	/**
 	 * Moves the internal cursor to the next token in the XML document
 	 * according to the XML specification.
@@ -2806,34 +2823,6 @@ private function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		}
 	}
 
-	/**
-	 * Finds the next token in the XML document.
-	 *
-	 * An XML document can be viewed as a stream of tokens,
-	 * where tokens are things like XML tags, XML comments,
-	 * text nodes, etc. This method finds the next token in
-	 * the XML document and returns whether it found one.
-	 *
-	 * If it starts parsing a token and reaches the end of the
-	 * document then it will seek to the start of the last
-	 * token and pause, returning `false` to indicate that it
-	 * failed to find a complete token.
-	 *
-	 * Possible token types, based on the XML specification:
-	 *
-	 *  - an XML tag
-	 *  - a text node - the plaintext inside tags.
-	 *  - a CData section
-	 *  - an XML comment.
-	 *  - a DOCTYPE declaration.
-	 *  - a processing instruction, e.g. `<?xml mode="WordPress" ?>`.
-	 *
-	 * @return bool Whether a token was parsed.
-	 */
-	public function next_token( $node_to_process = self::PROCESS_NEXT_NODE ) {
-		return $this->step( $node_to_process );
-	}
-
 	/**
 	 * Parses the next node in the 'prolog' part of the XML document.
 	 *
@@ -2846,7 +2835,7 @@ public function next_token( $node_to_process = self::PROCESS_NEXT_NODE ) {
 	 */
 	private function step_in_prolog( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
-			$has_next_node = $this->base_class_next_token();
+			$has_next_node = $this->parse_next_token();
 			if (
 				false === $has_next_node &&
 				! $this->expecting_more_input
@@ -2903,7 +2892,7 @@ private function step_in_prolog( $node_to_process = self::PROCESS_NEXT_NODE ) {
 	 */
 	private function step_in_element( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
-			$has_next_node = $this->base_class_next_token();
+			$has_next_node = $this->parse_next_token();
 			if (
 				false === $has_next_node &&
 				! $this->expecting_more_input
@@ -2965,7 +2954,7 @@ private function step_in_element( $node_to_process = self::PROCESS_NEXT_NODE ) {
 	 */
 	private function step_in_misc( $node_to_process = self::PROCESS_NEXT_NODE ) {
 		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
-			$has_next_node = $this->base_class_next_token();
+			$has_next_node = $this->parse_next_token();
 			if (
 				false === $has_next_node &&
 				! $this->expecting_more_input
@@ -2976,9 +2965,15 @@ private function step_in_misc( $node_to_process = self::PROCESS_NEXT_NODE ) {
 			}
 		}
 
+		// Do not step if we paused due to an incomplete input.
+		if ( WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
+			return false;
+		}
+
 		if ( self::STATE_COMPLETE === $this->parser_state ) {
 			return true;
 		}
+
 		switch ( $this->get_token_type() ) {
 			case '#comment':
 			case '#processing-instructions':
@@ -2993,26 +2988,6 @@ private function step_in_misc( $node_to_process = self::PROCESS_NEXT_NODE ) {
 				}
 				return $this->step();
 			default:
-				/*
-				 * If we're at the end of the document, we can never be sure
-				 * whether it's complete or are we still waiting for a comment
-				 * or a processing directive. Let's mark the parse as complete
-				 * and let the API consumer decide whether they want to re-parse
-				 * once more data becomes available in.
-				 */
-				if (
-					WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state &&
-					$this->is_incomplete_text_node
-				) {
-					$text = $this->get_modifiable_text();
-					// Non-whitespace characters are not allowed after the root element was closed.
-					$contains_only_whitespace = strlen( $text ) === strspn( $text, " \t\n\r" );
-					if ( $contains_only_whitespace ) {
-						$this->parser_state = self::STATE_COMPLETE;
-						return false;
-					}
-				}
-
 				$this->last_error = self::ERROR_SYNTAX;
 				_doing_it_wrong( __METHOD__, 'Unexpected token type "' . $this->get_token_type() . '" in misc stage.', 'WP_VERSION' );
 				return false;
@@ -3343,3 +3318,5 @@ private function set_incomplete_input_or_parse_error() {
 	 */
 	const PROCESS_CURRENT_NODE = 'process-current-node';
 }
+
+
diff --git a/packages/playground/data-liberation/tests/WPWXRURLRewriterTests.php b/packages/playground/data-liberation/tests/WPWXRURLRewriterTests.php
index 3d66d628f7..3323fdf249 100644
--- a/packages/playground/data-liberation/tests/WPWXRURLRewriterTests.php
+++ b/packages/playground/data-liberation/tests/WPWXRURLRewriterTests.php
@@ -11,7 +11,7 @@ public function test_process($fixture_path, $expected_outcome_path) {
         $chain = new WP_Stream_Chain(
             [
                 'file' => new WP_File_Byte_Stream($fixture_path, 100),
-                'wxr' => WP_WXR_URL_Rewrite_Processor::stream(
+                'wxr' => WP_WXR_URL_Rewrite_Processor::create_stream_processor(
                     'https://playground.internal/path',
                     'https://playground.wordpress.net/new-path'
                 ),
diff --git a/packages/playground/data-liberation/tests/fixtures/wxr-simple.xml b/packages/playground/data-liberation/tests/fixtures/wxr-simple.xml
index f7c4b13b07..2edf266984 100644
--- a/packages/playground/data-liberation/tests/fixtures/wxr-simple.xml
+++ b/packages/playground/data-liberation/tests/fixtures/wxr-simple.xml
@@ -92,5 +92,4 @@ https://playground.internal/path-not-taken was the second best choice.
             </wp:postmeta>
         </item>
     </channel>
-</rss>
-	
\ No newline at end of file
+</rss>
\ No newline at end of file

From d6c0b7d32e44494bb6cada028bc200a4763aec87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com>
Date: Tue, 29 Oct 2024 21:14:36 +0100
Subject: [PATCH 5/6] Lint

---
 .../src/xml-api/WP_XML_Processor.php          | 80 +++++++++----------
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
index 36990b805b..a7d2987ca9 100644
--- a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
+++ b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
@@ -21,7 +21,7 @@
  * initial processing instruction (<?xml version="1.0" ?>). We're
  * starting with 1.0, however, because most that's what most WXR
  * files declare.
- * 
+ *
  * @TODO: Track specific error states, expose informative messages, line
  *        numbers, indexes, and other debugging info.
  *
@@ -339,7 +339,7 @@ class WP_XML_Processor {
 	 * @var string
 	 */
 	public $xml;
-	
+
 	/**
 	 * Specifies mode of operation of the parser at any given time.
 	 *
@@ -619,7 +619,7 @@ class WP_XML_Processor {
 	public $had_previous_chunks = false;
 
 	/**
-	 * 
+	 *
 	 */
 	public static function from_string( $xml, $known_definite_encoding = 'UTF-8' ) {
 		if ( 'UTF-8' !== $known_definite_encoding ) {
@@ -733,9 +733,9 @@ public function append_bytes( string $next_chunk ) {
 			);
 			return false;
 		}
-		$this->xml .= $next_chunk;
+		$this->xml                .= $next_chunk;
 		$this->had_previous_chunks = true;
-		if($this->parser_state === self::STATE_INCOMPLETE_INPUT) {
+		if ( $this->parser_state === self::STATE_INCOMPLETE_INPUT ) {
 			$this->parser_state = self::STATE_READY;
 		}
 		return true;
@@ -743,7 +743,7 @@ public function append_bytes( string $next_chunk ) {
 
 	/**
 	 * Indicates that all the XML document bytes have been provided.
-	 * 
+	 *
 	 * After calling this method, the processor will emit errors where
 	 * previously it would have entered the STATE_INCOMPLETE_INPUT state.
 	 */
@@ -770,7 +770,7 @@ public function flush_processed_xml() {
 
 		return $processed_xml;
 	}
- 
+
 	/**
 	 * Internal method which finds the next token in the XML document.
 	 *
@@ -1719,10 +1719,10 @@ private function parse_next_tag() {
 		 * can be nothing left in the document other than a #text node.
 		 */
 		$this->set_incomplete_input_or_parse_error();
-		$this->token_starts_at         = $was_at;
-		$this->token_length            = $doc_length - $was_at;
-		$this->text_starts_at          = $was_at;
-		$this->text_length             = $doc_length - $was_at;
+		$this->token_starts_at = $was_at;
+		$this->token_length    = $doc_length - $was_at;
+		$this->text_starts_at  = $was_at;
+		$this->text_length     = $doc_length - $was_at;
 		return false;
 	}
 
@@ -1923,33 +1923,33 @@ private function after_tag() {
 			unset( $this->lexical_updates[ $name ] );
 		}
 
-		$this->token_starts_at         = null;
-		$this->token_length            = null;
-		$this->tag_name_starts_at      = null;
-		$this->tag_name_length         = null;
-		$this->text_starts_at          = null;
-		$this->text_length             = null;
-		$this->is_closing_tag          = null;
-		$this->attributes              = array();
+		$this->token_starts_at    = null;
+		$this->token_length       = null;
+		$this->tag_name_starts_at = null;
+		$this->tag_name_length    = null;
+		$this->text_starts_at     = null;
+		$this->text_length        = null;
+		$this->is_closing_tag     = null;
+		$this->attributes         = array();
 	}
 
 	protected function reset_state() {
-		$this->xml                     = '';
-		$this->parser_state            = self::STATE_READY;
-		$this->bytes_already_parsed    = 0;
-		$this->token_starts_at         = null;
-		$this->token_length            = null;
-		$this->tag_name_starts_at      = null;
-		$this->tag_name_length         = null;
-		$this->text_starts_at          = null;
-		$this->text_length             = null;
-		$this->is_closing_tag          = null;
-		$this->last_error              = null;
-		$this->attributes              = array();
-		$this->bookmarks               = array();
-		$this->lexical_updates         = array();
-		$this->seek_count              = 0;
-		$this->had_previous_chunks     = false;
+		$this->xml                  = '';
+		$this->parser_state         = self::STATE_READY;
+		$this->bytes_already_parsed = 0;
+		$this->token_starts_at      = null;
+		$this->token_length         = null;
+		$this->tag_name_starts_at   = null;
+		$this->tag_name_length      = null;
+		$this->text_starts_at       = null;
+		$this->text_length          = null;
+		$this->is_closing_tag       = null;
+		$this->last_error           = null;
+		$this->attributes           = array();
+		$this->bookmarks            = array();
+		$this->lexical_updates      = array();
+		$this->seek_count           = 0;
+		$this->had_previous_chunks  = false;
 	}
 
 	/**
@@ -2778,7 +2778,7 @@ public function next_token() {
 	/**
 	 * Moves the internal cursor to the next token in the XML document
 	 * according to the XML specification.
-	 * 
+	 *
 	 * It considers the current XML context (prolog, element, or misc)
 	 * and only expects the nodes that are allowed in that context.
 	 *
@@ -2786,7 +2786,7 @@ public function next_token() {
 	 *
 	 * @access private
 	 *
-	 * @param int $node_to_process Whether to process the next node or 
+	 * @param string $node_to_process Whether to process the next node or
 	 *            reprocess the current node, e.g. using another parser context.
 	 * @return bool Whether a token was parsed.
 	 */
@@ -3112,11 +3112,11 @@ private function push_open_element( $tag_name ) {
 	}
 
 	private function set_incomplete_input_or_parse_error() {
-		if($this->expecting_more_input) {
+		if ( $this->expecting_more_input ) {
 			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 		} else {
 			$this->parser_state = self::STATE_INVALID_DOCUMENT;
-			$this->last_error = self::ERROR_SYNTAX;
+			$this->last_error   = self::ERROR_SYNTAX;
 			// @TODO: Add a more specific error message.
 			_doing_it_wrong( __METHOD__, 'Unexpected syntax encountered.', 'WP_VERSION' );
 		}
@@ -3318,5 +3318,3 @@ private function set_incomplete_input_or_parse_error() {
 	 */
 	const PROCESS_CURRENT_NODE = 'process-current-node';
 }
-
-

From fd3104269d9397714e95e0a736af8e1966be6807 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com>
Date: Tue, 29 Oct 2024 21:24:24 +0100
Subject: [PATCH 6/6] Add todos

---
 .../data-liberation/src/stream-api/WP_File_Byte_Stream.php   | 1 +
 .../data-liberation/src/stream-api/WP_Stream_Chain.php       | 5 +++++
 .../data-liberation/src/xml-api/WP_XML_Processor.php         | 1 -
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/packages/playground/data-liberation/src/stream-api/WP_File_Byte_Stream.php b/packages/playground/data-liberation/src/stream-api/WP_File_Byte_Stream.php
index 284e7c37a5..b2710f84ec 100644
--- a/packages/playground/data-liberation/src/stream-api/WP_File_Byte_Stream.php
+++ b/packages/playground/data-liberation/src/stream-api/WP_File_Byte_Stream.php
@@ -11,6 +11,7 @@ public function __construct( $file_path, $chunk_size = 8096 ) {
 		$this->file_path  = $file_path;
 		$this->chunk_size = $chunk_size;
 		parent::__construct();
+		$this->append_eof();
 	}
 
 	public function pause() {
diff --git a/packages/playground/data-liberation/src/stream-api/WP_Stream_Chain.php b/packages/playground/data-liberation/src/stream-api/WP_Stream_Chain.php
index e128471a75..b6a71cfab9 100644
--- a/packages/playground/data-liberation/src/stream-api/WP_Stream_Chain.php
+++ b/packages/playground/data-liberation/src/stream-api/WP_Stream_Chain.php
@@ -16,6 +16,11 @@
  * Consult it for reasoning and usage examples:
  *
  * https://github.com/adamziel/wxr-normalize/pull/1
+ *
+ * @TODO: Allow each stream to signal that its output has reached EOF
+ *        and propagate that signal downstream. Until then, a downstream
+ *        WP_XML_Processor never learns that its input is complete and
+ *        always ends in an "incomplete input" state.
  */
 class WP_Stream_Chain extends WP_Byte_Stream implements ArrayAccess, Iterator {
 	private $first_stream;
diff --git a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
index a7d2987ca9..237419866e 100644
--- a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
+++ b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
@@ -661,7 +661,6 @@ public static function create_stream_processor( $node_visitor_callback ) {
 		return new ProcessorByteStream(
 			$xml_processor,
 			function ( $state ) use ( $xml_processor, $node_visitor_callback ) {
-
 				$new_bytes = $state->consume_input_bytes();
 				if ( null !== $new_bytes ) {
 					$xml_processor->append_bytes( $new_bytes );