From e7f096e7b73b0ef34cf7bacfca1f408a9247fb6b Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 7 Feb 2023 02:30:08 -0700 Subject: [PATCH] HTML API: Move into 6.2 Compat Folder since inclusion in Core (#47749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * HTML API: Move into 6.2 Compat Folder since inclusion in Core Since the merge of the Tag Processor into Core in [55203][55203] it's no longer needed in Gutenberg in the experimental directory. In this patch we're moving a version of the Tag Processor which was updated during the Core merge into the `lib/compat` directory so that we can continue to provide it where necessary while avoiding conflicts that might otherwise arise by leaving it where it was. [55203]: https://core.trac.wordpress.org/changeset/55203 * Trying to relax a PHP linting rule `WordPress.WP.I18n.MissingArgDomainDefault` should not trigger for code that polyfills features present in WordPress core. --------- Co-authored-by: Greg Ziółkowski --- .../class-wp-html-attribute-token.php | 2 +- .../html-api}/class-wp-html-span.php | 2 +- .../html-api}/class-wp-html-tag-processor.php | 920 +++++--- .../class-wp-html-text-replacement.php | 2 +- lib/experimental/html/wp-html.php | 23 - lib/load.php | 10 +- phpcs.xml.dist | 1 + .../wp-html-tag-processor-bookmark-test.php | 369 ---- phpunit/html/wp-html-tag-processor-test.php | 1908 ----------------- 9 files changed, 579 insertions(+), 2658 deletions(-) rename lib/{experimental/html => compat/wordpress-6.2/html-api}/class-wp-html-attribute-token.php (98%) rename lib/{experimental/html => compat/wordpress-6.2/html-api}/class-wp-html-span.php (97%) rename lib/{experimental/html => compat/wordpress-6.2/html-api}/class-wp-html-tag-processor.php (60%) rename lib/{experimental/html => compat/wordpress-6.2/html-api}/class-wp-html-text-replacement.php (98%) delete mode 100644 lib/experimental/html/wp-html.php delete mode 100644 
phpunit/html/wp-html-tag-processor-bookmark-test.php delete mode 100644 phpunit/html/wp-html-tag-processor-test.php diff --git a/lib/experimental/html/class-wp-html-attribute-token.php b/lib/compat/wordpress-6.2/html-api/class-wp-html-attribute-token.php similarity index 98% rename from lib/experimental/html/class-wp-html-attribute-token.php rename to lib/compat/wordpress-6.2/html-api/class-wp-html-attribute-token.php index 7b3d5718723581..2c52164a979f02 100644 --- a/lib/experimental/html/class-wp-html-attribute-token.php +++ b/lib/compat/wordpress-6.2/html-api/class-wp-html-attribute-token.php @@ -3,7 +3,7 @@ * HTML Tag Processor: Attribute token structure class. * * @package WordPress - * @subpackage HTML + * @subpackage HTML-API * @since 6.2.0 */ diff --git a/lib/experimental/html/class-wp-html-span.php b/lib/compat/wordpress-6.2/html-api/class-wp-html-span.php similarity index 97% rename from lib/experimental/html/class-wp-html-span.php rename to lib/compat/wordpress-6.2/html-api/class-wp-html-span.php index 39e603662b17b9..d92778cd3a2223 100644 --- a/lib/experimental/html/class-wp-html-span.php +++ b/lib/compat/wordpress-6.2/html-api/class-wp-html-span.php @@ -3,7 +3,7 @@ * HTML Span: Represents a textual span inside an HTML document. * * @package WordPress - * @subpackage HTML + * @subpackage HTML-API * @since 6.2.0 */ diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/compat/wordpress-6.2/html-api/class-wp-html-tag-processor.php similarity index 60% rename from lib/experimental/html/class-wp-html-tag-processor.php rename to lib/compat/wordpress-6.2/html-api/class-wp-html-tag-processor.php index 9db2a9b09ca174..bbe2b76065b7fc 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/compat/wordpress-6.2/html-api/class-wp-html-tag-processor.php @@ -8,28 +8,26 @@ * Instead this scans linearly through a document and only parses * the HTML tag openers. * - * @TODO: Unify language around "currently-opened tag." 
- * @TODO: Organize unit test cases into normative tests, edge-case tests, regression tests. - * @TODO: Clean up attribute token class after is_true addition - * @TODO: Prune whitespace when removing classes/attributes: e.g. "a b c" -> "c" not " c" - * @TODO: Skip over `/` in attributes area, split attribute names by `/` - * @TODO: Decode HTML references/entities in class names when matching. - * E.g. match having class `1<"2` needs to recognize `class="1&lt;&quot;2"`. - * @TODO: Decode character references in `get_attribute()` - * @TODO: Properly escape attribute value in `set_attribute()` - * @TODO: Add slow mode to escape character entities in CSS class names? - * (This requires a custom decoder since `html_entity_decode()` - * doesn't handle attribute character reference decoding rules. + * ### Possible future direction for this module + * + * - Prune the whitespace when removing classes/attributes: e.g. "a b c" -> "c" not " c". + * This would increase the size of the changes for some operations but leave more + * natural-looking output HTML. + * - Decode HTML character references within class names when matching. E.g. match having + * class `1<"2` needs to recognize `class="1&lt;&quot;2"`. Currently the Tag Processor + * will fail to find the right tag if the class name is encoded as such. + * - Properly decode HTML character references in `get_attribute()`. PHP's + * `html_entity_decode()` is wrong in a couple ways: it doesn't account for the + * no-ambiguous-ampersand rule, and it improperly handles the way semicolons may + * or may not terminate a character reference. * * @package WordPress - * @subpackage HTML + * @subpackage HTML-API * @since 6.2.0 */ /** - * Processes an input HTML document by applying a specified set - * of patches to that input. Tokenizes HTML but does not fully - * parse the input document. + * Modifies attributes in an HTML document for tags matching a query. 
* * ## Usage * @@ -74,8 +72,11 @@ * * Once the cursor reaches the end of the file the processor * is done and if you want to reach an earlier tag you will - * need to recreate the processor and start over. The internal - * cursor can only proceed forward, never backing up. + * need to recreate the processor and start over, as it's + * unable to back up or move in reverse. + * + * See the section on bookmarks for an exception to this + * no-backing-up rule. * * #### Custom queries * @@ -130,8 +131,8 @@ * ### Modifying CSS classes for a found tag * * The tag processor treats the `class` attribute as a special case. - * Because it's a common operation to add or remove CSS classes you - * can do so using this interface. + * Because it's a common operation to add or remove CSS classes, this + * interface adds helper methods to make that easier. * * As with attribute values, adding or removing CSS classes is a safe * operation that doesn't require checking if the attribute or class @@ -165,17 +166,84 @@ * $tags->remove_class( 'rugby' ); * ``` * - * ## Design limitations + * When class changes are enqueued but a direct change to `class` is made via + * `set_attribute` then the changes to `set_attribute` (or `remove_attribute`) + * will take precedence over those made through `add_class` and `remove_class`. + * + * ### Bookmarks * - * @TODO: Expand this section + * While scanning through the input HTML document it's possible to set + * a named bookmark when a particular tag is found. Later on, after + * continuing to scan other tags, it's possible to `seek` to one of + * the set bookmarks and then proceed again from that point forward. 
* - * - no nesting: cannot match open and close tag - * - only move forward, never backward - * - class names not decoded if they contain character references - * - only secures against HTML escaping issues; requires - * manually sanitizing or escaping values based on the needs of - * each individual attribute, since different attributes have - * different needs. + * Because bookmarks create processing overhead one should avoid + * creating too many of them. As a rule, create only bookmarks + * of known string literal names; avoid creating "mark_{$index}" + * and so on. It's fine from a performance standpoint to create a + * bookmark and update it frequently, such as within a loop. + * + * ```php + * $total_todos = 0; + * while ( $p->next_tag( [ 'tag_name' => 'UL', 'class_name' => 'todo' ] ) ) { + * $p->set_bookmark( 'list-start' ); + * while ( $p->next_tag( [ 'tag_closers' => 'visit' ] ) ) { + * if ( 'UL' === $p->get_tag() && $p->is_tag_closer() ) { + * $p->set_bookmark( 'list-end' ); + * $p->seek( 'list-start' ); + * $p->set_attribute( 'data-contained-todos', (string) $total_todos ); + * $total_todos = 0; + * $p->seek( 'list-end' ); + * break; + * } + * + * if ( 'LI' === $p->get_tag() && ! $p->is_tag_closer() ) { + * $total_todos++; + * } + * } + * } + * ``` + * + * ## Design and limitations + * + * The Tag Processor is designed to linearly scan HTML documents and tokenize + * HTML tags and their attributes. It's designed to do this as efficiently as + * possible without compromising parsing integrity. Therefore it will be + * slower than some methods of modifying HTML, such as those incorporating + * over-simplified PCRE patterns, but will not introduce the defects and + * failures that those methods bring in, which lead to broken page renders + * and often to security vulnerabilities. On the other hand, it will be faster + * than full-blown HTML parsers such as DOMDocument and use considerably + * less memory. 
It requires a negligible memory overhead, enough to consider + * it a zero-overhead system. + * + * The performance characteristics are maintained by avoiding tree construction + * and semantic cleanups which are specified in HTML5. Because of this, for + * example, it's not possible for the Tag Processor to associate any given + * opening tag with its corresponding closing tag, or to return the inner markup + * inside an element. Systems may be built on top of the Tag Processor to do + * this, but the Tag Processor is and should be constrained so it can remain an + * efficient, low-level, and reliable HTML scanner. + * + * The Tag Processor's design incorporates a "garbage-in-garbage-out" philosophy. + * HTML5 specifies that certain invalid content be transformed into different forms + * for display, such as removing null bytes from an input document and replacing + * invalid characters with the Unicode replacement character U+FFFD �. Where errors + * or transformations exist within the HTML5 specification, the Tag Processor leaves + * those invalid inputs untouched, passing them through to the final browser to handle. + * While this implies that certain operations will be non-spec-compliant, such as + * reading the value of an attribute with invalid content, it also preserves a + * simplicity and efficiency for handling those error cases. + * + * Most operations within the Tag Processor are designed to minimize the difference + * between an input and output document for any given change. For example, the + * `add_class` and `remove_class` methods preserve whitespace and the class ordering + * within the `class` attribute; and when encountering tags with duplicated attributes, + * the Tag Processor will leave those invalid duplicate attributes where they are but + * update the proper attribute which the browser will read for parsing its value. 
An + * exception to this rule is that all attribute updates store their values as + * double-quoted strings, meaning that attributes on input with single-quoted or + * unquoted values will appear in the output with double-quotes. * * @since 6.2.0 */ @@ -184,7 +252,7 @@ class WP_HTML_Tag_Processor { * The maximum number of bookmarks allowed to exist at * any given time. * - * @see set_bookmark(); + * @see set_bookmark() * @since 6.2.0 * @var int */ @@ -244,34 +312,55 @@ class WP_HTML_Tag_Processor { * Whether to visit tag closers, e.g. </div>, when walking an input document. * * @since 6.2.0 - * @var boolean + * @var bool */ private $stop_on_tag_closers; /** - * The updated HTML document. + * Holds updated HTML as updates are applied. + * + * Updates and unmodified portions of the input document are + * appended to this value as they are applied. It will hold + * a copy of the updated document up until the point of the + * latest applied update. The fully-updated HTML document + * will comprise this value plus the part of the input document + * which follows that latest update. + * + * @see $bytes_already_copied * * @since 6.2.0 * @var string */ - private $updated_html = ''; + private $output_buffer = ''; /** - * How many bytes from the original HTML document were already read. + * How many bytes from the original HTML document have been read and parsed. + * + * This value points to the latest byte offset in the input document which + * has been already parsed. It is the internal cursor for the Tag Processor + * and updates while scanning through the HTML tokens. * * @since 6.2.0 * @var int */ - private $parsed_bytes = 0; + private $bytes_already_parsed = 0; /** - * How many bytes from the original HTML document were already treated - * with the requested replacements. + * How many bytes from the input HTML document have already been + * copied into the output buffer. + * + * Lexical updates are enqueued and processed in batches. 
Prior + * to any given update in the input document, there might exist + * a span of HTML unaffected by any changes. This span ought to + * be copied verbatim into the output buffer before applying the + * following update. This value will point to the starting byte + * offset in the input document where that unaffected span of + * HTML starts. * * @since 6.2.0 * @var int */ - private $updated_bytes = 0; + private $bytes_already_copied = 0; /** * Byte offset in input document where current tag name starts. @@ -284,7 +373,7 @@ class WP_HTML_Tag_Processor { * ``` * * @since 6.2.0 - * @var ?int + * @var int|null */ private $tag_name_starts_at; @@ -299,7 +388,7 @@ class WP_HTML_Tag_Processor { * ``` * * @since 6.2.0 - * @var ?int + * @var int|null */ private $tag_name_length; @@ -315,14 +404,14 @@ class WP_HTML_Tag_Processor { * ``` * * @since 6.2.0 - * @var ?int + * @var int|null */ private $tag_ends_at; /** * Whether the current tag is an opening tag, e.g.
<div>, or a closing tag, e.g. </div>
. * - * @var boolean + * @var bool */ private $is_closing_tag; @@ -330,7 +419,7 @@ class WP_HTML_Tag_Processor { * Lazily-built index of attributes found within an HTML tag, keyed by the attribute name. * * Example: - * + * ```php * // supposing the parser is working through this content * // and stops after recognizing the `id` attribute * //
@@ -348,7 +437,7 @@ class WP_HTML_Tag_Processor { * * // Note that only the `class` attribute value is stored in the index. * // That's because it is the only value used by this class at the moment. - * + * ``` * * @since 6.2.0 * @var WP_HTML_Attribute_Token[] @@ -364,17 +453,17 @@ class WP_HTML_Tag_Processor { * generally modified as with DOM `setAttribute` calls. * * When modifying an HTML document these will eventually be collapsed - * into a single lexical update to replace the `class` attribute. + * into a single `set_attribute( 'class', $changes )` call. * * Example: - * + * ```php * // Add the `wp-block-group` class, remove the `wp-group` class. * $classname_updates = [ * // Indexed by a comparable class name * 'wp-block-group' => WP_HTML_Tag_Processor::ADD_CLASS, * 'wp-group' => WP_HTML_Tag_Processor::REMOVE_CLASS * ]; - * + * ``` * * @since 6.2.0 * @var bool[] @@ -388,7 +477,7 @@ class WP_HTML_Tag_Processor { * @since 6.2.0 * @var WP_HTML_Span[] */ - private $bookmarks = array(); + protected $bookmarks = array(); const ADD_CLASS = true; const REMOVE_CLASS = false; @@ -397,42 +486,56 @@ class WP_HTML_Tag_Processor { /** * Lexical replacements to apply to input HTML document. * - * HTML modifications collapse into lexical replacements in order to - * provide an efficient mechanism to update documents lazily and in - * order to support a variety of semantic modifications without - * building a complicated parsing machinery. That is, it's up to - * the calling class to generate the lexical modification from the - * semantic change requested. + * "Lexical" in this class refers to the part of this class which + * operates on pure text _as text_ and not as HTML. There's a line + * between the public interface, with HTML-semantic methods like + * `set_attribute` and `add_class`, and an internal state that tracks + * text offsets in the input document. 
+ * + * When higher-level HTML methods are called, those have to transform their + * operations (such as setting an attribute's value) into text diffing + * operations (such as replacing the sub-string from indices A to B with + * some given new string). These text-diffing operations are the lexical + * updates. + * + * As new higher-level methods are added they need to collapse their + * operations into these lower-level lexical updates since that's the + * Tag Processor's internal language of change. Any code which creates + * these lexical updates must ensure that they do not cross HTML syntax + * boundaries, however, so these should never be exposed outside of this + * class or any classes which intentionally expand its functionality. + * + * These are enqueued while editing the document instead of being immediately + * applied to avoid processing overhead, string allocations, and string + * copies when applying many updates to a single document. * * Example: - * + * ```php * // Replace an attribute stored with a new value, indices * // sourced from the lazily-parsed HTML recognizer. * $start = $attributes['src']->start; * $end = $attributes['src']->end; - * $modifications[] = new WP_HTML_Text_Replacement( $start, $end, get_the_post_thumbnail_url() ); + * $modifications[] = new WP_HTML_Text_Replacement( $start, $end, $new_value ); * - * // Correspondingly, something like this - * // will appear in the replacements array. - * $replacements = [ + * // Correspondingly, something like this will appear in this array. + * $lexical_updates = [ * WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' ) * ]; - * + * ``` * * @since 6.2.0 * @var WP_HTML_Text_Replacement[] */ - private $lexical_updates = array(); + protected $lexical_updates = array(); /** - * Tracks how many times we've performed a `seek()` - * so that we can prevent accidental infinite loops. 
+ * Tracks and limits `seek()` calls to prevent accidental infinite loops. * * @see seek * @since 6.2.0 * @var int */ - private $seek_count = 0; + protected $seek_count = 0; /** * Constructor. @@ -450,14 +553,15 @@ public function __construct( $html ) { * * @since 6.2.0 * - * @param array|string $query { - * Which tag name to find, having which class, etc. + * @param array|string|null $query { + * Optional. Which tag name to find, having which class, etc. Default is to find any tag. * * @type string|null $tag_name Which tag to find, or `null` for "any tag." * @type int|null $match_offset Find the Nth tag matching all search criteria. - * 0 for "first" tag, 2 for "third," etc. + * 1 for "first" tag, 3 for "third," etc. * Defaults to first tag. * @type string|null $class_name Tag must contain this whole class name to match. + * @type string|null $tag_closers "visit" or "skip": whether to stop on tag closers, e.g.
. * } * @return boolean Whether a tag was matched. */ @@ -466,49 +570,52 @@ public function next_tag( $query = null ) { $already_found = 0; do { - if ( $this->parsed_bytes >= strlen( $this->html ) ) { + if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { return false; } - /* - * Unfortunately we can't try to search for only the tag name we want because that might - * lead us to skip over other tags and lose track of our place. So we need to search for - * _every_ tag and then check after we find one if it's the one we are looking for. - */ + // Find the next tag if it exists. if ( false === $this->parse_next_tag() ) { - $this->parsed_bytes = strlen( $this->html ); + $this->bytes_already_parsed = strlen( $this->html ); return false; } + // Parse all of its attributes. while ( $this->parse_next_attribute() ) { continue; } - $tag_ends_at = strpos( $this->html, '>', $this->parsed_bytes ); + // Ensure that the tag closes before the end of the document. + $tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed ); if ( false === $tag_ends_at ) { return false; } - $this->tag_ends_at = $tag_ends_at; - $this->parsed_bytes = $tag_ends_at; + $this->tag_ends_at = $tag_ends_at; + $this->bytes_already_parsed = $tag_ends_at; + // Finally, check if the parsed tag and its attributes match the search query. if ( $this->matches() ) { ++$already_found; } - // Avoid copying the tag name string when possible. + /* + * For non-DATA sections which might contain text that looks like HTML tags but + * isn't, scan with the appropriate alternative mode. Looking at the first letter + * of the tag name as a pre-check avoids a string allocation when it's not needed. + */ $t = $this->html[ $this->tag_name_starts_at ]; - if ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) { + if ( ! $this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) { $tag_name = $this->get_tag(); if ( 'SCRIPT' === $tag_name && ! 
$this->skip_script_data() ) { - $this->parsed_bytes = strlen( $this->html ); + $this->bytes_already_parsed = strlen( $this->html ); return false; } elseif ( ( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) && ! $this->skip_rcdata( $tag_name ) ) { - $this->parsed_bytes = strlen( $this->html ); + $this->bytes_already_parsed = strlen( $this->html ); return false; } } @@ -543,7 +650,7 @@ public function next_tag( $query = null ) { * * Bookmarks provide the ability to seek to a previously-scanned * place in the HTML document. This avoids the need to re-scan - * the entire thing. + * the entire document. * * Example: * ``` @@ -575,22 +682,30 @@ public function next_tag( $query = null ) { * } * ``` * - * Because bookmarks maintain their position they don't - * expose any internal offsets for the HTML document - * and can't be used with normal string functions. + * Bookmarks intentionally hide the internal string offsets + * to which they refer. They are maintained internally as + * updates are applied to the HTML document and therefore + * retain their "position" - the location to which they + * originally pointed. The inability to use bookmarks with + * functions like `substr` is therefore intentional to guard + * against accidentally breaking the HTML. * * Because bookmarks allocate memory and require processing - * for every applied update they are limited and require - * a name. They should not be created inside a loop. - * - * Bookmarks are a powerful tool to enable complicated behavior; - * consider double-checking that you need this tool if you are + * for every applied update, they are limited and require + * a name. They should not be created with programmatically-made + * names, such as "li_{$index}" with some loop. As a general + * rule they should only be created with string-literal names + * like "start-of-section" or "last-paragraph". + * + * Bookmarks are a powerful tool to enable complicated behavior. 
+ * Consider double-checking that you need this tool if you are * reaching for it, as inappropriate use could lead to broken * HTML structure or unwanted processing overhead. * + * @since 6.2.0 + * * @param string $name Identifies this particular bookmark. - * @return false|void - * @throws Exception Throws on invalid bookmark name if WP_DEBUG set. + * @return bool Whether the bookmark was successfully created. */ public function set_bookmark( $name ) { if ( null === $this->tag_name_starts_at ) { @@ -600,7 +715,8 @@ public function set_bookmark( $name ) { if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) { _doing_it_wrong( __METHOD__, - __( 'Too many bookmarks: cannot create any more.', 'gutenberg' ) + __( 'Too many bookmarks: cannot create any more.' ), + '6.2.0' ); return false; } @@ -615,14 +731,13 @@ public function set_bookmark( $name ) { /** - * Removes a bookmark if you no longer need to use it. + * Removes a bookmark that is no longer needed. * - * Releasing a bookmark frees up the small performance - * overhead they require, mainly in the form of compute - * costs when modifying the document. + * Releasing a bookmark frees up the small + * performance overhead it requires. * * @param string $name Name of the bookmark to remove. - * @return bool + * @return bool Whether the bookmark already existed before removal. */ public function release_bookmark( $name ) { if ( ! array_key_exists( $name, $this->bookmarks ) ) { @@ -636,37 +751,39 @@ public function release_bookmark( $name ) { /** - * Skips the contents of the title and textarea tags until an appropriate - * tag closer is found. + * Skips contents of title and textarea tags. * * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state - * @param string $tag_name – the lowercase tag name which will close the RCDATA region. * @since 6.2.0 + * + * @param string $tag_name – the lowercase tag name which will close the RCDATA region. 
+ * @return bool Whether an end to the RCDATA region was found before the end of the document. */ private function skip_rcdata( $tag_name ) { $html = $this->html; $doc_length = strlen( $html ); $tag_length = strlen( $tag_name ); - $at = $this->parsed_bytes; + $at = $this->bytes_already_parsed; while ( false !== $at && $at < $doc_length ) { $at = strpos( $this->html, '= $doc_length ) { - $this->parsed_bytes = $doc_length; + $this->bytes_already_parsed = $doc_length; return false; } $at += 2; /* - * We have to find a case-insensitive match to the tag name. - * Note also that since tag names are limited to US-ASCII - * characters we can ignore any kind of Unicode normalizing - * forms when comparing. If we get a non-ASCII character it - * will never be a match. + * Find a case-insensitive match to the tag name. + * + * Because tag names are limited to US-ASCII there is no + * need to perform any kind of Unicode normalization when + * comparing; any character which could be impacted by such + * normalization could not be part of a tag name. */ for ( $i = 0; $i < $tag_length; $i++ ) { $tag_char = $tag_name[ $i ]; @@ -678,13 +795,14 @@ private function skip_rcdata( $tag_name ) { } } - $at += $tag_length; - $this->parsed_bytes = $at; + $at += $tag_length; + $this->bytes_already_parsed = $at; /* - * Ensure we terminate the tag name, otherwise we might, - * for example, accidentally match the sequence - * "" for "". + * Ensure that the tag name terminates to avoid matching on + * substrings of a longer tag name. 
For example, the sequence + * "' !== $c ) { @@ -694,13 +812,13 @@ private function skip_rcdata( $tag_name ) { while ( $this->parse_next_attribute() ) { continue; } - $at = $this->parsed_bytes; + $at = $this->bytes_already_parsed; if ( $at >= strlen( $this->html ) ) { return false; } if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) { - ++$this->parsed_bytes; + ++$this->bytes_already_parsed; return true; } } @@ -709,23 +827,25 @@ private function skip_rcdata( $tag_name ) { } /** - * Skips the contents of ' ), - ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers get_updated_html - * @covers get_attribute - */ - public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->set_attribute( 'test-attribute', 'test-value' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not include attribute added via set_attribute()' - ); - $this->assertSame( - 'test-value', - $p->get_attribute( 'test-attribute' ), - 'get_attribute() (called after get_updated_html()) did not return attribute added via set_attribute()' - ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers get_updated_html - * @covers get_attribute - */ - public function test_get_attribute_returns_updated_values_before_they_are_updated() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->set_attribute( 'test-attribute', 'test-value' ); - $this->assertSame( - 'test-value', - $p->get_attribute( 'test-attribute' ), - 'get_attribute() (called before get_updated_html()) did not return attribute added via set_attribute()' - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not include attribute added via set_attribute()' - ); - } - - public function test_get_attribute_returns_updated_values_before_they_are_updated_with_different_name_casing() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->set_attribute( 'test-ATTribute', 'test-value' ); - $this->assertSame( - 'test-value', - $p->get_attribute( 'test-attribute' ), - 'get_attribute() (called before get_updated_html()) did not return attribute added via set_attribute()' - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not include attribute added via set_attribute()' - ); - } - - /** - * @ticket 56299 - * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_get_attribute_reflects_added_class_names_before_they_are_updated() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->add_class( 'my-class' ); - $this->assertSame( - 'my-class', - $p->get_attribute( 'class' ), - 'get_attribute() (called before get_updated_html()) did not return class name added via add_class()' - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not include class name added via add_class()' - ); - } - - /** - * @ticket 56299 - * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_get_attribute_reflects_added_class_names_before_they_are_updated_and_retains_classes_from_previous_add_class_calls() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->add_class( 'my-class' ); - $this->assertSame( - 'my-class', - $p->get_attribute( 'class' ), - 'get_attribute() (called before get_updated_html()) did not return class name added via add_class()' - ); - $p->add_class( 'my-other-class' ); - $this->assertSame( - 'my-class my-other-class', - $p->get_attribute( 'class' ), - 'get_attribute() (called before get_updated_html()) did not return class names added via subsequent add_class() calls' - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not include class names added via subsequent add_class() calls' - ); - } - - /** - * @ticket 56299 - * - * @covers remove_attribute - * @covers get_attribute - * @covers get_updated_html - */ - public function test_get_attribute_reflects_removed_attribute_before_it_is_updated() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->remove_attribute( 'id' ); - $this->assertNull( - $p->get_attribute( 'id' ), - 'get_attribute() (called before get_updated_html()) returned attribute that was removed by remove_attribute()' - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML includes attribute that was removed by remove_attribute()' - ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers remove_attribute - * @covers get_attribute - * @covers get_updated_html - */ - public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_it_is_updated() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->set_attribute( 'test-attribute', 'test-value' ); - $p->remove_attribute( 'test-attribute' ); - $this->assertNull( - $p->get_attribute( 'test-attribute' ), - 'get_attribute() (called before get_updated_html()) returned attribute that was added via set_attribute() and then removed by remove_attribute()' - ); - $this->assertSame( - self::HTML_SIMPLE, - $p->get_updated_html(), - 'Updated HTML includes attribute that was added via set_attribute() and then removed by remove_attribute()' - ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers remove_attribute - * @covers get_attribute - * @covers get_updated_html - */ - public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_it_is_updated() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->set_attribute( 'id', 'test-value' ); - $p->remove_attribute( 'id' ); - $this->assertNull( - $p->get_attribute( 'id' ), - 'get_attribute() (called before get_updated_html()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()' - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML includes attribute that was overwritten by set_attribute() and then removed by remove_attribute()' - ); - } - - /** - * @ticket 56299 - * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_get_attribute_reflects_removed_class_names_before_they_are_updated() { - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->remove_class( 'with-border' ); - $this->assertSame( - 'main', - $p->get_attribute( 'class' ), - 'get_attribute() (called before get_updated_html()) returned the wrong attribute after calling remove_attribute()' - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML includes wrong attribute after calling remove_attribute()' - ); - } - - /** - * @ticket 56299 - * - * @covers add_class - * @covers remove_class - * @covers get_attribute - * @covers get_updated_html - */ - public function test_get_attribute_reflects_setting_and_then_removing_a_class_name_before_it_is_updated() { - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->add_class( 'foo-class' ); - $p->remove_class( 'foo-class' ); - $this->assertSame( - 'main with-border', - $p->get_attribute( 'class' ), - 'get_attribute() (called before get_updated_html()) returned class name that was added via add_class() and then removed by remove_class()' - ); - $this->assertSame( - self::HTML_WITH_CLASSES, - $p->get_updated_html(), - 'Updated HTML includes class that was added via add_class() and then removed by remove_class()' - ); - } - - /** - * @ticket 56299 - * - * @covers add_class - * @covers remove_class - * @covers get_attribute - * @covers get_updated_html - */ - public function test_get_attribute_reflects_duplicating_and_then_removing_an_existing_class_name_before_it_is_updated() { - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->add_class( 'with-border' ); - $p->remove_class( 'with-border' ); - $this->assertSame( - 'main', - $p->get_attribute( 'class' ), - 'get_attribute() (called before get_updated_html()) returned class name that was duplicated via add_class() and then removed by remove_class()' - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML includes class that was duplicated via add_class() and then removed by remove_class()' - ); - } - - /** - * According to HTML spec, only the first instance of an attribute counts. - * The other ones are ignored. - * - * @ticket 56299 - * - * @covers set_attribute - * @covers get_updated_html - */ - public function test_update_first_when_duplicated_attribute() { - $p = new WP_HTML_Tag_Processor( '
Text
' ); - $p->next_tag(); - $p->set_attribute( 'id', 'updated-id' ); - $this->assertSame( '
Text
', $p->get_updated_html() ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers get_updated_html - */ - public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->set_attribute( 'id', 'new-id' ); - $this->assertSame( '
Text
', $p->get_updated_html() ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers get_updated_html - */ - public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - while ( $p->next_tag() ) { - $p->set_attribute( 'data-foo', 'bar' ); - } - - $this->assertSame( '
Text
', $p->get_updated_html() ); - } - - /** - * Removing an attribute that's listed many times, e.g. `
` should remove - * all its instances and output just `
`. - * - * Today, however, WP_HTML_Tag_Processor only removes the first such attribute. It seems like a corner case - * and introducing additional complexity to correctly handle this scenario doesn't seem to be worth it. - * Let's revisit if and when this becomes a problem. - * - * This test is in place to confirm this behavior, while incorrect, is well-defined. - * - * @ticket 56299 - * - * @covers remove_attribute - * @covers get_updated_html - */ - public function test_remove_first_when_duplicated_attribute() { - $p = new WP_HTML_Tag_Processor( '
Text
' ); - $p->next_tag(); - $p->remove_attribute( 'id' ); - $this->assertSame( '
Text
', $p->get_updated_html() ); - } - - /** - * @ticket 56299 - * - * @covers remove_attribute - * @covers get_updated_html - */ - public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->remove_attribute( 'id' ); - $this->assertSame( '
Text
', $p->get_updated_html() ); - } - - /** - * @ticket 56299 - * - * @covers remove_attribute - * @covers get_updated_html - */ - public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->remove_attribute( 'no-such-attribute' ); - $this->assertSame( self::HTML_SIMPLE, $p->get_updated_html() ); - } - - /** - * @ticket 56299 - * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_add_class_creates_a_class_attribute_when_there_is_none() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->add_class( 'foo-class' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not include class name added via add_class()' - ); - $this->assertSame( - 'foo-class', - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) did not return class name added via add_class()" - ); - } - - /** - * @ticket 56299 - * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_calling_add_class_twice_creates_a_class_attribute_with_both_class_names_when_there_is_no_class_attribute() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->add_class( 'foo-class' ); - $p->add_class( 'bar-class' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not include class names added via subsequent add_class() calls' - ); - $this->assertSame( - 'foo-class bar-class', - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) did not return class names added via subsequent add_class() calls" - ); - } - - /** - * @ticket 56299 - * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_remove_class_does_not_change_the_markup_when_there_is_no_class_attribute() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->remove_class( 'foo-class' ); - $this->assertSame( - self::HTML_SIMPLE, - $p->get_updated_html(), - 'Updated HTML includes class name that was removed by remove_class()' - ); - $this->assertNull( - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) did not return null for class name that was removed by remove_class()" - ); - } - - /** - * @ticket 56299 - * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_add_class_appends_class_names_to_the_existing_class_attribute_when_one_already_exists() { - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->add_class( 'foo-class' ); - $p->add_class( 'bar-class' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not reflect class names added to existing class attribute via subsequent add_class() calls' - ); - $this->assertSame( - 'main with-border foo-class bar-class', - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) does not reflect class names added to existing class attribute via subsequent add_class() calls" - ); - } - - /** - * @ticket 56299 - * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_remove_class_removes_a_single_class_from_the_class_attribute_when_one_exists() { - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->remove_class( 'main' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not reflect class name removed from existing class attribute via remove_class()' - ); - $this->assertSame( - ' with-border', - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) does not reflect class name removed from existing class attribute via remove_class()" - ); - } - - /** - * @ticket 56299 - * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_calling_remove_class_with_all_listed_class_names_removes_the_existing_class_attribute_from_the_markup() { - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->remove_class( 'main' ); - $p->remove_class( 'with-border' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not reflect class attribute removed via subesequent remove_class() calls' - ); - $this->assertNull( - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) did not return null for class attribute removed via subesequent remove_class() calls" - ); - } - - /** - * @ticket 56299 - * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_add_class_does_not_add_duplicate_class_names() { - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->add_class( 'with-border' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not reflect deduplicated class name added via add_class()' - ); - $this->assertSame( - 'main with-border', - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) does not reflect deduplicated class name added via add_class()" - ); - } - - /** - * @ticket 56299 - * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_add_class_preserves_class_name_order_when_a_duplicate_class_name_is_added() { - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->add_class( 'main' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not reflect class name order after adding duplicated class name via add_class()' - ); - $this->assertSame( - 'main with-border', - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) does not reflect class name order after adding duplicated class name added via add_class()" - ); - } - - /** - * @ticket 56299 - * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_add_class_when_there_is_a_class_attribute_with_excessive_whitespaces() { - $p = new WP_HTML_Tag_Processor( - '
Text
' - ); - $p->next_tag(); - $p->add_class( 'foo-class' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not reflect existing excessive whitespace after adding class name via add_class()' - ); - $this->assertSame( - ' main with-border foo-class', - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) does not reflect existing excessive whitespace after adding class name via add_class()" - ); - } - - /** - * @ticket 56299 - * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_remove_class_preserves_whitespaces_when_there_is_a_class_attribute_with_excessive_whitespaces() { - $p = new WP_HTML_Tag_Processor( - '
Text
' - ); - $p->next_tag(); - $p->remove_class( 'with-border' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not reflect existing excessive whitespace after removing class name via remove_class()' - ); - $this->assertSame( - ' main', - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) does not reflect existing excessive whitespace after removing class name via removing_class()" - ); - } - - /** - * @ticket 56299 - * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute - */ - public function test_removing_all_classes_removes_the_existing_class_attribute_from_the_markup_even_when_excessive_whitespaces_are_present() { - $p = new WP_HTML_Tag_Processor( - '
Text
' - ); - $p->next_tag(); - $p->remove_class( 'main' ); - $p->remove_class( 'with-border' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - 'Updated HTML does not reflect removed class attribute after removing all class names via remove_class()' - ); - $this->assertNull( - $p->get_attribute( 'class' ), - "get_attribute( 'class' ) did not return null after removing all class names via remove_class()" - ); - } - - /** - * When add_class( $different_value ) is called _after_ set_attribute( 'class', $value ), the - * final class name should be "$value $different_value". In other words, the `add_class` call - * should append its class to the one(s) set by `set_attribute`. When `add_class( $different_value )` - * is called _before_ `set_attribute( 'class', $value )`, however, the final class name should be - * "$value" instead, as any direct updates to the `class` attribute supersede any changes enqueued - * via the class builder methods. - * - * @ticket 56299 - * - * @covers add_class - * @covers set_attribute - * @covers get_updated_html - * @covers get_attribute - */ - public function test_set_attribute_takes_priority_over_add_class() { - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->add_class( 'add_class' ); - $p->set_attribute( 'class', 'set_attribute' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" - ); - $this->assertSame( - 'set_attribute', - $p->get_attribute( 'class' ), - "Calling get_attribute after updating first tag's attributes did not return the expected class name" - ); - - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->set_attribute( 'class', 'set_attribute' ); - $p->add_class( 'add_class' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" - ); - $this->assertSame( - 'set_attribute add_class', - $p->get_attribute( 'class' ), - "Calling get_attribute after updating first tag's attributes did not return the expected class name" - ); - } - - /** - * When add_class( $different_value ) is called _after_ set_attribute( 'class', $value ), the - * final class name should be "$value $different_value". In other words, the `add_class` call - * should append its class to the one(s) set by `set_attribute`. When `add_class( $different_value )` - * is called _before_ `set_attribute( 'class', $value )`, however, the final class name should be - * "$value" instead, as any direct updates to the `class` attribute supersede any changes enqueued - * via the class builder methods. - * - * This is still true if we read enqueued updates before calling `get_updated_html()`. - * - * @ticket 56299 - * - * @covers add_class - * @covers set_attribute - * @covers get_attribute - * @covers get_updated_html - */ - public function test_set_attribute_takes_priority_over_add_class_even_before_updating() { - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->add_class( 'add_class' ); - $p->set_attribute( 'class', 'set_attribute' ); - $this->assertSame( - 'set_attribute', - $p->get_attribute( 'class' ), - "Calling get_attribute after updating first tag's attributes did not return the expected class name" - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" - ); - - $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); - $p->next_tag(); - $p->set_attribute( 'class', 'set_attribute' ); - $p->add_class( 'add_class' ); - $this->assertSame( - 'set_attribute add_class', - $p->get_attribute( 'class' ), - "Calling get_attribute after updating first tag's attributes did not return the expected class name" - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" - ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers add_class - * @covers get_attribute - * @covers get_updated_html - */ - public function test_add_class_overrides_boolean_class_attribute() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->set_attribute( 'class', true ); - $p->add_class( 'add_class' ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - "Updated HTML doesn't reflect class added via add_class that was originally set as boolean attribute" - ); - $this->assertSame( - 'add_class', - $p->get_attribute( 'class' ), - "get_attribute (called after get_updated_html()) doesn't reflect class added via add_class that was originally set as boolean attribute" - ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers add_class - * @covers get_attribute - * @covers get_updated_html - */ - public function test_add_class_overrides_boolean_class_attribute_even_before_updating() { - $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); - $p->next_tag(); - $p->set_attribute( 'class', true ); - $p->add_class( 'add_class' ); - $this->assertSame( - 'add_class', - $p->get_attribute( 'class' ), - "get_attribute (called before get_updated_html()) doesn't reflect class added via add_class that was originally set as boolean attribute" - ); - $this->assertSame( - '
Text
', - $p->get_updated_html(), - "Updated HTML doesn't reflect class added via add_class that was originally set as boolean attribute" - ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers remove_attribute - * @covers add_class - * @covers remove_class - * @covers get_updated_html - */ - public function test_advanced_use_case() { - $input = << -
-
-
- - - - - - - -
-
-
-HTML; - - $expected_output = << -
-
-
- - - - - - - -
-
-
-HTML; - - $p = new WP_HTML_Tag_Processor( $input ); - $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); - $p->set_attribute( 'data-details', '{ "key": "value" }' ); - $p->add_class( 'is-processed' ); - $this->assertTrue( - $p->next_tag( - array( - 'tag_name' => 'div', - 'class_name' => 'BtnGroup', - ) - ), - 'Querying an existing tag did not return true' - ); - $p->remove_class( 'BtnGroup' ); - $p->add_class( 'button-group' ); - $p->add_class( 'Another-Mixed-Case' ); - $this->assertTrue( - $p->next_tag( - array( - 'tag_name' => 'div', - 'class_name' => 'BtnGroup', - ) - ), - 'Querying an existing tag did not return true' - ); - $p->remove_class( 'BtnGroup' ); - $p->add_class( 'button-group' ); - $p->add_class( 'Another-Mixed-Case' ); - $this->assertTrue( - $p->next_tag( - array( - 'tag_name' => 'button', - 'class_name' => 'btn', - 'match_offset' => 3, - ) - ), - 'Querying an existing tag did not return true' - ); - $p->remove_attribute( 'class' ); - $this->assertFalse( $p->next_tag( 'non-existent' ), 'Querying a non-existing tag did not return false' ); - $p->set_attribute( 'class', 'test' ); - $this->assertSame( $expected_output, $p->get_updated_html(), 'Calling get_updated_html after updating the attributes did not return the expected HTML' ); - } - - /** - * @ticket 56299 - * - * @covers remove_attribute - * @covers set_attribute - * @covers get_updated_html - */ - public function test_correctly_parses_html_attributes_wrapped_in_single_quotation_marks() { - $p = new WP_HTML_Tag_Processor( - '
Text
' - ); - $p->next_tag( - array( - 'tag_name' => 'div', - 'id' => 'first', - ) - ); - $p->remove_attribute( 'id' ); - $p->next_tag( - array( - 'tag_name' => 'span', - 'id' => 'second', - ) - ); - $p->set_attribute( 'id', 'single-quote' ); - $this->assertSame( - '
Text
', - $p->get_updated_html() - ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers get_updated_html - */ - public function test_set_attribute_with_value_equals_to_true_adds_a_boolean_html_attribute_with_implicit_value() { - $p = new WP_HTML_Tag_Processor( - '
' - ); - $p->next_tag( 'input' ); - $p->set_attribute( 'checked', true ); - $this->assertSame( - '
', - $p->get_updated_html() - ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers get_updated_html - */ - public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_markup() { - $p = new WP_HTML_Tag_Processor( - '
' - ); - $p->next_tag( 'input' ); - $p->set_attribute( 'checked', false ); - $this->assertSame( - '
', - $p->get_updated_html() - ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers get_updated_html - */ - public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() { - $html_input = '
'; - $p = new WP_HTML_Tag_Processor( $html_input ); - $p->next_tag( 'input' ); - $p->set_attribute( 'checked', false ); - $this->assertSame( $html_input, $p->get_updated_html() ); - } - - /** - * @ticket 56299 - * - * @covers set_attribute - * @covers get_updated_html - */ - public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit_value_to_the_markup() { - $p = new WP_HTML_Tag_Processor( - '
' - ); - $p->next_tag( 'input' ); - $p->set_attribute( 'checked', 'checked' ); - $this->assertSame( - '
', - $p->get_updated_html() - ); - } - - /** - * @ticket 56299 - * - * @covers get_tag - * @covers next_tag - */ - public function test_unclosed_script_tag_should_not_cause_an_infinite_loop() { - $p = new WP_HTML_Tag_Processor( '
', - ); - - $examples['Simple uppercase script tag'] = array( - '
', - ); - - $examples['Script with a comment opener inside should end at the next script tag closer (dash dash escaped state)'] = array( - '
-->', - ); - - $examples['Script with a comment opener and a script tag opener inside should end two script tag closer later (double escaped state)'] = array( - '
-->', - ); - - $examples['Double escaped script with a tricky opener'] = array( - '">
', - ); - - $examples['Double escaped script with a tricky closer'] = array( - '">
', - ); - - $examples['Double escaped, then escaped, then double escaped'] = array( - '
', - ); - - $examples['Script with a commented a script tag opener inside should at the next tag closer (dash dash escaped state)'] = array( - '
-->', - ); - - $examples['Script closer with another script tag in closer attributes'] = array( - '
', - ); - - $examples['Script closer with attributes'] = array( - '
', - ); - - $examples['Script opener with title closer inside'] = array( - '
', - ); - - $examples['Complex script with many parsing states'] = array( - '-->
-->', - ); - return $examples; - } - - /** - * @ticket 56299 - * - * @covers next_tag - * - * @dataProvider data_rcdata_state - */ - public function test_next_tag_ignores_the_contents_of_a_rcdata_tag( $rcdata_then_div, $rcdata_tag ) { - $p = new WP_HTML_Tag_Processor( $rcdata_then_div ); - $p->next_tag(); - $this->assertSame( strtoupper( $rcdata_tag ), $p->get_tag(), "The first found tag was not '$rcdata_tag'" ); - $p->next_tag(); - $this->assertSame( 'DIV', $p->get_tag(), "The second found tag was not 'div'" ); - } - - /** - * Data provider for test_ignores_contents_of_a_rcdata_tag(). - * - * @return array { - * @type array { - * @type string $rcdata_then_div The HTML snippet containing RCDATA and div tags. - * @type string $rcdata_tag The RCDATA tag. - * } - * } - */ - public function data_rcdata_state() { - $examples = array(); - $examples['Simple textarea'] = array( - '
', - 'TEXTAREA', - ); - - $examples['Simple title'] = array( - '<span class="d-none d-md-inline">Back to notifications</title</span>
', - 'TITLE', - ); - - $examples['Comment opener inside a textarea tag should be ignored'] = array( - '
-->', - 'TEXTAREA', - ); - - $examples['Textarea closer with another textarea tag in closer attributes'] = array( - '
', - 'TEXTAREA', - ); - - $examples['Textarea closer with attributes'] = array( - '
', - 'TEXTAREA', - ); - - $examples['Textarea opener with title closer inside'] = array( - '
', - 'TEXTAREA', - ); - return $examples; - } - - /** - * @ticket 56299 - * - * @covers next_tag - * @covers set_attribute - * @covers get_updated_html - */ - public function test_can_query_and_update_wrongly_nested_tags() { - $p = new WP_HTML_Tag_Processor( - '123

456789

' - ); - $p->next_tag( 'span' ); - $p->set_attribute( 'class', 'span-class' ); - $p->next_tag( 'p' ); - $p->set_attribute( 'class', 'p-class' ); - $this->assertSame( - '123

456789

', - $p->get_updated_html() - ); - } - - /** - * @ticket 56299 - * - * @covers next_tag - * @covers remove_attribute - * @covers get_updated_html - */ - public function test_removing_attributes_works_even_in_malformed_html() { - $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); - $p->next_tag( 'span' ); - $p->remove_attribute( 'Notifications<' ); - $this->assertSame( - '
Back to notifications
', - $p->get_updated_html() - ); - } - - /** - * @ticket 56299 - * - * @covers next_Tag - * @covers set_attribute - * @covers get_updated_html - */ - public function test_updating_attributes_works_even_in_malformed_html_1() { - $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); - $p->next_tag( 'span' ); - $p->set_attribute( 'id', 'first' ); - $p->next_tag( 'span' ); - $p->set_attribute( 'id', 'second' ); - $this->assertSame( - '
Back to notifications
', - $p->get_updated_html() - ); - } - - /** - * @ticket 56299 - * - * @covers next_tag - * @covers set_attribute - * @covers add_class - * @covers get_updated_html - * - * @dataProvider data_malformed_tag - */ - public function test_updating_attributes_works_even_in_malformed_html_2( $html_input, $html_expected ) { - $p = new WP_HTML_Tag_Processor( $html_input ); - $p->next_tag(); - $p->set_attribute( 'foo', 'bar' ); - $p->add_class( 'firstTag' ); - $p->next_tag(); - $p->add_class( 'secondTag' ); - $this->assertSame( - $html_expected, - $p->get_updated_html() - ); - } - - /** - * Data provider for test_updates_when_malformed_tag(). - * - * @return array { - * @type array { - * @type string $html_input The input HTML snippet. - * @type string $html_expected The expected HTML snippet after processing. - * } - * } - */ - public function data_malformed_tag() { - $null_byte = chr( 0 ); - $examples = array(); - $examples['Invalid entity inside attribute value'] = array( - 'test', - 'test', - ); - - $examples['HTML tag opening inside attribute value'] = array( - '
This <is> a <strong is="true">thing.
test', - '
This <is> a <strong is="true">thing.
test', - ); - - $examples['HTML tag brackets in attribute values and data markup'] = array( - '
This <is> a <strong is="true">thing.
test', - '
This <is> a <strong is="true">thing.
test', - ); - - $examples['Single and double quotes in attribute value'] = array( - '

test', - '

test', - ); - - $examples['Unquoted attribute values'] = array( - '


test', - '
test', - ); - - $examples['Double-quotes escaped in double-quote attribute value'] = array( - '
test', - '
test', - ); - - $examples['Unquoted attribute value'] = array( - '
test', - '
test', - ); - - $examples['Unquoted attribute value with tag-like value'] = array( - '
>test', - '
>test', - ); - - $examples['Unquoted attribute value with tag-like value followed by tag-like data'] = array( - '
>test', - '
>test', - ); - - $examples['1'] = array( - '
test', - '
test', - ); - - $examples['2'] = array( - '
test', - '
test', - ); - - $examples['4'] = array( - '
test', - '
test', - ); - - $examples['5'] = array( - '
code>test', - '
code>test', - ); - - $examples['6'] = array( - '
test', - '
test', - ); - - $examples['7'] = array( - '
test', - '
test', - ); - - $examples['8'] = array( - '
id="test">test', - '
id="test">test', - ); - - $examples['9'] = array( - '
test', - '
test', - ); - - $examples['10'] = array( - 'test', - 'test', - ); - - $examples['11'] = array( - 'The applicative operator <* works well in Haskell; is what?test', - 'The applicative operator <* works well in Haskell; is what?test', - ); - - $examples['12'] = array( - '<3 is a heart but is a tag.test', - '<3 is a heart but is a tag.test', - ); - - $examples['13'] = array( - 'test', - 'test', - ); - - $examples['14'] = array( - 'test', - 'test', - ); - - $examples['15'] = array( - ' a HTML Tag]]>test', - ' a HTML Tag]]>test', - ); - - $examples['16'] = array( - '
test', - '
test', - ); - - $examples['17'] = array( - '
test', - '
test', - ); - - $examples['18'] = array( - '
test', - '
test', - ); - - $examples['19'] = array( - '
test', - '
test', - ); - - $examples['20'] = array( - '
test', - '
test', - ); - - $examples['21'] = array( - '
test', - '
test', - ); - - $examples['22'] = array( - '
test', - '
test', - ); - - $examples['23'] = array( - '
test', - '
test', - ); - - $examples['24'] = array( - '
test', - '
test', - ); - - $examples['25'] = array( - '
test', - '
test', - ); - - $examples['Multiple unclosed tags treated as a single tag'] = array( - << - test -HTML - , - << - test -HTML - , - ); - - $examples['27'] = array( - '
test', - '
test', - ); - - $examples['28'] = array( - '
test', - '
test', - ); - - return $examples; - } -}