Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Explore using Tag Processor for wp_kses_hair #5

Draft
wants to merge 5 commits into
base: trunk
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ public function next_tag( $query = null ) {
$this->bytes_already_parsed = strlen( $this->html );
return false;
} elseif (
( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) &&
( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name || 'STYLE' === $tag_name ) &&
! $this->skip_rcdata( $tag_name )
) {
$this->bytes_already_parsed = strlen( $this->html );
Expand Down
193 changes: 37 additions & 156 deletions src/wp-includes/kses.php
Original file line number Diff line number Diff line change
Expand Up @@ -1329,143 +1329,32 @@ function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowe
* @return array[] Array of attribute information after parsing.
*/
function wp_kses_hair( $attr, $allowed_protocols ) {
$attrarr = array();
$mode = 0;
$attrname = '';
$uris = wp_kses_uri_attributes();

// Loop through the whole attribute list.

while ( strlen( $attr ) !== 0 ) {
$working = 0; // Was the last operation successful?

switch ( $mode ) {
case 0:
if ( preg_match( '/^([_a-zA-Z][-_a-zA-Z0-9:.]*)/', $attr, $match ) ) {
$attrname = $match[1];
$working = 1;
$mode = 1;
$attr = preg_replace( '/^[_a-zA-Z][-_a-zA-Z0-9:.]*/', '', $attr );
}

break;

case 1:
if ( preg_match( '/^\s*=\s*/', $attr ) ) { // Equals sign.
$working = 1;
$mode = 2;
$attr = preg_replace( '/^\s*=\s*/', '', $attr );
break;
}
$attributes = array();
$uris = wp_kses_uri_attributes();
$p = new WP_HTML_Tag_Processor( "<fake-tag {$attr}>" );

if ( preg_match( '/^\s+/', $attr ) ) { // Valueless.
$working = 1;
$mode = 0;
// Parse the attributes.
$p->next_tag();

if ( false === array_key_exists( $attrname, $attrarr ) ) {
$attrarr[ $attrname ] = array(
'name' => $attrname,
'value' => '',
'whole' => $attrname,
'vless' => 'y',
);
}
foreach ( $p->get_attribute_names_with_prefix( '' ) as $name ) {
$value = $p->get_attribute( $name );
$is_boolean = true === $value;

$attr = preg_replace( '/^\s+/', '', $attr );
}

break;

case 2:
if ( preg_match( '%^"([^"]*)"(\s+|/?$)%', $attr, $match ) ) {
// "value"
$thisval = $match[1];
if ( in_array( strtolower( $attrname ), $uris, true ) ) {
$thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols );
}

if ( false === array_key_exists( $attrname, $attrarr ) ) {
$attrarr[ $attrname ] = array(
'name' => $attrname,
'value' => $thisval,
'whole' => "$attrname=\"$thisval\"",
'vless' => 'n',
);
}

$working = 1;
$mode = 0;
$attr = preg_replace( '/^"[^"]*"(\s+|$)/', '', $attr );
break;
}

if ( preg_match( "%^'([^']*)'(\s+|/?$)%", $attr, $match ) ) {
// 'value'
$thisval = $match[1];
if ( in_array( strtolower( $attrname ), $uris, true ) ) {
$thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols );
}

if ( false === array_key_exists( $attrname, $attrarr ) ) {
$attrarr[ $attrname ] = array(
'name' => $attrname,
'value' => $thisval,
'whole' => "$attrname='$thisval'",
'vless' => 'n',
);
}

$working = 1;
$mode = 0;
$attr = preg_replace( "/^'[^']*'(\s+|$)/", '', $attr );
break;
}

if ( preg_match( "%^([^\s\"']+)(\s+|/?$)%", $attr, $match ) ) {
// value
$thisval = $match[1];
if ( in_array( strtolower( $attrname ), $uris, true ) ) {
$thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols );
}

if ( false === array_key_exists( $attrname, $attrarr ) ) {
$attrarr[ $attrname ] = array(
'name' => $attrname,
'value' => $thisval,
'whole' => "$attrname=\"$thisval\"",
'vless' => 'n',
);
}

// We add quotes to conform to W3C's HTML spec.
$working = 1;
$mode = 0;
$attr = preg_replace( "%^[^\s\"']+(\s+|$)%", '', $attr );
}

break;
} // End switch.

if ( 0 === $working ) { // Not well-formed, remove and try again.
$attr = wp_kses_html_error( $attr );
$mode = 0;
if ( ! $is_boolean && in_array( $name, $uris, true ) ) {
$value = wp_kses_bad_protocol( $value, $allowed_protocols );
}
} // End while.

if ( 1 === $mode && false === array_key_exists( $attrname, $attrarr ) ) {
/*
* Special case, for when the attribute list ends with a valueless
* attribute like "selected".
*/
$attrarr[ $attrname ] = array(
'name' => $attrname,
'value' => '',
'whole' => $attrname,
'vless' => 'y',
$value = esc_attr( $value );

$attributes[ $name ] = array(
'name' => $name,
'value' => $is_boolean ? '' : $value,
'whole' => $is_boolean ? $name : "{$name}=\"{$value}\"",
'vless' => $is_boolean,
);
}

return $attrarr;
return $attributes;
}

/**
Expand All @@ -1481,41 +1370,30 @@ function wp_kses_hair( $attr, $allowed_protocols ) {
* @return array|false List of attributes found in the element. Returns false on failure.
*/
function wp_kses_attr_parse( $element ) {
$valid = preg_match( '%^(<\s*)(/\s*)?([a-zA-Z0-9]+\s*)([^>]*)(>?)$%', $element, $matches );
if ( 1 !== $valid ) {
return false;
}

$begin = $matches[1];
$slash = $matches[2];
$elname = $matches[3];
$attr = $matches[4];
$end = $matches[5];
$attributes = array();
$p = new WP_HTML_Tag_Processor( $element );

if ( '' !== $slash ) {
// Closing elements do not get parsed.
if ( ! $p->next_tag() ) {
return false;
}

// Is there a closing XHTML slash at the end of the attributes?
if ( 1 === preg_match( '%\s*/\s*$%', $attr, $matches ) ) {
$xhtml_slash = $matches[0];
$attr = substr( $attr, 0, -strlen( $xhtml_slash ) );
} else {
$xhtml_slash = '';
}
$tag_name = strtolower( $p->get_tag() );
$attributes[] = "<{$tag_name} ";

// Split it.
$attrarr = wp_kses_hair_parse( $attr );
if ( false === $attrarr ) {
return false;
foreach ( $p->get_attribute_names_with_prefix( '' ) as $name ) {
$value = $p->get_attribute( $name );

if ( true === $value ) {
$attributes[] = $name;
} else {
$value = esc_attr( $value );
$attributes[] = "$name=\"{$value}\"";
}
}

// Make sure all input is returned by adding front and back matter.
array_unshift( $attrarr, $begin . $slash . $elname );
array_push( $attrarr, $xhtml_slash . $end );
$attributes[] = '>';

return $attrarr;
return $attributes;
}

/**
Expand All @@ -1528,6 +1406,8 @@ function wp_kses_attr_parse( $element ) {
*
* @since 4.2.3
*
* @deprecated 6.3.0 No longer used as a helper function.
*
* @param string $attr Attribute list from HTML element to closing HTML element tag.
* @return array|false List of attributes found in $attr. Returns false on failure.
*/
Expand Down Expand Up @@ -1795,6 +1675,7 @@ function wp_kses_array_lc( $inarray ) {
* but it deals with quotes and apostrophes as well.
*
* @since 1.0.0
* @deprecated 6.3.0 No longer needed as a helper function.
*
* @param string $attr
* @return string
Expand Down
15 changes: 8 additions & 7 deletions tests/phpunit/tests/kses.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Tests_Kses extends WP_UnitTestCase {
public function test_wp_filter_post_kses_address( $content, $expected ) {
global $allowedposttags;

$this->assertSame( $expected, wp_kses( $content, $allowedposttags ) );
$this->assertEqualMarkup( $expected, wp_kses( $content, $allowedposttags ) );
}

/**
Expand Down Expand Up @@ -65,7 +65,7 @@ public function data_wp_filter_post_kses_address() {
public function test_wp_filter_post_kses_a( $content, $expected ) {
global $allowedposttags;

$this->assertSame( $expected, wp_kses( $content, $allowedposttags ) );
$this->assertEqualMarkup( $expected, wp_kses( $content, $allowedposttags ) );
}

/**
Expand Down Expand Up @@ -171,7 +171,7 @@ public function data_wp_kses_video() {
public function test_wp_filter_post_kses_abbr( $content, $expected ) {
global $allowedposttags;

$this->assertSame( $expected, wp_kses( $content, $allowedposttags ) );
$this->assertEqualMarkup( $expected, wp_kses( $content, $allowedposttags ) );
}

/**
Expand Down Expand Up @@ -725,7 +725,8 @@ public function data_hair_parse() {
* @dataProvider data_attr_parse
*/
public function test_attr_parse( $input, $output ) {
return $this->assertSame( $output, wp_kses_attr_parse( $input ) );
// @TODO: Why has this test been hard-coding the spaces at the end of the attribute?
return $this->assertSame( $output, trim( wp_kses_attr_parse( $input ) ) );
}

public function data_attr_parse() {
Expand Down Expand Up @@ -1711,7 +1712,7 @@ public function test_wp_kses_main_tag_standard_attributes() {
* @param string $expected The expected result from KSES.
*/
public function test_wp_kses_object_tag_allowed( $html, $expected ) {
$this->assertSame( $expected, wp_kses_post( $html ) );
$this->assertEqualMarkup( $expected, wp_kses_post( $html ) );
}

/**
Expand Down Expand Up @@ -1912,7 +1913,7 @@ public function filter_wp_kses_object_added_in_html_filter( $tags, $context ) {
* @param array $allowed_html The allowed HTML to pass to KSES.
*/
public function test_wp_kses_allowed_values_list( $content, $expected, $allowed_html ) {
$this->assertSame( $expected, wp_kses( $content, $allowed_html ) );
$this->assertEqualMarkup( $expected, wp_kses( $content, $allowed_html ) );
}

/**
Expand Down Expand Up @@ -1970,7 +1971,7 @@ static function ( $datum ) {
* @param array $allowed_html The allowed HTML to pass to KSES.
*/
public function test_wp_kses_required_attribute( $content, $expected, $allowed_html ) {
$this->assertSame( $expected, wp_kses( $content, $allowed_html ) );
$this->assertEqualMarkup( $expected, wp_kses( $content, $allowed_html ) );
}

/**
Expand Down
2 changes: 1 addition & 1 deletion tests/phpunit/tests/oembed/filterResult.php
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ public function test_wp_filter_pre_oembed_custom_result( $html, $expected ) {
'html' => $html,
);
$actual = _wp_oembed_get_object()->data2html( $data, 'https://untrusted.localhost' );
$this->assertSame( $expected, $actual );
$this->assertEqualMarkup( $expected, $actual );
}

/**
Expand Down
4 changes: 2 additions & 2 deletions tests/phpunit/tests/post/filtering.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public function test_post_content_disallowed_attr() {
$id = self::factory()->post->create( array( 'post_content' => $content ) );
$post = get_post( $id );

$this->assertSame( $expected, $post->post_content );
$this->assertEqualMarkup( $expected, $post->post_content );
}

/**
Expand All @@ -90,7 +90,7 @@ public function test_post_content_xhtml_empty_elem() {
$id = self::factory()->post->create( array( 'post_content' => $content ) );
$post = get_post( $id );

$this->assertSame( $expected, $post->post_content );
$this->assertEqualMarkup( $expected, $post->post_content );
}

// Make sure unbalanced tags are untouched when the balance option is off.
Expand Down
2 changes: 1 addition & 1 deletion tests/phpunit/tests/shortcode.php
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ public function test_spaces_around_shortcodes() {
* @dataProvider data_escaping
*/
public function test_escaping( $input, $output ) {
return $this->assertSame( $output, do_shortcode( $input ) );
return $this->assertEqualMarkup( $output, do_shortcode( $input ) );
}

public function data_escaping() {
Expand Down
Loading