Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tree shaking for attribute-extant selectors #2080

Merged
merged 9 commits into from
Apr 11, 2019
121 changes: 101 additions & 20 deletions includes/sanitizers/class-amp-style-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,42 @@ class AMP_Style_Sanitizer extends AMP_Base_Sanitizer {
*/
const INLINE_SPECIFICITY_MULTIPLIER = 5; // @todo The correctness of using "5" should be validated.

/**
* Array index for tag names extracted from a selector.
*
* @private
* @since 1.1
* @see \AMP_Style_Sanitizer::prepare_stylesheet()
*/
const SELECTOR_EXTRACTED_TAGS = 0;

/**
* Array index for class names extracted from a selector.
*
* @private
* @since 1.1
* @see \AMP_Style_Sanitizer::prepare_stylesheet()
*/
const SELECTOR_EXTRACTED_CLASSES = 1;

/**
* Array index for IDs extracted from a selector.
*
* @private
* @since 1.1
* @see \AMP_Style_Sanitizer::prepare_stylesheet()
*/
const SELECTOR_EXTRACTED_IDS = 2;

/**
* Array index for attributes extracted from a selector.
*
* @private
* @since 1.1
* @see \AMP_Style_Sanitizer::prepare_stylesheet()
*/
const SELECTOR_EXTRACTED_ATTRIBUTES = 3;

/**
* Array of flags used to control sanitization.
*
Expand Down Expand Up @@ -169,10 +205,25 @@ class AMP_Style_Sanitizer extends AMP_Base_Sanitizer {
/**
* Attributes used in the document.
*
* This is initially populated with boolean attributes which can be mutated by AMP at runtime,
* since they can by dynamically added at any time.
*
* @since 1.1
* @var array
*/
private $used_attributes = array();
private $used_attributes = array(
'autofocus' => true,
'checked' => true,
'controls' => true,
'disabled' => true,
'hidden' => true,
'loop' => true,
'multiple' => true,
'open' => true,
'readonly' => true,
'required' => true,
'selected' => true,
);

/**
* Tag names used in document.
Expand Down Expand Up @@ -558,7 +609,7 @@ private function has_used_tag_names( $tag_names ) {
* Check whether the attributes exist.
*
* @since 1.1
* @todo Make $attribute_names into $attributes as an associative array and implement lookups of specific values.
* @todo Make $attribute_names into $attributes as an associative array and implement lookups of specific values. Since attribute values can vary (e.g. with amp-bind), this may not be feasible.
*
* @param string[] $attribute_names Attribute names.
* @return bool Whether all supplied attributes are used.
Expand Down Expand Up @@ -1106,7 +1157,7 @@ private function fetch_external_stylesheet( $url ) {
private function process_stylesheet( $stylesheet, $options = array() ) {
$parsed = null;
$cache_key = null;
$cache_group = 'amp-parsed-stylesheet-v15'; // This should be bumped whenever the PHP-CSS-Parser is updated.
$cache_group = 'amp-parsed-stylesheet-v16'; // This should be bumped whenever the PHP-CSS-Parser is updated or parsed format is updated.

$cache_impacting_options = array_merge(
wp_array_slice_assoc(
Expand All @@ -1127,7 +1178,7 @@ private function process_stylesheet( $stylesheet, $options = array() ) {
if ( wp_using_ext_object_cache() ) {
$parsed = wp_cache_get( $cache_key, $cache_group );
} else {
$parsed = get_transient( $cache_key . $cache_group );
$parsed = get_transient( $cache_group . '-' . $cache_key );
}

/*
Expand Down Expand Up @@ -1156,7 +1207,7 @@ private function process_stylesheet( $stylesheet, $options = array() ) {
wp_cache_set( $cache_key, $parsed, $cache_group );
} else {
// The expiration is to ensure transient doesn't stick around forever since no LRU flushing like with external object cache.
set_transient( $cache_key . $cache_group, $parsed, MONTH_IN_SECONDS );
set_transient( $cache_group . '-' . $cache_key, $parsed, MONTH_IN_SECONDS );
}
}

Expand Down Expand Up @@ -1456,41 +1507,64 @@ function( $selector ) {
$selectors = explode( $between_selectors . ',', $split_stylesheet[ ++$i ] );
$declaration = $split_stylesheet[ ++$i ];

// @todo The following logic could be made much more robust if PHP-CSS-Parser did parsing of selectors. See <https://github.com/sabberworm/PHP-CSS-Parser/pull/138#issuecomment-418193262> and <https://github.com/ampproject/amp-wp/issues/2102>.
$selectors_parsed = array();
foreach ( $selectors as $selector ) {
$selectors_parsed[ $selector ] = array();

// Remove :not() and pseudo selectors to eliminate false negatives, such as with `body:not(.title-tagline-hidden) .site-branding-text`.
$reduced_selector = preg_replace( '/:[a-zA-Z0-9_-]+(\(.+?\))?/', '', $selector );

// Remove attribute selectors to eliminate false negative, such as with `.social-navigation a[href*="example.com"]:before`.
$reduced_selector = preg_replace( '/\[\w.*?\]/', '', $reduced_selector );
$reduced_selector = preg_replace( '/::?[a-zA-Z0-9_-]+(\(.+?\))?/', '', $selector );

// Ignore any selector terms that occur under a dynamic selector.
if ( $dynamic_selector_pattern ) {
$reduced_selector = preg_replace( '#((?:' . $dynamic_selector_pattern . ')(?:\.[a-z0-9_-]+)*)[^a-z0-9_-].*#si', '$1', $reduced_selector . ' ' );
}

/*
* Gather attribute names while removing attribute selectors to eliminate false negative,
* such as with `.social-navigation a[href*="example.com"]:before`.
*/
$reduced_selector = preg_replace_callback(
'/\[([A-Za-z0-9_:-]+)(\W?=[^\]]+)?\]/',
function( $matches ) use ( $selector, &$selectors_parsed ) {
$selectors_parsed[ $selector ][ self::SELECTOR_EXTRACTED_ATTRIBUTES ][] = $matches[1];
return '';
},
$reduced_selector
);

// Extract class names.
$reduced_selector = preg_replace_callback(
'/\.([a-zA-Z0-9_-]+)/',
function( $matches ) use ( $selector, &$selectors_parsed ) {
$selectors_parsed[ $selector ]['classes'][] = $matches[1];
$selectors_parsed[ $selector ][ self::SELECTOR_EXTRACTED_CLASSES ][] = $matches[1];
return '';
},
$reduced_selector
);

// Extract IDs.
$reduced_selector = preg_replace_callback(
'/#([a-zA-Z0-9_-]+)/',
function( $matches ) use ( $selector, &$selectors_parsed ) {
$selectors_parsed[ $selector ]['ids'][] = $matches[1];
$selectors_parsed[ $selector ][ self::SELECTOR_EXTRACTED_IDS ][] = $matches[1];
return '';
},
$reduced_selector
);

if ( preg_match_all( '/[a-zA-Z0-9_-]+/', $reduced_selector, $matches ) ) {
$selectors_parsed[ $selector ]['tags'] = $matches[0];
}
// Extract tag names.
$reduced_selector = preg_replace_callback(
'/[a-zA-Z0-9_-]+/',
function( $matches ) use ( $selector, &$selectors_parsed ) {
$selectors_parsed[ $selector ][ self::SELECTOR_EXTRACTED_TAGS ][] = $matches[0];
return '';
},
$reduced_selector
);

// At this point, $reduced_selector should contain just the remnants of the selector, primarily combinators.
unset( $reduced_selector );
}

$stylesheet[] = array(
Expand Down Expand Up @@ -2586,18 +2660,18 @@ private function finalize_stylesheet_set( $stylesheet_set ) {
(
// If all class names are used in the doc.
(
empty( $parsed_selector['classes'] )
empty( $parsed_selector[ self::SELECTOR_EXTRACTED_CLASSES ] )
||
$this->has_used_class_name( $parsed_selector['classes'] )
$this->has_used_class_name( $parsed_selector[ self::SELECTOR_EXTRACTED_CLASSES ] )
)
&&
// If all IDs are used in the doc.
(
empty( $parsed_selector['ids'] )
empty( $parsed_selector[ self::SELECTOR_EXTRACTED_IDS ] )
||
0 === count(
array_filter(
$parsed_selector['ids'],
$parsed_selector[ self::SELECTOR_EXTRACTED_IDS ],
function( $id ) use ( $dom ) {
return ! $dom->getElementById( $id );
}
Expand All @@ -2607,9 +2681,16 @@ function( $id ) use ( $dom ) {
&&
// If tag names are present in the doc.
(
empty( $parsed_selector['tags'] )
empty( $parsed_selector[ self::SELECTOR_EXTRACTED_TAGS ] )
||
$this->has_used_tag_names( $parsed_selector[ self::SELECTOR_EXTRACTED_TAGS ] )
)
&&
// If all attribute names are used in the doc.
(
empty( $parsed_selector[ self::SELECTOR_EXTRACTED_ATTRIBUTES ] )
||
$this->has_used_tag_names( $parsed_selector['tags'] )
$this->has_used_attributes( $parsed_selector[ self::SELECTOR_EXTRACTED_ATTRIBUTES ] )
)
)
);
Expand Down
125 changes: 122 additions & 3 deletions tests/test-amp-style-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ public function get_link_and_style_test_data() {
)
),
array(
'form [submit-success] b,div[submit-failure] b{color:green}',
'form [submit-success] b{color:green}', // The [submit-failure] selector is removed because there is no div[submit-failure].
'amp-live-list li .highlighted{background:yellow}',
'',
'body amp-list .portland{color:blue}',
Expand Down Expand Up @@ -358,7 +358,7 @@ public function get_link_and_style_test_data() {
array(),
),
'unamerican_lang_attribute_selectors_removed' => array( // USA is used for convenience here. No political statement intended.
'<html amp><head><meta charset="utf-8"><style>html[lang=en-US] {color:red} html[lang="en-US"] {color:white} html[lang^=en] {color:blue} html[lang="en-CA"] {color:red} html[lang^=ar] { color:green; } html[lang="es-MX"] { color:green; }</style></head><body><span>Test</span></body></html>',
'<html lang="en-US" amp><head><meta charset="utf-8"><style>html[lang=en-US] {color:red} html[lang="en-US"] {color:white} html[lang^=en] {color:blue} html[lang="en-CA"] {color:red} html[lang^=ar] { color:green; } html[lang="es-MX"] { color:green; }</style></head><body><span>Test</span></body></html>',
array(
'html[lang=en-US]{color:red}html[lang="en-US"]{color:white}html[lang^=en]{color:blue}',
),
Expand Down Expand Up @@ -592,6 +592,11 @@ public function get_amp_selector_data() {
'div img.logo{border:solid 1px red}',
'', // The selector is removed because there is no div element.
),
'attribute_selectors' => array(
'<div id="content" tabindex="-1"></div><button type=button>Hello</button><a href="#">Top</a><span></span>',
'[type="button"], [type="reset"], [type^="submit"] {color:red} a[href^=http]:after, a[href^="#"]:after { color:blue } span[hidden] {display:none}#content[tabindex="-1"]:focus{ outline: solid 1px red; }',
'[type="button"],[type="reset"],[type^="submit"]{color:red}a[href^=http]:after,a[href^="#"]:after{color:blue}span[hidden]{display:none}#content[tabindex="-1"]:focus{outline:solid 1px red}', // Any selector mentioning [type] or [href] will persist since value is not used for tree shaking.
),
'playbuzz' => array(
'<p>hello</p><div class="pb_feed" data-item="226dd4c0-ef13-4fee-850b-7be32bf6d121"></div>',
'p + div.pb_feed{border:solid 1px blue}',
Expand Down Expand Up @@ -655,6 +660,120 @@ public function test_amp_selector_conversion( $markup, $input, $output ) {
$this->assertEquals( $output, $stylesheets[0] );
}

/**
* Provide data for attribute selector test.
*
* @return array Data.
*/
public function get_attribute_selector_data() {
return array(
'type_attribute' => array(
'<input type="color">',
// All selectors remain because only the existence of the attribute is examined.
array(
'[type="button"]' => true,
'[type*="reset"]' => true,
'[type^="submit"]' => true,
'[type$="button"]' => true,
),
),
'tabindex_attribute' => array(
'<span tabindex="-1"></span>',
// The div[tabindex] is removed because there is no div. The span[tabindex^=2] remains because value is not considered.
array(
'div[tabindex]' => false,
'span[tabindex]' => true,
'span[tabindex=-1]' => true,
'span[tabindex^=2]' => true,
),
),
'href_attribute' => array(
'<a href="foo">Foo</a>',
array(
'a[href^=http]:after' => true,
'a[href^="#"]:after' => true,
),
),
'hidden_attribute' => array(
'<span>not hidden</span>',
// Only div[hidden] should be removed because there is no div element; the other [hidden] selectors remain because it can be dynamically added.
array(
'span[hidden]' => true,
'[hidden]' => true,
'div[hidden]' => false,
'span:not([hidden])' => true,
),
),
'selected_readonly_disabled_multiple_autofocus_required' => array(
'<input><select><option></option></select>',
array(
'[autofocus]' => true,
'[checked]' => true,
'[disabled]' => true,
'[multiple]' => true,
'[readonly]' => true,
'[required]' => true,
'[selected]' => true,
),
),
'open_attribute' => array(
'<details><summary>More</summary>Details</details>',
array(
'[open]' => true,
'amp-lightbox[open]' => false,
'details[open]' => true,
),
),
'media_attributes' => array(
'<amp-video width="720" height="305" layout="responsive" src="https://yourhost.com/videos/myvideo.mp4" poster="https://yourhost.com/posters/poster.png" artwork="https://yourhost.com/artworks/artwork.png" title="Awesome video" artist="Awesome artist" album="Amazing album"></amp-video>',
array(
'[loop]' => true,
'[controls]' => true,
),
),
);
}

/**
* Test attribute selector tree shaking.
*
* @dataProvider get_attribute_selector_data
*
* @param string $markup Source HTML markup.
* @param array $selectors Mapping of selectors to whether they are expected.
*/
public function test_attribute_selector( $markup, $selectors ) {
$style = implode(
'',
array_map(
function ( $selector ) {
return sprintf( '%s{ color: red; }', $selector );
},
array_keys( $selectors )
)
);

$html = "<html amp><head><meta charset=utf-8><style amp-custom>$style</style></head><body>$markup</body></html>";
$dom = AMP_DOM_Utils::get_dom( $html );

$sanitizer_classes = amp_get_content_sanitizers();
$sanitizer_classes['AMP_Style_Sanitizer']['remove_unused_rules'] = 'always';

$sanitized = AMP_Content_Sanitizer::sanitize_document(
$dom,
$sanitizer_classes,
array(
'use_document_element' => true,
)
);

$stylesheets = array_values( $sanitized['stylesheets'] );

$actual_selectors = array_values( array_filter( preg_split( '/{.+?}/s', $stylesheets[0] ) ) );
$expected_selectors = array_keys( array_filter( $selectors ) );
$this->assertEqualSets( $expected_selectors, $actual_selectors );
}

/**
* Data for testing CSS hack removal.
*
Expand Down Expand Up @@ -1045,7 +1164,7 @@ public function test_large_custom_css_and_rule_removal() {
@media screen {}
</style>
';
$html .= '</head><body><span class="b">...</span><span id="exists"></span></body></html>';
$html .= '</head><body><span class="b" data-value="">...</span><span id="exists"></span></body></html>';
$dom = AMP_DOM_Utils::get_dom( $html );

$error_codes = array();
Expand Down