-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix splitCssText again #1640
Fix splitCssText again #1640
Changes from all commits
fb694b2
5962d03
08a80ab
43df807
d59c1ef
21beccb
e789129
a72c4e1
b3a5a12
e02e608
0347a36
d0fb0c5
e5a9550
01d96bb
86796ac
3a452e3
7a32e49
5743c7e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
--- | ||
"rrweb-snapshot": patch | ||
"rrweb": patch | ||
--- | ||
|
||
Improve performance of splitCssText for <style> elements with large css content - see #1603 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,7 +80,7 @@ | |
* Browsers sometimes incorrectly escape `@import` on `.cssText` statements. | ||
* This function tries to correct the escaping. | ||
* more info: https://bugs.chromium.org/p/chromium/issues/detail?id=1472259 | ||
* @param cssImportRule | ||
Check warning on line 83 in packages/rrweb-snapshot/src/utils.ts
|
||
* @returns `cssText` with browser inconsistencies fixed, or null if not applicable. | ||
*/ | ||
export function escapeImportStatement(rule: CSSImportRule): string { | ||
|
@@ -450,8 +450,19 @@ | |
* Intention is to normalize by remove spaces, semicolons and CSS comments | ||
* so that we can compare css as authored vs. output of stringifyStylesheet | ||
*/ | ||
export function normalizeCssString(cssText: string): string { | ||
return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, ''); | ||
export function normalizeCssString( | ||
cssText: string, | ||
/** | ||
* _testNoPxNorm: only used as part of the 'substring matching going from many to none' | ||
* test case so that it will trigger a failure if the conditions that let to the creation of that test arise again | ||
*/ | ||
_testNoPxNorm = false, | ||
): string { | ||
if (_testNoPxNorm) { | ||
return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, ''); | ||
} else { | ||
return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, '').replace(/0px/g, '0'); | ||
} | ||
} | ||
|
||
/** | ||
|
@@ -463,19 +474,24 @@ | |
export function splitCssText( | ||
cssText: string, | ||
style: HTMLStyleElement, | ||
_testNoPxNorm = false, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be great to get some |
||
): string[] { | ||
const childNodes = Array.from(style.childNodes); | ||
const splits: string[] = []; | ||
let iterLimit = 0; | ||
let iterCount = 0; | ||
if (childNodes.length > 1 && cssText && typeof cssText === 'string') { | ||
let cssTextNorm = normalizeCssString(cssText); | ||
let cssTextNorm = normalizeCssString(cssText, _testNoPxNorm); | ||
const normFactor = cssTextNorm.length / cssText.length; | ||
for (let i = 1; i < childNodes.length; i++) { | ||
if ( | ||
childNodes[i].textContent && | ||
typeof childNodes[i].textContent === 'string' | ||
) { | ||
const textContentNorm = normalizeCssString(childNodes[i].textContent!); | ||
const textContentNorm = normalizeCssString( | ||
childNodes[i].textContent!, | ||
_testNoPxNorm, | ||
); | ||
const jLimit = 100; // how many iterations for the first part of searching | ||
let j = 3; | ||
for (; j < textContentNorm.length; j++) { | ||
if ( | ||
|
@@ -489,31 +505,62 @@ | |
break; | ||
} | ||
for (; j < textContentNorm.length; j++) { | ||
const bit = textContentNorm.substring(0, j); | ||
let startSubstring = textContentNorm.substring(0, j); | ||
// this substring should appears only once in overall text too | ||
const bits = cssTextNorm.split(bit); | ||
let cssNormSplits = cssTextNorm.split(startSubstring); | ||
let splitNorm = -1; | ||
if (bits.length === 2) { | ||
splitNorm = cssTextNorm.indexOf(bit); | ||
if (cssNormSplits.length === 2) { | ||
splitNorm = cssNormSplits[0].length; | ||
} else if ( | ||
bits.length > 2 && | ||
bits[0] === '' && | ||
cssNormSplits.length > 2 && | ||
cssNormSplits[0] === '' && | ||
childNodes[i - 1].textContent !== '' | ||
) { | ||
// this childNode has same starting content as previous | ||
splitNorm = cssTextNorm.indexOf(bit, 1); | ||
splitNorm = cssTextNorm.indexOf(startSubstring, 1); | ||
} else if (cssNormSplits.length === 1) { | ||
// try to roll back to get multiple matches again | ||
startSubstring = startSubstring.substring( | ||
0, | ||
startSubstring.length - 1, | ||
); | ||
cssNormSplits = cssTextNorm.split(startSubstring); | ||
if (cssNormSplits.length <= 1) { | ||
// no split possible | ||
splits.push(cssText); | ||
return splits; | ||
} | ||
j = jLimit + 1; // trigger end of search | ||
} else if (j === textContentNorm.length - 1) { | ||
// we're about to end loop without a split point | ||
splitNorm = cssTextNorm.indexOf(startSubstring); | ||
} | ||
if (cssNormSplits.length >= 2 && j > jLimit) { | ||
const prevTextContent = childNodes[i - 1].textContent; | ||
if (prevTextContent && typeof prevTextContent === 'string') { | ||
// pick the first matching point which respects the previous chunk's approx size | ||
const prevMinLength = normalizeCssString(prevTextContent).length; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i guess it's somewhere here that means if you run a test twice (or some multiple times) then you don't get the same output (at least in my experience of testing whether this was deterministic when trying to figure out what was happening) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't quite understand this comment, but I would say the algorithm is indeed deterministic, but maybe you mean it will behave differently based on different sized inputs because of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, i wrote a test that ran the split multiple times and compared the output and it didn't match There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah okay, yes this is possible ... basically it stops when the normalised versions match (although I still think it would be deterministic given the same input twice). I can't imagine there being a problem if one side has more whitespace than it should have. |
||
splitNorm = cssTextNorm.indexOf(startSubstring, prevMinLength); | ||
} | ||
if (splitNorm === -1) { | ||
// fall back to pick the first matching point of many | ||
splitNorm = cssNormSplits[0].length; | ||
} | ||
} | ||
if (splitNorm !== -1) { | ||
// find the split point in the original text | ||
let k = Math.floor(splitNorm / normFactor); | ||
for (; k > 0 && k < cssText.length; ) { | ||
iterLimit += 1; | ||
if (iterLimit > 50 * childNodes.length) { | ||
iterCount += 1; | ||
if (iterCount > 50 * childNodes.length) { | ||
// quit for performance purposes | ||
splits.push(cssText); | ||
return splits; | ||
} | ||
const normPart = normalizeCssString(cssText.substring(0, k)); | ||
const normPart = normalizeCssString( | ||
cssText.substring(0, k), | ||
_testNoPxNorm, | ||
); | ||
if (normPart.length === splitNorm) { | ||
splits.push(cssText.substring(0, k)); | ||
cssText = cssText.substring(k); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i briefly tested a very naive loop parser and it was slower than regex replace - i guess because browsers/v8 are doing some magic to optimise this already
but I didn't test it over a range of inputs
this is (from my testing) at best O(n) for whitespace - and for clarity since i'm not much of a comp sci person. if you insert whitespace into the input then this gets slower the more whitespace is present
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The performance of the normalization function could be improved. I've moreso tried to ensure it's not called repeatedly on the same piece of css (with the 'binary search' style changes in #1615 ).