Skip to content

Commit

Permalink
Switched UTF string encoding approach to handle multibyte characters
Browse files Browse the repository at this point in the history
Also removed LRU cache since the caching approach seems to slow things down overall, based on benchmarking.
  • Loading branch information
Brian Vaughn committed Sep 27, 2021
1 parent 8464d69 commit f16251a
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 68 deletions.
3 changes: 0 additions & 3 deletions packages/react-devtools-shared/src/__tests__/setupEnv.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,3 @@ global.process.env.DARK_MODE_DIMMED_LOG_COLOR = DARK_MODE_DIMMED_LOG_COLOR;
global.process.env.LIGHT_MODE_DIMMED_WARNING_COLOR = LIGHT_MODE_DIMMED_WARNING_COLOR;
global.process.env.LIGHT_MODE_DIMMED_ERROR_COLOR = LIGHT_MODE_DIMMED_ERROR_COLOR;
global.process.env.LIGHT_MODE_DIMMED_LOG_COLOR = LIGHT_MODE_DIMMED_LOG_COLOR;

global.TextEncoder = require('util').TextEncoder;
global.TextDecoder = require('util').TextDecoder;
13 changes: 13 additions & 0 deletions packages/react-devtools-shared/src/__tests__/store-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,19 @@ describe('Store', () => {
`);
});

// Verifies that a component whose displayName consists only of emoji
// (characters outside the Basic Multilingual Plane, i.e. surrogate pairs in a
// JS string) is shown with that exact name in the Store snapshot.
it('should handle multibyte character strings', () => {
const Component = () => null;
Component.displayName = '🟩💜🔵';

const container = document.createElement('div');

// Mount the component so the Store receives its display name.
act(() => legacyRender(<Component />, container));
expect(store).toMatchInlineSnapshot(`
[root]
<🟩💜🔵>
`);
});

describe('collapseNodesByDefault:false', () => {
beforeEach(() => {
store.collapseNodesByDefault = false;
Expand Down
61 changes: 41 additions & 20 deletions packages/react-devtools-shared/src/backend/renderer.js
Original file line number Diff line number Diff line change
Expand Up @@ -1513,11 +1513,16 @@ export function attach(

type OperationsArray = Array<number>;

// Value stored in pendingStringTable for each distinct string.
// - encodedString: the utfEncodeString() output for the string, computed once
//   when the entry is created and later copied into the operations array.
// - id: the ID that getStringID() returns for the string.
type StringTableEntry = {|
encodedString: Array<number>,
id: number,
|};

const pendingOperations: OperationsArray = [];
const pendingRealUnmountedIDs: Array<number> = [];
const pendingSimulatedUnmountedIDs: Array<number> = [];
let pendingOperationsQueue: Array<OperationsArray> | null = [];
const pendingStringTable: Map<string, number> = new Map();
const pendingStringTable: Map<string, StringTableEntry> = new Map();
let pendingStringTableLength: number = 0;
let pendingUnmountedRootID: number | null = null;

Expand Down Expand Up @@ -1735,13 +1740,19 @@ export function attach(
// Now fill in the string table.
// [stringTableLength, str1Length, ...str1, str2Length, ...str2, ...]
operations[i++] = pendingStringTableLength;
pendingStringTable.forEach((value, key) => {
operations[i++] = key.length;
const encodedKey = utfEncodeString(key);
for (let j = 0; j < encodedKey.length; j++) {
operations[i + j] = encodedKey[j];
pendingStringTable.forEach((entry, stringKey) => {
const encodedString = entry.encodedString;

// Don't use the string length.
// It won't work for multibyte characters (like emoji).
const length = encodedString.length;

operations[i++] = length;
for (let j = 0; j < encodedString.length; j++) {
operations[i + j] = encodedString[j];
}
i += key.length;

i += length;
});

if (numUnmountIDs > 0) {
Expand Down Expand Up @@ -1788,21 +1799,31 @@ export function attach(
pendingStringTableLength = 0;
}

function getStringID(str: string | null): number {
if (str === null) {
function getStringID(string: string | null): number {
if (string === null) {
return 0;
}
const existingID = pendingStringTable.get(str);
if (existingID !== undefined) {
return existingID;
}
const stringID = pendingStringTable.size + 1;
pendingStringTable.set(str, stringID);
// The string table total length needs to account
// both for the string length, and for the array item
// that contains the length itself. Hence + 1.
pendingStringTableLength += str.length + 1;
return stringID;
const existingEntry = pendingStringTable.get(string);
if (existingEntry !== undefined) {
return existingEntry.id;
}

const id = pendingStringTable.size + 1;
const encodedString = utfEncodeString(string);

pendingStringTable.set(string, {
encodedString,
id,
});

// The string table's total length must account for both the length of the
// encoded string and the array item that stores that length (hence the + 1).
//
// Don't use the JavaScript string's length for this table.
// It won't work for multibyte characters (like emoji).
pendingStringTableLength += encodedString.length + 1;

return id;
}

function recordMount(fiber: Fiber, parentFiber: Fiber | null) {
Expand Down
76 changes: 31 additions & 45 deletions packages/react-devtools-shared/src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
* @flow
*/

import LRU from 'lru-cache';
import {
isElement,
typeOf,
Expand Down Expand Up @@ -50,19 +49,9 @@ import {localStorageGetItem, localStorageSetItem} from './storage';
import {meta} from './hydration';

import type {ComponentFilter, ElementType} from './types';
import type {LRUCache} from 'react-devtools-shared/src/types';

const cachedDisplayNames: WeakMap<Function, string> = new WeakMap();

// On large trees, encoding takes significant time.
// Try to reuse the already encoded strings.
const encodedStringCache: LRUCache<
string,
Array<number> | Uint8Array,
> = new LRU({
max: 1000,
});

export function alphaSortKeys(
a: string | number | Symbol,
b: string | number | Symbol,
Expand Down Expand Up @@ -128,47 +117,44 @@ export function getUID(): number {
return ++uidCounter;
}

const isTextEncoderSupported =
typeof TextDecoder === 'function' && typeof TextEncoder === 'function';

export function utfDecodeString(array: Array<number>): string {
if (isTextEncoderSupported) {
// Handles multi-byte characters; use if available.
return new TextDecoder().decode(new Uint8Array(array));
} else {
// Avoid spreading the array (e.g. String.fromCodePoint(...array))
// Function arguments are first placed on the stack before the function is called,
// which throws a RangeError for large arrays.
// See github.com/facebook/react/issues/22293
let string = '';
for (let i = 0; i < array.length; i++) {
const char = array[i];
string += String.fromCodePoint(char);
}
return string;
// Avoid spreading the array (e.g. String.fromCodePoint(...array))
// Function arguments are first placed on the stack before the function is called,
// which throws a RangeError for large arrays.
// See github.com/facebook/react/issues/22293
let string = '';
for (let i = 0; i < array.length; i++) {
const char = array[i];
string += String.fromCodePoint(char);
}
return string;
}

export function utfEncodeString(string: string): Array<number> | Uint8Array {
const cached = encodedStringCache.get(string);
if (cached !== undefined) {
return cached;
}
function surrogatePairToCodePoint(
charCode1: number,
charCode2: number,
): number {
return ((charCode1 & 0x3ff) << 10) + (charCode2 & 0x3ff) + 0x10000;
}

let encoded;
if (isTextEncoderSupported) {
// Handles multi-byte characters; use if available.
encoded = new TextEncoder().encode(string);
} else {
encoded = new Array(string.length);
for (let i = 0; i < string.length; i++) {
encoded[i] = string.codePointAt(i);
// Credit for this encoding approach goes to Tim Down:
// https://stackoverflow.com/questions/4877326/how-can-i-tell-if-a-string-contains-multibyte-characters-in-javascript
export function utfEncodeString(string: string): Array<number> {
const codePoints = [];
let i = 0;
let charCode;
while (i < string.length) {
charCode = string.charCodeAt(i);
if ((charCode & 0xf800) === 0xd800) {
codePoints.push(
surrogatePairToCodePoint(charCode, string.charCodeAt(++i)),
);
} else {
codePoints.push(charCode);
}
++i;
}

encodedStringCache.set(string, encoded);

return encoded;
return codePoints;
}

export function printOperationsArray(operations: Array<number>) {
Expand Down

0 comments on commit f16251a

Please sign in to comment.