Switched UTF string encoding approach to handle multibyte characters

Also removed the LRU cache of encoded strings, since benchmarking suggests that the caching slows things down overall.
facebook · Sep 27, 2021 · f16251a · f16251a
1 parent 8464d69
commit f16251a
Show file tree

Hide file tree

Showing 4 changed files with 85 additions and 68 deletions.
diff --git a/packages/react-devtools-shared/src/__tests__/setupEnv.js b/packages/react-devtools-shared/src/__tests__/setupEnv.js
@@ -24,6 +24,3 @@ global.process.env.DARK_MODE_DIMMED_LOG_COLOR = DARK_MODE_DIMMED_LOG_COLOR;
 global.process.env.LIGHT_MODE_DIMMED_WARNING_COLOR = LIGHT_MODE_DIMMED_WARNING_COLOR;
 global.process.env.LIGHT_MODE_DIMMED_ERROR_COLOR = LIGHT_MODE_DIMMED_ERROR_COLOR;
 global.process.env.LIGHT_MODE_DIMMED_LOG_COLOR = LIGHT_MODE_DIMMED_LOG_COLOR;
-
-global.TextEncoder = require('util').TextEncoder;
-global.TextDecoder = require('util').TextDecoder;
diff --git a/packages/react-devtools-shared/src/__tests__/store-test.js b/packages/react-devtools-shared/src/__tests__/store-test.js
@@ -101,6 +101,19 @@ describe('Store', () => {
     `);
   });
 
+  it('should handle multibyte character strings', () => {
+    const Component = () => null;
+    Component.displayName = '🟩💜🔵';
+
+    const container = document.createElement('div');
+
+    act(() => legacyRender(<Component />, container));
+    expect(store).toMatchInlineSnapshot(`
+      [root]
+          <🟩💜🔵>
+    `);
+  });
+
   describe('collapseNodesByDefault:false', () => {
     beforeEach(() => {
       store.collapseNodesByDefault = false;

diff --git a/packages/react-devtools-shared/src/backend/renderer.js b/packages/react-devtools-shared/src/backend/renderer.js
@@ -1513,11 +1513,16 @@ export function attach(
 
   type OperationsArray = Array<number>;
 
+  type StringTableEntry = {|
+    encodedString: Array<number>,
+    id: number,
+  |};
+
   const pendingOperations: OperationsArray = [];
   const pendingRealUnmountedIDs: Array<number> = [];
   const pendingSimulatedUnmountedIDs: Array<number> = [];
   let pendingOperationsQueue: Array<OperationsArray> | null = [];
-  const pendingStringTable: Map<string, number> = new Map();
+  const pendingStringTable: Map<string, StringTableEntry> = new Map();
   let pendingStringTableLength: number = 0;
   let pendingUnmountedRootID: number | null = null;
 
@@ -1735,13 +1740,19 @@ export function attach(
     // Now fill in the string table.
     // [stringTableLength, str1Length, ...str1, str2Length, ...str2, ...]
     operations[i++] = pendingStringTableLength;
-    pendingStringTable.forEach((value, key) => {
-      operations[i++] = key.length;
-      const encodedKey = utfEncodeString(key);
-      for (let j = 0; j < encodedKey.length; j++) {
-        operations[i + j] = encodedKey[j];
+    pendingStringTable.forEach((entry, stringKey) => {
+      const encodedString = entry.encodedString;
+
+      // Don't use the string length.
+      // It won't work for multibyte characters (like emoji).
+      const length = encodedString.length;
+
+      operations[i++] = length;
+      for (let j = 0; j < encodedString.length; j++) {
+        operations[i + j] = encodedString[j];
       }
-      i += key.length;
+
+      i += length;
     });
 
     if (numUnmountIDs > 0) {
@@ -1788,21 +1799,31 @@ export function attach(
     pendingStringTableLength = 0;
   }
 
-  function getStringID(str: string | null): number {
-    if (str === null) {
+  function getStringID(string: string | null): number {
+    if (string === null) {
       return 0;
     }
-    const existingID = pendingStringTable.get(str);
-    if (existingID !== undefined) {
-      return existingID;
-    }
-    const stringID = pendingStringTable.size + 1;
-    pendingStringTable.set(str, stringID);
-    // The string table total length needs to account
-    // both for the string length, and for the array item
-    // that contains the length itself. Hence + 1.
-    pendingStringTableLength += str.length + 1;
-    return stringID;
+    const existingEntry = pendingStringTable.get(string);
+    if (existingEntry !== undefined) {
+      return existingEntry.id;
+    }
+
+    const id = pendingStringTable.size + 1;
+    const encodedString = utfEncodeString(string);
+
+    pendingStringTable.set(string, {
+      encodedString,
+      id,
+    });
+
+    // The string table total length needs to account both for the string length,
+    // and for the array item that contains the length itself.
+    //
+    // Don't use string length for this table.
+    // It won't work for multibyte characters (like emoji).
+    pendingStringTableLength += encodedString.length + 1;
+
+    return id;
   }
 
   function recordMount(fiber: Fiber, parentFiber: Fiber | null) {

diff --git a/packages/react-devtools-shared/src/utils.js b/packages/react-devtools-shared/src/utils.js
@@ -7,7 +7,6 @@
  * @flow
  */
 
-import LRU from 'lru-cache';
 import {
   isElement,
   typeOf,
@@ -50,19 +49,9 @@ import {localStorageGetItem, localStorageSetItem} from './storage';
 import {meta} from './hydration';
 
 import type {ComponentFilter, ElementType} from './types';
-import type {LRUCache} from 'react-devtools-shared/src/types';
 
 const cachedDisplayNames: WeakMap<Function, string> = new WeakMap();
 
-// On large trees, encoding takes significant time.
-// Try to reuse the already encoded strings.
-const encodedStringCache: LRUCache<
-  string,
-  Array<number> | Uint8Array,
-> = new LRU({
-  max: 1000,
-});
-
 export function alphaSortKeys(
   a: string | number | Symbol,
   b: string | number | Symbol,
@@ -128,47 +117,44 @@ export function getUID(): number {
   return ++uidCounter;
 }
 
-const isTextEncoderSupported =
-  typeof TextDecoder === 'function' && typeof TextEncoder === 'function';
-
 export function utfDecodeString(array: Array<number>): string {
-  if (isTextEncoderSupported) {
-    // Handles multi-byte characters; use if available.
-    return new TextDecoder().decode(new Uint8Array(array));
-  } else {
-    // Avoid spreading the array (e.g. String.fromCodePoint(...array))
-    // Functions arguments are first placed on the stack before the function is called
-    // which throws a RangeError for large arrays.
-    // See github.com/facebook/react/issues/22293
-    let string = '';
-    for (let i = 0; i < array.length; i++) {
-      const char = array[i];
-      string += String.fromCodePoint(char);
-    }
-    return string;
+  // Avoid spreading the array (e.g. String.fromCodePoint(...array))
+  // Functions arguments are first placed on the stack before the function is called
+  // which throws a RangeError for large arrays.
+  // See github.com/facebook/react/issues/22293
+  let string = '';
+  for (let i = 0; i < array.length; i++) {
+    const char = array[i];
+    string += String.fromCodePoint(char);
   }
+  return string;
 }
 
-export function utfEncodeString(string: string): Array<number> | Uint8Array {
-  const cached = encodedStringCache.get(string);
-  if (cached !== undefined) {
-    return cached;
-  }
+function surrogatePairToCodePoint(
+  charCode1: number,
+  charCode2: number,
+): number {
+  return ((charCode1 & 0x3ff) << 10) + (charCode2 & 0x3ff) + 0x10000;
+}
 
-  let encoded;
-  if (isTextEncoderSupported) {
-    // Handles multi-byte characters; use if available.
-    encoded = new TextEncoder().encode(string);
-  } else {
-    encoded = new Array(string.length);
-    for (let i = 0; i < string.length; i++) {
-      encoded[i] = string.codePointAt(i);
+// Credit for this encoding approach goes to Tim Down:
+// https://stackoverflow.com/questions/4877326/how-can-i-tell-if-a-string-contains-multibyte-characters-in-javascript
+export function utfEncodeString(string: string): Array<number> {
+  const codePoints = [];
+  let i = 0;
+  let charCode;
+  while (i < string.length) {
+    charCode = string.charCodeAt(i);
+    if ((charCode & 0xf800) === 0xd800) {
+      codePoints.push(
+        surrogatePairToCodePoint(charCode, string.charCodeAt(++i)),
+      );
+    } else {
+      codePoints.push(charCode);
     }
+    ++i;
   }
-
-  encodedStringCache.set(string, encoded);
-
-  return encoded;
+  return codePoints;
 }
 
 export function printOperationsArray(operations: Array<number>) {