From ffec363fd79009d2b255dedadd05f7d3a8654adc Mon Sep 17 00:00:00 2001 From: "Mark S. Miller" Date: Wed, 24 Jan 2024 11:33:08 -0800 Subject: [PATCH] fix(pass-style)!: only well-formed strings are passable --- packages/pass-style/NEWS.md | 6 ++ packages/pass-style/index.js | 7 ++- packages/pass-style/src/passStyleOf.js | 60 ++++++++++++++++++- packages/pass-style/test/test-pass-style.js | 5 -- .../pass-style/test/test-passable-string.js | 53 ++++++++++++++++ 5 files changed, 124 insertions(+), 7 deletions(-) delete mode 100644 packages/pass-style/test/test-pass-style.js create mode 100644 packages/pass-style/test/test-passable-string.js diff --git a/packages/pass-style/NEWS.md b/packages/pass-style/NEWS.md index e69de29bb2..0957967fa0 100644 --- a/packages/pass-style/NEWS.md +++ b/packages/pass-style/NEWS.md @@ -0,0 +1,6 @@ +User-visible changes in `@endo/pass-style`: + +# next release + +- Previously, all JavaScript strings were considered Passable with `passStyleOf(str) === 'string'`. Now, only well-formed Unicode strings are considered Passable. For all others, `passStyleOf(str)` throws a diagnostic error. This brings us into closer conformance to the OCapN standard, which prohibits sending non-well-formed strings, and requires non-well-formed strings to be rejected when received. Applications that had previously handled non-well-formed strings successfully (even if inadvertantly) may now start experiences these failure. +- Exports `isWellFormedString` and `assertWellFormedString`. Unfortunately the [standard `String.prototype.isWellFormed`](https://tc39.es/proposal-is-usv-string/) first coerces its input to string, leading it to claim that some non-strings are well-formed strings. By contrast, `isWellFormedString` and `assertWellFormedString` will not judge any non-strings to be well-formed strings. diff --git a/packages/pass-style/index.js b/packages/pass-style/index.js index da985131a2..07a34fa669 100644 --- a/packages/pass-style/index.js +++ b/packages/pass-style/index.js @@ -21,7 +21,12 @@ export { passableSymbolForName, } from './src/symbol.js'; -export { passStyleOf, assertPassable } from './src/passStyleOf.js'; +export { + isWellFormedString, + assertWellFormedString, + passStyleOf, + assertPassable, +} from './src/passStyleOf.js'; export { makeTagged } from './src/makeTagged.js'; export { diff --git a/packages/pass-style/src/passStyleOf.js b/packages/pass-style/src/passStyleOf.js index a335b2bca2..05ff07277a 100644 --- a/packages/pass-style/src/passStyleOf.js +++ b/packages/pass-style/src/passStyleOf.js @@ -26,6 +26,61 @@ import { assertSafePromise } from './safe-promise.js'; const { ownKeys } = Reflect; const { isFrozen } = Object; +// @ts-expect-error TS builtin `String` type does not yet +// know about`isWellFormed` +const hasWellFormedStringMethod = !!String.prototype.isWellFormed; + +/** + * Is the argument a well-formed string? + * + * Unfortunately, the + * [standard built-in `String.prototype.isWellFormed`](https://github.com/tc39/proposal-is-usv-string) + * does a ToString on its input, causing it to judge non-strings to be + * well-formed strings if they coerce to a well-formed strings. This + * recapitulates the mistake in having the global `isNaN` coerce its inputs, + * causing it to judge non-string to be NaN if they coerce to NaN. + * + * This `isWellFormedString` function only judges well-formed strings to be + * well-formed strings. For all non-strings it returns false. + * + * @param {unknown} str + * @returns {str is string} + */ +export const isWellFormedString = hasWellFormedStringMethod + ? // @ts-expect-error TS does not yet know about `isWellFormed` + str => typeof str === 'string' && str.isWellFormed() + : str => { + if (typeof str !== 'string') { + return false; + } + for (const ch of str) { + // The string iterator iterates by Unicode code point, not + // UTF16 code unit. But if it encounters an unpaired surrogate, + // it will produce it. + const cp = /** @type {number} */ (ch.codePointAt(0)); + if (cp >= 0xd800 && cp <= 0xdfff) { + // All surrogates are in this range. The string iterator only + // produces a character in this range for unpaired surrogates, + // which only happens if the string is not well-formed. + return false; + } + } + return true; + }; +harden(isWellFormedString); + +/** + * Returns normally when `isWellFormedString(str)` would return true. + * Throws a diagnostic error when `isWellFormedString(str)` would return false. + * + * @param {unknown} str + * @returns {asserts str is string} + */ +export const assertWellFormedString = str => { + isWellFormedString(str) || Fail`Expected well-formed unicode string: ${str}`; +}; +harden(assertWellFormedString); + /** * @param {PassStyleHelper[]} passStyleHelpers * @returns {Record } @@ -124,12 +179,15 @@ const makePassStyleOf = passStyleHelpers => { const typestr = typeof inner; switch (typestr) { case 'undefined': - case 'string': case 'boolean': case 'number': case 'bigint': { return typestr; } + case 'string': { + assertWellFormedString(inner); + return 'string'; + } case 'symbol': { assertPassableSymbol(inner); return 'symbol'; diff --git a/packages/pass-style/test/test-pass-style.js b/packages/pass-style/test/test-pass-style.js deleted file mode 100644 index 9da523a4be..0000000000 --- a/packages/pass-style/test/test-pass-style.js +++ /dev/null @@ -1,5 +0,0 @@ -import { test } from './prepare-test-env-ava.js'; - -test('place holder', async t => { - t.pass(); -}); diff --git a/packages/pass-style/test/test-passable-string.js b/packages/pass-style/test/test-passable-string.js new file mode 100644 index 0000000000..a6a2536013 --- /dev/null +++ b/packages/pass-style/test/test-passable-string.js @@ -0,0 +1,53 @@ +/* eslint-disable no-useless-concat */ +import { test } from './prepare-test-env-ava.js'; + +import { + passStyleOf, + isWellFormedString, + assertWellFormedString, +} from '../src/passStyleOf.js'; + +test('test string well formedness behaviors', t => { + const gcleff1 = '\u{1D11E}'; + const gcleff2 = '\u{D834}\u{DD1E}'; + const gcleff3 = '\u{D834}' + '\u{DD1E}'; + const badcleff1 = '\u{D834}\u{D834}\u{DD1E}'; + const badcleff2 = '\u{D834}\u{DD1E}\u{D834}'; + const badcleff3 = '\u{D834}' + '\u{DD1E}\u{D834}'; + + // This test block ensures that the underlying platform behaves as we expect + t.is(gcleff1, gcleff2); + t.is(gcleff1, gcleff3); + t.is(gcleff1.length, 2); + t.is(gcleff2.length, 2); + t.is(gcleff3.length, 2); + // Uses string iterator, which iterates code points if possible, not + // UTF16 code units + t.deepEqual([...gcleff1], [gcleff1]); + t.not(badcleff1, badcleff2); + t.is(badcleff2, badcleff3); + t.is(badcleff1.length, 3); + // But if the string contains lone surrogates, the string iterator will + // produce those as characters + t.deepEqual([...badcleff1], ['\u{D834}', gcleff1]); + t.deepEqual([...badcleff2], [gcleff1, '\u{D834}']); + + t.is(passStyleOf(gcleff1), 'string'); + t.true(isWellFormedString(gcleff1)); + t.notThrows(() => assertWellFormedString(gcleff1)); + + t.throws(() => passStyleOf(badcleff1), { + message: 'Expected well-formed unicode string: "\\ud834𝄞"', + }); + t.throws(() => passStyleOf(badcleff2), { + message: 'Expected well-formed unicode string: "𝄞\\ud834"', + }); + t.false(isWellFormedString(badcleff1)); + t.false(isWellFormedString(badcleff2)); + t.throws(() => assertWellFormedString(badcleff1), { + message: 'Expected well-formed unicode string: "\\ud834𝄞"', + }); + t.throws(() => assertWellFormedString(badcleff2), { + message: 'Expected well-formed unicode string: "𝄞\\ud834"', + }); +});