Skip to content

Commit

Permalink
fix Unicode planes (#52)
Browse files Browse the repository at this point in the history
  • Loading branch information
cometkim authored Aug 31, 2024
1 parent 2c12a71 commit b045320
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 9 deletions.
5 changes: 5 additions & 0 deletions .changeset/modern-cooks-laugh.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"unicode-segmenter": patch
---

Fix `isSMP`, and add more plane utils (`isSIP`, `isTIP`, `isSSP`)
4 changes: 2 additions & 2 deletions src/grapheme.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
// @ts-check

import { bsearchRange } from './core.js';
import { isSMP } from './utils.js';
import { isBMP } from './utils.js';
import {
searchGraphemeCategory,
GraphemeCategory,
Expand Down Expand Up @@ -96,7 +96,7 @@ export function* graphemeSegments(input) {

while (true) {
segment += input[cursor++];
if (isSMP(cp)) {
if (!isBMP(cp)) {
segment += input[cursor++];
}

Expand Down
32 changes: 31 additions & 1 deletion src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,5 +89,35 @@ export function isBMP(c) {
* @return {boolean}
*/
export function isSMP(c) {
  // The SMP is plane 1 only (U+10000..U+1FFFF). A bare `0xffff < c` test would
  // also accept every higher plane (SIP, TIP, SSP, ...), so both bounds are
  // checked. Callers that mean "outside the BMP" should use `!isBMP(c)`.
  return 0x10000 <= c && c <= 0x1ffff;
}

/**
 * Tell whether a code point belongs to the SIP (Supplementary Ideographic
 * Plane), i.e. lies in the inclusive range U+20000..U+2FFFF.
 *
 * @param {number} c Unicode code point
 * @return {boolean} `true` when `c` is inside the plane, `false` otherwise
 */
export function isSIP(c) {
  const planeStart = 0x20000;
  const planeEnd = 0x2ffff;
  return c >= planeStart && c <= planeEnd;
}

/**
 * Tell whether a code point belongs to the TIP (Tertiary Ideographic Plane),
 * i.e. lies in the inclusive range U+30000..U+3FFFF.
 *
 * @param {number} c Unicode code point
 * @return {boolean} `true` when `c` is inside the plane, `false` otherwise
 */
export function isTIP(c) {
  const planeStart = 0x30000;
  const planeEnd = 0x3ffff;
  return c >= planeStart && c <= planeEnd;
}

/**
 * Tell whether a code point belongs to the SSP (Supplementary Special-purpose
 * Plane), i.e. lies in the inclusive range U+E0000..U+EFFFF.
 *
 * @param {number} c Unicode code point
 * @return {boolean} `true` when `c` is inside the plane, `false` otherwise
 */
export function isSSP(c) {
  const planeStart = 0xe0000;
  const planeEnd = 0xeffff;
  return c >= planeStart && c <= planeEnd;
}
45 changes: 39 additions & 6 deletions test/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ import {
takeCodePoint,
isBMP,
isSMP,
isSIP,
isTIP,
isSSP,
} from 'unicode-segmenter/utils';

fc.configureGlobal({
Expand Down Expand Up @@ -83,20 +86,50 @@ test('takeChar', async t => {

test('isBMP', () => {
  // Property: every integer in U+0000..U+FFFF is reported as a BMP code point.
  fc.assert(
    fc.property(fc.integer({ min: 0, max: 0xffff }),
      // @ts-ignore
      (data) => {
        assert.ok(isBMP(data));
      }),
  );
});

test('isSMP', () => {
  // Property: every integer in U+10000..U+1FFFF is reported as an SMP code point.
  fc.assert(
    fc.property(fc.integer({ min: 0x10000, max: 0x1ffff }),
      // @ts-ignore
      (data) => {
        assert.ok(isSMP(data));
      }),
  );
});

test('isSIP', () => {
  // Property: every integer in U+20000..U+2FFFF is reported as a SIP code point.
  const sipCodePoints = fc.integer({ min: 0x20000, max: 0x2ffff });
  // @ts-ignore
  const alwaysInPlane = fc.property(sipCodePoints, (cp) => {
    assert.ok(isSIP(cp));
  });
  fc.assert(alwaysInPlane);
});

test('isTIP', () => {
  // Property: every integer in U+30000..U+3FFFF is reported as a TIP code point.
  const tipCodePoints = fc.integer({ min: 0x30000, max: 0x3ffff });
  // @ts-ignore
  const alwaysInPlane = fc.property(tipCodePoints, (cp) => {
    assert.ok(isTIP(cp));
  });
  fc.assert(alwaysInPlane);
});

test('isSSP', () => {
  // Property: every integer in U+E0000..U+EFFFF is reported as an SSP code point.
  const sspCodePoints = fc.integer({ min: 0xe0000, max: 0xeffff });
  // @ts-ignore
  const alwaysInPlane = fc.property(sspCodePoints, (cp) => {
    assert.ok(isSSP(cp));
  });
  fc.assert(alwaysInPlane);
});

0 comments on commit b045320

Please sign in to comment.