-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#50 : do not split surrogate pair for astral characters (code point >…
… 0x10000) Tentative fix for #64 - supposed duplicate
- Loading branch information
Showing
5 changed files
with
138 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
c.fillText('✪🅼🅼🅼', 5, 165); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
var RegPack = require("../regPack") | ||
var assert = require("assert"); | ||
|
||
// Entry point for the issue #50 checks (exported at the bottom of the file). | ||
// Logs a start banner, runs both tests in order, then logs a done banner; | ||
// a failing assertion throws, so the done banner is only reached when neither test throws. | ||
function runTests() { | ||
console.log("Issue #0050 - Unicode surrogate byte length : start"); | ||
// Byte-length assertions first, then the packer run on surrogate-pair input. | ||
testByteLength(); | ||
testSurrogatePacking(); | ||
console.log("Issue #0050 - Unicode surrogate byte length : done"); | ||
} | ||
|
||
/** | ||
* Github issue #50 - Support for characters in the astral plane | ||
* First, make sure that the astral characters (composed of two 16-bit codes, | ||
 * first one in [0xD800, 0xDBFF] and second one in [0xDC00, 0xDFFF]) are correctly read | ||
* | ||
*/ | ||
function testByteLength() { | ||
// standard ASCII | ||
var input = "0123456789abcdefghijklmnopqrstuvwxyz"; | ||
assert.equal(36, RegPack.packer.getByteLength(input)); | ||
|
||
// 2-byte UTF-8 | ||
input = "®"; | ||
assert.equal(3, RegPack.packer.getByteLength(input)); | ||
|
||
// 4-byte UTF-8 with surrogates | ||
input = "\uD83D\uDD25\uD83D\uDD25\uD83D\uDD25"; | ||
assert.equal(12, RegPack.packer.getByteLength(input)); | ||
|
||
input = "🔥🔥🔥"; | ||
assert.equal(12, RegPack.packer.getByteLength(input)); | ||
} | ||
|
||
|
||
/** | ||
* Github issue #50 - Support for characters in the astral plane | ||
* Then, check that the crusher does not attempt to break the input in between the two surrogate characters, | ||
* since a string starting with the second one would yield a malformed URI | ||
* | ||
*/ | ||
function testSurrogatePacking() { | ||
// 4-byte UTF-8 with surrogates | ||
input = "\uD83D\uDD25\uD83D\uDD25\uD83D\uDD25\uD83D\uDD25\uD83D\uDD25\uD83D\uDD25"; | ||
var options = { | ||
withMath : false, | ||
hash2DContext : false, | ||
hashWebGLContext : false, | ||
hashAudioContext : false, | ||
contextVariableName : false, | ||
contextType : parseInt(0), | ||
reassignVars : false, | ||
varsNotReassigned : [], | ||
crushGainFactor : parseFloat(1), | ||
crushLengthFactor : parseFloat(0), | ||
crushCopiesFactor : parseFloat(0), | ||
crushTiebreakerFactor : parseInt(1), | ||
wrapInSetInterval : false, | ||
timeVariableName : "" | ||
}; | ||
var result = RegPack.packer.runPacker(input, options); | ||
|
||
// Expected result : no exception thrown before, internal check successful, | ||
// and the unicode characters are excluded from the token range | ||
assert(RegPack.packer.getByteLength(result[0].result[2][1]) > 0); | ||
assert.notEqual(result[0].result[2][1].indexOf("uffff]"), -1); | ||
assert.notEqual(result[0].result[2][2].indexOf("Final check : passed"), -1); | ||
} | ||
|
||
|
||
|
||
module.exports = runTests; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
var RegPack = require("../regPack") | ||
var fs = require("fs"); | ||
var assert = require("assert"); | ||
|
||
// Entry point for the issue #64 check (exported at the bottom of the file). | ||
// Logs a start banner, runs the single test, then logs a done banner; | ||
// a failing assertion throws, so the done banner is only reached when the test does not throw. | ||
function runTests() { | ||
console.log("Issue #0064 - EncodeURI in UTF-8 : start"); | ||
testEncodeURI(); | ||
console.log("Issue #0064 - EncodeURI in UTF-8 : done"); | ||
} | ||
|
||
|
||
/** | ||
* Github issue #64 - Accept unicode characters | ||
 * Make sure the Unicode characters are explicitly filtered out | ||
* by the RegExp in the negated char class | ||
* | ||
* Associated test file : gitHub#64-URIError.js | ||
*/ | ||
function testEncodeURI() { | ||
// Packs the issue #64 fixture and checks the packer output and log. | ||
// The fixture path is relative, so fs resolves it against the process working | ||
// directory : the test must be launched from a directory where ../TestCases exists. | ||
var input = fs.readFileSync("../TestCases/gitHub#64-URIError.js", { encoding:"utf8"}); | ||
// Every optional packer feature is switched off. | ||
// NOTE(review): the exact meaning of each factor is defined by the packer, not visible here. | ||
var options = { | ||
withMath : false, | ||
hash2DContext : false, | ||
hashWebGLContext : false, | ||
hashAudioContext : false, | ||
contextVariableName : false, | ||
contextType : parseInt(0), | ||
reassignVars : false, | ||
varsNotReassigned : [], | ||
crushGainFactor : parseFloat(1), | ||
crushLengthFactor : parseFloat(0), | ||
crushCopiesFactor : parseFloat(0), | ||
crushTiebreakerFactor : parseInt(1), | ||
wrapInSetInterval : false, | ||
timeVariableName : "" | ||
}; | ||
var result = RegPack.packer.runPacker(input, options); | ||
|
||
// Expected result : internal check successful, | ||
// and the unicode characters are excluded from the token range | ||
// NOTE(review): result[0].result[2][1] is presumably the packed output and | ||
// result[0].result[2][2] the packer log - confirm against runPacker(). | ||
assert.notEqual(result[0].result[2][1].indexOf("uffff]"), -1); | ||
assert.notEqual(result[0].result[2][2].indexOf("Final check : passed"), -1); | ||
} | ||
|
||
module.exports = runTests; |