Skip to content

Commit

Permalink
Merge pull request #123 in EXTENSIONS/browser-extension from feature/…
Browse files Browse the repository at this point in the history
…issues/1001-encoding to master

* commit 'f5997350cd1ce8329fa05e96db59981fb32b353a':
  Removed unused variable
  Fixed tests runner
  Encoding fallback strategy
  • Loading branch information
Aleksandr Tropnikov committed Apr 20, 2018
2 parents 18cac6f + f599735 commit 0e463d3
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 2 deletions.
44 changes: 42 additions & 2 deletions Extension/lib/libs/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,19 @@ window.TextEncoder = window.TextDecoder = null;
"windows-1252":[8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,381,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,382,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255]
};

// Encode-side fallback strategy, keyed by encoding name.
// When a code point is missing from an encoding's own index, the encoder
// first tries a best-fit lookup in the index named by "index" (when one is
// configured) and otherwise falls back to the "replacement" value
// (63 is the ASCII code of '?').
global['encode-fallback-replacement'] = {
  'windows-1251': { replacement: 63, index: 'windows-1252' },
  'windows-1252': { replacement: 63 }
};

// For strict environments where `this` inside the global scope
// is `undefined`, take a pure object instead
}(this || {}));
Expand Down Expand Up @@ -870,6 +883,22 @@ window.TextEncoder = window.TextDecoder = null;
return global['encoding-indexes'][name];
}

/**
 * Looks up the encode fallback strategy registered for an encoding in
 * global['encode-fallback-replacement'].
 *
 * @param {string} name Name of the encoding (the table key).
 * @returns {?{replacement: String|number|*, index: Array<number>|Array<Array<number>>}}
 *     The configured replacement value together with the result of
 *     index() for the configured fallback index name, or null when the
 *     table has no entry for |name|. When the entry names no fallback
 *     index, index() is still called with undefined and its result is
 *     stored as-is.
 */
function fallbackReplacement(name) {
var entry = global['encode-fallback-replacement'][name];
if (!entry) {
return null;
}

var strategy = {};
strategy.replacement = entry.replacement;
strategy.index = index(entry.index);
return strategy;
}

/**
* @param {number} pointer The |pointer| to search for in the gb18030 index.
* @return {?number} The code point corresponding to |pointer| in |index|,
Expand Down Expand Up @@ -1615,8 +1644,9 @@ window.TextEncoder = window.TextDecoder = null;
* @implements {Encoder}
* @param {!Array.<?number>} index The encoding index.
* @param {{fatal: boolean}} options
* @param {{replacement: String|number|*, index: Array<number>|Array<Array<number>>}} fallback
*/
function SingleByteEncoder(index, options) {
function SingleByteEncoder(index, options, fallback) {
var fatal = options.fatal;
/**
* @param {Stream} stream Input stream.
Expand All @@ -1637,6 +1667,15 @@ window.TextEncoder = window.TextDecoder = null;
// single-byte.
var pointer = indexPointerFor(code_point, index);

// If the encoding's index table doesn't contain the code point, switch to the fallback strategy
if (pointer == null && fallback.index) {
pointer = indexPointerFor(code_point, fallback.index);
}

if (pointer == null) {
return fallback.replacement;
}

// 4. If pointer is null, return error with code point.
if (pointer === null)
encoderError(code_point);
Expand All @@ -1655,13 +1694,14 @@ window.TextEncoder = window.TextDecoder = null;
category.encodings.forEach(function(encoding) {
var name = encoding.name;
var idx = index(name.toLowerCase());
var fallback = fallbackReplacement(name.toLowerCase());
/** @param {{fatal: boolean}} options */
decoders[name] = function(options) {
return new SingleByteDecoder(idx, options);
};
/** @param {{fatal: boolean}} options */
encoders[name] = function(options) {
return new SingleByteEncoder(idx, options);
return new SingleByteEncoder(idx, options, fallback);
};
});
});
Expand Down
20 changes: 20 additions & 0 deletions Extension/tests/miscellaneous/test-encoding.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!DOCTYPE html>
<!-- QUnit host page for the encoding tests: loads QUnit, the encoding
     library under test, and the test script, in that order. -->
<html>

<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width">
<title>Encoding Tests</title>
<link rel="stylesheet" href="../qunit/qunit-2.0.1.css">
</head>

<body>
<!-- Containers that QUnit looks up by id: results output and test fixture. -->
<div id="qunit"></div>
<div id="qunit-fixture"></div>
<script src="../qunit/qunit-2.0.1.js"></script>

<!-- The library under test must be loaded before the tests that use it. -->
<script type="text/javascript" src="../../lib/libs/encoding.js"></script>
<script type="text/javascript" src="test-encoding.js"></script>
</body>

</html>
32 changes: 32 additions & 0 deletions Extension/tests/miscellaneous/test-encoding.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/* global QUnit */

/**
 * Smoke tests for the legacy-encoding support of TextEncoder/TextDecoder.
 *
 * For each charset, every UTF-16 code unit below 65533 is encoded and the
 * result decoded, which verifies that neither direction throws; ASCII code
 * points must additionally encode to themselves. Two specific cases then
 * pin the windows-1251 fallback strategy: best-fit via the windows-1252
 * index, and the '?' (63) replacement.
 *
 * Assertions use QUnit's (actual, expected, message) argument order so
 * that failure reports label the two values correctly.
 */
QUnit.test("Test Encodings", function (assert) {

    /**
     * Encodes and decodes every code unit below 65533 with the given charset.
     * @param {string} charset Encoding label passed to TextEncoder/TextDecoder.
     */
    function testEncodeDecode(charset) {

        var encoder = new TextEncoder(charset, { NONSTANDARD_allowLegacyEncoding: true });
        var decoder = new TextDecoder(charset, { NONSTANDARD_allowLegacyEncoding: true });

        for (var i = 0; i < 65533; i++) {
            var bytes = encoder.encode(String.fromCharCode(i));
            // The decoded value is not asserted; this only checks that
            // decoding the produced bytes does not throw.
            decoder.decode(bytes);
            if (i <= 0x7F) {
                assert.strictEqual(bytes[0], i,
                    'ASCII code point ' + i + ' encodes to itself in ' + charset);
            }
        }
    }

    testEncodeDecode('utf-8');
    testEncodeDecode('windows-1251');
    testEncodeDecode('windows-1252');

    // Some specific cases

    // Fallback to windows-1252
    var encoder = new TextEncoder('windows-1251', { NONSTANDARD_allowLegacyEncoding: true });
    var bytes = encoder.encode(String.fromCharCode(244));
    assert.strictEqual(bytes[0], 244,
        'U+00F4 is encoded through the windows-1252 fallback index');

    // Fallback to replacement
    assert.strictEqual(encoder.encode("Ⓢ")[0], 63,
        'unmappable character is encoded as the replacement (63)');
});
4 changes: 4 additions & 0 deletions Extension/tests/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ result=1
echo "Ring buffer tests"
node-qunit-phantomjs miscellaneous/test-ring-buffer.html
) || result=0
# Run the encoding QUnit page in a subshell; if the subshell exits
# non-zero, result is set to 0, which the check below tests for.
(
echo "Encoding tests"
node-qunit-phantomjs miscellaneous/test-encoding.html
) || result=0

if [ $result = 0 ]
then
Expand Down

0 comments on commit 0e463d3

Please sign in to comment.