Skip to content

Commit

Permalink
Merge pull request #122 from richardfrost/fix_unicode_whole_match
Browse files Browse the repository at this point in the history
Fix unicode whole match
  • Loading branch information
richardfrost authored Mar 16, 2019
2 parents 7db933d + 53425a7 commit f540136
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 7 deletions.
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "advancedprofanityfilter",
"version": "2.0.2",
"version": "2.0.3",
"description": "A browser extension to filter profanity from webpages.",
"main": "filter.js",
"repository": {
Expand Down Expand Up @@ -140,7 +140,8 @@
"nyc": {
"cwd": "test/",
"exclude": [
"**/*.spec.js"
"**/*.spec.js",
"built/vendor/**/*"
]
}
}
4 changes: 2 additions & 2 deletions src/script/lib/word.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ export default class Word {
try {
if (Word.containsDoubleByte(str)) {
// Workaround for the lack of word boundary (\b) support for Unicode characters
// (^|[\s.,'"+!?|-]*)([\w-]*куче[\w-]*)([\s.,'"+!?|-]*|$)/giu
return new RegExp('(^|' + Word._unicodeWordBoundary + '+)([\\w-]*' + Word.processPhrase(str, matchRepeated) + '[\\w-]*)(' + Word._unicodeWordBoundary + '+|$)', 'giu');
// (^|[\s.,'"+!?|-]*)([\S]*куче[\S]*)([\s.,'"+!?|-]*|$)/giu
return new RegExp('(^|' + Word._unicodeWordBoundary + '*)([\\S]*' + Word.processPhrase(str, matchRepeated) + '[\\S]*)(' + Word._unicodeWordBoundary + '*|$)', 'giu');
} else {
return new RegExp('\\b[\\w-]*' + Word.processPhrase(str, matchRepeated) + '[\\w-]*\\b', 'gi');
}
Expand Down
2 changes: 1 addition & 1 deletion src/static/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"short_name": "Profanity Filter",
"author": "Richard Frost",
"manifest_version": 2,
"version": "2.0.2",
"version": "2.0.3",
"description": "Advanced Profanity Filter helps to clean up bad language on the websites you and your family visit.",
"icons": {
"16": "img/icon16.png",
Expand Down
9 changes: 9 additions & 0 deletions test/lib/filter.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,15 @@ describe('Filter', function() {
filter.init();
expect(filter.replaceText('The bigкучеs ran around the yard.')).to.equal('The b_______ ran around the yard.');
});

// Regression test for unicode whole-word matching: every word that contains
// the configured Cyrillic fragment should be replaced in full by the censor
// character, regardless of letter case or surrounding Cyrillic letters.
it('Should filter a whole word with (*) characters', function() {
let filter = new Filter;
// censorCharacter '*' with censorFixedLength 0 and no preserved first/last
// letter: the expectation below has one '*' per character of each matched word.
filter.cfg = new Config({ words: Object.assign({}, testWords), filterMethod: 0, globalMatchMethod: 3, censorCharacter: '*', censorFixedLength: 0, preserveFirst: false, preserveLast: false });
// NOTE(review): matchMethod 2 is presumably the "whole word" match method — confirm against the Config constants.
filter.cfg.words['словен'] = { matchMethod: 2, repeat: false };
filter.init();
// 'Словении' and 'словенцы' both contain 'словен' (the first only case-insensitively)
// and are expected to be censored entirely.
expect(filter.replaceText('За пределами Словении этнические словенцы компактно')).to.equal('За пределами ******** этнические ******** компактно');
});
});
});

Expand Down
4 changes: 2 additions & 2 deletions test/lib/word.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,13 @@ describe('Word', function() {
describe('Unicode', function() {
it('should build the proper RegExp for whole match', function() {
expect(Word.buildWholeRegexp('куче')).to.eql(
new RegExp('(^|[\\s.,\'"+!?|-]+)([\\w-]*куче[\\w-]*)([\\s.,\'"+!?|-]+|$)', 'giu')
new RegExp('(^|[\\s.,\'"+!?|-]*)([\\S]*куче[\\S]*)([\\s.,\'"+!?|-]*|$)', 'giu')
);
});

it('should build the proper RegExp for whole match with matchRepeated', function() {
expect(Word.buildWholeRegexp('куче', true)).to.eql(
new RegExp('(^|[\\s.,\'"+!?|-]+)([\\w-]*к+у+ч+е+[\\w-]*)([\\s.,\'"+!?|-]+|$)', 'giu')
new RegExp('(^|[\\s.,\'"+!?|-]*)([\\S]*к+у+ч+е+[\\S]*)([\\s.,\'"+!?|-]*|$)', 'giu')
);
});
});
Expand Down

0 comments on commit f540136

Please sign in to comment.