Merge pull request #122 from richardfrost/fix_unicode_whole_match

Fix unicode whole match
FrostCo · Mar 16, 2019 · f540136
2 parents 7db933d + 53425a7
commit f540136
Show file tree

Hide file tree

Showing 5 changed files with 17 additions and 7 deletions.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "advancedprofanityfilter",
-  "version": "2.0.2",
+  "version": "2.0.3",
   "description": "A browser extension to filter profanity from webpages.",
   "main": "filter.js",
   "repository": {
@@ -140,7 +140,8 @@
   "nyc": {
     "cwd": "test/",
     "exclude": [
-      "**/*.spec.js"
+      "**/*.spec.js",
+      "built/vendor/**/*"
     ]
   }
 }
diff --git a/src/script/lib/word.ts b/src/script/lib/word.ts
@@ -77,8 +77,8 @@ export default class Word {
     try {
       if (Word.containsDoubleByte(str)) {
         // Workaround for the lack of word boundary (\b) support for Unicode characters
-        // (^|[\s.,'"+!?|-]*)([\w-]*куче[\w-]*)([\s.,'"+!?|-]*|$)/giu
-        return new RegExp('(^|' + Word._unicodeWordBoundary + '+)([\\w-]*' + Word.processPhrase(str, matchRepeated) + '[\\w-]*)(' + Word._unicodeWordBoundary + '+|$)', 'giu');
+        // /(^|[\s.,'"+!?|-]*)([\S]*куче[\S]*)([\s.,'"+!?|-]*|$)/giu
+        return new RegExp('(^|' + Word._unicodeWordBoundary + '*)([\\S]*' + Word.processPhrase(str, matchRepeated) + '[\\S]*)(' + Word._unicodeWordBoundary + '*|$)', 'giu');
       } else {
         return new RegExp('\\b[\\w-]*' + Word.processPhrase(str, matchRepeated) + '[\\w-]*\\b', 'gi');
       }

diff --git a/src/static/manifest.json b/src/static/manifest.json
@@ -3,7 +3,7 @@
   "short_name": "Profanity Filter",
   "author": "Richard Frost",
   "manifest_version": 2,
-  "version": "2.0.2",
+  "version": "2.0.3",
   "description": "Advanced Profanity Filter helps to clean up bad language on the websites you and your family visit.",
   "icons": {
     "16": "img/icon16.png",

diff --git a/test/lib/filter.spec.js b/test/lib/filter.spec.js
@@ -151,6 +151,15 @@ describe('Filter', function() {
           filter.init();
           expect(filter.replaceText('The bigкучеs ran around the yard.')).to.equal('The b_______ ran around the yard.');
         });
+
+        it('Should filter a whole word with (*) characters', function() {
+          let filter = new Filter;
+          filter.cfg = new Config({ words: Object.assign({}, testWords), filterMethod: 0, globalMatchMethod: 3, censorCharacter: '*', censorFixedLength: 0, preserveFirst: false, preserveLast: false });
+          filter.cfg.words['словен'] = { matchMethod: 2, repeat: false };
+          filter.init();
+          debugger;
+          expect(filter.replaceText('За пределами Словении этнические словенцы компактно')).to.equal('За пределами ******** этнические ******** компактно');
+        });
       });
     });
 

diff --git a/test/lib/word.spec.js b/test/lib/word.spec.js
@@ -121,13 +121,13 @@ describe('Word', function() {
       describe('Unicode', function() {
         it('should build the proper RegExp for whole match', function() {
           expect(Word.buildWholeRegexp('куче')).to.eql(
-            new RegExp('(^|[\\s.,\'"+!?|-]+)([\\w-]*куче[\\w-]*)([\\s.,\'"+!?|-]+|$)', 'giu')
+            new RegExp('(^|[\\s.,\'"+!?|-]*)([\\S]*куче[\\S]*)([\\s.,\'"+!?|-]*|$)', 'giu')
           );
         });
 
         it('should build the proper RegExp for whole match with matchRepeated', function() {
           expect(Word.buildWholeRegexp('куче', true)).to.eql(
-            new RegExp('(^|[\\s.,\'"+!?|-]+)([\\w-]*к+у+ч+е+[\\w-]*)([\\s.,\'"+!?|-]+|$)', 'giu')
+            new RegExp('(^|[\\s.,\'"+!?|-]*)([\\S]*к+у+ч+е+[\\S]*)([\\s.,\'"+!?|-]*|$)', 'giu')
           );
         });
       });