From fbb7100d7553074ab475d9136f1704bfe24c62fd Mon Sep 17 00:00:00 2001 From: Rex Zeng Date: Thu, 12 Jul 2018 17:46:20 +0800 Subject: [PATCH] Improve JS regex detections --- components/prism-javascript.js | 30 +++++++------ components/prism-javascript.min.js | 2 +- prism.js | 30 +++++++------ tests/languages/javascript/regex_feature.test | 44 +++++++++++++------ 4 files changed, 66 insertions(+), 40 deletions(-) diff --git a/components/prism-javascript.js b/components/prism-javascript.js index 1c0ab761e8..0144fa0d22 100644 --- a/components/prism-javascript.js +++ b/components/prism-javascript.js @@ -10,22 +10,26 @@ Prism.languages.insertBefore('javascript', 'keyword', { 'regex': { pattern: /((?:^|[^$\w\xA0-\uFFFF."'\])\s])\s*)\/(\[[^\]\r\n]+]|\\.|[^/\\\[\r\n])+\/[gimyu]{0,5}(?=\s*($|[\r\n,.;})\]]))/, inside: { - 'regex-flags': { - pattern: /[gimyu]{1,5}$/, - alias: 'keyword' + 'regex-flag': { + pattern: /(\/)[gimyu]{1,5}$/, + lookbehind: true }, - 'regex-group': { - pattern: /\[(?:\\\\|\\\]|[^\]])*\]/, - alias: 'escape' + 'regex-charset1': { + pattern: /(\[(\\\]|[^\]])*\])|(\\[wds])/i, + alias: 'regex-charset' }, - 'regex-escape': { - pattern: /\\./, - alias: 'escape' + 'regex-escape': /\\(\\|\+|x[0-9a-f]{2}|u[0-9A-F]{4}|u\{\d+\}|c[a-z]|0[0-7]{1,2}|[^\d])/i, + 'regex-reference': /\\\d/, + 'regex-charset2': { + pattern: /\./, + alias: 'regex-charset' }, - 'regex-punctuation': { - pattern: /[|^$*+(){}]|(?:\?(?![:!=<]))/, - alias: 'operator' - } + 'regex-anchor': /(\^|\$|\\b)/i, + 'regex-quantifier': { + pattern: /\+|\*|(?:\{(?:\d+|\d+,|,\d+|\d+,\d+)\})|(^|[^(])\?/, + lookbehind: true + }, + 'regex-alternation': /\|/ }, lookbehind: true, greedy: true diff --git a/components/prism-javascript.min.js b/components/prism-javascript.min.js index acd8606c18..36993cbb05 100644 --- a/components/prism-javascript.min.js +++ b/components/prism-javascript.min.js @@ -1 +1 @@ -Prism.languages.javascript=Prism.languages.extend("clike",{keyword:/\b(?:as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|var|void|while|with|yield)\b/,number:/\b(?:0[xX][\dA-Fa-f]+|0[bB][01]+|0[oO][0-7]+|NaN|Infinity)\b|(?:\b\d+\.?\d*|\B\.\d+)(?:[Ee][+-]?\d+)?/,"function":/[_$a-z\xA0-\uFFFF][$\w\xA0-\uFFFF]*(?=\s*\()/i,operator:/-[-=]?|\+[+=]?|!=?=?|<>?>?=?|=(?:==?|>)?|&[&=]?|\|[|=]?|\*\*?=?|\/=?|~|\^=?|%=?|\?|\.{3}/}),Prism.languages.insertBefore("javascript","keyword",{regex:{pattern:/((?:^|[^$\w\xA0-\uFFFF."'\])\s])\s*)\/(\[[^\]\r\n]+]|\\.|[^\/\\\[\r\n])+\/[gimyu]{0,5}(?=\s*($|[\r\n,.;})\]]))/,inside:{"regex-flags":{pattern:/[gimyu]{1,5}$/,alias:"keyword"},"regex-group":{pattern:/\[(?:\\\\|\\\]|[^\]])*\]/,alias:"escape"},"regex-escape":{pattern:/\\./,alias:"escape"},"regex-punctuation":{pattern:/[|^$*+(){}]|(?:\?(?![:!=<]))/,alias:"operator"}},lookbehind:!0,greedy:!0},"function-variable":{pattern:/[_$a-z\xA0-\uFFFF][$\w\xA0-\uFFFF]*(?=\s*=\s*(?:function\b|(?:\([^()]*\)|[_$a-z\xA0-\uFFFF][$\w\xA0-\uFFFF]*)\s*=>))/i,alias:"function"},constant:/\b[A-Z][A-Z\d_]*\b/}),Prism.languages.insertBefore("javascript","string",{"template-string":{pattern:/`(?:\\[\s\S]|\${[^}]+}|[^\\`])*`/,greedy:!0,inside:{interpolation:{pattern:/\${[^}]+}/,inside:{"interpolation-punctuation":{pattern:/^\${|}$/,alias:"punctuation"},rest:null}},string:/[\s\S]+/}}}),Prism.languages.javascript["template-string"].inside.interpolation.inside.rest=Prism.languages.javascript,Prism.languages.markup&&Prism.languages.insertBefore("markup","tag",{script:{pattern:/()[\s\S]*?(?=<\/script>)/i,lookbehind:!0,inside:Prism.languages.javascript,alias:"language-javascript",greedy:!0}}),Prism.languages.js=Prism.languages.javascript; \ No newline at end of file +Prism.languages.javascript=Prism.languages.extend("clike",{keyword:/\b(?:as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|var|void|while|with|yield)\b/,number:/\b(?:0[xX][\dA-Fa-f]+|0[bB][01]+|0[oO][0-7]+|NaN|Infinity)\b|(?:\b\d+\.?\d*|\B\.\d+)(?:[Ee][+-]?\d+)?/,"function":/[_$a-z\xA0-\uFFFF][$\w\xA0-\uFFFF]*(?=\s*\()/i,operator:/-[-=]?|\+[+=]?|!=?=?|<>?>?=?|=(?:==?|>)?|&[&=]?|\|[|=]?|\*\*?=?|\/=?|~|\^=?|%=?|\?|\.{3}/}),Prism.languages.insertBefore("javascript","keyword",{regex:{pattern:/((?:^|[^$\w\xA0-\uFFFF."'\])\s])\s*)\/(\[[^\]\r\n]+]|\\.|[^\/\\\[\r\n])+\/[gimyu]{0,5}(?=\s*($|[\r\n,.;})\]]))/,inside:{"regex-flag":{pattern:/(\/)[gimyu]{1,5}$/,lookbehind:!0},"regex-charset1":{pattern:/(\[(\\\]|[^\]])*\])|(\\[wds])/i,alias:"regex-charset"},"regex-escape":/\\(\\|\+|x[0-9a-f]{2}|u[0-9A-F]{4}|u\{\d+\}|c[a-z]|0[0-7]{1,2}|[^\d])/i,"regex-reference":/\\\d/,"regex-charset2":{pattern:/\./,alias:"regex-charset"},"regex-anchor":/(\^|\$|\\b)/i,"regex-quantifier":{pattern:/\+|\*|(?:\{(?:\d+|\d+,|,\d+|\d+,\d+)\})|(^|[^(])\?/,lookbehind:!0},"regex-alternation":/\|/},lookbehind:!0,greedy:!0},"function-variable":{pattern:/[_$a-z\xA0-\uFFFF][$\w\xA0-\uFFFF]*(?=\s*=\s*(?:function\b|(?:\([^()]*\)|[_$a-z\xA0-\uFFFF][$\w\xA0-\uFFFF]*)\s*=>))/i,alias:"function"},constant:/\b[A-Z][A-Z\d_]*\b/}),Prism.languages.insertBefore("javascript","string",{"template-string":{pattern:/`(?:\\[\s\S]|\${[^}]+}|[^\\`])*`/,greedy:!0,inside:{interpolation:{pattern:/\${[^}]+}/,inside:{"interpolation-punctuation":{pattern:/^\${|}$/,alias:"punctuation"},rest:null}},string:/[\s\S]+/}}}),Prism.languages.javascript["template-string"].inside.interpolation.inside.rest=Prism.languages.javascript,Prism.languages.markup&&Prism.languages.insertBefore("markup","tag",{script:{pattern:/()[\s\S]*?(?=<\/script>)/i,lookbehind:!0,inside:Prism.languages.javascript,alias:"language-javascript",greedy:!0}}),Prism.languages.js=Prism.languages.javascript; \ No newline at end of file diff --git a/prism.js b/prism.js index 7b5802ab63..e88806e2f1 100644 --- a/prism.js +++ b/prism.js @@ -733,22 +733,26 @@ Prism.languages.insertBefore('javascript', 'keyword', { 'regex': { pattern: /((?:^|[^$\w\xA0-\uFFFF."'\])\s])\s*)\/(\[[^\]\r\n]+]|\\.|[^/\\\[\r\n])+\/[gimyu]{0,5}(?=\s*($|[\r\n,.;})\]]))/, inside: { - 'regex-flags': { - pattern: /[gimyu]{1,5}$/, - alias: 'keyword' + 'regex-flag': { + pattern: /(\/)[gimyu]{1,5}$/, + lookbehind: true }, - 'regex-group': { - pattern: /\[(?:\\\\|\\\]|[^\]])*\]/, - alias: 'escape' + 'regex-charset1': { + pattern: /(\[(\\\]|[^\]])*\])|(\\[wds])/i, + alias: 'regex-charset' }, - 'regex-escape': { - pattern: /\\./, - alias: 'escape' + 'regex-escape': /\\(\\|\+|x[0-9a-f]{2}|u[0-9A-F]{4}|u\{\d+\}|c[a-z]|0[0-7]{1,2}|[^\d])/i, + 'regex-reference': /\\\d/, + 'regex-charset2': { + pattern: /\./, + alias: 'regex-charset' }, - 'regex-punctuation': { - pattern: /[|^$*+(){}]|(?:\?(?![:!=<]))/, - alias: 'operator' - } + 'regex-anchor': /(\^|\$|\\b)/i, + 'regex-quantifier': { + pattern: /\+|\*|(?:\{(?:\d+|\d+,|,\d+|\d+,\d+)\})|(^|[^(])\?/, + lookbehind: true + }, + 'regex-alternation': /\|/ }, lookbehind: true, greedy: true diff --git a/tests/languages/javascript/regex_feature.test b/tests/languages/javascript/regex_feature.test index 43e7131327..144e291189 100644 --- a/tests/languages/javascript/regex_feature.test +++ b/tests/languages/javascript/regex_feature.test @@ -7,23 +7,21 @@ / '1' '2' '3' '4' '5' / [/foo/], /url\(path\)/, -/\.jpg$|\.png$/ +/\.jpg$|\.png$/, +/\xCA\xfe\u4d5a\u{12345}\076\cI?/, +/^.\1+?/ ---------------------------------------------------- [ ["regex", ["/foo bar/"]], ["punctuation", ";"], - ["regex", ["/foo/", ["regex-flags", "gimyu"]]], ["punctuation", ","], + ["regex", ["/foo/", ["regex-flag", "gimyu"]]], ["punctuation", ","], ["regex", [ "/", - ["regex-group", "[\\[\\]]"], - ["regex-punctuation", "{"], - "2,4", - ["regex-punctuation", "}"], - ["regex-punctuation", "("], - "?:foo", - ["regex-punctuation", ")"], - ["regex-punctuation", "*"], + ["regex-charset1", "[\\[\\]]"], + ["regex-quantifier", "{2,4}"], + "(?:foo)", + ["regex-quantifier", "*"], "/" ]], ["punctuation", ";"], ["regex", ["/foo\"test\"bar/"]], ["punctuation", ";"], @@ -36,11 +34,31 @@ "/", ["regex-escape", "\\."], "jpg", - ["regex-punctuation", "$"], - ["regex-punctuation", "|"], + ["regex-anchor", "$"], + ["regex-alternation", "|"], ["regex-escape", "\\."], "png", - ["regex-punctuation", "$"], + ["regex-anchor", "$"], + "/" + ]], ["punctuation", ","], + ["regex", [ + "/", + ["regex-escape", "\\xCA"], + ["regex-escape", "\\xfe"], + ["regex-escape", "\\u4d5a"], + ["regex-escape", "\\u{12345}"], + ["regex-escape", "\\076"], + ["regex-escape", "\\cI"], + ["regex-quantifier", "?"], + "/" + ]], ["punctuation", ","], + ["regex", [ + "/", + ["regex-anchor", "^"], + ["regex-charset2", "."], + ["regex-reference", "\\1"], + ["regex-quantifier", "+"], + ["regex-quantifier", "?"], "/" ]] ]