From fc215ce7a7c128abc5436519ef32714a49a72dd9 Mon Sep 17 00:00:00 2001 From: Hammad Date: Sat, 9 Apr 2022 21:04:57 -0230 Subject: [PATCH 1/6] CSS Tokenizer - reimplementing InputScanner's lookBack using peek instead --- js/src/css/beautifier.js | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/js/src/css/beautifier.js b/js/src/css/beautifier.js index 997eeb78e..743338a35 100644 --- a/js/src/css/beautifier.js +++ b/js/src/css/beautifier.js @@ -73,6 +73,20 @@ function Beautifier(source_text, options) { } +Beautifier.prototype._lookBack = function(testVal) { + var go_back_space = testVal.length; + + var j = 0; + for (var i = go_back_space + 1; i > go_back_space; i--) { + var text = this._input.peek(-i); + if(text === null || text.toLowerCase() !== testVal[j]) { + return false; + } + j += 1; + } + return true; +}; + Beautifier.prototype.eatString = function(endChars) { var result = ''; this._ch = this._input.next(); @@ -365,13 +379,13 @@ Beautifier.prototype.beautify = function() { } else if (this._ch === ":") { for (var i = 0; i < this.NON_SEMICOLON_NEWLINE_PROPERTY.length; i++) { - if (this._input.lookBack(this.NON_SEMICOLON_NEWLINE_PROPERTY[i])) { + if (this._lookBack(this.NON_SEMICOLON_NEWLINE_PROPERTY[i])) { insideNonSemiColonValues = true; break; } } - if ((insideRule || enteringConditionalGroup) && !(this._input.lookBack("&") || this.foundNestedPseudoClass()) && !this._input.lookBack("(") && !insideAtExtend && parenLevel === 0) { + if ((insideRule || enteringConditionalGroup) && !(this._lookBack("&") || this.foundNestedPseudoClass()) && !this._lookBack("(") && !insideAtExtend && parenLevel === 0) { // 'property: value' delimiter // which could be in a conditional group query this.print_string(':'); @@ -386,7 +400,7 @@ Beautifier.prototype.beautify = function() { // sass nested pseudo-class don't use a space // preserve space before pseudoclasses/pseudoelements, as it means "in any child" - if (this._input.lookBack(" ")) { + if (this._lookBack(" ")) { this._output.space_before_token = true; } if (this._input.peek() === ":") { @@ -427,7 +441,7 @@ Beautifier.prototype.beautify = function() { this._output.space_before_token = true; } } else if (this._ch === '(') { // may be a url - if (this._input.lookBack("url")) { + if (this._lookBack("url")) { this.print_string(this._ch); this.eatWhitespace(); parenLevel++; @@ -500,7 +514,7 @@ Beautifier.prototype.beautify = function() { if (whitespaceChar.test(this._ch)) { this._ch = ''; } - } else if (this._ch === '!' && !this._input.lookBack("\\")) { // !important + } else if (this._ch === '!' && !this._lookBack("\\")) { // !important this.print_string(' '); this.print_string(this._ch); } else { From 20abc4d8e5889995622a5190042f44e5bc940b1f Mon Sep 17 00:00:00 2001 From: Hammad Date: Sat, 9 Apr 2022 21:16:58 -0230 Subject: [PATCH 2/6] CSS Tokenizer - reimplementing InputScanner's peekUntilAfter using peek instead --- js/src/css/beautifier.js | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/js/src/css/beautifier.js b/js/src/css/beautifier.js index 743338a35..74f43e1b4 100644 --- a/js/src/css/beautifier.js +++ b/js/src/css/beautifier.js @@ -87,6 +87,25 @@ Beautifier.prototype._lookBack = function(testVal) { return true; }; +Beautifier.prototype._peekUntilAfter = function(pattern) { + + var i = 0; + var resulting_string = ''; + + var ch = this._input.peek(i); + while (ch) { + resulting_string += ch; + if(pattern.test(ch)) { + break; + } + + i += 1; + ch = this._input.peek(i); + } + return resulting_string; +}; + + Beautifier.prototype.eatString = function(endChars) { var result = ''; this._ch = this._input.next(); @@ -272,7 +291,7 @@ Beautifier.prototype.beautify = function() { this.print_string(this._ch); // strip trailing space, if present, for hash property checks - var variableOrRule = this._input.peekUntilAfter(/[: ,;{}()[\]\/='"]/g); + var variableOrRule = this._peekUntilAfter(/[: ,;{}()[\]\/='"]/g); if (variableOrRule.match(/[ :]$/)) { // we have a variable or pseudo-class, add it and insert one space before continuing From b3cc67127b68d2f452f52f9abbb7fccd071f4ab9 Mon Sep 17 00:00:00 2001 From: Hammad Date: Sat, 9 Apr 2022 22:15:30 -0230 Subject: [PATCH 3/6] CSS Tokenizer - reimplementing InputScanner's peek method inside the beautifier as peekString to support upcoming tokenizer --- js/src/css/beautifier.js | 42 ++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/js/src/css/beautifier.js b/js/src/css/beautifier.js index 74f43e1b4..f00e68437 100644 --- a/js/src/css/beautifier.js +++ b/js/src/css/beautifier.js @@ -78,7 +78,7 @@ Beautifier.prototype._lookBack = function(testVal) { var j = 0; for (var i = go_back_space + 1; i > go_back_space; i--) { - var text = this._input.peek(-i); + var text = this.peekString(-i); if(text === null || text.toLowerCase() !== testVal[j]) { return false; } @@ -92,7 +92,7 @@ Beautifier.prototype._peekUntilAfter = function(pattern) { var i = 0; var resulting_string = ''; - var ch = this._input.peek(i); + var ch = this.peekString(i); while (ch) { resulting_string += ch; if(pattern.test(ch)) { @@ -100,7 +100,15 @@ Beautifier.prototype._peekUntilAfter = function(pattern) { } i += 1; - ch = this._input.peek(i); + ch = this.peekString(i); + } + return resulting_string; +}; + +Beautifier.prototype.peekString = function (index) { + var resulting_string = ''; + if(this._input.peek(index)) { + resulting_string = this._input.peek(index); } return resulting_string; }; @@ -126,9 +134,9 @@ Beautifier.prototype.eatString = function(endChars) { // newline character found; if the user has preserve_newlines off, only // the first newline will be output Beautifier.prototype.eatWhitespace = function(allowAtLeastOneNewLine) { - var result = whitespaceChar.test(this._input.peek()); + var result = whitespaceChar.test(this.peekString()); var newline_count = 0; - while (whitespaceChar.test(this._input.peek())) { + while (whitespaceChar.test(this.peekString())) { this._ch = this._input.next(); if (allowAtLeastOneNewLine && this._ch === '\n') { if (newline_count === 0 || newline_count < this._options.max_preserve_newlines) { @@ -146,7 +154,7 @@ Beautifier.prototype.eatWhitespace = function(allowAtLeastOneNewLine) { Beautifier.prototype.foundNestedPseudoClass = function() { var openParen = 0; var i = 1; - var ch = this._input.peek(i); + var ch = this.peekString(i); while (ch) { if (ch === "{") { return true; @@ -162,7 +170,7 @@ Beautifier.prototype.foundNestedPseudoClass = function() { return false; } i++; - ch = this._input.peek(i); + ch = this.peekString(i); } return false; }; @@ -246,7 +254,7 @@ Beautifier.prototype.beautify = function() { if (!this._ch) { break; - } else if (this._ch === '/' && this._input.peek() === '*') { + } else if (this._ch === '/' && this.peekString() === '*') { // /* css comment */ // Always start block comments on a new line. // This handles scenarios where a block comment immediately @@ -271,7 +279,7 @@ Beautifier.prototype.beautify = function() { // Block comments are followed by a new line so they don't // share a line with other properties this._output.add_new_line(); - } else if (this._ch === '/' && this._input.peek() === '/') { + } else if (this._ch === '/' && this.peekString() === '/') { // // single line comment // Preserves the space before a comment // on the same line as a rule @@ -285,7 +293,7 @@ Beautifier.prototype.beautify = function() { this.preserveSingleSpace(isAfterSpace); // deal with less propery mixins @{...} - if (this._input.peek() === '{') { + if (this.peekString() === '{') { this.print_string(this._ch + this.eatString('}')); } else { this.print_string(this._ch); @@ -320,7 +328,7 @@ Beautifier.prototype.beautify = function() { this.indent(); } } - } else if (this._ch === '#' && this._input.peek() === '{') { + } else if (this._ch === '#' && this.peekString() === '{') { this.preserveSingleSpace(isAfterSpace); this.print_string(this._ch + this.eatString('}')); } else if (this._ch === '{') { @@ -385,11 +393,11 @@ Beautifier.prototype.beautify = function() { this._output.add_new_line(); if (this._options.newline_between_rules && !this._output.just_added_blankline()) { - if (this._input.peek() !== '}') { + if (this.peekString() !== '}') { this._output.add_new_line(true); } } - if (this._input.peek() === ')') { + if (this.peekString() === ')') { this._output.trim(true); if (this._options.brace_style === "expand") { this._output.add_new_line(true); @@ -422,7 +430,7 @@ Beautifier.prototype.beautify = function() { if (this._lookBack(" ")) { this._output.space_before_token = true; } - if (this._input.peek() === ":") { + if (this.peekString() === ":") { // pseudo-element this._ch = this._input.next(); this.print_string("::"); @@ -451,7 +459,7 @@ Beautifier.prototype.beautify = function() { // line. Block comments are also affected, but // a new line is always output before one inside // that section - if (this._input.peek() !== '/') { + if (this.peekString() !== '/') { this._output.add_new_line(); } } else { @@ -494,7 +502,7 @@ Beautifier.prototype.beautify = function() { parenLevel--; this.outdent(); } - if (insideScssMap && this._input.peek() === ";" && this._options.selector_separator_newline) { + if (insideScssMap && this.peekString() === ";" && this._options.selector_separator_newline) { insideScssMap = false; this.outdent(); this._output.add_new_line(); @@ -541,7 +549,7 @@ Beautifier.prototype.beautify = function() { this.preserveSingleSpace(preserveAfterSpace || isAfterSpace); this.print_string(this._ch); - if (!this._output.just_added_newline() && this._input.peek() === '\n' && insideNonSemiColonValues) { + if (!this._output.just_added_newline() && this.peekString() === '\n' && insideNonSemiColonValues) { this._output.add_new_line(); } } From 7e71038e8825fa3812a0e95b909deb504dcbe9f1 Mon Sep 17 00:00:00 2001 From: Hammad Date: Sun, 10 Apr 2022 00:14:33 -0230 Subject: [PATCH 4/6] CSS Tokenizer - using peekString instead of lookBack because it is more efficient --- js/src/css/beautifier.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/src/css/beautifier.js b/js/src/css/beautifier.js index f00e68437..b7e89243f 100644 --- a/js/src/css/beautifier.js +++ b/js/src/css/beautifier.js @@ -412,7 +412,7 @@ Beautifier.prototype.beautify = function() { } } - if ((insideRule || enteringConditionalGroup) && !(this._lookBack("&") || this.foundNestedPseudoClass()) && !this._lookBack("(") && !insideAtExtend && parenLevel === 0) { + if ((insideRule || enteringConditionalGroup) && !(this.peekString(-2) === "&" || this.foundNestedPseudoClass()) && this.peekString(-2) !== "(" && !insideAtExtend && parenLevel === 0) { // 'property: value' delimiter // which could be in a conditional group query this.print_string(':'); @@ -541,7 +541,7 @@ Beautifier.prototype.beautify = function() { if (whitespaceChar.test(this._ch)) { this._ch = ''; } - } else if (this._ch === '!' && !this._lookBack("\\")) { // !important + } else if (this._ch === '!' && this.peekString(-2) !== "\\") { // !important this.print_string(' '); this.print_string(this._ch); } else { From eeab761df44aefea9c09791fbed47a47b818c871 Mon Sep 17 00:00:00 2001 From: Hammad Date: Mon, 11 Apr 2022 01:48:18 -0230 Subject: [PATCH 5/6] CSS Tokenizer - Strings --- js/src/css/beautifier.js | 26 ++++++++++++-- js/src/css/tokenizer.js | 76 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/js/src/css/beautifier.js b/js/src/css/beautifier.js index b7e89243f..1c1a7df4b 100644 --- a/js/src/css/beautifier.js +++ b/js/src/css/beautifier.js @@ -32,6 +32,7 @@ var Options = require('./options').Options; var Output = require('../core/output').Output; var InputScanner = require('../core/inputscanner').InputScanner; var Directives = require('../core/directives').Directives; +var Tokenizer = require('./tokenizer').Tokenizer; var directives_core = new Directives(/\/\*/, /\*\//); @@ -68,6 +69,7 @@ function Beautifier(source_text, options) { "@document": true }; this.NON_SEMICOLON_NEWLINE_PROPERTY = [ + "grid-template-areas", "grid-template" ]; @@ -197,6 +199,16 @@ Beautifier.prototype.outdent = function() { } }; +Beautifier.prototype.get_token_on_position = function() { + var token = this._position_map[this._input.__position - 1]; + this._update_pos_to_end_of_token(token); + return token; +}; + +Beautifier.prototype._update_pos_to_end_of_token = function(token) { + this._input.__position = token.position[1] + 1; +}; + /*_____________________--------------------_____________________*/ Beautifier.prototype.beautify = function() { @@ -222,10 +234,14 @@ Beautifier.prototype.beautify = function() { this._output = new Output(this._options, baseIndentString); this._input = new InputScanner(source_text); + var tokenizer = new Tokenizer(source_text, this._options); + this._tokens = tokenizer.tokenize(); + this._position_map = tokenizer.get_positional_map(); this._indentLevel = 0; this._nestedLevel = 0; this._ch = null; + var current_token = null; var parenLevel = 0; var insideRule = false; @@ -440,9 +456,13 @@ Beautifier.prototype.beautify = function() { } } } else if (this._ch === '"' || this._ch === '\'') { + current_token = this.get_token_on_position(); this.preserveSingleSpace(isAfterSpace); - this.print_string(this._ch + this.eatString(this._ch)); - this.eatWhitespace(true); + this.print_string(current_token.text); + + if (!this._output.just_added_newline() && this._input.peek() === '\n' && insideNonSemiColonValues) { + this._output.add_new_line(); + } } else if (this._ch === ';') { insideNonSemiColonValues = false; if (parenLevel === 0) { @@ -553,6 +573,8 @@ Beautifier.prototype.beautify = function() { this._output.add_new_line(); } } + + current_token = null; } var sweetCode = this._output.get_code(eol); diff --git a/js/src/css/tokenizer.js b/js/src/css/tokenizer.js index 648ca950e..9ad8feb0b 100644 --- a/js/src/css/tokenizer.js +++ b/js/src/css/tokenizer.js @@ -27,3 +27,79 @@ */ 'use strict'; + +var BaseTokenizer = require('../core/tokenizer').Tokenizer; +var BASETOKEN = require('../core/tokenizer').TOKEN; + +var TOKEN = { + STRING: 'TK_STRING', + UNKNOWN: 'TK_UNKNOWN', + START: BASETOKEN.START, + RAW: BASETOKEN.RAW, + EOF: BASETOKEN.EOF +}; + +var Tokenizer = function(input_string, options) { + BaseTokenizer.call(this, input_string, options); + + this._position_map = {}; +}; +Tokenizer.prototype = new BaseTokenizer(); + +Tokenizer.prototype._get_next_token = function(previous_token, open_token) { // jshint unused:false + var token = null; + var pos; + this._readWhitespace(); + var c = this._input.peek(); + + if(c === null) { + pos = this._input.__position; + return this._create_token_with_pos(TOKEN.EOF, '', [pos, pos]); + } + + token = token || this._read_strings(c); + if(!token) { + pos = this._input.__position; + token = this._create_token_with_pos(TOKEN.UNKNOWN, this._input.next(), [pos, pos]); + } + + return token; +}; + +Tokenizer.prototype._create_token_with_pos = function (type, text, position) { + var token = BaseTokenizer.prototype._create_token.call(this, type, text); + token.position = position; + this._position_map[position[0]] = token; + return token; +}; + +Tokenizer.prototype.get_positional_map = function () { + return this._position_map; +}; + +Tokenizer.prototype._read_strings = function(c) { + if (c === '\'' || c === '"') { + var startPos = this._input.__position; + + var result = c; + this._input.next(); // skip current char + var ch = this._input.next(); + while (ch) { + result += ch; + if (ch === "\\") { + result += this._input.next(); + } else if (c.indexOf(ch) !== -1 || ch === "\n") { + break; + } + ch = this._input.next(); + } + + var endPos = this._input.__position - 1; + return this._create_token_with_pos(TOKEN.STRING, result, [startPos, endPos]); + } + + return null; +}; + +module.exports.Tokenizer = Tokenizer; +module.exports.TOKEN = TOKEN; \ No newline at end of file From fcfb6242d4e4fe777d1ff541f9322a5ea4023427 Mon Sep 17 00:00:00 2001 From: Hammad Date: Mon, 11 Apr 2022 01:48:59 -0230 Subject: [PATCH 6/6] CSS Tokenizer - Commas --- js/src/css/beautifier.js | 3 ++- js/src/css/tokenizer.js | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/js/src/css/beautifier.js b/js/src/css/beautifier.js index 1c1a7df4b..5550e29b5 100644 --- a/js/src/css/beautifier.js +++ b/js/src/css/beautifier.js @@ -529,7 +529,8 @@ Beautifier.prototype.beautify = function() { } this.print_string(this._ch); } else if (this._ch === ',') { - this.print_string(this._ch); + current_token = this.get_token_on_position(); + this.print_string(current_token.text); this.eatWhitespace(true); if (this._options.selector_separator_newline && (!insidePropertyValue || insideScssMap) && parenLevel === 0 && !insideAtImport && !insideAtExtend) { this._output.add_new_line(); diff --git a/js/src/css/tokenizer.js b/js/src/css/tokenizer.js index 9ad8feb0b..9643c8732 100644 --- a/js/src/css/tokenizer.js +++ b/js/src/css/tokenizer.js @@ -33,6 +33,7 @@ var BASETOKEN = require('../core/tokenizer').TOKEN; var TOKEN = { STRING: 'TK_STRING', + COMMA: 'TK_COMMA', UNKNOWN: 'TK_UNKNOWN', START: BASETOKEN.START, RAW: BASETOKEN.RAW, @@ -58,6 +59,7 @@ Tokenizer.prototype._get_next_token = function(previous_token, open_token) { // } token = token || this._read_strings(c); + token = token || this._read_commas(c); if(!token) { pos = this._input.__position; token = this._create_token_with_pos(TOKEN.UNKNOWN, this._input.next(), [pos, pos]); @@ -77,6 +79,18 @@ Tokenizer.prototype.get_positional_map = function () { return this._position_map; }; +Tokenizer.prototype._read_commas = function (c) { + var token = null; + + if (c === ',') { + var pos = this._input.__position; + this._input.next(); + token = this._create_token_with_pos(TOKEN.COMMA, c, [pos, pos]); + } + + return token; +}; + Tokenizer.prototype._read_strings = function(c) { if (c === '\'' || c === '"') { var startPos = this._input.__position;