From 9c40da736905c3c2559d13bbce5af5dc320864ca Mon Sep 17 00:00:00 2001 From: harttle Date: Sun, 24 Jan 2021 12:43:32 +0800 Subject: [PATCH] fix: allow string literals to contain delimiters, fixes #288 --- src/parser/tokenizer.ts | 21 ++- test/e2e/issues.ts | 7 +- test/unit/parser/tokenizer.ts | 260 +++++++++++++++++++--------------- 3 files changed, 164 insertions(+), 124 deletions(-) diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts index 7949f7cd71..e53d409218 100644 --- a/src/parser/tokenizer.ts +++ b/src/parser/tokenizer.ts @@ -126,11 +126,10 @@ export class Tokenizer { return new HTMLToken(this.input, begin, this.p, this.file) } - readTagToken (options: NormalizedFullOptions): TagToken { + readTagToken (options: NormalizedFullOptions = defaultOptions): TagToken { const { file, input } = this - const { tagDelimiterRight } = options const begin = this.p - if (this.readTo(tagDelimiterRight) === -1) { + if (this.readToDelimiter(options.tagDelimiterRight) === -1) { throw this.mkError(`tag ${this.snapshot(begin)} not closed`, begin) } const token = new TagToken(input, begin, this.p, options, file) @@ -138,11 +137,23 @@ export class Tokenizer { return token } - readOutputToken (options: NormalizedFullOptions): OutputToken { + readToDelimiter (delimiter: string) { + while (this.p < this.N) { + if ((this.peekType() & QUOTE)) { + this.readQuoted() + continue + } + ++this.p + if (this.rmatch(delimiter)) return this.p + } + return -1 + } + + readOutputToken (options: NormalizedFullOptions = defaultOptions): OutputToken { const { file, input } = this const { outputDelimiterRight } = options const begin = this.p - if (this.readTo(outputDelimiterRight) === -1) { + if (this.readToDelimiter(outputDelimiterRight) === -1) { throw this.mkError(`output ${this.snapshot(begin)} not closed`, begin) } return new OutputToken(input, begin, this.p, options, file) diff --git a/test/e2e/issues.ts b/test/e2e/issues.ts index 0b8d9476cf..7b418043af 100644 --- a/test/e2e/issues.ts 
+++ b/test/e2e/issues.ts @@ -56,7 +56,7 @@ describe('Issues', function () { }) it('#277 Passing liquid in FilterImpl', () => { const engine = new Liquid() - engine.registerFilter('render', function (template: string, name: string) { + engine.registerFilter('render', function (this: any, template: string, name: string) { return this.liquid.parseAndRenderSync(decodeURIComponent(template), { name }) }) const html = engine.parseAndRenderSync( @@ -65,4 +65,9 @@ describe('Issues', function () { ) expect(html).to.equal('hello foo') }) + it('#288 Unexpected behavior when string literals contain }}', async () => { + const engine = new Liquid() + const html = await engine.parseAndRender(`{{ '{{' }}{{ '}}' }}`) + expect(html).to.equal('{{}}') + }) }) diff --git a/test/unit/parser/tokenizer.ts b/test/unit/parser/tokenizer.ts index 9453fafd35..4bd7096688 100644 --- a/test/unit/parser/tokenizer.ts +++ b/test/unit/parser/tokenizer.ts @@ -10,7 +10,7 @@ import { QuotedToken } from '../../../src/tokens/quoted-token' import { OutputToken } from '../../../src/tokens/output-token' import { HTMLToken } from '../../../src/tokens/html-token' -describe('Tokenize', function () { +describe('Tokenizer', function () { it('should read quoted', () => { expect(new Tokenizer('"foo" ff').readQuoted()!.getText()).to.equal('"foo"') expect(new Tokenizer(' "foo"ff').readQuoted()!.getText()).to.equal('"foo"') @@ -84,125 +84,149 @@ describe('Tokenize', function () { expect(rols.name.content).to.equal('rows') expect(rols.value!.getText()).to.equal('data["rows"]') }) - it('should read HTML token', function () { - const html = '

Lorem Ipsum

' - const tokenizer = new Tokenizer(html) - const tokens = tokenizer.readTopLevelTokens() - - expect(tokens.length).to.equal(1) - expect(tokens[0]).instanceOf(HTMLToken) - expect((tokens[0] as HTMLToken).getContent()).to.equal(html) - }) - it('should read tag token', function () { - const html = '

{% for p in a[1]%}

' - const tokenizer = new Tokenizer(html) - const tokens = tokenizer.readTopLevelTokens() - - expect(tokens.length).to.equal(3) - const tag = tokens[1] as TagToken - expect(tag).instanceOf(TagToken) - expect(tag.name).to.equal('for') - expect(tag.args).to.equal('p in a[1]') - }) - it('should allow unclosed tag inside {% raw %}', function () { - const html = '{%raw%} {%if%} {%else {%endraw%}' - const tokenizer = new Tokenizer(html) - const tokens = tokenizer.readTopLevelTokens() - - expect(tokens.length).to.equal(3) - expect(tokens[0]).to.haveOwnProperty('name', 'raw') - expect((tokens[1] as any).getContent()).to.equal(' {%if%} {%else ') - }) - it('should allow unclosed endraw tag inside {% raw %}', function () { - const html = '{%raw%} {%endraw {%raw%} {%endraw%}' - const tokenizer = new Tokenizer(html) - const tokens = tokenizer.readTopLevelTokens() - - expect(tokens.length).to.equal(3) - expect(tokens[0]).to.haveOwnProperty('name', 'raw') - expect((tokens[1] as any).getContent()).to.equal(' {%endraw {%raw%} ') - }) - it('should throw when {% raw %} not closed', function () { - const html = '{%raw%} {%endraw {%raw%}' - const tokenizer = new Tokenizer(html) - expect(() => tokenizer.readTopLevelTokens()).to.throw('raw "{%raw%} {%end..." not closed, line:1, col:8') - }) - it('should read output token', function () { - const html = '

{{foo | date: "%Y-%m-%d"}}

' - const tokenizer = new Tokenizer(html) - const tokens = tokenizer.readTopLevelTokens() - - expect(tokens.length).to.equal(3) - const output = tokens[1] as OutputToken - expect(output).instanceOf(OutputToken) - expect(output.content).to.equal('foo | date: "%Y-%m-%d"') - }) - it('should handle consecutive value and tags', function () { - const html = '{{foo}}{{bar}}{%foo%}{%bar%}' - const tokenizer = new Tokenizer(html) - const tokens = tokenizer.readTopLevelTokens() - - expect(tokens.length).to.equal(4) - const o1 = tokens[0] as OutputToken - const o2 = tokens[1] as OutputToken - const t1 = tokens[2] as TagToken - const t2 = tokens[3] as TagToken - expect(o1).instanceOf(OutputToken) - expect(o2).instanceOf(OutputToken) - expect(t1).instanceOf(TagToken) - expect(t2).instanceOf(TagToken) - - expect(o1.content).to.equal('foo') - expect(o2.content).to.equal('bar') - expect(t1.name).to.equal('foo') - expect(t1.args).to.equal('') - expect(t2.name).to.equal('bar') - expect(t2.args).to.equal('') - }) - it('should keep white spaces and newlines', function () { - const html = '{%foo%}\n{%bar %} \n {%alice%}' - const tokenizer = new Tokenizer(html) - const tokens = tokenizer.readTopLevelTokens() - expect(tokens.length).to.equal(5) - expect(tokens[1]).instanceOf(HTMLToken) - expect(tokens[1].getText()).to.equal('\n') - expect(tokens[3]).instanceOf(HTMLToken) - expect(tokens[3].getText()).to.equal(' \n ') - }) - it('should handle multiple lines tag', function () { - const html = '{%foo\na:a\nb:1.23\n%}' - const tokenizer = new Tokenizer(html) - const tokens = tokenizer.readTopLevelTokens() - expect(tokens.length).to.equal(1) - expect(tokens[0]).instanceOf(TagToken) - expect((tokens[0] as TagToken).args).to.equal('a:a\nb:1.23') - expect(tokens[0].getText()).to.equal('{%foo\na:a\nb:1.23\n%}') - }) - it('should handle multiple lines value', function () { - const html = '{{foo\n|date:\n"%Y-%m-%d"\n}}' - const tokenizer = new Tokenizer(html) - const tokens = 
tokenizer.readTopLevelTokens() - expect(tokens.length).to.equal(1) - expect(tokens[0]).instanceOf(OutputToken) - expect(tokens[0].getText()).to.equal('{{foo\n|date:\n"%Y-%m-%d"\n}}') - }) - it('should handle complex object property access', function () { - const html = '{{ obj["my:property with anything"] }}' - const tokenizer = new Tokenizer(html) - const tokens = tokenizer.readTopLevelTokens() - expect(tokens.length).to.equal(1) - const output = tokens[0] as OutputToken - expect(output).instanceOf(OutputToken) - expect(output.content).to.equal('obj["my:property with anything"]') + describe('#readTopLevelTokens()', () => { + it('should read HTML token', function () { + const html = '

Lorem Ipsum

' + const tokenizer = new Tokenizer(html) + const tokens = tokenizer.readTopLevelTokens() + + expect(tokens.length).to.equal(1) + expect(tokens[0]).instanceOf(HTMLToken) + expect((tokens[0] as HTMLToken).getContent()).to.equal(html) + }) + it('should read tag token', function () { + const html = '

{% for p in a[1]%}

' + const tokenizer = new Tokenizer(html) + const tokens = tokenizer.readTopLevelTokens() + + expect(tokens.length).to.equal(3) + const tag = tokens[1] as TagToken + expect(tag).instanceOf(TagToken) + expect(tag.name).to.equal('for') + expect(tag.args).to.equal('p in a[1]') + }) + it('should allow unclosed tag inside {% raw %}', function () { + const html = '{%raw%} {%if%} {%else {%endraw%}' + const tokenizer = new Tokenizer(html) + const tokens = tokenizer.readTopLevelTokens() + + expect(tokens.length).to.equal(3) + expect(tokens[0]).to.haveOwnProperty('name', 'raw') + expect((tokens[1] as any).getContent()).to.equal(' {%if%} {%else ') + }) + it('should allow unclosed endraw tag inside {% raw %}', function () { + const html = '{%raw%} {%endraw {%raw%} {%endraw%}' + const tokenizer = new Tokenizer(html) + const tokens = tokenizer.readTopLevelTokens() + + expect(tokens.length).to.equal(3) + expect(tokens[0]).to.haveOwnProperty('name', 'raw') + expect((tokens[1] as any).getContent()).to.equal(' {%endraw {%raw%} ') + }) + it('should throw when {% raw %} not closed', function () { + const html = '{%raw%} {%endraw {%raw%}' + const tokenizer = new Tokenizer(html) + expect(() => tokenizer.readTopLevelTokens()).to.throw('raw "{%raw%} {%end..." not closed, line:1, col:8') + }) + it('should read output token', function () { + const html = '

{{foo | date: "%Y-%m-%d"}}

' + const tokenizer = new Tokenizer(html) + const tokens = tokenizer.readTopLevelTokens() + + expect(tokens.length).to.equal(3) + const output = tokens[1] as OutputToken + expect(output).instanceOf(OutputToken) + expect(output.content).to.equal('foo | date: "%Y-%m-%d"') + }) + it('should handle consecutive value and tags', function () { + const html = '{{foo}}{{bar}}{%foo%}{%bar%}' + const tokenizer = new Tokenizer(html) + const tokens = tokenizer.readTopLevelTokens() + + expect(tokens.length).to.equal(4) + const o1 = tokens[0] as OutputToken + const o2 = tokens[1] as OutputToken + const t1 = tokens[2] as TagToken + const t2 = tokens[3] as TagToken + expect(o1).instanceOf(OutputToken) + expect(o2).instanceOf(OutputToken) + expect(t1).instanceOf(TagToken) + expect(t2).instanceOf(TagToken) + + expect(o1.content).to.equal('foo') + expect(o2.content).to.equal('bar') + expect(t1.name).to.equal('foo') + expect(t1.args).to.equal('') + expect(t2.name).to.equal('bar') + expect(t2.args).to.equal('') + }) + it('should keep white spaces and newlines', function () { + const html = '{%foo%}\n{%bar %} \n {%alice%}' + const tokenizer = new Tokenizer(html) + const tokens = tokenizer.readTopLevelTokens() + expect(tokens.length).to.equal(5) + expect(tokens[1]).instanceOf(HTMLToken) + expect(tokens[1].getText()).to.equal('\n') + expect(tokens[3]).instanceOf(HTMLToken) + expect(tokens[3].getText()).to.equal(' \n ') + }) + it('should handle multiple lines tag', function () { + const html = '{%foo\na:a\nb:1.23\n%}' + const tokenizer = new Tokenizer(html) + const tokens = tokenizer.readTopLevelTokens() + expect(tokens.length).to.equal(1) + expect(tokens[0]).instanceOf(TagToken) + expect((tokens[0] as TagToken).args).to.equal('a:a\nb:1.23') + expect(tokens[0].getText()).to.equal('{%foo\na:a\nb:1.23\n%}') + }) + it('should handle multiple lines value', function () { + const html = '{{foo\n|date:\n"%Y-%m-%d"\n}}' + const tokenizer = new Tokenizer(html) + const tokens = 
tokenizer.readTopLevelTokens() + expect(tokens.length).to.equal(1) + expect(tokens[0]).instanceOf(OutputToken) + expect(tokens[0].getText()).to.equal('{{foo\n|date:\n"%Y-%m-%d"\n}}') + }) + it('should handle complex object property access', function () { + const html = '{{ obj["my:property with anything"] }}' + const tokenizer = new Tokenizer(html) + const tokens = tokenizer.readTopLevelTokens() + expect(tokens.length).to.equal(1) + const output = tokens[0] as OutputToken + expect(output).instanceOf(OutputToken) + expect(output.content).to.equal('obj["my:property with anything"]') + }) + it('should throw if tag not closed', function () { + const html = '{% assign foo = bar {{foo}}' + const tokenizer = new Tokenizer(html) + expect(() => tokenizer.readTopLevelTokens()).to.throw(/tag "{% assign foo..." not closed/) + }) + it('should throw if output not closed', function () { + const tokenizer = new Tokenizer('{{name}') + expect(() => tokenizer.readTopLevelTokens()).to.throw(/output "{{name}" not closed/) + }) }) - it('should throw if tag not closed', function () { - const html = '{% assign foo = bar {{foo}}' - const tokenizer = new Tokenizer(html) - expect(() => tokenizer.readTopLevelTokens()).to.throw(/tag "{% assign foo..." 
not closed/) + describe('#readTagToken()', () => { + it('should skip quoted delimiters', function () { + const html = '{% assign a = "%} {% }} {{" %}' + const tokenizer = new Tokenizer(html) + const token = tokenizer.readTagToken() + + expect(token).instanceOf(TagToken) + expect(token.name).to.equal('assign') + expect(token.args).to.equal('a = "%} {% }} {{"') + }) }) - it('should throw if output not closed', function () { - const tokenizer = new Tokenizer('{{name}') - expect(() => tokenizer.readTopLevelTokens()).to.throw(/output "{{name}" not closed/) + describe('#readOutputToken()', () => { + it('should skip quoted delimiters', function () { + const html = '{{ "%} {%" | append: "}} {{" }}' + const tokenizer = new Tokenizer(html) + const token = tokenizer.readOutputToken() + + console.log(token) + expect(token).instanceOf(OutputToken) + expect(token.content).to.equal('"%} {%" | append: "}} {{"') + }) }) describe('#readRange()', () => { it('should read `(1..3)`', () => {