Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changed whitespace regex to /s+ (intermediate branch to fix the conflicts) #935

Merged
merged 11 commits into from
Mar 31, 2022
Merged
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- Add support to parse formulas with all whitespace characters (controlled by config param `allowAllWhitespace`). (#898)

### Changed
- Removed `gpu.js` dependency and its use. (#812)

Expand Down
21 changes: 19 additions & 2 deletions src/Config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,18 @@ export interface ConfigParams {
* @category Formula Syntax
*/
language: string,
/**
* Controls the set of whitespace characters that are allowed inside a formula.
*
* When set to `false`, allows only SPACE (U+0020), CHARACTER TABULATION (U+0009), LINE FEED (U+000A), and CARRIAGE RETURN (U+000D) (compliant with OpenFormula Standard 1.3)
*
* When set to `true`, allows all whitespace characters that would be captured by the `\s` character class of the JavaScript regular expressions.
*
* @default false
*
* @category Formula Syntax
*/
allowAllWhitespace: boolean,
Copy link
Member

@warpech warpech Mar 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know there are many ways to do that, but I am looking for the most consistent and future-proof.

All other options in the "Formula Syntax" category allow string or array input. Have you considered naming this option ignoreWhiteSpace: 'standard' | 'any' (with standard as the default).

This allows us to add more options in the future like none, etc.

I think that the word "any" fits better than "all", because usually refers to quantity and might be confusing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. It's more flexible, Done.

/**
* Sets year 1900 as a leap year.
*
Expand Down Expand Up @@ -436,6 +448,7 @@ export class Config implements ConfigParams, ParserConfig {
functionPlugins: [],
ignorePunctuation: false,
language: 'enGB',
allowAllWhitespace: false,
licenseKey: '',
leapYear1900: false,
localeLang: 'en',
Expand Down Expand Up @@ -489,6 +502,8 @@ export class Config implements ConfigParams, ParserConfig {
/** @inheritDoc */
public readonly language: string
/** @inheritDoc */
public readonly allowAllWhitespace: boolean
/** @inheritDoc */
public readonly licenseKey: string
/** @inheritDoc */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
Expand Down Expand Up @@ -564,6 +579,7 @@ export class Config implements ConfigParams, ParserConfig {
leapYear1900,
localeLang,
language,
allowAllWhitespace,
licenseKey,
matchWholeCell,
arrayColumnSeparator,
Expand Down Expand Up @@ -603,6 +619,7 @@ export class Config implements ConfigParams, ParserConfig {
this.functionArgSeparator = configValueFromParam(functionArgSeparator, 'string', 'functionArgSeparator')
this.decimalSeparator = configValueFromParam(decimalSeparator, ['.', ','], 'decimalSeparator')
this.language = configValueFromParam(language, 'string', 'language')
this.allowAllWhitespace = configValueFromParam(allowAllWhitespace, 'boolean', 'allowAllWhitespace')
this.licenseKey = configValueFromParam(licenseKey, 'string', 'licenseKey')
this.thousandSeparator = configValueFromParam(thousandSeparator, ['', ',', ' ', '.'], 'thousandSeparator')
this.arrayColumnSeparator = configValueFromParam(arrayColumnSeparator, [',', ';'], 'arrayColumnSeparator')
Expand Down Expand Up @@ -677,8 +694,8 @@ export class Config implements ConfigParams, ParserConfig {
*
* @internal
*/
public get licenseKeyValidityState() {
return privatePool.get(this)!.licenseKeyValidityState
public get licenseKeyValidityState(): LicenseKeyValidityState {
return (privatePool.get(this) as Config).licenseKeyValidityState
}

public getConfig(): ConfigParams {
Expand Down
7 changes: 3 additions & 4 deletions src/parser/FormulaParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ import {
RParen,
StringLiteral,
TimesOp,
WhiteSpace,
} from './LexerConfig'

export interface FormulaParserResult {
Expand Down Expand Up @@ -894,15 +893,15 @@ export class FormulaLexer {
private skipWhitespacesInsideRanges(tokens: IToken[]): IToken[] {
return this.filterTokensByNeighbors(tokens, (previous: IToken, current: IToken, next: IToken) => {
return (tokenMatcher(previous, CellReference) || tokenMatcher(previous, RangeSeparator))
&& tokenMatcher(current, WhiteSpace)
&& tokenMatcher(current, this.lexerConfig.WhiteSpace)
&& (tokenMatcher(next, CellReference) || tokenMatcher(next, RangeSeparator))
})
}

private skipWhitespacesBeforeArgSeparators(tokens: IToken[]): IToken[] {
return this.filterTokensByNeighbors(tokens, (previous: IToken, current: IToken, next: IToken) => {
return !tokenMatcher(previous, this.lexerConfig.ArgSeparator)
&& tokenMatcher(current, WhiteSpace)
&& tokenMatcher(current, this.lexerConfig.WhiteSpace)
&& tokenMatcher(next, this.lexerConfig.ArgSeparator)
})
}
Expand All @@ -928,7 +927,7 @@ export class FormulaLexer {
}

private trimTrailingWhitespaces(tokens: IToken[]): IToken[] {
if (tokens.length > 0 && tokenMatcher(tokens[tokens.length - 1], WhiteSpace)) {
if (tokens.length > 0 && tokenMatcher(tokens[tokens.length - 1], this.lexerConfig.WhiteSpace)) {
tokens.pop()
}
return tokens
Expand Down
12 changes: 6 additions & 6 deletions src/parser/LexerConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import {ParserConfig} from './ParserConfig'

export const RANGE_OPERATOR = ':'
export const ABSOLUTE_OPERATOR = '$'
export const ALL_WHITESPACE_REGEXP = /\s+/
export const ODFF_WHITESPACE_REGEXP = /[ \t\n\r]+/

/* arithmetic */
// abstract for + -
Expand Down Expand Up @@ -92,12 +94,6 @@ export const StringLiteral = createToken({name: 'StringLiteral', pattern: /"([^"
/* error literal */
export const ErrorLiteral = createToken({name: 'ErrorLiteral', pattern: /#[A-Za-z0-9\/]+[?!]?/})

/* skipping whitespaces */
export const WhiteSpace = createToken({
name: 'WhiteSpace',
pattern: /[ \t\n\r]+/,
})

export interface ILexerConfig {
ArgSeparator: TokenType,
NumberLiteral: TokenType,
Expand All @@ -108,6 +104,7 @@ export interface ILexerConfig {
decimalSeparator: '.' | ',',
ArrayColSeparator: TokenType,
ArrayRowSeparator: TokenType,
WhiteSpace: TokenType,
maxColumns: number,
maxRows: number,
}
Expand All @@ -116,7 +113,9 @@ export const buildLexerConfig = (config: ParserConfig): ILexerConfig => {
const offsetProcedureNameLiteral = config.translationPackage.getFunctionTranslation('OFFSET')
const errorMapping = config.errorMapping
const functionMapping = config.translationPackage.buildFunctionMapping()
const whitespaceTokenRegexp = config.allowAllWhitespace ? ALL_WHITESPACE_REGEXP : ODFF_WHITESPACE_REGEXP

const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: whitespaceTokenRegexp })
const ArrayRowSeparator = createToken({name: 'ArrayRowSep', pattern: config.arrayRowSeparator})
const ArrayColSeparator = createToken({name: 'ArrayColSep', pattern: config.arrayColumnSeparator})

Expand Down Expand Up @@ -185,6 +184,7 @@ export const buildLexerConfig = (config: ParserConfig): ILexerConfig => {
OffsetProcedureName,
ArrayRowSeparator,
ArrayColSeparator,
WhiteSpace,
allTokens,
errorMapping,
functionMapping,
Expand Down
1 change: 1 addition & 0 deletions src/parser/ParserConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export interface ParserConfig {
decimalSeparator: '.' | ',',
arrayColumnSeparator: ',' | ';',
arrayRowSeparator: ';' | '|',
allowAllWhitespace: boolean,
translationPackage: TranslationPackage,
errorMapping: Record<string, TranslatableErrorType>,
maxColumns: number,
Expand Down
45 changes: 24 additions & 21 deletions src/parser/ParserWithCaching.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2021 Handsoncode. All rights reserved.
*/

import {IToken, tokenMatcher} from 'chevrotain'
import {IToken, tokenMatcher, ILexingResult} from 'chevrotain'
import {ErrorType, SimpleCellAddress} from '../Cell'
import {FunctionRegistry} from '../interpreter/FunctionRegistry'
import {AstNodeType, buildParsingErrorAst, RelativeDependency} from './'
Expand All @@ -24,7 +24,6 @@ import {
ILexerConfig,
ProcedureName,
RowRange,
WhiteSpace,
} from './LexerConfig'
import {ParserConfig} from './ParserConfig'
import {formatNumber} from './Unparser'
Expand Down Expand Up @@ -65,7 +64,7 @@ export class ParserWithCaching {
* @param formulaAddress - address with regard to which formula should be parsed. Impacts computed addresses in R0C0 format.
*/
public parse(text: string, formulaAddress: SimpleCellAddress): ParsingResult {
const lexerResult = this.lexer.tokenizeFormula(text)
const lexerResult = this.tokenizeFormula(text)

if (lexerResult.errors.length > 0) {
const errors = lexerResult.errors.map((e) =>
Expand All @@ -89,7 +88,7 @@ export class ParserWithCaching {
if (cacheResult !== undefined) {
++this.statsCacheUsed
} else {
const processedTokens = bindWhitespacesToTokens(lexerResult.tokens)
const processedTokens = this.bindWhitespacesToTokens(lexerResult.tokens)
const parsingResult = this.formulaParser.parseFromTokens(processedTokens, formulaAddress)

if (parsingResult.errors.length > 0) {
Expand Down Expand Up @@ -230,28 +229,32 @@ export class ParserWithCaching {
}
}
}
}

export function bindWhitespacesToTokens(tokens: IToken[]): IExtendedToken[] {
const processedTokens: IExtendedToken[] = []

const first = tokens[0]
if (!tokenMatcher(first, WhiteSpace)) {
processedTokens.push(first)
}
public bindWhitespacesToTokens(tokens: IToken[]): IExtendedToken[] {
const processedTokens: IExtendedToken[] = []

for (let i = 1; i < tokens.length; ++i) {
const current = tokens[i] as IExtendedToken
if (tokenMatcher(current, WhiteSpace)) {
continue
const first = tokens[0]
if (!tokenMatcher(first, this.lexerConfig.WhiteSpace)) {
processedTokens.push(first)
}

const previous = tokens[i - 1]
if (tokenMatcher(previous, WhiteSpace)) {
current.leadingWhitespace = previous
for (let i = 1; i < tokens.length; ++i) {
const current = tokens[i] as IExtendedToken
if (tokenMatcher(current, this.lexerConfig.WhiteSpace)) {
continue
}

const previous = tokens[i - 1]
if (tokenMatcher(previous, this.lexerConfig.WhiteSpace)) {
current.leadingWhitespace = previous
}
processedTokens.push(current)
}
processedTokens.push(current)

return processedTokens
}

return processedTokens
public tokenizeFormula(text: string): ILexingResult {
return this.lexer.tokenizeFormula(text)
}
}
18 changes: 18 additions & 0 deletions test/parser/parser.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,24 @@ describe('ParserWithCaching', () => {
expect(ast1).toEqual(ast2)
})

it('with default config should return error for non-breakable space', () => {
const parser = buildEmptyParserWithCaching(new Config())

const { ast, errors } = parser.parse('=\u00A042', adr('A1'))

expect(ast.type).toBe(AstNodeType.ERROR)
expect(errors[0].type).toBe(ParsingErrorType.LexingError)
})

it('when set allowAllWhitespace = true should accept a non-breakable space', () => {
const parser = buildEmptyParserWithCaching(new Config({ allowAllWhitespace: true }))

const { ast } = parser.parse('=\u00A042', adr('A1'))

expect(ast.type).toEqual(AstNodeType.NUMBER)
expect(ast.leadingWhitespace).toEqual('\u00A0')
})

it('error literal', () => {
const parser = buildEmptyParserWithCaching(new Config())

Expand Down
14 changes: 14 additions & 0 deletions test/parser/unparse.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -597,4 +597,18 @@ describe('whitespaces', () => {

expect(unparsed).toEqual(formula)
})

it('when allowAllWhitespace = true, should unparse a non-breakable space character', () => {
const config = new Config({ allowAllWhitespace: true })
const lexerConfig = buildLexerConfig(config)
const parser = buildEmptyParserWithCaching(config, sheetMapping)
const unparser = new Unparser(config, lexerConfig, sheetMapping.fetchDisplayName, new NamedExpressions())

const formula = '=\u00A01'
const ast = parser.parse(formula, adr('A1')).ast

const unparsed = unparser.unparse(ast, adr('A1'))

expect(unparsed).toEqual(formula)
})
})
Loading