Skip to content

Commit

Permalink
Ability to ignore any whitespace in the formulas (config option ignor…
Browse files Browse the repository at this point in the history
…eWhiteSpace)

* Changed whitespace regex to `/s+` (#877)
* Changed whitespace regex -> /s+.
* updated CHANGELOG.MD
* Add test for unparsing non-break space character
* Introduce config param ignoreWhiteSpace
* Fix eslint warning
* Change the config parameter ignoreWhiteSpace from boolean to 'standard' | 'any'

Co-authored-by: Martin Alex Philip Dawson <u1356770@gmail.com>
  • Loading branch information
sequba and MartinDawson committed Mar 31, 2022
1 parent 1e807ce commit c125bf8
Show file tree
Hide file tree
Showing 9 changed files with 115 additions and 53 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- Add support to parse formulas with all whitespace characters (controlled by config param `ignoreWhiteSpace`). (#898)

### Changed
- **Breaking change**: Removed `gpu.js` dependency and its use. (#812)

Expand Down
21 changes: 19 additions & 2 deletions src/Config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,18 @@ export interface ConfigParams {
* @category Formula Syntax
*/
language: string,
/**
* Controls the set of whitespace characters that are allowed inside a formula.
*
* When set to `'standard'`, allows only SPACE (U+0020), CHARACTER TABULATION (U+0009), LINE FEED (U+000A), and CARRIAGE RETURN (U+000D) (compliant with OpenFormula Standard 1.3)
*
* When set to `'any'`, allows all whitespace characters that would be captured by the `\s` character class of the JavaScript regular expressions.
*
* @default 'standard'
*
* @category Formula Syntax
*/
ignoreWhiteSpace: 'standard' | 'any',
/**
* Sets year 1900 as a leap year.
*
Expand Down Expand Up @@ -436,6 +448,7 @@ export class Config implements ConfigParams, ParserConfig {
functionPlugins: [],
ignorePunctuation: false,
language: 'enGB',
ignoreWhiteSpace: 'standard',
licenseKey: '',
leapYear1900: false,
localeLang: 'en',
Expand Down Expand Up @@ -489,6 +502,8 @@ export class Config implements ConfigParams, ParserConfig {
/** @inheritDoc */
public readonly language: string
/** @inheritDoc */
public readonly ignoreWhiteSpace: 'standard' | 'any'
/** @inheritDoc */
public readonly licenseKey: string
/** @inheritDoc */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
Expand Down Expand Up @@ -564,6 +579,7 @@ export class Config implements ConfigParams, ParserConfig {
leapYear1900,
localeLang,
language,
ignoreWhiteSpace,
licenseKey,
matchWholeCell,
arrayColumnSeparator,
Expand Down Expand Up @@ -603,6 +619,7 @@ export class Config implements ConfigParams, ParserConfig {
this.functionArgSeparator = configValueFromParam(functionArgSeparator, 'string', 'functionArgSeparator')
this.decimalSeparator = configValueFromParam(decimalSeparator, ['.', ','], 'decimalSeparator')
this.language = configValueFromParam(language, 'string', 'language')
this.ignoreWhiteSpace = configValueFromParam(ignoreWhiteSpace, ['standard', 'any'], 'ignoreWhiteSpace')
this.licenseKey = configValueFromParam(licenseKey, 'string', 'licenseKey')
this.thousandSeparator = configValueFromParam(thousandSeparator, ['', ',', ' ', '.'], 'thousandSeparator')
this.arrayColumnSeparator = configValueFromParam(arrayColumnSeparator, [',', ';'], 'arrayColumnSeparator')
Expand Down Expand Up @@ -677,8 +694,8 @@ export class Config implements ConfigParams, ParserConfig {
*
* @internal
*/
public get licenseKeyValidityState() {
return privatePool.get(this)!.licenseKeyValidityState
public get licenseKeyValidityState(): LicenseKeyValidityState {
return (privatePool.get(this) as Config).licenseKeyValidityState
}

public getConfig(): ConfigParams {
Expand Down
7 changes: 3 additions & 4 deletions src/parser/FormulaParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ import {
RParen,
StringLiteral,
TimesOp,
WhiteSpace,
} from './LexerConfig'

export interface FormulaParserResult {
Expand Down Expand Up @@ -894,15 +893,15 @@ export class FormulaLexer {
private skipWhitespacesInsideRanges(tokens: IToken[]): IToken[] {
return this.filterTokensByNeighbors(tokens, (previous: IToken, current: IToken, next: IToken) => {
return (tokenMatcher(previous, CellReference) || tokenMatcher(previous, RangeSeparator))
&& tokenMatcher(current, WhiteSpace)
&& tokenMatcher(current, this.lexerConfig.WhiteSpace)
&& (tokenMatcher(next, CellReference) || tokenMatcher(next, RangeSeparator))
})
}

private skipWhitespacesBeforeArgSeparators(tokens: IToken[]): IToken[] {
return this.filterTokensByNeighbors(tokens, (previous: IToken, current: IToken, next: IToken) => {
return !tokenMatcher(previous, this.lexerConfig.ArgSeparator)
&& tokenMatcher(current, WhiteSpace)
&& tokenMatcher(current, this.lexerConfig.WhiteSpace)
&& tokenMatcher(next, this.lexerConfig.ArgSeparator)
})
}
Expand All @@ -928,7 +927,7 @@ export class FormulaLexer {
}

private trimTrailingWhitespaces(tokens: IToken[]): IToken[] {
if (tokens.length > 0 && tokenMatcher(tokens[tokens.length - 1], WhiteSpace)) {
if (tokens.length > 0 && tokenMatcher(tokens[tokens.length - 1], this.lexerConfig.WhiteSpace)) {
tokens.pop()
}
return tokens
Expand Down
12 changes: 6 additions & 6 deletions src/parser/LexerConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import {ParserConfig} from './ParserConfig'

export const RANGE_OPERATOR = ':'
export const ABSOLUTE_OPERATOR = '$'
export const ALL_WHITESPACE_REGEXP = /\s+/
export const ODFF_WHITESPACE_REGEXP = /[ \t\n\r]+/

/* arithmetic */
// abstract for + -
Expand Down Expand Up @@ -92,12 +94,6 @@ export const StringLiteral = createToken({name: 'StringLiteral', pattern: /"([^"
/* error literal */
export const ErrorLiteral = createToken({name: 'ErrorLiteral', pattern: /#[A-Za-z0-9\/]+[?!]?/})

/* skipping whitespaces */
export const WhiteSpace = createToken({
name: 'WhiteSpace',
pattern: /[ \t\n\r]+/,
})

export interface ILexerConfig {
ArgSeparator: TokenType,
NumberLiteral: TokenType,
Expand All @@ -108,6 +104,7 @@ export interface ILexerConfig {
decimalSeparator: '.' | ',',
ArrayColSeparator: TokenType,
ArrayRowSeparator: TokenType,
WhiteSpace: TokenType,
maxColumns: number,
maxRows: number,
}
Expand All @@ -116,7 +113,9 @@ export const buildLexerConfig = (config: ParserConfig): ILexerConfig => {
const offsetProcedureNameLiteral = config.translationPackage.getFunctionTranslation('OFFSET')
const errorMapping = config.errorMapping
const functionMapping = config.translationPackage.buildFunctionMapping()
const whitespaceTokenRegexp = config.ignoreWhiteSpace === 'standard' ? ODFF_WHITESPACE_REGEXP : ALL_WHITESPACE_REGEXP

const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: whitespaceTokenRegexp })
const ArrayRowSeparator = createToken({name: 'ArrayRowSep', pattern: config.arrayRowSeparator})
const ArrayColSeparator = createToken({name: 'ArrayColSep', pattern: config.arrayColumnSeparator})

Expand Down Expand Up @@ -185,6 +184,7 @@ export const buildLexerConfig = (config: ParserConfig): ILexerConfig => {
OffsetProcedureName,
ArrayRowSeparator,
ArrayColSeparator,
WhiteSpace,
allTokens,
errorMapping,
functionMapping,
Expand Down
1 change: 1 addition & 0 deletions src/parser/ParserConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export interface ParserConfig {
decimalSeparator: '.' | ',',
arrayColumnSeparator: ',' | ';',
arrayRowSeparator: ';' | '|',
ignoreWhiteSpace: 'standard' | 'any',
translationPackage: TranslationPackage,
errorMapping: Record<string, TranslatableErrorType>,
maxColumns: number,
Expand Down
45 changes: 24 additions & 21 deletions src/parser/ParserWithCaching.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2021 Handsoncode. All rights reserved.
*/

import {IToken, tokenMatcher} from 'chevrotain'
import {IToken, tokenMatcher, ILexingResult} from 'chevrotain'
import {ErrorType, SimpleCellAddress} from '../Cell'
import {FunctionRegistry} from '../interpreter/FunctionRegistry'
import {AstNodeType, buildParsingErrorAst, RelativeDependency} from './'
Expand All @@ -24,7 +24,6 @@ import {
ILexerConfig,
ProcedureName,
RowRange,
WhiteSpace,
} from './LexerConfig'
import {ParserConfig} from './ParserConfig'
import {formatNumber} from './Unparser'
Expand Down Expand Up @@ -65,7 +64,7 @@ export class ParserWithCaching {
* @param formulaAddress - address with regard to which formula should be parsed. Impacts computed addresses in R0C0 format.
*/
public parse(text: string, formulaAddress: SimpleCellAddress): ParsingResult {
const lexerResult = this.lexer.tokenizeFormula(text)
const lexerResult = this.tokenizeFormula(text)

if (lexerResult.errors.length > 0) {
const errors = lexerResult.errors.map((e) =>
Expand All @@ -89,7 +88,7 @@ export class ParserWithCaching {
if (cacheResult !== undefined) {
++this.statsCacheUsed
} else {
const processedTokens = bindWhitespacesToTokens(lexerResult.tokens)
const processedTokens = this.bindWhitespacesToTokens(lexerResult.tokens)
const parsingResult = this.formulaParser.parseFromTokens(processedTokens, formulaAddress)

if (parsingResult.errors.length > 0) {
Expand Down Expand Up @@ -230,28 +229,32 @@ export class ParserWithCaching {
}
}
}
}

export function bindWhitespacesToTokens(tokens: IToken[]): IExtendedToken[] {
const processedTokens: IExtendedToken[] = []

const first = tokens[0]
if (!tokenMatcher(first, WhiteSpace)) {
processedTokens.push(first)
}
public bindWhitespacesToTokens(tokens: IToken[]): IExtendedToken[] {
const processedTokens: IExtendedToken[] = []

for (let i = 1; i < tokens.length; ++i) {
const current = tokens[i] as IExtendedToken
if (tokenMatcher(current, WhiteSpace)) {
continue
const first = tokens[0]
if (!tokenMatcher(first, this.lexerConfig.WhiteSpace)) {
processedTokens.push(first)
}

const previous = tokens[i - 1]
if (tokenMatcher(previous, WhiteSpace)) {
current.leadingWhitespace = previous
for (let i = 1; i < tokens.length; ++i) {
const current = tokens[i] as IExtendedToken
if (tokenMatcher(current, this.lexerConfig.WhiteSpace)) {
continue
}

const previous = tokens[i - 1]
if (tokenMatcher(previous, this.lexerConfig.WhiteSpace)) {
current.leadingWhitespace = previous
}
processedTokens.push(current)
}
processedTokens.push(current)

return processedTokens
}

return processedTokens
public tokenizeFormula(text: string): ILexingResult {
return this.lexer.tokenizeFormula(text)
}
}
18 changes: 18 additions & 0 deletions test/parser/parser.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,24 @@ describe('ParserWithCaching', () => {
expect(ast1).toEqual(ast2)
})

it('with default config should return error for non-breakable space', () => {
const parser = buildEmptyParserWithCaching(new Config())

const { ast, errors } = parser.parse('=\u00A042', adr('A1'))

expect(ast.type).toBe(AstNodeType.ERROR)
expect(errors[0].type).toBe(ParsingErrorType.LexingError)
})

it('when set ignoreWhiteSpace = \'any\' should accept a non-breakable space', () => {
const parser = buildEmptyParserWithCaching(new Config({ ignoreWhiteSpace: 'any' }))

const { ast } = parser.parse('=\u00A042', adr('A1'))

expect(ast.type).toEqual(AstNodeType.NUMBER)
expect(ast.leadingWhitespace).toEqual('\u00A0')
})

it('error literal', () => {
const parser = buildEmptyParserWithCaching(new Config())

Expand Down
14 changes: 14 additions & 0 deletions test/parser/unparse.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -597,4 +597,18 @@ describe('whitespaces', () => {

expect(unparsed).toEqual(formula)
})

it('when ignoreWhiteSpace = \'any\', should unparse a non-breakable space character', () => {
const config = new Config({ ignoreWhiteSpace: 'any' })
const lexerConfig = buildLexerConfig(config)
const parser = buildEmptyParserWithCaching(config, sheetMapping)
const unparser = new Unparser(config, lexerConfig, sheetMapping.fetchDisplayName, new NamedExpressions())

const formula = '=\u00A01'
const ast = parser.parse(formula, adr('A1')).ast

const unparsed = unparser.unparse(ast, adr('A1'))

expect(unparsed).toEqual(formula)
})
})
Loading

0 comments on commit c125bf8

Please sign in to comment.