Skip to content

Commit

Permalink
feat: transform negative lookaheads (#560)
Browse files Browse the repository at this point in the history
  • Loading branch information
eduardoboucas authored Jan 10, 2024
1 parent 4433b46 commit 4cf6a5e
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 17 deletions.
48 changes: 47 additions & 1 deletion node/declaration.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { test, expect } from 'vitest'
import { describe, test, expect } from 'vitest'

import { FunctionConfig } from './config.js'
import { Declaration, mergeDeclarations, parsePattern } from './declaration.js'
Expand Down Expand Up @@ -183,3 +183,49 @@ test('Ensures pattern match on the whole path', () => {

expect(actual).toEqual(expected)
})

describe('Transforms negative lookaheads', () => {
test('Throws if a `extractedExclusionPatterns` property is not supplied', () => {
const input = "'/((?!api|_next/static|_next/image|favicon.ico).*)'"

expect(() => parsePattern(input)).toThrowError('Regular expressions with lookaheads are not supported')
})

test('With a disjunction inside the lookahead', () => {
const input = "'/((?!api|_next/static|_next/image|favicon.ico).*)'"
const exclusions: string[] = []
const actual = parsePattern(input, exclusions)

expect(actual).toEqual("^'\\/(.*)'$")
expect(exclusions).toStrictEqual(["/^'\\/(api|_next\\/static|_next\\/image|favicon.ico.*)'$/"])
})

test('With multiple lookaheads inside a disjunction', () => {
const input = 'one(two(?!three)|four|five(?!six)|seven)'
const exclusions: string[] = []
const expected = '^one(two|four|five|seven)$'
const actual = parsePattern(input, exclusions)

expect(actual).toEqual(expected)
expect(exclusions).toStrictEqual(['/^one(twothree)$/', '/^one(fivesix)$/'])
})

test('With a lookahead outside of a disjunction', () => {
const input = 'a(b|c)(?!d)'
const exclusions: string[] = []
const expected = '^a(b|c)$'
const actual = parsePattern(input, exclusions)

expect(actual).toEqual(expected)
expect(exclusions).toStrictEqual(['/^a(b|c)d$/'])
})

test('Throws on nested lookaheads', () => {
const exclusions: string[] = []

expect(() => parsePattern('one(?!two(?!three))', exclusions)).toThrowError(
'Regular expressions with nested lookaheads are not supported',
)
expect(exclusions).toStrictEqual([])
})
})
148 changes: 137 additions & 11 deletions node/declaration.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import regexpAST from 'regexp-tree'
import regexpAST, { NodePath } from 'regexp-tree'

import { FunctionConfig, HTTPMethod, Path } from './config.js'
import { FeatureFlags } from './feature_flags.js'
Expand Down Expand Up @@ -116,20 +116,73 @@ const createDeclarationsFromFunctionConfigs = (
return declarations
}

// Validates and normalizes a pattern so that it's a valid regular expression
// in Go, which is the engine used by our edge nodes.
export const parsePattern = (pattern: string) => {
/**
* Validates and normalizes a pattern so that it's a valid regular expression
* in Go, which is the engine used by our edge nodes.
*
* @param pattern Original regular expression
* @param extractedExclusionPatterns If set, negative lookaheads (which are
* not supported in Go) are transformed into a list of exclusion patterns to
* be stored in this array
* @returns Normalized regular expression
*/
export const parsePattern = (pattern: string, extractedExclusionPatterns?: string[]) => {
let enclosedPattern = pattern
if (!pattern.startsWith('^')) enclosedPattern = `^${enclosedPattern}`
if (!pattern.endsWith('$')) enclosedPattern = `${enclosedPattern}$`

if (!pattern.startsWith('^')) {
enclosedPattern = `^${enclosedPattern}`
}

if (!pattern.endsWith('$')) {
enclosedPattern = `${enclosedPattern}$`
}

let lookaheadDepth = 0

// Holds the location of every lookahead expression found.
const lookaheads = new Set<string>()
const regexp = new RegExp(enclosedPattern)
const newRegexp = regexpAST.transform(regexp, {
Assertion(path) {
// Lookaheads are not supported. If we find one, throw an error.
if (path.node.kind === 'Lookahead') {
throw new Error('Regular expressions with lookaheads are not supported')
}
Assertion: {
// If we're entering a negative lookahead expression, register its
// location.
pre(path) {
if (path.node.kind !== 'Lookahead') {
return
}

if (!extractedExclusionPatterns) {
throw new Error('Regular expressions with lookaheads are not supported')
}

if (!path.node.negative) {
throw new Error('Regular expressions with positive lookaheads are not supported')
}

lookaheadDepth += 1

if (lookaheadDepth > 1) {
throw new Error('Regular expressions with nested lookaheads are not supported')
}

const lookahead = serializeNodeLocation(path.node)

if (lookahead) {
lookaheads.add(lookahead)
}
},

// If we're leaving a negative lookahead expression, remove it from the
// AST. We'll later replace its functionality with an exclusion pattern.
post(path) {
if (path.node.kind !== 'Lookahead' || !path.node.negative) {
return
}

lookaheadDepth -= 1

path.remove()
},
},

Group(path) {
Expand All @@ -146,6 +199,79 @@ export const parsePattern = (pattern: string) => {
},
})

// The `extractedExclusionPatterns` property works as a shut-off valve: if
// it's not supplied, don't even traverse the AST again to further process
// lookaheads.
if (extractedExclusionPatterns) {
const exclusionPatterns = [...lookaheads].map((lookahead) => getExclusionPatternFromLookahead(regexp, lookahead))

extractedExclusionPatterns.push(...exclusionPatterns)
}

// Strip leading and forward slashes.
return newRegexp.toString().slice(1, -1)
}

/**
* Takes a regular expression and a lookahead inside it and returns a new
* regular expression that acts as an exclusion pattern to replace the
* lookahead.
*
* @param regexp Original regular expression
* @param location Serialized location of the lookahead
* @returns Exclusion pattern regular expression
*/
const getExclusionPatternFromLookahead = (regexp: RegExp, location: string) => {
const exclusionRegexp = regexpAST.transform(regexp, {
Assertion(path) {
if (
path.node.kind !== 'Lookahead' ||
path.node.assertion === null ||
serializeNodeLocation(path.node) !== location
) {
return
}

// Unwrap the lookahead by replacing it with the expression it holds —
// e.g. `(?!foo)` becomes `foo`.
path.replace(path.node.assertion)

// Traverse the parents of the lookahead all the way up to the root. When
// we find a disjunction, replace it with the child we travelled from. In
// practice this means getting rid of all the branches that are not the
// lookahead.
// For example, in `(a|b(?!c)|d)` the exclusion patterns cannot contain
// the `a` or `d` branches of the disjunction, otherwise `ab` and `ad`
// would incorrectly be excluded. The exclusion must be `bc` only.
let visitor: NodePath | null = path

while (visitor !== null) {
const child = visitor

visitor = visitor.parentPath

if (visitor?.node.type !== 'Disjunction') {
continue
}

visitor.replace(child.node)
}
},
})

return exclusionRegexp.toString()
}

/**
* Creates a string representation of a regexp AST node in the format
* `<start line>,<start column>,<start offset>,<end line>,<end column>,<end offset>`
*/
const serializeNodeLocation = (node: NodePath['node']) => {
if (!node.loc) {
return ''
}

const { start, end } = node.loc

return [start.line, start.column, start.offset, end.line, end.column, end.offset].join(',')
}
4 changes: 3 additions & 1 deletion node/feature_flags.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
const defaultFlags = {}
const defaultFlags = {
edge_bundler_transform_lookaheads: false,
}

type FeatureFlag = keyof typeof defaultFlags
type FeatureFlags = Partial<Record<FeatureFlag, boolean>>
Expand Down
19 changes: 19 additions & 0 deletions node/manifest.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -486,3 +486,22 @@ test('Returns functions without a declaration and unrouted functions', () => {
expect(declarationsWithoutFunction).toEqual(['func-3'])
expect(unroutedFunctions).toEqual(['func-2', 'func-4'])
})

test('Generates exclusion patterns from negative lookaheads', () => {
const functions = [{ name: 'func-1', path: '/path/to/func-1.ts' }]
const declarations = [{ function: 'func-1', pattern: "'/((?!api|_next/static|_next/image|favicon.ico).*)'" }]
const { manifest } = generateManifest({
bundles: [],
declarations,
featureFlags: { edge_bundler_transform_lookaheads: true },
functions,
})

expect(manifest.routes).toEqual([
{
function: 'func-1',
pattern: "^'/(.*)'$",
excluded_patterns: ["/^'/(api|_next/static|_next/image|favicon.ico.*)'$/"],
},
])
})
10 changes: 6 additions & 4 deletions node/manifest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ const normalizeMethods = (method: unknown, name: string): string[] | undefined =
const generateManifest = ({
bundles = [],
declarations = [],
featureFlags,
functions,
userFunctionConfig = {},
internalFunctionConfig = {},
Expand Down Expand Up @@ -153,7 +154,8 @@ const generateManifest = ({
return
}

const pattern = getRegularExpression(declaration)
const extractedExclusionPatterns = featureFlags?.edge_bundler_transform_lookaheads ? [] : undefined
const pattern = getRegularExpression(declaration, extractedExclusionPatterns)

// If there is no `pattern`, the declaration will never be triggered, so we
// can discard it.
Expand All @@ -163,7 +165,7 @@ const generateManifest = ({

routedFunctions.add(declaration.function)

const excludedPattern = getExcludedRegularExpressions(declaration)
const excludedPattern = [...getExcludedRegularExpressions(declaration), ...(extractedExclusionPatterns ?? [])]
const route: Route = {
function: func.name,
pattern: serializePattern(pattern),
Expand Down Expand Up @@ -225,10 +227,10 @@ const pathToRegularExpression = (path: string) => {
}
}

const getRegularExpression = (declaration: Declaration) => {
const getRegularExpression = (declaration: Declaration, extractedExclusionPatterns?: string[]) => {
if ('pattern' in declaration) {
try {
return parsePattern(declaration.pattern)
return parsePattern(declaration.pattern, extractedExclusionPatterns)
} catch (error: unknown) {
throw wrapBundleError(
new Error(
Expand Down

0 comments on commit 4cf6a5e

Please sign in to comment.