Skip to content

Commit

Permalink
feat: Lexer, tokenize
Browse files Browse the repository at this point in the history
Signed-off-by: Lexus Drumgold <unicornware@flexdevelopment.llc>
  • Loading branch information
unicornware committed Jun 22, 2024
1 parent db04218 commit a4837a7
Show file tree
Hide file tree
Showing 94 changed files with 3,902 additions and 52 deletions.
5 changes: 4 additions & 1 deletion .codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,7 @@ ignore:
- '!src/index.ts'

profiling:
critical_files_paths: []
critical_files_paths:
- src/constructs/eof.ts
- src/constructs/initialize.ts
- src/lexer.ts
2 changes: 1 addition & 1 deletion .commitlintrc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import { scopes } from '@flex-development/commitlint-config'
const config: UserConfig = {
extends: ['@flex-development'],
rules: {
'scope-enum': [Severity.Error, 'always', scopes(['chore'])]
'scope-enum': [Severity.Error, 'always', scopes(['chore', 'constructs'])]
}
}

Expand Down
3 changes: 3 additions & 0 deletions .dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@ mkbuild
mlly
nocheck
nvmrc
onreturn
pathe
pkgs
preid
shfmt
succ
tokenizes
unstub
vates
vfile
Expand Down
2 changes: 1 addition & 1 deletion .dprint.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
},
{
"command": "node ./dprint/shfmt.mjs {{file_path}}",
"exts": ["sh", "txt", "zsh"],
"exts": ["sh", "zsh"],
"fileNames": [
".editorconfig",
".env",
Expand Down
2 changes: 1 addition & 1 deletion .github/infrastructure.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ branches:
- context: test (20)
- context: typescript (5.3.3)
- context: typescript (5.4.5)
- context: typescript (5.5.0-beta)
- context: typescript (5.5.2)
- context: typescript (latest)
strict: true
restrictions: null
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ jobs:
- id: test
if: steps.test-files-check.outputs.files_exists == 'true'
name: Run tests
run: yarn test:cov --segfault-retry=3
run: yarn test:cov
- id: codecov
name: Upload coverage report to Codecov
if: steps.test-files-check.outputs.files_exists == 'true'
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![github release](https://img.shields.io/github/v/release/flex-development/vfile-lexer.svg?include_prereleases&sort=semver)](https://github.com/flex-development/vfile-lexer/releases/latest)
[![npm](https://img.shields.io/npm/v/@flex-development/vfile-lexer.svg)](https://npmjs.com/package/@flex-development/vfile-lexer)
[![codecov](https://codecov.io/gh/flex-development/vfile-lexer/graph/badge.svg?token=)](https://codecov.io/gh/flex-development/vfile-lexer)
[![codecov](https://codecov.io/gh/flex-development/vfile-lexer/graph/badge.svg?token=iA1BvaucoZ)](https://codecov.io/gh/flex-development/vfile-lexer)
[![module type: esm](https://img.shields.io/badge/module%20type-esm-brightgreen)](https://github.com/voxpelli/badges-cjs-esm)
[![license](https://img.shields.io/github/license/flex-development/vfile-lexer.svg)](LICENSE.md)
[![conventional commits](https://img.shields.io/badge/-conventional%20commits-fe5196?logo=conventional-commits&logoColor=ffffff)](https://conventionalcommits.org/)
Expand Down
2 changes: 2 additions & 0 deletions __fixtures__/hello.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
const 你好 = "hello 👋";
console.log(\u4f60\u597d); // hello 👋
1 change: 1 addition & 0 deletions __fixtures__/inline-tag.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{@linkcode Code}
20 changes: 20 additions & 0 deletions __fixtures__/numerics.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
0
0n
1
1n
2
2n
3
3n
4
4n
5
5n
6
6n
7
7n
8
8n
9
9n
3 changes: 3 additions & 0 deletions __fixtures__/strings.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
'😍'
"👍"
\'🚀\'
21 changes: 21 additions & 0 deletions __fixtures__/tk.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
* @file Fixtures - tk
* @module fixtures/tk
*/

/**
* Token types used by test constructs (for example, the inline tag construct
* uses `inlineTag` for the whole `{@tag ...}` span and `tag` for the `@tag`
* name).
*
* Every member's value is identical to its name. Values are not guaranteed to
* be lowercase (`inlineTag` is camel case), so the enum is documented as a
* plain string enum.
*
* @enum {string}
*/
enum tk {
// presumably produced for numeric literals with an `n` suffix — confirm
// against the numeric test construct
bigint = 'bigint',
// an entire inline tag, from `{` through the closing `}`
inlineTag = 'inlineTag',
number = 'number',
punctuator = 'punctuator',
string = 'string',
// the tag name inside an inline tag, starting at `@`
tag = 'tag',
whitespace = 'whitespace'
}

export default tk
10 changes: 10 additions & 0 deletions __tests__/constructs/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/**
* @file Entry Point - Test Constructs
* @module tests/constructs
*/

export { default as inlineTag } from './inline-tag'
export { default as numeric } from './numeric'
export { default as punctuator } from './punctuator'
export { default as string } from './string'
export { default as ws } from './ws'
186 changes: 186 additions & 0 deletions __tests__/constructs/inline-tag.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
/**
* @file Test Constructs - inlineTag
* @module tests/constructs/inlineTag
*/

import tk from '#fixtures/tk'
import type { Construct, TokenizeContext } from '#src/interfaces'
import type { Effects, Event, State } from '#src/types'
import { codes, type Code } from '@flex-development/vfile-reader'
import { ok as assert } from 'devlop'
import { asciiAlpha } from 'micromark-util-character'

/**
* Inline tag construct.
*
* @const {Construct} inlineTag
*/
const inlineTag: Construct = {
  /**
   * Construct name.
   */
  name: tk.inlineTag,

  /**
   * Check if the previous character `code` can come before this construct.
   *
   * An inline tag cannot start directly after a backslash.
   *
   * @see {@linkcode Code}
   * @see {@linkcode TokenizeContext}
   *
   * @this {TokenizeContext}
   *
   * @param {Code} code - Previous character code
   * @return {boolean} `true` if `code` allowed before construct
   */
  previous(this: TokenizeContext, code: Code): boolean {
    return code !== codes.backslash
  },

  /**
   * Resolve all events when the content is complete, from the start to the end.
   * Only used if `tokenize` is successful once in the content.
   *
   * For every inline tag token directly followed by a tag name token, the
   * serialized tag name and the remaining tag text are stored on the inline
   * tag token (`tag` and `value`), and the tag name token is unlinked from the
   * `next`/`previous` token chain.
   *
   * @see {@linkcode Construct.tokenize}
   * @see {@linkcode Event}
   * @see {@linkcode TokenizeContext}
   *
   * @param {Event[]} events - List of events
   * @param {TokenizeContext} context - Tokenize context
   * @return {Event[]} Changed events
   */
  resolveAll(events: Event[], context: TokenizeContext): Event[] {
    for (const [, token] of events) {
      if (token.type === tk.inlineTag) {
        // NOTE(review): the same token is visited for each of its events, so
        // this also runs after the tag name token has been unlinked below;
        // presumably a token always follows an inline tag — confirm
        assert(token.next, 'expected next token')

        if (token.next.type === tk.tag) {
          /**
           * Serialized token.
           *
           * @const {string} slice
           */
          const slice: string = context.sliceSerialize(token)

          /**
           * Next serialized token.
           *
           * @const {string} next
           */
          const next: string = context.sliceSerialize(token.next)

          // @ts-expect-error custom field (2339)
          token.tag = next

          // skip the opening brace (`+ 1`) and the tag name, drop the closing
          // brace (`-1`), then remove whitespace separating name and value
          // @ts-expect-error custom field (2339)
          token.value = slice.slice(next.length + 1, -1).trimStart()

          // unlink the tag name token from the token chain
          if (token.next.next) token.next.next.previous = token
          token.next = token.next.next
        }
      }
    }

    return events
  },

  /**
   * Check if the current character `code` can start this construct.
   *
   * @see {@linkcode Code}
   * @see {@linkcode TokenizeContext}
   *
   * @this {TokenizeContext}
   *
   * @param {Code} code - Current character code
   * @return {boolean} `true` if `code` can start construct
   */
  test(this: TokenizeContext, code: Code): boolean {
    return code === codes.leftBrace
  },

  /**
   * Set up a state machine to handle character codes streaming in.
   *
   * States run in the order `inlineTag` (`{`), `begin` (`@`), then `finish`
   * (repeated until an unescaped `}` or the end of the file).
   *
   * @see {@linkcode Effects}
   * @see {@linkcode State}
   * @see {@linkcode TokenizeContext}
   *
   * @this {TokenizeContext}
   *
   * @param {Effects} effects - Context object to transition state machine
   * @param {State} ok - Successful tokenization state
   * @param {State} nok - Failed tokenization state
   * @return {State} Initial state
   */
  tokenize(
    this: TokenizeContext,
    effects: Effects,
    ok: State,
    nok: State
  ): State {
    /**
     * Tokenize context.
     *
     * @const {TokenizeContext} self
     */
    const self: TokenizeContext = this

    /**
     * Closed tag name token?
     *
     * @var {boolean} name
     */
    let name: boolean = false

    return inlineTag

    /**
     * Finish inline tag tokenization.
     *
     * Closes the tag name token at the first non-alphabetic character, and
     * closes the inline tag token at the first unescaped right brace.
     *
     * @param {Code} code - Current character code
     * @return {State | undefined} Next state
     */
    function finish(code: Code): State | undefined {
      if (code === codes.eof) return nok(code)

      /**
       * Is the current character preceded by a backslash?
       *
       * Captured before consuming `code`: consuming may update `self.previous`
       * to the character just consumed (as micromark's tokenizer does), in
       * which case a check made after `consume` could never see the backslash.
       *
       * @const {boolean} escaped
       */
      const escaped: boolean = self.previous === codes.backslash

      if (!asciiAlpha(code) && !name) {
        name = true
        effects.exit(tk.tag)
      }

      effects.consume(code)

      if (code === codes.rightBrace && !escaped) {
        effects.exit(tk.inlineTag)
        return ok
      }

      return finish
    }

    /**
     * Tokenize the beginning of an inline tag name (`@`).
     *
     * @param {Code} code - Current character code
     * @return {State | undefined} Next state
     */
    function begin(code: Code): State | undefined {
      if (code !== codes.at) return nok(code)
      return effects.enter(tk.tag), effects.consume(code), finish
    }

    /**
     * Start inline tag tokenization.
     *
     * @param {Code} code - Current character code
     * @return {State} Next state
     */
    function inlineTag(code: Code): State {
      assert(code === codes.leftBrace, 'expected `{`')
      effects.enter(tk.inlineTag)
      return effects.consume(code), begin
    }
  }
}

export default inlineTag
Loading

0 comments on commit a4837a7

Please sign in to comment.