Skip to content
This repository has been archived by the owner on Mar 24, 2022. It is now read-only.

Commit

Permalink
refactor: Remove PositionTrackingPreprocessorMixin
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 committed Nov 24, 2021
1 parent 1de6e63 commit 3ab6403
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 79 deletions.
4 changes: 2 additions & 2 deletions packages/html-rewriting-stream/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { EndTag, StartTag, Doctype, Text, Comment, SaxToken } from '../../sax-pa
import type { Token, Location } from '@parse5/parse5/lib/common/token.js';
import { SAXParser } from '@parse5/sax-parser/lib/index.js';
import { escapeString } from '@parse5/parse5/lib/serializer/index.js';
import type { PositionTrackingPreprocessorMixin } from '@parse5/parse5/lib/extensions/position-tracking/preprocessor-mixin';
import type { Preprocessor } from '@parse5/parse5/lib/tokenizer/preprocessor.js';

/**
* Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter.
Expand Down Expand Up @@ -54,7 +54,7 @@ import type { PositionTrackingPreprocessorMixin } from '@parse5/parse5/lib/exten
* ```
*/
export class RewritingStream extends SAXParser {
posTracker: PositionTrackingPreprocessorMixin;
posTracker: Preprocessor;

/** Note: The `sourceCodeLocationInfo` is always enabled. */
constructor() {
Expand Down
10 changes: 5 additions & 5 deletions packages/parse5/lib/common/unicode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,26 +51,26 @@ export const CODE_POINT_SEQUENCES = {
};

//Surrogates
export function isSurrogate(cp: number) {
/**
 * Tells whether a code unit lies in the UTF-16 surrogate range.
 *
 * @param cp Code unit / code point to test.
 * @returns `true` when `cp` is within 0xD800..0xDFFF (inclusive).
 */
export function isSurrogate(cp: number): boolean {
    // Anything below the start of the range cannot be a surrogate.
    if (cp < 0xd800) {
        return false;
    }

    return cp <= 0xdfff;
}

export function isSurrogatePair(cp: number) {
/**
 * Tells whether a code unit can complete a surrogate pair.
 *
 * Despite the name, only a single value is inspected: the check passes when
 * `cp` is within 0xDC00..0xDFFF (inclusive), i.e. the second-half range of
 * the surrogate block.
 *
 * @param cp Code unit to test.
 * @returns `true` when `cp` is within 0xDC00..0xDFFF.
 */
export function isSurrogatePair(cp: number): boolean {
    const atOrAboveStart = cp >= 0xdc00;
    const atOrBelowEnd = cp <= 0xdfff;

    return atOrAboveStart && atOrBelowEnd;
}

export function getSurrogatePairCodePoint(cp1: number, cp2: number) {
/**
 * Combines the two halves of a surrogate pair into a single code point.
 *
 * @param cp1 First (leading) code unit of the pair.
 * @param cp2 Second (trailing) code unit of the pair.
 * @returns The combined code point. No range validation is performed on
 *   either argument.
 */
export function getSurrogatePairCodePoint(cp1: number, cp2: number): number {
    // Each step of the leading unit above 0xD800 accounts for 0x400 code points.
    const leadContribution = (cp1 - 0xd800) * 0x400;

    // 0x2400 === 0x10000 - 0xDC00: folds the supplementary-plane base and the
    // trailing unit's range start into one constant.
    return leadContribution + 0x2400 + cp2;
}

//NOTE: excluding NULL and ASCII whitespace
export function isControlCodePoint(cp: number) {
/**
 * Tells whether a code point is treated as a control character.
 *
 * Matches 0x7F..0x9F, and 0x01..0x1F except for tab (0x09), line feed (0x0A),
 * form feed (0x0C) and carriage return (0x0D). NULL (0x00) and space (0x20)
 * are never reported as control code points.
 *
 * @param cp Code point to test.
 * @returns `true` when `cp` is a control code point as described above.
 */
export function isControlCodePoint(cp: number): boolean {
    // Upper range: DELETE and the 0x80..0x9F block.
    if (cp >= 0x7f && cp <= 0x9f) {
        return true;
    }

    // Everything else must fall inside 0x01..0x1F to qualify.
    if (!(cp >= 0x01 && cp <= 0x1f)) {
        return false;
    }

    // Whitespace characters inside the lower range are excluded.
    switch (cp) {
        case 0x09:
        case 0x0a:
        case 0x0c:
        case 0x0d:
        case 0x20:
            return false;
        default:
            return true;
    }
}

export function isUndefinedCodePoint(cp: number) {
/**
 * Tells whether a code point is considered undefined.
 *
 * @param cp Code point to test.
 * @returns `true` when `cp` is within 0xFDD0..0xFDEF (inclusive) or is a
 *   member of `UNDEFINED_CODE_POINTS`.
 */
export function isUndefinedCodePoint(cp: number): boolean {
    // Contiguous range is checked first; the set lookup only runs when it misses.
    if (cp >= 0xfdd0 && cp <= 0xfdef) {
        return true;
    }

    return UNDEFINED_CODE_POINTS.has(cp);
}
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
import { ErrorReportingMixinBase, ErrorReportingMixinOptions } from './mixin-base.js';
import { PositionTrackingPreprocessorMixin } from '../position-tracking/preprocessor-mixin.js';
import { Mixin } from '../../utils/mixin.js';
import type { Preprocessor } from '../../tokenizer/preprocessor.js';
import type { ERR } from '../../common/error-codes.js';

export class ErrorReportingPreprocessorMixin extends ErrorReportingMixinBase<Preprocessor> {
posTracker: PositionTrackingPreprocessorMixin;
posTracker: Preprocessor;
lastErrOffset = -1;

constructor(preprocessor: Preprocessor, opts: ErrorReportingMixinOptions) {
super(preprocessor, opts);

this.posTracker = Mixin.install(preprocessor, PositionTrackingPreprocessorMixin);
this.posTracker = preprocessor;
}

override _reportError(code: ERR) {
Expand Down
4 changes: 2 additions & 2 deletions packages/parse5/lib/extensions/location-info/parser-mixin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ import { Mixin } from '../../utils/mixin.js';
import { LocationInfoTokenizerMixin } from './tokenizer-mixin.js';
import { TAG_NAMES as $, NAMESPACES as NS } from '../../common/html.js';
import type { TreeAdapter, TreeAdapterTypeMap, ElementLocation } from '../../tree-adapters/interface';
import type { Preprocessor } from './../../tokenizer/preprocessor.js';
import type { Parser } from '../../parser/index.js';
import type { PositionTrackingPreprocessorMixin } from '../position-tracking/preprocessor-mixin';
import { TokenType, Token, TagToken } from '../../common/token.js';

export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin<Parser<T>> {
treeAdapter: TreeAdapter<T>;
posTracker: PositionTrackingPreprocessorMixin | null = null;
posTracker: Preprocessor | null = null;
lastStartTagToken: null | TagToken = null;
lastFosterParentingLocation: null | ReturnType<Parser<T>['_findFosterParentingLocation']> = null;
currentToken: Token | null = null;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import { Mixin } from '../../utils/mixin.js';
import { Tokenizer } from '../../tokenizer/index.js';
import { PositionTrackingPreprocessorMixin } from '../position-tracking/preprocessor-mixin.js';
import type { Preprocessor } from './../../tokenizer/preprocessor.js';
import { TokenType, Location, LocationWithAttributes } from '../../common/token.js';

export class LocationInfoTokenizerMixin extends Mixin<Tokenizer> {
posTracker: PositionTrackingPreprocessorMixin;
posTracker: Preprocessor;
currentAttrLocation: Location | null = null;
ctLoc: Location | null = null;

constructor(private tokenizer: Tokenizer) {
super(tokenizer);

this.posTracker = Mixin.install(tokenizer.preprocessor, PositionTrackingPreprocessorMixin);
this.posTracker = tokenizer.preprocessor;
}

_getCurrentLocation(): Location {
Expand Down

This file was deleted.

4 changes: 2 additions & 2 deletions packages/parse5/lib/tokenizer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ export class Tokenizer {
//1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
//2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f')
//3)TokenType.CHARACTER - any character sequence that doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
_appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string) {
private _appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string) {
if (this.currentCharacterToken && this.currentCharacterToken.type !== type) {
this._emitCurrentCharacterToken();
}
Expand All @@ -472,7 +472,7 @@ export class Tokenizer {
}
}

_emitCodePoint(cp: number) {
private _emitCodePoint(cp: number) {
let type = TokenType.CHARACTER;

if (isWhitespace(cp)) {
Expand Down
45 changes: 39 additions & 6 deletions packages/parse5/lib/tokenizer/preprocessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,25 @@ const DEFAULT_BUFFER_WATERLINE = 1 << 16;
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
export class Preprocessor {
html: string | null = null;
pos = -1;
lastGapPos = -1;
lastCharPos = -1;
gapStack: number[] = [];
skipNextNewLine = false;
lastChunkWritten = false;
private pos = -1;
private lastGapPos = -1;
private lastCharPos = -1;
private gapStack: number[] = [];
private skipNextNewLine = false;
private lastChunkWritten = false;
endOfChunkHit = false;
bufferWaterline = DEFAULT_BUFFER_WATERLINE;

private isEol = false;
lineStartPos = 0;
droppedBufferSize = 0;
col = 0;
line = 1;

/**
 * Position expressed relative to everything processed so far: the size of
 * the buffer content that has already been dropped plus the current
 * position inside the remaining buffer.
 */
get offset(): number {
    const { droppedBufferSize, pos } = this;

    return droppedBufferSize + pos;
}

/**
 * Error-reporting hook. Does nothing in this class; per the note below,
 * reporting is switched on by a mixin.
 *
 * @param _err Identifier of the error being reported (unused here;
 *   presumably an error code — confirm against the mixin that enables it).
 */
_err(_err: string) {
// NOTE: err reporting is noop by default. Enabled by mixin.
}
Expand Down Expand Up @@ -59,13 +69,20 @@ export class Preprocessor {
}

/**
 * Discards the part of the buffer that precedes the current position once
 * that position has moved past `bufferWaterline`, then rebases the position
 * bookkeeping so that line starts and the dropped-size counter stay
 * consistent with the shortened buffer.
 */
dropParsedChunk() {
    const posBeforeDrop = this.pos;
    const pastWaterline = posBeforeDrop > this.bufferWaterline;

    if (pastWaterline) {
        this.lastCharPos -= posBeforeDrop;
        this.html = this.html!.substring(posBeforeDrop);
        // Gap tracking refers to positions in the old buffer, so reset it.
        this.gapStack = [];
        this.lastGapPos = -1;
        this.pos = 0;
    }

    // Zero when nothing was dropped, otherwise the number of characters removed.
    const shift = posBeforeDrop - this.pos;

    this.droppedBufferSize += shift;
    this.lineStartPos -= shift;
}

write(chunk: string, isLastChunk: boolean) {
Expand All @@ -91,13 +108,26 @@ export class Preprocessor {
advance(): number {
this.pos++;

//NOTE: LF should be in the last column of the line
if (this.isEol) {
this.isEol = false;
this.line++;
this.lineStartPos = this.pos;
}

this.col = this.pos - this.lineStartPos + 1;

if (this.pos > this.lastCharPos) {
this.endOfChunkHit = !this.lastChunkWritten;
return $.EOF;
}

let cp = this.html!.charCodeAt(this.pos);

if (cp === $.LINE_FEED || (cp === $.CARRIAGE_RETURN && this.html!.charCodeAt(this.pos + 1) !== $.LINE_FEED)) {
this.isEol = true;
}

//NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
//must be ignored.
if (this.skipNextNewLine && cp === $.LINE_FEED) {
Expand Down Expand Up @@ -146,5 +176,8 @@ export class Preprocessor {
}

this.pos--;

this.isEol = false;
this.col = this.pos - this.lineStartPos + 1;
}
}

0 comments on commit 3ab6403

Please sign in to comment.