From 7505d87669e439aa11ea44da82a5bb32ba4b07b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Sun, 27 Feb 2022 07:35:22 +0000
Subject: [PATCH] refactor(tokenizer): Introduce events (#404)

---
 .../parse5-html-rewriting-stream/lib/index.ts |   8 +-
 packages/parse5-sax-parser/lib/index.ts       | 186 ++++++------
 .../lib/parser-feedback-simulator.ts          | 132 ++++----
 .../test/parser-feedback-simulator.test.ts    |  14 +-
 packages/parse5/lib/parser/index.ts           |   8 +-
 packages/parse5/lib/tokenizer/index.test.ts   |  24 +-
 packages/parse5/lib/tokenizer/index.ts        | 287 ++++++++++--------
 packages/parse5/lib/tokenizer/preprocessor.ts |  12 +-
 packages/parse5/lib/tokenizer/queued.ts       | 105 +++++++
 .../tokenizer/tokenizer-location-info.test.ts |  82 +++--
 .../generate-parser-feedback-test/index.ts    |  46 ++-
 test/utils/generate-tokenization-tests.ts     | 210 +++++++------
 12 files changed, 672 insertions(+), 442 deletions(-)
 create mode 100644 packages/parse5/lib/tokenizer/queued.ts

diff --git a/packages/parse5-html-rewriting-stream/lib/index.ts b/packages/parse5-html-rewriting-stream/lib/index.ts
index 24d6e6205..1d0694062 100644
--- a/packages/parse5-html-rewriting-stream/lib/index.ts
+++ b/packages/parse5-html-rewriting-stream/lib/index.ts
@@ -1,4 +1,4 @@
-import type { Token, Location } from 'parse5/dist/common/token.js';
+import type { Location } from 'parse5/dist/common/token.js';
 import { SAXParser, EndTag, StartTag, Doctype, Text, Comment, SaxToken } from 'parse5-sax-parser';
 import { escapeString } from 'parse5/dist/serializer/index.js';
 
@@ -73,9 +73,9 @@ export class RewritingStream extends SAXParser {
     }
 
     // Events
-    protected override _handleToken(token: Token): boolean {
-        if (!super._handleToken(token)) {
-            this.emitRaw(this._getRawHtml(token.location!));
+    protected override emitIfListenerExists(eventName: string, token: SaxToken): boolean {
+        if (!super.emitIfListenerExists(eventName, token)) {
+            this.emitRaw(this._getRawHtml(token.sourceCodeLocation!));
         }
 
         // NOTE: don't skip new lines after <pre> and other tags,
diff --git a/packages/parse5-sax-parser/lib/index.ts b/packages/parse5-sax-parser/lib/index.ts
index 304047870..53e492599 100644
--- a/packages/parse5-sax-parser/lib/index.ts
+++ b/packages/parse5-sax-parser/lib/index.ts
@@ -1,6 +1,13 @@
 import { Transform } from 'node:stream';
-import { Tokenizer } from 'parse5/dist/tokenizer/index.js';
-import { TokenType, Token, CharacterToken, Attribute, Location } from 'parse5/dist/common/token.js';
+import type { Tokenizer, TokenHandler } from 'parse5/dist/tokenizer/index.js';
+import type {
+    Attribute,
+    Location,
+    TagToken,
+    CommentToken,
+    DoctypeToken,
+    CharacterToken,
+} from 'parse5/dist/common/token.js';
 import { DevNullStream } from './dev-null-stream.js';
 import { ParserFeedbackSimulator } from './parser-feedback-simulator.js';
 
@@ -39,13 +46,13 @@ export interface SAXParserOptions {
  *     });
  * ```
  */
-export class SAXParser extends Transform {
+export class SAXParser extends Transform implements TokenHandler {
     protected options: SAXParserOptions;
-    protected tokenizer: Tokenizer;
     protected parserFeedbackSimulator: ParserFeedbackSimulator;
-    private pendingText: CharacterToken | null = null;
+    private pendingText: Text | null = null;
     private lastChunkWritten = false;
     private stopped = false;
+    protected tokenizer: Tokenizer;
 
     /**
      * @param options Parsing options.
@@ -58,8 +65,8 @@ export class SAXParser extends Transform {
             ...options,
         };
 
-        this.tokenizer = new Tokenizer(this.options);
-        this.parserFeedbackSimulator = new ParserFeedbackSimulator(this.tokenizer);
+        this.parserFeedbackSimulator = new ParserFeedbackSimulator(this.options, this);
+        this.tokenizer = this.parserFeedbackSimulator.tokenizer;
 
         // NOTE: always pipe the stream to the /dev/null stream to avoid
         // hitting `highWaterMark` even when we don't have consumers.
@@ -127,96 +134,95 @@ export class SAXParser extends Transform {
     }
 
     private _runParsingLoop(): void {
-        let token = null;
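+        // NOTE: with the event-based tokenizer, tokens now arrive through the
+        // TokenHandler callbacks below; this loop only drives the tokenizer.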
+        while (!this.stopped && this.tokenizer.active) {
+            this.tokenizer.getNextToken();
+        }
+    }
 
-        do {
-            token = this.parserFeedbackSimulator.getNextToken();
+    /** @internal */
+    onCharacter({ chars, location }: CharacterToken): void {
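+        // Coalesce adjacent character tokens into a single pending text token,
+        // extending its source location to cover the appended characters.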
+        if (this.pendingText === null) {
+            this.pendingText = { text: chars, sourceCodeLocation: location };
+        } else {
+            this.pendingText.text += chars;
 
-            if (token.type === TokenType.HIBERNATION) {
-                break;
+            if (location && this.pendingText.sourceCodeLocation) {
+                const { endLine, endCol, endOffset } = location;
+                this.pendingText.sourceCodeLocation = {
+                    ...this.pendingText.sourceCodeLocation,
+                    endLine,
+                    endCol,
+                    endOffset,
+                };
             }
+        }
+    }
 
-            if (
-                token.type === TokenType.CHARACTER ||
-                token.type === TokenType.WHITESPACE_CHARACTER ||
-                token.type === TokenType.NULL_CHARACTER
-            ) {
-                if (this.pendingText === null) {
-                    token.type = TokenType.CHARACTER;
-                    this.pendingText = token;
-                } else {
-                    this.pendingText.chars += token.chars;
-
-                    if (token.location && this.pendingText.location) {
-                        const { endLine, endCol, endOffset } = token.location;
-                        this.pendingText.location = {
-                            ...this.pendingText.location,
-                            endLine,
-                            endCol,
-                            endOffset,
-                        };
-                    }
-                }
-            } else {
-                this._emitPendingText();
-                this._handleToken(token);
-            }
-        } while (!this.stopped && token.type !== TokenType.EOF);
+    /** @internal */
+    onWhitespaceCharacter(token: CharacterToken): void {
+        this.onCharacter(token);
     }
 
-    protected _handleToken(token: Token): boolean {
-        switch (token.type) {
-            case TokenType.EOF: {
-                return true;
-            }
-            case TokenType.START_TAG: {
-                const startTag: StartTag = {
-                    tagName: token.tagName,
-                    attrs: token.attrs,
-                    selfClosing: token.selfClosing,
-                    sourceCodeLocation: token.location,
-                };
-                return this._emitIfListenerExists('startTag', startTag);
-            }
-            case TokenType.END_TAG: {
-                const endTag: EndTag = {
-                    tagName: token.tagName,
-                    sourceCodeLocation: token.location,
-                };
-                return this._emitIfListenerExists('endTag', endTag);
-            }
-            case TokenType.COMMENT: {
-                const comment: Comment = {
-                    text: token.data,
-                    sourceCodeLocation: token.location,
-                };
-                return this._emitIfListenerExists('comment', comment);
-            }
-            case TokenType.DOCTYPE: {
-                const doctype: Doctype = {
-                    name: token.name,
-                    publicId: token.publicId,
-                    systemId: token.systemId,
-                    sourceCodeLocation: token.location,
-                };
-                return this._emitIfListenerExists('doctype', doctype);
-            }
-            case TokenType.CHARACTER:
-            case TokenType.NULL_CHARACTER:
-            case TokenType.WHITESPACE_CHARACTER: {
-                const text: Text = {
-                    text: token.chars,
-                    sourceCodeLocation: token.location,
-                };
-                return this._emitIfListenerExists('text', text);
-            }
-            case TokenType.HIBERNATION: {
-                return this._emitIfListenerExists('hibernation', {});
-            }
-        }
+    /** @internal */
+    onNullCharacter(token: CharacterToken): void {
+        this.onCharacter(token);
+    }
+
+    /** @internal */
+    onEof(): void {
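+        // Flush any buffered text before the stream stops.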
+        this._emitPendingText();
+        this.stopped = true;
+    }
+
+    /** @internal */
+    onStartTag(token: TagToken): void {
+        this._emitPendingText();
+
+        const startTag: StartTag = {
+            tagName: token.tagName,
+            attrs: token.attrs,
+            selfClosing: token.selfClosing,
+            sourceCodeLocation: token.location,
+        };
+        this.emitIfListenerExists('startTag', startTag);
+    }
+
+    /** @internal */
+    onEndTag(token: TagToken): void {
+        this._emitPendingText();
+
+        const endTag: EndTag = {
+            tagName: token.tagName,
+            sourceCodeLocation: token.location,
+        };
+        this.emitIfListenerExists('endTag', endTag);
+    }
+
+    /** @internal */
+    onDoctype(token: DoctypeToken): void {
+        this._emitPendingText();
+
+        const doctype: Doctype = {
+            name: token.name,
+            publicId: token.publicId,
+            systemId: token.systemId,
+            sourceCodeLocation: token.location,
+        };
+        this.emitIfListenerExists('doctype', doctype);
+    }
+
+    /** @internal */
+    onComment(token: CommentToken): void {
+        this._emitPendingText();
+
+        const comment: Comment = {
+            text: token.data,
+            sourceCodeLocation: token.location,
+        };
+        this.emitIfListenerExists('comment', comment);
     }
 
-    private _emitIfListenerExists(eventName: string, token: SaxToken): boolean {
+    protected emitIfListenerExists(eventName: string, token: SaxToken): boolean {
         if (this.listenerCount(eventName) === 0) {
             return false;
         }
@@ -232,7 +238,7 @@ export class SAXParser extends Transform {
 
     private _emitPendingText(): void {
         if (this.pendingText !== null) {
-            this._handleToken(this.pendingText);
+            this.emitIfListenerExists('text', this.pendingText);
             this.pendingText = null;
         }
     }
diff --git a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts
index 73b4221e0..e51c6ee31 100644
--- a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts
+++ b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts
@@ -1,63 +1,74 @@
-import { Tokenizer, TokenizerMode } from 'parse5/dist/tokenizer/index.js';
-import { TokenType, Token, TagToken } from 'parse5/dist/common/token.js';
+import { Tokenizer, TokenizerOptions, TokenizerMode, TokenHandler } from 'parse5/dist/tokenizer/index.js';
+import { TokenType, TagToken, CommentToken, DoctypeToken, CharacterToken, EOFToken } from 'parse5/dist/common/token.js';
 import * as foreignContent from 'parse5/dist/common/foreign-content.js';
 import * as unicode from 'parse5/dist/common/unicode.js';
 import { TAG_ID as $, TAG_NAMES as TN, NAMESPACES as NS, getTagID } from 'parse5/dist/common/html.js';
 
 //ParserFeedbackSimulator
 //Simulates the adjustment of the Tokenizer that the standard parser performs during tree construction.
-export class ParserFeedbackSimulator {
+export class ParserFeedbackSimulator implements TokenHandler {
     private namespaceStack: NS[] = [];
     private inForeignContent = false;
     public skipNextNewLine = false;
+    public tokenizer: Tokenizer;
 
-    constructor(private tokenizer: Tokenizer) {
+    constructor(options: TokenizerOptions, private handler: TokenHandler) {
+        this.tokenizer = new Tokenizer(options, this);
         this._enterNamespace(NS.HTML);
     }
 
-    public getNextToken(): Token {
-        const token = this.tokenizer.getNextToken();
+    /** @internal */
+    onNullCharacter(token: CharacterToken): void {
+        this.skipNextNewLine = false;
 
-        switch (token.type) {
-            case TokenType.START_TAG: {
-                this._handleStartTagToken(token);
-                break;
-            }
-            case TokenType.END_TAG: {
-                this._handleEndTagToken(token);
-                break;
-            }
-
-            case TokenType.NULL_CHARACTER: {
-                this.skipNextNewLine = false;
-                if (this.inForeignContent) {
-                    token.type = TokenType.CHARACTER;
-                    token.chars = unicode.REPLACEMENT_CHARACTER;
-                }
-                break;
-            }
-            case TokenType.WHITESPACE_CHARACTER: {
-                if (this.skipNextNewLine && token.chars.charCodeAt(0) === unicode.CODE_POINTS.LINE_FEED) {
-                    this.skipNextNewLine = false;
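+        // In foreign content, a NULL character is forwarded as a regular
+        // character token carrying U+FFFD, mirroring the tree-construction spec.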
+        if (this.inForeignContent) {
+            this.handler.onCharacter({
+                type: TokenType.CHARACTER,
+                chars: unicode.REPLACEMENT_CHARACTER,
+                location: token.location,
+            });
+        } else {
+            this.handler.onNullCharacter(token);
+        }
+    }
 
-                    if (token.chars.length === 1) {
-                        return this.getNextToken();
-                    }
+    /** @internal */
+    onWhitespaceCharacter(token: CharacterToken): void {
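+        // Drop a single leading line feed when requested (e.g. the newline
+        // directly after a `<pre>` start tag).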
+        if (this.skipNextNewLine && token.chars.charCodeAt(0) === unicode.CODE_POINTS.LINE_FEED) {
+            this.skipNextNewLine = false;
 
-                    token.chars = token.chars.substr(1);
-                }
-                break;
-            }
-            case TokenType.HIBERNATION: {
-                // Ignore
-                break;
-            }
-            default: {
-                this.skipNextNewLine = false;
+            if (token.chars.length === 1) {
+                return;
             }
+
+            token.chars = token.chars.substr(1);
         }
 
-        return token;
+        this.handler.onWhitespaceCharacter(token);
+    }
+
+    /** @internal */
+    onCharacter(token: CharacterToken): void {
+        this.skipNextNewLine = false;
+        this.handler.onCharacter(token);
+    }
+
+    /** @internal */
+    onComment(token: CommentToken): void {
+        this.skipNextNewLine = false;
+        this.handler.onComment(token);
+    }
+
+    /** @internal */
+    onDoctype(token: DoctypeToken): void {
+        this.skipNextNewLine = false;
+        this.handler.onDoctype(token);
+    }
+
+    /** @internal */
+    onEof(token: EOFToken): void {
+        this.skipNextNewLine = false;
+        this.handler.onEof(token);
     }
 
     //Namespace stack mutations
@@ -103,7 +114,8 @@ export class ParserFeedbackSimulator {
         }
     }
 
-    private _handleStartTagToken(token: TagToken): void {
+    /** @internal */
+    onStartTag(token: TagToken): void {
         let tn = token.tagID;
 
         switch (tn) {
@@ -122,24 +134,23 @@ export class ParserFeedbackSimulator {
         if (this.inForeignContent) {
             if (foreignContent.causesExit(token)) {
                 this._leaveCurrentNamespace();
-                return;
-            }
-
-            const currentNs = this.namespaceStack[0];
-
-            if (currentNs === NS.MATHML) {
-                foreignContent.adjustTokenMathMLAttrs(token);
-            } else if (currentNs === NS.SVG) {
-                foreignContent.adjustTokenSVGTagName(token);
-                foreignContent.adjustTokenSVGAttrs(token);
-            }
+            } else {
+                const currentNs = this.namespaceStack[0];
+
+                if (currentNs === NS.MATHML) {
+                    foreignContent.adjustTokenMathMLAttrs(token);
+                } else if (currentNs === NS.SVG) {
+                    foreignContent.adjustTokenSVGTagName(token);
+                    foreignContent.adjustTokenSVGAttrs(token);
+                }
 
-            foreignContent.adjustTokenXMLAttrs(token);
+                foreignContent.adjustTokenXMLAttrs(token);
 
-            tn = token.tagID;
+                tn = token.tagID;
 
-            if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
-                this._enterNamespace(NS.HTML);
+                if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
+                    this._enterNamespace(NS.HTML);
+                }
             }
         } else {
             switch (tn) {
@@ -160,9 +171,12 @@ export class ParserFeedbackSimulator {
 
             this._ensureTokenizerMode(tn);
         }
+
+        this.handler.onStartTag(token);
     }
 
-    private _handleEndTagToken(token: TagToken): void {
+    /** @internal */
+    onEndTag(token: TagToken): void {
         let tn = token.tagID;
 
         if (!this.inForeignContent) {
@@ -191,5 +205,7 @@ export class ParserFeedbackSimulator {
         if (this.namespaceStack[0] === NS.SVG) {
             foreignContent.adjustTokenSVGTagName(token);
         }
+
+        this.handler.onEndTag(token);
     }
 }
diff --git a/packages/parse5-sax-parser/test/parser-feedback-simulator.test.ts b/packages/parse5-sax-parser/test/parser-feedback-simulator.test.ts
index 1015398a6..858769a6f 100644
--- a/packages/parse5-sax-parser/test/parser-feedback-simulator.test.ts
+++ b/packages/parse5-sax-parser/test/parser-feedback-simulator.test.ts
@@ -1,13 +1,11 @@
-import { Tokenizer } from 'parse5/dist/tokenizer/index.js';
-import type { Token } from 'parse5/dist/common/token.js';
 import { generateTokenizationTests } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
 import { ParserFeedbackSimulator } from '../lib/parser-feedback-simulator.js';
 
 const feedbackPath = new URL('../../../test/data/parser-feedback', import.meta.url);
 
-generateTokenizationTests('ParserFeedbackSimulator', 'ParserFeedbackSimulator', feedbackPath.pathname, () => {
-    const tokenizer = new Tokenizer({});
-    const feedbackSimulator = new ParserFeedbackSimulator(tokenizer);
-
-    return { tokenizer, getNextToken: (): Token => feedbackSimulator.getNextToken() };
-});
+generateTokenizationTests(
+    'ParserFeedbackSimulator',
+    'ParserFeedbackSimulator',
+    feedbackPath.pathname,
+    (handler) => new ParserFeedbackSimulator({}, handler).tokenizer
+);
diff --git a/packages/parse5/lib/parser/index.ts b/packages/parse5/lib/parser/index.ts
index b3045620a..bc60640be 100644
--- a/packages/parse5/lib/parser/index.ts
+++ b/packages/parse5/lib/parser/index.ts
@@ -1,4 +1,5 @@
-import { Tokenizer, TokenizerMode } from '../tokenizer/index.js';
+import { TokenizerMode } from '../tokenizer/index.js';
+import { QueuedTokenizer } from '../tokenizer/queued.js';
 import { OpenElementStack } from './open-element-stack.js';
 import { FormattingElementList, ElementEntry, EntryType } from './formatting-element-list.js';
 import * as defaultTreeAdapter from '../tree-adapters/default.js';
@@ -146,7 +147,7 @@ export class Parser {
 
         this.document = document ?? this.treeAdapter.createDocument();
 
-        this.tokenizer = new Tokenizer(this.options);
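+        // NOTE: the parser still pulls tokens one at a time, so it uses a
+        // queued wrapper around the event-based tokenizer.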
+        this.tokenizer = new QueuedTokenizer(this.options);
         this.activeFormattingElements = new FormattingElementList(this.treeAdapter);
 
         this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
@@ -210,8 +211,7 @@ export class Parser {
         return fragment;
     }
 
-    tokenizer: Tokenizer;
-
+    tokenizer: QueuedTokenizer;
     stopped = false;
     insertionMode = InsertionMode.INITIAL;
     originalInsertionMode = InsertionMode.INITIAL;
diff --git a/packages/parse5/lib/tokenizer/index.test.ts b/packages/parse5/lib/tokenizer/index.test.ts
index 1c1a0c67a..f34733077 100644
--- a/packages/parse5/lib/tokenizer/index.test.ts
+++ b/packages/parse5/lib/tokenizer/index.test.ts
@@ -1,24 +1,18 @@
 import * as parse5 from 'parse5';
 import { Tokenizer } from 'parse5/dist/tokenizer/index.js';
-import type { Token } from 'parse5/dist/common/token';
 import { generateTokenizationTests } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
 
 const dataPath = new URL('../../../../test/data/html5lib-tests/tokenizer', import.meta.url);
+const tokenizerOpts = {
+    sourceCodeLocationInfo: true,
+};
 
-generateTokenizationTests('tokenizer', 'Tokenizer', dataPath.pathname, ({ errors }) => {
-    const tokenizer = new Tokenizer({
-        sourceCodeLocationInfo: true,
-        onParseError(err): void {
-            errors.push({
-                code: err.code,
-                line: err.startLine,
-                col: err.startCol,
-            });
-        },
-    });
-
-    return { tokenizer, getNextToken: (): Token => tokenizer.getNextToken() };
-});
+generateTokenizationTests(
+    'tokenizer',
+    'Tokenizer',
+    dataPath.pathname,
+    (handler) => new Tokenizer(tokenizerOpts, handler)
+);
 
 describe('tokenizer', () => {
     it('Regression - `<<` in comment parses correctly (GH-325)', () => {
diff --git a/packages/parse5/lib/tokenizer/index.ts b/packages/parse5/lib/tokenizer/index.ts
index eead9ec90..8968bb9fa 100644
--- a/packages/parse5/lib/tokenizer/index.ts
+++ b/packages/parse5/lib/tokenizer/index.ts
@@ -13,6 +13,7 @@ import {
     CharacterToken,
     DoctypeToken,
     TagToken,
+    EOFToken,
     getTokenAttr,
     CommentToken,
     Attribute,
@@ -53,8 +54,6 @@ const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map([
     [0x9f, 0x01_78],
 ]);
 
-const HIBERNATION_TOKEN: Token = { type: TokenType.HIBERNATION, location: null };
-
 //States
 const enum State {
     DATA,
@@ -203,11 +202,29 @@ function isScriptDataDoubleEscapeSequenceEnd(cp: number): boolean {
     return isWhitespace(cp) || cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN;
 }
 
+export interface TokenizerOptions {
+    sourceCodeLocationInfo?: boolean;
+}
+
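+/**
+ * A set of callbacks that the tokenizer invokes as it emits tokens,
+ * replacing the former pull-based `getNextToken` return value.
+ */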
+export interface TokenHandler {
+    onComment(token: CommentToken): void;
+    onDoctype(token: DoctypeToken): void;
+    onStartTag(token: TagToken): void;
+    onEndTag(token: TagToken): void;
+    onEof(token: EOFToken): void;
+    onCharacter(token: CharacterToken): void;
+    onNullCharacter(token: CharacterToken): void;
+    onWhitespaceCharacter(token: CharacterToken): void;
+
+    onParseError?: ParserErrorHandler | null;
+}
+
 //Tokenizer
 export class Tokenizer {
     public preprocessor: Preprocessor;
 
-    private tokenQueue: Token[] = [];
+    /** Indicates that the next token has been emitted, and `getNextToken` should return. */
+    private hasEmitted = false;
 
     public allowCDATA = false;
     public lastStartTagName = '';
@@ -225,24 +242,19 @@ export class Tokenizer {
     private currentToken: Token | null = null;
     private currentAttr: Attribute = { name: '', value: '' };
 
-    private addLocationInfo;
-    private onParseError;
-
-    constructor(options: { sourceCodeLocationInfo?: boolean; onParseError?: ParserErrorHandler | null }) {
-        this.addLocationInfo = !!options.sourceCodeLocationInfo;
-        this.onParseError = options.onParseError ?? null;
-        this.preprocessor = new Preprocessor(options);
+    constructor(private options: TokenizerOptions, private handler: TokenHandler) {
+        this.preprocessor = new Preprocessor(handler);
         this.currentLocation = this.getCurrentLocation(-1);
     }
 
     //Errors
     private _err(code: ERR): void {
-        this.onParseError?.(this.preprocessor.getError(code));
+        this.handler.onParseError?.(this.preprocessor.getError(code));
     }
 
     // NOTE: `offset` may never run across line boundaries.
     private getCurrentLocation(offset: number): Location | null {
-        if (!this.addLocationInfo) {
+        if (!this.options.sourceCodeLocationInfo) {
             return null;
         }
 
@@ -257,8 +269,9 @@ export class Tokenizer {
     }
 
     //API
-    public getNextToken(): Token {
-        while (this.tokenQueue.length === 0 && this.active) {
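+    /** Runs the state machine until at least one token has been emitted to the handler. */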
+    public getNextToken(): void {
+        this.hasEmitted = false;
+        while (!this.hasEmitted && this.active) {
             this.consumedAfterSnapshot = 0;
 
             const cp = this._consume();
@@ -267,8 +280,6 @@ export class Tokenizer {
                 this._callState(cp);
             }
         }
-
-        return this.tokenQueue.shift()!;
     }
 
     public write(chunk: string, isLastChunk: boolean): void {
@@ -286,7 +297,6 @@ export class Tokenizer {
         if (this.preprocessor.endOfChunkHit) {
             this._unconsume(this.consumedAfterSnapshot);
             this.active = false;
-            this.tokenQueue.push(HIBERNATION_TOKEN);
 
             return true;
         }
@@ -378,18 +388,6 @@ export class Tokenizer {
         };
     }
 
-    private _createEOFToken(): void {
-        const location = this.getCurrentLocation(0);
-
-        if (location) {
-            location.endLine = location.startLine;
-            location.endCol = location.startCol;
-            location.endOffset = location.startOffset;
-        }
-
-        this.currentToken = { type: TokenType.EOF, location };
-    }
-
     //Tag attributes
     private _createAttr(attrNameFirstCh: string): void {
         this.currentAttr = {
@@ -426,46 +424,53 @@ export class Tokenizer {
     }
 
     //Token emission
-    private _emitCurrentToken(): void {
-        const ct = this.currentToken!;
-
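+    /** Flushes any pending character token and finalizes the token's end location. */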
+    private prepareToken(ct: Token): void {
         this._emitCurrentCharacterToken(ct.location);
-
         this.currentToken = null;
 
-        //NOTE: store emited start tag's tagName to determine is the following end tag token is appropriate.
-        switch (ct.type) {
-            case TokenType.START_TAG: {
-                ct.tagID = getTagID(ct.tagName);
-                this.lastStartTagName = ct.tagName;
-                break;
-            }
-            case TokenType.END_TAG: {
-                ct.tagID = getTagID(ct.tagName);
-
-                if (ct.attrs.length > 0) {
-                    this._err(ERR.endTagWithAttributes);
-                }
-
-                if (ct.selfClosing) {
-                    this._err(ERR.endTagWithTrailingSolidus);
-                }
-                break;
-            }
-            default:
-            // Do nothing
-        }
-
-        if (ct.location && ct.type !== TokenType.EOF) {
+        if (ct.location) {
             ct.location.endLine = this.preprocessor.line;
             ct.location.endCol = this.preprocessor.col + 1;
             ct.location.endOffset = this.preprocessor.offset + 1;
         }
 
-        this.tokenQueue.push(ct);
+        this.hasEmitted = true;
         this.currentLocation = this.getCurrentLocation(-1);
     }
 
+    private emitCurrentTagToken(): void {
+        const ct = this.currentToken as TagToken;
+
+        this.prepareToken(ct);
+
+        ct.tagID = getTagID(ct.tagName);
+
+        if (ct.type === TokenType.START_TAG) {
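+            // NOTE: store the emitted start tag's name to later determine
+            // whether an end tag token is appropriate.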
+            this.lastStartTagName = ct.tagName;
+            this.handler.onStartTag(ct);
+        } else {
+            if (ct.attrs.length > 0) {
+                this._err(ERR.endTagWithAttributes);
+            }
+
+            if (ct.selfClosing) {
+                this._err(ERR.endTagWithTrailingSolidus);
+            }
+
+            this.handler.onEndTag(ct);
+        }
+    }
+
+    private emitCurrentComment(ct: CommentToken): void {
+        this.prepareToken(ct);
+        this.handler.onComment(ct);
+    }
+
+    private emitCurrentDoctype(ct: DoctypeToken): void {
+        this.prepareToken(ct);
+        this.handler.onDoctype(ct);
+    }
+
     private _emitCurrentCharacterToken(nextLocation: Location | null): void {
         if (this.currentCharacterToken) {
             //NOTE: if we have a pending character token, make its end location equal to the
@@ -476,14 +481,38 @@ export class Tokenizer {
                 this.currentCharacterToken.location.endOffset = nextLocation.startOffset;
             }
 
-            this.tokenQueue.push(this.currentCharacterToken);
+            switch (this.currentCharacterToken.type) {
+                case TokenType.CHARACTER: {
+                    this.handler.onCharacter(this.currentCharacterToken);
+                    break;
+                }
+                case TokenType.NULL_CHARACTER: {
+                    this.handler.onNullCharacter(this.currentCharacterToken);
+                    break;
+                }
+                case TokenType.WHITESPACE_CHARACTER: {
+                    this.handler.onWhitespaceCharacter(this.currentCharacterToken);
+                    break;
+                }
+            }
+
+            this.hasEmitted = true;
             this.currentCharacterToken = null;
         }
     }
 
     private _emitEOFToken(): void {
-        this._createEOFToken();
-        this._emitCurrentToken();
+        const location = this.getCurrentLocation(0);
+
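+        // An EOF token has zero width; collapse its location to a single point.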
+        if (location) {
+            location.endLine = location.startLine;
+            location.endCol = location.startCol;
+            location.endOffset = location.startOffset;
+        }
+
+        this._emitCurrentCharacterToken(location);
+        this.handler.onEof({ type: TokenType.EOF, location });
+        this.hasEmitted = true;
     }
 
     //Characters emission
@@ -497,16 +526,17 @@ export class Tokenizer {
     //2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n  \r\t   \f')
     //3)TokenType.CHARACTER - any character sequence that doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
     private _appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string): void {
-        if (this.currentCharacterToken && this.currentCharacterToken.type !== type) {
-            this.currentLocation = this.getCurrentLocation(0);
-            this._emitCurrentCharacterToken(this.currentLocation);
-        }
-
         if (this.currentCharacterToken) {
-            this.currentCharacterToken.chars += ch;
-        } else {
-            this._createCharacterToken(type, ch);
+            if (this.currentCharacterToken.type !== type) {
+                this.currentLocation = this.getCurrentLocation(0);
+                this._emitCurrentCharacterToken(this.currentLocation);
+            } else {
+                this.currentCharacterToken.chars += ch;
+                return;
+            }
         }
+
+        this._createCharacterToken(type, ch);
     }
 
     private _emitCodePoint(cp: number): void {
@@ -1142,7 +1172,7 @@ export class Tokenizer {
             }
             case $.GREATER_THAN_SIGN: {
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentTagToken();
                 break;
             }
             case $.NULL: {
@@ -1213,7 +1243,7 @@ export class Tokenizer {
             }
             case $.GREATER_THAN_SIGN: {
                 this._advanceBy(this.lastStartTagName.length);
-                this._emitCurrentToken();
+                this.emitCurrentTagToken();
                 this.state = State.DATA;
                 return false;
             }
@@ -1710,7 +1740,7 @@ export class Tokenizer {
             }
             case $.GREATER_THAN_SIGN: {
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentTagToken();
                 break;
             }
             case $.EOF: {
@@ -1748,7 +1778,7 @@ export class Tokenizer {
             case $.GREATER_THAN_SIGN: {
                 this._err(ERR.missingAttributeValue);
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentTagToken();
                 break;
             }
             default: {
@@ -1836,7 +1866,7 @@ export class Tokenizer {
             case $.GREATER_THAN_SIGN: {
                 this._leaveAttrValue();
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentTagToken();
                 break;
             }
             case $.NULL: {
@@ -1884,7 +1914,7 @@ export class Tokenizer {
             case $.GREATER_THAN_SIGN: {
                 this._leaveAttrValue();
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentTagToken();
                 break;
             }
             case $.EOF: {
@@ -1905,9 +1935,10 @@ export class Tokenizer {
     private _stateSelfClosingStartTag(cp: number): void {
         switch (cp) {
             case $.GREATER_THAN_SIGN: {
-                (this.currentToken as TagToken).selfClosing = true;
+                const token = this.currentToken as TagToken;
+                token.selfClosing = true;
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentTagToken();
                 break;
             }
             case $.EOF: {
@@ -1931,11 +1962,11 @@ export class Tokenizer {
         switch (cp) {
             case $.GREATER_THAN_SIGN: {
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentComment(token);
                 break;
             }
             case $.EOF: {
-                this._emitCurrentToken();
+                this.emitCurrentComment(token);
                 this._emitEOFToken();
                 break;
             }
@@ -1992,7 +2023,8 @@ export class Tokenizer {
             case $.GREATER_THAN_SIGN: {
                 this._err(ERR.abruptClosingOfEmptyComment);
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                const token = this.currentToken as CommentToken;
+                this.emitCurrentComment(token);
                 break;
             }
             default: {
@@ -2005,6 +2037,7 @@ export class Tokenizer {
     // Comment start dash state
     //------------------------------------------------------------------
     private _stateCommentStartDash(cp: number): void {
+        const token = this.currentToken as CommentToken;
         switch (cp) {
             case $.HYPHEN_MINUS: {
                 this.state = State.COMMENT_END;
@@ -2013,17 +2046,17 @@ export class Tokenizer {
             case $.GREATER_THAN_SIGN: {
                 this._err(ERR.abruptClosingOfEmptyComment);
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentComment(token);
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInComment);
-                this._emitCurrentToken();
+                this.emitCurrentComment(token);
                 this._emitEOFToken();
                 break;
             }
             default: {
-                (this.currentToken as CommentToken).data += '-';
+                token.data += '-';
                 this.state = State.COMMENT;
                 this._stateComment(cp);
             }
@@ -2052,7 +2085,7 @@ export class Tokenizer {
             }
             case $.EOF: {
                 this._err(ERR.eofInComment);
-                this._emitCurrentToken();
+                this.emitCurrentComment(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2120,6 +2153,7 @@ export class Tokenizer {
     // Comment end dash state
     //------------------------------------------------------------------
     private _stateCommentEndDash(cp: number): void {
+        const token = this.currentToken as CommentToken;
         switch (cp) {
             case $.HYPHEN_MINUS: {
                 this.state = State.COMMENT_END;
@@ -2127,12 +2161,12 @@ export class Tokenizer {
             }
             case $.EOF: {
                 this._err(ERR.eofInComment);
-                this._emitCurrentToken();
+                this.emitCurrentComment(token);
                 this._emitEOFToken();
                 break;
             }
             default: {
-                (this.currentToken as CommentToken).data += '-';
+                token.data += '-';
                 this.state = State.COMMENT;
                 this._stateComment(cp);
             }
@@ -2147,7 +2181,7 @@ export class Tokenizer {
         switch (cp) {
             case $.GREATER_THAN_SIGN: {
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentComment(token);
                 break;
             }
             case $.EXCLAMATION_MARK: {
@@ -2160,7 +2194,7 @@ export class Tokenizer {
             }
             case $.EOF: {
                 this._err(ERR.eofInComment);
-                this._emitCurrentToken();
+                this.emitCurrentComment(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2186,12 +2220,12 @@ export class Tokenizer {
             case $.GREATER_THAN_SIGN: {
                 this._err(ERR.incorrectlyClosedComment);
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentComment(token);
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInComment);
-                this._emitCurrentToken();
+                this.emitCurrentComment(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2222,8 +2256,9 @@ export class Tokenizer {
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 this._createDoctypeToken(null);
-                (this.currentToken as DoctypeToken).forceQuirks = true;
-                this._emitCurrentToken();
+                const token = this.currentToken as DoctypeToken;
+                token.forceQuirks = true;
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2259,16 +2294,18 @@ export class Tokenizer {
                 case $.GREATER_THAN_SIGN: {
                     this._err(ERR.missingDoctypeName);
                     this._createDoctypeToken(null);
-                    (this.currentToken as DoctypeToken).forceQuirks = true;
-                    this._emitCurrentToken();
+                    const token = this.currentToken as DoctypeToken;
+                    token.forceQuirks = true;
+                    this.emitCurrentDoctype(token);
                     this.state = State.DATA;
                     break;
                 }
                 case $.EOF: {
                     this._err(ERR.eofInDoctype);
                     this._createDoctypeToken(null);
-                    (this.currentToken as DoctypeToken).forceQuirks = true;
-                    this._emitCurrentToken();
+                    const token = this.currentToken as DoctypeToken;
+                    token.forceQuirks = true;
+                    this.emitCurrentDoctype(token);
                     this._emitEOFToken();
                     break;
                 }
@@ -2294,7 +2331,7 @@ export class Tokenizer {
             }
             case $.GREATER_THAN_SIGN: {
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 break;
             }
             case $.NULL: {
@@ -2305,7 +2342,7 @@ export class Tokenizer {
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2330,13 +2367,13 @@ export class Tokenizer {
             }
             case $.GREATER_THAN_SIGN: {
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2386,13 +2423,13 @@ export class Tokenizer {
                 this._err(ERR.missingDoctypePublicIdentifier);
                 token.forceQuirks = true;
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2432,13 +2469,13 @@ export class Tokenizer {
                 this._err(ERR.missingDoctypePublicIdentifier);
                 token.forceQuirks = true;
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2469,14 +2506,14 @@ export class Tokenizer {
             case $.GREATER_THAN_SIGN: {
                 this._err(ERR.abruptDoctypePublicIdentifier);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this.state = State.DATA;
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2504,14 +2541,14 @@ export class Tokenizer {
             case $.GREATER_THAN_SIGN: {
                 this._err(ERR.abruptDoctypePublicIdentifier);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this.state = State.DATA;
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2536,7 +2573,7 @@ export class Tokenizer {
             }
             case $.GREATER_THAN_SIGN: {
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 break;
             }
             case $.QUOTATION_MARK: {
@@ -2554,7 +2591,7 @@ export class Tokenizer {
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2581,7 +2618,7 @@ export class Tokenizer {
                 break;
             }
             case $.GREATER_THAN_SIGN: {
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this.state = State.DATA;
                 break;
             }
@@ -2598,7 +2635,7 @@ export class Tokenizer {
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2640,13 +2677,13 @@ export class Tokenizer {
                 this._err(ERR.missingDoctypeSystemIdentifier);
                 token.forceQuirks = true;
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2686,13 +2723,13 @@ export class Tokenizer {
                 this._err(ERR.missingDoctypeSystemIdentifier);
                 token.forceQuirks = true;
                 this.state = State.DATA;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2723,14 +2760,14 @@ export class Tokenizer {
             case $.GREATER_THAN_SIGN: {
                 this._err(ERR.abruptDoctypeSystemIdentifier);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this.state = State.DATA;
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2758,14 +2795,14 @@ export class Tokenizer {
             case $.GREATER_THAN_SIGN: {
                 this._err(ERR.abruptDoctypeSystemIdentifier);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this.state = State.DATA;
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2789,14 +2826,14 @@ export class Tokenizer {
                 break;
             }
             case $.GREATER_THAN_SIGN: {
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this.state = State.DATA;
                 break;
             }
             case $.EOF: {
                 this._err(ERR.eofInDoctype);
                 token.forceQuirks = true;
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
@@ -2811,9 +2848,11 @@ export class Tokenizer {
     // Bogus DOCTYPE state
     //------------------------------------------------------------------
     private _stateBogusDoctype(cp: number): void {
+        const token = this.currentToken as DoctypeToken;
+
         switch (cp) {
             case $.GREATER_THAN_SIGN: {
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this.state = State.DATA;
                 break;
             }
@@ -2822,7 +2861,7 @@ export class Tokenizer {
                 break;
             }
             case $.EOF: {
-                this._emitCurrentToken();
+                this.emitCurrentDoctype(token);
                 this._emitEOFToken();
                 break;
             }
diff --git a/packages/parse5/lib/tokenizer/preprocessor.ts b/packages/parse5/lib/tokenizer/preprocessor.ts
index 7cb6d1ec3..7dbc82193 100644
--- a/packages/parse5/lib/tokenizer/preprocessor.ts
+++ b/packages/parse5/lib/tokenizer/preprocessor.ts
@@ -30,11 +30,7 @@ export class Preprocessor {
     public droppedBufferSize = 0;
     public line = 1;
 
-    onParseError: ParserErrorHandler | null;
-
-    constructor(options: { onParseError?: ParserErrorHandler | null }) {
-        this.onParseError = options.onParseError ?? null;
-    }
+    constructor(private handler: { onParseError?: ParserErrorHandler | null }) {}
 
     /** The column on the current line. If we just saw a gap (e.g. a surrogate pair), return the index before. */
     public get col(): number {
@@ -62,9 +58,9 @@ export class Preprocessor {
     //NOTE: avoid reporting error twice on advance/retreat
     private lastErrOffset = -1;
     private _err(code: ERR): void {
-        if (this.onParseError && this.lastErrOffset !== this.offset) {
+        if (this.handler.onParseError && this.lastErrOffset !== this.offset) {
             this.lastErrOffset = this.offset;
-            this.onParseError(this.getError(code));
+            this.handler.onParseError(this.getError(code));
         }
     }
 
@@ -210,7 +206,7 @@ export class Preprocessor {
         //range (ASCII alphanumeric, whitespaces, big chunk of BMP)
         //before going into detailed performance cost validation.
         const isCommonValidRange =
-            this.onParseError === null ||
+            this.handler.onParseError === null ||
             (cp > 0x1f && cp < 0x7f) ||
             cp === $.LINE_FEED ||
             cp === $.CARRIAGE_RETURN ||
diff --git a/packages/parse5/lib/tokenizer/queued.ts b/packages/parse5/lib/tokenizer/queued.ts
new file mode 100644
index 000000000..6ac8178a2
--- /dev/null
+++ b/packages/parse5/lib/tokenizer/queued.ts
@@ -0,0 +1,105 @@
+import { TokenType, Token, CharacterToken, DoctypeToken, TagToken, EOFToken, CommentToken } from '../common/token.js';
+import { TokenHandler, Tokenizer, TokenizerOptions, TokenizerMode } from './index.js';
+import type { ParserErrorHandler } from '../common/error-codes.js';
+import type { Preprocessor } from './preprocessor.js';
+
+const HIBERNATION_TOKEN: Token = { type: TokenType.HIBERNATION, location: null };
+
+/** A token handler implementation that calls the same function for all tokens. */
+export abstract class SinglePathHandler implements TokenHandler {
+    protected abstract handleToken(token: Token): void;
+
+    onComment(token: CommentToken): void {
+        this.handleToken(token);
+    }
+    onDoctype(token: DoctypeToken): void {
+        this.handleToken(token);
+    }
+    onStartTag(token: TagToken): void {
+        this.handleToken(token);
+    }
+    onEndTag(token: TagToken): void {
+        this.handleToken(token);
+    }
+    onEof(token: EOFToken): void {
+        this.handleToken(token);
+    }
+    onCharacter(token: CharacterToken): void {
+        this.handleToken(token);
+    }
+    onNullCharacter(token: CharacterToken): void {
+        this.handleToken(token);
+    }
+    onWhitespaceCharacter(token: CharacterToken): void {
+        this.handleToken(token);
+    }
+}
+
+class QueuedHandler extends SinglePathHandler {
+    private tokenQueue: Token[] = [];
+
+    protected handleToken(token: Token): void {
+        this.tokenQueue.push(token);
+    }
+
+    constructor(public onParseError: ParserErrorHandler | null) {
+        super();
+    }
+
+    public getNextToken(tokenizer: Tokenizer): Token {
+        while (this.tokenQueue.length === 0 && tokenizer.active) {
+            tokenizer.getNextToken();
+        }
+
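+        // The tokenizer goes inactive at chunk boundaries; report this as a
+        // hibernation token, matching the previous tokenizer's behavior.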
+        if (this.tokenQueue.length === 0 && !tokenizer.active) {
+            this.tokenQueue.push(HIBERNATION_TOKEN);
+        }
+
+        return this.tokenQueue.shift()!;
+    }
+}
+
+export interface QueuedTokenizerOptions extends TokenizerOptions {
+    onParseError?: ParserErrorHandler | null;
+}
+
+/**
+ * Provides the same interface as the old tokenizer, while allowing users to
+ * read data one token at a time.
+ */
+export class QueuedTokenizer {
+    private tokenizer: Tokenizer;
+    private handler: QueuedHandler;
+
+    constructor(options: QueuedTokenizerOptions) {
+        this.handler = new QueuedHandler(options.onParseError ?? null);
+        this.tokenizer = new Tokenizer(options, this.handler);
+    }
+
+    set allowCDATA(val: boolean) {
+        this.tokenizer.allowCDATA = val;
+    }
+
+    get preprocessor(): Preprocessor {
+        return this.tokenizer.preprocessor;
+    }
+    get active(): boolean {
+        return this.tokenizer.active;
+    }
+
+    set state(val: typeof TokenizerMode[keyof typeof TokenizerMode]) {
+        this.tokenizer.state = val;
+    }
+
+    public write(chunk: string, isLastChunk: boolean): void {
+        this.tokenizer.write(chunk, isLastChunk);
+    }
+
+    public insertHtmlAtCurrentPos(str: string): void {
+        this.tokenizer.insertHtmlAtCurrentPos(str);
+    }
+
+    public getNextToken(): Token {
+        return this.handler.getNextToken(this.tokenizer);
+    }
+}
diff --git a/packages/parse5/lib/tokenizer/tokenizer-location-info.test.ts b/packages/parse5/lib/tokenizer/tokenizer-location-info.test.ts
index e45a464ce..fe486a0c8 100644
--- a/packages/parse5/lib/tokenizer/tokenizer-location-info.test.ts
+++ b/packages/parse5/lib/tokenizer/tokenizer-location-info.test.ts
@@ -1,8 +1,61 @@
 import * as assert from 'node:assert';
 import { Tokenizer, TokenizerMode } from './index.js';
-import { TokenType } from '../common/token.js';
+import { SinglePathHandler } from './queued.js';
+import { Location, EOFToken, Token } from '../common/token.js';
 import { getSubstringByLineCol, normalizeNewLine } from 'parse5-test-utils/utils/common.js';
 
+interface LocationInfoTestCase {
+    initialMode: typeof TokenizerMode[keyof typeof TokenizerMode];
+    lastStartTagName: string;
+    htmlChunks: string[];
+}
+
+/** Receives events and immediately compares them against the expected values. */
+class LocationInfoHandler extends SinglePathHandler {
+    public sawEof = false;
+    /** The index of the next expected HTML chunk. */
+    private idx = 0;
+    /** All of the lines in the input. */
+    private lines: string[];
+
+    constructor(private testCase: LocationInfoTestCase, private html: string) {
+        super();
+        this.lines = html.split(/\r?\n/g);
+    }
+
+    protected handleToken(token: Token): void {
+        this.validateLocation(token.location);
+    }
+
+    private validateLocation(location: Location | null): void {
+        assert.ok(location);
+
+        //Offsets
+        const actual = this.html.substring(location.startOffset, location.endOffset);
+        const chunk = this.testCase.htmlChunks[this.idx];
+
+        assert.strictEqual(actual, chunk);
+
+        //Line/col
+        const line = getSubstringByLineCol(this.lines, location);
+        const expected = normalizeNewLine(chunk);
+
+        assert.strictEqual(line, expected);
+
+        this.idx += 1;
+    }
+
+    override onEof({ location }: EOFToken): void {
+        assert.ok(location);
+        assert.strictEqual(location.endOffset, location.startOffset);
+        assert.strictEqual(location.endOffset, this.html.length);
+
+        assert.strictEqual(this.idx, this.testCase.htmlChunks.length);
+
+        this.sawEof = true;
+    }
+}
+
 it('Location Info (Tokenizer)', () => {
     const testCases = [
         {
@@ -99,8 +152,8 @@ it('Location Info (Tokenizer)', () => {
 
     for (const testCase of testCases) {
         const html = testCase.htmlChunks.join('');
-        const lines = html.split(/\r?\n/g);
-        const tokenizer = new Tokenizer({ sourceCodeLocationInfo: true });
+        const handler = new LocationInfoHandler(testCase, html);
+        const tokenizer = new Tokenizer({ sourceCodeLocationInfo: true }, handler);
         const lastChunkIdx = testCase.htmlChunks.length - 1;
 
         for (let i = 0; i < testCase.htmlChunks.length; i++) {
@@ -113,27 +166,8 @@ it('Location Info (Tokenizer)', () => {
         tokenizer.lastStartTagName = testCase.lastStartTagName;
         tokenizer.allowCDATA = !!testCase.allowCDATA;
 
-        for (let token = tokenizer.getNextToken(), j = 0; token.type !== TokenType.EOF; ) {
-            if (token.type === TokenType.HIBERNATION) {
-                continue;
-            }
-
-            assert.ok(token.location);
-
-            //Offsets
-            let actual = html.substring(token.location.startOffset, token.location.endOffset);
-
-            assert.strictEqual(actual, testCase.htmlChunks[j]);
-
-            //Line/col
-            actual = getSubstringByLineCol(lines, token.location);
-
-            const expected = normalizeNewLine(testCase.htmlChunks[j]);
-
-            assert.strictEqual(actual, expected);
-
-            token = tokenizer.getNextToken();
-            j++;
+        while (!handler.sawEof) {
+            tokenizer.getNextToken();
         }
     }
 });
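
For comparison with the loop above: driving the tokenizer end to end now means
constructing it with a handler and pumping getNextToken() until the handler has
seen EOF. A self-contained sketch (CollectingHandler is a hypothetical name;
SinglePathHandler, the two-argument constructor, write() and getNextToken() are
the ones used in the test above, and the queued.js import path mirrors the
test's relative import):

    import { Tokenizer } from 'parse5/dist/tokenizer/index.js';
    import { SinglePathHandler } from 'parse5/dist/tokenizer/queued.js';
    import type { EOFToken, Token } from 'parse5/dist/common/token.js';

    class CollectingHandler extends SinglePathHandler {
        public tokens: Token[] = [];
        public sawEof = false;

        protected handleToken(token: Token): void {
            this.tokens.push(token);
        }

        override onEof(_token: EOFToken): void {
            this.sawEof = true;
        }
    }

    const handler = new CollectingHandler();
    const tokenizer = new Tokenizer({ sourceCodeLocationInfo: true }, handler);

    // Single final chunk; `true` marks the last write so EOF can be emitted.
    tokenizer.write('<p class="x">hi</p>', true);

    while (!handler.sawEof) {
        tokenizer.getNextToken();
    }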
diff --git a/scripts/generate-parser-feedback-test/index.ts b/scripts/generate-parser-feedback-test/index.ts
index 0145790ed..c0e3ca815 100644
--- a/scripts/generate-parser-feedback-test/index.ts
+++ b/scripts/generate-parser-feedback-test/index.ts
@@ -1,12 +1,12 @@
 import { readFile, writeFile } from 'node:fs/promises';
 import { basename } from 'node:path';
-import { Parser } from '../../packages/parse5/dist/parser/index.js';
-import * as defaultTreeAdapter from '../../packages/parse5/dist/tree-adapters/default.js';
-import { convertTokenToHtml5Lib } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
+import { Parser } from 'parse5/dist/parser/index.js';
+import * as defaultTreeAdapter from 'parse5/dist/tree-adapters/default.js';
+import { HtmlLibToken } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
 import { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
 import { addSlashes } from 'parse5-test-utils/utils/common.js';
-import { TokenType, Token } from '../../packages/parse5/dist/common/token.js';
-import type { TreeAdapterTypeMap } from '../../packages/parse5/dist/tree-adapters/interface.js';
+import { TokenType, Token } from 'parse5/dist/common/token.js';
+import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js';
 
 // eslint-disable-next-line no-console
 main().catch(console.error);
@@ -41,7 +41,41 @@ function appendToken(dest: Token[], token: Token): void {
     dest.push(token);
 }
 
-function collectParserTokens(html: string): ReturnType<typeof convertTokenToHtml5Lib>[] {
+function convertTokenToHtml5Lib(token: Token): HtmlLibToken {
+    switch (token.type) {
+        case TokenType.CHARACTER:
+        case TokenType.NULL_CHARACTER:
+        case TokenType.WHITESPACE_CHARACTER:
+            return ['Character', token.chars];
+
+        case TokenType.START_TAG: {
+            const reformattedAttrs = Object.fromEntries(token.attrs.map(({ name, value }) => [name, value]));
+            const startTagEntry: HtmlLibToken = ['StartTag', token.tagName, reformattedAttrs];
+
+            if (token.selfClosing) {
+                startTagEntry.push(true);
+            }
+
+            return startTagEntry;
+        }
+
+        case TokenType.END_TAG:
+            // NOTE: the parser feedback simulator can produce adjusted SVG
+            // tag names for end-tag tokens, so we need to lowercase them.
+            return ['EndTag', token.tagName.toLowerCase()];
+
+        case TokenType.COMMENT:
+            return ['Comment', token.data];
+
+        case TokenType.DOCTYPE:
+            return ['DOCTYPE', token.name, token.publicId, token.systemId, !token.forceQuirks];
+
+        default:
+            throw new TypeError(`Unrecognized token type: ${token.type}`);
+    }
+}
+
+function collectParserTokens(html: string): HtmlLibToken[] {
     const tokens: Token[] = [];
 
     class ExtendedParser extends Parser {
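
The function is moved here from the shared test utils (removed below) so the
script no longer depends on them. The tuples follow html5lib conventions; for
instance, a self-closing start tag tokenized from <img src="x"/> maps to

    ['StartTag', 'img', { src: 'x' }, true]

and the DOCTYPE tuple's trailing boolean is html5lib's "correctness" flag,
i.e. the inverse of forceQuirks.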
diff --git a/test/utils/generate-tokenization-tests.ts b/test/utils/generate-tokenization-tests.ts
index fb3dc8481..5b9f4c59e 100644
--- a/test/utils/generate-tokenization-tests.ts
+++ b/test/utils/generate-tokenization-tests.ts
@@ -1,45 +1,12 @@
 import * as assert from 'node:assert';
 import * as fs from 'node:fs';
 import * as path from 'node:path';
-import { Tokenizer, TokenizerMode } from 'parse5/dist/tokenizer/index.js';
+import { type Tokenizer, TokenizerMode, type TokenHandler } from 'parse5/dist/tokenizer/index.js';
 import { makeChunks } from './common.js';
-import { TokenType, Token } from 'parse5/dist/common/token.js';
+import type { CommentToken, DoctypeToken, TagToken, CharacterToken } from 'parse5/dist/common/token.js';
+import type { ParserError } from 'parse5/dist/common/error-codes.js';
 
-type HtmlLibToken = [string, string | null, ...unknown[]];
-
-export function convertTokenToHtml5Lib(token: Token): HtmlLibToken {
-    switch (token.type) {
-        case TokenType.CHARACTER:
-        case TokenType.NULL_CHARACTER:
-        case TokenType.WHITESPACE_CHARACTER:
-            return ['Character', token.chars];
-
-        case TokenType.START_TAG: {
-            const reformatedAttrs = Object.fromEntries(token.attrs.map(({ name, value }) => [name, value]));
-            const startTagEntry: HtmlLibToken = ['StartTag', token.tagName, reformatedAttrs];
-
-            if (token.selfClosing) {
-                startTagEntry.push(true);
-            }
-
-            return startTagEntry;
-        }
-
-        case TokenType.END_TAG:
-            // NOTE: parser feedback simulator can produce adjusted SVG
-            // tag names for end tag tokens so we need to lower case it
-            return ['EndTag', token.tagName.toLowerCase()];
-
-        case TokenType.COMMENT:
-            return ['Comment', token.data];
-
-        case TokenType.DOCTYPE:
-            return ['DOCTYPE', token.name, token.publicId, token.systemId, !token.forceQuirks];
-
-        default:
-            throw new TypeError(`Unrecognized token type: ${token.type}`);
-    }
-}
+export type HtmlLibToken = [string, string | null, ...unknown[]];
 
 interface TokenError {
     code: string;
@@ -47,44 +14,122 @@ interface TokenError {
     col: number;
 }
 
+const TestsWithBrokenErrors: Record<string, TokenError[]> = {
+    /*
+     * 57.entities has an error that is not part of the test data.
+     *
+     * TODO: Move this to the test data.
+     */
+    'Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.': [
+        { code: 'unknown-named-character-reference', col: 12, line: 1 },
+    ],
+};
+
 interface TokenSourceData {
     tokens: HtmlLibToken[];
     errors: TokenError[];
 }
 
-type TokenSourceCreator = (data: TokenSourceData) => {
-    tokenizer: Tokenizer;
-    getNextToken: () => Token;
-};
+type TokenSourceCreator = (handler: TokenizeHandler) => Tokenizer;
+
+/** Receives events and immediately compares them against the expected values. We check the entire output again at the end. */
+class TokenizeHandler implements TokenSourceData, TokenHandler {
+    constructor(private testData: LoadedTest) {}
+
+    private addToken(token: HtmlLibToken): void {
+        assert.deepStrictEqual(token, this.testData.expected[this.tokens.length]);
+
+        this.tokens.push(token);
+    }
+
+    onComment(token: CommentToken): void {
+        this.addToken(['Comment', token.data]);
+    }
+    onDoctype(token: DoctypeToken): void {
+        this.addToken(['DOCTYPE', token.name, token.publicId, token.systemId, !token.forceQuirks]);
+    }
+    onStartTag(token: TagToken): void {
+        const reformattedAttrs = Object.fromEntries(token.attrs.map(({ name, value }) => [name, value]));
+        const startTagEntry: HtmlLibToken = ['StartTag', token.tagName, reformattedAttrs];
+
+        if (token.selfClosing) {
+            startTagEntry.push(true);
+        }
+
+        this.addToken(startTagEntry);
+    }
+    onEndTag(token: TagToken): void {
+        // NOTE: the parser feedback simulator can produce adjusted SVG
+        // tag names for end-tag tokens, so we need to lowercase them.
+        this.addToken(['EndTag', token.tagName.toLowerCase()]);
+    }
+    onEof(): void {
+        this.sawEof = true;
+    }
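+    // NOTE: the tokenizer may split a run of character data across several
+    // onCharacter calls (e.g. at the buffer waterline configured in
+    // tokenize() below), while the html5lib expectations store one coalesced
+    // 'Character' entry. We therefore merge consecutive character tokens and
+    // only compare the accumulated prefix until the entry is complete.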
+    onCharacter(token: CharacterToken): void {
+        const lastEntry = this.tokens[this.tokens.length - 1];
+
+        if (lastEntry && lastEntry[0] === 'Character' && lastEntry[1] != null) {
+            lastEntry[1] += token.chars;
+        } else {
+            this.tokens.push(['Character', token.chars]);
+        }
+
+        const actual = this.tokens[this.tokens.length - 1];
+        const expected = this.testData.expected[this.tokens.length - 1];
+        assert.strictEqual(expected[0], 'Character');
+        assert.ok(typeof actual[1] === 'string');
+        assert.ok(expected[1]?.startsWith(actual[1]));
+    }
+    onNullCharacter(token: CharacterToken): void {
+        this.onCharacter(token);
+    }
+    onWhitespaceCharacter(token: CharacterToken): void {
+        this.onCharacter(token);
+    }
+    onParseError(err: ParserError): void {
+        assert.ok(
+            this.testData.expectedErrors.some(
+                ({ code, line, col }) => code === err.code && line === err.startLine && col === err.startCol
+            )
+        );
+
+        this.errors.push({
+            code: err.code,
+            line: err.startLine,
+            col: err.startCol,
+        });
+    }
+
+    public sawEof = false;
+    public tokens: HtmlLibToken[] = [];
+    public errors: TokenError[] = [];
+}
 
 function tokenize(
     createTokenSource: TokenSourceCreator,
     chunks: string | string[],
-    initialState: Tokenizer['state'],
-    lastStartTag: string | null
+    testData: LoadedTest
 ): TokenSourceData {
-    const result: TokenSourceData = { tokens: [], errors: [] };
-    const { tokenizer, getNextToken } = createTokenSource(result);
-    let token: Token = { type: TokenType.HIBERNATION, location: null };
+    const result = new TokenizeHandler(testData);
+    const tokenizer = createTokenSource(result);
     let chunkIdx = 0;
 
     // NOTE: set small waterline for testing purposes
     tokenizer.preprocessor.bufferWaterline = 8;
-    tokenizer.state = initialState;
+    tokenizer.state = testData.initialState;
 
-    if (lastStartTag) {
-        tokenizer.lastStartTagName = lastStartTag;
+    if (testData.lastStartTag) {
+        tokenizer.lastStartTagName = testData.lastStartTag;
     }
 
-    do {
-        if (token.type === TokenType.HIBERNATION) {
-            tokenizer.write(chunks[chunkIdx], ++chunkIdx === chunks.length);
+    while (!result.sawEof) {
+        if (tokenizer.active) {
+            tokenizer.getNextToken();
         } else {
-            appendTokenEntry(result.tokens, convertTokenToHtml5Lib(token));
+            tokenizer.write(chunks[chunkIdx], ++chunkIdx === chunks.length);
         }
-
-        token = getNextToken();
-    } while (token.type !== TokenType.EOF);
+    }
 
     // Sort errors by line and column
     result.errors.sort((err1, err2) => err1.line - err2.line || err1.col - err2.col);
@@ -108,27 +153,6 @@ function unescapeDescrIO(testDescr: TestDescription): void {
     }
 }
 
-function appendTokenEntry(result: HtmlLibToken[], tokenEntry: HtmlLibToken): void {
-    if (tokenEntry[0] === 'Character') {
-        const lastEntry = result[result.length - 1];
-
-        if (lastEntry && lastEntry[0] === 'Character' && lastEntry[1] != null) {
-            lastEntry[1] += tokenEntry[1];
-            return;
-        }
-    }
-
-    result.push(tokenEntry);
-}
-
-function concatCharacterTokens(tokenEntries: HtmlLibToken[]): HtmlLibToken[] {
-    const result: HtmlLibToken[] = [];
-
-    for (const tokenEntry of tokenEntries) appendTokenEntry(result, tokenEntry);
-
-    return result;
-}
-
 function getTokenizerSuitableStateName(testDataStateName: string): Tokenizer['state'] {
     const name = testDataStateName.slice(0, -6).replace(' ', '_').toUpperCase();
     return TokenizerMode[name as keyof typeof TokenizerMode];
@@ -141,7 +165,7 @@ interface TestDescription {
     description: string;
     input: string;
     lastStartTag: string;
-    errors?: string[];
+    errors?: TokenError[];
 }
 
 interface LoadedTest {
@@ -153,7 +177,7 @@ interface LoadedTest {
     initialState: Tokenizer['state'];
     initialStateName: string;
     lastStartTag: string;
-    expectedErrors: string[];
+    expectedErrors: TokenError[];
 }
 
 function loadTests(dataDirPath: string): LoadedTest[] {
@@ -194,11 +218,13 @@ function loadTests(dataDirPath: string): LoadedTest[] {
                     setName,
                     name: descr.description,
                     input: descr.input,
-                    expected: concatCharacterTokens(expected),
+                    expected,
                     initialState: getTokenizerSuitableStateName(initialStateName),
                     initialStateName,
                     lastStartTag: descr.lastStartTag,
-                    expectedErrors: descr.errors || [],
+                    expectedErrors: TestsWithBrokenErrors[descr.description] ?? descr.errors ?? [],
                 });
             }
         }
@@ -218,28 +244,10 @@ export function generateTokenizationTests(
 
         it(testName, () => {
             const chunks = makeChunks(testData.input);
-            const result = tokenize(
-                createTokenSource,
-                chunks,
-                testData.initialState as Tokenizer['state'],
-                testData.lastStartTag
-            );
+            const result = tokenize(createTokenSource, chunks, testData);
 
             assert.deepEqual(result.tokens, testData.expected, `Chunks: ${JSON.stringify(chunks)}`);
-
-            /*
-             * 57.entities has an error that is not part of the test data.
-             *
-             * TODO: Move this to the test data.
-             */
-            if (
-                testName ===
-                'Tokenizer - 57.entities - Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name. - Initial state: Data state'
-            ) {
-                assert.deepEqual(result.errors, [{ code: 'unknown-named-character-reference', col: 12, line: 1 }]);
-            } else {
-                assert.deepEqual(result.errors, testData.expectedErrors || []);
-            }
+            assert.deepEqual(result.errors, testData.expectedErrors || []);
         });
     }
 }
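
General note on the new driving loop in tokenize(): hibernation is no longer a
token type the caller has to filter out. When the tokenizer exhausts its input
it simply becomes inactive, and the caller writes the next chunk. A standalone
sketch of that handshake, assuming the active flag and two-argument constructor
introduced in this patch (drive() and the isDone callback are hypothetical):

    import type { Tokenizer } from 'parse5/dist/tokenizer/index.js';

    function drive(tokenizer: Tokenizer, chunks: string[], isDone: () => boolean): void {
        let chunkIdx = 0;

        while (!isDone()) {
            if (tokenizer.active) {
                // Buffered input remains: emit the next token as handler events.
                tokenizer.getNextToken();
            } else {
                // The tokenizer paused for more input: feed the next chunk and
                // flag the last one so EOF can be emitted.
                tokenizer.write(chunks[chunkIdx], ++chunkIdx === chunks.length);
            }
        }
    }

A handler's onEof implementation typically flips the flag read by isDone, as
TokenizeHandler.sawEof does above.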