Skip to content

Commit

Permalink
Switch to a new model more closely mimicking the web APIs
Browse files Browse the repository at this point in the history
I am using `activeElement` instead of `pivot` now, which is sort of cheating but also kind of makes sense.
  • Loading branch information
TomasHubelbauer committed Oct 8, 2024
1 parent 7509718 commit 8ad4282
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 152 deletions.
64 changes: 32 additions & 32 deletions index.test.ts
Original file line number Diff line number Diff line change
@@ -1,43 +1,43 @@
import { test, expect } from 'bun:test';

// TODO: Introduce `*Test` types which do not have the circular references as
// base types for the actually exposed types
import DOMParser from './index';

test('one element', async () => {
// An `html` element on its own must still produce a document whose `body`
// exists and serializes as an empty `<body></body>`.
test('HTML', async () => {
const domParser = new DOMParser();
const document = await domParser.parseFromString('<html></html>');
expect(document.body.outerHTML).toEqual(`<body></body>`);
});

test('HEAD', async () => {
const domParser = new DOMParser();
const document = await domParser.parseFromString('<body></body>', 'text/html');
expect(document).toEqual({
body: {
tagName: 'body',
children: [],
}
});
const document = await domParser.parseFromString('<head></head>');
expect(document.head.outerHTML).toEqual(`<head></head>`);
expect(document.body.outerHTML).toEqual(`<body></body>`);
});

test('two elements', async () => {
test('BODY', async () => {
const domParser = new DOMParser();
const document = await domParser.parseFromString('<head></head><body></body>', 'text/html');
expect(document).toEqual({
body: {
tagName: 'body',
children: [],
}
});
const document = await domParser.parseFromString('<body></body>');
expect(document.head.outerHTML).toEqual(`<head></head>`);
expect(document.body.outerHTML).toEqual(`<body></body>`);
});

test('normal element', async () => {
test('HEAD & BODY', async () => {
const domParser = new DOMParser();
const document = await domParser.parseFromString('<body><a></a></body>', 'text/html');
expect(document).toEqual({
body: {
tagName: 'body',
children: [
{
tagName: 'a',
children: [],
}
],
}
});
const document = await domParser.parseFromString('<head></head><body></body>');
expect(document.head.outerHTML).toEqual(`<head></head>`);
expect(document.body.outerHTML).toEqual(`<body></body>`);
});

// A top-level element given without an explicit `body` is expected to end up
// as a child of the document's `body`.
test('H1', async () => {
const domParser = new DOMParser();
const document = await domParser.parseFromString('<h1></h1>');
expect(document.body.outerHTML).toEqual(`<body><h1></h1></body>`);
});

// Several sibling top-level elements are expected to become children of `body`
// in the same order in which they appear in the input.
test('H1 & H2 & H3', async () => {
const domParser = new DOMParser();
const document = await domParser.parseFromString('<h1></h1><h2></h2><h3></h3>');
expect(document.body.outerHTML).toEqual(`<body><h1></h1><h2></h2><h3></h3></body>`);
});

// TODO: Use http://info.cern.ch/hypertext/WWW/TheProject.html as one of the tests
212 changes: 92 additions & 120 deletions index.ts
Original file line number Diff line number Diff line change
@@ -1,41 +1,60 @@
// TODO: Use the same API as https://developer.mozilla.org/en-US/docs/Web/API/Node
class Node {

// See https://developer.mozilla.org/en-US/docs/Web/API/Node
/**
 * Minimal subset of the DOM `Node` contract implemented by `Document`,
 * `Element` and `Text` in this file: a link to the owning document and links
 * to the parent.
 */
interface Node {
// The document this node belongs to.
ownerDocument: Document;
// The parent node; assigned by `Document.createElement` and `Element.append`.
parentNode: Node;
// The parent when it is an `Element`; in the code shown here only
// `Element.append` assigns it.
parentElement: Element;
}

// TODO: Use the same API as https://developer.mozilla.org/en-US/docs/Web/API/Element
class Document {
#parentMap: WeakMap<Element, Element>;
constructor() {
this.#parentMap = new WeakMap<Element, Element>();
// See https://developer.mozilla.org/en-US/docs/Web/API/Document
/**
 * Root object produced by the parser. It holds the `head` and `body` elements
 * and acts as the factory for new `Element` instances.
 */
class Document implements Node {
// The document's `head` element, assigned by the parser.
head: Element;
// The document's `body` element, assigned by the parser.
body: Element;
// NOTE: the parser in this file uses this as its insertion cursor: the element
// that is currently open and to which newly parsed elements are appended.
activeElement: Element;
// The three `Node` members below are declared to satisfy `Node`; nothing in the
// code shown here assigns them on a `Document`.
ownerDocument: Document;
parentNode: Node;
parentElement: Element;

/**
 * Creates a new element with the given tag name, owned by this document.
 *
 * The element's `parentNode` and `ownerDocument` are both set to this
 * document. The element is not added to any `children` list and its
 * `parentElement` is left unset; `Element.append` sets that when the element
 * is attached.
 *
 * @param tagName Tag name to store on the element, kept exactly as given.
 * @returns The newly created element.
 */
createElement(tagName: string) {
const element = new Element();
element.tagName = tagName;
element.parentNode = this;
element.ownerDocument = this;
return element;
}
}

get parentMap() {
return this.#parentMap;
// See https://developer.mozilla.org/en-US/docs/Web/API/Element
class Element implements Node {
tagName: string;
children: Element[] = [];
ownerDocument: Document;
parentNode: Node;
parentElement: Element;

append(element: Element) {
this.children.push(element);
element.parentNode = this;
element.parentElement = this;
element.ownerDocument = this.ownerDocument;
}

body: Element;
}

// TODO: Use the same API as https://developer.mozilla.org/en-US/docs/Web/API/Document
class Element {
#document: Document;
constructor(document: Document, parentElement: Element | undefined, tagName: string) {
this.#document = document;
if (parentElement) {
this.#document.parentMap.set(this, parentElement);
get outerHTML() {
let html = `<${this.tagName}>`;
for (const child of this.children) {
html += child.outerHTML;
}

this.tagName = tagName;
html += `</${this.tagName}>`;
return html;
}
}

tagName: string;
children: Element[] = [];

// TODO: Get this from `Node`
get parentElement() {
return this.#document.parentMap.get(this);
};
// See https://developer.mozilla.org/en-US/docs/Web/API/Text
/**
 * Text node shape. It only declares fields; nothing in the visible part of
 * this file instantiates it yet.
 */
class Text implements Node {
// The text carried by this node.
textContent: string;
// `Node` members; see the `Node` interface.
ownerDocument: Document;
parentNode: Node;
parentElement: Element;
}

// TODO: See if we can use some API from `HTMLRewriter` to not have to keep this
Expand Down Expand Up @@ -64,78 +83,58 @@ export default class DOMParser {
* @param mimeType The `text/html` MIME type.
* @returns A `Document` object representing the HTML document.
*/
parseFromString(string: string, mimeType: 'text/html') {
parseFromString(string: string, mimeType = 'text/html') {
if (mimeType !== 'text/html') {
throw new TypeError('Only text/html is supported');
}

const htmlRewriter = new HTMLRewriter();
const elements: Element[] = [];
let pivot: Element | undefined;

function getPath(element: Element) {
const path = [element.tagName.toUpperCase()];
let pivot = element.parentElement;
while (pivot) {
path.unshift(pivot.tagName.toUpperCase());
pivot = pivot.parentElement;
}

return path.join(' > ');
}

return new Promise<Document>((resolve, reject) => {
const document = new Document();
htmlRewriter
new HTMLRewriter()
.on('*', {
element(element) {
if (pivot && autoClosingTags[element.tagName.toUpperCase()]?.includes(pivot.tagName.toUpperCase())) {
pivot = pivot.parentElement;
if (pivot) {
console.log('auto-closed', getPath(pivot));

const newElement = new Element(document, pivot, element.tagName);
pivot.children.push(newElement);
pivot = newElement;
console.log('auto-opened', getPath(newElement));
}
else {
reject('TODO1');
// TODO: Carry over the HTML element attributes
if (element.tagName === 'HTML' || element.tagName === 'html') {
if (document.activeElement) {
throw new Error('Only one HTML element is allowed');
}

return;
}
else if (!element.selfClosing) {
const newElement = new Element(document, pivot, element.tagName);
if (pivot) {
pivot.children.push(newElement);
}
else {
elements.push(newElement);

if (element.tagName === 'HEAD' || element.tagName === 'head') {
if (document.head) {
throw new Error('Only one HEAD element is allowed');
}

pivot = newElement;
console.log('opened', getPath(newElement));

element.onEndTag(() => {
if (pivot) {
pivot = pivot.parentElement;
console.log('closed', getPath(newElement), pivot);
} else {
elements.push(newElement);
console.log('rooted', getPath(newElement));
}
});
document.head = document.createElement('head');
document.activeElement = document.head;
return;
}
else {
const newElement = new Element(document, pivot, element.tagName);
if (pivot) {
pivot.children.push(newElement);
}
else {
elements.push(newElement);

if (element.tagName === 'BODY' || element.tagName === 'body') {
if (document.body) {
throw new Error('Only one BODY element is allowed');
}

console.log('self-closed', getPath(newElement));
document.body = document.createElement('body');
document.activeElement = document.body;
return;
}

// Create a `body` in case the top-level element came before `body`
if (!document.activeElement) {
document.body = document.createElement('body');
document.activeElement = document.body;
}

const activeElement = document.createElement(element.tagName);
document.activeElement.append(activeElement);
document.activeElement = activeElement;

element.onEndTag(() => {
document.activeElement = document.activeElement.parentElement;
});
},
comments(comment) {
console.log('element comments', comment.text);
Expand All @@ -155,45 +154,18 @@ export default class DOMParser {
//console.log('document text', JSON.stringify(text.text));
},
end(_end) {
const tagNames = elements.map(e => e.tagName).join();
switch (tagNames) {
case 'body': {
document.body = elements[0];
resolve(document);
return;
}
case 'head,body':
case 'header,body': {
document.body = elements[1];
resolve(document);
return;
}
default: {
reject(`Expected head, body or both, got ${tagNames}`);
return;
}
if (!document.head) {
document.head = document.createElement('head');
}

reject('Unexpected end of document');
}
});
if (!document.body) {
document.body = document.createElement('body');
}

htmlRewriter.transform(string);
resolve(document);
}
})
.transform(string);
});
}
}

if (import.meta.main) {
const response = await fetch('http://info.cern.ch/hypertext/WWW/TheProject.html');
const text = await response.text();

const domParser = new DOMParser();
const document = await domParser.parseFromString(text, 'text/html');

function printElement(element: Element, level = 0) {
console.log(' '.repeat(level * 2) + element.tagName);
element.children.forEach(child => printElement(child, level + 1));
}

printElement(document.body);
}

0 comments on commit 8ad4282

Please sign in to comment.