-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.js
57 lines (49 loc) · 1.51 KB
/
parse.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
// Elements that never have content or a closing tag; they must not stay open
// on the element stack, otherwise everything after them would nest inside.
const VOID_ELEMENTS = new Set([
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "param",
  "source",
  "track",
  "wbr",
]);

/**
 * Converts the text of a `style` attribute into a property/value object.
 *
 * @param {string} styleText - Raw declarations, e.g. `"color: red; margin: 0"`.
 * @returns {Object<string, string>} One entry per non-empty declaration.
 */
function parseStyle(styleText) {
  const style = {};
  for (const declaration of styleText.split(";")) {
    // Split on the FIRST colon only so values such as `url(http://...)`
    // keep the colons they contain.
    const colonIndex = declaration.indexOf(":");
    if (colonIndex === -1) continue;
    const prop = declaration.slice(0, colonIndex).trim();
    const value = declaration.slice(colonIndex + 1).trim();
    if (prop && value) {
      style[prop] = value;
    }
  }
  return style;
}

/**
 * Parses an HTML string into a lightweight tree of plain objects.
 *
 * Each element becomes `{ tag, ...attributes, style?, children?, text? }`:
 * - `tag` is the tag name as written in the markup;
 * - every double-quoted attribute is copied onto the element as a string
 *   property (attribute names therefore share a namespace with `tag`,
 *   `children`, `text` and `style`);
 * - a `style` attribute is expanded into an object of declarations;
 * - `children` holds nested elements in document order;
 * - `text` holds the trimmed text found directly inside the element; several
 *   text runs separated by child tags are joined with a single space.
 *
 * Comments are removed before parsing. Tags starting with `<!` (doctype) and
 * tags whose name starts with `meta` are skipped. Void elements and
 * self-closing tags (`<br>`, `<img />`) are added as leaves. A closing tag
 * closes the nearest open element with the same name (case-insensitive);
 * stray closing tags are ignored.
 *
 * @param {string} html - Markup to parse.
 * @returns {Object|undefined} The first top-level element, or `undefined`
 *   when `html` is not a string or contains no element.
 */
export function parseHTML(html) {
  if (typeof html !== "string") return undefined;

  // Drop comments first so tags written inside them are not parsed.
  const source = html.replace(/<!--[\s\S]*?-->/g, "");
  const tagRegex = /<(?!!)(?!meta)([^>]+)>/g;
  const attributeRegex = /([^\s"'=\/]+)\s*=\s*"([^"]*)"/g;

  const root = {};
  // Each entry pairs an open element with its lower-cased name so closing
  // tags can be matched even if an attribute overwrote `node.tag`.
  const stack = [{ name: null, node: root }];
  let match;

  while ((match = tagRegex.exec(source)) !== null) {
    let tag = match[1].trim();

    if (tag.startsWith("/")) {
      // Closing tag: unwind to the nearest matching open element. Index 0 is
      // the synthetic root and is never popped.
      const closingName = tag.slice(1).trim().toLowerCase();
      for (let i = stack.length - 1; i > 0; i -= 1) {
        if (stack[i].name === closingName) {
          stack.length = i;
          break;
        }
      }
    } else {
      const isSelfClosing = tag.endsWith("/");
      if (isSelfClosing) {
        tag = tag.slice(0, -1).trim();
      }
      const tagName = tag.split(/\s+/)[0];

      if (tagName) {
        const element = { tag: tagName };
        let attrMatch;
        while ((attrMatch = attributeRegex.exec(tag)) !== null) {
          const [, attr, value] = attrMatch;
          if (attr === "style") {
            element.style = parseStyle(value);
          } else {
            element[attr] = value;
          }
        }

        const parent = stack[stack.length - 1].node;
        if (parent.children) {
          parent.children.push(element);
        } else {
          parent.children = [element];
        }

        const name = tagName.toLowerCase();
        if (!isSelfClosing && !VOID_ELEMENTS.has(name)) {
          stack.push({ name, node: element });
        }
      }
    }

    // Text between this tag and the next "<" (or the end of the input)
    // belongs to the innermost element that is still open.
    const textStart = tagRegex.lastIndex;
    const nextTagStart = source.indexOf("<", textStart);
    const textEnd = nextTagStart === -1 ? source.length : nextTagStart;
    const text = source.slice(textStart, textEnd).trim();
    if (text && stack.length > 1) {
      const current = stack[stack.length - 1].node;
      current.text = current.text ? `${current.text} ${text}` : text;
    }
  }

  return root.children ? root.children[0] : undefined;
}