FXP supports parsing HTML documents. Here is an example:
Input HTML Document
<!DOCTYPE html>
<html lang="en">
<head>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-80202630-2');
</script>
<title>Fast XML Parser</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="static/css/bootstrap.min.css">
<link rel="stylesheet" href="static/css/jquery-confirm.min.css">
<link rel="stylesheet" type="text/css" href="style.css">
<script src="static/js/jquery-3.2.1.min.js"></script>
<style>
.CodeMirror{
height: 100%;
width: 100%;
}
</style>
</head>
<body role="document" style="background-color: #2c3e50;">
<h1>Heading</h1>
<hr>
<h2>&inr;</h2>
<pre>
<h1>Heading</h1>
<hr>
<h2>&inr;</h2>
</pre>
<script>
let highlightedLine = null;
let editor;
<!-- this should not be parsed separately -->
function updateLength(){
const xmlData = editor.getValue();
$("#lengthxml")[0].innerText = xmlData.replace(/>\s*</g, "><").length;
}
</script>
</body>
</html>
Code and the necessary configuration to parse it into a JS object:
const parsingOptions = {
ignoreAttributes: false,
// preserveOrder: true,
unpairedTags: ["hr", "br", "link", "meta"],
stopNodes : [ "*.pre", "*.script"],
processEntities: true,
htmlEntities: true
};
const parser = new XMLParser(parsingOptions);
parser.parse(html);
JS Object
{
"html": {
"head": {
"script": [
"\n window.dataLayer = window.dataLayer || [];\n function gtag(){dataLayer.push(arguments);}\n gtag('js', new Date());\n \n gtag('config', 'UA-80202630-2');\n ",
{
"@_src": "static/js/jquery-3.2.1.min.js"
}
],
"title": "Fast XML Parser",
"meta": [
{
"@_charset": "UTF-8"
},
{
"@_name": "viewport",
"@_content": "width=device-width, initial-scale=1"
}
],
"link": [
{
"@_rel": "stylesheet",
"@_href": "static/css/bootstrap.min.css"
},
{
"@_rel": "stylesheet",
"@_href": "static/css/jquery-confirm.min.css"
},
{
"@_rel": "stylesheet",
"@_type": "text/css",
"@_href": "style.css"
}
],
"style": ".CodeMirror{\n height: 100%;\n width: 100%;\n }"
},
"body": {
"h1": "Heading",
"hr": "",
"h2": "₹",
"pre": "\n <h1>Heading</h1>\n <hr>\n <h2>&inr;</h2>\n ",
"script": "\n let highlightedLine = null;\n let editor;\n <!-- this should not be parsed separately -->\n function updateLength(){\n const xmlData = editor.getValue();\n $(\"#lengthxml\")[0].innerText = xmlData.replace(/>\\s*</g, \"><\").length;\n }\n ",
"@_role": "document",
"@_style": "background-color: #2c3e50;"
},
"@_lang": "en"
}
}
To build the HTML document back from the JS object, you need to uncomment preserveOrder: true
in the code above, and then pass the parser's output to the XML builder:
const parsingOptions = {
ignoreAttributes: false,
preserveOrder: true,
unpairedTags: ["hr", "br", "link", "meta"],
stopNodes : [ "*.pre", "*.script"],
processEntities: true,
htmlEntities: true
};
const parser = new XMLParser(parsingOptions);
let result = parser.parse(html);
const builderOptions = {
ignoreAttributes: false,
format: true,
preserveOrder: true,
suppressEmptyNode: true,
unpairedTags: ["hr", "br", "link", "meta"],
stopNodes : [ "*.pre", "*.script"],
}
const builder = new XMLBuilder(builderOptions);
const output = builder.build(result);
Output
<html lang="en">
<head>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-80202630-2');
</script>
<title>
Fast XML Parser
</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="static/css/bootstrap.min.css">
<link rel="stylesheet" href="static/css/jquery-confirm.min.css">
<link rel="stylesheet" type="text/css" href="style.css">
<script src="static/js/jquery-3.2.1.min.js">
</script>
<style>
.CodeMirror{
height: 100%;
width: 100%;
}
</style>
</head>
<body role="document" style="background-color: #2c3e50;">
<h1>
Heading
</h1>
<hr>
<h2>
₹
</h2>
<pre>
<h1>Heading</h1>
<hr>
<h2>&inr;</h2>
</pre>
<script>
let highlightedLine = null;
let editor;
<!-- this should not be parsed separately -->
function updateLength(){
const xmlData = editor.getValue();
$("#lengthxml")[0].innerText = xmlData.replace(/>\s*</g, "><").length;
}
</script>
</body>
</html>