-
-
Notifications
You must be signed in to change notification settings - Fork 239
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement HTML rewriting stream (closes #222)
- Loading branch information
Showing
18 changed files
with
560 additions
and
101 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
<p align="center"> | ||
<a href="https://github.com/inikulin/parse5"> | ||
<img src="https://raw.github.com/inikulin/parse5/master/media/logo.png" alt="parse5" /> | ||
</a> | ||
</p> | ||
|
||
<div align="center"> | ||
<h1>parse5-html-rewriting-stream</h1> | ||
<i><b>Streaming HTML rewriter.</b></i> | ||
</div> | ||
<br> | ||
|
||
<div align="center"> | ||
<code>npm install --save parse5-html-rewriting-stream</code> | ||
</div> | ||
<br> | ||
|
||
<p align="center"> | ||
📖 <a href="https://github.com/inikulin/parse5/tree/master/packages/parse5-html-rewriting-stream/docs/index.md"><b>Documentation</b></a> 📖 | ||
</p> | ||
|
||
--- | ||
|
||
<p align="center"> | ||
<a href="https://github.com/inikulin/parse5/tree/master/docs/list-of-packages.md">List of parse5 toolset packages</a> | ||
</p> | ||
|
||
<p align="center"> | ||
<a href="https://github.com/inikulin/parse5">GitHub</a> | ||
</p> | ||
|
||
<p align="center"> | ||
<a href="https://github.com/inikulin/parse5/tree/master/docs/version-history.md">Version history</a> | ||
</p> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
'use strict'; | ||
|
||
const SAXParser = require('parse5-sax-parser'); | ||
const Tokenizer = require('parse5/lib/tokenizer'); | ||
|
||
/**
 * Escapes a string so it can be safely written back into HTML markup.
 *
 * `&` and U+00A0 are always replaced. In attribute mode `"` is replaced as
 * well (values are emitted inside double quotes); otherwise `<` and `>` are
 * replaced so text cannot be re-read as markup.
 *
 * @param {string} str - Value to escape.
 * @param {boolean} attrMode - `true` for a double-quoted attribute value, `false` for text.
 * @returns {string} The escaped string.
 */
function escapeString(str, attrMode) {
    // NOTE: `&` has to be replaced first, otherwise the entities produced below would be escaped twice.
    const escaped = String(str)
        .replace(/&/g, '&amp;')
        .replace(/\u00a0/g, '&nbsp;');

    return attrMode
        ? escaped.replace(/"/g, '&quot;')
        : escaped.replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

/**
 * Streaming HTML rewriter built on top of the SAX parser.
 *
 * For every token the stream either emits an event (`startTag`, `endTag`,
 * `comment`, `doctype`, `text`) carrying the reshaped token and its raw HTML
 * when at least one listener is attached, or pushes the raw HTML to the
 * output untouched. Listeners write output through the `emit*` methods.
 */
class RewritingStream extends SAXParser {
    constructor() {
        // Location info is needed to cut the raw HTML of each token out of the input buffer.
        super({ sourceCodeLocationInfo: true });

        this.posTracker = this.locInfoMixin.posTracker;

        // Maps a tokenizer token type to the event to emit and to the function
        // that converts the token into the shape passed to listeners.
        this.tokenEmissionHelpers = {
            [Tokenizer.START_TAG_TOKEN]: {
                eventName: 'startTag',
                reshapeToken: token => this._reshapeStartTagToken(token)
            },
            [Tokenizer.END_TAG_TOKEN]: {
                eventName: 'endTag',
                reshapeToken: token => this._reshapeEndTagToken(token)
            },
            [Tokenizer.COMMENT_TOKEN]: {
                eventName: 'comment',
                reshapeToken: token => this._reshapeCommentToken(token)
            },
            [Tokenizer.DOCTYPE_TOKEN]: {
                eventName: 'doctype',
                reshapeToken: token => this._reshapeDoctypeToken(token)
            }
        };
    }

    /**
     * Stream transform hook: parses the chunk and signals completion.
     * Output is produced via `push()` from the token handlers, not here.
     */
    _transform(chunk, encoding, callback) {
        this._parseChunk(chunk);

        callback();
    }

    /**
     * Returns the source HTML covered by `this.currentTokenLocation`.
     *
     * @returns {string} Raw HTML of the current token.
     */
    _getCurrentTokenRawHtml() {
        // Offsets are shifted by `droppedBufferSize` before slicing - presumably
        // the amount of input already discarded from the preprocessor buffer
        // (NOTE(review): confirm against the position tracker).
        const droppedBufferSize = this.posTracker.droppedBufferSize;
        const start = this.currentTokenLocation.startOffset - droppedBufferSize;
        const end = this.currentTokenLocation.endOffset - droppedBufferSize;

        return this.tokenizer.preprocessor.html.slice(start, end);
    }

    // Events

    /**
     * Emits the event for a non-text token, or passes its raw HTML through
     * when nobody listens for that event. EOF tokens are ignored.
     */
    _handleToken(token) {
        if (token.type === Tokenizer.EOF_TOKEN) {
            return;
        }

        const { eventName, reshapeToken } = this.tokenEmissionHelpers[token.type];

        this.currentTokenLocation = token.location;

        const raw = this._getCurrentTokenRawHtml();

        if (this.listenerCount(eventName) > 0) {
            this.emit(eventName, reshapeToken(token), raw);
        } else {
            this.emitRaw(raw);
        }

        // NOTE: don't skip new lines after <pre> and other tags,
        // otherwise we'll have incorrect raw data.
        this.parserFeedbackSimulator.skipNextNewLine = false;
    }

    /**
     * Flushes accumulated text, if any: emits a `text` event when there are
     * listeners, otherwise passes the raw HTML through.
     */
    _emitPendingText() {
        if (this.pendingText !== null) {
            // Relies on `this.currentTokenLocation` describing the pending text
            // at this point (NOTE(review): it is not assigned in this method).
            const raw = this._getCurrentTokenRawHtml();

            if (this.listenerCount('text') > 0) {
                this.emit('text', this._createTextToken(), raw);
            } else {
                this.emitRaw(raw);
            }

            this.pendingText = null;
        }
    }

    // Emitter API

    /**
     * Serializes a doctype token and pushes it to the output.
     *
     * @param {{name: string, publicId: ?string, systemId: ?string}} token
     */
    emitDoctype(token) {
        let res = `<!DOCTYPE ${token.name}`;

        if (token.publicId !== null) {
            res += ` PUBLIC "${token.publicId}"`;
        } else if (token.systemId !== null) {
            // The SYSTEM keyword is only needed when there is no public identifier.
            res += ' SYSTEM';
        }

        if (token.systemId !== null) {
            res += ` "${token.systemId}"`;
        }

        res += '>';

        this.push(res);
    }

    /**
     * Serializes a start tag token and pushes it to the output.
     * Attribute values are escaped for use inside double quotes.
     *
     * @param {{tagName: string, attrs: Array<{name: string, value: string}>, selfClosing: boolean}} token
     */
    emitStartTag(token) {
        let res = `<${token.tagName}`;

        for (const { name, value } of token.attrs) {
            // Values are emitted inside double quotes, so `"` and `&` must be
            // escaped or the value would terminate early / change meaning.
            res += ` ${name}="${escapeString(value, true)}"`;
        }

        res += token.selfClosing ? '/>' : '>';

        this.push(res);
    }

    /**
     * Serializes an end tag token and pushes it to the output.
     *
     * @param {{tagName: string}} token
     */
    emitEndTag(token) {
        this.push(`</${token.tagName}>`);
    }

    /**
     * Pushes text to the output with `&`, `<`, `>` and U+00A0 escaped, so the
     * text cannot be re-interpreted as markup by a consumer.
     *
     * NOTE: escaping is applied unconditionally; for content that must be
     * written verbatim (e.g. the body of `<script>` or `<style>`) use
     * `emitRaw` instead.
     *
     * @param {{text: string}} token
     */
    emitText({ text }) {
        this.push(escapeString(text, false));
    }

    /**
     * Serializes a comment token and pushes it to the output.
     *
     * @param {{text: string}} token
     */
    emitComment(token) {
        this.push(`<!--${token.text}-->`);
    }

    /**
     * Pushes an HTML string to the output as is, without any escaping.
     *
     * @param {string} html
     */
    emitRaw(html) {
        this.push(html);
    }
}
|
||
module.exports = RewritingStream; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"name": "parse5-html-rewriting-stream", | ||
"description": "Streaming HTML rewriter.", | ||
"version": "5.0.0", | ||
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)", | ||
"contributors": "https://github.com/inikulin/parse5/graphs/contributors", | ||
"homepage": "https://github.com/inikulin/parse5", | ||
"keywords": ["parse5", "parser", "stream", "streaming", "rewriter", "rewrite", "HTML"], | ||
"license": "MIT", | ||
"main": "./lib/index.js", | ||
"dependencies": { | ||
"parse5": "^5.0.0", | ||
"parse5-sax-parser": "^5.0.0" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "git://github.com/inikulin/parse5.git" | ||
}, | ||
"files": ["lib"] | ||
} |
Oops, something went wrong.
No escaping of values?