From 5d11840be37b4f237a65889dc031f7d0280b48f0 Mon Sep 17 00:00:00 2001 From: nfrasser Date: Fri, 6 Nov 2015 00:17:40 -0500 Subject: [PATCH 01/11] Installing HTML tokenizer --- package.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 630f486c..19946bc5 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,9 @@ "mention", "mentions" ], - "dependencies": {}, + "dependencies": { + "simple-html-tokenizer": "^0.2.0" + }, "devDependencies": { "amd-optimize": "^0.6.1", "brfs": "^1.4.1", From 114cf4bf8bca9e6b98435c35ab83f7163157df80 Mon Sep 17 00:00:00 2001 From: nfrasser Date: Fri, 6 Nov 2015 00:18:06 -0500 Subject: [PATCH 02/11] clean -> escape --- src/linkify-string.js | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/linkify-string.js b/src/linkify-string.js index 6485dd64..1bb3c91a 100644 --- a/src/linkify-string.js +++ b/src/linkify-string.js @@ -4,14 +4,14 @@ import {tokenize, options} from './linkify'; -function cleanText(text) { +function escapeText(text) { return text .replace(/&/g, '&amp;') .replace(/</g, '&lt;') .replace(/>/g, '&gt;'); } -function cleanAttr(href) { +function escapeAttr(href) { return href.replace(/"/g, '&quot;'); } @@ -22,7 +22,7 @@ function attributesToString(attributes) { for (let attr in attributes) { let val = (attributes[attr] + '').replace(/"/g, '&quot;'); - result.push(`${attr}="${cleanAttr(val)}"`); + result.push(`${attr}="${escapeAttr(val)}"`); } return result.join(' '); } @@ -35,7 +35,7 @@ function linkifyStr(str, opts={}) { tokens = tokenize(str), result = []; - for (let i = 0; i < tokens.length; i++ ) { + for (let i = 0; i < tokens.length; i++) { let token = tokens[i]; if (token.isLink) { @@ -48,16 +48,16 @@ linkClass = options.resolve(opts.linkClass, href, token.type), target = options.resolve(opts.target, href, token.type); - let link = `<${tagName} href="${cleanAttr(formattedHref)}" class="${cleanAttr(linkClass)}"`; + let link = 
`<${tagName} href="${escapeAttr(formattedHref)}" class="${escapeAttr(linkClass)}"`; if (target) { - link += ` target="${cleanAttr(target)}"`; + link += ` target="${escapeAttr(target)}"`; } if (attributesHash) { link += ` ${attributesToString(attributesHash)}`; } - link += `>${cleanText(formatted)}</${tagName}>`; + link += `>${escapeText(formatted)}</${tagName}>`; result.push(link); } else if (token.type === 'nl' && opts.nl2br) { @@ -67,7 +67,7 @@ result.push('<br>
\n'); } } else { - result.push(cleanText(token.toString())); + result.push(escapeText(token.toString())); } } From cf8777e9622f032f050aef64e54c88ee7bb3be52 Mon Sep 17 00:00:00 2001 From: nfrasser Date: Fri, 6 Nov 2015 00:18:34 -0500 Subject: [PATCH 03/11] Complete linkify-html implementation --- src/linkify-html.js | 183 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 src/linkify-html.js diff --git a/src/linkify-html.js b/src/linkify-html.js new file mode 100644 index 00000000..fadc4ea2 --- /dev/null +++ b/src/linkify-html.js @@ -0,0 +1,183 @@ +import HTML5Tokenizer from 'simple-html-tokenizer'; +import {tokenize, options} from './linkify'; + +const StartTag = 'StartTag'; +const EndTag = 'EndTag'; +const Chars = 'Chars'; +const Comment = 'Comment'; + +/** + `tokens` and `token` in this section refer to tokens generated by the HTML + parser. +*/ +export default function linkifyHtml(str, opts={}) { + let tokens = HTML5Tokenizer.tokenize(str); + let linkifiedTokens = []; + let linkified = []; + var i; + + opts = options.normalize(opts); + + // Linkify the tokens given by the parser + for (i = 0; i < tokens.length; i++) { + let token = tokens[i]; + + if (token.type === StartTag && token.tagName.toUpperCase() === 'A') { + // Ignore all the contents of an anchor tag + let preskipLen = linkifiedTokens.length; + skipTagTokens('A', tokens, ++i, linkifiedTokens); + + i += linkifiedTokens.length - preskipLen; + continue; + + } else if (token.type !== Chars) { + // Skip this token, it's not important + linkifiedTokens.push(token); + continue; + } + + // Valid text token, linkify it! 
+ let linkifiedChars = linkifyChars(token.chars, opts); + linkifiedTokens.push.apply(linkifiedTokens, linkifiedChars); + } + + // Convert the tokens back into a string + for (i = 0; i < linkifiedTokens.length; i++) { + let token = linkifiedTokens[i]; + switch (token.type) { + case StartTag: + let attrs = attrsToStrings(token.attributes); + let link = '<' + token.tagName; + if (attrs.length > 0) { link += ' ' + attrs.join(' '); } + link += '>'; + linkified.push(link); + break; + case EndTag: + linkified.push(`</${token.tagName}>`); + break; + case Chars: + linkified.push(escapeText(token.chars)); + break; + case Comment: + linkified.push(`<!--${token.chars}-->`); + break; + } + } + + return linkified.join(''); +} + +/** + `tokens` and `token` in this section refers to tokens returned by + `linkify.tokenize`. `linkified` will contain HTML Parser-style tokens +*/ +function linkifyChars(str, opts) { + let tokens = tokenize(str); + let result = []; + + for (var i = 0; i < tokens.length; i++) { + let token = tokens[i]; + if (token.type === 'nl' && opts.nl2br) { + result.push({ + type: StartTag, + tagName: 'br', + attributes: [], + selfClosing: true + }); + continue; + } else if (!token.isLink) { + result.push({type: Chars, chars: token.toString()}); + continue; + } + + let href = token.toHref(opts.defaultProtocol); + let formatted = options.resolve(opts.format, token.toString(), token.type); + let formattedHref = options.resolve(opts.formatHref, href, token.type); + let attributesHash = options.resolve(opts.attributes, href, token.type); + let tagName = options.resolve(opts.tagName, href, token.type); + let linkClass = options.resolve(opts.linkClass, href, token.type); + let target = options.resolve(opts.target, href, token.type); + + // Build up attributes + let attributes = [ + ['href', formattedHref], + ['class', linkClass] + ]; + + if (target) { + attributes.push(['target', target]); + } + + for (var attr in attributesHash) { + attributes.push([attr, attributesHash[attr]]); + } + + // Add the required 
tokens + result.push({ + type: StartTag, + tagName: tagName, + attributes: attributes, + selfClosing: false + }); + result.push({type: Chars, chars: formatted}); + result.push({type: EndTag, tagName: tagName}); + } + + return result; +} + +/** + Returns a list of tokens skipped until the closing tag of tagName. + + * `tagName` is the closing tag which will prompt us to stop skipping + * `tokens` is the array of tokens generated by HTML5Tokenizer which + * `i` is the index immediately after the opening tag to skip + * `skippedTokens` is an array which skipped tokens are being pushed into + + Caveats + + * Assumes that i is the first token after the given opening tagName + * The closing tag will be skipped, but nothing after it + * Will track whether there is a nested tag of the same type +*/ +function skipTagTokens(tagName, tokens, i, skippedTokens) { + + // number of tokens of this type on the [fictional] stack + var stackCount = 1; + + while (i < tokens.length && stackCount > 0) { + let token = tokens[i]; + if (token.type === StartTag && token.tagName.toUpperCase() === tagName) { + // Nested tag of the same type, "add to stack" + stackCount++; + } else if (token.type === EndTag && token.tagName.toUpperCase() === tagName) { + // Closing tag + stackCount--; + } + skippedTokens.push(token); + i++; + } + + // Note that if stackCount > 0 here, the HTML is probably invalid + return skippedTokens; +} + +function escapeText(text) { + return text + .replace(/&/g, '&') + .replace(//g, '>'); +} + +function escapeAttr(attr) { + return attr.replace(/"/g, '"'); +} + +function attrsToStrings(attrs) { + let attrStrs = []; + for (let i = 0; i < attrs.length; i++) { + let [name, value] = attrs[i]; + attrStrs.push(`${name}="${escapeAttr(value)}"`); + } + return attrStrs; +} From ccc4a932bd6752f4681c1ad9c8138336ff6df008 Mon Sep 17 00:00:00 2001 From: nfrasser Date: Fri, 6 Nov 2015 00:20:20 -0500 Subject: [PATCH 04/11] Tests for linkify-html --- test/spec/linkify-html-test.js | 19 
+++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 test/spec/linkify-html-test.js diff --git a/test/spec/linkify-html-test.js b/test/spec/linkify-html-test.js new file mode 100644 index 00000000..6d323abc --- /dev/null +++ b/test/spec/linkify-html-test.js @@ -0,0 +1,19 @@ +var linkifyHtml = require('../../lib/linkify-html'); +var htmlOptions = require('./html/options'); + +describe('linkify-html', function () { + + it('Works with default options', function () { + var linkified = linkifyHtml(htmlOptions.original); + expect(htmlOptions.linkified).to.contain(linkified); + }); + + it('Works with overriden options', function () { + var linkified = linkifyHtml( + htmlOptions.original, + htmlOptions.altOptions + ); + expect(htmlOptions.linkifiedAlt).to.contain(linkified); + }); + +}); From 49b9cb01915acff161d9fb7a3069e133c218501b Mon Sep 17 00:00:00 2001 From: nfrasser Date: Fri, 6 Nov 2015 17:22:07 -0500 Subject: [PATCH 05/11] Use forked HTML parser --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 19946bc5..0f562e31 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,7 @@ "mentions" ], "dependencies": { - "simple-html-tokenizer": "^0.2.0" + "simple-html-tokenizer": "https://github.com/nfrasser/simple-html-tokenizer.git#master" }, "devDependencies": { "amd-optimize": "^0.6.1", From 64a348bb0600e87c78da4aa735b95ca9c226d267 Mon Sep 17 00:00:00 2001 From: nfrasser Date: Fri, 6 Nov 2015 17:23:32 -0500 Subject: [PATCH 06/11] Build optimizations for linkify-html interface --- html.js | 1 + src/linkify-html.js | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 html.js diff --git a/html.js b/html.js new file mode 100644 index 00000000..eb277b07 --- /dev/null +++ b/html.js @@ -0,0 +1 @@ +module.exports = require('./lib/linkify-html'); diff --git a/src/linkify-html.js b/src/linkify-html.js index fadc4ea2..eeb09986 100644 --- 
a/src/linkify-html.js +++ b/src/linkify-html.js @@ -1,5 +1,5 @@ -import HTML5Tokenizer from 'simple-html-tokenizer'; -import {tokenize, options} from './linkify'; +import HTML5Tokenizer from './simple-html-tokenizer'; +import * as linkify from './linkify'; const StartTag = 'StartTag'; const EndTag = 'EndTag'; @@ -16,7 +16,7 @@ export default function linkifyHtml(str, opts={}) { let linkified = []; var i; - opts = options.normalize(opts); + opts = linkify.options.normalize(opts); // Linkify the tokens given by the parser for (i = 0; i < tokens.length; i++) { @@ -72,7 +72,7 @@ export default function linkifyHtml(str, opts={}) { `linkify.tokenize`. `linkified` will contain HTML Parser-style tokens */ function linkifyChars(str, opts) { - let tokens = tokenize(str); + let tokens = linkify.tokenize(str); let result = []; for (var i = 0; i < tokens.length; i++) { @@ -91,12 +91,12 @@ function linkifyChars(str, opts) { } let href = token.toHref(opts.defaultProtocol); - let formatted = options.resolve(opts.format, token.toString(), token.type); - let formattedHref = options.resolve(opts.formatHref, href, token.type); - let attributesHash = options.resolve(opts.attributes, href, token.type); - let tagName = options.resolve(opts.tagName, href, token.type); - let linkClass = options.resolve(opts.linkClass, href, token.type); - let target = options.resolve(opts.target, href, token.type); + let formatted = linkify.options.resolve(opts.format, token.toString(), token.type); + let formattedHref = linkify.options.resolve(opts.formatHref, href, token.type); + let attributesHash = linkify.options.resolve(opts.attributes, href, token.type); + let tagName = linkify.options.resolve(opts.tagName, href, token.type); + let linkClass = linkify.options.resolve(opts.linkClass, href, token.type); + let target = linkify.options.resolve(opts.target, href, token.type); // Build up attributes let attributes = [ From 0b2968d2283efabc8099ad943de2c81805d27db9 Mon Sep 17 00:00:00 2001 From: nfrasser 
Date: Fri, 6 Nov 2015 17:24:17 -0500 Subject: [PATCH 07/11] Importing simple-html-tokenizer via symlink So that it can be included in the build --- src/simple-html-tokenizer | 1 + src/simple-html-tokenizer.js | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 120000 src/simple-html-tokenizer create mode 100644 src/simple-html-tokenizer.js diff --git a/src/simple-html-tokenizer b/src/simple-html-tokenizer new file mode 120000 index 00000000..cea33c54 --- /dev/null +++ b/src/simple-html-tokenizer @@ -0,0 +1 @@ +../node_modules/simple-html-tokenizer/lib/simple-html-tokenizer \ No newline at end of file diff --git a/src/simple-html-tokenizer.js b/src/simple-html-tokenizer.js new file mode 100644 index 00000000..e53cbf0b --- /dev/null +++ b/src/simple-html-tokenizer.js @@ -0,0 +1,15 @@ +import HTML5NamedCharRefs from './simple-html-tokenizer/html5-named-char-refs'; +import EntityParser from './simple-html-tokenizer/entity-parser'; +import EventedTokenizer from './simple-html-tokenizer/evented-tokenizer'; +import Tokenizer from './simple-html-tokenizer/tokenizer'; +import tokenize from './simple-html-tokenizer/tokenize'; + +var HTML5Tokenizer = { + HTML5NamedCharRefs, + EntityParser, + EventedTokenizer, + Tokenizer, + tokenize, +}; + +export default HTML5Tokenizer; From e71937c8ffe50a3f2a7f90589ea1be8ef9d99ccd Mon Sep 17 00:00:00 2001 From: nfrasser Date: Fri, 6 Nov 2015 17:25:22 -0500 Subject: [PATCH 08/11] Adding linkify-html to build, other build enhancements Includes templates required to build. 
Appears to be working so far --- gulpfile.js | 40 ++++++++++++++++++----------------- templates/linkify-html.amd.js | 1 + templates/linkify-html.js | 5 +++++ 3 files changed, 27 insertions(+), 19 deletions(-) create mode 100644 templates/linkify-html.amd.js create mode 100644 templates/linkify-html.js diff --git a/gulpfile.js b/gulpfile.js index 75cd17fd..e226a04b 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -99,38 +99,40 @@ gulp.task('build-core', ['babel'], function () { // Depends on build-core gulp.task('build-interfaces', ['babel-amd'], function () { - var stream, streams = []; - // Core linkify functionality as plugins var interface, interfaces = [ 'string', 'element', - ['element', 'jquery'] // jQuery interface requires both element and jquery + ['linkify-element.js', 'jquery'], // jQuery interface requires both element and jquery + [ + 'simple-html-tokenizer/*.js', + 'simple-html-tokenizer.js', + 'html' + ] ]; - var files = {js: null, amd: null}; - // Globals browser interface - for (var i = 0; i < interfaces.length; i++) { - interface = interfaces[i]; + var streams = []; + + interfaces.forEach(function (interface) { + + var files = {js: [], amd: []}; if (interface instanceof Array) { - // Interface has dependencies - files.js = []; - files.amd = []; - for (var j = 0; j < interface.length; j++) { - files.js.push('src/linkify-' + interface[j] + '.js'); - files.amd.push('build/amd/linkify-' + interface[j] + '.js'); - } + // Interface has other interface dependencies within this package + interface.forEach(function (i, idx) { + if (idx == interface.length - 1) { return; } // ignore last index + files.js.push('src/' + i); + files.amd.push('build/amd/' + i); + }); // The last dependency is the name of the interface interface = interface.pop(); - - } else { - files.js = 'src/linkify-' + interface + '.js'; - files.amd = 'build/amd/linkify-' + interface + '.js'; } + files.js.push('src/linkify-' + interface + '.js'); + files.amd.push('build/amd/linkify-' + 
interface + '.js'); + // Browser interface stream = gulp.src(files.js) .pipe(babel({ @@ -150,7 +152,7 @@ gulp.task('build-interfaces', ['babel-amd'], function () { .pipe(gulp.dest('build')); streams.push(stream); - } + }); return merge.apply(this, streams); }); diff --git a/templates/linkify-html.amd.js b/templates/linkify-html.amd.js new file mode 100644 index 00000000..339e4544 --- /dev/null +++ b/templates/linkify-html.amd.js @@ -0,0 +1 @@ +<%= contents %> diff --git a/templates/linkify-html.js b/templates/linkify-html.js new file mode 100644 index 00000000..45ca72de --- /dev/null +++ b/templates/linkify-html.js @@ -0,0 +1,5 @@ +;(function (linkify) { +"use strict"; +<%= contents %> +window.linkifyHtml = linkifyHtml; +})(window.linkify); From c571f90412a8276db311207d3d221528e67dc404 Mon Sep 17 00:00:00 2001 From: nfrasser Date: Fri, 20 Nov 2015 13:07:24 -0500 Subject: [PATCH 09/11] Additional quick HTML embed tests --- test/index.html | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/index.html b/test/index.html index e44d8c68..9b5ac0df 100644 --- a/test/index.html +++ b/test/index.html @@ -14,10 +14,13 @@ module.exports = window.jQuery; }); + + +

You let's get all up in the http://element.co/?wat=this and the #swag @@ -37,5 +40,11 @@ console.log(linkify.find('What up #bro??')); }); + require(['linkify-html'], function (linkifyHtml) { + console.log(linkifyHtml('AMD

Hi

This is google.com

')); + }); + + console.log(linkifyHtml('GLOBAL

Hi

This is google.com

')); From 6be4dd48b4c0165894a6dcb0180ab57775546592 Mon Sep 17 00:00:00 2001 From: nfrasser Date: Mon, 14 Dec 2015 21:06:46 -0500 Subject: [PATCH 10/11] Fixing coverage CI build step by ignoring tokenizer --- gulpfile.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gulpfile.js b/gulpfile.js index e226a04b..fa529c88 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -21,7 +21,8 @@ wrap = require('gulp-wrap'); var paths = { src: 'src/**/*.js', - lib: 'lib/**/*.js', + lib: ['lib/**/*.js'], + libTest: ['lib/*.js', 'lib/linkify/**/*.js'], libCore: [ 'lib/linkify/core/*.js', 'lib/linkify/utils/*.js', @@ -232,7 +233,7 @@ gulp.task('mocha', ['build'], function () { */ gulp.task('coverage', ['build'], function (cb) { // IMPORTANT: return not required here (and will actually cause bugs!) - gulp.src(paths.lib) + gulp.src(paths.libTest) .pipe(istanbul()) // Covering files .pipe(istanbul.hookRequire()) // Force `require` to return covered files .on('finish', function () { From cca115d6094af5c87a5c4cd2ce27ee81cb50dd5a Mon Sep 17 00:00:00 2001 From: nfrasser Date: Mon, 14 Dec 2015 21:58:23 -0500 Subject: [PATCH 11/11] Last few linkify-html fixes with slimmer HTML tokenizer HTML character entities are not tokenized in the HTML parsing process. The character ref functionality has therefore been taken out. 
--- package.json | 4 ++-- src/linkify-html.js | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/package.json b/package.json index 0f562e31..0fb792d0 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,6 @@ "mentions" ], "dependencies": { - "simple-html-tokenizer": "https://github.com/nfrasser/simple-html-tokenizer.git#master" }, "devDependencies": { "amd-optimize": "^0.6.1", @@ -60,7 +59,8 @@ "lodash": "^3.10.1", "merge-stream": "^1.0.0", "mocha": "^2.3.3", - "phantomjs": "^1.9.18" + "phantomjs": "^1.9.18", + "simple-html-tokenizer": "https://github.com/nfrasser/simple-html-tokenizer.git#master" }, "optionalDependencies": { "jquery": ">=1.9.0" diff --git a/src/linkify-html.js b/src/linkify-html.js index eeb09986..0f3e8488 100644 --- a/src/linkify-html.js +++ b/src/linkify-html.js @@ -163,10 +163,8 @@ function skipTagTokens(tagName, tokens, i, skippedTokens) { } function escapeText(text) { - return text - .replace(/&/g, '&') - .replace(//g, '>'); + // Not required, HTML tokenizer ensures this occurs properly + return text; } function escapeAttr(attr) {