diff --git a/gulpfile.js b/gulpfile.js
index 75cd17fd..fa529c88 100644
--- a/gulpfile.js
+++ b/gulpfile.js
@@ -21,7 +21,8 @@ wrap = require('gulp-wrap');
var paths = {
src: 'src/**/*.js',
- lib: 'lib/**/*.js',
+ lib: ['lib/**/*.js'],
+ libTest: ['lib/*.js', 'lib/linkify/**/*.js'],
libCore: [
'lib/linkify/core/*.js',
'lib/linkify/utils/*.js',
@@ -99,38 +100,40 @@ gulp.task('build-core', ['babel'], function () {
// Depends on build-core
gulp.task('build-interfaces', ['babel-amd'], function () {
- var stream, streams = [];
-
// Core linkify functionality as plugins
var interface, interfaces = [
'string',
'element',
- ['element', 'jquery'] // jQuery interface requires both element and jquery
+ ['linkify-element.js', 'jquery'], // jQuery interface requires both element and jquery
+ [
+ 'simple-html-tokenizer/*.js',
+ 'simple-html-tokenizer.js',
+ 'html'
+ ]
];
- var files = {js: null, amd: null};
-
// Globals browser interface
- for (var i = 0; i < interfaces.length; i++) {
- interface = interfaces[i];
+ var streams = [];
+
+ interfaces.forEach(function (interface) {
+
+ var files = {js: [], amd: []};
if (interface instanceof Array) {
- // Interface has dependencies
- files.js = [];
- files.amd = [];
- for (var j = 0; j < interface.length; j++) {
- files.js.push('src/linkify-' + interface[j] + '.js');
- files.amd.push('build/amd/linkify-' + interface[j] + '.js');
- }
+ // Interface has other interface dependencies within this package
+ interface.forEach(function (i, idx) {
+ if (idx == interface.length - 1) { return; } // ignore last index
+ files.js.push('src/' + i);
+ files.amd.push('build/amd/' + i);
+ });
// The last dependency is the name of the interface
interface = interface.pop();
-
- } else {
- files.js = 'src/linkify-' + interface + '.js';
- files.amd = 'build/amd/linkify-' + interface + '.js';
}
+ files.js.push('src/linkify-' + interface + '.js');
+ files.amd.push('build/amd/linkify-' + interface + '.js');
+
// Browser interface
stream = gulp.src(files.js)
.pipe(babel({
@@ -150,7 +153,7 @@ gulp.task('build-interfaces', ['babel-amd'], function () {
.pipe(gulp.dest('build'));
streams.push(stream);
- }
+ });
return merge.apply(this, streams);
});
@@ -230,7 +233,7 @@ gulp.task('mocha', ['build'], function () {
*/
gulp.task('coverage', ['build'], function (cb) {
// IMPORTANT: return not required here (and will actually cause bugs!)
- gulp.src(paths.lib)
+ gulp.src(paths.libTest)
.pipe(istanbul()) // Covering files
.pipe(istanbul.hookRequire()) // Force `require` to return covered files
.on('finish', function () {
diff --git a/html.js b/html.js
new file mode 100644
index 00000000..eb277b07
--- /dev/null
+++ b/html.js
@@ -0,0 +1 @@
+module.exports = require('./lib/linkify-html');
diff --git a/package.json b/package.json
index 630f486c..0fb792d0 100644
--- a/package.json
+++ b/package.json
@@ -27,7 +27,8 @@
"mention",
"mentions"
],
- "dependencies": {},
+ "dependencies": {
+ },
"devDependencies": {
"amd-optimize": "^0.6.1",
"brfs": "^1.4.1",
@@ -58,7 +59,8 @@
"lodash": "^3.10.1",
"merge-stream": "^1.0.0",
"mocha": "^2.3.3",
- "phantomjs": "^1.9.18"
+ "phantomjs": "^1.9.18",
+ "simple-html-tokenizer": "https://github.com/nfrasser/simple-html-tokenizer.git#master"
},
"optionalDependencies": {
"jquery": ">=1.9.0"
diff --git a/src/linkify-html.js b/src/linkify-html.js
new file mode 100644
index 00000000..0f3e8488
--- /dev/null
+++ b/src/linkify-html.js
@@ -0,0 +1,181 @@
+import HTML5Tokenizer from './simple-html-tokenizer';
+import * as linkify from './linkify';
+
+const StartTag = 'StartTag';
+const EndTag = 'EndTag';
+const Chars = 'Chars';
+const Comment = 'Comment';
+
+/**
+ `tokens` and `token` in this section refer to tokens generated by the HTML
+ parser.
+*/
+export default function linkifyHtml(str, opts={}) {
+ let tokens = HTML5Tokenizer.tokenize(str);
+ let linkifiedTokens = [];
+ let linkified = [];
+ var i;
+
+ opts = linkify.options.normalize(opts);
+
+ // Linkify the tokens given by the parser
+ for (i = 0; i < tokens.length; i++) {
+ let token = tokens[i];
+
+ if (token.type === StartTag && token.tagName.toUpperCase() === 'A') {
+ // Ignore all the contents of an anchor tag
+ let preskipLen = linkifiedTokens.length;
+		skipTagTokens('A', tokens, ++i, linkifiedTokens);
+
+ i += linkifiedTokens.length - preskipLen;
+ continue;
+
+ } else if (token.type !== Chars) {
+ // Skip this token, it's not important
+ linkifiedTokens.push(token);
+ continue;
+ }
+
+ // Valid text token, linkify it!
+ let linkifedChars = linkifyChars(token.chars, opts);
+ linkifiedTokens.push.apply(linkifiedTokens, linkifedChars);
+ }
+
+ // Convert the tokens back into a string
+ for (i = 0; i < linkifiedTokens.length; i++) {
+ let token = linkifiedTokens[i];
+ switch (token.type) {
+ case StartTag:
+ let attrs = attrsToStrings(token.attributes);
+ let link = '<' + token.tagName;
+ if (attrs.length > 0) { link += ' ' + attrs.join(' '); }
+ link += '>';
+ linkified.push(link);
+ break;
+ case EndTag:
+				linkified.push(`</${token.tagName}>`);
+ break;
+ case Chars:
+ linkified.push(escapeText(token.chars));
+ break;
+ case Comment:
+				linkified.push(`<!--${token.chars}-->`);
+ break;
+ }
+ }
+
+ return linkified.join('');
+}
+
+/**
+ `tokens` and `token` in this section referes to tokens returned by
+ `linkify.tokenize`. `linkified` will contain HTML Parser-style tokens
+*/
+function linkifyChars(str, opts) {
+ let tokens = linkify.tokenize(str);
+ let result = [];
+
+ for (var i = 0; i < tokens.length; i++) {
+ let token = tokens[i];
+ if (token.type === 'nl' && opts.nl2br) {
+ result.push({
+ type: StartTag,
+ tagName: 'br',
+ attributes: [],
+ selfClosing: true
+ });
+ continue;
+ } else if (!token.isLink) {
+ result.push({type: Chars, chars: token.toString()});
+ continue;
+ }
+
+ let href = token.toHref(opts.defaultProtocol);
+ let formatted = linkify.options.resolve(opts.format, token.toString(), token.type);
+ let formattedHref = linkify.options.resolve(opts.formatHref, href, token.type);
+ let attributesHash = linkify.options.resolve(opts.attributes, href, token.type);
+ let tagName = linkify.options.resolve(opts.tagName, href, token.type);
+ let linkClass = linkify.options.resolve(opts.linkClass, href, token.type);
+ let target = linkify.options.resolve(opts.target, href, token.type);
+
+ // Build up attributes
+ let attributes = [
+ ['href', formattedHref],
+ ['class', linkClass]
+ ];
+
+ if (target) {
+ attributes.push(['target', target]);
+ }
+
+ for (var attr in attributesHash) {
+ attributes.push([attr, attributesHash[attr]]);
+ }
+
+ // Add the required tokens
+ result.push({
+ type: StartTag,
+ tagName: tagName,
+ attributes: attributes,
+ selfClosing: false
+ });
+ result.push({type: Chars, chars: formatted});
+ result.push({type: EndTag, tagName: tagName});
+ }
+
+ return result;
+}
+
+/**
+ Returns a list of tokens skipped until the closing tag of tagName.
+
+ * `tagName` is the closing tag which will prompt us to stop skipping
+ * `tokens` is the array of tokens generated by HTML5Tokenizer which
+ * `i` is the index immediately after the opening tag to skip
+ * `skippedTokens` is an array which skipped tokens are being pushed into
+
+ Caveats
+
+ * Assumes that i is the first token after the given opening tagName
+ * The closing tag will be skipped, but nothing after it
+ * Will track whether there is a nested tag of the same type
+*/
+function skipTagTokens(tagName, tokens, i, skippedTokens) {
+
+ // number of tokens of this type on the [fictional] stack
+ var stackCount = 1;
+
+ while (i < tokens.length && stackCount > 0) {
+ let token = tokens[i];
+ if (token.type === StartTag && token.tagName.toUpperCase() === tagName) {
+ // Nested tag of the same type, "add to stack"
+ stackCount++;
+ } else if (token.type === EndTag && token.tagName.toUpperCase() === tagName) {
+ // Closing tag
+ stackCount--;
+ }
+ skippedTokens.push(token);
+ i++;
+ }
+
+ // Note that if stackCount > 0 here, the HTML is probably invalid
+ return skippedTokens;
+}
+
+function escapeText(text) {
+ // Not required, HTML tokenizer ensures this occurs properly
+ return text;
+}
+
+function escapeAttr(attr) {
+	return attr.replace(/"/g, '&quot;');
+}
+
+function attrsToStrings(attrs) {
+ let attrStrs = [];
+ for (let i = 0; i < attrs.length; i++) {
+ let [name, value] = attrs[i];
+ attrStrs.push(`${name}="${escapeAttr(value)}"`);
+ }
+ return attrStrs;
+}
diff --git a/src/linkify-string.js b/src/linkify-string.js
index 6485dd64..1bb3c91a 100644
--- a/src/linkify-string.js
+++ b/src/linkify-string.js
@@ -4,14 +4,14 @@
import {tokenize, options} from './linkify';
-function cleanText(text) {
+function escapeText(text) {
 	return text
 		.replace(/&/g, '&amp;')
 		.replace(/</g, '&lt;')
 		.replace(/>/g, '&gt;');
}
-function cleanAttr(href) {
+function escapeAttr(href) {
 	return href.replace(/"/g, '&quot;');
}
@@ -22,7 +22,7 @@ function attributesToString(attributes) {
for (let attr in attributes) {
 		let val = (attributes[attr] + '').replace(/"/g, '&quot;');
- result.push(`${attr}="${cleanAttr(val)}"`);
+ result.push(`${attr}="${escapeAttr(val)}"`);
}
return result.join(' ');
}
@@ -35,7 +35,7 @@ function linkifyStr(str, opts={}) {
tokens = tokenize(str),
result = [];
- for (let i = 0; i < tokens.length; i++ ) {
+ for (let i = 0; i < tokens.length; i++) {
let token = tokens[i];
if (token.isLink) {
@@ -48,16 +48,16 @@ function linkifyStr(str, opts={}) {
linkClass = options.resolve(opts.linkClass, href, token.type),
target = options.resolve(opts.target, href, token.type);
- let link = `<${tagName} href="${cleanAttr(formattedHref)}" class="${cleanAttr(linkClass)}"`;
+ let link = `<${tagName} href="${escapeAttr(formattedHref)}" class="${escapeAttr(linkClass)}"`;
if (target) {
- link += ` target="${cleanAttr(target)}"`;
+ link += ` target="${escapeAttr(target)}"`;
}
if (attributesHash) {
link += ` ${attributesToString(attributesHash)}`;
}
-			link += `>${cleanText(formatted)}</${tagName}>`;
+			link += `>${escapeText(formatted)}</${tagName}>`;
result.push(link);
} else if (token.type === 'nl' && opts.nl2br) {
@@ -67,7 +67,7 @@ function linkifyStr(str, opts={}) {
 			result.push('<br>\n');
}
} else {
- result.push(cleanText(token.toString()));
+ result.push(escapeText(token.toString()));
}
}
diff --git a/src/simple-html-tokenizer b/src/simple-html-tokenizer
new file mode 120000
index 00000000..cea33c54
--- /dev/null
+++ b/src/simple-html-tokenizer
@@ -0,0 +1 @@
+../node_modules/simple-html-tokenizer/lib/simple-html-tokenizer
\ No newline at end of file
diff --git a/src/simple-html-tokenizer.js b/src/simple-html-tokenizer.js
new file mode 100644
index 00000000..e53cbf0b
--- /dev/null
+++ b/src/simple-html-tokenizer.js
@@ -0,0 +1,15 @@
+import HTML5NamedCharRefs from './simple-html-tokenizer/html5-named-char-refs';
+import EntityParser from './simple-html-tokenizer/entity-parser';
+import EventedTokenizer from './simple-html-tokenizer/evented-tokenizer';
+import Tokenizer from './simple-html-tokenizer/tokenizer';
+import tokenize from './simple-html-tokenizer/tokenize';
+
+var HTML5Tokenizer = {
+ HTML5NamedCharRefs,
+ EntityParser,
+ EventedTokenizer,
+ Tokenizer,
+ tokenize,
+};
+
+export default HTML5Tokenizer;
diff --git a/templates/linkify-html.amd.js b/templates/linkify-html.amd.js
new file mode 100644
index 00000000..339e4544
--- /dev/null
+++ b/templates/linkify-html.amd.js
@@ -0,0 +1 @@
+<%= contents %>
diff --git a/templates/linkify-html.js b/templates/linkify-html.js
new file mode 100644
index 00000000..45ca72de
--- /dev/null
+++ b/templates/linkify-html.js
@@ -0,0 +1,5 @@
+;(function (linkify) {
+"use strict";
+<%= contents %>
+window.linkifyHtml = linkifyHtml;
+})(window.linkify);
diff --git a/test/index.html b/test/index.html
index e44d8c68..9b5ac0df 100644
--- a/test/index.html
+++ b/test/index.html
@@ -14,10 +14,13 @@
module.exports = window.jQuery;
});
+
+
+
 	You let's get all up in the http://element.co/?wat=this and the #swag
@@ -37,5 +40,11 @@
 		console.log(linkify.find('What up #bro??'));
 	});
+	require(['linkify-html'], function (linkifyHtml) {
+		console.log(linkifyHtml('AMD <p>This is google.com</p>'));
+	});
+
+	console.log(linkifyHtml('GLOBAL <p>This is google.com</p>'));
+</script>