From ff06755cefb371f6a8fbf37bfd0de10eddff4aea Mon Sep 17 00:00:00 2001 From: nfrasser Date: Sun, 17 Apr 2016 21:01:06 -0400 Subject: [PATCH 1/3] Always parse links that begin with a protocol --- src/linkify/core/parser.js | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/src/linkify/core/parser.js b/src/linkify/core/parser.js index fc66e860..a56d1af1 100644 --- a/src/linkify/core/parser.js +++ b/src/linkify/core/parser.js @@ -54,9 +54,6 @@ let S_START = makeState(); // Intermediate states for URLs. Note that domains that begin with a protocol // are treated slighly differently from those that don't. -// (PSS == "PROTOCOL SLASH SLASH") -// S_DOMAIN* states can generally become prefixes for email addresses, while -// S_PSS_DOMAIN* cannot let S_PROTOCOL = makeState(), // e.g., 'http:' S_PROTOCOL_SLASH = makeState(), // e.g., '/', 'http:/'' @@ -66,11 +63,6 @@ S_DOMAIN_DOT = makeState(), // (A) domain followed by DOT S_TLD = makeState(T_URL), // (A) Simplest possible URL with no query string S_TLD_COLON = makeState(), // (A) URL followed by colon (potential port number here) S_TLD_PORT = makeState(T_URL), // TLD followed by a port number -S_PSS_DOMAIN = makeState(), // parsed string starts with protocol and ends with a potential domain name (B) -S_PSS_DOMAIN_DOT = makeState(), // (B) domain followed by DOT -S_PSS_TLD = makeState(T_URL), // (B) Simplest possible URL with no query string and a protocol -S_PSS_TLD_COLON = makeState(), // (A) URL followed by colon (potential port number here) -S_PSS_TLD_PORT = makeState(T_URL), // TLD followed by a port number S_URL = makeState(T_URL), // Long URL with optional port and maybe query string S_URL_SYMS = makeState(), // URL followed by some symbols (will not be part of the final URL) S_URL_OPENBRACE = makeState(), // URL followed by { @@ -104,15 +96,16 @@ S_START.on(TT_TLD, S_DOMAIN); S_START.on(TT_DOMAIN, S_DOMAIN); S_START.on(TT_LOCALHOST, S_TLD); S_START.on(TT_NUM, S_DOMAIN); -S_PROTOCOL_SLASH_SLASH.on(TT_TLD, S_PSS_DOMAIN); -S_PROTOCOL_SLASH_SLASH.on(TT_DOMAIN, S_PSS_DOMAIN); -S_PROTOCOL_SLASH_SLASH.on(TT_NUM, S_PSS_DOMAIN); -S_PROTOCOL_SLASH_SLASH.on(TT_LOCALHOST, S_PSS_TLD); + +// Force URL for anything sane followed by protocol +S_PROTOCOL_SLASH_SLASH.on(TT_TLD, S_URL); +S_PROTOCOL_SLASH_SLASH.on(TT_DOMAIN, S_URL); +S_PROTOCOL_SLASH_SLASH.on(TT_NUM, S_URL); +S_PROTOCOL_SLASH_SLASH.on(TT_LOCALHOST, S_URL); // Account for dots and hyphens // hyphens are usually parts of domain names S_DOMAIN.on(TT_DOT, S_DOMAIN_DOT); -S_PSS_DOMAIN.on(TT_DOT, S_PSS_DOMAIN_DOT); S_EMAIL_DOMAIN.on(TT_DOT, S_EMAIL_DOMAIN_DOT); // Hyphen can jump back to a domain name @@ -122,10 +115,6 @@ S_DOMAIN_DOT.on(TT_TLD, S_TLD); S_DOMAIN_DOT.on(TT_DOMAIN, S_DOMAIN); S_DOMAIN_DOT.on(TT_NUM, S_DOMAIN); S_DOMAIN_DOT.on(TT_LOCALHOST, S_DOMAIN); -S_PSS_DOMAIN_DOT.on(TT_TLD, S_PSS_TLD); -S_PSS_DOMAIN_DOT.on(TT_DOMAIN, S_PSS_DOMAIN); -S_PSS_DOMAIN_DOT.on(TT_NUM, S_PSS_DOMAIN); -S_PSS_DOMAIN_DOT.on(TT_LOCALHOST, S_PSS_DOMAIN); S_EMAIL_DOMAIN_DOT.on(TT_TLD, S_EMAIL); S_EMAIL_DOMAIN_DOT.on(TT_DOMAIN, S_EMAIL_DOMAIN); S_EMAIL_DOMAIN_DOT.on(TT_NUM, S_EMAIL_DOMAIN); @@ -134,7 +123,6 @@ S_EMAIL_DOMAIN_DOT.on(TT_LOCALHOST, S_EMAIL_DOMAIN); // S_TLD accepts! But the URL could be longer, try to find a match greedily // The `run` function should be able to "rollback" to the accepting state S_TLD.on(TT_DOT, S_DOMAIN_DOT); -S_PSS_TLD.on(TT_DOT, S_PSS_DOMAIN_DOT); S_EMAIL.on(TT_DOT, S_EMAIL_DOMAIN_DOT); // Become real URLs after `SLASH` or `COLON NUM SLASH` @@ -143,10 +131,6 @@ S_TLD.on(TT_COLON, S_TLD_COLON); S_TLD.on(TT_SLASH, S_URL); S_TLD_COLON.on(TT_NUM, S_TLD_PORT); S_TLD_PORT.on(TT_SLASH, S_URL); -S_PSS_TLD.on(TT_COLON, S_PSS_TLD_COLON); -S_PSS_TLD.on(TT_SLASH, S_URL); -S_PSS_TLD_COLON.on(TT_NUM, S_PSS_TLD_PORT); -S_PSS_TLD_PORT.on(TT_SLASH, S_URL); S_EMAIL.on(TT_COLON, S_EMAIL_COLON); S_EMAIL_COLON.on(TT_NUM, S_EMAIL_PORT); From b9a128161df80857d07870ff01f5e2117e1d02b7 Mon Sep 17 00:00:00 2001 From: nfrasser Date: Sun, 17 Apr 2016 21:01:14 -0400 Subject: [PATCH 2/3] Updated parser tests to account for protocol parsing --- test/spec/linkify/core/parser-test.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/spec/linkify/core/parser-test.js b/test/spec/linkify/core/parser-test.js index 913f927a..e9fd768c 100644 --- a/test/spec/linkify/core/parser-test.js +++ b/test/spec/linkify/core/parser-test.js @@ -40,8 +40,8 @@ var tests = [ ['This [', 'i.imgur.com/ckSj2Ba.jpg', ')] should also work'] ], [ 'A link is http://nick.is.awesome/?q=nick+amazing&nick=yo%29%30hellp another is http://nick.con/?q=look', - [TEXT, URL, TEXT], - ['A link is ', 'http://nick.is', '.awesome/?q=nick+amazing&nick=yo%29%30hellp another is http://nick.con/?q=look'] + [TEXT, URL, TEXT, URL], + ['A link is ', 'http://nick.is.awesome/?q=nick+amazing&nick=yo%29%30hellp', ' another is ', 'http://nick.con/?q=look'] ], [ 'SOme URLS http://google.com https://google1.com google2.com google.com/search?q=potatoes+oven goo.gl/0192n1 google.com?q=asda test bit.ly/0912j www.bob.com indigo.dev.soapbox.co/mobile google.com/?q=.exe flickr.com/linktoimage.jpg', [TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL], @@ -130,6 +130,10 @@ var tests = [ 'A really funky one (example.com/?id=asd2{hellow}and%20it%20continues(23&((@)) and it ends', [TEXT, URL, TEXT], ['A really funky one (', 'example.com/?id=asd2{hellow}and%20it%20continues(23&((@)', ') and it ends'] + ], [ + 'Force http:/ and http:// are not but http://a and http://b.local?qeasd3qas=23 are all links', + [TEXT, URL, TEXT, URL, TEXT], + ['Force http:/ and http:// are not but ', 'http://a', ' and ', 'http://b.local?qeasd3qas=23', ' are all links'] ] ]; From 46b8c826ac018cf234e8b61d3599cd96753248ee Mon Sep 17 00:00:00 2001 From: nfrasser Date: Sun, 17 Apr 2016 21:18:59 -0400 Subject: [PATCH 3/3] Check to that HTTP Auth URL work --- test/spec/linkify/core/parser-test.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/spec/linkify/core/parser-test.js b/test/spec/linkify/core/parser-test.js index e9fd768c..80b77eb3 100644 --- a/test/spec/linkify/core/parser-test.js +++ b/test/spec/linkify/core/parser-test.js @@ -134,6 +134,10 @@ var tests = [ 'Force http:/ and http:// are not but http://a and http://b.local?qeasd3qas=23 are all links', [TEXT, URL, TEXT, URL, TEXT], ['Force http:/ and http:// are not but ', 'http://a', ' and ', 'http://b.local?qeasd3qas=23', ' are all links'] + ], [ + 'HTTP Auth URLs should work: http://username:password@example.com', + [TEXT, URL], + ['HTTP Auth URLs should work: ', 'http://username:password@example.com'] ] ];