Skip to content

Commit

Permalink
Merge pull request #127 from SoapBox/protocol-parsing
Browse files Browse the repository at this point in the history
Anything that begins with a protocol is recognized as a string
  • Loading branch information
Nick Frasser committed Apr 18, 2016
2 parents 3ae6099 + 46b8c82 commit 4d07072
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 24 deletions.
28 changes: 6 additions & 22 deletions src/linkify/core/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@ let S_START = makeState();

// Intermediate states for URLs. Note that domains that begin with a protocol
// are treated slightly differently from those that don't.
// (PSS == "PROTOCOL SLASH SLASH")
// S_DOMAIN* states can generally become prefixes for email addresses, while
// S_PSS_DOMAIN* cannot
let
S_PROTOCOL = makeState(), // e.g., 'http:'
S_PROTOCOL_SLASH = makeState(), // e.g., '/', 'http:/'
Expand All @@ -66,11 +63,6 @@ S_DOMAIN_DOT = makeState(), // (A) domain followed by DOT
S_TLD = makeState(T_URL), // (A) Simplest possible URL with no query string
S_TLD_COLON = makeState(), // (A) URL followed by colon (potential port number here)
S_TLD_PORT = makeState(T_URL), // TLD followed by a port number
S_PSS_DOMAIN = makeState(), // parsed string starts with protocol and ends with a potential domain name (B)
S_PSS_DOMAIN_DOT = makeState(), // (B) domain followed by DOT
S_PSS_TLD = makeState(T_URL), // (B) Simplest possible URL with no query string and a protocol
S_PSS_TLD_COLON = makeState(), // (B) URL followed by colon (potential port number here)
S_PSS_TLD_PORT = makeState(T_URL), // TLD followed by a port number
S_URL = makeState(T_URL), // Long URL with optional port and maybe query string
S_URL_SYMS = makeState(), // URL followed by some symbols (will not be part of the final URL)
S_URL_OPENBRACE = makeState(), // URL followed by {
Expand Down Expand Up @@ -104,15 +96,16 @@ S_START.on(TT_TLD, S_DOMAIN);
S_START.on(TT_DOMAIN, S_DOMAIN);
S_START.on(TT_LOCALHOST, S_TLD);
S_START.on(TT_NUM, S_DOMAIN);
S_PROTOCOL_SLASH_SLASH.on(TT_TLD, S_PSS_DOMAIN);
S_PROTOCOL_SLASH_SLASH.on(TT_DOMAIN, S_PSS_DOMAIN);
S_PROTOCOL_SLASH_SLASH.on(TT_NUM, S_PSS_DOMAIN);
S_PROTOCOL_SLASH_SLASH.on(TT_LOCALHOST, S_PSS_TLD);

// Force URL for anything sane that follows a protocol
S_PROTOCOL_SLASH_SLASH.on(TT_TLD, S_URL);
S_PROTOCOL_SLASH_SLASH.on(TT_DOMAIN, S_URL);
S_PROTOCOL_SLASH_SLASH.on(TT_NUM, S_URL);
S_PROTOCOL_SLASH_SLASH.on(TT_LOCALHOST, S_URL);

// Account for dots and hyphens
// hyphens are usually parts of domain names
S_DOMAIN.on(TT_DOT, S_DOMAIN_DOT);
S_PSS_DOMAIN.on(TT_DOT, S_PSS_DOMAIN_DOT);
S_EMAIL_DOMAIN.on(TT_DOT, S_EMAIL_DOMAIN_DOT);

// Hyphen can jump back to a domain name
Expand All @@ -122,10 +115,6 @@ S_DOMAIN_DOT.on(TT_TLD, S_TLD);
S_DOMAIN_DOT.on(TT_DOMAIN, S_DOMAIN);
S_DOMAIN_DOT.on(TT_NUM, S_DOMAIN);
S_DOMAIN_DOT.on(TT_LOCALHOST, S_DOMAIN);
S_PSS_DOMAIN_DOT.on(TT_TLD, S_PSS_TLD);
S_PSS_DOMAIN_DOT.on(TT_DOMAIN, S_PSS_DOMAIN);
S_PSS_DOMAIN_DOT.on(TT_NUM, S_PSS_DOMAIN);
S_PSS_DOMAIN_DOT.on(TT_LOCALHOST, S_PSS_DOMAIN);
S_EMAIL_DOMAIN_DOT.on(TT_TLD, S_EMAIL);
S_EMAIL_DOMAIN_DOT.on(TT_DOMAIN, S_EMAIL_DOMAIN);
S_EMAIL_DOMAIN_DOT.on(TT_NUM, S_EMAIL_DOMAIN);
Expand All @@ -134,7 +123,6 @@ S_EMAIL_DOMAIN_DOT.on(TT_LOCALHOST, S_EMAIL_DOMAIN);
// S_TLD accepts! But the URL could be longer, try to find a match greedily
// The `run` function should be able to "rollback" to the accepting state
S_TLD.on(TT_DOT, S_DOMAIN_DOT);
S_PSS_TLD.on(TT_DOT, S_PSS_DOMAIN_DOT);
S_EMAIL.on(TT_DOT, S_EMAIL_DOMAIN_DOT);

// Become real URLs after `SLASH` or `COLON NUM SLASH`
Expand All @@ -143,10 +131,6 @@ S_TLD.on(TT_COLON, S_TLD_COLON);
S_TLD.on(TT_SLASH, S_URL);
S_TLD_COLON.on(TT_NUM, S_TLD_PORT);
S_TLD_PORT.on(TT_SLASH, S_URL);
S_PSS_TLD.on(TT_COLON, S_PSS_TLD_COLON);
S_PSS_TLD.on(TT_SLASH, S_URL);
S_PSS_TLD_COLON.on(TT_NUM, S_PSS_TLD_PORT);
S_PSS_TLD_PORT.on(TT_SLASH, S_URL);
S_EMAIL.on(TT_COLON, S_EMAIL_COLON);
S_EMAIL_COLON.on(TT_NUM, S_EMAIL_PORT);

Expand Down
12 changes: 10 additions & 2 deletions test/spec/linkify/core/parser-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ var tests = [
['This [', 'i.imgur.com/ckSj2Ba.jpg', ')] should also work']
], [
'A link is http://nick.is.awesome/?q=nick+amazing&nick=yo%29%30hellp another is http://nick.con/?q=look',
[TEXT, URL, TEXT],
['A link is ', 'http://nick.is', '.awesome/?q=nick+amazing&nick=yo%29%30hellp another is http://nick.con/?q=look']
[TEXT, URL, TEXT, URL],
['A link is ', 'http://nick.is.awesome/?q=nick+amazing&nick=yo%29%30hellp', ' another is ', 'http://nick.con/?q=look']
], [
'SOme URLS http://google.com https://google1.com google2.com google.com/search?q=potatoes+oven goo.gl/0192n1 google.com?q=asda test bit.ly/0912j www.bob.com indigo.dev.soapbox.co/mobile google.com/?q=.exe flickr.com/linktoimage.jpg',
[TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL, TEXT, URL],
Expand Down Expand Up @@ -130,6 +130,14 @@ var tests = [
'A really funky one (example.com/?id=asd2{hellow}and%20it%20continues(23&((@)) and it ends',
[TEXT, URL, TEXT],
['A really funky one (', 'example.com/?id=asd2{hellow}and%20it%20continues(23&((@)', ') and it ends']
], [
'Force http:/ and http:// are not but http://a and http://b.local?qeasd3qas=23 are all links',
[TEXT, URL, TEXT, URL, TEXT],
['Force http:/ and http:// are not but ', 'http://a', ' and ', 'http://b.local?qeasd3qas=23', ' are all links']
], [
'HTTP Auth URLs should work: http://username:password@example.com',
[TEXT, URL],
['HTTP Auth URLs should work: ', 'http://username:password@example.com']
]
];

Expand Down

0 comments on commit 4d07072

Please sign in to comment.