diff --git a/index.js b/index.js index 94e357e..8e31ae3 100644 --- a/index.js +++ b/index.js @@ -2,8 +2,8 @@ var required = require('requires-port') , qs = require('querystringify') - , slashes = /^[A-Za-z][A-Za-z0-9+-.]*:[\\/]+/ - , protocolre = /^([a-z][a-z0-9.+-]*:)?([\\/]{1,})?([\S\s]*)/i + , slashes = /^[A-Za-z][A-Za-z0-9+-.]*:\/\// + , protocolre = /^([a-z][a-z0-9.+-]*:)?(\/\/)?([\\/]+)?([\S\s]*)/i , whitespace = '[\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\xA0\\u1680\\u180E\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200A\\u202F\\u205F\\u3000\\u2028\\u2029\\uFEFF]' , left = new RegExp('^'+ whitespace +'+'); @@ -98,6 +98,24 @@ function lolcation(loc) { return finaldestination; } +/** + * Check whether a protocol scheme is special. + * + * @param {String} scheme The protocol scheme of the URL + * @return {Boolean} `true` if the protocol scheme is special, else `false` + * @private + */ +function isSpecial(scheme) { + return ( + scheme === 'file:' || + scheme === 'ftp:' || + scheme === 'http:' || + scheme === 'https:' || + scheme === 'ws:' || + scheme === 'wss:' + ); +} + /** * @typedef ProtocolExtract * @type Object @@ -110,20 +128,56 @@ function lolcation(loc) { * Extract protocol information from a URL with/without double slash ("//"). * * @param {String} address URL we want to extract from. + * @param {Object} location Optional base location; defaults to `{}` and only its `hostname` is read here. * @return {ProtocolExtract} Extracted information. * @private */ -function extractProtocol(address) { +function extractProtocol(address, location) { address = trimLeft(address); + location = location || {}; + + var match = protocolre.exec(address); + var protocol = match[1] ? 
match[1].toLowerCase() : ''; + var forwardSlashes = !!match[2]; + var otherSlashes = !!match[3]; + var slashesCount = 0; + var rest; + + if (forwardSlashes) { + if (otherSlashes) { + rest = match[2] + match[3] + match[4]; + slashesCount = match[2].length + match[3].length; + } else { + rest = match[2] + match[4]; + slashesCount = match[2].length; + } + } else { + if (otherSlashes) { + rest = match[3] + match[4]; + slashesCount = match[3].length; + } else { + rest = match[4] + } + } - var match = protocolre.exec(address) - , protocol = match[1] ? match[1].toLowerCase() : '' - , slashes = !!(match[2] && match[2].length >= 2) - , rest = match[2] && match[2].length === 1 ? '/' + match[3] : match[3]; + if (protocol === 'file:') { + if (slashesCount >= 2) { + rest = rest.slice(2); + } + } else if (isSpecial(protocol)) { + rest = match[4]; + } else if (protocol) { + if (forwardSlashes) { + rest = rest.slice(2); + } + } else if (slashesCount >= 2 && location.hostname) { + rest = match[4]; + } return { protocol: protocol, - slashes: slashes, + slashes: forwardSlashes || isSpecial(protocol), + slashesCount: slashesCount, rest: rest }; } @@ -214,7 +268,7 @@ function Url(address, location, parser) { // // Extract protocol information before running the instructions. // - extracted = extractProtocol(address || ''); + extracted = extractProtocol(address || '', location); relative = !extracted.protocol && !extracted.slashes; url.slashes = extracted.slashes || relative && location.slashes; url.protocol = extracted.protocol || location.protocol || ''; @@ -224,7 +278,10 @@ function Url(address, location, parser) { // When the authority component is absent the URL starts with a path // component. 
// - if (!extracted.slashes || url.protocol === 'file:') { + if ( + url.protocol === 'file:' || + (extracted.slashesCount < 2 && !isSpecial(extracted.protocol)) + ) { instructions[3] = [/(.*)/, 'pathname']; } @@ -290,10 +347,7 @@ function Url(address, location, parser) { // Default to a / for pathname if none exists. This normalizes the URL // to always have a / // - if ( - url.pathname.charAt(0) !== '/' - && (url.hostname || url.protocol === 'file:') - ) { + if (url.pathname.charAt(0) !== '/' && isSpecial(url.protocol)) { url.pathname = '/' + url.pathname; } @@ -317,7 +371,7 @@ function Url(address, location, parser) { url.password = instruction[1] || ''; } - url.origin = url.protocol && url.host && url.protocol !== 'file:' + url.origin = url.protocol !== 'file:' && isSpecial(url.protocol) && url.host ? url.protocol +'//'+ url.host : 'null'; @@ -410,7 +464,7 @@ function set(part, value, fn) { if (ins[4]) url[ins[1]] = url[ins[1]].toLowerCase(); } - url.origin = url.protocol && url.host && url.protocol !== 'file:' + url.origin = url.protocol !== 'file:' && isSpecial(url.protocol) && url.host ? url.protocol +'//'+ url.host : 'null'; @@ -435,7 +489,7 @@ function toString(stringify) { if (protocol && protocol.charAt(protocol.length - 1) !== ':') protocol += ':'; - var result = protocol + (url.slashes || url.protocol === 'file:' ? '//' : ''); + var result = protocol + (url.slashes || isSpecial(url.protocol) ? 
'//' : ''); if (url.username) { result += url.username; diff --git a/test/test.js b/test/test.js index 38290ed..d5a6cab 100644 --- a/test/test.js +++ b/test/test.js @@ -71,7 +71,8 @@ describe('url-parse', function () { assume(parse.extractProtocol('http://example.com')).eql({ slashes: true, protocol: 'http:', - rest: 'example.com' + rest: 'example.com', + slashesCount: 2 }); }); @@ -79,7 +80,8 @@ describe('url-parse', function () { assume(parse.extractProtocol('')).eql({ slashes: false, protocol: '', - rest: '' + rest: '', + slashesCount: 0 }); }); @@ -87,13 +89,15 @@ describe('url-parse', function () { assume(parse.extractProtocol('/foo')).eql({ slashes: false, protocol: '', - rest: '/foo' + rest: '/foo', + slashesCount: 1 }); assume(parse.extractProtocol('//foo/bar')).eql({ slashes: true, protocol: '', - rest: 'foo/bar' + rest: '//foo/bar', + slashesCount: 2 }); }); @@ -103,7 +107,8 @@ describe('url-parse', function () { assume(parse.extractProtocol(input)).eql({ slashes: false, protocol: '', - rest: input + rest: input, + slashesCount: 0 }); }); @@ -111,7 +116,8 @@ describe('url-parse', function () { assume(parse.extractProtocol(' javascript://foo')).eql({ slashes: true, protocol: 'javascript:', - rest: 'foo' + rest: 'foo', + slashesCount: 2 }); }); }); @@ -281,22 +287,92 @@ describe('url-parse', function () { assume(parsed.host).equals('what-is-up.com'); assume(parsed.href).equals('http://what-is-up.com/'); + + url = '\\\\\\\\what-is-up.com' + parsed = parse(url, parse('http://google.com')); + + assume(parsed.host).equals('what-is-up.com'); + assume(parsed.href).equals('http://what-is-up.com/'); }); - it('does not see a slash after the protocol as path', function () { + it('ignores slashes after the protocol for special URLs', function () { var url = 'https:\\/github.com/foo/bar' , parsed = parse(url); assume(parsed.host).equals('github.com'); assume(parsed.hostname).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + 
assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); url = 'https:/\\/\\/\\github.com/foo/bar'; + parsed = parse(url); assume(parsed.host).equals('github.com'); assume(parsed.hostname).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:/github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:\\github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); }); + it('handles slashes after the protocol for non special URLs', function () { + var url = 'foo:example.com' + , parsed = parse(url); + + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('example.com'); + assume(parsed.href).equals('foo:example.com'); + assume(parsed.slashes).is.false(); + + url = 'foo:/example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/example.com'); + assume(parsed.href).equals('foo:/example.com'); + assume(parsed.slashes).is.false(); + + url = 'foo://example.com'; + parsed = parse(url); + 
assume(parsed.hostname).equals('example.com'); + assume(parsed.pathname).equals(''); + assume(parsed.href).equals('foo://example.com'); + assume(parsed.slashes).is.true(); + + url = 'foo:///example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/example.com'); + assume(parsed.href).equals('foo:///example.com'); + assume(parsed.slashes).is.true(); + }) + describe('origin', function () { it('generates an origin property', function () { var url = 'http://google.com:80/pathname' @@ -319,6 +395,13 @@ describe('url-parse', function () { assume(parsed.origin).equals('null'); }); + it('is null for non special URLs', function () { + var o = parse('foo://example.com/pathname'); + assume(o.hostname).equals('example.com'); + assume(o.pathname).equals('/pathname'); + assume(o.origin).equals('null'); + }); + it('removes default ports for http', function () { var o = parse('http://google.com:80/pathname'); assume(o.origin).equals('http://google.com'); @@ -440,7 +523,7 @@ describe('url-parse', function () { }); it('handles the file: protocol', function () { - var slashes = ['', '/', '//', '///', '////', '/////']; + var slashes = ['', '/', '//', '///']; var data; var url; @@ -451,6 +534,18 @@ describe('url-parse', function () { assume(data.href).equals('file:///'); } + url = 'file:////'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('//'); + assume(data.href).equals(url); + + url = 'file://///'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('///'); + assume(data.href).equals(url); + url = 'file:///Users/foo/BAR/baz.pdf'; data = parse(url); assume(data.protocol).equals('file:');