'use strict';
const URL = require('url');
const qs = require('querystring');
const punycode = require('punycode');

/*
  Splits a connection string into its coarse sections.

  JavaScript regular expressions have no "extended" flag, so the spaces inside
  the optional userinfo group are literal characters: that group only takes
  part in a match when the URI really contains those spaces. In the usual case
  the userinfo therefore arrives as part of group 3 and is split off on `@` by
  parseConnectionString.

  Capture groups:
    1. protocol (`mongodb` or `mongodb+srv`)
    2. password from the space-delimited userinfo form (normally undefined, unused)
    3. authority section: `[userinfo@]host[,host...]`
    4. everything after the optional `/`: `db[?query]`

  The expression is not anchored; parseConnectionString checks the match index.
*/
const HOSTS_RX = /(mongodb(?:\+srv|)):\/\/(?: (?:[^:]*) (?: : ([^@]*) )? @ )?([^/?]*)(?:\/|)(.*)/;

const SUPPORTED_PROTOCOLS = ['mongodb', 'mongodb+srv'];

/**
 * Coerces a single query string value into a richer JavaScript value.
 *
 * - arrays (repeated keys) are de-duplicated; a single survivor is unwrapped
 * - `k1:v1,k2:v2` strings become an object whose values are coerced recursively
 * - `true` / `false` (any case) become booleans
 * - strings `parseFloat` can read become numbers
 * - anything else is returned untouched
 *
 * @param {string|string[]|undefined} value the raw value produced by `querystring.parse`
 * @returns {*} the coerced value
 */
function parseQueryStringItemValue(value) {
  if (Array.isArray(value)) {
    // compare against the whole array: the first occurrence of an item is the one kept
    const unique = value.filter((item, idx) => value.indexOf(item) === idx);
    return unique.length === 1 ? unique[0] : unique;
  }

  // e.g. the missing right-hand side of a `key:value` pair; nothing to coerce
  if (typeof value !== 'string') {
    return value;
  }

  if (value.indexOf(':') > 0) {
    return value.split(',').reduce((result, pair) => {
      const parts = pair.split(':');
      result[parts[0]] = parseQueryStringItemValue(parts[1]);
      return result;
    }, {});
  }

  const lowered = value.toLowerCase();
  if (lowered === 'true' || lowered === 'false') {
    return lowered === 'true';
  }

  // NOTE: parseFloat accepts a numeric prefix, so a value such as `10abc` becomes 10
  const numericValue = parseFloat(value);
  return Number.isNaN(numericValue) ? value : numericValue;
}

/**
 * Parses the query portion of a connection string into an options object.
 * Keys are lower-cased and values are coerced with parseQueryStringItemValue.
 *
 * @param {?string} query the text following `?`, or null when there is none
 * @returns {?Object|Error} the options, null when there are none, or an Error
 *   (returned, not thrown) when a key has no value
 */
function parseQueryString(query) {
  const result = {};
  const parsedQueryString = qs.parse(query);
  const keys = Object.keys(parsedQueryString);
  for (let i = 0; i < keys.length; i++) {
    const value = parsedQueryString[keys[i]];
    if (value === '' || value == null) {
      return new Error('Incomplete key value pair for option');
    }

    result[keys[i].toLowerCase()] = parseQueryStringItemValue(value);
  }

  // special cases for known deprecated options: `wtimeoutms` wins over `wtimeout`.
  // Presence is tested rather than truthiness so that `wtimeout=0` still counts.
  const has = key => Object.prototype.hasOwnProperty.call(result, key);
  if (has('wtimeout') && has('wtimeoutms')) {
    delete result.wtimeout;
    // TODO: emit a warning
  }

  return Object.keys(result).length ? result : null;
}

/**
 * Parses one entry of the comma separated host list.
 *
 * @param {string} host a single `hostname[:port]` or percent-encoded socket path
 * @returns {{host: string, port: ?number}|Error} the parsed host, or an Error
 *   (returned, not thrown) describing why it was rejected
 */
function parseHost(host) {
  let parsedHost;
  try {
    parsedHost = URL.parse(`mongodb://${host}`);
  } catch (err) {
    // keep the callback contract even if the url module rejects the input outright
    return err instanceof Error ? err : new Error(String(err));
  }

  if (parsedHost.path === '/:') {
    return new Error('Double colon in host identifier');
  }

  // heuristically determine if we're working with a domain socket
  if (host.match(/\.sock/)) {
    parsedHost.hostname = qs.unescape(host);
    parsedHost.port = null;
  }

  const port = parsedHost.port ? parseInt(parsedHost.port, 10) : null;
  if (Number.isNaN(port)) {
    return new Error('Invalid port (non-numeric string)');
  }

  if (port === 0) {
    return new Error('Invalid port (zero) with hostname');
  }

  if (port > 65535) {
    return new Error('Invalid port (larger than 65535) with hostname');
  }

  if (port < 0) {
    return new Error('Invalid port (negative number)');
  }

  return { host: punycode.toUnicode(parsedHost.hostname), port: port };
}

/**
 * Parses a MongoDB Connection string
 *
 * @param {*} uri the MongoDB connection string to parse
 * @param {parseCallback} callback invoked synchronously as `callback(err)` on
 *   failure or `callback(null, { hosts, auth, options })` on success, where
 *   `hosts` is an array of `{ host, port }`, `auth` is `{ username, password, db }`
 *   or null, and `options` is the parsed query string or null
 */
function parseConnectionString(uri, callback) {
  if (typeof uri !== 'string') {
    return callback(new Error('Invalid connection string'));
  }

  // HOSTS_RX is unanchored, so additionally require the scheme to start the string
  const cap = uri.match(HOSTS_RX);
  if (!cap || cap.index !== 0) {
    return callback(new Error('Invalid connection string'));
  }

  const protocol = cap[1];
  if (SUPPORTED_PROTOCOLS.indexOf(protocol) === -1) {
    return callback(new Error('Invalid protocol provided'));
  }

  const dbAndQuery = cap[4].split('?');
  const db = dbAndQuery[0];
  const query = dbAndQuery.length > 1 ? dbAndQuery[1] : null;
  const options = parseQueryString(query);
  if (options instanceof Error) {
    return callback(options);
  }

  // an `@` after the first `/` means the userinfo contained an unescaped slash
  if (db.indexOf('@') !== -1) {
    return callback(new Error('Unescaped slash in userinfo section'));
  }

  const authorityParts = cap[3].split('@');
  if (authorityParts.length > 2) {
    return callback(new Error('Unescaped at-sign in authority section'));
  }

  const auth = { username: null, password: null, db: db ? qs.unescape(db) : null };
  const hostSection = authorityParts[authorityParts.length - 1];
  if (authorityParts.length > 1) {
    const authParts = authorityParts[0].split(':');
    if (authParts.length > 2) {
      return callback(new Error('Unescaped colon in authority section'));
    }

    auth.username = qs.unescape(authParts[0]);
    auth.password = authParts[1] ? qs.unescape(authParts[1]) : null;
  }

  const hosts = [];
  const hostList = hostSection.split(',');
  for (let i = 0; i < hostList.length; i++) {
    const parsed = parseHost(hostList[i]);
    if (parsed instanceof Error) {
      return callback(parsed);
    }

    hosts.push(parsed);
  }

  if (hosts.length === 0 || hosts[0].host === '' || hosts[0].host === null) {
    return callback(new Error('No hostname or hostnames provided in connection string'));
  }

  callback(null, { hosts: hosts, auth: auth.db || auth.username ? auth : null, options: options });
}

module.exports = parseConnectionString;