diff --git a/lib/path.js b/lib/path.js index a100bde79a317b..c3b5271bfa8df4 100644 --- a/lib/path.js +++ b/lib/path.js @@ -65,26 +65,8 @@ function trimArray(arr) { const splitDeviceRe = /^([a-zA-Z]:|[\\\/]{2}[^\\\/]+[\\\/]+[^\\\/]+)?([\\\/])?([\s\S]*?)$/; -// Regex to split the tail part of the above into [*, dir, basename, ext] -const splitTailRe = - /^([\s\S]*?)((?:\.{1,2}|[^\\\/]+?|)(\.[^.\/\\]*|))(?:[\\\/]*)$/; - var win32 = {}; -// Function to split a filename into [root, dir, basename, ext] -function win32SplitPath(filename) { - // Separate device+slash from tail - const result = splitDeviceRe.exec(filename); - const device = (result[1] || '') + (result[2] || ''); - const tail = result[3]; - // Split the tail into dir, basename and extension - const result2 = splitTailRe.exec(tail); - const dir = result2[1]; - const basename = result2[2]; - const ext = result2[3]; - return [device, dir, basename, ext]; -} - function win32StatPath(path) { const result = splitDeviceRe.exec(path); const device = result[1] || ''; @@ -318,21 +300,104 @@ win32._makeLong = function(path) { win32.dirname = function(path) { - const result = win32SplitPath(path); - const root = result[0]; - var dir = result[1]; - - if (!root && !dir) { - // No dirname whatsoever + const len = path.length; + if (len === 0) return '.'; + var rootEnd = -1; + var end = -1; + var matchedSlash = true; + var offset = 0; + var code = path.charCodeAt(0); + + // Try to match a root + if (len > 1) { + if (code === 47/*/*/ || code === 92/*\*/) { + // Possible UNC root + + rootEnd = offset = 1; + + code = path.charCodeAt(1); + if (code === 47/*/*/ || code === 92/*\*/) { + // Matched double path separator at beginning + var j = 2; + var last = j; + // Match 1 or more non-path separators + for (; j < len; ++j) { + code = path.charCodeAt(j); + if (code === 47/*/*/ || code === 92/*\*/) + break; + } + if (j < len && j !== last) { + // Matched! + last = j; + // Match 1 or more path separators + for (; j < len; ++j) { + code = path.charCodeAt(j); + if (code !== 47/*/*/ && code !== 92/*\*/) + break; + } + if (j < len && j !== last) { + // Matched! + last = j; + // Match 1 or more non-path separators + for (; j < len; ++j) { + code = path.charCodeAt(j); + if (code === 47/*/*/ || code === 92/*\*/) + break; + } + if (j === len) { + // We matched a UNC root only + return path; + } + if (j !== last) { + // We matched a UNC root with leftovers + + // Offset by 1 to include the separator after the UNC root to + // treat it as a "normal root" on top of a (UNC) root + rootEnd = offset = j + 1; + } + } + } + } + } else if ((code >= 65/*A*/ && code <= 90/*Z*/) || + (code >= 97/*a*/ && code <= 122/*z*/)) { + // Possible device root + + if (path.charCodeAt(1) === 58/*:*/) { + rootEnd = offset = 2; + if (len > 2) { + code = path.charCodeAt(2); + if (code === 47/*/*/ || code === 92/*\*/) + rootEnd = offset = 3; + } + } + } + } else if (code === 47/*/*/ || code === 92/*\*/) { + // `path` contains just a path separator, exit early to avoid + // unnecessary work + return path; } - if (dir) { - // It has a dirname, strip trailing slash - dir = dir.substr(0, dir.length - 1); + for (var i = len - 1; i >= offset; --i) { + code = path.charCodeAt(i); + if (code === 47/*/*/ || code === 92/*\*/) { + if (!matchedSlash) { + end = i; + break; + } + } else { + // We saw the first non-path separator + matchedSlash = false; + } } - return root + dir; + if (end === -1) { + if (rootEnd === -1) + return '.'; + else + end = rootEnd; + } + return path.slice(0, end); }; @@ -340,17 +405,155 @@ win32.basename = function(path, ext) { if (ext !== undefined && typeof ext !== 'string') throw new TypeError('ext must be a string'); - var f = win32SplitPath(path)[2]; - // TODO: make this comparison case-insensitive on windows? - if (ext && f.substr(-1 * ext.length) === ext) { - f = f.substr(0, f.length - ext.length); + var start = 0; + var end = -1; + var matchedSlash = true; + var i; + + // Check for a drive letter prefix so as not to mistake the following + // path separator as an extra separator at the end of the path that can be + // disregarded + if (path.length >= 2) { + const drive = path.charCodeAt(0); + if ((drive >= 65/*A*/ && drive <= 90/*Z*/) || + (drive >= 97/*a*/ && drive <= 122/*z*/)) { + if (path.charCodeAt(1) === 58/*:*/) + start = 2; + } + } + + if (ext !== undefined && ext.length > 0 && ext.length <= path.length) { + if (ext.length === path.length && ext === path) + return ''; + var extIdx = ext.length - 1; + var firstNonSlashEnd = -1; + for (i = path.length - 1; i >= start; --i) { + const code = path.charCodeAt(i); + if (code === 47/*/*/ || code === 92/*\*/) { + // If we reached a path separator that was not part of a set of path + // separators at the end of the string, stop now + if (!matchedSlash) { + start = i + 1; + break; + } + } else { + if (firstNonSlashEnd === -1) { + // We saw the first non-path separator, remember this index in case + // we need it if the extension ends up not matching + matchedSlash = false; + firstNonSlashEnd = i + 1; + } + if (extIdx >= 0) { + // Try to match the explicit extension + if (code === ext.charCodeAt(extIdx)) { + if (--extIdx === -1) { + // We matched the extension, so mark this as the end of our path + // component + end = i; + } + } else { + // Extension does not match, so our result is the entire path + // component + extIdx = -1; + end = firstNonSlashEnd; + } + } + } + } + + if (start === end) + end = firstNonSlashEnd; + else if (end === -1) + end = path.length; + return path.slice(start, end); + } else { + for (i = path.length - 1; i >= start; --i) { + const code = path.charCodeAt(i); + if (code === 47/*/*/ || code === 92/*\*/) { + // If we reached a path separator that was not part of a set of path + // separators at the end of the string, stop now + if (!matchedSlash) { + start = i + 1; + break; + } + } else if (end === -1) { + // We saw the first non-path separator, mark this as the end of our + // path component + matchedSlash = false; + end = i + 1; + } + } + + if (end === -1) + return ''; + return path.slice(start, end); } - return f; }; win32.extname = function(path) { - return win32SplitPath(path)[3]; + var start = 0; + var startDot = -1; + var startPart = 0; + var end = -1; + var matchedSlash = true; + // Track the state of characters (if any) we see before our first dot and + // after any path separator we find + var preDotState = 0; + + // Check for a drive letter prefix so as not to mistake the following + // path separator as an extra separator at the end of the path that can be + // disregarded + if (path.length >= 2) { + const code = path.charCodeAt(0); + if (path.charCodeAt(1) === 58/*:*/ && + ((code >= 65/*A*/ && code <= 90/*Z*/) || + (code >= 97/*a*/ && code <= 122/*z*/))) { + start = startPart = 2; + } + } + + for (var i = path.length - 1; i >= start; --i) { + const code = path.charCodeAt(i); + if (code === 47/*/*/ || code === 92/*\*/) { + // If we reached a path separator that was not part of a set of path + // separators at the end of the string, stop now + if (!matchedSlash) { + startPart = i + 1; + break; + } + continue; + } + if (end === -1) { + // We saw the first non-path separator, mark this as the end of our + // extension + matchedSlash = false; + end = i + 1; + } + if (code === 46/*.*/) { + // If this is our first dot, mark it as the start of our extension + if (startDot === -1) + startDot = i; + else if (preDotState !== 1) + preDotState = 1; + } else if (startDot !== -1) { + // We saw a non-dot and non-path separator before our dot, so we should + // have a good chance at having a non-empty extension + preDotState = -1; + } + } + + if (startDot === -1 || + end === -1 || + // We saw a non-dot character immediately before the dot + preDotState === 0 || + // The (right-most) trimmed path component is exactly '..' + (preDotState === 1 && + startDot === end - 1 && + startDot === startPart + 1)) { + return ''; + } + return path.slice(startDot, end); }; @@ -382,17 +585,166 @@ win32.format = function(pathObject) { }; -win32.parse = function(pathString) { - assertPath(pathString); +win32.parse = function(path) { + assertPath(path); - var allParts = win32SplitPath(pathString); - return { - root: allParts[0], - dir: allParts[0] + allParts[1].slice(0, -1), - base: allParts[2], - ext: allParts[3], - name: allParts[2].slice(0, allParts[2].length - allParts[3].length) - }; + var ret = { root: '', dir: '', base: '', ext: '', name: '' }; + if (path.length === 0) + return ret; + + var len = path.length; + var rootEnd = 0; + var code = path.charCodeAt(0); + + // Try to match a root + if (len > 1) { + if (code === 47/*/*/ || code === 92/*\*/) { + // Possible UNC root + + code = path.charCodeAt(1); + rootEnd = 1; + if (code === 47/*/*/ || code === 92/*\*/) { + // Matched double path separator at beginning + var j = 2; + var last = j; + // Match 1 or more non-path separators + for (; j < len; ++j) { + code = path.charCodeAt(j); + if (code === 47/*/*/ || code === 92/*\*/) + break; + } + if (j < len && j !== last) { + // Matched! + last = j; + // Match 1 or more path separators + for (; j < len; ++j) { + code = path.charCodeAt(j); + if (code !== 47/*/*/ && code !== 92/*\*/) + break; + } + if (j < len && j !== last) { + // Matched! + last = j; + // Match 1 or more non-path separators + for (; j < len; ++j) { + code = path.charCodeAt(j); + if (code === 47/*/*/ || code === 92/*\*/) + break; + } + if (j === len) { + // We matched a UNC root only + + rootEnd = j; + } else if (j !== last) { + // We matched a UNC root with leftovers + + rootEnd = j + 1; + } + } + } + } + } else if ((code >= 65/*A*/ && code <= 90/*Z*/) || + (code >= 97/*a*/ && code <= 122/*z*/)) { + // Possible device root + + if (path.charCodeAt(1) === 58/*:*/) { + rootEnd = 2; + if (len > 2) { + code = path.charCodeAt(2); + if (code === 47/*/*/ || code === 92/*\*/) { + if (len === 3) { + // `path` contains just a drive root, exit early to avoid + // unnecessary work + ret.root = ret.dir = path; + return ret; + } + rootEnd = 3; + } + } else { + // `path` contains just a drive root, exit early to avoid + // unnecessary work + ret.root = ret.dir = path; + return ret; + } + } + } + } else if (code === 47/*/*/ || code === 92/*\*/) { + // `path` contains just a path separator, exit early to avoid + // unnecessary work + ret.root = ret.dir = path; + return ret; + } + + if (rootEnd > 0) + ret.root = path.slice(0, rootEnd); + + var startDot = -1; + var startPart = rootEnd; + var end = -1; + var matchedSlash = true; + var i = path.length - 1; + + // Track the state of characters (if any) we see before our first dot and + // after any path separator we find + var preDotState = 0; + + // Get non-dir info + for (; i >= rootEnd; --i) { + code = path.charCodeAt(i); + if (code === 47/*/*/ || code === 92/*\*/) { + // If we reached a path separator that was not part of a set of path + // separators at the end of the string, stop now + if (!matchedSlash) { + startPart = i + 1; + break; + } + continue; + } + if (end === -1) { + // We saw the first non-path separator, mark this as the end of our + // extension + matchedSlash = false; + end = i + 1; + } + if (code === 46/*.*/) { + // If this is our first dot, mark it as the start of our extension + if (startDot === -1) + startDot = i; + else if (preDotState !== 1) + preDotState = 1; + } else if (startDot !== -1) { + // We saw a non-dot and non-path separator before our dot, so we should + // have a good chance at having a non-empty extension + preDotState = -1; + } + } + + if (startDot === -1 || + end === -1 || + // We saw a non-dot character immediately before the dot + preDotState === 0 || + // The (right-most) trimmed path component is exactly '..' + (preDotState === 1 && + startDot === end - 1 && + startDot === startPart + 1)) { + if (end !== -1) { + ret.base = ret.name = path.slice(startPart, end); + } + } else { + ret.name = path.slice(startPart, startDot); + ret.base = path.slice(startPart, end); + ret.ext = path.slice(startDot, end); + } + + // If the directory is the root, use the entire root as the `dir` including + // the trailing slash if any (`C:\abc` -> `C:\`). Otherwise, strip out the + // trailing slash (`C:\abc\def` -> `C:\abc`). + if (startPart > 0 && startPart !== rootEnd) + ret.dir = path.slice(0, startPart - 1); + else + ret.dir = ret.root; + + return ret; };