Skip to content

Commit

Permalink
fs: optimize realpath using uv_fs_realpath()
Browse files Browse the repository at this point in the history
Remove realpath() and realpathSync() cache.
Use the native uv_fs_realpath(), which is faster
than the JS implementation by a few orders of magnitude.

PR-URL: #3594
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
Reviewed-By: Brian White <mscdex@mscdex.net>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Johan Bergström <bugs@bergstroem.nu>
  • Loading branch information
jhamhader authored and mscdex committed Apr 15, 2016
1 parent 81fd458 commit b488b19
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 329 deletions.
36 changes: 17 additions & 19 deletions doc/api/fs.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -916,26 +916,20 @@ object with an `encoding` property specifying the character encoding to use for
the link path passed to the callback. If the `encoding` is set to `'buffer'`,
the link path returned will be passed as a `Buffer` object.

## fs.realpath(path[, cache], callback)
## fs.realpath(path[, options], callback)

* `path` {String | Buffer}
* `cache` {Object}
* `options` {String | Object}
  * `encoding` {String} default = `'utf8'`
* `callback` {Function}

Asynchronous realpath(2). The `callback` gets two arguments `(err,
resolvedPath)`. May use `process.cwd` to resolve relative paths. `cache` is an
object literal of mapped paths that can be used to force a specific path
resolution or avoid additional `fs.stat` calls for known real paths.

Example:
resolvedPath)`. May use `process.cwd` to resolve relative paths.

```js
var cache = {'/etc':'/private/etc'};
fs.realpath('/etc/passwd', cache, (err, resolvedPath) => {
if (err) throw err;
console.log(resolvedPath);
});
```
The optional `options` argument can be a string specifying an encoding, or an
object with an `encoding` property specifying the character encoding to use for
the path passed to the callback. If the `encoding` is set to `'buffer'`,
the path returned will be passed as a `Buffer` object.

## fs.readSync(fd, buffer, offset, length, position)

Expand All @@ -947,14 +941,18 @@ fs.realpath('/etc/passwd', cache, (err, resolvedPath) => {

Synchronous version of [`fs.read()`][]. Returns the number of `bytesRead`.

## fs.realpathSync(path[, cache])
## fs.realpathSync(path[, options])

* `path` {String | Buffer}
* `cache` {Object}
* `options` {String | Object}
  * `encoding` {String} default = `'utf8'`

Synchronous realpath(2). Returns the resolved path. `cache` is an
object literal of mapped paths that can be used to force a specific path
resolution or avoid additional `fs.stat` calls for known real paths.
Synchronous realpath(2). Returns the resolved path.

The optional `options` argument can be a string specifying an encoding, or an
object with an `encoding` property specifying the character encoding to use for
the returned path. If the `encoding` is set to `'buffer'`, the path will be
returned as a `Buffer` object.

## fs.rename(oldPath, newPath, callback)

Expand Down
249 changes: 26 additions & 223 deletions lib/fs.js
Original file line number Diff line number Diff line change
Expand Up @@ -1557,234 +1557,37 @@ fs.unwatchFile = function(filename, listener) {
}
};

// Regexp that finds the next portion of a (partial) path
// result is [base_with_slash, base], e.g. ['somedir/', 'somedir']
const nextPartRe = isWindows ?
/(.*?)(?:[\/\\]+|$)/g :
/(.*?)(?:[\/]+|$)/g;

// Regex to find the device root, including trailing slash. E.g. 'c:\\'.
const splitRootRe = isWindows ?
/^(?:[a-zA-Z]:|[\\\/]{2}[^\\\/]+[\\\/][^\\\/]+)?[\\\/]*/ :
/^[\/]*/;

fs.realpathSync = function realpathSync(p, cache) {
// make p is absolute
p = pathModule.resolve(p);

if (cache && Object.prototype.hasOwnProperty.call(cache, p)) {
return cache[p];
}

const original = p;
const seenLinks = {};
const knownHard = {};

// current character position in p
var pos;
// the partial path so far, including a trailing slash if any
var current;
// the partial path without a trailing slash (except when pointing at a root)
var base;
// the partial path scanned in the previous round, with slash
var previous;

start();

function start() {
// Skip over roots
var m = splitRootRe.exec(p);
pos = m[0].length;
current = m[0];
base = m[0];
previous = '';

// On windows, check that the root exists. On unix there is no need.
if (isWindows && !knownHard[base]) {
fs.lstatSync(base);
knownHard[base] = true;
}
}

// walk down the path, swapping out linked pathparts for their real
// values
// NB: p.length changes.
while (pos < p.length) {
// find the next part
nextPartRe.lastIndex = pos;
var result = nextPartRe.exec(p);
previous = current;
current += result[0];
base = previous + result[1];
pos = nextPartRe.lastIndex;

// continue if not a symlink
if (knownHard[base] || (cache && cache[base] === base)) {
continue;
}

var resolvedLink;
if (cache && Object.prototype.hasOwnProperty.call(cache, base)) {
// some known symbolic link. no need to stat again.
resolvedLink = cache[base];
} else {
var stat = fs.lstatSync(base);
if (!stat.isSymbolicLink()) {
knownHard[base] = true;
if (cache) cache[base] = base;
continue;
}

// read the link if it wasn't read before
// dev/ino always return 0 on windows, so skip the check.
var linkTarget = null;
if (!isWindows) {
var id = stat.dev.toString(32) + ':' + stat.ino.toString(32);
if (seenLinks.hasOwnProperty(id)) {
linkTarget = seenLinks[id];
}
}
if (linkTarget === null) {
fs.statSync(base);
linkTarget = fs.readlinkSync(base);
}
resolvedLink = pathModule.resolve(previous, linkTarget);
// track this, if given a cache.
if (cache) cache[base] = resolvedLink;
if (!isWindows) seenLinks[id] = linkTarget;
}

// resolve the link, then start over
p = pathModule.resolve(resolvedLink, p.slice(pos));
start();
}

if (cache) cache[original] = p;

return p;
// Synchronous realpath: resolves `path` through the native binding.
//
// `options` may be omitted (any falsy value), a string naming the encoding,
// or an object whose `encoding` property is forwarded to the binding.
// Any other truthy type throws a TypeError.
// Returns whatever `binding.realpath()` returns.
fs.realpathSync = function realpathSync(path, options) {
  var opts;
  if (!options) {
    // Falsy (including the empty string): use defaults.
    opts = {};
  } else {
    switch (typeof options) {
      case 'string':
        // Shorthand form: the string is the encoding.
        opts = {encoding: options};
        break;
      case 'object':
        opts = options;
        break;
      default:
        throw new TypeError('"options" must be a string or an object');
    }
  }

  // Validate the path only after `options` has been accepted.
  nullCheck(path);
  return binding.realpath(pathModule._makeLong(path), opts.encoding);
};


fs.realpath = function realpath(p, cache, cb) {
if (typeof cb !== 'function') {
cb = maybeCallback(cache);
cache = null;
}

// make p is absolute
p = pathModule.resolve(p);

if (cache && Object.prototype.hasOwnProperty.call(cache, p)) {
return process.nextTick(cb.bind(null, null, cache[p]));
}

const original = p;
const seenLinks = {};
const knownHard = {};

// current character position in p
var pos;
// the partial path so far, including a trailing slash if any
var current;
// the partial path without a trailing slash (except when pointing at a root)
var base;
// the partial path scanned in the previous round, with slash
var previous;

start();

function start() {
// Skip over roots
var m = splitRootRe.exec(p);
pos = m[0].length;
current = m[0];
base = m[0];
previous = '';

// On windows, check that the root exists. On unix there is no need.
if (isWindows && !knownHard[base]) {
fs.lstat(base, function(err) {
if (err) return cb(err);
knownHard[base] = true;
LOOP();
});
} else {
process.nextTick(LOOP);
}
}

// walk down the path, swapping out linked pathparts for their real
// values
function LOOP() {
// stop if scanned past end of path
if (pos >= p.length) {
if (cache) cache[original] = p;
return cb(null, p);
}

// find the next part
nextPartRe.lastIndex = pos;
var result = nextPartRe.exec(p);
previous = current;
current += result[0];
base = previous + result[1];
pos = nextPartRe.lastIndex;

// continue if not a symlink
if (knownHard[base] || (cache && cache[base] === base)) {
return process.nextTick(LOOP);
}

if (cache && Object.prototype.hasOwnProperty.call(cache, base)) {
// known symbolic link. no need to stat again.
return gotResolvedLink(cache[base]);
}

return fs.lstat(base, gotStat);
}

function gotStat(err, stat) {
if (err) return cb(err);

// if not a symlink, skip to the next path part
if (!stat.isSymbolicLink()) {
knownHard[base] = true;
if (cache) cache[base] = base;
return process.nextTick(LOOP);
}

// stat & read the link if not read before
// call gotTarget as soon as the link target is known
// dev/ino always return 0 on windows, so skip the check.
if (!isWindows) {
var id = stat.dev.toString(32) + ':' + stat.ino.toString(32);
if (seenLinks.hasOwnProperty(id)) {
return gotTarget(null, seenLinks[id], base);
}
}
fs.stat(base, function(err) {
if (err) return cb(err);

fs.readlink(base, function(err, target) {
if (!isWindows) seenLinks[id] = target;
gotTarget(err, target);
});
});
}

function gotTarget(err, target, base) {
if (err) return cb(err);

var resolvedLink = pathModule.resolve(previous, target);
if (cache) cache[base] = resolvedLink;
gotResolvedLink(resolvedLink);
}

function gotResolvedLink(resolvedLink) {
// resolve the link, then start over
p = pathModule.resolve(resolvedLink, p.slice(pos));
start();
// Asynchronous realpath: resolves `path` through the native binding and
// reports the outcome via `callback`.
//
// `options` may be omitted (any falsy value), a string naming the encoding,
// or an object whose `encoding` property is forwarded to the binding.
// A function in the `options` position is treated as the callback.
// Any other truthy type throws a TypeError synchronously.
// Always returns undefined.
fs.realpath = function realpath(path, options, callback) {
  var opts;
  if (!options) {
    // Falsy (including the empty string): use defaults.
    opts = {};
  } else {
    switch (typeof options) {
      case 'function':
        // Called as realpath(path, callback).
        callback = options;
        opts = {};
        break;
      case 'string':
        // Shorthand form: the string is the encoding.
        opts = {encoding: options};
        break;
      case 'object':
        opts = options;
        break;
      default:
        throw new TypeError('"options" must be a string or an object');
    }
  }

  callback = makeCallback(callback);

  // A falsy result from nullCheck() aborts the request; presumably the
  // failure has already been handed to `callback` -- confirm in nullCheck().
  if (nullCheck(path, callback)) {
    var req = new FSReqWrap();
    req.oncomplete = callback;
    binding.realpath(pathModule._makeLong(path), opts.encoding, req);
  }
};


Expand Down
13 changes: 2 additions & 11 deletions lib/module.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,19 +108,10 @@ function tryPackage(requestPath, exts) {
tryExtensions(path.resolve(filename, 'index'), exts);
}

// In order to minimize unnecessary lstat() calls,
// this cache is a list of known-real paths.
// Set to an empty object to reset.
Module._realpathCache = {};

// check if the file exists and is not a directory
function tryFile(requestPath) {
const rc = stat(requestPath);
return rc === 0 && toRealPath(requestPath);
}

function toRealPath(requestPath) {
return fs.realpathSync(requestPath, Module._realpathCache);
return rc === 0 && fs.realpathSync(requestPath);
}

// given a path, check if the file exists with any of the set extensions
Expand Down Expand Up @@ -163,7 +154,7 @@ Module._findPath = function(request, paths) {
if (!trailingSlash) {
const rc = stat(basePath);
if (rc === 0) { // File.
filename = toRealPath(basePath);
filename = fs.realpathSync(basePath);
} else if (rc === 1) { // Directory.
if (exts === undefined)
exts = Object.keys(Module._extensions);
Expand Down
Loading

0 comments on commit b488b19

Please sign in to comment.