Skip to content

Commit

Permalink
module: support require()ing synchronous ESM graphs
Browse files Browse the repository at this point in the history
This patch adds `require()` support for synchronous ESM graphs under
the flag --experimental-require-module.

This is based on the following design aspects of ESM:

- The resolution can be synchronous (up to the host)
- The evaluation of a synchronous graph (without top-level await)
  is also synchronous, and, by the time the module graph is
  instantiated (before evaluation starts), this is already known.

When the module being require()ed has .mjs extension or there are
other explicit indicators that it's an ES module, we load it as an
ES module. If the graph is synchronous, we return the module namespace
as the exports. If the graph contains top-level await, we throw an
error before evaluating the module. If an additional flag
--print-pending-tla is passed, we proceed to evaluation but do not
run the microtasks, only to find out where the TLAs are and print
their locations to help users fix them.

If there are no explicit indicators whether a .js file is CJS or ESM,
we parse it as CJS first. If the parse error indicates that it contains
ESM syntax, we parse it again as ESM. If the second parsing succeeds,
we continue to treat it as ESM.
  • Loading branch information
joyeecheung committed Mar 7, 2024
1 parent 38c74d3 commit b4c3f5c
Show file tree
Hide file tree
Showing 20 changed files with 686 additions and 253 deletions.
2 changes: 2 additions & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ doc/changelogs/CHANGELOG_v1*.md
!doc/changelogs/CHANGELOG_v18.md
!doc/api_assets/*.js
!.eslintrc.js
test/es-module/test-require-module-entry-point.js
test/es-module/test-require-module-entry-point-aou.js
17 changes: 17 additions & 0 deletions doc/api/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,22 @@ added: v11.8.0

Use the specified file as a security policy.

### `--experimental-require-module`

<!-- YAML
added: REPLACEME
-->

> Stability: 1.1 - Active Development

Supports loading a synchronous ES module graph in `require()`. If the module
graph is not synchronous (contains top-level await), it throws an error.

By default, a `.js` file will be parsed as a CommonJS module first. If it
contains ES module syntax, Node.js will try to parse and evaluate the module
again as an ES module. If it turns out to be synchronous and can be evaluated
successfully, the module namespace object will be returned by `require()`.

### `--experimental-sea-config`

<!-- YAML
Expand Down Expand Up @@ -2523,6 +2539,7 @@ Node.js options that are allowed are:
* `--experimental-network-imports`
* `--experimental-permission`
* `--experimental-policy`
* `--experimental-require-module`
* `--experimental-shadow-realm`
* `--experimental-specifier-resolution`
* `--experimental-top-level-await`
Expand Down
3 changes: 3 additions & 0 deletions lib/internal/errors.js
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ class NodeAggregateError extends AggregateError {
}

const assert = require('internal/assert');
const { getOptionValue } = require('internal/options');

// Lazily loaded
let util;
Expand Down Expand Up @@ -1686,6 +1687,8 @@ E('ERR_PERFORMANCE_MEASURE_INVALID_OPTIONS', '%s', TypeError);
E('ERR_REQUIRE_ESM',
function(filename, hasEsmSyntax, parentPath = null, packageJsonPath = null) {
hideInternalStackFrames(this);
// TODO(joyeecheung): mention --experimental-require-module here.
assert(!getOptionValue('--experimental-require-module'));
let msg = `require() of ES Module ${filename}${parentPath ? ` from ${
parentPath}` : ''} not supported.`;
if (!packageJsonPath) {
Expand Down
150 changes: 106 additions & 44 deletions lib/internal/modules/cjs/loader.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,11 @@ const {
StringPrototypeSlice,
StringPrototypeSplit,
StringPrototypeStartsWith,
Symbol,
} = primordials;

// Map used to store CJS parsing data.
const cjsParseCache = new SafeWeakMap();
// Map used to store CJS parsing data or for ESM loading.
const cjsSourceCache = new SafeWeakMap();
/**
* Map of already-loaded CJS modules to use.
*/
Expand All @@ -72,12 +73,15 @@ const cjsExportsCache = new SafeWeakMap();
// Set first due to cycle with ESM loader functions.
module.exports = {
cjsExportsCache,
cjsParseCache,
cjsSourceCache,
initializeCJS,
Module,
wrapSafe,
makeRequireWithPolicy,
};

const is_main_symbol = Symbol('is_main_symbol');

const { BuiltinModule } = require('internal/bootstrap/realm');
const {
maybeCacheSourceMap,
Expand All @@ -98,7 +102,6 @@ const {
containsModuleSyntax,
compileFunctionForCJSLoader,
} = internalBinding('contextify');

const assert = require('internal/assert');
const fs = require('fs');
const path = require('path');
Expand All @@ -107,7 +110,6 @@ const { safeGetenv } = internalBinding('credentials');
const {
privateSymbols: {
require_private_symbol,
host_defined_option_symbol,
},
} = internalBinding('util');
const {
Expand Down Expand Up @@ -396,6 +398,10 @@ function initializeCJS() {
// TODO(joyeecheung): deprecate this in favor of a proper hook?
Module.runMain =
require('internal/modules/run_main').executeUserEntryPoint;

if (getOptionValue('--experimental-require-module')) {
Module._extensions['.mjs'] = loadESMFromCJS;
}
}

// Given a module name, and a list of paths to test, returns the first
Expand Down Expand Up @@ -988,7 +994,7 @@ Module._load = function(request, parent, isMain) {
if (cachedModule !== undefined) {
updateChildren(parent, cachedModule, true);
if (!cachedModule.loaded) {
const parseCachedModule = cjsParseCache.get(cachedModule);
const parseCachedModule = cjsSourceCache.get(cachedModule);
if (!parseCachedModule || parseCachedModule.loaded) {
return getExportsForCircularRequire(cachedModule);
}
Expand All @@ -1010,6 +1016,9 @@ Module._load = function(request, parent, isMain) {
setOwnProperty(process, 'mainModule', module);
setOwnProperty(module.require, 'main', process.mainModule);
module.id = '.';
module[is_main_symbol] = true;
} else {
module[is_main_symbol] = false;
}

reportModuleToWatchMode(filename);
Expand Down Expand Up @@ -1270,57 +1279,96 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) {
);

// Cache the source map for the module if present.
if (script.sourceMapURL) {
maybeCacheSourceMap(filename, content, this, false, undefined, script.sourceMapURL);
const { sourceMapURL } = script;
if (sourceMapURL) {
maybeCacheSourceMap(filename, content, this, false, undefined, sourceMapURL);
}

return runScriptInThisContext(script, true, false);
return {
__proto__: null,
function: runScriptInThisContext(script, true, false),
sourceMapURL,
retryAsESM: false,
};
}

try {
const result = compileFunctionForCJSLoader(content, filename);
result.function[host_defined_option_symbol] = hostDefinedOptionId;

// cachedDataRejected is only set for cache coming from SEA.
if (codeCache &&
result.cachedDataRejected !== false &&
internalBinding('sea').isSea()) {
process.emitWarning('Code cache data rejected.');
}
const result = compileFunctionForCJSLoader(content, filename);

// Cache the source map for the module if present.
if (result.sourceMapURL) {
maybeCacheSourceMap(filename, content, this, false, undefined, result.sourceMapURL);
}
// cachedDataRejected is only set for cache coming from SEA.
if (codeCache &&
result.cachedDataRejected !== false &&
internalBinding('sea').isSea()) {
process.emitWarning('Code cache data rejected.');
}

return result.function;
} catch (err) {
if (process.mainModule === cjsModuleInstance) {
const { enrichCJSError } = require('internal/modules/esm/translators');
enrichCJSError(err, content, filename);
}
throw err;
// Cache the source map for the module if present.
if (result.sourceMapURL) {
maybeCacheSourceMap(filename, content, this, false, undefined, result.sourceMapURL);
}

return result;
}

/**
 * Synchronously load `filename` as an ES module and assign the result of
 * `importSyncForRequire()` to the `exports` of the given CommonJS module.
 * @param {Module} mod The CommonJS module whose `exports` gets populated.
 * @param {string} filename The file path of the module, as resolved by the CJS loader.
 */
function loadESMFromCJS(mod, filename) {
  const content = getMaybeCachedSource(mod, filename);
  const esmLoader = require('internal/modules/esm/loader');
  const loader = esmLoader.getOrInitializeCascadedLoader();
  // Resolution has already been done by CJS; the path is only converted to a file URL here.
  const { href } = pathToFileURL(filename);
  // TODO(joyeecheung): maybe we can do some special handling for default here. Maybe we don't.
  mod.exports = loader.importSyncForRequire(href, content, mod[is_main_symbol]);
}

/**
 * Build the `require` function for a module. When a policy manifest is
 * configured, the manifest's dependency mapper for `moduleURL` is passed along
 * as the redirects.
 * @param {Module} module The module the `require` function is created for.
 * @param {string} moduleURL Used to look up the dependency mapper in the manifest.
 * @returns {Function}
 */
function makeRequireWithPolicy(module, moduleURL) {
  const manifest = policy()?.manifest;
  // Without a manifest there is nothing to redirect, so `redirects` stays undefined.
  const redirects = manifest ? manifest.getDependencyMapper(moduleURL) : undefined;
  return makeRequireFunction(module, redirects);
}

/**
* Run the file contents in the correct scope or sandbox. Expose the correct helper variables (`require`, `module`,
* `exports`) to the file. Returns exception, if any.
* @param {string} content The source code of the module
* @param {string} filename The file path of the module
* @param {boolean} loadAsESM Whether it's known to be ESM - i.e. suffix is .mjs.
*/
Module.prototype._compile = function(content, filename) {
Module.prototype._compile = function(content, filename, loadAsESM = false) {
let moduleURL;
let redirects;
const manifest = policy()?.manifest;
if (manifest) {
moduleURL = pathToFileURL(filename);
redirects = manifest.getDependencyMapper(moduleURL);
manifest.assertIntegrity(moduleURL, content);
}

const compiledWrapper = wrapSafe(filename, content, this);
// TODO(joyeecheung): when the module is the entry point, consider allowing TLA.
// Only modules being require()'d really need to avoid TLA.
let compiledWrapper;
if (!loadAsESM) {
const result = wrapSafe(filename, content, this);
compiledWrapper = result.function;
loadAsESM = result.retryAsESM;
}

if (loadAsESM) {
// Pass the source into the .mjs extension handler indirectly through the cache.
cjsSourceCache.set(this, content);
loadESMFromCJS(this, filename);
return;
}

// TODO(joyeecheung): the detection below is unnecessarily complex. Maybe just
// use the is_main_symbol, or a break_on_start_symbol that gets passed from
// higher level instead of doing hacky detection here.
let inspectorWrapper = null;
if (getOptionValue('--inspect-brk') && process._eval == null) {
if (!resolvedArgv) {
Expand All @@ -1344,8 +1392,9 @@ Module.prototype._compile = function(content, filename) {
inspectorWrapper = internalBinding('inspector').callAndPauseOnStart;
}
}

const dirname = path.dirname(filename);
const require = makeRequireFunction(this, redirects);
const require = makeRequireWithPolicy(this, moduleURL);
let result;
const exports = this.exports;
const thisValue = exports;
Expand All @@ -1363,25 +1412,37 @@ Module.prototype._compile = function(content, filename) {
return result;
};

/**
* Native handler for `.js` files.
* @param {Module} module The module to compile
* @param {string} filename The file path of the module
*/
Module._extensions['.js'] = function(module, filename) {
// If already analyzed the source, then it will be cached.
const cached = cjsParseCache.get(module);
/**
 * Get the source text for a module. A truthy `source` stored on the module's
 * `cjsSourceCache` entry is returned and then cleared from the entry;
 * otherwise the file is read from disk as UTF-8.
 * @param {Module} mod The module used as the key into `cjsSourceCache`.
 * @param {string} filename The file path to read when nothing usable is cached.
 * @returns {string}
 */
function getMaybeCachedSource(mod, filename) {
  const entry = cjsSourceCache.get(mod);
  if (!entry?.source) {
    // TODO(joyeecheung): read a buffer.
    return fs.readFileSync(filename, 'utf8');
  }
  const { source } = entry;
  // The cached copy is consumed once; reset the slot after handing it out.
  entry.source = undefined;
  return source;
}

/**
* Native handler for `.js` files.
* @param {Module} module The module to compile
* @param {string} filename The file path of the module
*/
Module._extensions['.js'] = function(module, filename) {
// If already analyzed the source, then it will be cached.
const content = getMaybeCachedSource(module, filename);

if (StringPrototypeEndsWith(filename, '.js')) {
const pkg = packageJsonReader.getNearestParentPackageJSON(filename);
// Function require shouldn't be used in ES modules.
if (pkg?.data.type === 'module') {
if (getOptionValue('--experimental-require-module')) {
module._compile(content, filename, true);
return;
}

// This is an error path because `require` of a `.js` file in a `"type": "module"` scope is not allowed.
const parent = moduleParentCache.get(module);
const parentPath = parent?.filename;
Expand Down Expand Up @@ -1414,7 +1475,8 @@ Module._extensions['.js'] = function(module, filename) {
throw err;
}
}
module._compile(content, filename);

module._compile(content, filename, false);
};

/**
Expand Down
Loading

0 comments on commit b4c3f5c

Please sign in to comment.