Skip to content

Commit

Permalink
refactor: use new SourceParser class
Browse files Browse the repository at this point in the history
  • Loading branch information
fraxken committed Dec 18, 2023
1 parent f183c3e commit 4f946f0
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 139 deletions.
67 changes: 5 additions & 62 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,12 @@ import path from "path";

// Import Third-party Dependencies
import { walk } from "estree-walker";
import * as meriyah from "meriyah";
import isMinified from "is-minified-code";

// Import Internal Dependencies
import { SourceFile } from "./src/SourceFile.js";
import { SourceParser } from "./src/SourceParser.js";
import { warnings } from "./src/warnings.js";
import * as utils from "./src/utils.js";

// CONSTANTS
const kMeriyahDefaultOptions = {
next: true,
loc: true,
raw: true,
jsx: true
};

export function runASTAnalysis(
str,
Expand All @@ -30,15 +21,12 @@ export function runASTAnalysis(
removeHTMLComments = false
} = options;

// Note: if the file start with a shebang then we remove it because 'parseScript' may fail to parse it.
// Example: #!/usr/bin/env node
const strToAnalyze = str.charAt(0) === "#" ? str.slice(str.indexOf("\n")) : str;
const body = parseScriptExtended(strToAnalyze, {
isEcmaScriptModule: Boolean(module),
removeHTMLComments
const parser = new SourceParser(str, { removeHTMLComments });
const body = parser.parseScript({
isEcmaScriptModule: Boolean(module)
});

const source = new SourceFile(str);
const source = new SourceFile(parser.raw);

// we walk each AST Nodes, this is a purely synchronous I/O
walk(body, {
Expand Down Expand Up @@ -103,49 +91,4 @@ export async function runASTAnalysisOnFile(
}
}

function parseScriptExtended(strToAnalyze, options = {}) {
const { isEcmaScriptModule, removeHTMLComments } = options;

/**
* @see https://github.com/NodeSecure/js-x-ray/issues/109
*/
const cleanedStrToAnalyze = removeHTMLComments ?
utils.removeHTMLComment(strToAnalyze) : strToAnalyze;

try {
const { body } = meriyah.parseScript(
cleanedStrToAnalyze,
{
...kMeriyahDefaultOptions,
module: isEcmaScriptModule,
globalReturn: !isEcmaScriptModule
}
);

return body;
}
catch (error) {
const isIllegalReturn = error.description.includes("Illegal return statement");

if (error.name === "SyntaxError" && (
error.description.includes("The import keyword") ||
error.description.includes("The export keyword") ||
isIllegalReturn
)) {
const { body } = meriyah.parseScript(
cleanedStrToAnalyze,
{
...kMeriyahDefaultOptions,
module: true,
globalReturn: isIllegalReturn
}
);

return body;
}

throw error;
}
}

export { warnings };
87 changes: 87 additions & 0 deletions src/SourceParser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Import Third-party Dependencies
import * as meriyah from "meriyah";

// CONSTANTS
const kParsingOptions = {
next: true,
loc: true,
raw: true,
jsx: true
};

export class SourceParser {
/**
* @param {!string} source
* @param {object} options
* @param {boolean} [options.removeHTMLComments=false]
*/
constructor(source, options = {}) {
if (typeof source !== "string") {
throw new TypeError("source must be a string");
}
const { removeHTMLComments = false } = options;

this.raw = source;

/**
* if the file start with a shebang then we remove it because meriyah.parseScript fail to parse it.
* @example
* #!/usr/bin/env node
*/
const rawNoShebang = source.charAt(0) === "#" ?
source.slice(source.indexOf("\n") + 1) : source;

this.source = removeHTMLComments ?
this.#removeHTMLComment(rawNoShebang) : rawNoShebang;
}

#removeHTMLComment(str) {
return str.replace(/<!--[\s\S]*?(?:-->)/g, "");

Check failure

Code scanning / CodeQL

Polynomial regular expression used on uncontrolled data High

This
regular expression
that depends on
library input
may run slow on strings starting with '<!--' and with many repetitions of '<!--'.

Check failure

Code scanning / CodeQL

Incomplete multi-character sanitization High

This string may still contain
<!--
, which may cause an HTML element injection vulnerability.
}

/**
* @param {object} options
* @param {boolean} options.isEcmaScriptModule
*/
parseScript(options = {}) {
const {
isEcmaScriptModule
} = options;

try {
const { body } = meriyah.parseScript(
this.source,
{
...kParsingOptions,
module: isEcmaScriptModule,
globalReturn: !isEcmaScriptModule
}
);

return body;
}
catch (error) {
const isIllegalReturn = error.description.includes("Illegal return statement");

if (error.name === "SyntaxError" && (
error.description.includes("The import keyword") ||
error.description.includes("The export keyword") ||
isIllegalReturn
)) {
const { body } = meriyah.parseScript(
this.source,
{
...kParsingOptions,
module: true,
globalReturn: isIllegalReturn
}
);

return body;
}

throw error;
}
}
}

4 changes: 0 additions & 4 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,3 @@ export function extractNode(expectedType) {
}
};
}

export function removeHTMLComment(str) {
return str.replace(/<!--[\s\S]*?(?:-->)/g, "");
}
98 changes: 98 additions & 0 deletions test/SourceParser.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Import Node.js Dependencies
import { describe, it } from "node:test";
import assert from "node:assert";

// Import Internal Dependencies
import { SourceParser } from "../src/SourceParser.js";

describe("SourceParser", () => {
describe("constructor", () => {
it("should throw a TypeError if source is not a string", () => {
assert.throws(
() => new SourceParser(10),
{ message: "source must be a string" }
);
});

it("should not update the content of raw", () => {
const expectedStr = "hello world";

assert.strictEqual(
new SourceParser(expectedStr).raw,
expectedStr
);

assert.strictEqual(
new SourceParser(expectedStr, { removeHTMLComments: true }).raw,
expectedStr
);
});

it("should remove shebang at the start of the file", () => {
const sp = new SourceParser("#!/usr/bin/env node\nconst hello = \"world\";");

assert.strictEqual(
sp.source,
"const hello = \"world\";"
);
});

it("should not remove shebang if not at the start (that's an illegal code)", () => {
const source = "const hello = \"world\";\n#!/usr/bin/env node";
const sp = new SourceParser(source);

assert.strictEqual(
sp.source,
source
);
});

it("should remove singleline HTML comment from source code when removeHTMLComments is enabled", () => {
const sp = new SourceParser("<!-- const yo = 5; -->", {
removeHTMLComments: true
});

assert.strictEqual(sp.source, "");
});

it("should remove multiline HTML comment from source code when removeHTMLComments is enabled", () => {
const sp = new SourceParser(`
<!--
// == fake comment == //
const yo = 5;
//-->
`, {
removeHTMLComments: true
});

assert.strictEqual(sp.source.trim(), "");
});
});

describe("parseScript", () => {
it("should not crash even if isEcmaScriptModule 'false' is provided (import keyword)", () => {
new SourceParser("import * as foo from \"foo\";").parseScript({
isEcmaScriptModule: false
});
});

it("should not crash even if isEcmaScriptModule 'false' is provided (export keyword)", () => {
new SourceParser("export const foo = 5;").parseScript({
isEcmaScriptModule: false
});
});

it("should not crash with a source code containing JSX", () => {
const code = `const Dropzone = forwardRef(({ children, ...params }, ref) => {
const { open, ...props } = useDropzone(params);
useImperativeHandle(ref, () => ({ open }), [open]);
return <Fragment>{children({ ...props, open })}</Fragment>;
});`;

new SourceParser(code).parseScript({
isEcmaScriptModule: false
});
});
});
});
8 changes: 0 additions & 8 deletions test/fixtures/issues/jsx.js

This file was deleted.

9 changes: 0 additions & 9 deletions test/issues/59-undefined-depName.spec.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Import Node.js Dependencies
import { readFileSync } from "node:fs";
import { test } from "node:test";
import assert from "node:assert";

// Import Internal Dependencies
import { runASTAnalysis } from "../../index.js";
Expand All @@ -17,11 +16,3 @@ test("it should not crash for prop-types", () => {
);
runASTAnalysis(propTypes);
});

test("it should not crash for JSX", () => {
const propTypes = readFileSync(
new URL("jsx.js", FIXTURE_URL),
"utf-8"
);
runASTAnalysis(propTypes);
});
32 changes: 0 additions & 32 deletions test/module.spec.js

This file was deleted.

14 changes: 14 additions & 0 deletions test/runASTAnalysis.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,17 @@ test("it should return isOneLineRequire true given a single line CJS export", ()
assert.ok(isOneLineRequire);
assert.deepEqual([...dependencies.keys()], ["foo"]);
});

test("it should be capable to extract dependencies name for ECMAScript Modules (ESM)", () => {
const { dependencies, warnings } = runASTAnalysis(`
import * as http from "http";
import fs from "fs";
import { foo } from "xd";
`, { module: true });

assert.strictEqual(warnings.length, 0);
assert.deepEqual(
[...dependencies.keys()].sort(),
["http", "fs", "xd"].sort()
);
});
24 changes: 0 additions & 24 deletions test/utils.spec.js

This file was deleted.

0 comments on commit 4f946f0

Please sign in to comment.