Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

js: semgrep js pattern testing #8902

Merged
merged 40 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
66f023e
More tests
ajbt200128 Oct 3, 2023
c3e302f
Unit_engine now running
ajbt200128 Oct 4, 2023
4a4037f
Re add Unit_engine
ajbt200128 Oct 4, 2023
9951001
Better error printing
ajbt200128 Oct 4, 2023
3233d10
Fix python parser
ajbt200128 Oct 5, 2023
b906280
Fix PCRE
ajbt200128 Oct 5, 2023
4c2ccad
Fix individual tests
ajbt200128 Oct 5, 2023
18d7355
MORE FIXES!!!! Fresh off the stove
ajbt200128 Oct 5, 2023
b4d0d28
Fix tests
ajbt200128 Oct 6, 2023
51dd5ac
Fix vue
ajbt200128 Oct 6, 2023
f4af37d
Torvalds moment: if it breaks userspace its a bug!
ajbt200128 Oct 9, 2023
fcfa299
Fix stack overflow issue :(
ajbt200128 Oct 9, 2023
dad102e
Ignore weird logging error
ajbt200128 Oct 9, 2023
af17296
camel strings > js strings IG
ajbt200128 Oct 9, 2023
1c20834
Real ocaml js string fixes fr fr
ajbt200128 Oct 9, 2023
babf300
update opam file
tpetr Oct 10, 2023
8d85acc
Merge branch 'develop' into austin/semgrep-js-pattern-testing
tpetr Oct 10, 2023
e27d60c
fix bogus merge
tpetr Oct 10, 2023
f0ad649
Revert "update opam file"
tpetr Oct 10, 2023
d209fa1
temporarily disable golang test that triggers jsoo int overflow
tpetr Oct 10, 2023
bfeb8e6
needed caml_jsstring_of_string impl
tpetr Oct 10, 2023
591a66b
update js parsers to use official semgrep test fixtures + some minor …
tpetr Oct 11, 2023
fc7e497
fix engine test
tpetr Oct 11, 2023
824cc82
move test semgrep js into build-test-javascript
tpetr Oct 11, 2023
adf6583
Merge branch 'develop' into austin/semgrep-js-pattern-testing
tpetr Oct 12, 2023
df38e47
fix engine getLangs()
tpetr Oct 12, 2023
9ed7d78
delete printfs from Entropy.ml
tpetr Oct 12, 2023
114f3a0
fix comment
tpetr Oct 12, 2023
9ebeaaa
js e2e
tpetr Oct 12, 2023
267a80f
try bumping version of semgrep in jsonnet precommit
tpetr Oct 13, 2023
248f69e
fix jsoo build upload
tpetr Oct 13, 2023
3b891c7
temporarily skip failing C test
tpetr Oct 13, 2023
9caeca6
jsoo 5.4.0
tpetr Oct 13, 2023
fa018c4
bump ocaml-layer image
tpetr Oct 13, 2023
178aae2
non-zero exit if tests fail
tpetr Oct 13, 2023
5f45ce4
workflows needed a dep for make
tpetr Oct 13, 2023
60ea1a4
Merge branch 'develop' into austin/semgrep-js-pattern-testing
tpetr Oct 16, 2023
cc9d1bb
Merge branch 'develop' into austin/semgrep-js-pattern-testing
tpetr Oct 17, 2023
32dcfff
PR feedback
tpetr Oct 17, 2023
d83208e
more comments
tpetr Oct 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions .github/workflows/build-test-javascript.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,7 @@ local build_job =
with: {
'retention-days': 1,
path: |||
_build/default/js/engine/*.bc.js
_build/default/js/languages/*/*.bc.js
_build/default/js/**/*.bc.js
|||,
name: build_artifact_name,
},
Expand Down Expand Up @@ -130,7 +129,18 @@ local test_job = {
name: 'Build JS artifacts',
run: |||
make -C js -j $(nproc) build

|||
},
{
name: 'Test JS artifacts',
run: |||
make -C js -j $(nproc) test
make -C js/tests
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need this line here; the `make test` above already runs the tests under js/tests now.

|||
},
{
name: 'Package JS artifacts',
run:|||
tar cvzf semgrep-js-artifacts.tar.gz \
js/engine/dist/index.cjs \
js/engine/dist/index.mjs \
Expand All @@ -148,11 +158,7 @@ local test_job = {
'retention-days': 2,
name: artifact_name,
},
},
{
name: 'Run semgrep js e2e tests',
run: 'make -C js test',
},
}
],
};

Expand Down
20 changes: 11 additions & 9 deletions .github/workflows/build-test-javascript.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ jobs:
- uses: actions/upload-artifact@v3
with:
retention-days: 1
path: "\n _build/default/js/engine/*.bc.js\n _build/default/js/languages/*/*.bc.js\n
\ "
path: "\n _build/default/js/**/*.bc.js\n "
name: semgrep-js-ocaml-build-${{ github.sha }}
container: returntocorp/ocaml:alpine-2023-10-12
env:
Expand Down Expand Up @@ -75,19 +74,22 @@ jobs:
with:
node-version: "18"
- name: Build JS artifacts
run: "\n make -C js -j $(nproc) build\n\n tar cvzf semgrep-js-artifacts.tar.gz
\\\n js/engine/dist/index.cjs \\\n js/engine/dist/index.mjs
\\\n js/engine/dist/semgrep-engine.wasm \\\n js/languages/*/dist/index.cjs
\\\n js/languages/*/dist/index.mjs \\\n js/languages/*/dist/semgrep-parser.wasm\n
\ "
run: "\n make -C js -j $(nproc) build\n "
- name: Test JS artifacts
run: "\n make -C js -j $(nproc) test\n "
- name: Run JS e2e tests
run: "\n make -C js/tests\n "
- name: Package JS artifacts
run: "\n tar cvzf semgrep-js-artifacts.tar.gz \\\n js/engine/dist/index.cjs
\\\n js/engine/dist/index.mjs \\\n js/engine/dist/semgrep-engine.wasm
\\\n js/languages/*/dist/index.cjs \\\n js/languages/*/dist/index.mjs
\\\n js/languages/*/dist/semgrep-parser.wasm\n "
- name: Upload JS artifacts
uses: actions/upload-artifact@v3
with:
path: semgrep-js-artifacts.tar.gz
retention-days: 2
name: semgrep-js-artifacts-${{ github.sha }}
- name: Run semgrep js e2e tests
run: make -C js test
upload:
needs:
- test
Expand Down
4 changes: 2 additions & 2 deletions js/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ test:
cd libpcre; $(MAKE) test
cd libyaml; $(MAKE) test
cd engine; $(MAKE) test
cd languages; $(MAKE) test # These are easily parallelizable
#cd tests; $(MAKE) # This is really slow for some reason :(
cd languages; $(MAKE) test
cd tests; $(MAKE)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it would be better to be consistent and use `make test` for the tests/ dir too.


.PHONY: e2e
e2e: engine languages
Expand Down
42 changes: 6 additions & 36 deletions js/engine/Main.ml
Original file line number Diff line number Diff line change
@@ -1,46 +1,16 @@
open Js_of_ocaml
open Semgrep_js_shared

let _ =
Js.export_all
(object%js
(*
The following methods are used internally by js/engine/src/index.js.
*)
tpetr marked this conversation as resolved.
Show resolved Hide resolved
method init = init_jsoo
method getMountpoints = get_jsoo_mountpoint ()

method setParsePattern (func : jbool -> jstring -> jstring -> 'a) =
Parse_pattern.parse_pattern_ref :=
fun print_error lang pattern ->
match lang with
(* The Yaml and JSON parsers are embedded in the engine because it's a
core component needed to parse rules *)
| Lang.Yaml -> Yaml_to_generic.any pattern
| _ ->
func (Js.bool print_error)
(Js.string (Lang.to_lowercase_alnum lang))
(Js.string pattern)

method setJustParseWithLang
(func : jstring -> jstring -> Parsing_result2.t) =
Parse_target.just_parse_with_lang_ref :=
fun lang filename ->
match lang with
(* The Yaml and JSON parsers are embedded in the engine because it's a
core component needed to parse rules *)
| Lang.Yaml ->
{
ast = Yaml_to_generic.program filename;
errors = [];
skipped_tokens = [];
inserted_tokens = [];
stat = Parsing_stat.default_stat filename;
}
| _ ->
func
(Js.string (Lang.to_lowercase_alnum lang))
(Js.string filename)
method init = Semgrep_js_shared.init_jsoo
method getMountpoints = Semgrep_js_shared.get_jsoo_mountpoint ()
method setParsePattern = Semgrep_js_shared.setParsePattern
method setJustParseWithLang = Semgrep_js_shared.setJustParseWithLang
method setJsonnetParser = Semgrep_js_shared.setJsonnetParser

(*
The following methods are part of the engine's public API.
Expand Down Expand Up @@ -98,5 +68,5 @@ let _ =
in
Semgrep_output_v1_j.string_of_core_output res
in
wrap_with_js_error execute
Semgrep_js_shared.wrap_with_js_error execute
end)
23 changes: 2 additions & 21 deletions js/engine/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
EMCC_DEFAULTS = \
-sALLOW_MEMORY_GROWTH=1 \
-sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8,stringToAscii,lengthBytesUTF8,getValue,setValue \
-sMODULARIZE

include ../libyaml/Makefile.include
include ../libpcre/Makefile.include

ESBUILD_DEFAULTS = \
--bundle \
Expand All @@ -15,6 +9,8 @@ ESBUILD_DEFAULTS = \
.PHONY: default
default: build

include Makefile.include

.PHONY: build
build: dist/index.cjs dist/index.mjs dist/index.d.ts

Expand All @@ -34,21 +30,6 @@ clean:
distclean:
rm -rf dist node_modules

../libyaml/dist/libyaml.o:
cd ../libyaml; $(MAKE) dist/libyaml.o

../libpcre/dist/libpcre.a:
cd ../libpcre; $(MAKE) dist/libpcre.a

dist/semgrep-engine.js dist/semgrep-engine.wasm: ../libyaml/dist/libyaml.o ../libpcre/dist/libpcre.a
mkdir -p dist
emcc \
-O3 \
$^ \
$(EMCC_DEFAULTS) \
-sEXPORTED_FUNCTIONS=_malloc,_free,$(YAML_EXPORTED_METHODS),$(PCRE_EXPORTED_METHODS) \
-o $(word 1,$@)

package-lock.json:
npm install

Expand Down
22 changes: 22 additions & 0 deletions js/engine/Makefile.include
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
EMCC_DEFAULTS = \
-sALLOW_MEMORY_GROWTH=1 \
-sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8,stringToAscii,lengthBytesUTF8,getValue,setValue,intArrayFromString,writeArrayToMemory \
-sMODULARIZE

include ../libyaml/Makefile.include
include ../libpcre/Makefile.include

../libyaml/dist/libyaml.o:
$(MAKE) -C ../libyaml dist/libyaml.o

../libpcre/dist/libpcre.a:
$(MAKE) -C ../libpcre dist/libpcre.a

dist/semgrep-engine.js dist/semgrep-engine.wasm: ../libyaml/dist/libyaml.o ../libpcre/dist/libpcre.a
tpetr marked this conversation as resolved.
Show resolved Hide resolved
mkdir -p dist
emcc \
-O3 \
$^ \
$(EMCC_DEFAULTS) \
-sEXPORTED_FUNCTIONS=_malloc,_free,$(YAML_EXPORTED_METHODS),$(PCRE_EXPORTED_METHODS) \
-o $(word 1,$@)
9 changes: 9 additions & 0 deletions js/engine/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export const EngineFactory = async (wasmUri) => {
getMountpoints,
setParsePattern,
setJustParseWithLang,
setJsonnetParser,
execute,
lookupLang,
writeFile,
Expand Down Expand Up @@ -65,6 +66,14 @@ export const EngineFactory = async (wasmUri) => {
languages.set(lang, parser);
missingLanguages.delete(lang);
});
// The Semgrep core engine eagerly uses the Jsonnet parser
// because we support it as a format to write rules in.
// Unlike other languages, there is no pure-OCaml (e.g. Pfff)
// option that we can use instead, so we need to explicitly
// pass a reference to the Jsonnet parser to the engine.
if (parser.getLangs().includes("jsonnet")) {
setJsonnetParser((file) => parser.parseTargetTsOnly(file));
}
},
hasParser: (lang) => languages.has(lang),
isMissingLanguages: () => missingLanguages.size > 0,
Expand Down
2 changes: 1 addition & 1 deletion js/engine/tests/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ describe("engine", () => {
"python",
`${__dirname}/test-rule-python.json`,
`${__dirname}`,
[`dirname}/../../languages/python/tests/test.py`]
[`${__dirname}/test.py`]
);
expect(engine.isMissingLanguages()).toBe(true);
expect(engine.getMissingLanguages()).toEqual(["python"]);
Expand Down
File renamed without changes.
21 changes: 4 additions & 17 deletions js/languages/bash/tests/index.test.js
Original file line number Diff line number Diff line change
@@ -1,23 +1,10 @@
const { ParserFactory } = require("../dist/index.cjs");

const parserPromise = ParserFactory();
const { createParser, testParser } = require("../../shared/parser");

const LANG = "bash";
const EXPECTED_LANGS = [LANG];

test("getLangs", async () => {
const parser = await parserPromise;
expect(parser.getLangs()).toEqual(EXPECTED_LANGS);
});

test("it parses a pattern", async () => {
const parser = await parserPromise;
const pattern = parser.parsePattern(false, LANG, "echo $X");
expect(typeof pattern).toEqual("object");
});
describe(`${LANG} parser`, () => {
const parserPromise = createParser(`${__dirname}/../dist/index.cjs`);

test("it parses a file", async () => {
const parser = await parserPromise;
const target = parser.parseTarget(LANG, "tests/test.sh");
expect(typeof target).toEqual("object");
testParser(LANG, parserPromise);
});
3 changes: 0 additions & 3 deletions js/languages/bash/tests/test.sh

This file was deleted.

7 changes: 6 additions & 1 deletion js/languages/c/Parser.ml
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
let parse_target _ file =
Pfff_or_tree_sitter.run file
[ TreeSitter Parse_c_tree_sitter.parse ]
[
Pfff
(Pfff_or_tree_sitter.throw_tokens (fun file ->
Parse_c.parse (Fpath.v file)));
TreeSitter Parse_c_tree_sitter.parse;
]
C_to_generic.program

let parse_pattern print_errors _ str =
Expand Down
22 changes: 4 additions & 18 deletions js/languages/c/tests/index.test.js
Original file line number Diff line number Diff line change
@@ -1,23 +1,9 @@
const { ParserFactory } = require("../dist/index.cjs");

const parserPromise = ParserFactory();
const { createParser, testParser } = require("../../shared/parser");

const LANG = "c";
const EXPECTED_LANGS = [LANG];

test("getLangs", async () => {
const parser = await parserPromise;
expect(parser.getLangs()).toEqual(EXPECTED_LANGS);
});

test("it parses a pattern", async () => {
const parser = await parserPromise;
const pattern = parser.parsePattern(false, LANG, "printf(...)");
expect(typeof pattern).toEqual("object");
});
describe(`${LANG} parser`, () => {
const parserPromise = createParser(`${__dirname}/../dist/index.cjs`);

test("it parses a file", async () => {
const parser = await parserPromise;
const target = parser.parseTarget(LANG, "tests/test.c");
expect(typeof target).toEqual("object");
testParser(LANG, parserPromise);
});
3 changes: 0 additions & 3 deletions js/languages/c/tests/test.c

This file was deleted.

23 changes: 4 additions & 19 deletions js/languages/cairo/tests/index.test.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,9 @@
const { ParserFactory } = require("../dist/index.cjs");

const parserPromise = ParserFactory();
const { createParser, testParser } = require("../../shared/parser");

const LANG = "cairo";
const EXPECTED_LANGS = [LANG];

test("getLangs", async () => {
const parser = await parserPromise;
expect(parser.getLangs()).toEqual(EXPECTED_LANGS);
});

// TODO
// test("it parses a pattern", async () => {
// const parser = await parserPromise;
// const pattern = parser.parsePattern(false, LANG, "sum by (foo) $X");
// expect(typeof pattern).toEqual("object");
// });
describe(`${LANG} parser`, () => {
const parserPromise = createParser(`${__dirname}/../dist/index.cjs`);

test("it parses a file", async () => {
const parser = await parserPromise;
const target = parser.parseTarget(LANG, "tests/test.cairo");
expect(typeof target).toEqual("object");
testParser(LANG, parserPromise);
});
4 changes: 0 additions & 4 deletions js/languages/cairo/tests/test.cairo

This file was deleted.

22 changes: 4 additions & 18 deletions js/languages/cpp/tests/index.test.js
Original file line number Diff line number Diff line change
@@ -1,23 +1,9 @@
const { ParserFactory } = require("../dist/index.cjs");

const parserPromise = ParserFactory();
const { createParser, testParser } = require("../../shared/parser");

const LANG = "cpp";
const EXPECTED_LANGS = [LANG];

test("getLangs", async () => {
const parser = await parserPromise;
expect(parser.getLangs()).toEqual(EXPECTED_LANGS);
});

test("it parses a pattern", async () => {
const parser = await parserPromise;
const pattern = parser.parsePattern(false, LANG, "std::cout << $X");
expect(typeof pattern).toEqual("object");
});
describe(`${LANG} parser`, () => {
const parserPromise = createParser(`${__dirname}/../dist/index.cjs`);

test("it parses a file", async () => {
const parser = await parserPromise;
const target = parser.parseTarget(LANG, "tests/test.cpp");
expect(typeof target).toEqual("object");
testParser(LANG, parserPromise);
});
5 changes: 0 additions & 5 deletions js/languages/cpp/tests/test.cpp

This file was deleted.

Loading
Loading