From 57096fe795dd4d80156b5aca370361a411c788ac Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Wed, 30 Mar 2022 15:56:23 +0100 Subject: [PATCH] Add a PR check to validate that ML-powered queries are run correctly --- .github/workflows/__ml-powered-queries.yml | 119 +++++++++++++++++++ pr-checks/checks/ml-powered-queries.yml | 67 +++++++++++ tests/ml-powered-queries-repo/add-note.js | 21 ++++ tests/ml-powered-queries-repo/app.js | 68 +++++++++++ tests/ml-powered-queries-repo/index.js | 7 ++ tests/ml-powered-queries-repo/logger.js | 5 + tests/ml-powered-queries-repo/models/note.js | 8 ++ tests/ml-powered-queries-repo/models/user.js | 6 + tests/ml-powered-queries-repo/notes-api.js | 44 +++++++ tests/ml-powered-queries-repo/read-notes.js | 37 ++++++ tests/ml-powered-queries-repo/users-api.js | 25 ++++ 11 files changed, 407 insertions(+) create mode 100644 .github/workflows/__ml-powered-queries.yml create mode 100644 pr-checks/checks/ml-powered-queries.yml create mode 100644 tests/ml-powered-queries-repo/add-note.js create mode 100644 tests/ml-powered-queries-repo/app.js create mode 100644 tests/ml-powered-queries-repo/index.js create mode 100644 tests/ml-powered-queries-repo/logger.js create mode 100644 tests/ml-powered-queries-repo/models/note.js create mode 100644 tests/ml-powered-queries-repo/models/user.js create mode 100644 tests/ml-powered-queries-repo/notes-api.js create mode 100644 tests/ml-powered-queries-repo/read-notes.js create mode 100644 tests/ml-powered-queries-repo/users-api.js diff --git a/.github/workflows/__ml-powered-queries.yml b/.github/workflows/__ml-powered-queries.yml new file mode 100644 index 0000000000..1d9bf2fd02 --- /dev/null +++ b/.github/workflows/__ml-powered-queries.yml @@ -0,0 +1,119 @@ +# Warning: This file is generated automatically, and should not be modified. +# Instead, please modify the template in the pr-checks directory and run: +# pip install ruamel.yaml && python3 sync.py +# to regenerate this file. + +name: PR Check - ML-powered queries +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GO111MODULE: auto +on: + push: + branches: + - main + - v1 + - v2 + pull_request: + types: + - opened + - synchronize + - reopened + - ready_for_review + workflow_dispatch: {} +jobs: + ml-powered-queries: + strategy: + matrix: + include: + - os: ubuntu-latest + version: stable-20220120 + - os: macos-latest + version: stable-20220120 + - os: windows-latest + version: stable-20220120 + - os: ubuntu-latest + version: cached + - os: macos-latest + version: cached + - os: windows-latest + version: cached + - os: ubuntu-latest + version: latest + - os: macos-latest + version: latest + - os: windows-latest + version: latest + - os: ubuntu-latest + version: nightly-latest + - os: macos-latest + version: nightly-latest + - os: windows-latest + version: nightly-latest + name: ML-powered queries + timeout-minutes: 45 + runs-on: ${{ matrix.os }} + steps: + - name: Check out repository + uses: actions/checkout@v3 + - name: Prepare test + id: prepare-test + uses: ./.github/prepare-test + with: + version: ${{ matrix.version }} + - uses: ./../action/init + with: + languages: javascript + queries: security-extended + source-root: ./../action/tests/ml-powered-queries-repo + tools: ${{ steps.prepare-test.outputs.tools-url }} + + - uses: ./../action/analyze + with: + output: ${{ runner.temp }}/results + upload-database: false + env: + TEST_MODE: true + + - name: Upload SARIF + uses: actions/upload-artifact@v2 + with: + name: ml-powered-queries-${{ matrix.os }}-${{ matrix.version }}.sarif.json + path: ${{ runner.temp }}/results/javascript.sarif + retention-days: 7 + + - name: Check results + env: + IS_WINDOWS: ${{ matrix.os == 'windows-latest' }} + shell: bash + run: | + cd "$RUNNER_TEMP/results" + # We should run at least the ML-powered queries in `expected_rules`. + expected_rules="js/ml-powered/nosql-injection js/ml-powered/path-injection js/ml-powered/sql-injection js/ml-powered/xss" + + for rule in ${expected_rules}; do + found_rule=$(jq --arg rule "${rule}" '[.runs[0].tool.extensions[].rules | select(. != null) | + flatten | .[].id] | any(. == $rule)' javascript.sarif) + echo "Did find rule '${rule}': ${found_rule}" + if [[ "${found_rule}" != "true" && "${IS_WINDOWS}" != "true" ]]; then + echo "Expected SARIF output to contain rule '${rule}', but found no such rule." + exit 1 + elif [[ "${found_rule}" == "true" && "${IS_WINDOWS}" == "true" ]]; then + echo "Found rule '${rule}' in the SARIF output which shouldn't have been part of the analysis." + exit 1 + fi + done + + # We should have at least one alert from an ML-powered query. + num_alerts=$(jq '[.runs[0].results[] | + select(.properties.score != null and (.rule.id | startswith("js/ml-powered/")))] | length' \ + javascript.sarif) + echo "Found ${num_alerts} alerts from ML-powered queries."; + if [[ "${num_alerts}" -eq 0 && "${IS_WINDOWS}" != "true" ]]; then + echo "Expected to find at least one alert from an ML-powered query but found ${num_alerts}." + exit 1 + elif [[ "${num_alerts}" -ne 0 && "${IS_WINDOWS}" == "true" ]]; then + echo "Expected not to find any alerts from an ML-powered query but found ${num_alerts}." + exit 1 + fi + env: + INTERNAL_CODEQL_ACTION_DEBUG_LOC: true diff --git a/pr-checks/checks/ml-powered-queries.yml b/pr-checks/checks/ml-powered-queries.yml new file mode 100644 index 0000000000..778339fe61 --- /dev/null +++ b/pr-checks/checks/ml-powered-queries.yml @@ -0,0 +1,67 @@ +name: "ML-powered queries" +description: "Tests that ML-powered queries are run with the security-extended suite and that they produce alerts on a test DB" +versions: [ + # Latest release in 2.7.x series + "stable-20220120", + "cached", + "latest", + "nightly-latest", + ] +# Test on all three platforms since ML-powered queries use native code +os: ["ubuntu-latest", "macos-latest", "windows-latest"] +steps: + - uses: ./../action/init + with: + languages: javascript + queries: security-extended + source-root: ./../action/tests/ml-powered-queries-repo + tools: ${{ steps.prepare-test.outputs.tools-url }} + + - uses: ./../action/analyze + with: + output: "${{ runner.temp }}/results" + upload-database: false + env: + TEST_MODE: true + + - name: Upload SARIF + uses: actions/upload-artifact@v2 + with: + name: ml-powered-queries-${{ matrix.os }}-${{ matrix.version }}.sarif.json + path: "${{ runner.temp }}/results/javascript.sarif" + retention-days: 7 + + - name: Check results + env: + IS_WINDOWS: ${{ matrix.os == 'windows-latest' }} + shell: bash + run: | + cd "$RUNNER_TEMP/results" + # We should run at least the ML-powered queries in `expected_rules`. + expected_rules="js/ml-powered/nosql-injection js/ml-powered/path-injection js/ml-powered/sql-injection js/ml-powered/xss" + + for rule in ${expected_rules}; do + found_rule=$(jq --arg rule "${rule}" '[.runs[0].tool.extensions[].rules | select(. != null) | + flatten | .[].id] | any(. == $rule)' javascript.sarif) + echo "Did find rule '${rule}': ${found_rule}" + if [[ "${found_rule}" != "true" && "${IS_WINDOWS}" != "true" ]]; then + echo "Expected SARIF output to contain rule '${rule}', but found no such rule." + exit 1 + elif [[ "${found_rule}" == "true" && "${IS_WINDOWS}" == "true" ]]; then + echo "Found rule '${rule}' in the SARIF output which shouldn't have been part of the analysis." + exit 1 + fi + done + + # We should have at least one alert from an ML-powered query. + num_alerts=$(jq '[.runs[0].results[] | + select(.properties.score != null and (.rule.id | startswith("js/ml-powered/")))] | length' \ + javascript.sarif) + echo "Found ${num_alerts} alerts from ML-powered queries."; + if [[ "${num_alerts}" -eq 0 && "${IS_WINDOWS}" != "true" ]]; then + echo "Expected to find at least one alert from an ML-powered query but found ${num_alerts}." + exit 1 + elif [[ "${num_alerts}" -ne 0 && "${IS_WINDOWS}" == "true" ]]; then + echo "Expected not to find any alerts from an ML-powered query but found ${num_alerts}." + exit 1 + fi diff --git a/tests/ml-powered-queries-repo/add-note.js b/tests/ml-powered-queries-repo/add-note.js new file mode 100644 index 0000000000..ee7c735f95 --- /dev/null +++ b/tests/ml-powered-queries-repo/add-note.js @@ -0,0 +1,21 @@ +const mongoose = require('mongoose'); + +Logger = require('./logger').Logger; +Note = require('./models/note').Note; + +(async () => { + if (process.argv.length != 5) { + Logger.log("Creates a private note. Usage: node add-note.js <body>") + return; + } + + // Open the default mongoose connection + await mongoose.connect('mongodb://localhost:27017/notes', { useFindAndModify: false }); + + const [userToken, title, body] = process.argv.slice(2); + await Note.create({ title, body, userToken }); + + Logger.log(`Created private note with title ${title} and body ${body} belonging to user with token ${userToken}.`); + + await mongoose.connection.close(); +})(); diff --git a/tests/ml-powered-queries-repo/app.js b/tests/ml-powered-queries-repo/app.js new file mode 100644 index 0000000000..53c2f10e7b --- /dev/null +++ b/tests/ml-powered-queries-repo/app.js @@ -0,0 +1,68 @@ +const bodyParser = require('body-parser'); +const express = require('express'); +const mongoose = require('mongoose'); + +const notesApi = require('./notes-api'); +const usersApi = require('./users-api'); + +const addSampleData = module.exports.addSampleData = async () => { + const [userA, userB] = await User.create([ + { + name: "A", + token: "tokenA" + }, + { + name: "B", + token: "tokenB" + } + ]); + + await Note.create([ + { + title: "Public note belonging to A", + body: "This is a public note belonging to A", + isPublic: true, + ownerToken: userA.token + }, + { + title: "Public note belonging to B", + body: "This is a public note belonging to B", + isPublic: true, + ownerToken: userB.token + }, + { + title: "Private note belonging to A", + body: "This is a private note belonging to A", + ownerToken: userA.token + }, + { + title: "Private note belonging to B", + body: "This is a private note belonging to B", + ownerToken: userB.token + } + ]); +} + +module.exports.startApp = async () => { + // Open the default mongoose connection + await mongoose.connect('mongodb://mongo:27017/notes', { useFindAndModify: false }); + // Drop contents of DB + mongoose.connection.dropDatabase(); + // Add some sample data + await addSampleData(); + + const app = express(); + + app.use(bodyParser.json()); + app.use(bodyParser.urlencoded()); + + app.get('/', async (_req, res) => { + res.send('Hello World'); + }); + + app.use('/api/notes', notesApi.router); + app.use('/api/users', usersApi.router); + + app.listen(3000); + Logger.log('Express started on port 3000'); +}; diff --git a/tests/ml-powered-queries-repo/index.js b/tests/ml-powered-queries-repo/index.js new file mode 100644 index 0000000000..aaecbe83b6 --- /dev/null +++ b/tests/ml-powered-queries-repo/index.js @@ -0,0 +1,7 @@ +const startApp = require('./app').startApp; + +Logger = require('./logger').Logger; +Note = require('./models/note').Note; +User = require('./models/user').User; + +startApp(); diff --git a/tests/ml-powered-queries-repo/logger.js b/tests/ml-powered-queries-repo/logger.js new file mode 100644 index 0000000000..b8992c611c --- /dev/null +++ b/tests/ml-powered-queries-repo/logger.js @@ -0,0 +1,5 @@ +module.exports.Logger = class { + log(message, ...objs) { + console.log(message, objs); + } +}; diff --git a/tests/ml-powered-queries-repo/models/note.js b/tests/ml-powered-queries-repo/models/note.js new file mode 100644 index 0000000000..d2cecee123 --- /dev/null +++ b/tests/ml-powered-queries-repo/models/note.js @@ -0,0 +1,8 @@ +const mongoose = require('mongoose'); + +module.exports.Note = mongoose.model('Note', new mongoose.Schema({ + title: String, + body: String, + ownerToken: String, + isPublic: Boolean +})); diff --git a/tests/ml-powered-queries-repo/models/user.js b/tests/ml-powered-queries-repo/models/user.js new file mode 100644 index 0000000000..ac0545fb8b --- /dev/null +++ b/tests/ml-powered-queries-repo/models/user.js @@ -0,0 +1,6 @@ +const mongoose = require('mongoose'); + +module.exports.User = mongoose.model('User', new mongoose.Schema({ + name: String, + token: String +})); diff --git a/tests/ml-powered-queries-repo/notes-api.js b/tests/ml-powered-queries-repo/notes-api.js new file mode 100644 index 0000000000..d11a0f4346 --- /dev/null +++ b/tests/ml-powered-queries-repo/notes-api.js @@ -0,0 +1,44 @@ +const express = require('express') + +const router = module.exports.router = express.Router(); + +function serializeNote(note) { + return { + title: note.title, + body: note.body + }; +} + +router.post('/find', async (req, res) => { + const notes = await Note.find({ + ownerToken: req.body.token + }).exec(); + res.json({ + notes: notes.map(serializeNote) + }); +}); + +router.get('/findPublic', async (_req, res) => { + const notes = await Note.find({ + isPublic: true + }).exec(); + res.json({ + notes: notes.map(serializeNote) + }); +}); + +router.post('/findVisible', async (req, res) => { + const notes = await Note.find({ + $or: [ + { + isPublic: true + }, + { + ownerToken: req.body.token + } + ] + }).exec(); + res.json({ + notes: notes.map(serializeNote) + }); +}); diff --git a/tests/ml-powered-queries-repo/read-notes.js b/tests/ml-powered-queries-repo/read-notes.js new file mode 100644 index 0000000000..03578874bd --- /dev/null +++ b/tests/ml-powered-queries-repo/read-notes.js @@ -0,0 +1,37 @@ +const mongoose = require('mongoose'); + +Logger = require('./logger').Logger; +Note = require('./models/note').Note; +User = require('./models/user').User; + +(async () => { + if (process.argv.length != 3) { + Logger.log("Outputs all notes visible to a user. Usage: node read-notes.js <token>") + return; + } + + // Open the default mongoose connection + await mongoose.connect('mongodb://localhost:27017/notes', { useFindAndModify: false }); + + const ownerToken = process.argv[2]; + + const user = await User.findOne({ + token: ownerToken + }).exec(); + + const notes = await Note.find({ + $or: [ + { isPublic: true }, + { ownerToken } + ] + }).exec(); + + notes.map(note => { + Logger.log("Title:" + note.title); + Logger.log("By:" + user.name); + Logger.log("Body:" + note.body); + Logger.log(); + }); + + await mongoose.connection.close(); +})(); diff --git a/tests/ml-powered-queries-repo/users-api.js b/tests/ml-powered-queries-repo/users-api.js new file mode 100644 index 0000000000..88381cf7af --- /dev/null +++ b/tests/ml-powered-queries-repo/users-api.js @@ -0,0 +1,25 @@ +const express = require('express') + +Logger = require('./logger').Logger; +const router = module.exports.router = express.Router(); + +router.post('/updateName', async (req, res) => { + Logger.log("/updateName called with new name", req.body.name); + await User.findOneAndUpdate({ + token: req.body.token + }, { + name: req.body.name + }).exec(); + res.json({ + name: req.body.name + }); +}); + +router.post('/getName', async (req, res) => { + const user = await User.findOne({ + token: req.body.token + }).exec(); + res.json({ + name: user.name + }); +});