Skip to content

Commit

Permalink
Merge branch 'mainline' into packaging-experiment
Browse files Browse the repository at this point in the history
  • Loading branch information
ajCameron authored Jul 7, 2023
2 parents 65ac63e + 9bfd178 commit 6b8ae72
Show file tree
Hide file tree
Showing 6 changed files with 336 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/trufflehog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
with:
fetch-depth: 0
- name: TruffleHog OSS
uses: trufflesecurity/trufflehog@v3.40.0
uses: trufflesecurity/trufflehog@v3.42.0
with:
path: ./
base: ${{ github.event.repository.default_branch }}
Expand Down
57 changes: 57 additions & 0 deletions dev-docs/security-analysis-dev-notes/example_bandit_outpt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
<!--
SPDX-FileCopyrightText: 2023 Mewbot Developers <mewbot@quicksilver.london>
SPDX-License-Identifier: BSD-2-Clause
-->

Bandit produces errors of three different levels of severity, with three different levels of confidence.
Capturing and parsing these blocks into github annotations.

Here is an example of typical bandit output - which must then be parsed into a useful format.

```shell

Run started:2023-06-22 21:49:31.662238

Test results:
>> Issue: [B404:blacklist] Consider possible security implications associated with the subprocess module.
Severity: Low Confidence: High
CWE: CWE-78 (https://cwe.mitre.org/data/definitions/78.html)
More Info: https://bandit.readthedocs.io/en/1.7.5/blacklists/blacklist_imports.html#b404-import-subprocess
Location: src\mewbot\io\desktop_notification.py:18:0
17 import sys
18 import subprocess
19

--------------------------------------------------
>> Issue: [B101:assert_used] Use of assert detected. The enclosed code will be removed when compiling to optimised byte code.
Severity: Low Confidence: High
CWE: CWE-703 (https://cwe.mitre.org/data/definitions/703.html)
More Info: https://bandit.readthedocs.io/en/1.7.5/plugins/b101_assert_used.html
Location: src\mewbot\io\desktop_notification.py:251:8
250 return
251 assert ToastNotifier is not None
252

--------------------------------------------------

Code scanned:
Total lines of code: 3515
Total lines skipped (#nosec): 0

Run metrics:
Total issues (by severity):
Undefined: 0
Low: 2
Medium: 0
High: 0
Total issues (by confidence):
Undefined: 0
Low: 0
Medium: 0
High: 2
Files skipped (0):

```
The aim is to have a "passing" (no medium or high errors) on each branch.
5 changes: 4 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
-r requirements-test.txt

# Installation toolchain
setuptools~=67.8
setuptools~=68.0

# Copyright linting toolchain
reuse~=1.1
Expand All @@ -24,6 +24,9 @@ pytest-github-actions-annotate-failures>=0.1.7
pydocstyle>=6.3.0
isort>=5.12.0

# Security Analysis toolchain
bandit~=1.7.5

# Type checking toolchain
mypy>=0.991
types-PyYAML~=6.0
Expand Down
6 changes: 4 additions & 2 deletions src/mewbot/tools/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@
import sys

from .path import gather_paths
from .toolchain import Annotation, ToolChain
from .security_analysis import BanditMixin
from .toolchain import Annotation

LEVELS = frozenset({"notice", "warning", "error"})


class LintToolchain(ToolChain):
class LintToolchain(BanditMixin):
"""
Wrapper class for running linting tools.
Expand All @@ -47,6 +48,7 @@ def run(self) -> Iterable[Annotation]:
yield from self.lint_mypy()
yield from self.lint_pylint()
yield from self.lint_pydocstyle()
yield from self.lint_bandit()

def lint_isort(self) -> Iterable[Annotation]:
"""
Expand Down
261 changes: 261 additions & 0 deletions src/mewbot/tools/security_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2021 - 2023 Mewbot Developers <mewbot@quicksilver.london>
#
# SPDX-License-Identifier: BSD-2-Clause

"""
Wrapper class for the security analysis toolchain.
Any program which is exposed to the internet, and hs to process user input (as mewbot should be
able to, at least) has to deal with a number of security concerns.
Static security analysis can help with this.
Currently this runs bandit - a static security analysis toolkit.
More analysis tools may be added.
"""


from __future__ import annotations

from collections.abc import Iterable

import argparse
import os
import pprint
import re
import subprocess

from .path import gather_paths
from .toolchain import Annotation, ToolChain

LEVELS = frozenset({"notice", "warning", "error"})


class BanditMixin(ToolChain):
"""
Helper class to include bandit function in other tool chains.
"""

in_ci: bool

def lint_bandit(self) -> Iterable[Annotation]:
"""
Run 'bandit', an automatic security analysis tool.
bandit scans a code base for security vulnerabilities.
"""

args = ["bandit", "-r"]

if self.in_ci:
# Number of "l"s in -ll e.t.c controls the warning level
# default to medium and higher severity
args.extend(["--quiet", "-ll"])
else:
args.extend(["-ll"])

result = self.run_tool("Bandit (Security Analysis)", *args)

yield from lint_bandit_output(result)


class SecurityAnalysisToolchain(BanditMixin):
"""
Wrapper class for running security analysis tools.
The output of these tools will be emitted as GitHub annotations (in CI)
or default human output (otherwise).
By default, all paths declared to be part of mewbot source - either of the main
module or any installed plugins - are linted.
"""

def run(self) -> Iterable[Annotation]:
"""Runs the linting tools in sequence."""

yield from self.lint_bandit()


def lint_bandit_output(
result: subprocess.CompletedProcess[bytes],
) -> Iterable[Annotation]:
"""Processes 'bandits' output in to annotations."""
# Don't want to truncate the example bandit output
# pylint: disable=line-too-long

stdout_errors = result.stdout.decode("utf-8").split("\n")
stderr_errors = result.stderr.decode("utf-8").split("\n")

# Parsing bandit comment blocks into annotations

# Line for each individual block
block: list[str] = []

for line in stdout_errors + stderr_errors:
line = line.strip()

# We have reached the end of a block
if line.startswith("---------------------------"):
yield bandit_output_block_to_annotation(block)

block.append(line)

# The last block should be ignored - it's just a summary


def bandit_output_block_to_annotation(block: list[str]) -> Annotation:
"""
Process an output block and produce an annotation from it.
:param block: The block as a list of strings.
:return:
"""
target_block = prepare_target_block(block)

issue_code = re.findall(r"\[.+]", target_block[0])[0]
issue_code = issue_code[1:-1]

# Extracting the severity and confidence - should be on the next line
severity_confidence_re = r"^(\s+)?Severity:([a-zA-Z\s]+)Confidence:([a-zA-Z\s]+)$"
severity_confidence_match = re.match(severity_confidence_re, target_block[1])

assert (
severity_confidence_match is not None
), f"Error parsing {target_block[1]} with regex {severity_confidence_re}"

try:
severity = severity_confidence_match.group(2).strip()
except AttributeError as exp:
raise NotImplementedError(
f"Error parsing {target_block[1]} with regex {severity_confidence_re}"
) from exp

try:
confidence = severity_confidence_match.group(3).strip()
except AttributeError as exp:
raise NotImplementedError(
f"Error parsing {target_block[1]} with regex {severity_confidence_re}"
) from exp

# Scanning forward to try and find a location line
loc_line_found = False
loc_line = ""
for cand_line in target_block[2:]:
if cand_line.lower().strip().startswith("location"):
loc_line_found = True
loc_line = cand_line
break

if not loc_line_found:
raise NotImplementedError(
f"block does not seem to have a Location line - \n\n{pprint.pformat(block)}"
)

problem_path, problem_line, problem_char_pos = get_positions_from_loc_line(loc_line)

return Annotation(
level=severity_to_level(severity),
file=problem_path,
line=problem_line,
col=problem_char_pos,
tool="bandit",
title=f"Bandit {severity} security warning",
message=f"{issue_code = } - {severity} security warning with confidence {confidence}",
)


def prepare_target_block(block: list[str]) -> list[str]:
"""
Take a block and bring it into standard target block form.
:param block:
:return:
"""
target_block = []
for i, line in enumerate(block):
if line.startswith(">>"):
target_block = block[i:]
break

if not target_block:
raise NotImplementedError(
f"Line starting with '>>' was not found in block\n\n{block}"
)

assert target_block[0].startswith(
">>"
), f"Bad block format \n\n{target_block}\n\n was not in expected format."

return target_block


def get_positions_from_loc_line(loc_line: str) -> tuple[str, int, int]:
"""
Parse a location line into the path, line and char pos of the problem.
:param loc_line:
:return:
"""
# Windows uses ':' in its file paths - thus some care needs to be taken to split the tokens
# down properly
loc_tokens = loc_line.split(":")
# Four is the minimum, if : does not appear in the path
assert len(loc_tokens) > 4, f"{loc_tokens = } not as expected"

problem_path = ":".join(loc_tokens[1:-2])
problem_line = int(loc_tokens[-2])
problem_char_pos = int(loc_tokens[-1])

return problem_path, problem_line, problem_char_pos


def severity_to_level(severity: str) -> str:
"""
Turns a bandit severity level into a github notice level.
:param severity:
:return:
"""
if severity.lower() == "low":
return "notice"

raise NotImplementedError(f"severity {severity} not recognized!")


def parse_security_analysis_options() -> argparse.Namespace:
"""Parse command line argument for the security analysis tools."""

parser = argparse.ArgumentParser(description="Run security analysis for mewbot")
parser.add_argument(
"-n",
"--no-tests",
action="store_false",
default=True,
dest="tests",
help="Exclude tests from security analysis",
)
parser.add_argument(
"path",
nargs="*",
default=[],
help="Path of a file or a folder of files for security analysis.",
)
parser.add_argument(
"--ci",
dest="in_ci",
default="GITHUB_ACTIONS" in os.environ,
action="store_true",
help="Run in GitHub actions mode",
)

return parser.parse_args()


if __name__ == "__main__":
options = parse_security_analysis_options()

paths = options.path
if not paths:
paths = gather_paths("src", "tests") if options.tests else gather_paths("src")

linter = SecurityAnalysisToolchain(*paths, in_ci=options.in_ci)
linter()
9 changes: 9 additions & 0 deletions tools/security-analysis
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/sh

# SPDX-FileCopyrightText: 2021 - 2023 Mewbot Developers <mewbot@quicksilver.london>
#
# SPDX-License-Identifier: BSD-2-Clause

. "${0%/*}/path"

exec "$PYTHON" -m mewbot.tools.security_analysis "$@"

0 comments on commit 6b8ae72

Please sign in to comment.