Merge branch 'mainline' into packaging-experiment

mewbotorg · Jul 7, 2023 · 6b8ae72 · 6b8ae72
2 parents 65ac63e + 9bfd178
commit 6b8ae72
Show file tree

Hide file tree

Showing 6 changed files with 336 additions and 4 deletions.
diff --git a/.github/workflows/trufflehog.yml b/.github/workflows/trufflehog.yml
@@ -16,7 +16,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: TruffleHog OSS
-        uses: trufflesecurity/trufflehog@v3.40.0
+        uses: trufflesecurity/trufflehog@v3.42.0
         with:
           path: ./
           base: ${{ github.event.repository.default_branch }}

diff --git a/dev-docs/security-analysis-dev-notes/example_bandit_outpt.md b/dev-docs/security-analysis-dev-notes/example_bandit_outpt.md
@@ -0,0 +1,57 @@
+<!--
+SPDX-FileCopyrightText: 2023 Mewbot Developers <mewbot@quicksilver.london>
+
+SPDX-License-Identifier: BSD-2-Clause
+-->
+
+Bandit produces errors of three different levels of severity, with three different levels of confidence.
+Capturing and parsing these blocks into github annotations.
+
+Here is an example of typical bandit output - which must then be parsed into a useful format.
+
+```shell
+
+Run started:2023-06-22 21:49:31.662238
+
+Test results:
+>> Issue: [B404:blacklist] Consider possible security implications associated with the subprocess module.
+   Severity: Low   Confidence: High
+   CWE: CWE-78 (https://cwe.mitre.org/data/definitions/78.html)
+   More Info: https://bandit.readthedocs.io/en/1.7.5/blacklists/blacklist_imports.html#b404-import-subprocess
+   Location: src\mewbot\io\desktop_notification.py:18:0
+17      import sys
+18      import subprocess
+19
+
+--------------------------------------------------
+>> Issue: [B101:assert_used] Use of assert detected. The enclosed code will be removed when compiling to optimised byte code.
+   Severity: Low   Confidence: High
+   CWE: CWE-703 (https://cwe.mitre.org/data/definitions/703.html)
+   More Info: https://bandit.readthedocs.io/en/1.7.5/plugins/b101_assert_used.html
+   Location: src\mewbot\io\desktop_notification.py:251:8
+250                 return
+251             assert ToastNotifier is not None
+252
+
+--------------------------------------------------
+
+Code scanned:
+        Total lines of code: 3515
+        Total lines skipped (#nosec): 0
+
+Run metrics:
+        Total issues (by severity):
+                Undefined: 0
+                Low: 2
+                Medium: 0
+                High: 0
+        Total issues (by confidence):
+                Undefined: 0
+                Low: 0
+                Medium: 0
+                High: 2
+Files skipped (0):
+
+```
+
+The aim is to have a "passing" (no medium or high errors) on each branch.
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -8,7 +8,7 @@
 -r requirements-test.txt
 
 # Installation toolchain
-setuptools~=67.8
+setuptools~=68.0
 
 # Copyright linting toolchain
 reuse~=1.1
@@ -24,6 +24,9 @@ pytest-github-actions-annotate-failures>=0.1.7
 pydocstyle>=6.3.0
 isort>=5.12.0
 
+# Security Analysis toolchain
+bandit~=1.7.5
+
 # Type checking toolchain
 mypy>=0.991
 types-PyYAML~=6.0

diff --git a/src/mewbot/tools/lint.py b/src/mewbot/tools/lint.py
@@ -23,12 +23,13 @@
 import sys
 
 from .path import gather_paths
-from .toolchain import Annotation, ToolChain
+from .security_analysis import BanditMixin
+from .toolchain import Annotation
 
 LEVELS = frozenset({"notice", "warning", "error"})
 
 
-class LintToolchain(ToolChain):
+class LintToolchain(BanditMixin):
     """
     Wrapper class for running linting tools.
 
@@ -47,6 +48,7 @@ def run(self) -> Iterable[Annotation]:
         yield from self.lint_mypy()
         yield from self.lint_pylint()
         yield from self.lint_pydocstyle()
+        yield from self.lint_bandit()
 
     def lint_isort(self) -> Iterable[Annotation]:
         """

diff --git a/src/mewbot/tools/security_analysis.py b/src/mewbot/tools/security_analysis.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+
+# SPDX-FileCopyrightText: 2021 - 2023 Mewbot Developers <mewbot@quicksilver.london>
+#
+# SPDX-License-Identifier: BSD-2-Clause
+
+"""
+Wrapper class for the security analysis toolchain.
+
+Any program which is exposed to the internet, and hs to process user input (as mewbot should be
+able to, at least) has to deal with a number of security concerns.
+Static security analysis can help with this.
+Currently this runs bandit - a static security analysis toolkit.
+More analysis tools may be added.
+"""
+
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+import argparse
+import os
+import pprint
+import re
+import subprocess
+
+from .path import gather_paths
+from .toolchain import Annotation, ToolChain
+
+LEVELS = frozenset({"notice", "warning", "error"})
+
+
+class BanditMixin(ToolChain):
+    """
+    Helper class to include bandit function in other tool chains.
+    """
+
+    in_ci: bool
+
+    def lint_bandit(self) -> Iterable[Annotation]:
+        """
+        Run 'bandit', an automatic security analysis tool.
+
+        bandit scans a code base for security vulnerabilities.
+        """
+
+        args = ["bandit", "-r"]
+
+        if self.in_ci:
+            # Number of "l"s in -ll e.t.c controls the warning level
+            # default to medium and higher severity
+            args.extend(["--quiet", "-ll"])
+        else:
+            args.extend(["-ll"])
+
+        result = self.run_tool("Bandit (Security Analysis)", *args)
+
+        yield from lint_bandit_output(result)
+
+
+class SecurityAnalysisToolchain(BanditMixin):
+    """
+    Wrapper class for running security analysis tools.
+
+    The output of these tools will be emitted as GitHub annotations (in CI)
+    or default human output (otherwise).
+    By default, all paths declared to be part of mewbot source - either of the main
+    module or any installed plugins - are linted.
+    """
+
+    def run(self) -> Iterable[Annotation]:
+        """Runs the linting tools in sequence."""
+
+        yield from self.lint_bandit()
+
+
+def lint_bandit_output(
+    result: subprocess.CompletedProcess[bytes],
+) -> Iterable[Annotation]:
+    """Processes 'bandits' output in to annotations."""
+    # Don't want to truncate the example bandit output
+    # pylint: disable=line-too-long
+
+    stdout_errors = result.stdout.decode("utf-8").split("\n")
+    stderr_errors = result.stderr.decode("utf-8").split("\n")
+
+    # Parsing bandit comment blocks into annotations
+
+    # Line for each individual block
+    block: list[str] = []
+
+    for line in stdout_errors + stderr_errors:
+        line = line.strip()
+
+        # We have reached the end of a block
+        if line.startswith("---------------------------"):
+            yield bandit_output_block_to_annotation(block)
+
+        block.append(line)
+
+    # The last block should be ignored - it's just a summary
+
+
+def bandit_output_block_to_annotation(block: list[str]) -> Annotation:
+    """
+    Process an output block and produce an annotation from it.
+
+    :param block: The block as a list of strings.
+    :return:
+    """
+    target_block = prepare_target_block(block)
+
+    issue_code = re.findall(r"\[.+]", target_block[0])[0]
+    issue_code = issue_code[1:-1]
+
+    # Extracting the severity and confidence - should be on the next line
+    severity_confidence_re = r"^(\s+)?Severity:([a-zA-Z\s]+)Confidence:([a-zA-Z\s]+)$"
+    severity_confidence_match = re.match(severity_confidence_re, target_block[1])
+
+    assert (
+        severity_confidence_match is not None
+    ), f"Error parsing {target_block[1]} with regex {severity_confidence_re}"
+
+    try:
+        severity = severity_confidence_match.group(2).strip()
+    except AttributeError as exp:
+        raise NotImplementedError(
+            f"Error parsing {target_block[1]} with regex {severity_confidence_re}"
+        ) from exp
+
+    try:
+        confidence = severity_confidence_match.group(3).strip()
+    except AttributeError as exp:
+        raise NotImplementedError(
+            f"Error parsing {target_block[1]} with regex {severity_confidence_re}"
+        ) from exp
+
+    # Scanning forward to try and find a location line
+    loc_line_found = False
+    loc_line = ""
+    for cand_line in target_block[2:]:
+        if cand_line.lower().strip().startswith("location"):
+            loc_line_found = True
+            loc_line = cand_line
+            break
+
+    if not loc_line_found:
+        raise NotImplementedError(
+            f"block does not seem to have a Location line - \n\n{pprint.pformat(block)}"
+        )
+
+    problem_path, problem_line, problem_char_pos = get_positions_from_loc_line(loc_line)
+
+    return Annotation(
+        level=severity_to_level(severity),
+        file=problem_path,
+        line=problem_line,
+        col=problem_char_pos,
+        tool="bandit",
+        title=f"Bandit {severity} security warning",
+        message=f"{issue_code = } - {severity} security warning with confidence {confidence}",
+    )
+
+
+def prepare_target_block(block: list[str]) -> list[str]:
+    """
+    Take a block and bring it into standard target block form.
+
+    :param block:
+    :return:
+    """
+    target_block = []
+    for i, line in enumerate(block):
+        if line.startswith(">>"):
+            target_block = block[i:]
+            break
+
+    if not target_block:
+        raise NotImplementedError(
+            f"Line starting with '>>' was not found in block\n\n{block}"
+        )
+
+    assert target_block[0].startswith(
+        ">>"
+    ), f"Bad block format \n\n{target_block}\n\n was not in expected format."
+
+    return target_block
+
+
+def get_positions_from_loc_line(loc_line: str) -> tuple[str, int, int]:
+    """
+    Parse a location line into the path, line and char pos of the problem.
+
+    :param loc_line:
+    :return:
+    """
+    # Windows uses ':' in its file paths - thus some care needs to be taken to split the tokens
+    # down properly
+    loc_tokens = loc_line.split(":")
+    # Four is the minimum, if : does not appear in the path
+    assert len(loc_tokens) > 4, f"{loc_tokens = } not as expected"
+
+    problem_path = ":".join(loc_tokens[1:-2])
+    problem_line = int(loc_tokens[-2])
+    problem_char_pos = int(loc_tokens[-1])
+
+    return problem_path, problem_line, problem_char_pos
+
+
+def severity_to_level(severity: str) -> str:
+    """
+    Turns a bandit severity level into a github notice level.
+
+    :param severity:
+    :return:
+    """
+    if severity.lower() == "low":
+        return "notice"
+
+    raise NotImplementedError(f"severity {severity} not recognized!")
+
+
+def parse_security_analysis_options() -> argparse.Namespace:
+    """Parse command line argument for the security analysis tools."""
+
+    parser = argparse.ArgumentParser(description="Run security analysis for mewbot")
+    parser.add_argument(
+        "-n",
+        "--no-tests",
+        action="store_false",
+        default=True,
+        dest="tests",
+        help="Exclude tests from security analysis",
+    )
+    parser.add_argument(
+        "path",
+        nargs="*",
+        default=[],
+        help="Path of a file or a folder of files for security analysis.",
+    )
+    parser.add_argument(
+        "--ci",
+        dest="in_ci",
+        default="GITHUB_ACTIONS" in os.environ,
+        action="store_true",
+        help="Run in GitHub actions mode",
+    )
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    options = parse_security_analysis_options()
+
+    paths = options.path
+    if not paths:
+        paths = gather_paths("src", "tests") if options.tests else gather_paths("src")
+
+    linter = SecurityAnalysisToolchain(*paths, in_ci=options.in_ci)
+    linter()
diff --git a/tools/security-analysis b/tools/security-analysis
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+# SPDX-FileCopyrightText: 2021 - 2023 Mewbot Developers <mewbot@quicksilver.london>
+#
+# SPDX-License-Identifier: BSD-2-Clause
+
+. "${0%/*}/path"
+
+exec "$PYTHON" -m mewbot.tools.security_analysis "$@"