diff --git a/README.md b/README.md index 2d996e0..90df8c5 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,10 @@ alias pip="scfw pip" alias npm="scfw npm" ``` +## Limitations + +Unlike `pip`, `npm` has a variety of operations beyond `npm install` that can end up installing new packages. For now, only `npm install` commands are in scope for analysis with the supply-chain firewall. We are hoping to extend the firewall's purview to other "installish" `npm` commands over time. + ## Datadog Logs integration The supply-chain firewall can optionally send logs of blocked and successful installations to Datadog. diff --git a/scfw/cli.py b/scfw/cli.py index 27c2eb9..7e56dc2 100644 --- a/scfw/cli.py +++ b/scfw/cli.py @@ -2,11 +2,13 @@ Defines the supply-chain firewall's command-line interface and performs argument parsing. """ -from argparse import ArgumentParser, Namespace +from argparse import Namespace import logging import sys +from typing import Optional from scfw.ecosystem import ECOSYSTEM +from scfw.parser import ArgumentError, ArgumentParser _LOG_LEVELS = list( map( @@ -22,15 +24,15 @@ def _cli() -> ArgumentParser: Defines the command-line interface for the supply-chain firewall. Returns: - An `argparse.ArgumentParser` that encodes the supply-chain firewall's command line. + A parser for the supply-chain firewall's command line. - This parser only handles the firewall's optional arguments. It cannot be used to parse - the firewall's entire command line, as this contains a command for a supported ecosystem's - package manager which would otherwise be parsed greedily (and incorrectly) by `argparse`. + This parser only handles the firewall's optional arguments, not the package + manager command being run through the firewall. 
""" parser = ArgumentParser( prog="scfw", usage="%(prog)s [options] COMMAND", + exit_on_error=False, description="A tool to prevent the installation of vulnerable or malicious pip and npm packages" ) @@ -60,7 +62,7 @@ def _cli() -> ArgumentParser: return parser -def _parse_command_line(argv: list[str]) -> tuple[Namespace, str]: +def _parse_command_line(argv: list[str]) -> tuple[Optional[Namespace], str]: """ Parse the supply-chain firewall's command line from a given argument vector. @@ -70,10 +72,11 @@ def _parse_command_line(argv: list[str]) -> tuple[Namespace, str]: Returns: A `tuple` of a `Namespace` object containing the results of parsing the given argument vector and a `str` help message for the caller's use in early exits. + In the case of a parsing failure, `None` is returned instead of a `Namespace`. - The returned `Namespace` contains the package manager command present in - the given argument vector as a (possibly empty) `list[str]` under the `command` - attribute. + On success, the returned `Namespace` contains the package manager command + present in the given argument vector as a (possibly empty) `list[str]` under + the `command` attribute. """ hinge = len(argv) for ecosystem in ECOSYSTEM: @@ -83,26 +86,30 @@ def _parse_command_line(argv: list[str]) -> tuple[Namespace, str]: pass parser = _cli() - args = parser.parse_args(argv[1:hinge]) - args_dict = vars(args) - args_dict["command"] = argv[hinge:] + help_msg = parser.format_help() + + try: + args = parser.parse_args(argv[1:hinge]) + args_dict = vars(args) + args_dict["command"] = argv[hinge:] + return args, help_msg - return args, parser.format_help() + except ArgumentError: + return None, help_msg -def parse_command_line() -> tuple[Namespace, str]: +def parse_command_line() -> tuple[Optional[Namespace], str]: """ Parse the supply-chain firewall's command line. Returns: - A `tuple` of a `Namespace` object containing: - 1. The results of successfully parsing the firewall's command line and - 2. 
A `str` help message for the caller's use in early exits. + A `tuple` of a `Namespace` object containing the results of parsing the + firewall's command line and a `str` help message for the caller's use in + early exits. In the case of a parsing failure, `None` is returned instead + of a `Namespace`. - The returned `Namespace` contains the package manager command provided to the - firewall as a (possibly empty) `list[str]` under the `command` attribute. - - Parsing errors cause the program to print a usage message and exit early - with exit code 2. This function only returns if parsing was successful. + On success, the returned `Namespace` contains the package manager command + provided to the firewall as a (possibly empty) `list[str]` under the `command` + attribute. """ return _parse_command_line(sys.argv) diff --git a/scfw/commands/npm_command.py b/scfw/commands/npm_command.py index 8976b6e..5077b97 100644 --- a/scfw/commands/npm_command.py +++ b/scfw/commands/npm_command.py @@ -93,20 +93,21 @@ def str_to_install_target(s: str) -> InstallTarget: raise ValueError("Failed to parse npm install target") return InstallTarget(ECOSYSTEM.NPM, package, version) - # If any of the below options are present, a help message is printed or - # a dry-run of an installish action occurs: nothing will be installed + # For now, automatically allow all non-`install` commands + if not self._is_install_command(): + return [] + + # The presence of these options prevents the install command from running if any(opt in self._command for opt in {"-h", "--help", "--dry-run"}): return [] try: - # Compute the set of dependencies added by the command - # This is a superset of the set of install targets + # Compute the set of dependencies added by the install command dry_run_command = self._command + ["--dry-run", "--loglevel", "silly"] dry_run = subprocess.run(dry_run_command, check=True, text=True, capture_output=True) dependencies = map(line_to_dependency, filter(is_place_dep_line, 
dry_run.stderr.strip().split('\n'))) except subprocess.CalledProcessError: - # An error must have resulted from the given npm command - # As nothing will be installed in this case, allow the command + # An erroring command does not install anything _log.info("The npm command encountered an error while collecting installation targets") return [] @@ -126,3 +127,23 @@ def str_to_install_target(s: str) -> InstallTarget: targets = filter(lambda dep: dep not in installed, dependencies) return list(map(str_to_install_target, targets)) + + def _is_install_command(self) -> bool: + """ + Determine whether the underlying `npm` command is for an `install` subcommand. + + Returns: + A `bool` indicating whether the `npm` command underlying the given `NpmCommand` + is likely for an `install` subcommand. + + This function gives no false negatives but may give false positives. False + positives are safe in this case because they result in non-installish + commands being analyzed as if they were installish commands. To eliminate + false positives, we would need to write a full parser for npm. + """ + # https://docs.npmjs.com/cli/v10/commands/npm-install + install_aliases = { + "install", "add", "i", "in", "ins", "inst", "insta", "instal", "isnt", "isnta", "isntal", "isntall" + } + + return any(alias in self._command for alias in install_aliases) diff --git a/scfw/firewall.py b/scfw/firewall.py index 6f2f98f..49cc513 100644 --- a/scfw/firewall.py +++ b/scfw/firewall.py @@ -28,7 +28,7 @@ def run_firewall() -> int: try: args, help = cli.parse_command_line() - if not args.command: + if not args or not args.command: print(help) return 0 diff --git a/scfw/parser.py b/scfw/parser.py new file mode 100644 index 0000000..4984453 --- /dev/null +++ b/scfw/parser.py @@ -0,0 +1,29 @@ +""" +A drop-in replacement for `argparse.ArgumentParser`. +""" + +import argparse + + +class ArgumentError(Exception): + """ + An exception for `ArgumentParser` to raise. 
+ """ + pass + + +class ArgumentParser(argparse.ArgumentParser): + """ + A drop-in replacement for `argparse.ArgumentParser` with a patched + implementation of the latter's `exit_on_error` behavior. + + See https://github.com/python/cpython/issues/103498 for more info. + """ + def error(self, message): + """ + Handle a parsing error. + + Args: + message: The error message. + """ + raise ArgumentError(message)