Yelp · domanchi · Apr 13, 2021 · Dec 22, 2020 · Dec 23, 2020 · Jan 8, 2021
diff --git a/README.md b/README.md
@@ -357,29 +357,34 @@ const secret = "hunter2";
 
 ```bash
 $ detect-secrets audit --help
-usage: detect-secrets audit [-h] [--diff] [--stats] [--json]
-                         filename [filename ...]
+usage: detect-secrets audit [-h] [--diff] [--stats]
+                      [--report] [--only-real | --only-false]
+                      [--json]
+                      filename [filename ...]
 
 Auditing a baseline allows analysts to label results, and optimize plugins for
 the highest signal-to-noise ratio for their environment.
 
 positional arguments:
-  filename    Audit a given baseline file to distinguish the difference
-              between false and true positives.
+  filename      Audit a given baseline file to distinguish the difference
+                between false and true positives.
 
 optional arguments:
-  -h, --help  show this help message and exit
-  --diff      Allows the comparison of two baseline files, in order to
-              effectively distinguish the difference between various plugin
-              configurations.
-  --stats     Displays the results of an interactive auditing session which
-              have been saved to a baseline file.
+  -h, --help    show this help message and exit
+  --diff        Allows the comparison of two baseline files, in order to
+                effectively distinguish the difference between various plugin
+                configurations.
+  --stats       Displays the results of an interactive auditing session which
+                have been saved to a baseline file.
+  --report      Displays a report with the secrets detected
+  --only-real   Only includes real secrets in the report
+  --only-false  Only includes false positives in the report
 
 analytics:
   Quantify the success of your plugins based on the labelled results in your
   baseline. To be used with the statisitcs mode (--stats).
 
-  --json      Outputs results in a machine-readable format.
+  --json        Outputs results in a machine-readable format.
 ```
 
 ## Configuration

diff --git a/detect_secrets/audit/__init__.py b/detect_secrets/audit/__init__.py
@@ -1,3 +1,4 @@
 from . import analytics                 # noqa: F401
+from . import report                    # noqa: F401
 from .audit import audit_baseline       # noqa: F401
 from .compare import compare_baselines  # noqa: F401
diff --git a/detect_secrets/audit/common.py b/detect_secrets/audit/common.py
@@ -95,6 +95,43 @@ def get_raw_secret_from_file(
     raise SecretNotFoundOnSpecifiedLineError(secret.line_number)
 
 
+def get_all_secrets_from_file(
+    secret: PotentialSecret,
+    line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
+) -> [PotentialSecret]:
+    """
+    We're analyzing the contents straight from the baseline, and therefore, we don't know
+    the secret value (by design). However, we have secret hashes, filenames, and how we detected
+    it was a secret in the first place, so we can reverse-engineer it. This method searchs all
+    the ocurrences of one secret in one file using one plugin.
+    """
+    plugin = cast(BasePlugin, plugins.initialize.from_secret_type(secret.type))
+    line_getter = line_getter_factory(secret.filename)
+    is_first_time_opening_file = not line_getter.has_cached_lines
+    all_secrets = []
+    while True:
+        for line_number, line in enumerate(line_getter.lines):
+            identified_secrets = call_function_with_arguments(
+                plugin.analyze_line,
+                filename=secret.filename,
+                line=line,
+                line_number=line_number + 1,
+
+                # We enable eager search, because we *know* there's a secret here -- the baseline
+                # flagged it after all.
+                enable_eager_search=True,
+            )
+
+            for identified_secret in (identified_secrets or []):
+                if identified_secret == secret:
+                    all_secrets.append(identified_secret)
+
+        if len(all_secrets) == 0 and is_first_time_opening_file and not line_getter.use_eager_transformers:   # noqa: E501
+            line_getter.use_eager_transformers = True
+        else:
+            return all_secrets
+
+
 class LineGetter:
     """
     The problem we try to address with this class is to cache the lines of a transformed file,

diff --git a/detect_secrets/audit/report.py b/detect_secrets/audit/report.py
@@ -0,0 +1,83 @@
+import hashlib
+from enum import Enum
+from typing import Callable
+
+from ..constants import VerifiedResult
+from .common import get_all_secrets_from_file
+from .common import get_baseline_from_file
+from .common import LineGetter
+from .common import open_file
+
+
+class SecretClassToPrint(Enum):
+    REAL_SECRET = 1
+    FALSE_POSITIVE = 2
+
+    def from_class(secret_class: VerifiedResult) -> Enum:
+        if secret_class in [VerifiedResult.UNVERIFIED, VerifiedResult.VERIFIED_TRUE]:
+            return SecretClassToPrint.REAL_SECRET
+        else:
+            return SecretClassToPrint.FALSE_POSITIVE
+
+
+def generate_report(
+    baseline_file: str,
+    class_to_print: SecretClassToPrint = None,
+    line_getter_factory: Callable[[str], 'LineGetter'] = open_file,
+) -> None:
+    secrets = {}
+    for filename, secret in get_baseline_from_file(baseline_file):
+        verified_result = get_verified_result_from_boolean(secret.is_secret)
+        if class_to_print is not None and SecretClassToPrint.from_class(verified_result) != class_to_print:  # noqa: E501
+            continue
+        detections = get_all_secrets_from_file(secret)
+        identifier = hashlib.sha512((secret.secret_hash + filename).encode('utf-8')).hexdigest()
+        line_getter = line_getter_factory(filename)
+        for detection in detections:
+            if identifier in secrets:
+                secrets[identifier]['lines'][detection.line_number] = line_getter.lines[detection.line_number - 1]  # noqa: E501
+                if secret.type not in secrets[identifier]['types']:
+                    secrets[identifier]['types'].append(secret.type)
+                secrets[identifier]['category'] = get_prioritary_verified_result(
+                    verified_result,
+                    VerifiedResult[secrets[identifier]['category']],
+                ).name
+            else:
+                secrets[identifier] = {
+                    'secrets': detection.secret_value,
+                    'filename': filename,
+                    'lines': {
+                        detection.line_number: line_getter.lines[detection.line_number - 1],
+                    },
+                    'types': [
+                        secret.type,
+                    ],
+                    'category': verified_result.name,
+                }
+
+    output = []
+    for identifier in secrets:
+        output.append(secrets[identifier])
+
+    return output
+
+
+def get_prioritary_verified_result(
+    result1: VerifiedResult,
+    result2: VerifiedResult,
+) -> VerifiedResult:
+    if result1.value > result2.value:
+        return result1
+    else:
+        return result2
+
+
+def get_verified_result_from_boolean(
+    is_secret: bool,
+) -> VerifiedResult:
+    if is_secret is None:
+        return VerifiedResult.UNVERIFIED
+    elif is_secret:
+        return VerifiedResult.VERIFIED_TRUE
+    else:
+        return VerifiedResult.VERIFIED_FALSE
diff --git a/detect_secrets/core/usage/audit.py b/detect_secrets/core/usage/audit.py
@@ -21,6 +21,7 @@ def add_audit_action(parent: argparse._SubParsersAction) -> argparse.ArgumentPar
     )
 
     _add_mode_parser(parser)
+    _add_report_parser(parser)
     _add_statistics_module(parser)
     return parser
 
@@ -46,6 +47,33 @@ def _add_mode_parser(parser: argparse.ArgumentParser) -> None:
     )
 
 
+def _add_report_parser(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument(
+        '--report',
+        action='store_true',
+        help=(
+            'Displays a report with the secrets detected'
+        ),
+    )
+
+    report_parser = parser.add_mutually_exclusive_group()
+    report_parser.add_argument(
+        '--only-real',
+        action='store_true',
+        help=(
+            'Only includes real secrets in the report'
+        ),
+    )
+
+    report_parser.add_argument(
+        '--only-false',
+        action='store_true',
+        help=(
+            'Only includes false positives in the report'
+        ),
+    )
+
+
 def _add_statistics_module(parent: argparse.ArgumentParser) -> None:
     parser = parent.add_argument_group(
         title='analytics',

diff --git a/detect_secrets/main.py b/detect_secrets/main.py
@@ -120,6 +120,19 @@ def handle_audit_action(args: argparse.Namespace) -> None:
                 print(json.dumps(stats.json(), indent=2))
             else:
                 print(str(stats))
+        elif args.report:
+            class_to_print = None
+            if args.only_real:
+                class_to_print = audit.report.SecretClassToPrint.REAL_SECRET
+            elif args.only_false:
+                class_to_print = audit.report.SecretClassToPrint.FALSE_POSITIVE
+            print(
+                json.dumps(
+                    audit.report.generate_report(args.filename[0], class_to_print),
+                    indent=4,
+                    sort_keys=True,
+                ),
+            )
         else:
             # Starts interactive session.
             if args.diff:

diff --git a/docs/audit.md b/docs/audit.md
@@ -140,3 +140,126 @@ There are times you want to extract the raw secret values to run further analysi
 so with the `--raw` flag.
 
 TODO: Example when this feature is written up.
+
+## Report generation
+
+If you need a full report of all the detect-secrets findings, you can generate
+one with the `--report` flag:
+
+```bash
+$ detect-secrets audit --report .secret.baseline
+[
+    {
+        "category": "VERIFIED_TRUE",
+        "filename": "test.properties",
+        "lines": {
+            "1": "secret=value",
+            "6": "password=value"
+        },
+        "secrets": "value",
+        "types": [
+            "Secret Keyword"
+        ]
+    },
+    {
+        "category": "UNVERIFIED",
+        "filename": "test.properties",
+        "lines": {
+            "2": "password=changeit",
+            "5": "password=changeit"
+        },
+        "secrets": "changeit",
+        "types": [
+            "Secret Keyword"
+        ]
+    },
+    {
+        "category": "VERIFIED_TRUE",
+        "filename": "test.properties",
+        "lines": {
+            "3": "password=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.",
+            "4": "test=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ."
+        },
+        "secrets": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.",
+        "types": [
+            "Secret Keyword",
+            "JSON Web Token"
+        ]
+    },
+    {
+        "category": "VERIFIED_FALSE",
+        "filename": "test.properties",
+        "lines": {
+            "7": "password=faketest"
+        },
+        "secrets": "faketest",
+        "types": [
+            "Secret Keyword"
+        ]
+    }
+]
+```
+
+You can also select only the real secrets with the option `--only-real`:
+
+```bash
+$ detect-secrets audit --report --only-real .secret.baseline
+[
+    {
+        "category": "VERIFIED_TRUE",
+        "filename": "test.properties",
+        "lines": {
+            "1": "secret=value",
+            "6": "password=value"
+        },
+        "secrets": "value",
+        "types": [
+            "Secret Keyword"
+        ]
+    },
+    {
+        "category": "UNVERIFIED",
+        "filename": "test.properties",
+        "lines": {
+            "2": "password=changeit",
+            "5": "password=changeit"
+        },
+        "secrets": "changeit",
+        "types": [
+            "Secret Keyword"
+        ]
+    },
+    {
+        "category": "VERIFIED_TRUE",
+        "filename": "test.properties",
+        "lines": {
+            "3": "password=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.",
+            "4": "test=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ."
+        },
+        "secrets": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.",
+        "types": [
+            "JSON Web Token",
+            "Secret Keyword"
+        ]
+    }
+]
+```
+
+Or include only the false positives with `--only-false`:
+
+```bash
+$ detect-secrets audit --report --only-false .secret.baseline
+[
+    {
+        "category": "VERIFIED_FALSE",
+        "filename": "test.properties",
+        "lines": {
+            "7": "password=faketest"
+        },
+        "secrets": "faketest",
+        "types": [
+            "Secret Keyword"
+        ]
+    }
+]
+```