Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parser: Add support for importing json exports from the PTART Reporting Tool #11038

Draft
wants to merge 26 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0f8678d
Initial template
Hydragyrum Aug 14, 2024
6e3d82e
Add scan file sample
Hydragyrum Aug 14, 2024
d735669
Fix the basic structure of the parser
Hydragyrum Sep 6, 2024
af0f4a4
Begin unit tests and assessment parser
Hydragyrum Sep 6, 2024
5857c1b
Fix up json files and refactor to just use findings. Group by compone…
Hydragyrum Sep 6, 2024
481cb47
Use self.subTest to work out findings
Hydragyrum Sep 6, 2024
e26223b
Add test cases
Hydragyrum Sep 17, 2024
b4a22e0
update test files
Hydragyrum Sep 18, 2024
69960e1
Add support for tags and enpoints
Hydragyrum Sep 19, 2024
acf4435
Add files
Hydragyrum Sep 19, 2024
800fe93
Finish Assessment parser
Hydragyrum Oct 9, 2024
bcb037a
Fix bugs with CVSS vector and tool id
Hydragyrum Oct 9, 2024
349a383
Fix a unit test
Hydragyrum Oct 9, 2024
e368c66
Add PTART Deduplication settings
Hydragyrum Oct 9, 2024
dc43981
Add retest campaigns
Hydragyrum Oct 9, 2024
70de01a
Refactor and robustify screenshot parsing
Hydragyrum Oct 10, 2024
f29f0c8
Robustify attachements and screenshot naming
Hydragyrum Oct 10, 2024
eae6ed3
Refactor generation of the description
Hydragyrum Oct 10, 2024
e99d1bf
Robustify the import to avoid crashing and add comments
Hydragyrum Oct 10, 2024
adc6361
Add Documentation
Hydragyrum Oct 10, 2024
f393a67
Refactor code to implement proper guards and pythonic simplifications
Hydragyrum Oct 10, 2024
ee604ab
Nuke unused test code
Hydragyrum Oct 10, 2024
fae4b81
Fix formatting to flake8 standards
Hydragyrum Oct 10, 2024
14170af
Fix ruff errors
Hydragyrum Oct 10, 2024
fe25a87
Add vulnerability id from tool to vulnerability ID field
Hydragyrum Oct 10, 2024
b698e04
Add support for references
Hydragyrum Oct 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/content/en/integrations/parsers/file/ptart.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
title: "PTART Reports"
toc_hide: true
---

### What is PTART?
PTART is a Pentest and Security Auditing Reporting Tool developed by the Michelin CERT (https://github.com/certmichelin/PTART).

### Importing Reports
Reports can be exported in JSON format from the PTART web UI and imported into DefectDojo by selecting the "PTART Report" scan type.

### Sample Scan Data
Sample scan data for testing purposes can be found [here](https://github.com/DefectDojo/django-DefectDojo/tree/master/unittests/scans/ptart).

1 change: 1 addition & 0 deletions dojo/settings/settings.dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -1514,6 +1514,7 @@ def saml2_attrib_map_format(dict):
"ThreatComposer Scan": DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE,
"Invicti Scan": DEDUPE_ALGO_HASH_CODE,
"KrakenD Audit Scan": DEDUPE_ALGO_HASH_CODE,
"PTART Report": DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL,
}

# Override the hardcoded settings here via the env var
Expand Down
Empty file added dojo/tools/ptart/__init__.py
Empty file.
62 changes: 62 additions & 0 deletions dojo/tools/ptart/assessment_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import dojo.tools.ptart.ptart_parser_tools as ptart_tools
from dojo.models import Finding


class PTARTAssessmentParser:

    """
    Build findings from the "assessments" section of a PTART JSON report.

    Every hit of every assessment becomes one Finding; the assessment title
    is used as the finding's component name.
    """

    def __init__(self):
        # CVSS version declared by the report ("cvss_type"); filled in by
        # get_test_data and read when parsing each hit's CVSS vector.
        self.cvss_type = None

    def get_test_data(self, tree):
        """
        Return the findings for all assessments in the report.

        :param tree: the decoded PTART report (a dict).
        :return: a flat list of findings; empty when the report has no
            "assessments" key or when that key holds null / an empty list.
        """
        if "assessments" not in tree:
            return []

        self.cvss_type = tree.get("cvss_type", None)
        # A JSON null here would otherwise fail on iteration.
        assessments = tree["assessments"] or []
        return [finding for assessment in assessments
                for finding in self.parse_assessment(assessment)]

    def parse_assessment(self, assessment):
        """
        Return one finding per hit of a single assessment.

        A missing or null "hits" entry yields an empty list.
        """
        hits = assessment.get("hits") or []
        return [self.get_finding(assessment, hit) for hit in hits]

    def get_finding(self, assessment, hit):
        """
        Convert a single hit into a Finding.

        :param assessment: the assessment owning the hit; only its "title"
            is read (as the component name).
        :param hit: the hit dict from the report.
        :return: the populated, unsaved Finding.
        """
        effort = ptart_tools.parse_ptart_fix_effort(hit.get("fix_complexity"))
        finding = Finding(
            title=ptart_tools.parse_title_from_hit(hit),
            severity=ptart_tools.parse_ptart_severity(hit.get("severity")),
            effort_for_fixing=effort,
            component_name=assessment.get("title", "Unknown Component"),
            date=ptart_tools.parse_date_added_from_hit(hit),
        )

        # Don't add fields if they are blank. The keys are looked up with
        # .get() so that a hit lacking one of them is skipped rather than
        # aborting the whole import with a KeyError.
        body = hit.get("body")
        if body:
            finding.description = body

        remediation = hit.get("remediation")
        if remediation:
            finding.mitigation = remediation

        hit_id = hit.get("id")
        if hit_id:
            finding.unique_id_from_tool = hit_id
            finding.vuln_id_from_tool = hit_id
            finding.cve = hit_id

        # Clean up and parse the CVSS vector
        cvss_vector = ptart_tools.parse_cvss_vector(hit, self.cvss_type)
        if cvss_vector:
            finding.cvssv3 = cvss_vector

        if "labels" in hit:
            finding.unsaved_tags = hit["labels"]

        finding.unsaved_endpoints = ptart_tools.parse_endpoints_from_hit(hit)

        # Add screenshots to files, and add other attachments as well.
        finding.unsaved_files = ptart_tools.parse_screenshots_from_hit(hit)
        finding.unsaved_files.extend(ptart_tools.parse_attachment_from_hit(hit))

        finding.references = ptart_tools.parse_references_from_hit(hit)

        return finding
77 changes: 77 additions & 0 deletions dojo/tools/ptart/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import json

import dojo.tools.ptart.ptart_parser_tools as ptart_tools
from dojo.tools.parser_test import ParserTest
from dojo.tools.ptart.assessment_parser import PTARTAssessmentParser
from dojo.tools.ptart.retest_parser import PTARTRetestParser


class PTARTParser:

    """
    Parser for JSON exports of the PTART reporting tool
    (https://github.com/certmichelin/PTART)
    """

    def get_scan_types(self):
        """Return the scan type names handled by this parser."""
        return ["PTART Report"]

    def get_label_for_scan_types(self, scan_type):
        """Return the label shown for the scan type."""
        return "PTART Report"

    def get_description_for_scan_types(self, scan_type):
        """Return the help text shown for the scan type."""
        return "Import a PTART report file in JSON format."

    def get_tests(self, scan_type, scan):
        """
        Build a single ParserTest describing the whole report.

        :param scan_type: unused.
        :param scan: file-like object holding the JSON report.
        :return: a one-element list with the populated ParserTest.
        """
        report = json.load(scan)

        parser_test = ParserTest(
            name="Pen Test Report",
            type="Pen Test",
            version="",
        )

        # Name and type are deliberately given the same value: per the
        # original author's note, setting only the name did not show up
        # after import.
        if "name" in report:
            report_label = report["name"] + " Report"
            parser_test.name = report_label
            parser_test.type = report_label

        # The description is assembled from several report fields and is
        # only set when at least one of them is filled in.
        summary = ptart_tools.generate_test_description_from_report(report)
        if summary:
            parser_test.description = summary

        # Original author's note: the dates did not appear to be picked up
        # by DefectDojo at the time of writing; they are set regardless.
        date_fields = (
            ("target_start", "start_date"),
            ("target_end", "end_date"),
        )
        for attribute, key in date_fields:
            if key in report:
                parsed = ptart_tools.parse_date(report[key], "%Y-%m-%d")
                setattr(parser_test, attribute, parsed)

        parser_test.findings = self.get_items(report)
        return [parser_test]

    def get_findings(self, file, test):
        """Return the findings contained in the JSON report *file*."""
        return self.get_items(json.load(file))

    def get_items(self, data):
        """
        Return the findings of the assessments followed by those of the
        retest campaigns, merged into one list.
        """
        # The report has two main sections (assessments, retest campaigns);
        # they are merged because separate tests per section were not
        # achievable according to the original author.
        return [
            *PTARTAssessmentParser().get_test_data(data),
            *PTARTRetestParser().get_test_data(data),
        ]

    def requires_file(self, scan_type):
        """Return True: this parser always needs an uploaded file."""
        return True
185 changes: 185 additions & 0 deletions dojo/tools/ptart/ptart_parser_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import pathlib
from datetime import datetime

import cvss

from dojo.models import Endpoint

# Messages carried by the ValueError raised when an attachment or a
# screenshot entry has no usable "data" payload.
ATTACHMENT_ERROR = "Attachment data not found"
SCREENSHOT_ERROR = "Screenshot data not found"


def parse_ptart_severity(severity):
    """
    Translate a PTART numeric severity into a severity label.

    1 to 4 map to "Critical", "High", "Medium" and "Low"; any other value
    (including None) maps to "Info".
    """
    labels = dict(enumerate(("Critical", "High", "Medium", "Low"), start=1))
    return labels.get(severity, "Info")


def parse_ptart_fix_effort(effort):
    """
    Translate a PTART numeric fix complexity into an effort label.

    1, 2 and 3 map to "High", "Medium" and "Low"; anything else gives None.
    """
    labels = dict(zip((1, 2, 3), ("High", "Medium", "Low")))
    return labels.get(effort)


def parse_title_from_hit(hit):
    """
    Build the finding title for a hit.

    Gives "<id>: <title>" when both are present, otherwise whichever of the
    two is present, otherwise "Unknown Hit".
    """
    title = hit.get("title")
    identifier = hit.get("id")
    if title and identifier:
        return f"{identifier}: {title}"
    return title or identifier or "Unknown Hit"


def parse_date_added_from_hit(hit):
    """
    Return the hit's "added" timestamp as a datetime.

    The value is handed to parse_date together with the timestamp layout
    (date, "T", time with fractional seconds).
    """
    return parse_date(hit.get("added"), "%Y-%m-%dT%H:%M:%S.%f")


def parse_date(date, format):
    """
    Parse *date* with the strptime layout *format*, never raising.

    :param date: the date string from the report; may be empty or None.
    :param format: a ``datetime.strptime`` format string. (The name shadows
        the builtin but is kept so existing callers are unaffected.)
    :return: the parsed datetime, or the current time when *date* is empty,
        does not match *format*, or is not a string at all.
    """
    if not date:
        return datetime.now()
    try:
        return datetime.strptime(date, format)
    except (ValueError, TypeError):
        # ValueError: the text does not match the layout.
        # TypeError: the report held a non-string (e.g. a number); fall
        # back the same way instead of aborting the import.
        return datetime.now()


def parse_cvss_vector(hit, cvss_type):
    """
    Return the hit's cleaned CVSS v3 vector, or None.

    None is returned when the hit has no "cvss_vector", when *cvss_type* is
    not 3, or when the vector is rejected by the cvss library.
    """
    raw_vector = hit.get("cvss_vector")
    # Only CVSS v3 is handled here (per the original author, DefectDojo did
    # not support other versions at the time); a v4 branch would go here.
    if not raw_vector or cvss_type != 3:
        return None
    try:
        return cvss.CVSS3(raw_vector).clean_vector()
    except cvss.CVSS3Error:
        return None


def parse_retest_status(status):
    """
    Expand a PTART retest status code into its readable label.

    Unknown codes (and None) give None.
    """
    labels = dict([
        ("F", "Fixed"),
        ("NF", "Not Fixed"),
        ("PF", "Partially Fixed"),
        ("NA", "Not Applicable"),
        ("NT", "Not Tested"),
    ])
    return labels.get(status)


def parse_screenshots_from_hit(hit):
    """
    Return the importable screenshots of a hit as file dicts.

    :param hit: the hit dict from the report.
    :return: a list of ``{"title": ..., "data": ...}`` dicts; screenshots
        without data are dropped. Empty when "screenshots" is missing,
        null or an empty list.
    """
    # `or []` also covers an explicit JSON null, which would otherwise
    # fail on iteration.
    screenshots = hit.get("screenshots") or []
    parsed = (parse_screenshot_data(screenshot) for screenshot in screenshots)
    return [entry for entry in parsed if entry is not None]


def parse_screenshot_data(screenshot):
    """
    Turn one screenshot entry into a ``{"title", "data"}`` file dict.

    Returns None when a helper raises ValueError (the screenshot has no
    data), so the caller can skip it.
    """
    try:
        return {
            "title": get_screenshot_title(screenshot),
            "data": get_screenshot_data(screenshot),
        }
    except ValueError:
        return None


def get_screenshot_title(screenshot):
    """
    Return the file title for a screenshot: its caption plus file suffix.

    A missing or empty caption is replaced by "screenshot".
    """
    caption = screenshot.get("caption") or "screenshot"
    suffix = get_file_suffix_from_screenshot(screenshot)
    return f"{caption}{suffix}"


def get_screenshot_data(screenshot):
    """
    Return the payload stored under ``screenshot["screenshot"]["data"]``.

    :param screenshot: one screenshot entry of a hit.
    :return: the non-empty data value.
    :raises ValueError: when the nested "screenshot" object or its "data"
        is missing, null or empty. A null / non-dict nested object is
        reported this way too, so callers that skip on ValueError do not
        crash with a TypeError.
    """
    image = screenshot.get("screenshot")
    data = image.get("data") if isinstance(image, dict) else None
    if not data:
        raise ValueError(SCREENSHOT_ERROR)
    return data


def get_file_suffix_from_screenshot(screenshot):
    """
    Return the file suffix (e.g. ".png") of the screenshot's filename.

    :param screenshot: one screenshot entry of a hit.
    :return: the suffix of ``screenshot["screenshot"]["filename"]``, or ""
        when the nested object or the filename is missing, null or empty.
    """
    image = screenshot.get("screenshot")
    # Guard against a null / non-dict nested object and a null filename,
    # both of which would otherwise raise TypeError.
    filename = image.get("filename") if isinstance(image, dict) else None
    return pathlib.Path(filename).suffix if filename else ""


def parse_attachment_from_hit(hit):
    """
    Return the importable attachments of a hit as file dicts.

    :param hit: the hit dict from the report.
    :return: a list of ``{"title": ..., "data": ...}`` dicts; attachments
        without data are dropped. Empty when "attachments" is missing,
        null or an empty list.
    """
    # `or []` also covers an explicit JSON null, which would otherwise
    # fail on iteration.
    attachments = hit.get("attachments") or []
    parsed = (parse_attachment_data(attachment) for attachment in attachments)
    return [entry for entry in parsed if entry is not None]


def parse_attachment_data(attachment):
    """
    Turn one attachment entry into a ``{"title", "data"}`` file dict.

    Returns None when a helper raises ValueError, i.e. the attachment
    carries no data and should not be imported.
    """
    try:
        return {
            "title": get_attachement_title(attachment),
            "data": get_attachment_data(attachment),
        }
    except ValueError:
        return None


def get_attachment_data(attachment):
    """
    Return the attachment's "data" payload.

    Raises ValueError when the key is missing or its value is empty.
    """
    payload = attachment.get("data")
    if not payload:
        raise ValueError(ATTACHMENT_ERROR)
    return payload


def get_attachement_title(attachment):
    """
    Return the attachment's title, or "attachment" when it is missing
    or empty.
    """
    return attachment.get("title") or "attachment"


def parse_endpoints_from_hit(hit):
    """
    Return the hit's "asset" as a one-element list of Endpoint objects.

    An absent or empty asset gives an empty list.
    """
    asset = hit.get("asset")
    if not asset:
        return []
    return [Endpoint.from_uri(asset)]


def generate_test_description_from_report(data):
    """
    Assemble the test description from the report's narrative fields.

    The non-empty values of "executive_summary", "engagement_overview" and
    "conclusion" are joined, in that order, with blank lines between them.
    Returns None when none of them is filled in.
    """
    sections = []
    for key in ("executive_summary", "engagement_overview", "conclusion"):
        text = data.get(key)
        if text:
            sections.append(text)
    return "\n\n".join(sections) or None


def parse_references_from_hit(hit):
    """
    Render the hit's references as newline-separated text.

    :param hit: the hit dict from the report.
    :return: None when "references" is missing or null; otherwise the
        usable references joined with newlines (an empty string when the
        list is empty or no entry is usable).
    """
    references = hit.get("references")
    # A JSON null is treated like a missing key instead of failing on
    # iteration.
    if references is None:
        return None

    rendered = (get_transformed_reference(ref) for ref in references)
    return "\n".join(line for line in rendered if line)


def get_transformed_reference(reference):
    """
    Render one reference entry as a line of text.

    :param reference: a dict with optional "name" and "url" keys.
    :return: None when there is no URL; the bare URL when the name is
        present but empty or null; otherwise "<name>: <url>". A reference
        with no "name" key at all is labelled "Reference".
    """
    title = reference.get("name", "Reference")
    url = reference.get("url", None)
    # Without a URL there is nothing worth recording.
    if not url:
        return None
    # A blank name must not produce ": <url>" or "None: <url>".
    if not title:
        return url
    return f"{title}: {url}"
Loading