Skip to content

Commit

Permalink
Implement improver
Browse files Browse the repository at this point in the history
This is a work in progress; there are a few bugs and a few FIXMEs as well.
Everything will be replaced before the final commit.

Signed-off-by: Hritik Vijay <hritikxx8@gmail.com>
  • Loading branch information
Hritik14 committed Aug 13, 2021
1 parent ae79a86 commit 925d0ef
Show file tree
Hide file tree
Showing 9 changed files with 220 additions and 102 deletions.
8 changes: 8 additions & 0 deletions vulnerabilities/data_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,11 @@ def __post_init__(self):

if self.confidence < 0:
raise UnderConfidenceError

class Improver:
    """
    Base class for all improvers.

    Every improver should inherit from this class and override
    ``updated_inferences`` to return new inferences for a package or
    vulnerability.
    """

    def updated_inferences(self):
        """
        Return the new inferences produced by this improver.

        Raises:
            NotImplementedError: always, unless a subclass overrides this method.
        """
        # Name the concrete class so a missing override is easy to track down.
        raise NotImplementedError(
            "{} must implement updated_inferences()".format(type(self).__name__)
        )
8 changes: 6 additions & 2 deletions vulnerabilities/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,17 @@ class Advisory:

summary: str
vulnerability_id: Optional[str] = None
affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list)
affected_package_urls: Iterable[PackageURL] = dataclasses.field(default_factory=list)
fixed_package_urls: Iterable[PackageURL] = dataclasses.field(default_factory=list)
references: List[Reference] = dataclasses.field(default_factory=list)

def __post_init__(self):
    """Reject a ``vulnerability_id`` that is set but does not pass ``is_cve``."""
    vulnerability_id = self.vulnerability_id
    if not vulnerability_id:
        # An advisory without an identifier is allowed.
        return
    if not is_cve(vulnerability_id):
        raise ValueError("CVE expected, found: {}".format(vulnerability_id))

def normalized(self):
affected_package_urls = set(self.affected_package_urls)
fixed_package_urls = set(self.fixed_package_urls)
references = sorted(
self.references, key=lambda reference: (reference.reference_id, reference.url)
)
Expand All @@ -103,7 +106,8 @@ def normalized(self):
return Advisory(
summary=self.summary,
vulnerability_id=self.vulnerability_id,
affected_packages=sorted(self.affected_packages),
affected_package_urls=affected_package_urls,
fixed_package_urls=fixed_package_urls,
references=references,
)

Expand Down
92 changes: 12 additions & 80 deletions vulnerabilities/import_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,16 @@
import dataclasses
import datetime
import logging
from itertools import chain
from typing import Tuple
from typing import Set

from django.db import transaction

from vulnerabilities import models
from vulnerabilities.data_source import Advisory, DataSource
from vulnerabilities.data_source import Advisory
from vulnerabilities.data_source import PackageURL
from vulnerabilities.data_inference import Inference
from vulnerabilities.data_inference import MAX_CONFIDENCE
from vulnerabilities.improve_runner import process_inferences

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -87,7 +88,7 @@ def run(self, cutoff_date: datetime.datetime = None) -> None:
data_source = self.importer.make_data_source(cutoff_date=cutoff_date)
with data_source:
advisories = data_source.updated_advisories()
process_advisories(advisories)
process_advisories("importer", advisories)
self.importer.last_run = datetime.datetime.now(tz=datetime.timezone.utc)
self.importer.data_source_cfg = dataclasses.asdict(data_source.config)
self.importer.save()
Expand All @@ -108,79 +109,10 @@ def get_vuln_pkg_refs(vulnerability, package):
)


@transaction.atomic
def process_advisories(advisories: Set[Advisory]) -> None:
    """
    Insert the given advisories into the database inside one transaction.

    For every advisory this stores the vulnerability, each of its references
    together with the severity scores attached to that reference, and one
    PackageRelatedVulnerability row per affected package. When an affected
    package comes with a patched package, the patched package is recorded on
    that row.
    """
    for advisory in advisories:
        vuln, _ = _get_or_create_vulnerability(advisory)

        for vuln_ref in advisory.references:
            ref, _ = models.VulnerabilityReference.objects.get_or_create(
                vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url
            )

            for score in vuln_ref.severities:
                models.VulnerabilitySeverity.objects.update_or_create(
                    vulnerability=vuln,
                    scoring_system=score.system.identifier,
                    reference=ref,
                    defaults={"value": str(score.value)},
                )

        for aff_pkg_with_patched_pkg in advisory.affected_packages:
            vulnerable_package, _ = _get_or_create_package(
                aff_pkg_with_patched_pkg.vulnerable_package
            )
            patched_package = None
            if aff_pkg_with_patched_pkg.patched_package:
                patched_package, _ = _get_or_create_package(
                    aff_pkg_with_patched_pkg.patched_package
                )

            prv, _ = models.PackageRelatedVulnerability.objects.get_or_create(
                vulnerability=vuln,
                package=vulnerable_package,
            )

            # Only rewrite the relation when there is a fix to record.
            if patched_package:
                prv.patched_package = patched_package
                prv.save()


def _get_or_create_vulnerability(
    advisory: Advisory,
) -> Tuple[models.Vulnerability, bool]:
    """
    Fetch the Vulnerability with the advisory's ``vulnerability_id``, creating it if absent.

    Returns the vulnerability and a flag telling whether it was just created.
    A non-empty advisory summary that differs from the stored one replaces it.
    """
    vulnerability, was_created = models.Vulnerability.objects.get_or_create(
        vulnerability_id=advisory.vulnerability_id
    )  # nopep8

    # Eventually we only want to keep summary from NVD and ignore other descriptions.
    new_summary = advisory.summary
    if new_summary and new_summary != vulnerability.summary:
        vulnerability.summary = new_summary
        vulnerability.save()

    return vulnerability, was_created


def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]:
    """
    Fetch the Package matching the given package URL, creating it if absent.

    Falsy purl fields are replaced before the lookup: ``qualifiers`` becomes an
    empty dict and every other field becomes an empty string.
    """

    def _lookup_value(field, value):
        if value:
            return value
        return {} if field == "qualifiers" else ""

    lookup = {field: _lookup_value(field, value) for field, value in p.to_dict().items()}
    return models.Package.objects.get_or_create(**lookup)


def _package_url_to_package(purl: PackageURL) -> models.Package:
    """Instantiate a Package and populate it from ``purl``; ``save()`` is not called here."""
    package = models.Package()
    package.set_package_url(purl)
    return package
def process_advisories(source: str, advisories: Set[Advisory]) -> None:
    """
    Insert advisories into the database.

    Advisories are treated as full-confidence inferences: each one is wrapped in
    an Inference attributed to ``source`` with MAX_CONFIDENCE, and the resulting
    list is handed to process_inferences.
    """
    full_confidence_inferences = []
    for advisory in advisories:
        full_confidence_inferences.append(Inference(advisory, source, MAX_CONFIDENCE))
    process_inferences(full_confidence_inferences)
4 changes: 3 additions & 1 deletion vulnerabilities/importers/nginx.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from vulnerabilities.package_managers import GitHubTagsAPI
from vulnerabilities.package_managers import Version
from vulnerabilities.helpers import nearest_patched_package
from vulnerabilities.helpers import AffectedPackage


@dataclasses.dataclass
Expand Down Expand Up @@ -117,7 +118,8 @@ def to_advisories(self, data):
Advisory(
vulnerability_id=cve_id,
summary=summary,
affected_packages=nearest_patched_package(vulnerable_packages, fixed_packages),
affected_package_urls=vulnerable_packages,
fixed_package_urls=fixed_packages,
)
)

Expand Down
115 changes: 111 additions & 4 deletions vulnerabilities/improve_runner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from datetime import datetime
import dataclasses
import logging
from typing import Tuple

from django.db import transaction

from vulnerabilities import models
from vulnerabilities.data_source import PackageURL
from vulnerabilities.data_source import Advisory

logger = logging.getLogger(__name__)

Expand All @@ -15,11 +22,111 @@ def __init__(self, improver):
self.improver = improver

def run(self) -> None:
    """
    Run the configured improver and store the inferences it produces.

    ``self.improver`` is used as a class here: it is instantiated, asked for
    its updated inferences, and those are passed to process_inferences.
    """
    # NOTE(review): assumes self.improver is an improver class rather than an
    # instance, since it is called and its __name__ is read — confirm at the caller.
    improver_name = self.improver.__name__
    logger.info("Improving using %s.", improver_name)
    inferences = self.improver().updated_inferences()
    process_inferences(inferences)
    logger.info("Finished improving using %s.", improver_name)


@transaction.atomic
def process_inferences(inferences):
    """
    Store the given inferences in the database inside one transaction.

    For each inference this stores the advisory's vulnerability, each of its
    references together with the severity scores attached to that reference,
    then records one PackageRelatedVulnerability relation per affected package
    URL and one PackageRelatedVulnerabilityFix relation per fixed package URL.
    Every relation carries the inference's source and confidence.
    """
    for inference in inferences:
        advisory = inference.advisory
        vuln, _ = _get_or_create_vulnerability(advisory)

        for vuln_ref in advisory.references:
            ref, _ = models.VulnerabilityReference.objects.get_or_create(
                vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url
            )

            for score in vuln_ref.severities:
                models.VulnerabilitySeverity.objects.update_or_create(
                    vulnerability=vuln,
                    scoring_system=score.system.identifier,
                    reference=ref,
                    defaults={"value": str(score.value)},
                )

        for aff_pkg in advisory.affected_package_urls:
            vulnerable_package, _ = _get_or_create_package(aff_pkg)
            create_or_update_relation(
                relation=models.PackageRelatedVulnerability,
                vulnerability=vuln,
                source=inference.source,
                package=vulnerable_package,
                confidence=inference.confidence,
            )

        for fixed_pkg in advisory.fixed_package_urls:
            patched_package, _ = _get_or_create_package(fixed_pkg)
            # The fix relation must point at the patched package itself; using
            # the variable left over from the affected-package loop would link
            # the wrong package (or fail when there were no affected packages).
            create_or_update_relation(
                relation=models.PackageRelatedVulnerabilityFix,
                vulnerability=vuln,
                source=inference.source,
                package=patched_package,
                confidence=inference.confidence,
            )


def _get_or_create_vulnerability(
    advisory: Advisory,
) -> Tuple[models.Vulnerability, bool]:
    """
    Fetch the Vulnerability with the advisory's ``vulnerability_id``, creating it if absent.

    Returns the vulnerability and a flag telling whether it was just created.
    A non-empty advisory summary that differs from the stored one replaces it.
    """
    vulnerability, was_created = models.Vulnerability.objects.get_or_create(
        vulnerability_id=advisory.vulnerability_id
    )  # nopep8

    # Eventually we only want to keep summary from NVD and ignore other descriptions.
    new_summary = advisory.summary
    if new_summary and new_summary != vulnerability.summary:
        vulnerability.summary = new_summary
        vulnerability.save()

    return vulnerability, was_created


def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]:
    """
    Fetch the Package matching the given package URL, creating it if absent.

    Falsy purl fields are replaced before the lookup: ``qualifiers`` becomes an
    empty dict and every other field becomes an empty string.
    """

    def _lookup_value(field, value):
        if value:
            return value
        return {} if field == "qualifiers" else ""

    lookup = {field: _lookup_value(field, value) for field, value in p.to_dict().items()}
    return models.Package.objects.get_or_create(**lookup)


def _package_url_to_package(purl: PackageURL) -> models.Package:
    """Instantiate a Package and populate it from ``purl``; ``save()`` is not called here."""
    package = models.Package()
    package.set_package_url(purl)
    return package

def create_or_update_relation(relation, vulnerability, source, package, confidence):
    """
    Record a ``relation`` row linking ``package`` to ``vulnerability``.

    When no row exists for the pair, one is created with the given source and
    confidence. An existing row is only rewritten (source and confidence) when
    the new confidence is strictly higher than the stored one.
    """
    pair = {"vulnerability": vulnerability, "package": package}
    try:
        existing = relation.objects.get(**pair)
        if existing.confidence < confidence:
            existing.source = source
            existing.confidence = confidence
            existing.save()
            logger.debug(
                "%s: Confidence improved for %s R %s, new confidence: %d",
                relation,
                package,
                vulnerability,
                confidence,
            )

    except relation.DoesNotExist:
        relation.objects.create(source=source, confidence=confidence, **pair)

9 changes: 6 additions & 3 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
IMPROVER_REGISTRY = []
from . import nginx

IMPROVER_REGISTRY = [nginx.NginxTimeTravel]


def find_class(class_name: str):
    """
    Return the improver class in IMPROVER_REGISTRY whose ``__name__`` is ``class_name``.

    Raises:
        AttributeError: if no registered improver has that class name.
    """
    # FIXME: this might cause problems when there are two modules containing the same
    # class name, think of a better approach
    for improver in IMPROVER_REGISTRY:
        if improver.__name__ == class_name:
            return improver

    raise AttributeError(
        "No improver named {!r} found in IMPROVER_REGISTRY".format(class_name)
    )
44 changes: 44 additions & 0 deletions vulnerabilities/improvers/nginx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from packageurl import PackageURL

from vulnerabilities.data_inference import Improver
from vulnerabilities.data_inference import Advisory
from vulnerabilities.data_inference import Inference
from vulnerabilities.helpers import nearest_patched_package
from vulnerabilities.models import Vulnerability
from vulnerabilities.models import Package

class NginxTimeTravel(Improver):
    """
    Improver that recomputes affected and fixed packages for nginx vulnerabilities.

    The packages already stored for each vulnerability are paired up with
    ``nearest_patched_package`` and returned as inferences.
    """

    def updated_inferences(self):
        """
        Return one Inference per vulnerability that has a vulnerable or patched nginx package.

        Each inference has source "time travel" and confidence 30.
        """
        vulnerabilities = set(Vulnerability.objects.filter(vulnerable_packages__name="nginx"))
        # set.union() returns a new set and leaves the receiver untouched;
        # update() is what actually adds the patched-only vulnerabilities.
        vulnerabilities.update(Vulnerability.objects.filter(patched_packages__name="nginx"))

        inferences = []
        for vulnerability in vulnerabilities:
            # Materialize as lists so the helper may iterate them more than once.
            affected_purls = [
                package_url(package)
                for package in Package.objects.filter(
                    vulnerable_package__package__name="nginx", vulnerabilities=vulnerability
                )
            ]
            fixed_purls = [
                package_url(package)
                for package in Package.objects.filter(
                    patched_package__package__name="nginx", vulnerabilities=vulnerability
                )
            ]

            # The result is walked twice below, so keep it as a list.
            time_traveller = list(nearest_patched_package(affected_purls, fixed_purls))
            affected_packages = [pair.vulnerable_package for pair in time_traveller]
            fixed_packages = [
                pair.patched_package
                for pair in time_traveller
                if pair.patched_package is not None
            ]

            inferences.append(
                Inference(
                    advisory=Advisory(
                        vulnerability_id=vulnerability.vulnerability_id,
                        summary=vulnerability.summary,
                        affected_package_urls=affected_packages,
                        fixed_package_urls=fixed_packages,
                    ),
                    source="time travel",
                    confidence=30,
                )
            )

        return inferences


def package_url(package):
    """Build a PackageURL from the identically named attributes of ``package``."""
    purl_fields = ("type", "namespace", "name", "version", "subpath", "qualifiers")
    return PackageURL(**{field: getattr(package, field) for field in purl_fields})

Loading

0 comments on commit 925d0ef

Please sign in to comment.