Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
Signed-off-by: Haiko Schol <hs@haikoschol.com>
  • Loading branch information
haikoschol committed Mar 15, 2020
1 parent 3265803 commit 6404723
Show file tree
Hide file tree
Showing 15 changed files with 161 additions and 191 deletions.
8 changes: 4 additions & 4 deletions vulnerabilities/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@

from vulnerabilities.models import (
ImpactedPackage,
Importer,
Package,
PackageReference,
ResolvedPackage,
Vulnerability,
VulnerabilityReference
VulnerabilityReference,
)


Expand Down Expand Up @@ -58,6 +58,6 @@ class ResolvedPackageAdmin(admin.ModelAdmin):
pass


@admin.register(PackageReference)
class PackageReferenceAdmin(admin.ModelAdmin):
@admin.register(Importer)
class ImporterAdmin(admin.ModelAdmin):
pass
16 changes: 0 additions & 16 deletions vulnerabilities/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,29 +22,15 @@
# Visit https://github.com/nexB/vulnerablecode/ for support and download.

from rest_framework import serializers
from rest_framework import status
from rest_framework import viewsets
from rest_framework.response import Response

from packageurl import PackageURL

from vulnerabilities.models import Package
from vulnerabilities.models import PackageReference
from vulnerabilities.models import Vulnerability
from vulnerabilities.models import VulnerabilityReference


class PackageReferenceSerializer(serializers.ModelSerializer):
class Meta:
model = PackageReference
fields = [
'repository',
'platform',
'name',
'version',
]


class VulnerabilityReferenceSerializer(serializers.ModelSerializer):
class Meta:
model = VulnerabilityReference
Expand All @@ -69,7 +55,6 @@ class Meta:

class PackageSerializer(serializers.ModelSerializer):
vulnerabilities = VulnerabilitySerializer(many=True)
references = PackageReferenceSerializer(source='packagereference_set', many=True)

class Meta:
model = Package
Expand All @@ -78,7 +63,6 @@ class Meta:
'version',
'package_url',
'vulnerabilities',
'references',
]


Expand Down
1 change: 0 additions & 1 deletion vulnerabilities/data_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

from vulnerabilities.models import ImpactedPackage
from vulnerabilities.models import Package
from vulnerabilities.models import PackageReference
from vulnerabilities.models import ResolvedPackage
from vulnerabilities.models import Vulnerability
from vulnerabilities.models import VulnerabilityReference
Expand Down
43 changes: 28 additions & 15 deletions vulnerabilities/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from datetime import datetime
from typing import Any
from typing import Mapping
from typing import Optional
from typing import Sequence


Expand All @@ -33,8 +34,8 @@ class DataSource:
TODO
"""
batch_size: int
cutoff_date: datetime
config: Mapping[str, Any]
config: Optional[Mapping[str, Any]] = None
cutoff_date: Optional[datetime] = None

def __enter__(self):
"""
Expand All @@ -48,33 +49,45 @@ def __exit__(self, exc_type, exc_val, exc_tb):
"""
pass

def __next__(self):
def new_records(self):
"""
Subclasses return batch_size sized batches of VulnerabilityInfo objects
Subclasses return batch_size sized batches of VulnerabilityInfo objects that have been added to the data source
since self.cutoff_date.
"""
pass
raise StopIteration

def updated_records(self):
"""
Subclasses return batch_size sized batches of VulnerabilityInfo objects that have been modified since
self.cutoff_date.
NOTE: Data sources that do not enable detection of changes to existing records vs added records must only
implement this method, not new_records(). The ImportRunner relies on this contract to decide between
insert and update operations.
"""
raise StopIteration


# The following data classes express the contract between data sources and the import runner.
# Data sources are expected to be usable as context managers and generators, yielding
# batches of VulnerabilityInfo sequences.
# Data sources are expected to be usable as context managers and generators, yielding batches of VulnerabilityInfo
# sequences.

@dataclass
class Package:
name: str
namespace: str
type: str
version: str
qualifiers: str
subpath: str
references: Sequence[str]
namespace: Optional[str] = ''
qualifiers: Optional[str] = ''
subpath: Optional[str] = ''
references: Optional[Sequence[str]] = None


@dataclass
class VulnerabilityInfo:
cve_id: str
summary: str
affected_packages: Sequence[Package]
unaffected_packages: Sequence[Package]
fixed_packages: Sequence[Package]
references: Sequence[str]
affected_packages: Sequence[Package]
unaffected_packages: Optional[Sequence[Package]] = None
cve_id: Optional[str] = None
references: Optional[Sequence[str]] = None
36 changes: 28 additions & 8 deletions vulnerabilities/import_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,44 @@
logger = logging.getLogger(__name__)


# TODO This really should use asyncio for network and database, but sadly the Django ORM won't allow it.
class ImportRunner:
"""
The ImportRunner is responsible for inserting and updating data about vulnerabilities and
affected/unaffected/fixed packages in the database. The two main goals for the implementation are correctness and
efficiency.
Correctness:
- There must be no duplicates in the database (should be enforced by the schema).
- No valid data from the data source must be skipped or truncated.
Efficiency:
- Bulk inserts should be used whenever possible.
- Checking whether a record already exists should be kept to a minimum
(the data source should know this instead).
- All update and select operations must use indexed columns.
"""
def __init__(self, importer, batch_size=None):
self.importer = importer
self.batch_size = batch_size

def run(self, cutoff_date=None):
"""
Create a data source for the given importer and store the data retrieved in the database.
Data sources provide two kinds of records: Vulnerabilities and packages. Vulnerabilities are potentially shared
across many packages, from the same data source and from different data sources. For example, a vulnerability
in the Linux kernel is mentioned by advisories from all Linux distributions that include this kernel version.
Therefore this method always checks whether vulnerabilities emitted by the data source already exist and if so,
their primary key is cached so they can be efficiently linked to packages that are inserted later.
"""
logger.debug(f'Starting import for {self.importer.name}.')
data_source = self.importer.make_data_source(cutoff_date=cutoff_date, batch_size=self.batch_size)

with data_source as ds:
for batch in ds:
# TODO
# Check if any Vulnerability or Package from this batch already exists in the DB
# If not: Bulk insert everything
# If yes: Update existing ones and bulk insert the rest
pass
# TODO

self.importer.last_run = datetime.datetime.utcnow()
self.importer.save()
logger.debug(f'Successfully finished import for {self.importer.name}.')

logger.debug(f'Successfully finished import for {self.importer.name}.')
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def import_vulnerabilities():
cve_id = advisory.get('cve')
vuln_id = advisory['id']
vuln_version_ranges = advisory['specs']
affected_versions = set()
for vuln_version_range in vuln_version_ranges:
version_range = RangeSpecifier(vuln_version_range)
affected_versions = set()
Expand Down
7 changes: 4 additions & 3 deletions vulnerabilities/management/commands/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@

from datetime import datetime

from django.core.management.base import BaseCommand, CommandError
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError

from vulnerabilities.models import Importer
from vulnerabilities.import_runner import ImportRunner
Expand Down Expand Up @@ -76,14 +77,14 @@ def list_sources(self):
def import_data(self, names, cutoff_date):
importers = []
unknown_importers = set()

# make sure all arguments are valid before running any importers
for name in names:
try:
importers.append(Importer.objects.get(name=name))
except Importer.DoesNotExist:
unknown_importers.add(name)

if unknown_importers:
unknown_importers = ', '.join(unknown_importers)
raise CommandError(f'Unknown data sources: {unknown_importers}')
Expand Down
11 changes: 0 additions & 11 deletions vulnerabilities/migrations/0001_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,6 @@ class Migration(migrations.Migration):
('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Vulnerability')),
],
),
migrations.CreateModel(
name='PackageReference',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('repository', models.CharField(blank=True, help_text='Repository URL eg:http://central.maven.org', max_length=100)),
('platform', models.CharField(blank=True, help_text='Platform eg:maven', max_length=50)),
('name', models.CharField(blank=True, help_text='Package reference name eg:org.apache.commons.io', max_length=50)),
('version', models.CharField(blank=True, help_text='Reference version', max_length=50)),
('package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Package')),
],
),
migrations.AddField(
model_name='package',
name='vulnerabilities',
Expand Down
56 changes: 10 additions & 46 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@

class Vulnerability(models.Model):
"""
A software vulnerability with minimal information.
Identifiers other than CVE ID are stored as VulnerabilityReference.
A software vulnerability with minimal information. Identifiers other than CVE ID are stored as
VulnerabilityReference.
"""
cve_id = models.CharField(max_length=50, help_text='CVE ID', unique=True, null=True)
summary = models.TextField(help_text='Summary of the vulnerability', blank=True)
Expand All @@ -49,8 +49,7 @@ class Meta:

class VulnerabilityReference(models.Model):
"""
A reference to a vulnerability such as a security advisory from
a Linux distribution or language package manager.
A reference to a vulnerability such as a security advisory from a Linux distribution or language package manager.
"""
vulnerability = models.ForeignKey(
Vulnerability, on_delete=models.CASCADE)
Expand All @@ -70,13 +69,12 @@ def __str__(self):

class Package(PackageURLMixin):
"""
A software package with minimal identifying information.
Other identifiers are stored as PackageReference.
A software package with links to relevant vulnerabilities.
"""
vulnerabilities = models.ManyToManyField(to='Vulnerability', through='ImpactedPackage')

def __str__(self):
return self.name
return self.package_url


class ImpactedPackage(models.Model):
Expand All @@ -92,56 +90,23 @@ class Meta:

class ResolvedPackage(models.Model):
"""
Relates a vulnerability to package(s) that contain
a fix or resolution of this vulnerability.
Relates a vulnerability to package(s) that contain a fix or resolution of this vulnerability.
"""
vulnerability = models.ForeignKey(Vulnerability, on_delete=models.CASCADE)
package = models.ForeignKey(Package, on_delete=models.CASCADE)


class PackageReference(models.Model):
"""
One or more identifiers and references for a software package
in a package repository, such as a Debian, Maven or NPM repository.
"""
package = models.ForeignKey(Package, on_delete=models.CASCADE)
repository = models.CharField(
max_length=100,
help_text='Repository URL eg:http://central.maven.org',
blank=True,
)
platform = models.CharField(
max_length=50,
help_text='Platform eg:maven',
blank=True,
)
name = models.CharField(
max_length=50,
help_text='Package reference name eg:org.apache.commons.io',
blank=True,
)
version = models.CharField(
max_length=50,
help_text='Reference version',
blank=True,
)

def __str__(self):
return self.platform


class Importer(models.Model):
"""
Metadata and pointer to the implementation for a source
of vulnerability data (aka security advisories)
Metadata and pointer to the implementation for a source of vulnerability data (aka security advisories)
"""
name = models.CharField(max_length=100, unique=True, help_text='Name of the importer')
license = models.CharField(max_length=100, blank=True, help_text='License of the vulnerability data')
last_run = models.DateTimeField(null=True, help_text='UTC Timestamp of the last run')

data_source = models.CharField(
max_length=100,
help_text='Class name of the data source implementation importable from vulnerabilities.importers',
help_text='Class name of the data source implementation importable from vulnerabilities.importers',
)
data_source_cfg = pgfields.JSONField(
null=False,
Expand All @@ -151,8 +116,7 @@ class Importer(models.Model):

def make_data_source(self, cutoff_date=None, batch_size=None) -> DataSource:
"""
Return a configured and ready to use instance of
this importers data source implementation.
Return a configured and ready to use instance of this importer's data source implementation.
cutoff_date - timestamp of the oldest data to include in the import (default: self.last_run)
batch_size - max. number of records to return on each iteration
Expand Down
8 changes: 0 additions & 8 deletions vulnerabilities/tests/test_data_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

from vulnerabilities.models import ImpactedPackage
from vulnerabilities.models import Package
from vulnerabilities.models import PackageReference
from vulnerabilities.models import ResolvedPackage
from vulnerabilities.models import Vulnerability
from vulnerabilities.models import VulnerabilityReference
Expand Down Expand Up @@ -153,13 +152,6 @@ def test_arch_Package(setArchLinuxData):
assert 'archlinux' == pkg.namespace


def test_arch_PackageReference(setArchLinuxData):
"""
Check that no package references were found in the test data
"""
assert 0 == PackageReference.objects.count()


def test_arch_ImpactedPackage(setArchLinuxData):
"""
Check there is one ImpactedPackage for the number of packages
Expand Down
Loading

0 comments on commit 6404723

Please sign in to comment.