Skip to content

Commit

Permalink
Add unique CVE ID field to Vulnerability
Browse files Browse the repository at this point in the history
This change adds the field cve_id to the Vulnerability model and,
based on that, improves the data import for Arch Linux.

The improvements made have been discussed in issue aboutcode-org#20:

- For each CVE in a given AVG, exactly one Vulnerability is stored

- For each CVE, one VulnerabilityReference to its page on
  security.archlinux.org is stored

- Each ASA mentioned in an AVG is stored as a VulnerabilityReference

Since there is no production deployment of vulnerablecode yet, I took
the opportunity, while changing the models, to remove all existing
migrations and replace them with a single new one that creates the
whole schema.

Since the cve_id field on Vulnerability has a unique constraint set, I
needed to make some changes to the import code that belong to issue aboutcode-org#28.
I kept them minimal, however, so aboutcode-org#28 is still open and needs to be
addressed later.

closes aboutcode-org#20

Signed-off-by: Haiko Schol <hs@haikoschol.com>
  • Loading branch information
haikoschol committed Nov 6, 2019
1 parent 58e3ff3 commit 9312d26
Show file tree
Hide file tree
Showing 16 changed files with 236 additions and 347 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ python -m pytest -v vulnerabilities/tests/test_scrapers.py vulnerabilities/tests

For Django-based tests
```
DJANGO_DEV=1 python manage.py test vulnerabilities/tests
DJANGO_DEV=1 python manage.py test vulnerabilities.tests
```

## Data import
Expand Down
160 changes: 79 additions & 81 deletions vulnerabilities/data_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,11 @@ def debian_dump(extract_data, base_release='jessie'):
Save data scraped from Debian's security tracker.
"""
for data in extract_data:
vulnerability = Vulnerability.objects.create(
summary=data.get('description', ''),
)
VulnerabilityReference.objects.create(
vulnerability=vulnerability,
reference_id=data.get('vulnerability_id', ''),
vulnerability, _ = Vulnerability.objects.get_or_create(
cve_id=data['cve_id'],
)

pkg_name = data.get('package_name', '')
pkg_name = data['package_name']
package = Package.objects.create(
name=pkg_name,
type='deb',
Expand Down Expand Up @@ -83,15 +79,11 @@ def ubuntu_dump(html):
Dump data scraped from Ubuntu's security tracker.
"""
for data in html:
vulnerability = Vulnerability.objects.create(
summary='',
)
VulnerabilityReference.objects.create(
vulnerability=vulnerability,
reference_id=data.get('cve_id'),
vulnerability, _ = Vulnerability.objects.get_or_create(
cve_id=data['cve_id'],
)
package = Package.objects.create(
name=data.get('package_name'),
name=data['package_name'],
type='deb',
namespace='ubuntu'
)
Expand All @@ -105,90 +97,96 @@ def archlinux_dump(extract_data):
"""
Save data scraped from Arch Linux's security tracker.
"""
for item in extract_data:
cves = item['issues']
group = item['name']

advisories = set(item['advisories'])
vulnerabilities = cves + list(advisories)
vulnerabilities.append(group)
packages_name = item['packages']

affected_version = item['affected']
fixed_version = item['fixed']
if not fixed_version:
fixed_version = 'None'

vulnerability = Vulnerability.objects.create(
summary=item['type'],
)
base_url = 'https://security.archlinux.org'

for vulnerability_id in vulnerabilities:
VulnerabilityReference.objects.create(
vulnerability=vulnerability,
reference_id=vulnerability_id,
url=f'https://security.archlinux.org/{vulnerability_id}',
)
for avg in extract_data:
affected_packages = []
fixed_packages = []

for package_name in packages_name:
package_affected = Package.objects.create(
for package_name in avg['packages']:
ap, _ = Package.objects.get_or_create(
name=package_name,
type='pacman',
namespace='archlinux',
version=affected_version
)
ImpactedPackage.objects.create(
vulnerability=vulnerability,
package=package_affected
)
PackageReference.objects.create(
package=package_affected,
repository=f'https://security.archlinux.org/package/{package_name}',
version=avg['affected'],
)
package_fixed = Package.objects.create(
affected_packages.append(ap)

fp, _ = Package.objects.get_or_create(
name=package_name,
type='pacman',
namespace='archlinux',
version=fixed_version
version=avg['fixed'],
)
ResolvedPackage.objects.create(
fixed_packages.append(fp)

for cve_id in avg['issues']:
vulnerability, _ = Vulnerability.objects.get_or_create(
cve_id=cve_id,
)
VulnerabilityReference.objects.create(
vulnerability=vulnerability,
package=package_fixed
url=f'{base_url}/{cve_id}',
)
PackageReference.objects.create(
package=package_fixed,
repository=f'https://security.archlinux.org/package/{package_name}',
avg_name = avg['name']
VulnerabilityReference.objects.create(
vulnerability=vulnerability,
reference_id=avg_name,
url=f'{base_url}/{avg_name}',
)

for asa in avg['advisories']:
VulnerabilityReference.objects.create(
vulnerability=vulnerability,
reference_id=asa,
url=f'{base_url}/{asa}',
)

for ap in affected_packages:
ImpactedPackage.objects.get_or_create(
vulnerability=vulnerability,
package=ap,
)

for fp in fixed_packages:
ResolvedPackage.objects.get_or_create(
vulnerability=vulnerability,
package=fp,
)


def npm_dump(extract_data):
for data in extract_data:
vulnerability = Vulnerability.objects.create(
summary=data.get('summary'),
)
VulnerabilityReference.objects.create(
vulnerability=vulnerability,
reference_id=data.get('vulnerability_id'),
)
package_name = data['package_name']
advisory = data['advisory']

affected_versions = data.get('affected_version', [])
for version in affected_versions:
package_affected = Package.objects.create(
name=data.get('package_name'),
version=version,
)
ImpactedPackage.objects.create(
vulnerability=vulnerability,
package=package_affected
for cve_id in data['cve_ids']:
vulnerability, _ = Vulnerability.objects.get_or_create(
cve_id=cve_id,
)

fixed_versions = data.get('fixed_version', [])
for version in fixed_versions:
package_fixed = Package.objects.create(
name=data.get('package_name'),
version=version
)
ResolvedPackage.objects.create(
vulnerability=vulnerability,
package=package_fixed
)
if advisory:
VulnerabilityReference.objects.create(
vulnerability=vulnerability,
url=advisory,
)

for version in data['affected_versions']:
package_affected = Package.objects.create(
name=package_name,
version=version,
)
ImpactedPackage.objects.create(
vulnerability=vulnerability,
package=package_affected
)

for version in data['fixed_versions']:
package_fixed = Package.objects.create(
name=package_name,
version=version
)
ResolvedPackage.objects.create(
vulnerability=vulnerability,
package=package_fixed
)
69 changes: 37 additions & 32 deletions vulnerabilities/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.4 on 2017-09-20 22:34
from __future__ import unicode_literals
# Generated by Django 2.2.4 on 2019-11-04 18:01

from django.db import migrations, models
import django.db.models.deletion
Expand All @@ -24,51 +22,49 @@ class Migration(migrations.Migration):
name='Package',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('platform', models.CharField(blank=True, help_text='Package platform eg:maven', max_length=50)),
('name', models.CharField(blank=True, help_text='Package name', max_length=50)),
('version', models.CharField(blank=True, help_text='Package version', max_length=50)),
('type', models.CharField(blank=True, help_text='A short code to identify the type of this package. For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.', max_length=16, null=True)),
('namespace', models.CharField(blank=True, help_text='Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.', max_length=255, null=True)),
('name', models.CharField(blank=True, help_text='Name of the package.', max_length=100, null=True)),
('version', models.CharField(blank=True, help_text='Version of the package.', max_length=50, null=True)),
('qualifiers', models.CharField(blank=True, help_text='Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.', max_length=1024, null=True)),
('subpath', models.CharField(blank=True, help_text='Extra subpath within a package, relative to the package root.', max_length=200, null=True)),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='PackageReference',
name='Vulnerability',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('repository', models.CharField(blank=True, help_text='Repository URL eg:http://central.maven.org', max_length=50)),
('platform', models.CharField(blank=True, help_text='Platform eg:maven', max_length=50)),
('name', models.CharField(blank=True, help_text='Package reference name eg:org.apache.commons.io', max_length=50)),
('version', models.CharField(blank=True, help_text='Reference version', max_length=50)),
('package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Package')),
('cve_id', models.CharField(help_text='CVE ID', max_length=50, null=True, unique=True)),
('summary', models.TextField(blank=True, help_text='Summary of the vulnerability')),
('cvss', models.FloatField(help_text='CVSS Score', max_length=100, null=True)),
],
),
migrations.CreateModel(
name='ResolvedPackage',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Package')),
('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Vulnerability')),
],
),
migrations.CreateModel(
name='Vulnerability',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('summary', models.CharField(blank=True, help_text='Summary of the vulnerability', max_length=50)),
('cvss', models.FloatField(help_text='CVSS Score', max_length=50, null=True)),
],
),
migrations.CreateModel(
name='VulnerabilityReference',
name='PackageReference',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('source', models.CharField(blank=True, help_text='Source(s) name eg:NVD', max_length=50)),
('reference_id', models.CharField(blank=True, help_text='Reference ID, eg:CVE-ID', max_length=50)),
('url', models.URLField(blank=True, help_text='URL of Vulnerability data', max_length=1024)),
('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Vulnerability')),
('repository', models.CharField(blank=True, help_text='Repository URL eg:http://central.maven.org', max_length=100)),
('platform', models.CharField(blank=True, help_text='Platform eg:maven', max_length=50)),
('name', models.CharField(blank=True, help_text='Package reference name eg:org.apache.commons.io', max_length=50)),
('version', models.CharField(blank=True, help_text='Reference version', max_length=50)),
('package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Package')),
],
),
migrations.AddField(
model_name='resolvedpackage',
name='vulnerability',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Vulnerability'),
model_name='package',
name='vulnerabilities',
field=models.ManyToManyField(through='vulnerabilities.ImpactedPackage', to='vulnerabilities.Vulnerability'),
),
migrations.AddField(
model_name='impactedpackage',
Expand All @@ -80,12 +76,21 @@ class Migration(migrations.Migration):
name='vulnerability',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Vulnerability'),
),
migrations.AlterUniqueTogether(
name='vulnerabilityreference',
unique_together=set([('vulnerability', 'source', 'reference_id', 'url')]),
migrations.CreateModel(
name='VulnerabilityReference',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('source', models.CharField(blank=True, help_text='Source(s) name eg:NVD', max_length=50)),
('reference_id', models.CharField(blank=True, help_text='Reference ID, eg:DSA-4465-1', max_length=50)),
('url', models.URLField(blank=True, help_text='URL of Vulnerability data', max_length=1024)),
('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Vulnerability')),
],
options={
'unique_together': {('vulnerability', 'source', 'reference_id', 'url')},
},
),
migrations.AlterUniqueTogether(
name='impactedpackage',
unique_together=set([('vulnerability', 'package')]),
unique_together={('vulnerability', 'package')},
),
]
20 changes: 0 additions & 20 deletions vulnerabilities/migrations/0002_package_vulnerabilities.py

This file was deleted.

23 changes: 0 additions & 23 deletions vulnerabilities/migrations/0003_auto_20190406_0950.py

This file was deleted.

18 changes: 0 additions & 18 deletions vulnerabilities/migrations/0004_auto_20190407_1838.py

This file was deleted.

Loading

0 comments on commit 9312d26

Please sign in to comment.