def archlinux_dump(extract_data):
    """
    Store the entries fetched from the archlinux security tracker.

    `extract_data` is an iterable of mappings, each providing the keys
    'issues', 'name', 'advisories', 'packages', 'affected', 'fixed'
    and 'type'.

    For every entry this creates:
    - one Vulnerability whose summary is the entry 'type';
    - one VulnerabilityReference per id listed in 'issues', per distinct
      id listed in 'advisories', and one for the entry 'name';
    - for every package name, one Package at the affected version linked
      through an ImpactedPackage, and one Package at the fixed version
      linked through a ResolvedPackage, each with a PackageReference
      pointing at the tracker page of the package.

    When the entry has no fixed version, the literal string 'None' is
    stored as the version of the resolved package.
    """
    for entry in extract_data:
        issue_ids = entry['issues']
        group_id = entry['name']
        # Advisories go through a set so repeated ids are stored only once.
        advisory_ids = set(entry['advisories'])
        reference_ids = issue_ids + list(advisory_ids) + [group_id]

        package_names = entry['packages']
        affected_version = entry['affected']
        fixed_version = entry['fixed'] or 'None'

        vulnerability = Vulnerability.objects.create(summary=entry['type'])

        for reference_id in reference_ids:
            VulnerabilityReference.objects.create(
                vulnerability=vulnerability,
                reference_id=reference_id,
                url='https://security.archlinux.org/{}'.format(reference_id),
            )

        # Affected version first, then fixed version, for each package.
        version_links = (
            (affected_version, ImpactedPackage),
            (fixed_version, ResolvedPackage),
        )
        for package_name in package_names:
            tracker_page = 'https://security.archlinux.org/package/{}'.format(package_name)
            for version, link_model in version_links:
                package = Package.objects.create(
                    name=package_name,
                    version=version,
                )
                link_model.objects.create(
                    vulnerability=vulnerability,
                    package=package,
                )
                PackageReference.objects.create(
                    package=package,
                    repository=tracker_page,
                )
import json
from urllib.request import urlopen

ARCHLINUX_TRACKER_URL = 'https://security.archlinux.org/json'


def scrape_vulnerabilities(url=ARCHLINUX_TRACKER_URL, timeout=30):
    """
    Fetch and return data scraped from archlinux security tracker.

    `url` is the location of the JSON document to download; it defaults
    to ARCHLINUX_TRACKER_URL. `timeout` is the number of seconds handed
    to `urlopen` so that a stalled connection cannot block forever.

    Return the decoded JSON document as Python objects.
    Raise whatever `urlopen` raises when the document cannot be fetched,
    and ValueError when the payload is not valid UTF-8 encoded JSON.
    """
    # The context manager closes the response even when reading fails.
    with urlopen(url, timeout=timeout) as response:
        json_content = response.read()

    # Decode explicitly: json.loads only accepts bytes on Python >= 3.6.
    return json.loads(json_content.decode('utf-8'))
def test_archlinux_data_dump(self):
    """
    Load the archlinux sample from the test data directory, save it
    in the database and verify the stored entries.
    """
    sample_path = os.path.join(TEST_DATA, 'archlinux.json')
    with open(sample_path) as sample_file:
        test_data = json.load(sample_file)

    archlinux_dump(test_data)

    # Expected number of rows per model once the sample is saved.
    expected_counts = (
        (Vulnerability, 1),
        (VulnerabilityReference, 14),
        (Package, 8),
        (PackageReference, 8),
        (ImpactedPackage, 4),
        (ResolvedPackage, 4),
    )
    for model, expected in expected_counts:
        self.assertEqual(expected, model.objects.count())

    self.assertTrue(Vulnerability.objects.get(summary='multiple issues'))

    # One id of each kind: a CVE, an advisory and the group itself.
    for reference_id in ('CVE-2018-11360', 'ASA-201805-24', 'AVG-708'):
        self.assertTrue(VulnerabilityReference.objects.get(reference_id=reference_id))

    cli_package = Package.objects.filter(name='wireshark-cli')[0]
    self.assertEqual(cli_package.name, 'wireshark-cli')