diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0fe4b6d10..7caadf836 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,17 @@ Release notes ============= +Version (next) +------------------- + + +Version v34.0.2 +------------------- + +- Add management command to commit exported vulnerability data (#1600) +- Fix API 500 error (#1603) + + Version v34.0.1 ------------------- diff --git a/requirements.txt b/requirements.txt index 994f9ccf4..0326622b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,7 @@ click==8.1.2 coreapi==2.3.3 coreschema==0.0.4 cryptography==43.0.1 +crispy-bootstrap4==2024.1 cwe2==3.0.0 dateparser==1.1.1 decorator==5.1.1 @@ -35,8 +36,8 @@ djangorestframework==3.15.2 doc8==0.11.1 docopt==0.6.2 docutils==0.17.1 -drf-spectacular==0.27.2 -drf-spectacular-sidecar==2024.7.1 +drf-spectacular==0.24.2 +drf-spectacular-sidecar==2022.10.1 executing==0.8.3 fetchcode==0.3.0 freezegun==1.2.1 diff --git a/setup.cfg b/setup.cfg index b46f23cdf..a2fae91ec 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 34.0.1 +version = 34.0.2 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 @@ -62,11 +62,12 @@ install_requires = django-filter>=24.0 django-widget-tweaks>=1.5.0 django-crispy-forms>=2.3 + crispy-bootstrap4>=2024.1 django-environ>=0.11.0 gunicorn>=23.0.0 # for the API doc - drf-spectacular[sidecar]>=0.27.2 + drf-spectacular[sidecar]>=0.24.2 #essentials packageurl-python>=0.15 diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index 278ed636c..5d953db9b 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -27,7 +27,7 @@ from rest_framework.throttling import UserRateThrottle from vulnerabilities.models import Alias -from vulnerabilities.models import Kev +from vulnerabilities.models import Exploit from vulnerabilities.models import Package from vulnerabilities.models import Vulnerability from 
vulnerabilities.models import VulnerabilityReference @@ -175,10 +175,23 @@ def to_representation(self, instance): return representation -class KEVSerializer(serializers.ModelSerializer): +class ExploitSerializer(serializers.ModelSerializer): class Meta: - model = Kev - fields = ["date_added", "description", "required_action", "due_date", "resources_and_notes"] + model = Exploit + fields = [ + "date_added", + "description", + "required_action", + "due_date", + "notes", + "known_ransomware_campaign_use", + "source_date_published", + "exploit_type", + "platform", + "source_date_updated", + "data_source", + "source_url", + ] class VulnerabilitySerializer(BaseResourceSerializer): @@ -189,7 +202,7 @@ class VulnerabilitySerializer(BaseResourceSerializer): references = VulnerabilityReferenceSerializer(many=True, source="vulnerabilityreference_set") aliases = AliasSerializer(many=True, source="alias") - kev = KEVSerializer(read_only=True) + exploits = ExploitSerializer(many=True, read_only=True) weaknesses = WeaknessSerializer(many=True) severity_range_score = serializers.SerializerMethodField() @@ -199,10 +212,6 @@ def to_representation(self, instance): weaknesses = data.get("weaknesses", []) data["weaknesses"] = [weakness for weakness in weaknesses if weakness is not None] - kev = data.get("kev", None) - if not kev: - data.pop("kev") - return data def get_severity_range_score(self, instance): @@ -240,7 +249,7 @@ class Meta: "affected_packages", "references", "weaknesses", - "kev", + "exploits", "severity_range_score", ] @@ -676,14 +685,10 @@ def filter_alias(self, queryset, name, value): return self.queryset.filter(aliases__alias__icontains=alias) -class AliasViewSet(viewsets.ReadOnlyModelViewSet): +class AliasViewSet(VulnerabilityViewSet): """ Lookup for vulnerabilities by vulnerability aliases such as a CVE (https://nvd.nist.gov/general/cve-process). 
""" - queryset = Vulnerability.objects.all() - serializer_class = VulnerabilitySerializer - filter_backends = (filters.DjangoFilterBackend,) filterset_class = AliasFilterSet - throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] diff --git a/vulnerabilities/api_extension.py b/vulnerabilities/api_extension.py index a974f0796..4b9211c76 100644 --- a/vulnerabilities/api_extension.py +++ b/vulnerabilities/api_extension.py @@ -26,7 +26,7 @@ from rest_framework.throttling import AnonRateThrottle from vulnerabilities.api import BaseResourceSerializer -from vulnerabilities.models import Kev +from vulnerabilities.models import Exploit from vulnerabilities.models import Package from vulnerabilities.models import Vulnerability from vulnerabilities.models import VulnerabilityReference @@ -105,8 +105,21 @@ class Meta: class V2ExploitSerializer(ModelSerializer): class Meta: - model = Kev - fields = ("description", "required_action", "date_added", "due_date", "resources_and_notes") + model = Exploit + fields = [ + "date_added", + "description", + "required_action", + "due_date", + "notes", + "known_ransomware_campaign_use", + "source_date_published", + "exploit_type", + "platform", + "source_date_updated", + "data_source", + "source_url", + ] class V2VulnerabilitySerializer(ModelSerializer): diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index c44ced245..be1e838b0 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -19,13 +19,9 @@ from vulnerabilities.importers import epss from vulnerabilities.importers import fireeye from vulnerabilities.importers import gentoo -from vulnerabilities.importers import github from vulnerabilities.importers import github_osv -from vulnerabilities.importers import gitlab from vulnerabilities.importers import istio from vulnerabilities.importers import mozilla -from vulnerabilities.importers import nginx -from vulnerabilities.importers import nvd from 
vulnerabilities.importers import openssl from vulnerabilities.importers import oss_fuzz from vulnerabilities.importers import postgresql @@ -40,14 +36,14 @@ from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import github_importer +from vulnerabilities.pipelines import gitlab_importer +from vulnerabilities.pipelines import nginx_importer from vulnerabilities.pipelines import npm_importer +from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer IMPORTERS_REGISTRY = [ - nvd.NVDImporter, - github.GitHubAPIImporter, - gitlab.GitLabAPIImporter, - nginx.NginxImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, openssl.OpensslImporter, @@ -78,6 +74,10 @@ vulnrichment.VulnrichImporter, pypa_importer.PyPaImporterPipeline, npm_importer.NpmImporterPipeline, + nginx_importer.NginxImporterPipeline, + gitlab_importer.GitLabImporterPipeline, + github_importer.GitHubAPIImporterPipeline, + nvd_importer.NVDImporterPipeline, ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index d15504166..6e9c24b38 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -8,9 +8,11 @@ # from vulnerabilities.improvers import valid_versions -from vulnerabilities.improvers import vulnerability_kev from vulnerabilities.improvers import vulnerability_status from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipelines import enhance_with_exploitdb +from vulnerabilities.pipelines import enhance_with_kev +from vulnerabilities.pipelines import enhance_with_metasploit from vulnerabilities.pipelines import flag_ghost_packages IMPROVERS_REGISTRY = [ @@ -31,8 +33,10 @@ valid_versions.GithubOSVImprover, vulnerability_status.VulnerabilityStatusImprover, 
valid_versions.CurlImprover, - vulnerability_kev.VulnerabilityKevImprover, flag_ghost_packages.FlagGhostPackagePipeline, + enhance_with_kev.VulnerabilityKevPipeline, + enhance_with_metasploit.MetasploitImproverPipeline, + enhance_with_exploitdb.ExploitDBImproverPipeline, ] IMPROVERS_REGISTRY = { diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index 32f3dfc35..5d1e087ec 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -12,7 +12,6 @@ from datetime import datetime from typing import Iterable from typing import List -from typing import Mapping from typing import Optional from django.db.models import Q @@ -32,11 +31,8 @@ from vulnerabilities.importers.debian import DebianImporter from vulnerabilities.importers.debian_oval import DebianOvalImporter from vulnerabilities.importers.elixir_security import ElixirSecurityImporter -from vulnerabilities.importers.github import GitHubAPIImporter from vulnerabilities.importers.github_osv import GithubOSVImporter -from vulnerabilities.importers.gitlab import GitLabAPIImporter from vulnerabilities.importers.istio import IstioImporter -from vulnerabilities.importers.nginx import NginxImporter from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter from vulnerabilities.importers.ruby import RubyImporter from vulnerabilities.importers.ubuntu import UbuntuImporter @@ -44,6 +40,10 @@ from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines.github_importer import GitHubAPIImporterPipeline +from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline +from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline from vulnerabilities.utils 
import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import clean_nginx_git_tag @@ -63,6 +63,8 @@ class ValidVersionImprover(Improver): @property def interesting_advisories(self) -> QuerySet: + if issubclass(self.importer, VulnerableCodeBaseImporterPipeline): + return Advisory.objects.filter(Q(created_by=self.importer.pipeline_id)).paginated() return Advisory.objects.filter(Q(created_by=self.importer.qualified_name)).paginated() def get_package_versions( @@ -220,7 +222,7 @@ class NginxBasicImprover(Improver): @property def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=NginxImporter.qualified_name).paginated() + return Advisory.objects.filter(created_by=NginxImporterPipeline.pipeline_id).paginated() def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: all_versions = list(self.fetch_nginx_version_from_git_tags()) @@ -364,12 +366,12 @@ class DebianBasicImprover(ValidVersionImprover): class GitLabBasicImprover(ValidVersionImprover): - importer = GitLabAPIImporter + importer = GitLabImporterPipeline ignorable_versions = [] class GitHubBasicImprover(ValidVersionImprover): - importer = GitHubAPIImporter + importer = GitHubAPIImporterPipeline ignorable_versions = frozenset( [ "0.1-bulbasaur", diff --git a/vulnerabilities/improvers/vulnerability_kev.py b/vulnerabilities/improvers/vulnerability_kev.py deleted file mode 100644 index 3ca3291bc..000000000 --- a/vulnerabilities/improvers/vulnerability_kev.py +++ /dev/null @@ -1,66 +0,0 @@ -import logging -from typing import Iterable - -import requests -from django.db.models import QuerySet - -from vulnerabilities.improver import Improver -from vulnerabilities.improver import Inference -from vulnerabilities.models import Advisory -from vulnerabilities.models import Alias -from vulnerabilities.models import Kev - -logger = logging.getLogger(__name__) - - -class VulnerabilityKevImprover(Improver): - """ - Known Exploited Vulnerabilities Improver 
- """ - - @property - def interesting_advisories(self) -> QuerySet: - # TODO Modify KEV improver to iterate over the vulnerabilities alias, not the advisory - return [Advisory.objects.first()] - - def get_inferences(self, advisory_data) -> Iterable[Inference]: - """ - Fetch Kev data, iterate over it to find the vulnerability with the specified alias, and create or update - the Kev instance accordingly. - """ - - kev_url = ( - "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json" - ) - response = requests.get(kev_url) - kev_data = response.json() - if response.status_code != 200: - logger.error( - f"Failed to fetch the CISA Catalog of Known Exploited Vulnerabilities: {kev_url}" - ) - return [] - - for kev_vul in kev_data.get("vulnerabilities", []): - alias = Alias.objects.get_or_none(alias=kev_vul["cveID"]) - if not alias: - continue - - vul = alias.vulnerability - - if not vul: - continue - - Kev.objects.update_or_create( - vulnerability=vul, - defaults={ - "description": kev_vul["shortDescription"], - "date_added": kev_vul["dateAdded"], - "required_action": kev_vul["requiredAction"], - "due_date": kev_vul["dueDate"], - "resources_and_notes": kev_vul["notes"], - "known_ransomware_campaign_use": True - if kev_vul["knownRansomwareCampaignUse"] == "Known" - else False, - }, - ) - return [] diff --git a/vulnerabilities/improvers/vulnerability_status.py b/vulnerabilities/improvers/vulnerability_status.py index 0157db557..353cca54c 100644 --- a/vulnerabilities/improvers/vulnerability_status.py +++ b/vulnerabilities/improvers/vulnerability_status.py @@ -14,7 +14,6 @@ from django.db.models.query import QuerySet from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importers.nvd import NVDImporter from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory @@ -22,6 +21,7 @@ from vulnerabilities.models import Vulnerability from vulnerabilities.models 
import VulnerabilityChangeLog from vulnerabilities.models import VulnerabilityStatusType +from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline from vulnerabilities.utils import fetch_response from vulnerabilities.utils import get_item @@ -38,7 +38,7 @@ class VulnerabilityStatusImprover(Improver): @property def interesting_advisories(self) -> QuerySet: return ( - Advisory.objects.filter(Q(created_by=NVDImporter.qualified_name)) + Advisory.objects.filter(Q(created_by=NVDImporterPipeline.pipeline_id)) .distinct("aliases") .paginated() ) diff --git a/vulnerabilities/management/commands/commit_export.py b/vulnerabilities/management/commands/commit_export.py new file mode 100644 index 000000000..9d47904f3 --- /dev/null +++ b/vulnerabilities/management/commands/commit_export.py @@ -0,0 +1,179 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +import os +import shutil +import tempfile +import textwrap +from datetime import datetime +from pathlib import Path +from urllib.parse import urlparse + +import requests +from django.core.management.base import BaseCommand +from django.core.management.base import CommandError +from git import Repo + +from vulnerablecode.settings import ALLOWED_HOSTS +from vulnerablecode.settings import VULNERABLECODE_VERSION + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + help = """Commit the exported vulnerability data in the backing GitHub repository. + + This command takes the path to the exported vulnerability data and creates a pull + request in the backing GitHub repository with the changes. 
+ """ + + def add_arguments(self, parser): + parser.add_argument( + "path", + help="Path to exported data.", + ) + + def handle(self, *args, **options): + if path := options["path"]: + base_path = Path(path) + + if not path or not base_path.is_dir(): + raise CommandError("Enter a valid directory path to the exported data.") + + vcio_export_repo_url = os.environ.get("VULNERABLECODE_EXPORT_REPO_URL") + vcio_github_service_token = os.environ.get("VULNERABLECODE_GITHUB_SERVICE_TOKEN") + vcio_github_service_name = os.environ.get("VULNERABLECODE_GITHUB_SERVICE_NAME") + vcio_github_service_email = os.environ.get("VULNERABLECODE_GITHUB_SERVICE_EMAIL") + + # Check for missing environment variables + missing_vars = [] + if not vcio_export_repo_url: + missing_vars.append("VULNERABLECODE_EXPORT_REPO_URL") + if not vcio_github_service_token: + missing_vars.append("VULNERABLECODE_GITHUB_SERVICE_TOKEN") + if not vcio_github_service_name: + missing_vars.append("VULNERABLECODE_GITHUB_SERVICE_NAME") + if not vcio_github_service_email: + missing_vars.append("VULNERABLECODE_GITHUB_SERVICE_EMAIL") + + if missing_vars: + raise CommandError(f'Missing environment variables: {", ".join(missing_vars)}') + + local_dir = tempfile.mkdtemp() + current_date = datetime.now().strftime("%Y-%m-%d") + + branch_name = f"export-update-{current_date}" + pr_title = "Update package vulnerabilities from VulnerableCode" + pr_body = f"""\ + Tool: pkg:github/aboutcode-org/vulnerablecode@v{VULNERABLECODE_VERSION} + Reference: https://{ALLOWED_HOSTS[0]}/ + """ + commit_message = f"""\ + Update package vulnerabilities from VulnerableCode + + Tool: pkg:github/aboutcode-org/vulnerablecode@v{VULNERABLECODE_VERSION} + Reference: https://{ALLOWED_HOSTS[0]}/ + + Signed-off-by: {vcio_github_service_name} <{vcio_github_service_email}> + """ + + self.stdout.write("Committing VulnerableCode package and vulnerability data.") + repo = self.clone_repository( + repo_url=vcio_export_repo_url, + local_path=local_dir, + 
token=vcio_github_service_token, + ) + + repo.config_writer().set_value("user", "name", vcio_github_service_name).release() + repo.config_writer().set_value("user", "email", vcio_github_service_email).release() + + self.add_changes(repo=repo, content_path=path) + + if self.commit_and_push_changes( + repo=repo, + branch=branch_name, + commit_message=textwrap.dedent(commit_message), + ): + self.create_pull_request( + repo_url=vcio_export_repo_url, + branch=branch_name, + title=pr_title, + body=textwrap.dedent(pr_body), + token=vcio_github_service_token, + ) + shutil.rmtree(local_dir) + + def clone_repository(self, repo_url, local_path, token): + """Clone repository to local_path.""" + + if os.path.exists(local_path): + shutil.rmtree(local_path) + + authenticated_repo_url = repo_url.replace("https://", f"https://{token}@") + return Repo.clone_from(authenticated_repo_url, local_path) + + def add_changes(self, repo, content_path): + """Copy changes from the ``content_path`` to ``repo``.""" + + source_path = Path(content_path) + destination_path = Path(repo.working_dir) + + for item in source_path.iterdir(): + if not item.is_dir(): + continue + target_item = destination_path / item.name + if target_item.exists(): + shutil.rmtree(target_item) + shutil.copytree(item, target_item) + + def commit_and_push_changes(self, repo, branch, commit_message, remote_name="origin"): + """Commit changes and push to remote repository, return name of changed files.""" + + repo.git.checkout("HEAD", b=branch) + files_changed = repo.git.diff("HEAD", name_only=True) + + if not files_changed: + self.stderr.write(self.style.SUCCESS("No changes to commit.")) + return + + repo.git.add(A=True) + repo.index.commit(commit_message) + repo.git.push(remote_name, branch) + return files_changed + + def create_pull_request(self, repo_url, branch, title, body, token): + """Create a pull request in the GitHub repository.""" + + url_parts = urlparse(repo_url).path + path_parts = 
url_parts.strip("/").rstrip(".git").split("/") + + if len(path_parts) >= 2: + repo_owner = path_parts[0] + repo_name = path_parts[1] + else: + raise ValueError("Invalid GitHub repo URL") + + url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/pulls" + headers = {"Authorization": f"token {token}", "Accept": "application/vnd.github.v3+json"} + data = {"title": title, "head": branch, "base": "main", "body": body} + + response = requests.post(url, headers=headers, json=data) + + if response.status_code == 201: + pr_response = response.json() + self.stdout.write( + self.style.SUCCESS( + f"Pull request created successfully: {pr_response.get('html_url')}." + ) + ) + else: + self.stderr.write( + self.style.ERROR(f"Failed to create pull request: {response.content}") + ) diff --git a/vulnerabilities/migrations/0065_update_nginx_advisory_created_by.py b/vulnerabilities/migrations/0065_update_nginx_advisory_created_by.py new file mode 100644 index 000000000..80b43a954 --- /dev/null +++ b/vulnerabilities/migrations/0065_update_nginx_advisory_created_by.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.15 on 2024-09-23 13:06 + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to the new pipeline_id. 
+""" + + +def update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.nginx.NginxImporter").update( + created_by=NginxImporterPipeline.pipeline_id + ) + + + +def reverse_update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=NginxImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.nginx.NginxImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0064_update_npm_pypa_advisory_created_by"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] diff --git a/vulnerabilities/migrations/0066_update_gitlab_advisory_created_by.py b/vulnerabilities/migrations/0066_update_gitlab_advisory_created_by.py new file mode 100644 index 000000000..e72b0616b --- /dev/null +++ b/vulnerabilities/migrations/0066_update_gitlab_advisory_created_by.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.15 on 2024-09-27 13:08 + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to the new pipeline_id. 
+""" + + +def update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.gitlab.GitLabAPIImporter").update( + created_by=GitLabImporterPipeline.pipeline_id + ) + + + +def reverse_update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=GitLabImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.gitlab.GitLabAPIImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0065_update_nginx_advisory_created_by"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] diff --git a/vulnerabilities/migrations/0067_update_github_advisory_created_by.py b/vulnerabilities/migrations/0067_update_github_advisory_created_by.py new file mode 100644 index 000000000..4b9bb8485 --- /dev/null +++ b/vulnerabilities/migrations/0067_update_github_advisory_created_by.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.15 on 2024-09-27 14:31 + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to the new pipeline_id. 
+""" + + +def update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.github_importer import GitHubAPIImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.github.GitHubAPIImporter").update( + created_by=GitHubAPIImporterPipeline.pipeline_id + ) + + + +def reverse_update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.github_importer import GitHubAPIImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=GitHubAPIImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.github.GitHubAPIImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0066_update_gitlab_advisory_created_by"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] diff --git a/vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py b/vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py new file mode 100644 index 000000000..2a91f55ee --- /dev/null +++ b/vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.15 on 2024-09-27 19:38 + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to the new pipeline_id. 
+""" + + +def update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.nvd.NVDImporter").update( + created_by=NVDImporterPipeline.pipeline_id + ) + + + +def reverse_update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=NVDImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.nvd.NVDImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0067_update_github_advisory_created_by"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] diff --git a/vulnerabilities/migrations/0069_exploit_delete_kev.py b/vulnerabilities/migrations/0069_exploit_delete_kev.py new file mode 100644 index 000000000..5c06911eb --- /dev/null +++ b/vulnerabilities/migrations/0069_exploit_delete_kev.py @@ -0,0 +1,131 @@ +# Generated by Django 4.2.15 on 2024-09-21 15:37 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0068_update_nvd_advisory_created_by"), + ] + + operations = [ + migrations.CreateModel( + name="Exploit", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "date_added", + models.DateField( + blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", + null=True, + ), + ), + ( + "description", + models.TextField( + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", + null=True, + ), + ), + ( + "required_action", + 
models.TextField( + blank=True, + help_text="The required action to address the vulnerability, typically to apply vendor updates or apply vendor mitigations or to discontinue use.", + null=True, + ), + ), + ( + "due_date", + models.DateField( + blank=True, + help_text="The date the required action is due, which applies to all USA federal civilian executive branch (FCEB) agencies, but all organizations are strongly encouraged to execute the required action", + null=True, + ), + ), + ( + "notes", + models.TextField( + blank=True, + help_text="Additional notes and resources about the vulnerability, often a URL to vendor instructions.", + null=True, + ), + ), + ( + "known_ransomware_campaign_use", + models.BooleanField( + default=False, + help_text="Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; \n or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.", + ), + ), + ( + "source_date_published", + models.DateField( + blank=True, + help_text="The date that the exploit was published or disclosed.", + null=True, + ), + ), + ( + "exploit_type", + models.TextField( + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "platform", + models.TextField( + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "source_date_updated", + models.DateField( + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + null=True, + ), + ), + ( + "data_source", + models.TextField( + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + null=True, + ), + ), + ( + "source_url", + models.URLField( + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", + null=True, + ), + ), + ( + 
"vulnerability", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="exploits", + to="vulnerabilities.vulnerability", + ), + ), + ], + ), + migrations.DeleteModel( + name="Kev", + ), + ] diff --git a/vulnerabilities/migrations/0070_alter_advisory_created_by_and_more.py b/vulnerabilities/migrations/0070_alter_advisory_created_by_and_more.py new file mode 100644 index 000000000..41294f20a --- /dev/null +++ b/vulnerabilities/migrations/0070_alter_advisory_created_by_and_more.py @@ -0,0 +1,39 @@ +# Generated by Django 4.2.15 on 2024-10-07 12:28 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0069_exploit_delete_kev"), + ] + + operations = [ + migrations.AlterField( + model_name="advisory", + name="created_by", + field=models.CharField( + help_text="Fully qualified name of the importer prefixed with themodule name importing the advisory. Eg:vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline", + max_length=100, + ), + ), + migrations.AlterField( + model_name="packagechangelog", + name="software_version", + field=models.CharField( + default="34.0.2", + help_text="Version of the software at the time of change", + max_length=100, + ), + ), + migrations.AlterField( + model_name="vulnerabilitychangelog", + name="software_version", + field=models.CharField( + default="34.0.2", + help_text="Version of the software at the time of change", + max_length=100, + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index ae885ecd2..903443fa1 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1103,7 +1103,7 @@ class Advisory(models.Model): max_length=100, help_text="Fully qualified name of the importer prefixed with the" "module name importing the advisory. 
Eg:" - "vulnerabilities.importers.nginx.NginxImporter", + "vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline", ) url = models.URLField( blank=True, @@ -1391,49 +1391,90 @@ def log_fixing(cls, package, importer, source_url, related_vulnerability): ) -class Kev(models.Model): +class Exploit(models.Model): """ - Known Exploited Vulnerabilities + A vulnerability exploit is code used to + take advantage of a security flaw for unauthorized access or malicious activity. """ - vulnerability = models.OneToOneField( + vulnerability = models.ForeignKey( Vulnerability, + related_name="exploits", on_delete=models.CASCADE, - related_name="kev", ) date_added = models.DateField( - help_text="The date the vulnerability was added to the Known Exploited Vulnerabilities" - " (KEV) catalog in the format YYYY-MM-DD.", null=True, blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", ) description = models.TextField( - help_text="Description of the vulnerability in the Known Exploited Vulnerabilities" - " (KEV) catalog, usually a refinement of the original CVE description" + null=True, + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", ) required_action = models.TextField( + null=True, + blank=True, help_text="The required action to address the vulnerability, typically to " - "apply vendor updates or apply vendor mitigations or to discontinue use." + "apply vendor updates or apply vendor mitigations or to discontinue use.", ) due_date = models.DateField( - help_text="The date the required action is due in the format YYYY-MM-DD," - "which applies to all USA federal civilian executive branch (FCEB) agencies," - "but all organizations are strongly encouraged to execute the required action." 
+ null=True, + blank=True, + help_text="The date the required action is due, which applies" + " to all USA federal civilian executive branch (FCEB) agencies, " + "but all organizations are strongly encouraged to execute the required action", ) - resources_and_notes = models.TextField( + notes = models.TextField( + null=True, + blank=True, help_text="Additional notes and resources about the vulnerability," - " often a URL to vendor instructions." + " often a URL to vendor instructions.", ) known_ransomware_campaign_use = models.BooleanField( default=False, - help_text="""Known if this vulnerability is known to have been leveraged as part of a ransomware campaign; - or 'Unknown' if CISA lacks confirmation that the vulnerability has been utilized for ransomware.""", + help_text="""Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; + or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.""", + ) + + source_date_published = models.DateField( + null=True, blank=True, help_text="The date that the exploit was published or disclosed." 
+ ) + + exploit_type = models.TextField( + null=True, + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + ) + + platform = models.TextField( + null=True, + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + ) + + source_date_updated = models.DateField( + null=True, + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + ) + + data_source = models.TextField( + null=True, + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + ) + + source_url = models.URLField( + null=True, + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", ) @property diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index aa3d59d83..0d3589b67 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -89,7 +89,12 @@ def advisories_count(self) -> int: def collect_and_store_advisories(self): collected_advisory_count = 0 - progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log) + estimated_advisory_count = self.advisories_count() + + if estimated_advisory_count > 0: + self.log(f"Collecting {estimated_advisory_count:,d} advisories") + + progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log) for advisory in progress.iter(self.collect_advisories()): if _obj := insert_advisory( advisory=advisory, diff --git a/vulnerabilities/pipelines/enhance_with_exploitdb.py b/vulnerabilities/pipelines/enhance_with_exploitdb.py new file mode 100644 index 000000000..54554f951 --- /dev/null +++ b/vulnerabilities/pipelines/enhance_with_exploitdb.py @@ -0,0 +1,158 @@ +import csv +import io +import logging +from traceback import format_exc as traceback_format_exc + +import requests +from aboutcode.pipeline 
import LoopProgress +from dateutil import parser as dateparser +from django.db import DataError + +from vulnerabilities.models import Alias +from vulnerabilities.models import Exploit +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.models import VulnerabilityRelatedReference +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class ExploitDBImproverPipeline(VulnerableCodePipeline): + """ + ExploitDB Improver Pipeline: Fetch ExploitDB data, iterate over it to find the vulnerability with + the specified alias, and create or update the ref and ref-type accordingly. + """ + + pipeline_id = "enhance_with_exploitdb" + spdx_license_expression = "GPL-2.0" + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_exploit, + ) + + def fetch_exploits(self): + exploit_db_url = ( + "https://gitlab.com/exploit-database/exploitdb/-/raw/main/files_exploits.csv" + ) + self.log(f"Fetching {exploit_db_url}") + + try: + response = requests.get(exploit_db_url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the Exploit-DB Exploits: {exploit_db_url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + + self.exploit_data = io.StringIO(response.text) + + def add_exploit(self): + + csvreader = csv.DictReader(self.exploit_data) + + raw_data = list(csvreader) + fetched_exploit_count = len(raw_data) + + vulnerability_exploit_count = 0 + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + + for row in progress.iter(raw_data): + vulnerability_exploit_count += add_vulnerability_exploit(row, self.log) + + self.log( + f"Successfully added {vulnerability_exploit_count:,d} exploit-db vulnerability exploit" + ) + + +def add_vulnerability_exploit(row, logger): + vulnerabilities = set() + + aliases = 
row["codes"].split(";") if row["codes"] else [] + + if not aliases: + return 0 + + for raw_alias in aliases: + try: + if alias := Alias.objects.get(alias=raw_alias): + vulnerabilities.add(alias.vulnerability) + except Alias.DoesNotExist: + continue + + if not vulnerabilities: + logger(f"No vulnerability found for aliases {aliases}") + return 0 + + date_added = parse_date(row["date_added"]) + source_date_published = parse_date(row["date_published"]) + source_date_updated = parse_date(row["date_updated"]) + + for vulnerability in vulnerabilities: + add_exploit_references(row["codes"], row["source_url"], row["file"], vulnerability, logger) + try: + Exploit.objects.update_or_create( + vulnerability=vulnerability, + data_source="Exploit-DB", + defaults={ + "date_added": date_added, + "description": row["description"], + "known_ransomware_campaign_use": row["verified"], + "source_date_published": source_date_published, + "exploit_type": row["type"], + "platform": row["platform"], + "source_date_updated": source_date_updated, + "source_url": row["source_url"], + }, + ) + except DataError as e: + logger( + f"Failed to Create the Vulnerability Exploit-DB with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + return 1 + + +def add_exploit_references(ref_id, direct_url, path, vul, logger): + url_map = { + "file_url": f"https://gitlab.com/exploit-database/exploitdb/-/blob/main/{path}", + "direct_url": direct_url, + } + + for key, url in url_map.items(): + if url: + try: + ref, _ = VulnerabilityReference.objects.update_or_create( + url=url, + defaults={ + "reference_id": ref_id, + "reference_type": VulnerabilityReference.EXPLOIT, + }, + ) + + # Always link: an existing reference may still be unlinked from this vulnerability. + VulnerabilityRelatedReference.objects.get_or_create( + vulnerability=vul, + reference=ref, + ) + + except DataError as e: + logger( + f"Failed to Create the Vulnerability Reference For Exploit-DB with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + + +def
parse_date(date_string): + if date_string: + try: + date_obj = dateparser.parse(date_string).date() + return date_obj.strftime("%Y-%m-%d") + except (ValueError, TypeError, Exception) as e: + logging.error( + f"Error while parsing ExploitDB date '{date_string}' with error {e!r}:\n{traceback_format_exc()}" + ) + return diff --git a/vulnerabilities/pipelines/enhance_with_kev.py b/vulnerabilities/pipelines/enhance_with_kev.py new file mode 100644 index 000000000..6372bd3b0 --- /dev/null +++ b/vulnerabilities/pipelines/enhance_with_kev.py @@ -0,0 +1,89 @@ +import logging +from traceback import format_exc as traceback_format_exc + +import requests +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import Alias +from vulnerabilities.models import Exploit +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class VulnerabilityKevPipeline(VulnerableCodePipeline): + """ + Known Exploited Vulnerabilities Pipeline: Retrieve KEV data, iterate through it to identify vulnerabilities + by their associated aliases, and create or update the corresponding Exploit instances. 
+ """ + + pipeline_id = "enhance_with_kev" + license_expression = None + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_exploits, + ) + + def fetch_exploits(self): + kev_url = ( + "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json" + ) + self.log(f"Fetching {kev_url}") + + try: + response = requests.get(kev_url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the KEV Exploits: {kev_url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + self.kev_data = response.json() + + def add_exploits(self): + # Count the records actually iterated: the feed "count" key may be missing or stale. + fetched_exploit_count = len(self.kev_data.get("vulnerabilities", [])) + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + vulnerability_exploit_count = 0 + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + + for record in progress.iter(self.kev_data.get("vulnerabilities", [])): + vulnerability_exploit_count += add_vulnerability_exploit( + kev_vul=record, + logger=self.log, + ) + + self.log(f"Successfully added {vulnerability_exploit_count:,d} kev exploit") + + +def add_vulnerability_exploit(kev_vul, logger): + cve_id = kev_vul.get("cveID") + + if not cve_id: + return 0 + + vulnerability = None + try: + if alias := Alias.objects.get(alias=cve_id): + vulnerability = alias.vulnerability + except Alias.DoesNotExist: + logger(f"No vulnerability found for aliases {cve_id}") + return 0 + + Exploit.objects.update_or_create( + vulnerability=vulnerability, + data_source="KEV", + defaults={ + "description": kev_vul["shortDescription"], + "date_added": kev_vul["dateAdded"], + "required_action": kev_vul["requiredAction"], + "due_date": kev_vul["dueDate"], + "notes": kev_vul["notes"], + "known_ransomware_campaign_use": True + if kev_vul["knownRansomwareCampaignUse"] == "Known" + else False, + }, + ) + return 1 diff --git
a/vulnerabilities/pipelines/enhance_with_metasploit.py b/vulnerabilities/pipelines/enhance_with_metasploit.py new file mode 100644 index 000000000..72897abd0 --- /dev/null +++ b/vulnerabilities/pipelines/enhance_with_metasploit.py @@ -0,0 +1,111 @@ +import logging +from traceback import format_exc as traceback_format_exc + +import requests +import saneyaml +from aboutcode.pipeline import LoopProgress +from dateutil import parser as dateparser + +from vulnerabilities.models import Alias +from vulnerabilities.models import Exploit +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class MetasploitImproverPipeline(VulnerableCodePipeline): + """ + Metasploit Exploits Pipeline: Retrieve Metasploit data, iterate through it to identify vulnerabilities + by their associated aliases, and create or update the corresponding Exploit instances. + """ + + pipeline_id = "enhance_with_metasploit" + spdx_license_expression = "BSD-3-clause" + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_vulnerability_exploits, + ) + + def fetch_exploits(self): + url = "https://raw.githubusercontent.com/rapid7/metasploit-framework/master/db/modules_metadata_base.json" + self.log(f"Fetching {url}") + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the Metasploit Exploits: {url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + + self.metasploit_data = response.json() + + def add_vulnerability_exploits(self): + fetched_exploit_count = len(self.metasploit_data) + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + + vulnerability_exploit_count = 0 + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + for _, record in progress.iter(self.metasploit_data.items()): + vulnerability_exploit_count += add_vulnerability_exploit( + record=record, + 
logger=self.log, + ) + self.log(f"Successfully added {vulnerability_exploit_count:,d} vulnerability exploit") + + +def add_vulnerability_exploit(record, logger): + vulnerabilities = set() + references = record.get("references", []) + + interesting_references = [ + ref for ref in references if not ref.startswith("OSVDB") and not ref.startswith("URL-") + ] + + if not interesting_references: + return 0 + + for ref in interesting_references: + try: + if alias := Alias.objects.get(alias=ref): + vulnerabilities.add(alias.vulnerability) + except Alias.DoesNotExist: + continue + + if not vulnerabilities: + logger(f"No vulnerability found for aliases {interesting_references}") + return 0 + + description = record.get("description", "") + notes = record.get("notes", {}) + platform = record.get("platform") + + source_url = "" + if path := record.get("path"): + source_url = f"https://github.com/rapid7/metasploit-framework/tree/master{path}" + source_date_published = None + + if disclosure_date := record.get("disclosure_date"): + try: + source_date_published = dateparser.parse(disclosure_date).date() + except ValueError as e: + logger( + f"Error while parsing date {disclosure_date} with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + + for vulnerability in vulnerabilities: + Exploit.objects.update_or_create( + vulnerability=vulnerability, + data_source="Metasploit", + defaults={ + "description": description, + "notes": saneyaml.dump(notes), + "source_date_published": source_date_published, + "platform": platform, + "source_url": source_url, + }, + ) + return 1 diff --git a/vulnerabilities/importers/github.py b/vulnerabilities/pipelines/github_importer.py similarity index 59% rename from vulnerabilities/importers/github.py rename to vulnerabilities/pipelines/github_importer.py index c12c43044..4603b939a 100644 --- a/vulnerabilities/importers/github.py +++ b/vulnerabilities/pipelines/github_importer.py @@ -8,7 +8,10 @@ # import logging +from traceback import 
format_exc as traceback_format_exc +from typing import Callable from typing import Iterable +from typing import List from typing import Optional from cwe2.database import Database @@ -21,85 +24,105 @@ from vulnerabilities import utils from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import dedupe from vulnerabilities.utils import get_cwe_id from vulnerabilities.utils import get_item -logger = logging.getLogger(__name__) - -PACKAGE_TYPE_BY_GITHUB_ECOSYSTEM = { - "MAVEN": "maven", - "NUGET": "nuget", - "COMPOSER": "composer", - "PIP": "pypi", - "RUBYGEMS": "gem", - "NPM": "npm", - "RUST": "cargo", - # "GO": "golang", -} - -GITHUB_ECOSYSTEM_BY_PACKAGE_TYPE = { - value: key for (key, value) in PACKAGE_TYPE_BY_GITHUB_ECOSYSTEM.items() -} - -# TODO: We will try to gather more info from GH API -# Check https://github.com/nexB/vulnerablecode/issues/1039#issuecomment-1366458885 -# Check https://github.com/nexB/vulnerablecode/issues/645 -# set of all possible values of first '%s' = {'MAVEN','COMPOSER', 'NUGET', 'RUBYGEMS', 'PYPI', 'NPM', 'RUST'} -# second '%s' is interesting, it will have the value '' for the first request, -GRAPHQL_QUERY_TEMPLATE = """ -query{ - securityVulnerabilities(first: 100, ecosystem: %s, %s) { - edges { - node { - advisory { - identifiers { - type - value - } - summary - references { - url - } - severity - cwes(first: 10){ - nodes { - cweId + +class GitHubAPIImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect GitHub advisories.""" + + pipeline_id = "github_importer" + + spdx_license_expression = "CC-BY-4.0" + license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" + importer_name = "GHSA Importer" + + 
@classmethod + def steps(cls): + return ( + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) + + package_type_by_github_ecosystem = { + "MAVEN": "maven", + "NUGET": "nuget", + "COMPOSER": "composer", + "PIP": "pypi", + "RUBYGEMS": "gem", + "NPM": "npm", + "RUST": "cargo", + # "GO": "golang", + } + + def advisories_count(self): + advisory_query = """ + query{ + securityVulnerabilities(first: 0, ecosystem: %s) { + totalCount + } + } + """ + advisory_counts = 0 + for ecosystem in self.package_type_by_github_ecosystem.keys(): + graphql_query = {"query": advisory_query % (ecosystem)} + response = utils.fetch_github_graphql_query(graphql_query) + advisory_counts += get_item(response, "data", "securityVulnerabilities", "totalCount") + return advisory_counts + + def collect_advisories(self) -> Iterable[AdvisoryData]: + + # TODO: We will try to gather more info from GH API + # Check https://github.com/nexB/vulnerablecode/issues/1039#issuecomment-1366458885 + # Check https://github.com/nexB/vulnerablecode/issues/645 + # set of all possible values of first '%s' = {'MAVEN','COMPOSER', 'NUGET', 'RUBYGEMS', 'PYPI', 'NPM', 'RUST'} + # second '%s' is interesting, it will have the value '' for the first request, + advisory_query = """ + query{ + securityVulnerabilities(first: 100, ecosystem: %s, %s) { + edges { + node { + advisory { + identifiers { + type + value + } + summary + references { + url + } + severity + cwes(first: 10){ + nodes { + cweId + } + } + publishedAt + } + firstPatchedVersion{ + identifier } + package { + name + } + vulnerableVersionRange } - publishedAt - } - firstPatchedVersion{ - identifier } - package { - name + pageInfo { + hasNextPage + endCursor } - vulnerableVersionRange } } - pageInfo { - hasNextPage - endCursor - } - } -} -""" - - -class GitHubAPIImporter(Importer): - spdx_license_expression = "CC-BY-4.0" - importer_name = "GHSA Importer" - license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" - - def 
advisory_data(self) -> Iterable[AdvisoryData]: - for ecosystem, package_type in PACKAGE_TYPE_BY_GITHUB_ECOSYSTEM.items(): + """ + for ecosystem, package_type in self.package_type_by_github_ecosystem.items(): end_cursor_exp = "" while True: - graphql_query = {"query": GRAPHQL_QUERY_TEMPLATE % (ecosystem, end_cursor_exp)} + graphql_query = {"query": advisory_query % (ecosystem, end_cursor_exp)} response = utils.fetch_github_graphql_query(graphql_query) page_info = get_item(response, "data", "securityVulnerabilities", "pageInfo") @@ -114,7 +137,7 @@ def advisory_data(self) -> Iterable[AdvisoryData]: break -def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]: +def get_purl(pkg_type: str, github_name: str, logger: Callable = None) -> Optional[PackageURL]: """ Return a PackageURL by splitting the `github_name` using the `pkg_type` convention. Return None and log an error if we can not split or it is an @@ -129,7 +152,8 @@ def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]: """ if pkg_type == "maven": if ":" not in github_name: - logger.error(f"get_purl: Invalid maven package name {github_name}") + if logger: + logger(f"get_purl: Invalid maven package name {github_name}", level=logging.ERROR) return ns, _, name = github_name.partition(":") return PackageURL(type=pkg_type, namespace=ns, name=name) @@ -143,18 +167,23 @@ def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]: if pkg_type in ("nuget", "pypi", "gem", "golang", "npm", "cargo"): return PackageURL(type=pkg_type, name=github_name) - logger.error(f"get_purl: Unknown package type {pkg_type}") + if logger: + logger(f"get_purl: Unknown package type {pkg_type}", level=logging.ERROR) -def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: +def process_response( + resp: dict, package_type: str, logger: Callable = None +) -> Iterable[AdvisoryData]: """ Yield `AdvisoryData` by taking `resp` and `ecosystem` as input """ vulnerabilities = 
get_item(resp, "data", "securityVulnerabilities", "edges") or [] if not vulnerabilities: - logger.error( - f"No vulnerabilities found for package_type: {package_type!r} in response: {resp!r}" - ) + if logger: + logger( + f"No vulnerabilities found for package_type: {package_type!r} in response: {resp!r}", + level=logging.ERROR, + ) return for vulnerability in vulnerabilities: @@ -162,12 +191,14 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: affected_packages = [] github_advisory = get_item(vulnerability, "node") if not github_advisory: - logger.error(f"No node found in {vulnerability!r}") + if logger: + logger(f"No node found in {vulnerability!r}", level=logging.ERROR) continue advisory = get_item(github_advisory, "advisory") if not advisory: - logger.error(f"No advisory found in {github_advisory!r}") + if logger: + logger(f"No advisory found in {github_advisory!r}", level=logging.ERROR) continue summary = get_item(advisory, "summary") or "" @@ -183,7 +214,7 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: name = get_item(github_advisory, "package", "name") if name: - purl = get_purl(pkg_type=package_type, github_name=name) + purl = get_purl(pkg_type=package_type, github_name=name, logger=logger) if purl: affected_range = get_item(github_advisory, "vulnerableVersionRange") fixed_version = get_item(github_advisory, "firstPatchedVersion", "identifier") @@ -193,7 +224,11 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: package_type, affected_range ) except Exception as e: - logger.error(f"Could not parse affected range {affected_range!r} {e!r}") + if logger: + logger( + f"Could not parse affected range {affected_range!r} {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) affected_range = None if fixed_version: try: @@ -201,7 +236,11 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: fixed_version ) except Exception as e: - 
logger.error(f"Invalid fixed version {fixed_version!r} {e!r}") + if logger: + logger( + f"Invalid fixed version {fixed_version!r} {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) fixed_version = None if affected_range or fixed_version: affected_packages.append( @@ -236,9 +275,13 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: elif identifier_type == "CVE": pass else: - logger.error(f"Unknown identifier type {identifier_type!r} and value {value!r}") + if logger: + logger( + f"Unknown identifier type {identifier_type!r} and value {value!r}", + level=logging.ERROR, + ) - weaknesses = get_cwes_from_github_advisory(advisory) + weaknesses = get_cwes_from_github_advisory(advisory, logger) yield AdvisoryData( aliases=sorted(dedupe(aliases)), @@ -251,7 +294,7 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: ) -def get_cwes_from_github_advisory(advisory) -> [int]: +def get_cwes_from_github_advisory(advisory, logger=None) -> List[int]: """ Return the cwe-id list from advisory ex: [ 522 ] by extracting the cwe_list from advisory ex: [{'cweId': 'CWE-522'}] @@ -267,6 +310,7 @@ def get_cwes_from_github_advisory(advisory) -> [int]: try: db.get(cwe_id) weaknesses.append(cwe_id) - except Exception: - logger.error("Invalid CWE id") + except Exception as e: + if logger: + logger(f"Invalid CWE id {e!r} \n {traceback_format_exc()}", level=logging.ERROR) return weaknesses diff --git a/vulnerabilities/importers/gitlab.py b/vulnerabilities/pipelines/gitlab_importer.py similarity index 67% rename from vulnerabilities/importers/gitlab.py rename to vulnerabilities/pipelines/gitlab_importer.py index cd42b24ed..0b76a31f2 100644 --- a/vulnerabilities/importers/gitlab.py +++ b/vulnerabilities/pipelines/gitlab_importer.py @@ -12,11 +12,12 @@ from pathlib import Path from typing import Iterable from typing import List -from typing import Optional +from typing import Tuple import pytz import saneyaml from dateutil 
import parser as dateparser +from fetchcode.vcs import fetch_via_vcs from packageurl import PackageURL from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.version_range import VersionRange @@ -25,58 +26,88 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import build_description from vulnerabilities.utils import get_advisory_url from vulnerabilities.utils import get_cwe_id -logger = logging.getLogger(__name__) -PURL_TYPE_BY_GITLAB_SCHEME = { - "conan": "conan", - "gem": "gem", - # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 - # "go": "golang", - "maven": "maven", - "npm": "npm", - "nuget": "nuget", - "packagist": "composer", - "pypi": "pypi", -} +class GitLabImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect advisory from GitLab Advisory Database (Open Source Edition).""" -GITLAB_SCHEME_BY_PURL_TYPE = {v: k for k, v in PURL_TYPE_BY_GITLAB_SCHEME.items()} + pipeline_id = "gitlab_importer" - -class GitLabAPIImporter(Importer): spdx_license_expression = "MIT" license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" importer_name = "GitLab Importer" repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/" - def advisory_data(self, _keep_clone=False) -> Iterable[AdvisoryData]: - try: - self.clone(repo_url=self.repo_url) - base_path = Path(self.vcs_response.dest_dir) + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.import_new_advisories, + cls.clean_downloads, + ) - for file_path in base_path.glob("**/*.yml"): - gitlab_type, package_slug, vuln_id = parse_advisory_path( - base_path=base_path, - file_path=file_path, - ) + purl_type_by_gitlab_scheme = { + 
"conan": "conan", + "gem": "gem", + # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 + # "go": "golang", + "maven": "maven", + "npm": "npm", + "nuget": "nuget", + "packagist": "composer", + "pypi": "pypi", + } + + gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()} + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + root = Path(self.vcs_response.dest_dir) + return sum(1 for _ in root.rglob("*.yml")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + base_path = Path(self.vcs_response.dest_dir) + + for file_path in base_path.rglob("*.yml"): + if file_path.parent == base_path: + continue + + gitlab_type, _, _ = parse_advisory_path( + base_path=base_path, + file_path=file_path, + ) - if gitlab_type in PURL_TYPE_BY_GITLAB_SCHEME: - yield parse_gitlab_advisory(file=file_path, base_path=base_path) + if gitlab_type not in self.purl_type_by_gitlab_scheme: + # self.log( + # f"Unknown package type {gitlab_type!r} in {file_path!r}", + # level=logging.ERROR, + # ) + continue + + yield parse_gitlab_advisory( + file=file_path, + base_path=base_path, + gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type, + purl_type_by_gitlab_scheme=self.purl_type_by_gitlab_scheme, + logger=self.log, + ) - else: - logger.error(f"Unknow package type {gitlab_type!r} in {file_path!r}") - continue - finally: - if self.vcs_response and not _keep_clone: - self.vcs_response.delete() + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() -def parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryData]: +def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]: """ Parse a gitlab advisory file and return a 3-tuple of: (gitlab_type, package_slug, vulnerability_id) @@ -96,21 +127,21 @@ def 
parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryDa >>> parse_advisory_path(base_path=base_path, file_path=file_path) ('npm', '@express/beego/beego/v2', 'CVE-2021-43831') """ - relative_path_segments = str(file_path.relative_to(base_path)).strip("/").split("/") + relative_path_segments = file_path.relative_to(base_path).parts gitlab_type = relative_path_segments[0] - vuln_id = relative_path_segments[-1].replace(".yml", "") + vuln_id = file_path.stem package_slug = "/".join(relative_path_segments[1:-1]) return gitlab_type, package_slug, vuln_id -def get_purl(package_slug): +def get_purl(package_slug, purl_type_by_gitlab_scheme, logger): """ Return a PackageURL object from a package slug """ parts = [p for p in package_slug.strip("/").split("/") if p] gitlab_scheme = parts[0] - purl_type = PURL_TYPE_BY_GITLAB_SCHEME[gitlab_scheme] + purl_type = purl_type_by_gitlab_scheme[gitlab_scheme] if gitlab_scheme == "go": name = "/".join(parts[1:]) return PackageURL(type=purl_type, namespace=None, name=name) @@ -125,7 +156,7 @@ def get_purl(package_slug): name = parts[-1] namespace = "/".join(parts[1:-1]) return PackageURL(type=purl_type, namespace=namespace, name=name) - logger.error(f"get_purl: package_slug can not be parsed: {package_slug!r}") + logger(f"get_purl: package_slug can not be parsed: {package_slug!r}", level=logging.ERROR) return @@ -140,7 +171,7 @@ def extract_affected_packages( In case of gitlab advisory data we get a list of fixed_versions and a affected_version_range. Since we can not determine which package fixes which range. We store the all the fixed_versions with the same affected_version_range in the advisory. - Later the advisory data is used to be infered in the GitLabBasicImprover. + Later the advisory data is used to be inferred in the GitLabBasicImprover. 
""" for fixed_version in fixed_versions: yield AffectedPackage( @@ -150,7 +181,9 @@ def extract_affected_packages( ) -def parse_gitlab_advisory(file, base_path): +def parse_gitlab_advisory( + file, base_path, gitlab_scheme_by_purl_type, purl_type_by_gitlab_scheme, logger +): """ Parse a Gitlab advisory file and return an AdvisoryData or None. These files are YAML. There is a JSON schema documented at @@ -177,8 +210,9 @@ def parse_gitlab_advisory(file, base_path): with open(file) as f: gitlab_advisory = saneyaml.load(f) if not isinstance(gitlab_advisory, dict): - logger.error( - f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}" + logger( + f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}", + level=logging.ERROR, ) return @@ -199,9 +233,15 @@ def parse_gitlab_advisory(file, base_path): base_path=base_path, url="https://gitlab.com/gitlab-org/advisories-community/-/blob/main/", ) - purl: PackageURL = get_purl(package_slug=package_slug) + purl: PackageURL = get_purl( + package_slug=package_slug, + purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme, + logger=logger, + ) if not purl: - logger.error(f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}") + logger( + f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}", level=logging.ERROR + ) return AdvisoryData( aliases=aliases, summary=summary, @@ -214,7 +254,7 @@ def parse_gitlab_advisory(file, base_path): affected_range = gitlab_advisory.get("affected_range") gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"]) vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type] - gitlab_scheme = GITLAB_SCHEME_BY_PURL_TYPE[purl.type] + gitlab_scheme = gitlab_scheme_by_purl_type[purl.type] try: if affected_range: if gitlab_scheme in gitlab_native_schemes: @@ -224,8 +264,9 @@ def parse_gitlab_advisory(file, base_path): else: affected_version_range = vrc.from_native(affected_range) 
except Exception as e: - logger.error( - f"parse_yaml_file: affected_range is not parsable: {affected_range!r} type:{purl.type!r} error: {e!r}\n {traceback.format_exc()}" + logger( + f"parse_yaml_file: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}", + level=logging.ERROR, ) parsed_fixed_versions = [] @@ -234,8 +275,9 @@ def parse_gitlab_advisory(file, base_path): fixed_version = vrc.version_class(fixed_version) parsed_fixed_versions.append(fixed_version) except Exception as e: - logger.error( - f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}" + logger( + f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}", + level=logging.ERROR, ) if parsed_fixed_versions: diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/pipelines/nginx_importer.py similarity index 77% rename from vulnerabilities/importers/nginx.py rename to vulnerabilities/pipelines/nginx_importer.py index 4fe0ca6ae..c5e017033 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/pipelines/nginx_importer.py @@ -3,58 +3,62 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# -import logging from typing import Iterable -from typing import List from typing import NamedTuple import requests from bs4 import BeautifulSoup -from django.db.models.query import QuerySet from packageurl import PackageURL from univers.version_range import NginxVersionRange from univers.versions import NginxVersion from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.severity_systems import GENERIC -logger = logging.getLogger(__name__) +class NginxImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect Nginx security advisories.""" -class NginxImporter(Importer): - - url = "https://nginx.org/en/security_advisories.html" + pipeline_id = "nginx_importer" spdx_license_expression = "BSD-2-Clause" license_url = "https://nginx.org/LICENSE" + url = "https://nginx.org/en/security_advisories.html" importer_name = "Nginx Importer" - def advisory_data(self) -> Iterable[AdvisoryData]: - text = self.fetch() - yield from advisory_data_from_text(text) + @classmethod + def steps(cls): + return ( + cls.fetch, + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) def fetch(self): - return requests.get(self.url).content + self.log(f"Fetch `{self.url}`") + self.advisory_data = requests.get(self.url).text + def advisories_count(self): + return self.advisory_data.count("
") -def advisory_data_from_text(text): - """ - Yield AdvisoryData from the ``text`` of the nginx security advisories HTML - web page. - """ - soup = BeautifulSoup(text, features="lxml") - vuln_list = soup.select("li p") - for vuln_info in vuln_list: - ngnix_adv = parse_advisory_data_from_paragraph(vuln_info) - yield to_advisory_data(ngnix_adv) + def collect_advisories(self) -> Iterable[AdvisoryData]: + """ + Yield AdvisoryData from nginx security advisories HTML + web page. + """ + soup = BeautifulSoup(self.advisory_data, features="lxml") + vulnerability_list = soup.select("li p") + for vulnerability_info in vulnerability_list: + ngnix_advisory = parse_advisory_data_from_paragraph(vulnerability_info) + yield to_advisory_data(ngnix_advisory) class NginxAdvisory(NamedTuple): @@ -69,7 +73,7 @@ def to_dict(self): return self._asdict() -def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: +def to_advisory_data(nginx_adv: NginxAdvisory) -> AdvisoryData: """ Return AdvisoryData from an NginxAdvisory tuple. 
""" @@ -77,7 +81,7 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: package_type = "nginx" qualifiers = {} - _, _, affected_version_range = ngnx_adv.vulnerable.partition(":") + _, _, affected_version_range = nginx_adv.vulnerable.partition(":") if "nginx/Windows" in affected_version_range: qualifiers["os"] = "windows" affected_version_range = affected_version_range.replace("nginx/Windows", "") @@ -87,7 +91,7 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: affected_version_range = NginxVersionRange.from_native(affected_version_range) affected_packages = [] - _, _, fixed_versions = ngnx_adv.not_vulnerable.partition(":") + _, _, fixed_versions = nginx_adv.not_vulnerable.partition(":") for fixed_version in fixed_versions.split(","): fixed_version = fixed_version.rstrip("+") @@ -112,17 +116,17 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: ) return AdvisoryData( - aliases=ngnx_adv.aliases, - summary=ngnx_adv.summary, + aliases=nginx_adv.aliases, + summary=nginx_adv.summary, affected_packages=affected_packages, - references=ngnx_adv.references, + references=nginx_adv.references, url="https://nginx.org/en/security_advisories.html", ) -def parse_advisory_data_from_paragraph(vuln_info): +def parse_advisory_data_from_paragraph(vulnerability_info): """ - Return an NginxAdvisory from a ``vuln_info`` bs4 paragraph. + Return an NginxAdvisory from a ``vulnerability_info`` bs4 paragraph. 
An advisory paragraph, without html markup, looks like this: @@ -145,7 +149,7 @@ def parse_advisory_data_from_paragraph(vuln_info): # we iterate on the children to accumulate values in variables # FIXME: using an explicit xpath-like query could be simpler - for child in vuln_info.children: + for child in vulnerability_info.children: if is_first: summary = child is_first = False diff --git a/vulnerabilities/importers/nvd.py b/vulnerabilities/pipelines/nvd_importer.py similarity index 88% rename from vulnerabilities/importers/nvd.py rename to vulnerabilities/pipelines/nvd_importer.py index 1a6048dfd..38800eb62 100644 --- a/vulnerabilities/importers/nvd.py +++ b/vulnerabilities/pipelines/nvd_importer.py @@ -9,7 +9,10 @@ import gzip import json +import logging from datetime import date +from traceback import format_exc as traceback_format_exc +from typing import Iterable import attr import requests @@ -17,14 +20,18 @@ from vulnerabilities import severity_systems from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import get_cwe_id from vulnerabilities.utils import get_item -class NVDImporter(Importer): +class NVDImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect advisories from NVD.""" + + pipeline_id = "nvd_importer" + # See https://github.com/nexB/vulnerablecode/issues/665 for follow up spdx_license_expression = ( "LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou" @@ -61,19 +68,46 @@ class NVDImporter(Importer): """ importer_name = "NVD Importer" - def advisory_data(self): - for _year, cve_data in fetch_cve_data_1_1(): + @classmethod + def steps(cls): + return ( + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) + + def advisories_count(self): + url = 
"https://services.nvd.nist.gov/rest/json/cves/2.0?resultsPerPage=1" + + advisory_count = 0 + try: + response = requests.get(url) + response.raise_for_status() + data = response.json() + except requests.HTTPError as http_err: + self.log( + f"HTTP error occurred: {http_err} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + return advisory_count + + advisory_count = data.get("totalResults", 0) + return advisory_count + + def collect_advisories(self) -> Iterable[AdvisoryData]: + for _year, cve_data in fetch_cve_data_1_1(logger=self.log): yield from to_advisories(cve_data=cve_data) # Isolating network calls for simplicity of testing -def fetch(url): +def fetch(url, logger=None): + if logger: + logger(f"Fetching `{url}`") gz_file = requests.get(url) data = gzip.decompress(gz_file.content) return json.loads(data) -def fetch_cve_data_1_1(starting_year=2002): +def fetch_cve_data_1_1(starting_year=2002, logger=None): """ Yield tuples of (year, lists of CVE mappings) from the NVD, one for each year since ``starting_year`` defaulting to 2002. @@ -82,7 +116,7 @@ def fetch_cve_data_1_1(starting_year=2002): # NVD json feeds start from 2002. 
for year in range(starting_year, current_year + 1): download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz" - yield year, fetch(url=download_url) + yield year, fetch(url=download_url, logger=logger) def to_advisories(cve_data): diff --git a/vulnerabilities/pipelines/pypa_importer.py b/vulnerabilities/pipelines/pypa_importer.py index 7a598de4d..29a1283fe 100644 --- a/vulnerabilities/pipelines/pypa_importer.py +++ b/vulnerabilities/pipelines/pypa_importer.py @@ -17,8 +17,6 @@ from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import get_advisory_url -module_logger = logging.getLogger(__name__) - class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline): """Collect advisories from PyPA GitHub repository.""" diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index ebc25f93b..f33eb4d2b 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -90,7 +90,7 @@ def import_advisory( if not vulnerability: if logger: - logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING) + logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.ERROR) return for ref in advisory_data.references: diff --git a/vulnerabilities/templates/package_details.html b/vulnerabilities/templates/package_details.html index 75e006839..26de42fa8 100644 --- a/vulnerabilities/templates/package_details.html +++ b/vulnerabilities/templates/package_details.html @@ -2,6 +2,7 @@ {% load humanize %} {% load widget_tweaks %} {% load static %} +{% load url_filters %} {% block title %} VulnerableCode Package Details - {{ package.purl }} @@ -91,7 +92,7 @@