From 87ea90aee1aa191230ef5fb8e78dfcd378fd71d5 Mon Sep 17 00:00:00 2001 From: Haiko Schol Date: Fri, 10 Apr 2020 23:37:00 +0200 Subject: [PATCH] WIP Signed-off-by: Haiko Schol --- requirements.txt | 9 +- vulnerabilities/data_source.py | 91 +++++++++++++- vulnerabilities/models.py | 4 +- vulnerabilities/tests/conftest.py | 10 ++ vulnerabilities/tests/test_data_source.py | 145 ++++++++++++++++++++++ 5 files changed, 255 insertions(+), 4 deletions(-) create mode 100644 vulnerabilities/tests/test_data_source.py diff --git a/requirements.txt b/requirements.txt index 357dfe633..60b23150c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,8 @@ +asgiref==3.2.7 attrs==19.3.0 beautifulsoup4==4.7.1 +cached-property==1.5.1 +cffi==1.14.0 dephell-specifier==0.2.1 dj-database-url==0.4.2 Django==3.0.3 @@ -15,14 +18,18 @@ pluggy==0.13.1 psycopg2==2.8.4 py==1.8.0 pycodestyle==2.5.0 +pycparser==2.20 +pygit2==1.2.0 pyparsing==2.4.5 pytest==5.3.2 pytest-dependency==0.4.0 pytest-django==3.7.0 pytest-mock==1.13.0 +pytoml==0.1.21 pytz==2019.3 PyYAML==5.3 saneyaml==0.4 +schema==0.7.1 six==1.13.0 soupsieve==1.9.5 sqlparse==0.3.0 @@ -30,5 +37,3 @@ tqdm==4.41.1 wcwidth==0.1.7 whitenoise==5.0.1 zipp==0.6.0 -pytoml==0.1.21 -schema==0.7.1 diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 7644896a1..04caef245 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -20,17 +20,26 @@ # VulnerableCode is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/vulnerablecode/ for support and download. 
+import dataclasses
+import os
+import shutil
+import tempfile
 from datetime import datetime
+from pathlib import Path
 from typing import Any
 from typing import ContextManager
 from typing import Mapping
 from typing import Optional
 from typing import Sequence
 
-import dataclasses
+import pygit2
 from packageurl import PackageURL
 
 
+class InvalidConfigError(Exception):
+    pass
+
+
 @dataclasses.dataclass
 class DataSource(ContextManager):
     """
@@ -55,6 +64,15 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         """
         pass
 
+    def validate_config(self):
+        """
+        Hook for subclasses to check self.config for mandatory or malformed parameters.
+
+        Importer.make_data_source() calls this right after instantiating the data source. Subclasses
+        should raise InvalidConfigError when the configuration is unusable; this default does nothing.
+        """
+        pass
+
     def added_advisories(self):
         """
         Subclasses yield batch_size sized batches of Advisory objects that have been added to the data source
@@ -73,6 +91,12 @@ def updated_advisories(self):
         """
         raise StopIteration
 
+    def error(self, msg):
+        """
+        Raise InvalidConfigError with msg prefixed by the name of the concrete class.
+        """
+        raise InvalidConfigError(f'{type(self).__name__}: {msg}')
+
 
 @dataclasses.dataclass
 class Advisory:
@@ -97,3 +121,109 @@ def impacted_purls(self):
     @property
     def resolved_purls(self):
         return {str(p) for p in self.resolved_package_urls}
+
+
+class GitDataSource(DataSource):
+    """
+    Base class for data sources that read their advisories from a git repository.
+
+    Entering the context manager clones repository_url into working_directory; leaving it deletes
+    that directory unless remove_working_directory is False. The class attributes are defaults which
+    validate_config() overrides with the values found in self.config.
+    """
+    repository_url: Optional[str] = None
+    branch: str = 'master'
+    create_working_directory: bool = True
+    remove_working_directory: bool = True
+    working_directory: Optional[str] = None
+
+    def validate_config(self):
+        """
+        Check self.config and copy the recognized values onto the instance.
+
+        Raises InvalidConfigError if "repository_url" is missing, a value has the wrong type or the
+        working directory options contradict each other.
+        """
+        # A key that is present but None would skip the type check and only fail while cloning.
+        if self.config.get('repository_url') is None:
+            self.error('config value "repository_url" is required')
+
+        self._set_param('repository_url', str)
+        self._set_param('branch', str)
+        self._set_param('working_directory', str)
+        self._set_param('create_working_directory', bool)
+        self._set_param('remove_working_directory', bool)
+
+        if not self.create_working_directory and self.working_directory is None:
+            self.error('"create_working_directory" is not set but "working_directory" is set to the default, which '
+                       'calls tempfile.mkdtemp()')
+
+        if not self.create_working_directory and not os.path.exists(self.working_directory):
+            self.error('"working_directory" does not contain an existing directory and "create_working_directory" is '
+                       'not set')
+
+        if not self.remove_working_directory and self.working_directory is None:
+            self.error('"remove_working_directory" is not set and "working_directory" is set to the default, which '
+                       'calls tempfile.mkdtemp()')
+
+    def _set_param(self, name, type, attr=None):
+        """
+        Copy the config value called name to the instance attribute attr (defaults to name).
+
+        Missing and None values leave the class level default untouched. Raises InvalidConfigError
+        if the value is not an instance of type (the parameter shadows the builtin inside this method).
+        """
+        value = self.config.get(name)
+        if value is None:
+            return
+
+        if not isinstance(value, type):
+            self.error(f'config value "{name}" must be of type {type}')
+
+        setattr(self, attr or name, value)
+
+    def __enter__(self):
+        """
+        Prepare the working directory, clone the repository into it and return self.
+        """
+        created = False
+        if self.working_directory is None:
+            self.working_directory = tempfile.mkdtemp()
+            created = True
+        elif self.create_working_directory and not os.path.exists(self.working_directory):
+            os.mkdir(self.working_directory)
+            created = True
+
+        try:
+            self.repo = pygit2.clone_repository(
+                self.repository_url, self.working_directory, checkout_branch=self.branch)
+        except Exception:
+            # __exit__ does not run when __enter__ raises. Only remove a directory created above so
+            # that a failed clone never deletes data that existed before.
+            if created and self.remove_working_directory:
+                shutil.rmtree(self.working_directory, ignore_errors=True)
+            raise
+
+        # Without this, "with data_source as ds" would bind ds to None.
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """
+        Delete the working directory unless remove_working_directory is False.
+        """
+        if self.remove_working_directory:
+            shutil.rmtree(self.working_directory)
+
+    def added_advisories(self):
+        raise NotImplementedError
+
+    def updated_advisories(self):
+        raise NotImplementedError
+
+    def added_files(self, subdir=None, recursive=False, file_ext=None):
+        # TODO: unfinished. path, recursive and file_ext are not used yet and nothing is returned.
+        working_dir = self.working_directory if subdir is None else os.path.join(self.working_directory, subdir)
+        path = Path(working_dir)
+
+        if self.cutoff_date is None:
+            raise RuntimeError('etc etc')
diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py
index 7e72da1f8..21d60ff31 100644
--- a/vulnerabilities/models.py
+++ b/vulnerabilities/models.py
@@ -124,4 +124,6 @@ def make_data_source(self, cutoff_date=None, batch_size=None) -> DataSource:
         cd = cutoff_date or self.last_run
         importers_module = importlib.import_module('vulnerabilities.importers')
         klass = getattr(importers_module, self.data_source)
-        return klass(cutoff_date=cd, batch_size=batch_size, config=self.data_source_cfg)
+        ds = klass(cutoff_date=cd, batch_size=batch_size, config=self.data_source_cfg)
+        ds.validate_config()
+        return ds
diff --git a/vulnerabilities/tests/conftest.py b/vulnerabilities/tests/conftest.py
index bf5144ad3..fcd4be922 100644
--- a/vulnerabilities/tests/conftest.py
+++ b/vulnerabilities/tests/conftest.py
@@ -59,3 +59,13 @@ def setArchLinuxData(db):
         test_data = json.load(f)
 
     archlinux_dump(test_data)
+
+
+@pytest.fixture
+def no_mkdir(monkeypatch):
+    monkeypatch.delattr('os.mkdir')
+
+
+@pytest.fixture
+def no_rmtree(monkeypatch):
+    monkeypatch.delattr('shutil.rmtree')
diff --git a/vulnerabilities/tests/test_data_source.py b/vulnerabilities/tests/test_data_source.py
new file mode 100644
index 000000000..6a1320f61
--- /dev/null
+++ b/vulnerabilities/tests/test_data_source.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/vulnerablecode/ +# The VulnerableCode software is licensed under the Apache License version 2.0. +# Data generated with VulnerableCode require an acknowledgment. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# When you publish or redistribute any data created with VulnerableCode or any VulnerableCode +# derivative work, you must accompany this data with the following acknowledgment: +# +# Generated with VulnerableCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# VulnerableCode should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# VulnerableCode is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/vulnerablecode/ for support and download. 
+from unittest.mock import patch
+
+import pytest
+
+from vulnerabilities.data_source import GitDataSource
+from vulnerabilities.data_source import InvalidConfigError
+
+
+def mk_ds(**kwargs):
+    # batch_size is a required parameter of the base class, unrelated to these tests
+    return GitDataSource(batch_size=100, config=kwargs)
+
+
+def test_GitDataSource_validate_config_repository_url_required(no_mkdir, no_rmtree):
+    ds = mk_ds()
+
+    with pytest.raises(InvalidConfigError):
+        ds.validate_config()
+
+
+def test_GitDataSource_validate_config_repository_url_must_be_string(no_mkdir, no_rmtree):
+    ds = mk_ds(repository_url=True)
+
+    with pytest.raises(InvalidConfigError):
+        ds.validate_config()
+
+
+def test_GitDataSource_validate_config_branch_must_be_string(no_mkdir, no_rmtree):
+    ds = mk_ds(repository_url='asdf', branch={})
+
+    with pytest.raises(InvalidConfigError):
+        ds.validate_config()
+
+
+def test_GitDataSource_validate_config_working_directory_must_be_string(no_mkdir, no_rmtree):
+    ds = mk_ds(repository_url='asdf', working_directory=[])
+
+    with pytest.raises(InvalidConfigError):
+        ds.validate_config()
+
+
+def test_GitDataSource_validate_config_create_working_directory_must_be_bool(no_mkdir, no_rmtree):
+    ds = mk_ds(repository_url='asdf', create_working_directory='maybe')
+
+    with pytest.raises(InvalidConfigError):
+        ds.validate_config()
+
+
+def test_GitDataSource_validate_config_remove_working_directory_must_be_bool(no_mkdir, no_rmtree):
+    ds = mk_ds(repository_url='asdf', remove_working_directory='maybe')
+
+    with pytest.raises(InvalidConfigError):
+        ds.validate_config()
+
+
+def test_GitDataSource_validate_config_create_working_directory_must_be_set_when_working_directory_is_default(
+        no_mkdir, no_rmtree):
+
+    ds = mk_ds(repository_url='asdf', create_working_directory=False)
+
+    with pytest.raises(InvalidConfigError):
+        ds.validate_config()
+
+
+def test_GitDataSource_validate_config_remove_working_directory_must_be_set_when_working_directory_is_default(
+        no_mkdir, no_rmtree):
+
+    ds = mk_ds(repository_url='asdf', remove_working_directory=False)
+
+    with pytest.raises(InvalidConfigError):
+        ds.validate_config()
+
+
+def test_GitDataSource_validate_config_remove_working_directory_is_applied(no_mkdir, no_rmtree):
+    ds = mk_ds(repository_url='asdf', remove_working_directory=False, working_directory='/some/directory')
+
+    ds.validate_config()
+
+    assert not ds.remove_working_directory
+
+
+def test_GitDataSource_validate_config_working_directory_must_exist_when_create_working_directory_is_not_set(
+        no_mkdir, no_rmtree):
+
+    ds = mk_ds(repository_url='asdf', working_directory='/does/not/exist', create_working_directory=False)
+
+    with pytest.raises(InvalidConfigError):
+        ds.validate_config()
+
+
+@patch('os.path.exists', return_value=False)
+@patch('shutil.rmtree')
+@patch('pygit2.clone_repository')
+@patch('os.mkdir')
+def test_GitDataSource_contextmgr_working_directory_is_created_and_removed(mkdir, clone_repository, rmtree, _):
+    """Entering creates the missing working directory and clones into it; exiting removes it."""
+    wd = '/some/working/directory'
+    ds = mk_ds(repository_url='asdf', working_directory=wd)
+    ds.validate_config()
+
+    assert wd == ds.working_directory
+
+    with ds:
+        mkdir.assert_called_with(wd)  # a real assertion; mkdir.called_with(...) is always truthy
+
+    mkdir.assert_called_with(wd)
+    clone_repository.assert_called_with('asdf', wd, checkout_branch=ds.branch)
+    rmtree.assert_called_with(wd)
+
+
+@patch('shutil.rmtree')
+@patch('pygit2.clone_repository')
+@patch('tempfile.mkdtemp', return_value='/fake/tempdir')
+def test_GitDataSource_contextmgr_calls_mkdtemp_if_working_directory_is_not_set(mkdtemp, clone_repository, rmtree):
+    """Without a configured working directory, entering falls back to tempfile.mkdtemp()."""
+    ds = mk_ds(repository_url='asdf')
+    ds.validate_config()
+
+    with ds:
+        assert mkdtemp.called
+        assert ds.working_directory == '/fake/tempdir'
+
+    clone_repository.assert_called_with('asdf', '/fake/tempdir', checkout_branch=ds.branch)
+    rmtree.assert_called_with('/fake/tempdir')