Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
Signed-off-by: Haiko Schol <hs@haikoschol.com>
  • Loading branch information
haikoschol committed Apr 16, 2020
1 parent 7f1e8ab commit 87ea90a
Show file tree
Hide file tree
Showing 5 changed files with 255 additions and 4 deletions.
9 changes: 7 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
asgiref==3.2.7
attrs==19.3.0
beautifulsoup4==4.7.1
cached-property==1.5.1
cffi==1.14.0
dephell-specifier==0.2.1
dj-database-url==0.4.2
Django==3.0.3
Expand All @@ -15,20 +18,22 @@ pluggy==0.13.1
psycopg2==2.8.4
py==1.8.0
pycodestyle==2.5.0
pycparser==2.20
pygit2==1.2.0
pyparsing==2.4.5
pytest==5.3.2
pytest-dependency==0.4.0
pytest-django==3.7.0
pytest-mock==1.13.0
pytoml==0.1.21
pytz==2019.3
PyYAML==5.3
saneyaml==0.4
schema==0.7.1
six==1.13.0
soupsieve==1.9.5
sqlparse==0.3.0
tqdm==4.41.1
wcwidth==0.1.7
whitenoise==5.0.1
zipp==0.6.0
pytoml==0.1.21
schema==0.7.1
91 changes: 90 additions & 1 deletion vulnerabilities/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,26 @@
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.

import dataclasses
import os
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import ContextManager
from typing import Mapping
from typing import Optional
from typing import Sequence
import dataclasses

import pygit2
from packageurl import PackageURL


class InvalidConfigError(Exception):
    """
    Raised when the configuration of a data source is missing or contains invalid values.
    """


@dataclasses.dataclass
class DataSource(ContextManager):
"""
Expand All @@ -55,6 +64,15 @@ def __exit__(self, exc_type, exc_val, exc_tb):
"""
pass

def validate_config(self):
    """
    Hook for checking self.config. The base implementation accepts any config.

    Importer.make_data_source() calls this method. Subclasses override it to verify their mandatory
    parameters and should raise InvalidConfigError when the config is unusable.
    """
    return None

def added_advisories(self):
"""
Subclasses yield batch_size sized batches of Advisory objects that have been added to the data source
Expand All @@ -73,6 +91,12 @@ def updated_advisories(self):
"""
raise StopIteration

def error(self, msg):
    """
    Raise InvalidConfigError with msg prefixed by the name of the concrete class.
    """
    class_name = type(self).__name__
    raise InvalidConfigError(f'{class_name}: {msg}')


@dataclasses.dataclass
class Advisory:
Expand All @@ -97,3 +121,68 @@ def impacted_purls(self):
@property
def resolved_purls(self):
return {str(p) for p in self.resolved_package_urls}


class GitDataSource(DataSource):
    """
    Data source backed by a git repository.

    Used as a context manager: entering clones ``repository_url`` (branch ``branch``) into
    ``working_directory``, leaving removes that directory again unless ``remove_working_directory``
    is False.

    The attributes below are defaults. validate_config() overrides them per instance with the values
    found under the same keys in self.config.
    """
    # URL passed to pygit2.clone_repository(); mandatory in the config.
    repository_url: Optional[str] = None
    # Branch that is checked out after cloning.
    branch: str = 'master'
    # Whether __enter__ may create working_directory when it does not exist yet.
    create_working_directory: bool = True
    # Whether __exit__ deletes working_directory.
    remove_working_directory: bool = True
    # Clone target; None means "use a fresh tempfile.mkdtemp() directory".
    working_directory: Optional[str] = None

    def validate_config(self):
        """
        Check self.config and copy the recognised values onto this instance.

        Raises InvalidConfigError when "repository_url" is missing (or None), when a value has the wrong
        type, or when the working directory options contradict each other.
        """
        # A key that is present but None would otherwise slip through and only fail later in pygit2.
        if self.config.get('repository_url') is None:
            self.error('config value "repository_url" is required')

        self._set_param('repository_url', str)
        self._set_param('branch', str)
        self._set_param('working_directory', str)
        self._set_param('create_working_directory', bool)
        self._set_param('remove_working_directory', bool)

        if not self.create_working_directory and self.working_directory is None:
            self.error('"create_working_directory" is not set but "working_directory" is set to the default, which '
                       'calls tempfile.mkdtemp()')

        # isdir() rather than exists(): a regular file at that path cannot serve as a clone target either.
        if not self.create_working_directory and not os.path.isdir(self.working_directory):
            self.error('"working_directory" does not contain an existing directory and "create_working_directory" is '
                       'not set')

        if not self.remove_working_directory and self.working_directory is None:
            self.error('"remove_working_directory" is not set and "working_directory" is set to the default, which '
                       'calls tempfile.mkdtemp()')

    def _set_param(self, name, type, attr=None):
        """
        Copy the config value ``name`` onto the attribute ``attr`` (defaults to ``name``).

        A missing or None value leaves the current attribute untouched. A value that is not an instance of
        ``type`` raises InvalidConfigError. The parameter keeps its original name ``type`` (shadowing the
        builtin inside this method only) so that existing keyword callers keep working.
        """
        value = self.config.get(name)
        if value is None:
            return

        if not isinstance(value, type):
            # Report "str" / "bool" instead of the repr "<class 'str'>".
            self.error(f'config value "{name}" must be of type {type.__name__}')

        setattr(self, attr or name, value)

    def __enter__(self):
        """
        Prepare the working directory, clone the repository into it and return this data source.

        A directory created here is removed again if cloning fails (and remove_working_directory is set),
        because __exit__ is not invoked when __enter__ raises.
        """
        created = False
        if self.working_directory is None:
            self.working_directory = tempfile.mkdtemp()
            created = True
        elif self.create_working_directory and not os.path.exists(self.working_directory):
            os.mkdir(self.working_directory)
            created = True

        try:
            self.repo = pygit2.clone_repository(
                self.repository_url,
                self.working_directory,
                checkout_branch=self.branch,
            )
        except Exception:
            # Only clean up what this call created; a pre-existing directory is left alone on failure.
            if created and self.remove_working_directory:
                shutil.rmtree(self.working_directory, ignore_errors=True)
            raise

        # Returning self makes "with data_source as ds:" usable.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Delete the working directory unless remove_working_directory is False. Exceptions are not suppressed.
        """
        if self.remove_working_directory:
            shutil.rmtree(self.working_directory)

    def added_advisories(self):
        """
        Must be implemented by concrete subclasses.
        """
        raise NotImplementedError

    def updated_advisories(self):
        """
        Must be implemented by concrete subclasses.
        """
        raise NotImplementedError

    def added_files(self, subdir=None, recursive=False, file_ext=None):
        """
        Work in progress: the lookup of added files is not implemented yet.

        The parameters subdir, recursive and file_ext are accepted but not used so far.

        Raises RuntimeError when self.cutoff_date is None and NotImplementedError otherwise, instead of
        silently returning None.
        """
        if self.cutoff_date is None:
            raise RuntimeError('added_files() requires "cutoff_date" to be set')

        raise NotImplementedError
4 changes: 3 additions & 1 deletion vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,6 @@ def make_data_source(self, cutoff_date=None, batch_size=None) -> DataSource:
cd = cutoff_date or self.last_run
importers_module = importlib.import_module('vulnerabilities.importers')
klass = getattr(importers_module, self.data_source)
return klass(cutoff_date=cd, batch_size=batch_size, config=self.data_source_cfg)
ds = klass(cutoff_date=cd, batch_size=batch_size, config=self.data_source_cfg)
ds.validate_config()
return ds
10 changes: 10 additions & 0 deletions vulnerabilities/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,13 @@ def setArchLinuxData(db):
test_data = json.load(f)

archlinux_dump(test_data)


@pytest.fixture
def no_mkdir(monkeypatch):
    """
    Remove os.mkdir for the duration of a test, so any attempt to call it fails with AttributeError.
    """
    monkeypatch.delattr('os.mkdir', raising=True)


@pytest.fixture
def no_rmtree(monkeypatch):
    """
    Remove shutil.rmtree for the duration of a test, so any attempt to call it fails with AttributeError.
    """
    monkeypatch.delattr('shutil.rmtree', raising=True)
145 changes: 145 additions & 0 deletions vulnerabilities/tests/test_data_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/vulnerablecode/
# The VulnerableCode software is licensed under the Apache License version 2.0.
# Data generated with VulnerableCode require an acknowledgment.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with VulnerableCode or any VulnerableCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with VulnerableCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# VulnerableCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.
from unittest.mock import patch

import pytest

from vulnerabilities.data_source import GitDataSource
from vulnerabilities.data_source import InvalidConfigError


def mk_ds(**kwargs):
    """
    Build a GitDataSource whose config consists of exactly the given keyword arguments.
    """
    # The base class insists on a batch_size; its value plays no role in these tests.
    config = kwargs
    return GitDataSource(config=config, batch_size=100)


def test_GitDataSource_validate_config_repository_url_required(no_mkdir, no_rmtree):
    """
    An empty config lacks "repository_url" and must be rejected.
    """
    source = mk_ds()

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_repository_url_must_be_string(no_mkdir, no_rmtree):
    """
    A non-string "repository_url" must be rejected.
    """
    source = mk_ds(repository_url=True)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_branch_must_be_string(no_mkdir, no_rmtree):
    """
    A non-string "branch" must be rejected.
    """
    config = {'repository_url': 'asdf', 'branch': {}}
    source = mk_ds(**config)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_working_directory_must_be_string(no_mkdir, no_rmtree):
    """
    A non-string "working_directory" must be rejected.
    """
    config = {'repository_url': 'asdf', 'working_directory': []}
    source = mk_ds(**config)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_create_working_directory_must_be_bool(no_mkdir, no_rmtree):
    """
    A non-bool "create_working_directory" must be rejected.
    """
    config = {'repository_url': 'asdf', 'create_working_directory': 'maybe'}
    source = mk_ds(**config)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_remove_working_directory_must_be_bool(no_mkdir, no_rmtree):
    """
    A non-bool "remove_working_directory" must be rejected.
    """
    config = {'repository_url': 'asdf', 'remove_working_directory': 'maybe'}
    source = mk_ds(**config)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_create_working_directory_must_be_set_when_working_directory_is_default(
        no_mkdir, no_rmtree):
    """
    Disabling "create_working_directory" without naming a "working_directory" must be rejected.
    """
    config = {'repository_url': 'asdf', 'create_working_directory': False}
    source = mk_ds(**config)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_remove_working_directory_must_be_set_when_working_directory_is_default(
        no_mkdir, no_rmtree):
    """
    Disabling "remove_working_directory" without naming a "working_directory" must be rejected.
    """
    config = {'repository_url': 'asdf', 'remove_working_directory': False}
    source = mk_ds(**config)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_remove_working_directory_is_applied(no_mkdir, no_rmtree):
    """
    A valid "remove_working_directory" value from the config ends up on the data source instance.
    """
    config = {
        'repository_url': 'asdf',
        'remove_working_directory': False,
        'working_directory': '/some/directory',
    }
    source = mk_ds(**config)

    source.validate_config()

    assert not source.remove_working_directory


def test_GitDataSource_validate_config_working_directory_must_exist_when_create_working_directory_is_not_set(
        no_mkdir, no_rmtree):
    """
    A "working_directory" that does not exist must be rejected when "create_working_directory" is disabled.
    """
    config = {
        'repository_url': 'asdf',
        'working_directory': '/does/not/exist',
        'create_working_directory': False,
    }
    source = mk_ds(**config)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


@patch('os.path.exists', return_value=False)
@patch('shutil.rmtree')
@patch('pygit2.clone_repository')
@patch('os.mkdir')
def test_GitDataSource_contextmgr_working_directory_is_created_and_removed(mkdir, clone_repository, rmtree, _):
    """
    Entering the context creates the configured working directory and clones into it; leaving removes it.
    """
    wd = '/some/working/directory'
    ds = mk_ds(repository_url='asdf', working_directory=wd)
    ds.validate_config()

    assert wd == ds.working_directory

    with ds:
        # Use the assert_* methods: "assert mock.called_with(...)" merely reads an auto-created Mock
        # attribute, which is always truthy, so such a check can never fail.
        mkdir.assert_called_once_with(wd)
        clone_repository.assert_called_once_with('asdf', wd, checkout_branch=ds.branch)
        rmtree.assert_not_called()

    rmtree.assert_called_once_with(wd)


@patch('shutil.rmtree')
@patch('pygit2.clone_repository')
@patch('tempfile.mkdtemp', return_value='/fake/tempdir')
def test_GitDataSource_contextmgr_calls_mkdtemp_if_working_directory_is_not_set(mkdtemp, clone_repository, rmtree):
    """
    Without a configured working directory the clone goes into a tempfile.mkdtemp() directory, which is
    removed again when the context is left.
    """
    ds = mk_ds(repository_url='asdf')
    ds.validate_config()

    with ds:
        mkdtemp.assert_called_once_with()
        assert ds.working_directory == '/fake/tempdir'
        # Use the assert_* methods: "assert mock.called_with(...)" is always truthy and verifies nothing.
        clone_repository.assert_called_once_with('asdf', '/fake/tempdir', checkout_branch=ds.branch)
        rmtree.assert_not_called()

    rmtree.assert_called_once_with('/fake/tempdir')

0 comments on commit 87ea90a

Please sign in to comment.