Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
Signed-off-by: Haiko Schol <hs@haikoschol.com>
  • Loading branch information
haikoschol committed Apr 10, 2020
1 parent 6c94104 commit 7e322b7
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 3 deletions.
7 changes: 6 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
asgiref==3.2.7
attrs==19.3.0
beautifulsoup4==4.7.1
cached-property==1.5.1
cffi==1.14.0
dephell-specifier==0.2.1
dj-database-url==0.4.2
Django==3.0.3
Expand All @@ -15,11 +18,14 @@ pluggy==0.13.1
psycopg2==2.8.4
py==1.8.0
pycodestyle==2.5.0
pycparser==2.20
pygit2==1.2.0
pyparsing==2.4.5
pytest==5.3.2
pytest-dependency==0.4.0
pytest-django==3.7.0
pytest-mock==1.13.0
pytoml==0.1.21
pytz==2019.3
PyYAML==5.3
saneyaml==0.4
Expand All @@ -30,4 +36,3 @@ tqdm==4.41.1
wcwidth==0.1.7
whitenoise==5.0.1
zipp==0.6.0
pytoml==0.1.21
102 changes: 101 additions & 1 deletion vulnerabilities/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,26 @@
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.

import dataclasses
import os
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import ContextManager
from typing import Mapping
from typing import Optional
from typing import Sequence
import dataclasses

import pygit2
from packageurl import PackageURL


class InvalidConfigError(Exception):
    """
    Raised when the configuration of a data source is missing or invalid.
    """


@dataclasses.dataclass
class DataSource(ContextManager):
"""
Expand All @@ -55,6 +64,15 @@ def __exit__(self, exc_type, exc_val, exc_tb):
"""
pass

def validate_config(self):
    """
    Hook in which subclasses verify that self.config holds every mandatory parameter.
    Importer.make_data_source() invokes this method. Subclasses should raise InvalidConfigError
    when the configuration is not acceptable; this base implementation performs no checks.
    """
    return None

def added_advisories(self):
"""
Subclasses yield batch_size sized batches of Advisory objects that have been added to the data source
Expand All @@ -73,6 +91,12 @@ def updated_advisories(self):
"""
raise StopIteration

def error(self, msg):
    """
    Raise InvalidConfigError with msg prefixed by the name of the concrete class.
    """
    class_name = type(self).__name__
    raise InvalidConfigError(f'{class_name}: {msg}')


@dataclasses.dataclass
class Advisory:
Expand All @@ -97,3 +121,79 @@ def impacted_purls(self):
@property
def resolved_purls(self):
return {str(p) for p in self.resolved_package_urls}


class GitDataSource(DataSource):
    """
    Data source backed by a git repository.

    Entering the context clones ``repository_url`` (checking out ``branch``) into
    ``working_directory``. Leaving the context removes that directory again when
    ``remove_working_directory`` is set.

    validate_config() copies these keys from self.config onto the instance:

    - ``repository_url`` (str, mandatory): passed to pygit2.clone_repository().
    - ``branch`` (str): branch to check out, 'master' unless configured.
    - ``working_directory`` (str): clone target; when left unset, __enter__ creates one with
      tempfile.mkdtemp().
    - ``create_working_directory`` (bool): create ``working_directory`` if it does not exist.
    - ``remove_working_directory`` (bool): delete ``working_directory`` on leaving the context.
    """
    repository_url: Optional[str] = None
    branch: str = 'master'
    create_working_directory: bool = True
    remove_working_directory: bool = True
    working_directory: Optional[str] = None

    def validate_config(self):
        """
        Check self.config and apply its values to the instance.

        Raises InvalidConfigError (through self.error()) when "repository_url" is missing, when a
        value has the wrong type, or when the working directory options contradict each other.
        """
        # A missing config is reported the same way as a config without "repository_url".
        if not self.config or 'repository_url' not in self.config:
            self.error('config value "repository_url" is required')

        self._set_param('repository_url', str)
        self._set_param('branch', str)
        self._set_param('working_directory', str)
        self._set_param('create_working_directory', bool)
        self._set_param('remove_working_directory', bool)

        if not self.create_working_directory:
            if self.working_directory is None:
                self.error('"create_working_directory" is not set but "working_directory" is set to the default, which '
                           'calls tempfile.mkdtemp()')

            if not os.path.exists(self.working_directory):
                self.error('"working_directory" does not contain an existing directory and "create_working_directory" is '
                           'not set')

        if not self.remove_working_directory and self.working_directory is None:
            self.error('"remove_working_directory" is not set and "working_directory" is set to the default, which '
                       'calls tempfile.mkdtemp()')

    def _set_param(self, name, type, attr=None):
        """
        Copy self.config[name] to the attribute ``attr`` (``name`` if attr is not given).

        A value that is absent or None leaves the attribute untouched. A value that is not an
        instance of ``type`` raises InvalidConfigError.
        """
        value = self.config.get(name)
        if value is None:
            return

        if not isinstance(value, type):
            self.error(f'config value "{name}" must be of type {type}')

        setattr(self, attr or name, value)

    def __enter__(self):
        """
        Prepare the working directory, clone the repository into it and return this data source.
        """
        created_here = False

        if self.working_directory is None:
            self.working_directory = tempfile.mkdtemp()
            created_here = True
        elif self.create_working_directory and not os.path.exists(self.working_directory):
            # makedirs also creates missing parent directories, unlike mkdir.
            os.makedirs(self.working_directory, exist_ok=True)
            created_here = True

        try:
            self.repo = pygit2.clone_repository(
                self.repository_url,
                self.working_directory,
                checkout_branch=self.branch,
            )
        except Exception:
            # __exit__ is not called when __enter__ raises, so a directory created above would
            # be left behind. Directories that existed beforehand are never touched here.
            if created_here and self.remove_working_directory:
                shutil.rmtree(self.working_directory, ignore_errors=True)
            raise

        # Returning self makes "with GitDataSource(...) as ds:" bind the data source.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Delete the working directory if "remove_working_directory" is set.
        """
        if self.remove_working_directory:
            shutil.rmtree(self.working_directory)

    def added_advisories(self):
        """
        Not implemented here; always raises NotImplementedError.
        """
        raise NotImplementedError

    def updated_advisories(self):
        """
        Not implemented here; always raises NotImplementedError.
        """
        raise NotImplementedError

    def added_files(self, subdir=None, recursive=False, file_ext=None):
        """
        Unfinished: currently only resolves the directory to inspect and checks self.cutoff_date.

        Raises RuntimeError when self.cutoff_date is None. ``recursive`` and ``file_ext`` are not
        used yet.
        """
        working_dir = self.working_directory if subdir is None else os.path.join(self.working_directory, subdir)
        # TODO: path is not used yet; the file lookup still has to be implemented.
        path = Path(working_dir)

        if self.cutoff_date is None:
            raise RuntimeError('etc etc')
4 changes: 3 additions & 1 deletion vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,6 @@ def make_data_source(self, cutoff_date=None, batch_size=None) -> DataSource:
cd = cutoff_date or self.last_run
importers_module = importlib.import_module('vulnerabilities.importers')
klass = getattr(importers_module, self.data_source)
return klass(cutoff_date=cd, batch_size=batch_size, config=self.data_source_cfg)
ds = klass(cutoff_date=cd, batch_size=batch_size, config=self.data_source_cfg)
ds.validate_config()
return ds
122 changes: 122 additions & 0 deletions vulnerabilities/tests/test_data_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/vulnerablecode/
# The VulnerableCode software is licensed under the Apache License version 2.0.
# Data generated with VulnerableCode require an acknowledgment.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with VulnerableCode or any VulnerableCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with VulnerableCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# VulnerableCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.
import os
import tempfile
from datetime import datetime
from unittest.mock import patch

import pytest

from vulnerabilities.data_source import GitDataSource
from vulnerabilities.data_source import InvalidConfigError


def mk_ds(**kwargs):
    """
    Build a GitDataSource whose config consists of the given keyword arguments.
    """
    # The base class requires batch_size; its value does not matter for these tests.
    config = kwargs
    return GitDataSource(batch_size=100, config=config)


def test_GitDataSource_validate_config_repository_url_required():
    """
    A config without "repository_url" is rejected.
    """
    source = mk_ds()

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_repository_url_must_be_string():
    """
    A non-string "repository_url" is rejected.
    """
    source = mk_ds(repository_url=True)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_branch_must_be_string():
    """
    A non-string "branch" is rejected.
    """
    source = mk_ds(repository_url='asdf', branch={})

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_working_directory_must_be_string():
    """
    A non-string "working_directory" is rejected.
    """
    source = mk_ds(repository_url='asdf', working_directory=[])

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_create_working_directory_must_be_bool():
    """
    A non-bool "create_working_directory" is rejected.
    """
    source = mk_ds(repository_url='asdf', create_working_directory='maybe')

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_remove_working_directory_must_be_bool():
    """
    A non-bool "remove_working_directory" is rejected.
    """
    source = mk_ds(repository_url='asdf', remove_working_directory='maybe')

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_create_working_directory_must_be_set_when_working_directory_is_default():
    """
    Disabling "create_working_directory" without configuring "working_directory" is rejected.
    """
    source = mk_ds(repository_url='asdf', create_working_directory=False)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_remove_working_directory_must_be_set_when_working_directory_is_default():
    """
    Disabling "remove_working_directory" without configuring "working_directory" is rejected.
    """
    source = mk_ds(repository_url='asdf', remove_working_directory=False)

    with pytest.raises(InvalidConfigError):
        source.validate_config()


def test_GitDataSource_validate_config_remove_working_directory_is_applied():
    """
    A valid "remove_working_directory" value from the config ends up on the instance.
    """
    source = mk_ds(
        repository_url='asdf',
        remove_working_directory=False,
        working_directory='/some/directory',
    )

    source.validate_config()

    assert not source.remove_working_directory


def test_GitDataSource_validate_config_working_directory_must_exist_when_create_working_directory_is_not_set():
    """
    A non-existent "working_directory" is rejected when "create_working_directory" is disabled.
    """
    source = mk_ds(
        repository_url='asdf',
        working_directory='/does/not/exist',
        create_working_directory=False,
    )

    with pytest.raises(InvalidConfigError):
        source.validate_config()


@patch('pygit2.clone_repository')
def test_GitDataSource_validate_config_working_directory_is_created_and_removed(_):
    """
    The configured working directory is created on entering the context and removed on leaving it.
    pygit2.clone_repository is patched, so no repository is actually cloned.
    """
    unique_name = f'vulnerablecode_tests_{datetime.now().timestamp()}'
    wd = os.path.join(tempfile.gettempdir(), unique_name)
    assert not os.path.exists(wd)

    source = mk_ds(repository_url='asdf', working_directory=wd)
    source.validate_config()

    assert wd == source.working_directory

    with source:
        assert os.path.exists(source.working_directory)

    assert not os.path.exists(wd)

0 comments on commit 7e322b7

Please sign in to comment.