Skip to content

Commit

Permalink
Collect NPM
Browse files Browse the repository at this point in the history
closes aboutcode-org#101

Signed-off-by: Navonil Das <navneeladas@gmail.com>
  • Loading branch information
NavonilDas authored and haikoschol committed Nov 1, 2019
1 parent f56b604 commit 58e3ff3
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 1 deletion.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ django==2.2.4
djangorestframework==3.9.2
django-filter==2.1.0
packageurl-python==0.8.7
semantic-version==2.8.2

# Tests
pytest==3.2.3
Expand Down
33 changes: 33 additions & 0 deletions vulnerabilities/data_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,36 @@ def archlinux_dump(extract_data):
package=package_fixed,
repository=f'https://security.archlinux.org/package/{package_name}',
)


def npm_dump(extract_data):
    """
    Save scraped npm advisories to the database.

    For every advisory dict in `extract_data`, one Vulnerability and one
    VulnerabilityReference are created. A Package row is then created for
    each affected version (linked through ImpactedPackage) and for each
    fixed version (linked through ResolvedPackage).
    """
    for advisory in extract_data:
        vuln = Vulnerability.objects.create(
            summary=advisory.get('summary'),
        )
        VulnerabilityReference.objects.create(
            vulnerability=vuln,
            reference_id=advisory.get('vulnerability_id'),
        )

        # (key holding the version list, model linking package to vulnerability)
        link_plan = (
            ('affected_version', ImpactedPackage),
            ('fixed_version', ResolvedPackage),
        )
        for versions_key, link_model in link_plan:
            for version in advisory.get(versions_key, []):
                pkg = Package.objects.create(
                    name=advisory.get('package_name'),
                    version=version,
                )
                link_model.objects.create(vulnerability=vuln, package=pkg)
3 changes: 2 additions & 1 deletion vulnerabilities/management/commands/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@
from django.core.management.base import BaseCommand, CommandError

from vulnerabilities import data_dump as dd
from vulnerabilities.scraper import debian, ubuntu, archlinux
from vulnerabilities.scraper import debian, ubuntu, archlinux, npm

IMPORTERS = {
'npm': lambda: dd.npm_dump(npm.scrape_vulnerabilities()),
'debian': lambda: dd.debian_dump(debian.scrape_vulnerabilities()),
'ubuntu': lambda: dd.ubuntu_dump(ubuntu.scrape_cves()),
'archlinux': lambda: dd.archlinux_dump(archlinux.scrape_vulnerabilities()),
Expand Down
138 changes: 138 additions & 0 deletions vulnerabilities/scraper/npm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Author: Navonil Das (@NavonilDas)
# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/vulnerablecode/
# The VulnerableCode software is licensed under the Apache License version 2.0.
# Data generated with VulnerableCode require an acknowledgment.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with VulnerableCode or any VulnerableCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with VulnerableCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# VulnerableCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.

import json
import re
import semantic_version
from urllib.request import urlopen

# Base URL of the npm registry; the '{}' placeholder receives an absolute
# path such as '/<package name>' or the advisories listing path below.
NPM_URL = 'https://registry.npmjs.org{}'
# Path (including query string) of the first advisories page to download.
PAGE = '/-/npm/v1/security/advisories?page=0'


def remove_spaces(x):
    """
    Normalise a version range string: collapse runs of spaces into a single
    space, drop the spaces following a relational operator (<, >, <=, >=)
    and strip a `v`/`V` character placed right after such an operator
    (ex: >=v1.2.3 becomes >=1.2.3).
    """
    cleaned = re.sub(r' +', ' ', x)
    # The operators are processed one at a time, in a fixed order, so that
    # each pass sees the output of the previous one.
    for operator in ('<', '>', '<=', '>='):
        cleaned = re.sub(operator + r' +', operator, cleaned)
    for operator in ('>=', '<=', '>', '<'):
        cleaned = re.sub(operator + r'[vV]', operator, cleaned)
    return cleaned


def get_all_version(package_name):
    """
    Return the list of all versions available for an npm module.

    `package_name` is appended to the registry base URL and the resulting
    package document is downloaded. The version strings are the keys of the
    document's `versions` mapping; an empty list is returned when the
    document has no such key.
    """
    package_url = NPM_URL.format('/' + package_name)
    # Use the response as a context manager so the connection is closed as
    # soon as the body has been read instead of lingering until collection.
    with urlopen(package_url) as response:
        data = json.loads(response.read())
    return list(data.get('versions', {}))


def extract_version(package_name, aff_version_range, fixed_version_range):
    """
    Separate the versions of `package_name` into affected and fixed ones.

    Every version returned by get_all_version() is tested against the two
    range strings. The result is a tuple (affected, fixed) of lists of
    version strings; a version matching the affected range is never listed
    as fixed. When either range is the empty string, ([], []) is returned
    without looking up any version.
    """
    if '' in (aff_version_range, fixed_version_range):
        return ([], [])

    affected_spec = semantic_version.NpmSpec(remove_spaces(aff_version_range))
    fixed_spec = semantic_version.NpmSpec(remove_spaces(fixed_version_range))

    affected, fixed = [], []
    for version_string in get_all_version(package_name):
        parsed = semantic_version.Version(version_string)
        if parsed in affected_spec:
            affected.append(version_string)
        elif parsed in fixed_spec:
            fixed.append(version_string)

    return (affected, fixed)


def extract_data(JSON):
    """
    Turn one page of advisories into a list of plain dicts.

    Each entry of JSON['objects'] that has a 'module_name' yields a dict
    with the keys package_name, summary, vulnerability_id (first listed CVE,
    or '' when there is none), fixed_version, affected_version and severity.
    Entries without a 'module_name' are skipped.
    """
    package_vulnerabilities = []
    for advisory in JSON.get('objects', []):
        if 'module_name' not in advisory:
            continue

        package_name = advisory['module_name']
        cves = advisory.get('cves', [])
        first_cve = cves[0] if len(cves) > 0 else ''

        affected, fixed = extract_version(
            package_name,
            advisory.get('vulnerable_versions', ''),
            advisory.get('patched_versions', ''),
        )

        package_vulnerabilities.append({
            'package_name': package_name,
            'summary': advisory.get('overview', ''),
            'vulnerability_id': first_cve,
            'fixed_version': fixed,
            'affected_version': affected,
            'severity': advisory.get('severity', ''),
        })
    return package_vulnerabilities


def scrape_vulnerabilities():
    """
    Download every page of npm security advisories and extract them.

    Starts at PAGE and follows the `next` path found under the `urls` key of
    each response until a page provides none. Returns one list holding the
    advisory dicts produced by extract_data() for all the pages, in order.
    """
    package_vulnerabilities = []
    next_page = PAGE
    while next_page:
        # Close each HTTP response as soon as its body has been read.
        with urlopen(NPM_URL.format(next_page)) as response:
            data = json.loads(response.read())
        # Extend in place: rebuilding the list with `+` for every page
        # would copy all previously collected advisories each time.
        package_vulnerabilities.extend(extract_data(data))
        next_page = data.get('urls', {}).get('next', False)
    return package_vulnerabilities
40 changes: 40 additions & 0 deletions vulnerabilities/tests/test_data/npm_test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"objects": [
{
"id": 12,
"created": "2015-10-17T19:41:46.382Z",
"updated": "2019-06-24T14:13:54.355Z",
"deleted": null,
"title": "Rosetta-Flash JSONP Vulnerability",
"found_by": {
"name": "Michele Spagnuolo"
},
"reported_by": {
"name": "Michele Spagnuolo"
},
"module_name": "hapi",
"cves": [
"CVE-2014-4671"
],
"vulnerable_versions": "< 6.1.0",
"patched_versions": ">= 6.1.0",
"overview": "This description taken from the pull request provided by Patrick Kettner.\n\n\n\nVersions 6.1.0 and earlier of hapi are vulnerable to a rosetta-flash attack, which can be used by attackers to send data across domains and break the browser same-origin-policy.\n\n\n",
"recommendation": "- Update hapi to version 6.1.1 or later.\n\nAlternatively, a solution previously implemented by Google, Facebook, and Github is to prepend callbacks with an empty inline comment. This will cause the flash parser to break on invalid inputs and prevent the issue, and how the issue has been resolved internally in hapi.",
"references": "- [PR #1766 - prepend jsonp callbacks with a comment to prevent the rosetta-flash vulnerability](https://github.com/spumko/hapi/pull/1766)\n\n- [Background from Michele Spagnuolo](http://miki.it/blog/2014/7/8/abusing-jsonp-with-rosetta-flash/)\n\nThanks to [Patrick Kettner](https://github.com/patrickkettner) for submitting a pull request to address this in hapi.",
"access": "public",
"severity": "moderate",
"cwe": "CWE-538",
"metadata": {
"module_type": "Network.Library",
"exploitability": 3,
"affected_components": ""
},
"url": "https://npmjs.com/advisories/12"
}
],
"total": 1179,
"urls": {
"next": "/-/npm/v1/security/advisories?page=2",
"prev": "/-/npm/v1/security/advisories?page=0"
}
}
1 change: 1 addition & 0 deletions vulnerabilities/tests/test_import_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_list_sources(self):
call_command('import', '--list', stdout=buf)

out = buf.getvalue()
self.assertIn('npm', out)
self.assertIn('debian', out)
self.assertIn('ubuntu', out)
self.assertIn('archlinux', out)
Expand Down
113 changes: 113 additions & 0 deletions vulnerabilities/tests/test_npm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Author: Navonil Das (@NavonilDas)
# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/vulnerablecode/
# The VulnerableCode software is licensed under the Apache License version 2.0.
# Data generated with VulnerableCode require an acknowledgment.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with VulnerableCode or any VulnerableCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with VulnerableCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# VulnerableCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.


from django.test import TestCase
from vulnerabilities.scraper.npm import remove_spaces, get_all_version, extract_data
import os
import json

# Absolute path of the directory that contains this test module.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Directory holding the test fixtures, located next to this module.
TEST_DATA = os.path.join(BASE_DIR, 'test_data/')


class NPMScrapperTest(TestCase):
    """
    Tests for the npm scraper helpers.

    test_get_all_version and test_extract_data look versions up through
    get_all_version() / extract_data() rather than from a local fixture, so
    they only check that the expected versions are a subset of the result.
    """

    def test_remove_space(self):
        cases = (
            (">= 1.2.1 || <= 2.1.1", '>=1.2.1 || <=2.1.1'),
            (">= v1.2.1 || <= V2.1.1", '>=1.2.1 || <=2.1.1'),
        )
        for raw, cleaned in cases:
            self.assertEqual(remove_spaces(raw), cleaned)

    def test_get_all_version(self):
        found = get_all_version('electron')
        expected = ['0.1.2', '2.0.0', '3.0.0',
                    '4.0.0', '5.0.0', '6.0.0', '7.0.0']
        self.assertTrue(set(expected).issubset(found))

    def test_extract_data(self):
        fixture_path = os.path.join(TEST_DATA, 'npm_test.json')
        with open(fixture_path) as fixture:
            test_data = json.load(fixture)

        expected = {
            'package_name': 'hapi',
            'vulnerability_id': 'CVE-2014-4671',
            'fixed_version': [
                '6.1.0', '6.2.0', '6.2.1', '6.2.2', '6.3.0', '6.4.0',
                '6.5.0', '6.5.1', '6.6.0', '6.7.0', '6.7.1', '6.8.0',
                '6.8.1', '6.9.0', '6.10.0', '6.11.0', '6.11.1', '7.0.0',
                '7.0.1', '7.1.0', '7.1.1', '7.2.0', '7.3.0', '7.4.0',
                '7.5.0', '7.5.1', '7.5.2', '8.0.0', '7.5.3', '8.1.0',
                '8.2.0', '8.3.0', '8.3.1', '8.4.0', '8.5.0', '8.5.1',
                '8.5.2', '8.5.3', '8.6.0', '8.6.1', '8.8.0', '8.8.1',
                '9.0.0', '9.0.1', '9.0.2', '9.0.3', '9.0.4', '9.1.0',
                '9.2.0', '9.3.0', '9.3.1', '10.0.0', '10.0.1', '10.1.0',
                '10.2.1', '10.4.0', '10.4.1', '10.5.0', '11.0.0', '11.0.1',
                '11.0.2', '11.0.3', '11.0.4', '11.0.5', '11.1.0', '11.1.1',
                '11.1.2', '11.1.3', '11.1.4', '12.0.0', '12.0.1', '12.1.0',
                '9.5.1', '13.0.0', '13.1.0', '13.2.0', '13.2.1', '13.2.2',
                '13.3.0', '13.4.0', '13.4.1', '13.4.2', '13.5.0', '14.0.0',
                '13.5.3', '14.1.0', '14.2.0', '15.0.1', '15.0.2', '15.0.3',
                '15.1.0', '15.1.1', '15.2.0', '16.0.0', '16.0.1', '16.0.2',
                '16.0.3', '16.1.0', '16.1.1', '16.2.0', '16.3.0', '16.3.1',
                '16.4.0', '16.4.1', '16.4.2', '16.4.3', '16.5.0', '16.5.1',
                '16.5.2', '16.6.0', '16.6.1', '16.6.2', '17.0.0', '17.0.1',
                '17.0.2', '17.1.0', '17.1.1', '17.2.0', '17.2.1', '16.6.3',
                '17.2.2', '17.2.3', '17.3.0', '17.3.1', '17.4.0', '17.5.0',
                '17.5.1', '17.5.2', '17.5.3', '17.5.4', '17.5.5', '17.6.0',
                '17.6.1', '17.6.2', '17.6.3', '16.6.4', '17.6.4', '16.6.5',
                '17.7.0', '16.7.0', '17.8.0', '17.8.1', '18.0.0', '17.8.2',
                '17.8.3', '18.0.1', '17.8.4', '18.1.0', '17.8.5'],
            'affected_version': [
                '0.0.1', '0.0.2', '0.0.3', '0.0.4', '0.0.5', '0.0.6', '0.1.0',
                '0.1.1', '0.1.2', '0.1.3', '0.2.0', '0.2.1', '0.3.0', '0.4.0',
                '0.4.1', '0.4.2', '0.4.3', '0.4.4', '0.5.0', '0.5.1', '0.6.0',
                '0.6.1', '0.5.2', '0.7.0', '0.7.1', '0.8.0', '0.8.1', '0.8.2',
                '0.8.3', '0.8.4', '0.9.0', '0.9.1', '0.9.2', '0.10.0', '0.10.1',
                '0.11.0', '0.11.1', '0.11.2', '0.11.3', '0.12.0', '0.13.0',
                '0.13.1', '0.13.2', '0.11.4', '0.13.3', '0.14.0', '0.14.1',
                '0.14.2', '0.15.0', '0.15.1', '0.15.2', '0.15.3', '0.15.4',
                '0.15.5', '0.15.6', '0.15.7', '0.15.8', '0.15.9', '0.16.0',
                '1.0.0', '1.0.1', '1.0.2', '1.0.3', '1.1.0', '1.2.0', '1.3.0',
                '1.4.0', '1.5.0', '1.6.0', '1.6.1', '1.6.2', '1.7.0', '1.7.1',
                '1.7.2', '1.7.3', '1.8.0', '1.8.1', '1.8.2', '1.8.3', '1.9.0',
                '1.9.1', '1.9.2', '1.9.3', '1.9.4', '1.9.5', '1.9.6', '1.9.7',
                '1.10.0', '1.11.0', '1.11.1', '1.12.0', '1.13.0', '1.14.0',
                '1.15.0', '1.16.0', '1.16.1', '1.17.0', '1.18.0', '1.19.0',
                '1.19.1', '1.19.2', '1.19.3', '1.19.4', '1.19.5', '1.20.0',
                '2.0.0', '2.1.0', '2.1.1', '2.1.2', '2.2.0', '2.3.0', '2.4.0',
                '2.5.0', '2.6.0', '3.0.0', '3.0.1', '3.0.2', '3.1.0', '4.0.0',
                '4.0.1', '4.0.2', '4.0.3', '4.1.0', '4.1.1', '4.1.2', '4.1.3',
                '4.1.4', '5.0.0', '5.1.0', '6.0.0', '6.0.1', '6.0.2'],
            'severity': 'moderate'
        }
        got = extract_data(test_data)[0]

        # New releases may have been published since this list was written,
        # so only require the expected versions to be contained in the result.
        for versions_key in ('fixed_version', 'affected_version'):
            self.assertTrue(
                set(expected[versions_key]).issubset(got[versions_key]))

        for scalar_key in ('package_name', 'severity', 'vulnerability_id'):
            self.assertEqual(expected[scalar_key], got[scalar_key])

0 comments on commit 58e3ff3

Please sign in to comment.