From 05d02d29b91c10f6953d01f0ace8a75ce9b508a3 Mon Sep 17 00:00:00 2001 From: Fraser Tweedale Date: Fri, 21 Jul 2023 15:18:18 +1000 Subject: [PATCH] Hackage and GHC version enumeration (#1418) (#1463) Add version enumeration for the Haskell ecosystems: Hackage (the main package index) and GHC (the toolchain). Fixes: https://github.com/google/osv.dev/issues/1418 --- osv/ecosystems/_ecosystems.py | 5 +- osv/ecosystems/haskell.py | 153 +++++++++++++++++++++++++++++++++ osv/ecosystems/haskell_test.py | 48 +++++++++++ 3 files changed, 204 insertions(+), 2 deletions(-) create mode 100644 osv/ecosystems/haskell.py create mode 100644 osv/ecosystems/haskell_test.py diff --git a/osv/ecosystems/_ecosystems.py b/osv/ecosystems/_ecosystems.py index 6987b686444..43c7055ec67 100644 --- a/osv/ecosystems/_ecosystems.py +++ b/osv/ecosystems/_ecosystems.py @@ -16,6 +16,7 @@ from .helper_base import Ecosystem, OrderingUnsupportedEcosystem from .alpine import Alpine from .debian import Debian +from .haskell import Hackage, GHC from .maven import Maven from .nuget import NuGet from .packagist import Packagist @@ -42,8 +43,8 @@ 'GitHub Actions': OrderingUnsupportedEcosystem(), 'Linux': OrderingUnsupportedEcosystem(), 'OSS-Fuzz': OrderingUnsupportedEcosystem(), - 'Hackage': OrderingUnsupportedEcosystem(), - 'GHC': OrderingUnsupportedEcosystem(), + 'Hackage': Hackage(), + 'GHC': GHC(), # Ecosystems which require a release version for enumeration, which is # handled separately in get(). 'AlmaLinux': OrderingUnsupportedEcosystem(), diff --git a/osv/ecosystems/haskell.py b/osv/ecosystems/haskell.py new file mode 100644 index 00000000000..7500dacde39 --- /dev/null +++ b/osv/ecosystems/haskell.py @@ -0,0 +1,153 @@ +# Copyright 2021 Google LLC +# Copyright 2023 Fraser Tweedale +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Haskell ecosystem helpers. + +Contact the Haskell Security Response Team +if something is broken and you need help to fix it. + +""" + +import requests +import typing + +from . import config +from .helper_base import Ecosystem, EnumerateError +from .. import semver_index + + +class Hackage(Ecosystem): + """Hackage (Haskell package index) ecosystem.""" + + _API_PACKAGE_URL = 'https://hackage.haskell.org/package/{package}.json' + + def sort_key(self, version): + """Sort key. + + The Haskell package version data type is defined at + https://hackage.haskell.org/package/Cabal-syntax/docs/Distribution-Types-Version.html + + """ + return [int(x) for x in version.split('.')] + + def enumerate_versions(self, + package, + introduced, + fixed=None, + last_affected=None, + limits=None): + """Enumerate versions.""" + response = requests.get( + self._API_PACKAGE_URL.format(package=package), timeout=config.timeout) + if response.status_code == 404: + raise EnumerateError(f'Package {package} not found') + if response.status_code != 200: + raise RuntimeError( + f'Failed to get Hackage versions for {package} with: {response.text}') + + response = response.json() + versions = list(response.keys()) + + self.sort_versions(versions) + return self._get_affected_versions(versions, introduced, fixed, + last_affected, limits) + + +class GHC(Ecosystem): + """Glasgow Haskell Compiler (GHC) ecosystem.""" + + _API_PACKAGE_URL = ('https://gitlab.haskell.org' + '/api/v4/projects/3561/repository/tags?per_page=-1') + """ + Historical versions do not have tags in the Git repo, so we 
hardcode the + list. See https://github.com/google/osv.dev/pull/1463 for discussion. + """ + HISTORICAL_VERSIONS = [ + '0.29', + '2.10', + '3.02', '3.03', + '4.02', '4.04', '4.06', '4.08', '4.08.1', '4.08.2', + '5.00', '5.00.1', '5.00.2', '5.02', '5.02.1', '5.02.2', '5.02.3', + '5.04', '5.04.1', '5.04.2', '5.04.3', + '6.0', '6.0.1', + '6.2', '6.2.1', '6.2.2', + '6.4', '6.4.1', '6.4.2', '6.4.3', + '6.6', '6.6.1', + '6.8.1', '6.8.2', '6.8.3', + '6.10.1', '6.10.2-rc1', '6.10.2', '6.10.3', '6.10.4-rc1', '6.10.4', + '6.12.1-rc1', '6.12.1', '6.12.2-rc1', '6.12.2', '6.12.3-rc1', '6.12.3', + '7.0.1-rc1', '7.0.1-rc2', '7.0.1', '7.0.2-rc1', '7.0.2-rc2', '7.0.2', + '7.0.3', '7.0.4-rc1', '7.0.4', + ] # yapf: disable + + def sort_key(self, version): + """Sort key.""" + return semver_index.parse(version) + + @classmethod + def tag_to_version(cls, tag: str) -> typing.Optional[str]: + """Convert a tag to a release version, or return None if invalid. + + GHC release tags follow the scheme: + + - ghc-<major>.<minor>.<patch>-alpha<n> + - ghc-<major>.<minor>.<patch>-rc<n> + - ghc-<major>.<minor>.<patch>-release + + """ + parts = tag.split('-') + if len(parts) == 3 and parts[0] == 'ghc' \ + and cls.is_major_minor_patch(parts[1]): + if parts[2].startswith('alpha') or parts[2].startswith('rc'): + return '-'.join(parts[1:3]) + if parts[2] == 'release': + return parts[1] + return None + + @staticmethod + def is_major_minor_patch(s: str) -> bool: + """Check that string matches ``<major>.<minor>.<patch>``.""" + parts = s.split('.') + return len(parts) == 3 and all(x.isdigit() for x in parts) + + def enumerate_versions(self, + package, + introduced, + fixed=None, + last_affected=None, + limits=None): + """Enumerate versions. + + Different components of GHC are part of the same software release. + So we ignore the package (component) name. 
+ + """ + + response = requests.get(self._API_PACKAGE_URL, timeout=config.timeout) + if response.status_code == 404: + raise EnumerateError('GHC tag list not found') + if response.status_code != 200: + raise RuntimeError(f'Failed to get GHC versions with: {response.text}') + + response = response.json() + versions = self.HISTORICAL_VERSIONS + [ + self.tag_to_version(x['name']) + for x in response + if self.tag_to_version(x['name']) + ] + + self.sort_versions(versions) + return self._get_affected_versions(versions, introduced, fixed, + last_affected, limits) diff --git a/osv/ecosystems/haskell_test.py b/osv/ecosystems/haskell_test.py new file mode 100644 index 00000000000..a4e68623bb3 --- /dev/null +++ b/osv/ecosystems/haskell_test.py @@ -0,0 +1,48 @@ +# Copyright 2021 Google LLC +# Copyright 2023 Fraser Tweedale +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Haskell ecosystem helper tests.""" + +import unittest + +from .. 
import ecosystems + + +class HackageEcosystemTest(unittest.TestCase): + """Hackage ecosystem helper tests.""" + + def test_next_version(self): + """Test next_version.""" + ecosystem = ecosystems.get('Hackage') + self.assertEqual('1.0.0.0', ecosystem.next_version('aeson', '0.11.3.0')) + self.assertEqual('1.0.1.0', ecosystem.next_version('aeson', '1.0.0.0')) + self.assertEqual('0.1.26.0', ecosystem.next_version('jose', '0')) + with self.assertRaises(ecosystems.EnumerateError): + ecosystem.next_version('doesnotexist123456', '1') + + +class GHCEcosystemTest(unittest.TestCase): + """GHC ecosystem helper tests.""" + + def test_next_version(self): + """Test next_version.""" + ecosystem = ecosystems.get('GHC') + self.assertEqual('0.29', ecosystem.next_version('GHC', '0')) + self.assertEqual('7.0.4', ecosystem.next_version('GHC', '7.0.4-rc1')) + # 7.0.4 is the last of the hardcoded versions + self.assertEqual('7.2.1', ecosystem.next_version('GHC', '7.0.4')) + + # The whole GHC ecosystem is versioned together. Enumeration ignores + # package/component name. Therefore this should NOT raise: + ecosystem.next_version('doesnotexist123456', '1')