Skip to content

Commit

Permalink
Support parsing metadata filenames for purls (#36)
Browse files Browse the repository at this point in the history
Supports parsing the package name and version from the filenames
of .dsc, copyright and changelog files, which are typically present
in Debian package/metadata archives.

Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
  • Loading branch information
AyanSinhaMahapatra authored Jan 31, 2024
1 parent 0c20724 commit 1ec0b95
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 2 deletions.
25 changes: 23 additions & 2 deletions src/debian_inspector/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ def from_filename(cls, filename):
return cls(
name=name,
version=version,
original_filename=filename)
original_filename=filename
)

def to_dict(self):
data = {}
Expand All @@ -110,16 +111,36 @@ def to_tuple(self):
return tuple(v for v in self.to_dict().values() if v != 'original_filename')


@attrs
class CodeMetadata(CodeArchive):
    """
    A .dsc, copyright or changelog file found in a Debian package/metadata
    archive. Such a file carries package information in its filename or in
    its file contents.

    For example in ./changelogs/main/d/diffutils/ there are files such as:
    - diffutils_3.7-5_copyright
    - diffutils_3.7-5_changelog
    And in ./pool/main/b/base-files/ there are files such as:
    - base-files_11.1+deb11u8.dsc
    """


def get_nva(filename):
"""
Return a tuple of (name string, Version object, architecture string or
None) parsed from the `filename` of a .deb, .udeb, .orig or .debian archive,
or of a .dsc, _changelog or _copyright metadata file.
"""
is_known = False
if filename.endswith(('.deb', '.udeb')):
if filename.endswith(('.deb', '.udeb', '.dsc')):
basename, _extension = path.splitext(filename)
is_known = True

elif filename.endswith(('_changelog', '_copyright')):
basename, _, _ = filename.rpartition("_")
is_known = True

elif filename.endswith(('.tar.gz', '.tar.xz', '.tar.bz2', '.tar.lzma')):
# A Format: 3.0 archive.
# Note that we ignore the legacy .diff.gz files for Format: 1.0
Expand Down
18 changes: 18 additions & 0 deletions tests/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,24 @@ def test_CodeArchive_from_filename(self):
original_filename=fn)
assert debarch == expected

def test_CodeMetadata_from_filename_dsc(self):
    # A .dsc filename is split into a package name and a version.
    dsc_filename = 'base-files_11.1+deb11u8.dsc'
    parsed = package.CodeMetadata.from_filename(dsc_filename)

    expected_version = version.Version(
        epoch=0,
        upstream='11.1+deb11u8',
        revision='0',
    )
    assert parsed == package.CodeMetadata(
        name='base-files',
        version=expected_version,
        original_filename=dsc_filename,
    )

def test_CodeMetadata_from_filename_copyright(self):
    # A "<name>_<version>_copyright" filename is split into a package name
    # and a version.
    copyright_filename = 'bash_4.1-3+deb6u2_copyright'
    parsed = package.CodeMetadata.from_filename(copyright_filename)

    expected_version = version.Version(
        epoch=0,
        upstream='4.1',
        revision='3+deb6u2',
    )
    assert parsed == package.CodeMetadata(
        name='bash',
        version=expected_version,
        original_filename=copyright_filename,
    )

def test_CodeArchive_from_filename_supports_tar_gz_bz2_and_xz(self):
package.CodeArchive.from_filename('python2.7_2.7.3-0ubuntu3.4.orig.tar.gz')
package.CodeArchive.from_filename('python2.7_2.7.3-0ubuntu3.4.debian.tar.gz')
Expand Down

0 comments on commit 1ec0b95

Please sign in to comment.