Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bring File closer to CWL specification #26

Merged
merged 2 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 54 additions & 9 deletions src/blue_cwl/core/cwl_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,20 @@
"""Types module."""

import enum
import hashlib
import os
from pathlib import Path
from typing import Literal

from entity_management.util import unquote_uri_path

from blue_cwl.core.common import CustomBaseModel
from blue_cwl.core.exceptions import CWLError

# According to the CWL spec, only the first 64 KiB of a file are loaded into `contents`:
# https://www.commonwl.org/v1.0/CommandLineTool.html#File
FILE_BUFFER_SIZE = 64 * 1024

CWLType = Literal[
"null",
"boolean",
Expand All @@ -35,10 +43,11 @@ class CWLWorkflowType(enum.Enum):
STEP = enum.auto()


class _FileLike(CustomBaseModel):
class _Path(CustomBaseModel):
"""Path class."""

path: str | None = None
location: str | None = None
basename: str | None = None

def __init__(self, **data):
"""Initialize a FileLike object."""
Expand All @@ -53,19 +62,55 @@ def __init__(self, **data):
data["location"] = f"file://{os.path.abspath(path)}"

if location and not path:
data["path"] = str(location)[7:]

if data.get("basename", None) is None:
data["basename"] = os.path.basename(data["path"])
path = data["path"] = unquote_uri_path(location)

super().__init__(**data)

@property
def basename(self):
    """Final component of ``path`` (the name without any parent directories)."""
    final_component = Path(self.path).name
    return final_component

class File(_FileLike):
"""File class."""

class File(_Path):
"""File class."""

class Directory(_FileLike):
@property
def dirname(self):
    """Parent directory of ``path``, rendered as a string."""
    containing_dir = Path(self.path).parent
    return str(containing_dir)

@property
def nameroot(self):
    """Final component of ``path`` with its last extension stripped."""
    stem = Path(self.path).stem
    return stem

@property
def nameext(self):
    """Last extension of ``path``'s final component, leading dot included."""
    extension = Path(self.path).suffix
    return extension

@property
def contents(self):
    """Return the leading part of the file, capped at FILE_BUFFER_SIZE.

    The file at ``path`` is opened in text mode and decoded as UTF-8. Because
    the read happens in text mode, the cap is counted in characters.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the data read is not valid UTF-8.
    """
    # ``buffering`` only sizes the I/O buffer, it does not limit read();
    # the cap has to be passed to read() itself. The context manager also
    # ensures the handle is closed instead of being left to the garbage collector.
    with Path(self.path).open(encoding="utf-8") as stream:
        return stream.read(FILE_BUFFER_SIZE)

@property
def size(self):
    """Size of the file at ``path`` as reported by ``stat`` (``st_size``)."""
    stat_result = Path(self.path).stat()
    return stat_result.st_size

@property
def checksum(self):
    """Hexadecimal SHA-1 digest of the bytes stored at ``path``."""
    with open(self.path, "rb") as stream:
        digest = hashlib.sha1()  # noqa: S324
        # Feed the hash one FILE_BUFFER_SIZE block at a time so the whole
        # file never has to be held in memory; an empty read marks EOF.
        for block in iter(lambda: stream.read(FILE_BUFFER_SIZE), b""):
            digest.update(block)
    return digest.hexdigest()


class Directory(_Path):
    """Directory class, sharing the path/location handling of ``_Path``."""


Expand Down
62 changes: 51 additions & 11 deletions tests/unit/core/test_cwl_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,67 @@
import pytest
from blue_cwl.core.exceptions import CWLError
from blue_cwl.core import cwl_types as tested
from blue_cwl.utils import cwd


def test_File():
res = tested.File(path="/gpfs/foo.txt")
assert res.path == "/gpfs/foo.txt"
def test_File(tmp_path):
"""
File fields: https://www.commonwl.org/v1.0/CommandLineTool.html#File
"""

contents = "foo"

path = tmp_path / "foo.txt"
path.write_text(contents)

parent_dir = str(tmp_path)
path = str(path)
uri = f"file://{path}"
size = 3
checksum = "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"

res = tested.File(path=path)
assert res.path == path
assert res.basename == "foo.txt"
assert res.dirname == parent_dir
assert res.nameroot == "foo"
assert res.nameext == ".txt"
assert os.path.isabs(res.location[7:])
assert res.location == "file:///gpfs/foo.txt"
assert res.location == uri
assert res.size == size
assert res.contents == contents
assert res.checksum == checksum

res = tested.File(path="foo.txt")
assert res.path == "foo.txt"
assert os.path.isabs(res.location[7:])
assert res.location.endswith("foo.txt")
assert res.basename == "foo.txt"
with cwd(tmp_path):
res = tested.File(path="foo.txt")
assert res.path == "foo.txt"
assert res.basename == "foo.txt"
assert res.dirname == "."
assert res.nameroot == "foo"
assert res.nameext == ".txt"
assert os.path.isabs(res.location[7:])
assert res.location == uri
assert res.size == size
assert res.contents == contents
assert res.checksum == checksum

res = tested.File(location="file:///gpfs/foo.txt")
assert res.path == "/gpfs/foo.txt"
res = tested.File(location=uri)
assert res.path == path
assert res.basename == "foo.txt"
assert res.dirname == parent_dir
assert res.nameroot == "foo"
assert res.nameext == ".txt"
assert os.path.isabs(res.location[7:])
assert res.location == uri
assert res.size == size
assert res.contents == contents
assert res.checksum == checksum


def test_Directory():
"""
Directory fields: https://www.commonwl.org/v1.0/CommandLineTool.html#Directory
"""
res = tested.Directory(path="/gpfs/foo")
assert res.path == "/gpfs/foo"
assert res.basename == "foo"
Expand Down
Loading