Skip to content

Commit

Permalink
Validate actual file properties such as checksum
Browse files Browse the repository at this point in the history
A re-do of #146

Co-Authored-By: Michael R. Crusoe <michael.crusoe@gmail.com>
  • Loading branch information
tom-tan and mr-c committed May 27, 2023
1 parent 5cd65d5 commit 4c57396
Show file tree
Hide file tree
Showing 3 changed files with 236 additions and 27 deletions.
136 changes: 114 additions & 22 deletions cwltest/compare.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""Compare utilities for CWL objects."""

import hashlib
import json
from typing import Any, Dict, Optional, Set
import os.path
import urllib.parse
from typing import Any, Callable, Dict, Optional, Set


class CompareFail(Exception):
Expand All @@ -22,11 +25,11 @@ def format(


def _check_keys(
keys: Set[str], expected: Dict[str, Any], actual: Dict[str, Any]
keys: Set[str], expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
) -> None:
for k in keys:
try:
compare(expected.get(k), actual.get(k))
compare(expected.get(k), actual.get(k), skip_details)
except CompareFail as e:
raise CompareFail.format(
expected, actual, f"field {k!r} failed comparison: {str(e)}"
Expand All @@ -48,10 +51,12 @@ def _compare_contents(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
)


def _compare_dict(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
def _compare_dict(
expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
) -> None:
for c in expected:
try:
compare(expected[c], actual.get(c))
compare(expected[c], actual.get(c), skip_details)
except CompareFail as e:
raise CompareFail.format(
expected, actual, f"failed comparison for key {c!r}: {e}"
Expand All @@ -62,7 +67,9 @@ def _compare_dict(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
raise CompareFail.format(expected, actual, "unexpected key '%s'" % k)


def _compare_directory(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
def _compare_directory(
expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
) -> None:
if actual.get("class") != "Directory":
raise CompareFail.format(
expected, actual, "expected object with a class 'Directory'"
Expand All @@ -75,7 +82,7 @@ def _compare_directory(expected: Dict[str, Any], actual: Dict[str, Any]) -> None
found = False
for j in actual["listing"]:
try:
compare(i, j)
compare(i, j, skip_details)
found = True
break
except CompareFail:
Expand All @@ -86,19 +93,32 @@ def _compare_directory(expected: Dict[str, Any], actual: Dict[str, Any]) -> None
actual,
"%s not found" % json.dumps(i, indent=4, sort_keys=True),
)
_compare_file(expected, actual)
_compare_file(expected, actual, skip_details)


def _compare_file(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
_compare_location(expected, actual)
def _compare_file(
expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
) -> None:
_compare_location(expected, actual, skip_details)
if "contents" in expected:
_compare_contents(expected, actual)
other_keys = set(expected.keys()) - {"path", "location", "listing", "contents"}
_check_keys(other_keys, expected, actual)
_check_keys(other_keys, expected, actual)


def _compare_location(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
if actual.get("class") == "File" and not skip_details:
_compare_checksum(expected, actual)
_compare_size(expected, actual)
other_keys = set(expected.keys()) - {
"path",
"location",
"listing",
"contents",
"checksum",
"size",
}
_check_keys(other_keys, expected, actual, skip_details)


def _compare_location(
expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
) -> None:
if "path" in expected:
comp = "path"
if "path" not in actual:
Expand All @@ -109,7 +129,19 @@ def _compare_location(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
return
if actual.get("class") == "Directory":
actual[comp] = actual[comp].rstrip("/")

exist_fun: Callable[[str], bool] = os.path.isdir
else:
exist_fun = os.path.isfile
if "path" in actual:
path = urllib.parse.urlparse(actual["path"]).path
else:
path = urllib.parse.urlparse(actual["location"]).path
if not exist_fun(path) and not skip_details:
raise CompareFail.format(
expected,
actual,
f"{actual[comp]} does not exist",
)
if expected[comp] != "Any" and (
not (
actual[comp].endswith("/" + expected[comp])
Expand All @@ -123,7 +155,67 @@ def _compare_location(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
)


def compare(expected: Any, actual: Any) -> None:
def _compare_checksum(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
if "path" in actual:
path = urllib.parse.urlparse(actual["path"]).path
else:
path = urllib.parse.urlparse(actual["location"]).path
checksum = hashlib.sha1() # nosec
with open(path, "rb") as f:
contents = f.read(1024 * 1024)
while contents != b"":
checksum.update(contents)
contents = f.read(1024 * 1024)
actual_checksum_on_disk = f"sha1${checksum.hexdigest()}"
if "checksum" in actual:
actual_checksum_declared = actual["checksum"]
if actual_checksum_on_disk != actual_checksum_declared:
raise CompareFail.format(
expected,
actual,
"Output file checksums do not match: actual "
f"{actual_checksum_on_disk!r} on disk is not equal to actual "
f"{actual_checksum_declared!r} in the output object",
)
if "checksum" in expected:
expected_checksum = expected["checksum"]
if expected_checksum != actual_checksum_on_disk:
raise CompareFail.format(
expected,
actual,
"Output file checksums do not match: actual "
f"{actual_checksum_on_disk!r} is not equal to expected {expected_checksum!r}",
)


def _compare_size(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
if "path" in actual:
path = urllib.parse.urlparse(actual["path"]).path
else:
path = urllib.parse.urlparse(actual["location"]).path
actual_size_on_disk = os.path.getsize(path)
if "size" in actual:
actual_size_declared = actual["size"]
if actual_size_on_disk != actual_size_declared:
raise CompareFail.format(
expected,
actual,
"Output file sizes do not match: actual "
f"{actual_size_on_disk!r} on disk is not equal to actual "
f"{actual_size_declared!r}' in the output object",
)
if "size" in expected:
expected_size = expected["size"]
if expected_size != actual_size_on_disk:
raise CompareFail.format(
expected,
actual,
"Output file sizes do not match: actual "
f"{actual_size_on_disk!r} is not equal to expected {expected_size!r}",
)


def compare(expected: Any, actual: Any, skip_details: bool = False) -> None:
"""Compare two CWL objects."""
if expected == "Any":
return
Expand All @@ -136,11 +228,11 @@ def compare(expected: Any, actual: Any) -> None:
raise CompareFail.format(expected, actual)

if expected.get("class") == "File":
_compare_file(expected, actual)
_compare_file(expected, actual, skip_details)
elif expected.get("class") == "Directory":
_compare_directory(expected, actual)
_compare_directory(expected, actual, skip_details)
else:
_compare_dict(expected, actual)
_compare_dict(expected, actual, skip_details)

elif isinstance(expected, list):
if not isinstance(actual, list):
Expand All @@ -150,7 +242,7 @@ def compare(expected: Any, actual: Any) -> None:
raise CompareFail.format(expected, actual, "lengths don't match")
for c in range(0, len(expected)):
try:
compare(expected[c], actual[c])
compare(expected[c], actual[c], skip_details)
except CompareFail as e:
raise CompareFail.format(expected, actual, e) from e
else:
Expand Down
123 changes: 120 additions & 3 deletions tests/test_compare.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os
from pathlib import Path
from typing import Any, Dict

import pytest
from cwltest.compare import CompareFail, compare
from cwltest.compare import CompareFail, _compare_directory, _compare_file, compare

from .util import get_data

Expand Down Expand Up @@ -37,6 +39,7 @@ def test_compare_contents_success() -> None:
"size": 2,
"class": "File",
"contents": "2\n",
"checksum": "sha1$7448d8798a4380162d4b56f9b452e2f6f9e24e7a",
}
actual = {
"basename": "cores.txt",
Expand All @@ -49,6 +52,120 @@ def test_compare_contents_success() -> None:
compare(expected, actual)


def test_compare_contents_not_exist() -> None:
expected = {
"location": "cores.txt",
"class": "File",
}
actual = {
"basename": "cores.txt",
"class": "File",
"location": "file:///var/folders/8x/2df05_7j20j6r8y81w4qf43r0000gn/T/tmpG0EkrS/cores.txt",
"path": "/none/exist/path/to/cores.txt",
"size": 2,
}
with pytest.raises(CompareFail):
_compare_file(expected, actual, False)


def test_compare_file_different_size(tmp_path: Path) -> None:
expected = {
"location": "cores.txt",
"size": 2,
"class": "File",
}

path = tmp_path / "cores.txt"
with open(path, "w") as f:
f.write("hello")

actual = {
"basename": "cores.txt",
"class": "File",
"location": str(path),
}
with pytest.raises(CompareFail):
_compare_file(expected, actual, False)


def test_compare_file_different_checksum(tmp_path: Path) -> None:
expected = {
"location": "cores.txt",
"class": "File",
"checksum": "sha1$7448d8798a4380162d4b56f9b452e2f6f9e24e7a",
}

path = tmp_path / "cores.txt"
with open(path, "w") as f:
f.write("hello")

actual = {
"basename": "cores.txt",
"class": "File",
"location": str(path),
}
with pytest.raises(CompareFail):
_compare_file(expected, actual, False)


def test_compare_file_inconsistent_size(tmp_path: Path) -> None:
expected = {
"location": "cores.txt",
"class": "File",
}

path = tmp_path / "cores.txt"
with open(path, "w") as f:
f.write("hello")

actual = {
"basename": "cores.txt",
"class": "File",
"location": str(path),
"size": 65535,
}
with pytest.raises(CompareFail):
_compare_file(expected, actual, False)


def test_compare_file_inconsistent_checksum(tmp_path: Path) -> None:
expected = {
"location": "cores.txt",
"class": "File",
}

path = tmp_path / "cores.txt"
with open(path, "w") as f:
f.write("hello")

actual = {
"basename": "cores.txt",
"checksum": "inconsistent-checksum",
"class": "File",
"location": str(path),
}
with pytest.raises(CompareFail):
_compare_file(expected, actual, False)


def test_compare_directory(tmp_path: Path) -> None:
expected = {
"location": "dir",
"class": "Directory",
"listing": [],
}

path = tmp_path / "dir"
os.makedirs(path)

actual = {
"class": "Directory",
"location": str(path),
"listing": [],
}
_compare_directory(expected, actual, False)


def test_compare_directory_success() -> None:
expected = {
"stuff": {
Expand Down Expand Up @@ -100,7 +217,7 @@ def test_compare_directory_success() -> None:
],
}
}
compare(expected, actual)
compare(expected, actual, skip_details=True)


def test_compare_directory_failure_different_listing() -> None:
Expand Down Expand Up @@ -270,7 +387,7 @@ def test_compare_file_success() -> None:
"path": "/var/folders/8x/2df05_7j20j6r8y81w4qf43r0000gn/T/tmpG0EkrS/cores.txt",
"size": 2,
}
compare(expected, actual)
compare(expected, actual, skip_details=True)


def test_compare_list_failure_missing() -> None:
Expand Down
4 changes: 2 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tox]
envlist =
py{36,37,38,39,310,311}-lint,
py{37,38,39,310,311}-lint,
py{36,37,38,39,310,311}-unit,
py{36,37,38,39,310,311}-bandit,
py{37,38,39,310,311}-mypy,
Expand Down Expand Up @@ -58,7 +58,7 @@ commands =
py{36,37,38,39,310,311}-unit: python -m pip install -U pip setuptools wheel
py{36,37,38,39,310,311}-unit: python -m pytest --cov --cov-config={toxinidir}/.coveragerc --cov-append {posargs}
py{36,37,38,39,310,311}-unit: coverage xml
py{36,37,38,39,310,311}-bandit: bandit --recursive cwltest --exclude tests/*
py{36,37,38,39,310,311}-bandit: bandit --recursive cwltest
py{36,37,38,39,310,311}-lint: make flake8
py{36,37,38,39,310,311}-lint: make format-check
py{37,38,39,310,311}-mypy: make mypy
Expand Down

0 comments on commit 4c57396

Please sign in to comment.