Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detection of known CNS errors when failed run #1018

Merged
merged 30 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
3966589
function and exception for known cns error
VGPReys Sep 11, 2024
7defcb0
slight optimization
VGPReys Sep 11, 2024
5739a57
adding error detection to base module
VGPReys Sep 11, 2024
bfbbeb6
adding tests
VGPReys Sep 11, 2024
5d8250b
uni/integration tests
VGPReys Sep 11, 2024
182380c
lints
VGPReys Sep 11, 2024
4bcaf8d
check undetected CNS errors
VGPReys Sep 11, 2024
824afad
Update src/haddock/gear/known_cns_errors.py
VGPReys Sep 12, 2024
436f2e7
Update src/haddock/gear/known_cns_errors.py
VGPReys Sep 12, 2024
16c299e
Merge branch 'main' into known-cns-errors
VGPReys Sep 12, 2024
d81f1f2
fix typo in symmultimer
VGPReys Sep 12, 2024
5b4adef
traceback files containing error
VGPReys Sep 12, 2024
0c8e202
adding hint
VGPReys Sep 12, 2024
5eae673
commenting list of files
VGPReys Sep 12, 2024
aa7abf7
all handle of .out.gz file parsing too
VGPReys Sep 12, 2024
130fff4
fix types
VGPReys Sep 12, 2024
c42ad04
Add comment
VGPReys Sep 12, 2024
b530831
Merge branch 'main' into known-cns-errors
VGPReys Sep 16, 2024
b5d5fd8
uni/integration tests
VGPReys Sep 16, 2024
9a59a9d
update cns modules to send a error filename
VGPReys Sep 16, 2024
659675d
fix variable name in topoaa
VGPReys Sep 16, 2024
78cf343
remove removal of .err files
VGPReys Sep 16, 2024
678e4d0
fix file ext to match .err
VGPReys Sep 16, 2024
376c83d
Merge branch 'main' into known-cns-errors
VGPReys Sep 17, 2024
250fa8f
Merge branch 'main' into known-cns-errors
VGPReys Sep 17, 2024
e058971
modify file extension to .cnserr
VGPReys Sep 17, 2024
4962179
update integration tests with new file extension
VGPReys Sep 17, 2024
cef0a6f
Update src/haddock/gear/clean_steps.py
VGPReys Sep 18, 2024
43e97e8
Merge branch 'main' into known-cns-errors
VGPReys Sep 18, 2024
5e11238
addressing reviews
VGPReys Sep 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 74 additions & 5 deletions integration_tests/test_cnsjob.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import pytest
import pytest_mock # noqa : F401
import random
import tempfile
from haddock.libs.libsubprocess import CNSJob

from pathlib import Path
from typing import Generator

from haddock.gear.known_cns_errors import KNOWN_ERRORS
from haddock.libs.libsubprocess import CNSJob

from integration_tests import GOLDEN_DATA, CNS_EXEC



@pytest.fixture
def cns_output_pdb_filename() -> Generator[str, None, None]:
with tempfile.NamedTemporaryFile(suffix=".pdb", delete=False) as output_f:
Expand All @@ -19,20 +25,34 @@ def cns_output_filename() -> Generator[str, None, None]:
yield out_f.name


@pytest.fixture(name="cns_error_filename")
def fixture_cns_error_filename() -> Generator[str, None, None]:
    """Yield the name of a temporary file carrying the `.cnserr` suffix.

    The file is created with ``delete=False``, so it is not removed when the
    handle is closed at fixture teardown.
    """
    tmp_file = tempfile.NamedTemporaryFile(suffix=".cnserr", delete=False)
    # The handle stays open for the duration of the test and is closed after
    with tmp_file as handle:
        yield handle.name


@pytest.fixture
def cnsjob(
    cns_input_filename,
    cns_output_filename,
    cns_error_filename,
) -> Generator[CNSJob, None, None]:
    """Yield a CNSJob built from the input, output and error file fixtures.

    Each filename fixture is converted to a `Path` before being handed to
    the job; the CNS executable is taken from `CNS_EXEC`.
    """
    job_files = {
        "input_file": Path(cns_input_filename),
        "output_file": Path(cns_output_filename),
        "error_file": Path(cns_error_filename),
    }
    yield CNSJob(cns_exec=CNS_EXEC, **job_files)


@pytest.fixture
def cnsjob_no_files(
    cns_inp_str,
    cns_error_filename,
) -> Generator[CNSJob, None, None]:
    """Yield a CNSJob created without any `output_file`.

    The value of the `cns_inp_str` fixture is passed unchanged as
    `input_file`, while the error filename is converted to a `Path`.
    """
    error_path = Path(cns_error_filename)
    job = CNSJob(
        input_file=cns_inp_str,
        error_file=error_path,
        cns_exec=CNS_EXEC,
    )
    yield job

Expand Down Expand Up @@ -100,6 +120,7 @@ def test_cnsjob_run_compress_out(cnsjob, cns_output_filename, cns_output_pdb_fil
cnsjob.run(
compress_inp=False,
compress_out=True,
compress_err=False,
compress_seed=False,
)

Expand All @@ -110,10 +131,57 @@ def test_cnsjob_run_compress_out(cnsjob, cns_output_filename, cns_output_pdb_fil
assert Path(cns_output_pdb_filename).stat().st_size > 0


def test_cnsjob_run_uncompressed_err(
    mocker,
    cnsjob,
    cns_error_filename,
):
    """Test uncompressed error file."""
    # Pick any known CNS error message and make the mocked process
    # return it as its STDOUT content (STDERR stays empty)
    known_messages = list(KNOWN_ERRORS.keys())
    fake_stdout = bytes(random.choice(known_messages), encoding="utf-8")
    mocker.patch(
        "haddock.libs.libsubprocess.subprocess.Popen.communicate",
        return_value=(fake_stdout, b""),
    )
    # Run the job without compressing anything
    cnsjob.run(
        compress_inp=False,
        compress_out=False,
        compress_err=False,
        compress_seed=False,
    )
    # The error file must exist under its original name and hold some content
    error_file = Path(cns_error_filename)
    assert error_file.exists()
    assert error_file.stat().st_size > 0


def test_cnsjob_run_compress_err(
    mocker,
    cnsjob,
    cns_error_filename,
):
    """Test compressed error file."""
    # Pick any known CNS error message and make the mocked process
    # return it as its STDOUT content (STDERR stays empty)
    known_messages = list(KNOWN_ERRORS.keys())
    fake_stdout = bytes(random.choice(known_messages), encoding="utf-8")
    mocker.patch(
        "haddock.libs.libsubprocess.subprocess.Popen.communicate",
        return_value=(fake_stdout, b""),
    )
    # Run the job, asking only for the error file to be compressed
    cnsjob.run(
        compress_inp=False,
        compress_out=False,
        compress_err=True,
        compress_seed=False,
    )
    # The error file must now exist as a non-empty `.gz` file
    compressed_error_file = Path(f"{cns_error_filename}.gz")
    assert compressed_error_file.exists()
    assert compressed_error_file.stat().st_size > 0


def test_cnsjob_compress_seed(cnsjob, cns_output_pdb_filename, cns_seed_filename):
cnsjob.run(
compress_inp=False,
compress_out=False,
compress_err=False,
compress_seed=True,
)

Expand All @@ -130,6 +198,7 @@ def test_cnsjob_nofiles(cnsjob_no_files, cns_output_pdb_filename):
cnsjob_no_files.run(
compress_inp=False,
compress_out=False,
compress_err=False,
compress_seed=False,
)

Expand Down
125 changes: 125 additions & 0 deletions integration_tests/test_knownCNSerrors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""Integration tests related to haddock.gear.known_cns_errors.py."""

import gzip
import pytest
import tempfile
import random

from os import linesep
from pathlib import Path
from string import ascii_letters

from haddock.gear.known_cns_errors import KNOWN_ERRORS
from haddock.libs.libontology import PDBFile
from haddock.modules.sampling.rigidbody import (
DEFAULT_CONFIG as DEFAULT_RIGIDBODY_CONFIG,
HaddockModule as RigidbodyModule
)


@pytest.fixture
def gen_random_text():
    """Generate some random text.

    A single line of 80 random ASCII letters is drawn, then repeated on
    500 lines, each one terminated by the OS line separator.
    """
    letters = [random.choice(ascii_letters) for _ in range(80)]
    terminated_line = "".join(letters) + linesep
    yield terminated_line * 500


@pytest.fixture
def gen_fake_cns_errors(gen_random_text):
    """Generate directory full of CNS.cnserr file with errors.

    For each key of `KNOWN_ERRORS`, four files holding the same content are
    written in a temporary directory: two plain `.cnserr` files and two
    gzipped `.cnserr.gz` files. The directory path is yielded and the
    directory is removed on teardown.
    """
    with tempfile.TemporaryDirectory("moduleoutputs") as tmp:
        outdir = Path(tmp)
        for i, error in enumerate(KNOWN_ERRORS):
            # Bury the error message in the middle of random text
            content = "".join((gen_random_text, error, gen_random_text))
            payload = content.encode("utf-8")
            # Two copies of each flavour (plain text and gzip) per error
            for j in (1, 2):
                basename = f"with_error_cns_{i}_{j}.cnserr"
                (outdir / basename).write_text(content)
                with gzip.open(outdir / f"{basename}.gz", mode="wb") as gout:
                    gout.write(payload)
        yield tmp


@pytest.fixture
def rigidbody_module_with_cns_errors(gen_fake_cns_errors):
    """Generate a failed rigidbody module with CNS errors.

    The module works in the directory provided by `gen_fake_cns_errors` and
    its `output_models` point to nine PDB paths that are never written.
    """
    workdir = Path(gen_fake_cns_errors)
    module = RigidbodyModule(
        order=1,
        path=workdir,
        initial_params=DEFAULT_RIGIDBODY_CONFIG,
    )
    # Expected outputs (1 to 9) that do not exist on disk
    missing_models = []
    for index in range(1, 10):
        model_path = workdir / f"not_generated_output_{index}.pdb"
        missing_models.append(PDBFile(model_path))
    module.output_models = missing_models
    yield module


@pytest.fixture
def rigidbody_module_without_cns_errors():
    """Generate a failed rigidbody module without CNS errors.

    The module works in an empty temporary directory and its `output_models`
    point to nine PDB paths that are never written.
    """
    with tempfile.TemporaryDirectory("moduleoutputs") as tmp:
        workdir = Path(tmp)
        module = RigidbodyModule(
            order=1,
            path=workdir,
            initial_params=DEFAULT_RIGIDBODY_CONFIG,
        )
        # Expected outputs (1 to 9) that do not exist on disk
        missing_models = []
        for index in range(1, 10):
            model_path = workdir / f"not_generated_output_{index}.pdb"
            missing_models.append(PDBFile(model_path))
        module.output_models = missing_models
        yield module


class MockPreviousIO:
    """Mock of the object assigned to a module's `previous_io` attribute.

    It only exposes the given `path` and an `output` list that starts empty.
    """

    def __init__(self, path) -> None:
        # Each instance gets its own, initially empty, list of outputs
        self.output: list = []
        self.path = path


def test_detection_when_faulty(rigidbody_module_with_cns_errors):
    """Test failure of run and detection of CNS errors."""
    module = rigidbody_module_with_cns_errors
    module.previous_io = MockPreviousIO(module.path)
    # Exporting the models is expected to fail
    with pytest.raises(RuntimeError) as error_info:
        module.export_io_models()
    # Final error message shown to the user
    reported = str(error_info.value)
    # Every known error and its associated hint must be reported
    for known_message, hint in KNOWN_ERRORS.items():
        assert known_message in reported
        assert hint in reported


def test_undetected_when_faulty(rigidbody_module_without_cns_errors):
    """Test failure of run and undetection of CNS errors."""
    module = rigidbody_module_without_cns_errors
    module.previous_io = MockPreviousIO(module.path)
    # Exporting the models is expected to fail
    with pytest.raises(RuntimeError) as error_info:
        module.export_io_models()
    # Final error message shown to the user
    reported = str(error_info.value)
    # None of the known errors, nor their hints, may be reported
    for known_message, hint in KNOWN_ERRORS.items():
        assert known_message not in reported
        assert hint not in reported
19 changes: 19 additions & 0 deletions src/haddock/core/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""HADDOCK library custom errors."""

from os import linesep

from haddock.core.typing import FilePath

class HaddockError(Exception):
"""Error in HADDOCK3."""
Expand Down Expand Up @@ -36,6 +39,22 @@ class CNSRunningError(HaddockError):

pass

class KnownCNSError(CNSRunningError):
    """Detected CNS output error.

    Parameters
    ----------
    cns_message : str
        CNS error message; stored as `cns_error` and quoted in the string
        representation of the exception.
    hint : str
        Hint on how to solve the error; appended to the string
        representation of the exception.
    filepath : FilePath
        Stored as `filepath` and not used by this class itself.
        NOTE(review): presumably the file in which the message was
        detected - confirm against the callers.
    """

    def __init__(self, cns_message: str, hint: str, filepath: FilePath):
        # Forward the arguments to the base exception so that `args` is
        # always populated, even when the error is built with keyword
        # arguments. Exceptions are re-created from `args` when unpickled,
        # which would otherwise fail for this three-argument constructor.
        super().__init__(cns_message, hint, filepath)
        self.cns_error = cns_message
        self.hint = hint
        self.filepath = filepath

    def __str__(self) -> str:
        """Generate custom string representation of this exception."""
        full_msg = (
            f"A CNS error occured: `{self.cns_error}`.{linesep}"
            f"Here is a hint on how to solve it:{linesep}{self.hint}"
        )
        return full_msg


class HaddockModuleError(HaddockError):
"""General error in a HADDOCK3 module."""
Expand Down
38 changes: 20 additions & 18 deletions src/haddock/gear/clean_steps.py
VGPReys marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import gzip
import shutil
import tarfile

from functools import partial
from multiprocessing import Pool
from pathlib import Path
Expand Down Expand Up @@ -61,12 +62,12 @@ def clean_output(path: FilePath, ncores: int = 1) -> None:
# Files to delete
# deletes all except the first one
files_to_delete = [
'.inp',
'.inp.gz',
'.out',
'.out.gz',
'.job',
'.err',
VGPReys marked this conversation as resolved.
Show resolved Hide resolved
".inp",
".inp.gz",
".out",
".out.gz",
".job",
VGPReys marked this conversation as resolved.
Show resolved Hide resolved
".err",
]

for extension in files_to_delete:
Expand All @@ -76,9 +77,9 @@ def clean_output(path: FilePath, ncores: int = 1) -> None:

# files to archive (all files in single .gz)
files_to_archive = [
'.seed',
'.seed.gz',
'.con',
".seed",
".seed.gz",
".con",
]

archive_ready = partial(_archive_and_remove_files, path=path)
Expand All @@ -90,10 +91,11 @@ def clean_output(path: FilePath, ncores: int = 1) -> None:

# files to compress in .gz
files_to_compress = [
'.inp',
'.out',
'.pdb',
'.psf',
".inp",
".out",
".pdb",
".psf",
".cnserr",
]

for ftc in files_to_compress:
Expand Down Expand Up @@ -132,9 +134,9 @@ def unpack_compressed_and_archived_files(folders: Iterable[FilePathT],
UNPACK_FOLDERS.clear()

files_to_decompress = [
'.pdb.gz',
'.psf.gz',
'.seed.gz',
".pdb.gz",
".psf.gz",
".seed.gz",
]

for folder in folders:
Expand Down Expand Up @@ -170,8 +172,8 @@ def unpack_compressed_and_archived_files(folders: Iterable[FilePathT],
def _unpack_gz(gz_file: Path) -> None:
    """Decompress a gzip file next to itself, then delete the archive.

    The output file is written in the same directory, named after
    `gz_file` without its last suffix.
    """
    target = gz_file.parent / gz_file.stem
    # Buffer length handed to `shutil.copyfileobj`
    buffer_length = 2 * 10**8

    with gzip.open(gz_file, "rb") as compressed:
        with open(target, "wb") as plain:
            shutil.copyfileobj(compressed, plain, buffer_length)

    gz_file.unlink()
Expand Down
Loading
Loading