Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detection of known CNS errors when failed run #1018

Merged
merged 30 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
3966589
function and exception for known cns error
VGPReys Sep 11, 2024
7defcb0
slight optimization
VGPReys Sep 11, 2024
5739a57
adding error detection to base module
VGPReys Sep 11, 2024
bfbbeb6
adding tests
VGPReys Sep 11, 2024
5d8250b
uni/integration tests
VGPReys Sep 11, 2024
182380c
lints
VGPReys Sep 11, 2024
4bcaf8d
check undetected CNS errors
VGPReys Sep 11, 2024
824afad
Update src/haddock/gear/known_cns_errors.py
VGPReys Sep 12, 2024
436f2e7
Update src/haddock/gear/known_cns_errors.py
VGPReys Sep 12, 2024
16c299e
Merge branch 'main' into known-cns-errors
VGPReys Sep 12, 2024
d81f1f2
fix typo in symmultimer
VGPReys Sep 12, 2024
5b4adef
traceback files containing error
VGPReys Sep 12, 2024
0c8e202
adding hint
VGPReys Sep 12, 2024
5eae673
commenting list of files
VGPReys Sep 12, 2024
aa7abf7
all handle of .out.gz file parsing too
VGPReys Sep 12, 2024
130fff4
fix types
VGPReys Sep 12, 2024
c42ad04
Add comment
VGPReys Sep 12, 2024
b530831
Merge branch 'main' into known-cns-errors
VGPReys Sep 16, 2024
b5d5fd8
uni/integration tests
VGPReys Sep 16, 2024
9a59a9d
update cns modules to send a error filename
VGPReys Sep 16, 2024
659675d
fix variable name in topoaa
VGPReys Sep 16, 2024
78cf343
remove removal of .err files
VGPReys Sep 16, 2024
678e4d0
fix file ext to match .err
VGPReys Sep 16, 2024
376c83d
Merge branch 'main' into known-cns-errors
VGPReys Sep 17, 2024
250fa8f
Merge branch 'main' into known-cns-errors
VGPReys Sep 17, 2024
e058971
modify file extension to .cnserr
VGPReys Sep 17, 2024
4962179
update integration tests with new file extension
VGPReys Sep 17, 2024
cef0a6f
Update src/haddock/gear/clean_steps.py
VGPReys Sep 18, 2024
43e97e8
Merge branch 'main' into known-cns-errors
VGPReys Sep 18, 2024
5e11238
addressing reviews
VGPReys Sep 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions integration_tests/test_knownCNSerrors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Integration tests related to haddock.gear.known_cns_errors.py."""

import pytest
import tempfile
import random

from os import linesep
from pathlib import Path
from string import ascii_letters

from haddock.gear.known_cns_errors import KNOWN_ERRORS
from haddock.libs.libontology import PDBFile
from haddock.modules.sampling.rigidbody import (
DEFAULT_CONFIG as DEFAULT_RIGIDBODY_CONFIG,
HaddockModule as RigidbodyModule
)


@pytest.fixture
def gen_random_text():
    """Provide filler text: one random 80-letter line repeated 500 times."""
    # The random line is drawn only once; every line of the text is identical
    random_letters = (random.choice(ascii_letters) for _ in range(80))
    single_line = "".join(random_letters) + linesep
    yield single_line * 500


@pytest.fixture
def gen_fake_cns_errors(gen_random_text):
    """Provide a temporary directory filled with faulty CNS ``.out`` files.

    For each key of ``KNOWN_ERRORS``, two files named
    ``with_error_cns_<i>_<j>.out`` (``j`` being 1 or 2) are written. Both
    hold the same content: the error string placed between two copies of
    the filler text.
    """
    with tempfile.TemporaryDirectory("moduleoutputs") as workdir:
        for error_index, error_string in enumerate(KNOWN_ERRORS):
            # Bury the error string in the middle of the filler text
            content = f"{gen_random_text}{error_string}{gen_random_text}"
            # Write the very same content in two distinct files
            for copy_index in (1, 2):
                fname = f"with_error_cns_{error_index}_{copy_index}.out"
                Path(workdir, fname).write_text(content)
        yield workdir


@pytest.fixture
def rigidbody_module_with_cns_errors(gen_fake_cns_errors):
    """Provide a rigidbody module whose directory holds faulty CNS outputs.

    The module is set to expect nine output models, none of which exists
    on disk.
    """
    module = RigidbodyModule(
        order=1,
        path=Path(gen_fake_cns_errors),
        initial_params=DEFAULT_RIGIDBODY_CONFIG,
    )
    # Register nine output models that were never written
    missing_models = []
    for index in range(1, 10):
        model_path = Path(
            gen_fake_cns_errors,
            f"not_generated_output_{index}.pdb",
        )
        missing_models.append(PDBFile(model_path))
    module.output_models = missing_models
    yield module


@pytest.fixture
def rigidbody_module_without_cns_errors():
    """Provide a rigidbody module living in an empty temporary directory.

    The module is set to expect nine output models, none of which exists
    on disk, and its directory contains no CNS output file at all.
    """
    with tempfile.TemporaryDirectory("moduleoutputs") as workdir:
        module = RigidbodyModule(
            order=1,
            path=Path(workdir),
            initial_params=DEFAULT_RIGIDBODY_CONFIG,
        )
        # Register nine output models that were never written
        missing_models = []
        for index in range(1, 10):
            model_path = Path(workdir, f"not_generated_output_{index}.pdb")
            missing_models.append(PDBFile(model_path))
        module.output_models = missing_models
        yield module


class MockPreviousIO:
    """Minimal stand-in for the ``previous_io`` attribute of a module.

    It only carries the ``path`` it was given and an empty ``output`` list.
    """

    def __init__(self, path):
        # This mock never holds any output
        self.output = []
        self.path = path


def test_detection_when_faulty(rigidbody_module_with_cns_errors):
    """Check that a failing module reports every known CNS error."""
    module = rigidbody_module_with_cns_errors
    module.previous_io = MockPreviousIO(module.path)
    # Exporting models must fail, as none of them was generated
    with pytest.raises(RuntimeError) as error_info:
        module.export_io_models()
    failure_message = str(error_info.value)
    # Each known error, together with its hint, must appear in the message
    for cns_error_string, user_hint in KNOWN_ERRORS.items():
        assert cns_error_string in failure_message
        assert user_hint in failure_message


def test_undetected_when_faulty(rigidbody_module_without_cns_errors):
    """Check that a failing module without CNS output reports no CNS error."""
    module = rigidbody_module_without_cns_errors
    module.previous_io = MockPreviousIO(module.path)
    # Exporting models must fail, as none of them was generated
    with pytest.raises(RuntimeError) as error_info:
        module.export_io_models()
    failure_message = str(error_info.value)
    # Neither a known error nor its hint may appear in the message
    for cns_error_string, user_hint in KNOWN_ERRORS.items():
        assert cns_error_string not in failure_message
        assert user_hint not in failure_message
19 changes: 19 additions & 0 deletions src/haddock/core/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""HADDOCK library custom errors."""

from os import linesep

from haddock.core.typing import FilePath

class HaddockError(Exception):
"""Error in HADDOCK3."""
Expand Down Expand Up @@ -36,6 +39,22 @@ class CNSRunningError(HaddockError):

pass

class KnownCNSError(CNSRunningError):
    """Detected CNS output error.

    Parameters
    ----------
    cns_message : str
        The error string that was found in the CNS output.
    hint : str
        Message to the user on how to solve this error.
    filepath : FilePath
        Path to the file in which the error was found.
    """

    def __init__(self, cns_message: str, hint: str, filepath: FilePath):
        # Forward the arguments to the base exception so that `args` is
        # populated: without it `repr()` shows no detail and the exception
        # cannot be rebuilt from its arguments (e.g. by pickle or copy).
        super().__init__(cns_message, hint, filepath)
        # Error string as written by CNS
        self.cns_error = cns_message
        # Advice shown to the user
        self.hint = hint
        # File in which the error was found
        self.filepath = filepath

    def __str__(self) -> str:
        """Generate custom string representation of this exception."""
        full_msg = (
            f"A CNS error occured: `{self.cns_error}`.{linesep}"
            f"Here is a hint on how to solve it:{linesep}{self.hint}"
        )
        return full_msg


class HaddockModuleError(HaddockError):
"""General error in a HADDOCK3 module."""
Expand Down
158 changes: 158 additions & 0 deletions src/haddock/gear/known_cns_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
"""Detect known/common CNS errors.

Inspired from:
https://github.com/haddocking/haddock25/blob/main/tools/check-error-messages.sh
"""

from pathlib import Path

from haddock.core.exceptions import KnownCNSError
from haddock.core.typing import FilePath, Optional, Union

# Known CNS errors.
#   key: text to search for in a cns.out file
#   value: hint displayed to the user when this text is found
KNOWN_ERRORS: dict[str, str] = {
    "CHAIN LENGTH FOR SYMMETRY RESTRAINTS DOES NOT MATCH": (
        "Mismatch between chain length for symmetry restraints. "
        "Check your input molecules and symmetry restraints."
    ),
    (
        "NCS-restraints error encountered: "
        "Improperly defined non-crystallographic symmetry"
    ): (
        "Improperly defined non-crystallographic symmetry (NCS). "
        "Check your symmetry restraints definition."
    ),
    "error in SYMMETRY potential, check NOE table": (
        "Check your symmetry restraints definition."
    ),
    "exceeded allocation for NOE-restraints": (
        "Too many distance restraints defined. "
        "Try to reduce this number by checking your definition of "
        "active and passive residues. "
        "Make sure to filter those for solvent accessibility."
    ),
    "SELRPN error encountered: parsing error": (
        "Check your restraint files."
    ),
    "PARSER error encountered: Encountered too many parsing errors": (
        "Encountered too many parsing errors."
    ),
    (
        "XMREAD error encountered: "
        "sectioning of map incompatible with resolution"
    ): (
        "Check your EM map resolution and sectioning."
    ),
    "ALLHP error encountered: not enough memory available": (
        "Too many distance restraints defined. "
        "Try to reduce this number by checking your definition of "
        "active and passive residues. "
        "Make sure to filter those for solvent accessibility. "
        "Try to decrease the size of your system where possible."
    ),
    "error encountered: missing SCATter definition for SELEcted atoms": (
        "Unsupported atoms/molecules for cryo-EM restraints."
    ),
    "ROTMAT error encountered: rotation vector has zero length": (
        "Check your input parameters and restraints. "
        "Possibly try turning off the sampling of 180 degrees rotation."
    ),
}


def find_cns_errors(cns_out_fpath: FilePath) -> Optional[KnownCNSError]:
    """Search a cns.out file for one of the known CNS errors.

    Parameters
    ----------
    cns_out_fpath : FilePath -> Union[str, Path]
        Path to the cns.out file to check.

    Returns
    -------
    Optional[KnownCNSError]
        The exception describing the detected error (holding the hint on
        how to solve it), or None when no known error was found.
    """
    try:
        _find_cns_errors(cns_out_fpath, KNOWN_ERRORS)
    except KnownCNSError as detected_error:
        # Hand the exception back to the caller instead of propagating it
        return detected_error
    return None


def _raise_on_known_error(
        line: bytes,
        known_errors: dict[str, str],
        cns_out_fpath: FilePath,
        ) -> None:
    """Raise a KnownCNSError if `line` contains one of the known errors."""
    decoded_line = line.decode('utf-8', errors='replace')
    for error_string, hint in known_errors.items():
        if error_string in decoded_line:
            raise KnownCNSError(error_string, hint, cns_out_fpath)


def _find_cns_errors(
        cns_out_fpath: FilePath,
        known_errors: dict[str, str],
        chunk_size: int = 4096,
        ) -> None:
    """Read a file backwards and detect the first known CNS error.

    The file is read chunk by chunk starting from its end, and only
    complete lines are checked. Hence the reported error is the one
    located on the last faulty line of the file.

    Parameters
    ----------
    cns_out_fpath : FilePath -> Union[str, Path]
        Path to the cns.out file to check.
    known_errors : dict[str, str]
        Dict of known errors and their hints
    chunk_size : int, optional
        Chunk size (in bytes) used to read the file backwards,
        by default 4096

    Raises
    ------
    KnownCNSError
        An exception for known CNS errors, with its hint on how to solve it!
    ValueError
        If `chunk_size` is lower than 1.
    """
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size}."
        )
    with open(cns_out_fpath, 'rb') as file:
        # Jump to the end of the file (whence=2) to obtain its size
        file.seek(0, 2)
        position = file.tell()
        # Nothing to check in an empty file
        if position == 0:
            return
        # Start of the earliest line read so far: it may continue in the
        # preceding chunk, so it is held back until it is complete.
        pending = b''
        while position > 0:
            # Read the chunk that ends exactly where the previous one began,
            # so that every byte of the file is read once and only once
            read_size = min(chunk_size, position)
            position -= read_size
            file.seek(position)
            lines = (file.read(read_size) + pending).split(b'\n')
            # The first item has no newline before it within this data,
            # so it is not known to be complete yet
            pending = lines[0]
            # All other lines are complete: check them, last line first
            for line in reversed(lines[1:]):
                _raise_on_known_error(line, known_errors, cns_out_fpath)
        # Start of file reached: the held back data is the first line
        _raise_on_known_error(pending, known_errors, cns_out_fpath)


def find_all_cns_errors(
        directory_path: FilePath,
        ) -> dict[str, dict[str, Union[list[str], KnownCNSError]]]:
    """Find all known CNS errors in the `.out` files of a directory.

    Parameters
    ----------
    directory_path : FilePath
        Path to the directory to be checked

    Returns
    -------
    all_errors : dict[str, dict[str, Union[list[str], KnownCNSError]]]
        Detected errors, indexed by the CNS error string. For each of them:
        - "files": list of paths (as strings) of the files holding it
        - "error": the KnownCNSError built from the first of these files
        The dictionary is empty when no known error was detected.
    """
    all_errors: dict[str, dict[str, Union[list[str], KnownCNSError]]] = {}
    # Loop over all .out files, sorted because glob order is arbitrary and
    # the listing of faulty files should be the same from one run to another
    for fpath in sorted(Path(directory_path).glob("*.out")):
        # Try to detect an error
        detected_error = find_cns_errors(fpath)
        if detected_error is None:
            continue
        # Point to the entry of this error, creating it at first encounter
        error_type = all_errors.setdefault(
            detected_error.cns_error,
            {"files": [], "error": detected_error},
        )
        # Stored as string so the list can be passed to `str.join`,
        # which raises a TypeError on Path objects
        error_type["files"].append(str(detected_error.filepath))
    return all_errors
13 changes: 13 additions & 0 deletions src/haddock/modules/__init__.py
VGPReys marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""HADDOCK3 modules."""

import re

from abc import ABC, abstractmethod
from contextlib import contextmanager, suppress
from copy import deepcopy
from functools import partial
from os import linesep
from pathlib import Path

from haddock import EmptyPath, log, modules_defaults_path
Expand All @@ -22,6 +24,7 @@
)
from haddock.gear import config
from haddock.gear.clean_steps import clean_output
from haddock.gear.known_cns_errors import find_all_cns_errors
from haddock.gear.parameters import config_mandatory_general_parameters
from haddock.gear.yaml2cfg import read_from_yaml_config, find_incompatible_parameters
from haddock.libs.libhpc import HPCScheduler
Expand Down Expand Up @@ -297,6 +300,16 @@ def export_io_models(self, faulty_tolerance=0):
f"{faulty:.2f}% of output was not generated for this module "
f"and tolerance was set to {faulty_tolerance:.2f}%."
)
# Try to detect CNS errors
if detected_errors := find_all_cns_errors(self.path):
_msg += linesep
for error in detected_errors.values():
_msg += (
f'An error was detected in {len(error["files"])} files'
f'({",".join(error["files"][:3])}...).{linesep}'
f'{str(error["error"])}{linesep}'
)
# Show final error message
self.finish_with_error(_msg)

def finish_with_error(self, reason: object = "Module has failed.") -> None:
Expand Down
2 changes: 1 addition & 1 deletion src/haddock/modules/refinement/emref/cns/symmultimer.cns
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ if ($Data.flags.sym eq true) then
evaluate ($diff1 = $i1end - $i1start)
evaluate ($diff2 = $i2end - $i2start)
if ($diff1 ne $diff2) then
display CHAIN LENGHT FOR SYMMETRY RESTRAINTS DO NOT MATCH
display CHAIN LENGTH FOR SYMMETRY RESTRAINTS DOES NOT MATCH
display PLEASE CHECK CAREFULLY YOUR INPUT
display ... stopping ...
stop
Expand Down
2 changes: 1 addition & 1 deletion src/haddock/modules/refinement/flexref/cns/symmultimer.cns
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ if ($Data.flags.sym eq true) then
evaluate ($diff1 = $i1end - $i1start)
evaluate ($diff2 = $i2end - $i2start)
if ($diff1 ne $diff2) then
display CHAIN LENGHT FOR SYMMETRY RESTRAINTS DO NOT MATCH
display CHAIN LENGTH FOR SYMMETRY RESTRAINTS DOES NOT MATCH
display PLEASE CHECK CAREFULLY YOUR INPUT
display ... stopping ...
stop
Expand Down
2 changes: 1 addition & 1 deletion src/haddock/modules/refinement/mdref/cns/symmultimer.cns
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ if ($Data.flags.sym eq true) then
evaluate ($diff1 = $i1end - $i1start)
evaluate ($diff2 = $i2end - $i2start)
if ($diff1 ne $diff2) then
display CHAIN LENGHT FOR SYMMETRY RESTRAINTS DO NOT MATCH
display CHAIN LENGTH FOR SYMMETRY RESTRAINTS DOES NOT MATCH
display PLEASE CHECK CAREFULLY YOUR INPUT
display ... stopping ...
stop
Expand Down
2 changes: 1 addition & 1 deletion src/haddock/modules/sampling/rigidbody/cns/symmultimer.cns
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ if ($Data.flags.sym eq true) then
evaluate ($diff1 = $i1end - $i1start)
evaluate ($diff2 = $i2end - $i2start)
if ($diff1 ne $diff2) then
display CHAIN LENGHT FOR SYMMETRY RESTRAINTS DO NOT MATCH
display CHAIN LENGTH FOR SYMMETRY RESTRAINTS DOES NOT MATCH
display PLEASE CHECK CAREFULLY YOUR INPUT
display ... stopping ...
stop
Expand Down
Loading
Loading