Skip to content

Commit

Permalink
FastQCJob test added
Browse files Browse the repository at this point in the history
  • Loading branch information
charlie committed Feb 23, 2024
1 parent 71fd5b2 commit 4f004e0
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 38 deletions.
43 changes: 12 additions & 31 deletions sequence_processing_pipeline/FastQCJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from functools import partial
from json import dumps
import logging
import glob


class FastQCJob(Job):
Expand Down Expand Up @@ -309,37 +310,17 @@ def _generate_job_script(self):
f.write('\n'.join(self.commands))

def parse_logs(self):
    """Scan this job's slurm output logs for error messages.

    Reads every ``*.out`` file under ``<output_path>/logs`` (in sorted
    order, so results are deterministic) and collects each line that
    contains the substring 'error', case-insensitively.

    Returns
    -------
    list of str
        The matching lines, whitespace-stripped. Empty when the logs
        directory doesn't exist or no file contains a match; no error
        is raised because parse_logs() is expected to be called in
        response to an exceptional condition.
    """
    log_path = join(self.output_path, 'logs')
    files = sorted(glob.glob(join(log_path, '*.out')))
    msgs = []

    for some_file in files:
        with open(some_file, 'r') as f:
            msgs += [line for line in f.readlines()
                     # note 'error' is not same
                     # requirement as found in QCJob.
                     # ('error:'). This is a very
                     # generalized filter.
                     if 'error' in line.lower()]

    return [msg.strip() for msg in msgs]
2 changes: 1 addition & 1 deletion sequence_processing_pipeline/NuQCJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,6 @@ def parse_logs(self):
for some_file in files:
with open(some_file, 'r') as f:
msgs += [line for line in f.readlines()
if 'error' in line.lower()]
if 'error:' in line.lower()]

return [msg.strip() for msg in msgs]
70 changes: 69 additions & 1 deletion sequence_processing_pipeline/tests/test_FastQCJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from os.path import join, exists, isfile
from functools import partial
from sequence_processing_pipeline.FastQCJob import FastQCJob
from sequence_processing_pipeline.PipelineError import PipelineError
from sequence_processing_pipeline.PipelineError import (PipelineError,
JobFailedError)
from os import makedirs, listdir, mkdir
from shutil import rmtree, move
from json import load
Expand All @@ -15,6 +16,7 @@ def setUp(self):
self.path = partial(join, package_root, 'tests', 'data')
self.qiita_job_id = 'abcdabcdabcdabcdabcdabcdabcdabcd'
self.output_path = self.path('output_dir2')
self.fastqc_log_path = join(self.output_path, 'logs')
self.raw_fastq_files_path = ('sequence_processing_pipeline/tests/data'
'/211021_A00000_0000_SAMPLE/Data/Fastq/p'
'roject1')
Expand Down Expand Up @@ -527,6 +529,41 @@ def setUp(self):
with open(file_name, 'w') as f2:
f2.write("This is a file.")

# set up dummy logs
self.fastqc_log_path = join(self.output_path, "FastQCJob", "logs")
makedirs(self.fastqc_log_path, exist_ok=True)

log_files = {
'slurm-9999999_35.out': ["---------------",
"Run details:",
("hds-fe848a9e-c0e9-49d9-978d-"
"27565a314e8b 1908305 b2-018"),
"---------------",
"+ this",
"+ that",
"+ blah",
("something error: Generic Standin Error"
" (GSE).")],
'slurm-9999999_17.out': ["---------------",
"Run details:",
("hds-fe848a9e-c0e9-49d9-978d-"
"27565a314e8b 1908305 b2-018"),
"---------------",
"+ this",
"+ that",
"+ blah",
("something error: Another Standin Error"
" (ASE).")]
}

for log_file in log_files:
fp = join(self.fastqc_log_path, log_file)

with open(fp, 'w') as f:
lines = log_files[log_file]
for line in lines:
f.write(f"{line}\n")

def tearDown(self):
    # Remove everything setUp() created under output_dir2 (including
    # the dummy slurm log files) so each test starts clean.
    rmtree(self.output_path)

Expand Down Expand Up @@ -1071,6 +1108,37 @@ def test_completed_file_generation_some_failures(self):
"failed_indexes": [3, 4]}
self.assertDictEqual(obs, exp)

def test_error_msg_from_logs(self):
    """JobFailedError should carry error lines harvested from the logs."""
    job = FastQCJob(self.qc_root_path, self.output_path,
                    self.raw_fastq_files_path.replace('/project1', ''),
                    self.processed_fastq_files_path,
                    16, 16,
                    'sequence_processing_pipeline/tests/bin/fastqc', [],
                    self.qiita_job_id, 'queue_name', 4, 23, '8g', 30,
                    self.config_yml, 1000, False)

    self.assertIsNotNone(job)

    # an internal method to force submit_job() to raise a JobFailedError
    # instead of submitting the job w/sbatch and waiting for a failed
    # job w/sacct.
    self.assertTrue(job._toggle_force_job_fail())

    try:
        job.run()
    except JobFailedError as e:
        # assert that the text of the original error message was
        # preserved, while including known strings from each of the
        # sample log-files.
        self.assertIn('This job died.', str(e))
        self.assertIn('something error: Generic Standin Error (GSE)',
                      str(e))
        self.assertIn('something error: Another Standin Error (ASE)',
                      str(e))
    else:
        # without this the test would pass vacuously if run() ever
        # stopped raising.
        self.fail("job.run() was expected to raise JobFailedError")


if __name__ == '__main__':
unittest.main()
13 changes: 8 additions & 5 deletions sequence_processing_pipeline/tests/test_NuQCJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,8 @@ def setUp(self):
"+ this",
"+ that",
"+ blah",
"[ERROR] Generic Standin Error (GSE)."],
("something error: Generic Standin Error"
" (GSE).")],
'slurm-9999999_17.out': ["---------------",
"Run details:",
("hds-fe848a9e-c0e9-49d9-978d-"
Expand All @@ -564,7 +565,8 @@ def setUp(self):
"+ this",
"+ that",
"+ blah",
"[ERROR] Another Standin Error (ASE)."]
("something error: Another Standin Error"
" (ASE).")]
}

for log_file in log_files:
Expand Down Expand Up @@ -651,13 +653,14 @@ def test_error_msg_from_logs(self):
try:
job.run()
except JobFailedError as e:
print(">>>%s<<<" % str(e))
# assert that the text of the original error message was
# preserved, while including known strings from each of the
# sample log-files.
self.assertIn('This job died.', str(e))
self.assertIn('[ERROR] Generic Standin Error (GSE)', str(e))
self.assertIn('[ERROR] Another Standin Error (ASE)', str(e))
self.assertIn('something error: Generic Standin Error (GSE)',
str(e))
self.assertIn('something error: Another Standin Error (ASE)',
str(e))

def test_assay_value(self):
with self.assertRaisesRegex(ValueError, "bad-sample-sheet-metagenomics"
Expand Down

0 comments on commit 4f004e0

Please sign in to comment.