Skip to content

Commit bb9dcc7

Browse files
authored
Log warnings for likely errors in provenance record
1 parent a1f2888 commit bb9dcc7

File tree

6 files changed

+291
-98
lines changed

6 files changed

+291
-98
lines changed

.circleci/install_triggers

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
^\.circleci/
22
^environment\.yml$
33
^[A-Za-z_]*meta\.yaml$
4+
^package/
45
^setup\.py$
56
^setup\.cfg$

esmvalcore/_task.py

+35-9
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ def _ncl_type(value):
203203

204204
class BaseTask:
205205
"""Base class for defining task classes."""
206+
206207
def __init__(self, ancestors=None, name='', products=None):
207208
"""Initialize task."""
208209
self.ancestors = [] if ancestors is None else ancestors
@@ -265,6 +266,7 @@ class DiagnosticError(Exception):
265266

266267
class DiagnosticTask(BaseTask):
267268
"""Task for running a diagnostic."""
269+
268270
def __init__(self, script, settings, output_dir, ancestors=None, name=''):
269271
"""Create a diagnostic task."""
270272
super().__init__(ancestors=ancestors, name=name)
@@ -523,8 +525,10 @@ def _collect_provenance(self):
523525
provenance_file = Path(
524526
self.settings['run_dir']) / 'diagnostic_provenance.yml'
525527
if not provenance_file.is_file():
526-
logger.warning("No provenance information was written to %s",
527-
provenance_file)
528+
logger.warning(
529+
"No provenance information was written to %s. Unable to "
530+
"record provenance for files created by diagnostic script %s "
531+
"in task %s", provenance_file, self.script, self.name)
528532
return
529533

530534
logger.debug("Collecting provenance from %s", provenance_file)
@@ -554,16 +558,33 @@ def _collect_provenance(self):
554558
if key not in ignore:
555559
attrs[key] = self.settings[key]
556560

557-
ancestor_products = {p for a in self.ancestors for p in a.products}
561+
ancestor_products = {
562+
p.filename: p
563+
for a in self.ancestors for p in a.products
564+
}
558565

566+
valid = True
559567
for filename, attributes in table.items():
560568
# copy to avoid updating other entries if file contains anchors
561569
attributes = deepcopy(attributes)
562570
ancestor_files = attributes.pop('ancestors', [])
563-
ancestors = {
564-
p
565-
for p in ancestor_products if p.filename in ancestor_files
566-
}
571+
if not ancestor_files:
572+
logger.warning(
573+
"No ancestor files specified for recording provenance of "
574+
"%s, created by diagnostic script %s in task %s", filename,
575+
self.script, self.name)
576+
valid = False
577+
ancestors = set()
578+
for ancestor_file in ancestor_files:
579+
if ancestor_file in ancestor_products:
580+
ancestors.add(ancestor_products[ancestor_file])
581+
else:
582+
valid = False
583+
logger.warning(
584+
"Invalid ancestor file %s specified for recording "
585+
"provenance of %s, created by diagnostic script %s "
586+
"in task %s", ancestor_file, filename, self.script,
587+
self.name)
567588

568589
attributes.update(deepcopy(attrs))
569590
for key in attributes:
@@ -575,9 +596,14 @@ def _collect_provenance(self):
575596
product.save_provenance()
576597
_write_citation_files(product.filename, product.provenance)
577598
self.products.add(product)
599+
600+
if not valid:
601+
logger.warning(
602+
"Valid ancestor files for diagnostic script %s in task %s "
603+
"are:\n%s", self.script, self.name,
604+
'\n'.join(ancestor_products))
578605
logger.debug("Collecting provenance of task %s took %.1f seconds",
579-
self.name,
580-
time.time() - start)
606+
self.name, time.time() - start)
581607

582608
def __str__(self):
583609
"""Get human readable description."""

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
'pytest-flake8',
5555
'pytest-html!=2.1.0',
5656
'pytest-metadata>=1.5.1',
57+
'pytest-mock',
5758
],
5859
# Development dependencies
5960
# Use pip install -e .[develop] to install in development mode

tests/unit/task/__init__.py

Whitespace-only changes.
+254
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
import stat
2+
from pathlib import Path
3+
4+
import pytest
5+
import yaml
6+
7+
import esmvalcore._task
8+
9+
10+
@pytest.mark.parametrize("ext", ['.jl', '.py', '.ncl', '.R'])
11+
def test_initialize_env(ext, tmp_path, monkeypatch):
12+
"""Test that the environmental variables are set correctly."""
13+
monkeypatch.setattr(esmvalcore._task.DiagnosticTask, '_initialize_cmd',
14+
lambda self: None)
15+
16+
esmvaltool_path = tmp_path / 'esmvaltool'
17+
monkeypatch.setattr(esmvalcore._task, 'DIAGNOSTICS_PATH', esmvaltool_path)
18+
19+
diagnostics_path = esmvaltool_path / 'diag_scripts'
20+
diagnostics_path.mkdir(parents=True)
21+
script = diagnostics_path / ('test' + ext)
22+
script.touch()
23+
24+
settings = {
25+
'run_dir': str(tmp_path / 'run_dir'),
26+
'profile_diagnostic': False,
27+
}
28+
task = esmvalcore._task.DiagnosticTask(
29+
script,
30+
settings,
31+
output_dir=str(tmp_path),
32+
)
33+
34+
# Create correct environment
35+
env = {}
36+
if ext in ('.jl', '.py'):
37+
env['MPLBACKEND'] = 'Agg'
38+
if ext == '.jl':
39+
env['JULIA_LOAD_PATH'] = f"{esmvaltool_path / 'install' / 'Julia'}:"
40+
if ext in ('.ncl', '.R'):
41+
env['diag_scripts'] = str(diagnostics_path)
42+
43+
assert task.env == env
44+
45+
46+
CMD = {
47+
# ext, profile: expected command
48+
('.py', False): ['python'],
49+
('.py', True): ['python', '-m', 'vmprof', '--lines', '-o'],
50+
('.ncl', False): ['ncl', '-n', '-p'],
51+
('.ncl', True): ['ncl', '-n', '-p'],
52+
('.R', False): ['Rscript'],
53+
('.R', True): ['Rscript'],
54+
('.jl', False): ['julia'],
55+
('.jl', True): ['julia'],
56+
('', False): [],
57+
('', True): [],
58+
}
59+
60+
61+
@pytest.mark.parametrize("ext_profile,cmd", CMD.items())
62+
def test_initialize_cmd(ext_profile, cmd, tmp_path, monkeypatch):
63+
"""Test creating the command to run the diagnostic script."""
64+
monkeypatch.setattr(esmvalcore._task.DiagnosticTask, '_initialize_env',
65+
lambda self: None)
66+
67+
ext, profile = ext_profile
68+
script = tmp_path / ('test' + ext)
69+
script.touch()
70+
if ext == '':
71+
# test case where file is executable
72+
script.chmod(stat.S_IEXEC)
73+
74+
run_dir = tmp_path / 'run_dir'
75+
settings = {
76+
'run_dir': str(run_dir),
77+
'profile_diagnostic': profile,
78+
}
79+
80+
monkeypatch.setattr(esmvalcore._task, 'which', lambda x: x)
81+
82+
task = esmvalcore._task.DiagnosticTask(script,
83+
settings,
84+
output_dir=str(tmp_path))
85+
86+
# Append filenames to expected command
87+
if ext == '.py' and profile:
88+
cmd.append(str(run_dir / 'profile.bin'))
89+
cmd.append(str(script))
90+
91+
assert task.cmd == cmd
92+
93+
94+
@pytest.fixture
95+
def diagnostic_task(mocker, tmp_path):
96+
class TrackedFile(esmvalcore._task.TrackedFile):
97+
provenance = None
98+
99+
mocker.patch.object(esmvalcore._task, 'TrackedFile', autospec=TrackedFile)
100+
mocker.patch.dict(esmvalcore._task.TAGS,
101+
{'plot_type': {
102+
'tag': 'tag_value'
103+
}})
104+
mocker.patch.object(esmvalcore._task,
105+
'_write_citation_files',
106+
autospec=True)
107+
108+
mocker.patch.object(esmvalcore._task.DiagnosticTask, '_initialize_cmd')
109+
mocker.patch.object(esmvalcore._task.DiagnosticTask, '_initialize_env')
110+
111+
settings = {
112+
'run_dir': str(tmp_path / 'run_dir'),
113+
'profile_diagnostic': False,
114+
'some_diagnostic_setting': True,
115+
}
116+
117+
task = esmvalcore._task.DiagnosticTask('test.py',
118+
settings,
119+
output_dir=str(tmp_path),
120+
name='some-diagnostic-task')
121+
return task
122+
123+
124+
def write_mock_provenance(diagnostic_task, record):
125+
run_dir = Path(diagnostic_task.settings['run_dir'])
126+
run_dir.mkdir(parents=True)
127+
provenance_file = run_dir / 'diagnostic_provenance.yml'
128+
provenance_file.write_text(yaml.safe_dump(record))
129+
130+
131+
def test_collect_provenance(mocker, diagnostic_task):
132+
tracked_file_instance = mocker.Mock()
133+
tracked_file_class = mocker.patch.object(
134+
esmvalcore._task, 'TrackedFile', return_value=tracked_file_instance)
135+
write_citation = mocker.patch.object(esmvalcore._task,
136+
'_write_citation_files')
137+
138+
record = {
139+
"test.png": {
140+
"caption": "Some figure",
141+
"ancestors": ["xyz.nc"],
142+
"plot_type": ["tag"],
143+
},
144+
}
145+
146+
write_mock_provenance(diagnostic_task, record)
147+
148+
ancestor_product = mocker.Mock()
149+
ancestor_product.filename = "xyz.nc"
150+
151+
ancestor_task = mocker.Mock()
152+
ancestor_task.products = {ancestor_product}
153+
154+
diagnostic_task.ancestors = [ancestor_task]
155+
156+
diagnostic_task.products = mocker.Mock(autospec=set)
157+
diagnostic_task._collect_provenance()
158+
159+
tracked_file_class.assert_called_once_with(
160+
"test.png",
161+
{
162+
"caption": "Some figure",
163+
"plot_type": ("tag_value", ),
164+
"script_file": "test.py",
165+
"some_diagnostic_setting": True,
166+
},
167+
{ancestor_product},
168+
)
169+
tracked_file_instance.initialize_provenance.assert_called_once_with(
170+
diagnostic_task.activity)
171+
tracked_file_instance.save_provenance.assert_called_once()
172+
write_citation.assert_called_once_with(tracked_file_instance.filename,
173+
tracked_file_instance.provenance)
174+
diagnostic_task.products.add.assert_called_once_with(tracked_file_instance)
175+
176+
177+
def assert_warned(log, msgs):
178+
"""Check that messages have been logged."""
179+
assert len(log.records) == len(msgs)
180+
for msg, record in zip(msgs, log.records):
181+
for snippet in msg:
182+
assert snippet in record.message
183+
184+
185+
def test_collect_no_provenance(caplog, diagnostic_task):
186+
187+
diagnostic_task._collect_provenance()
188+
assert_warned(caplog, [["No provenance information was written"]])
189+
190+
191+
def test_collect_provenance_no_ancestors(caplog, diagnostic_task):
192+
193+
record = {
194+
"test.png": {
195+
"caption": "Some figure",
196+
},
197+
}
198+
199+
write_mock_provenance(diagnostic_task, record)
200+
201+
diagnostic_task._collect_provenance()
202+
203+
assert_warned(caplog, [
204+
["No ancestor files specified", "test.png"],
205+
["Valid ancestor files"],
206+
])
207+
208+
209+
def test_collect_provenance_invalid_ancestors(caplog, diagnostic_task):
210+
211+
record = {
212+
"test.png": {
213+
"caption": "Some figure",
214+
"ancestors": ["xyz.nc"],
215+
},
216+
}
217+
218+
write_mock_provenance(diagnostic_task, record)
219+
220+
diagnostic_task._collect_provenance()
221+
222+
assert_warned(caplog, [
223+
["Invalid ancestor file", "test.png"],
224+
["Valid ancestor files"],
225+
])
226+
227+
228+
def test_collect_provenance_ancestor_hint(mocker, caplog, diagnostic_task):
229+
230+
record = {
231+
"test.png": {
232+
"caption": "Some figure",
233+
"ancestors": ["xyz.nc"],
234+
},
235+
"test.nc": {
236+
"ancestors": ["abc.nc"],
237+
},
238+
}
239+
240+
write_mock_provenance(diagnostic_task, record)
241+
242+
ancestor_product = mocker.Mock()
243+
ancestor_product.filename = "xyz.nc"
244+
245+
ancestor_task = mocker.Mock()
246+
ancestor_task.products = {ancestor_product}
247+
248+
diagnostic_task.ancestors = [ancestor_task]
249+
diagnostic_task._collect_provenance()
250+
251+
assert_warned(caplog, [
252+
["Invalid ancestor file", "abc.nc", "test.nc"],
253+
["Valid ancestor files", "xyz.nc"],
254+
])

0 commit comments

Comments
 (0)