-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathblack_box_steps.py
298 lines (254 loc) · 10.3 KB
/
black_box_steps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Steps for the black_box features.
These steps use the AM APIs to initiate transfers and validate the
contents of their AIPs without relying on user interface interactions.
"""
from __future__ import print_function, unicode_literals
import os
import time
from behave import given, when, then, use_step_matcher
from lxml import etree
import metsrw
import environment
from features.steps import utils
@given(
    'the transfer "DemoTransferCSV" is started with the '
    "automatedProcessingMCP processing configuration."
)
def step_impl(context):
    """Step 1: start the DemoTransferCSV transfer via the API.

    Checks that the default transfer source location can be browsed, then
    starts the transfer and stores the value returned by
    ``utils.start_transfer`` on ``context.transfer_uuid``.

    Raises:
        environment.EnvironmentError: if the location cannot be verified.
        AssertionError: if starting the transfer raises an
            ``environment.EnvironmentError``.
    """
    if not utils.browse_default_ts_location(context):
        raise environment.EnvironmentError("Location cannot be verified")
    try:
        context.transfer_uuid = utils.start_transfer(context)
    except environment.EnvironmentError as err:
        # Raise explicitly rather than ``assert False``: assert statements
        # are removed under ``python -O``, which would silently swallow
        # the failure.
        raise AssertionError("Error starting transfer: {}".format(err))
@given("an AIP has been created and stored")
def step_impl(context):
    """Composite step: start the demo transfer and wait for it to finish.

    Runs, in order, the transfer-start step, the "Transfer is COMPLETE"
    step and the "Ingest is COMPLETE" step through
    ``context.execute_steps``.
    """
    sub_steps = (
        'Given the transfer "DemoTransferCSV" is started with the'
        " automatedProcessingMCP processing configuration.\n"
        "When the Transfer is COMPLETE\n"
        "And the Ingest is COMPLETE\n"
    )
    context.execute_steps(sub_steps)
@when("the Transfer is COMPLETE")
def step_impl(context):
    """Step 2: wait for the transfer to reach a terminal status.

    Sleeps ``environment.MEDIUM_WAIT`` seconds between calls to
    ``utils.check_unit_status`` until the reported status is ``COMPLETE``
    or ``FAILED``. On ``COMPLETE`` the response's ``sip_uuid`` is stored on
    ``context.sip_uuid``; it is reset to ``None`` at the start of the step.

    Raises:
        AssertionError: if the transfer ends in any status other than
            ``COMPLETE``, or if checking the status raises an
            ``environment.EnvironmentError``.
    """
    context.sip_uuid = None
    status = None
    resp = None
    try:
        # NOTE(review): there is no upper bound on this loop; it polls until
        # a terminal status is reported.
        while status not in ("COMPLETE", "FAILED"):
            time.sleep(environment.MEDIUM_WAIT)
            resp = utils.check_unit_status(context)
            # An int or None response carries no status; poll again.
            if isinstance(resp, int) or resp is None:
                continue
            status = resp.get("status")
        if status != "COMPLETE":
            # Explicit raise instead of ``assert False`` so the failure
            # still surfaces when asserts are stripped by ``python -O``.
            raise AssertionError("Error in transfer")
        context.sip_uuid = resp.get("sip_uuid")
    except environment.EnvironmentError as err:
        raise AssertionError("Error checking transfer status: {}".format(err))
@when("the Ingest is COMPLETE")
def step_impl(context):
    """Step 3: wait for the ingest to reach a terminal status.

    Sleeps ``environment.MEDIUM_WAIT`` seconds between calls to
    ``utils.check_unit_status(context, unit="ingest")`` until the reported
    status is ``COMPLETE`` or ``FAILED``.

    Raises:
        AssertionError: if the ingest ends in any status other than
            ``COMPLETE``, or if checking the status raises an
            ``environment.EnvironmentError``.
    """
    status = None
    resp = None
    try:
        # NOTE(review): there is no upper bound on this loop; it polls until
        # a terminal status is reported.
        while status not in ("COMPLETE", "FAILED"):
            time.sleep(environment.MEDIUM_WAIT)
            resp = utils.check_unit_status(context, unit="ingest")
            # An int or None response carries no status; poll again.
            if isinstance(resp, int) or resp is None:
                continue
            status = resp.get("status")
        if status != "COMPLETE":
            # Explicit raise instead of ``assert False`` so the failure
            # still surfaces when asserts are stripped by ``python -O``.
            raise AssertionError("Error in ingest")
    except environment.EnvironmentError as err:
        raise AssertionError("Error checking ingest status: {}".format(err))
@when("the AIP is downloaded")
def step_impl(context):
    """Alias step that delegates to the "an AIP can be downloaded" step."""
    download_step = "Then an AIP can be downloaded"
    context.execute_steps(download_step)
@then("an AIP can be downloaded")
def step_impl(context):
    """Step 4: download the AIP METS and remember where it was written.

    The value returned by ``utils.download_mets`` is stored on
    ``context.aip_mets_location`` and echoed to stdout.
    """
    # Download of individual files is package-type agnostic.
    mets_location = utils.download_mets(context)
    context.aip_mets_location = mets_location
    print("\n", "AIP output to:", mets_location, "\n")
@then("the AIP METS can be accessed and parsed by mets-reader-writer")
def step_impl(context):
    """Step 5: parse the downloaded METS with metsrw.

    Loads ``context.aip_mets_location`` through
    ``metsrw.METSDocument.fromfile`` and asserts that a ``Directory`` entry
    labelled ``objects`` can be retrieved from it.
    """
    document = metsrw.METSDocument.fromfile(context.aip_mets_location)
    objects_entry = document.get_file(type="Directory", label="objects")
    assert objects_entry is not None, (
        "METS read successfully by metsrw but does not contain an "
        "objects directory structure"
    )
@then("the AIP contains all files that were present in the transfer")
def step_impl(context):
    """Compare METS file entries with transfer contents.

    For each 'original' file entry in the AIP METS, assert that its path
    (before sanitization) is listed in the corresponding directory of the
    transfer source. Browse results are cached per directory so each
    directory is queried at most once.

    Raises:
        AssertionError: if a file's directory cannot be browsed or the file
            is not listed in that directory's ``entries``.
    """
    mets = metsrw.METSDocument.fromfile(context.aip_mets_location)
    # cache each query to the SS browse endpoint by directory name
    cached_directories = {}
    # look for an 'objects' directory in the transfer directory
    objects_dir = os.path.join(context.demo_transfer_path, "objects")
    objects_dir_browse_result = utils.browse_default_ts_location(context, objects_dir)
    contains_objects_dir = bool(objects_dir_browse_result)
    if contains_objects_dir:
        cached_directories[objects_dir] = objects_dir_browse_result
    # get the paths (before sanitization) of each 'original' file
    original_file_paths = [
        utils.get_path_before_sanitization(fsentry, contains_objects_dir)
        for fsentry in mets.all_files()
        if fsentry.use == "original"
    ]
    # verify each path has an entry in the transfer directory
    for file_path in original_file_paths:
        file_dir = os.path.join(context.demo_transfer_path, os.path.dirname(file_path))
        file_name = os.path.basename(file_path)
        if file_dir not in cached_directories:
            cached_directories[file_dir] = utils.browse_default_ts_location(
                context, file_dir
            )
        browse_result = cached_directories[file_dir]
        # The browse helper can return a falsy value (it is truth-tested
        # above); fail with a clear message instead of subscripting it.
        assert browse_result, "Transfer directory {} could not be browsed".format(
            file_dir
        )
        assert (
            file_name in browse_result["entries"]
        ), "File {} from the AIP METS was not found in transfer directory {}".format(
            file_name, file_dir
        )
@then("the AIP contains a file called README.html")
def step_impl(context):
    """Download ``data/README.html`` from the AIP and validate the download.

    The file path is built from ``context.transfer_name`` and
    ``context.sip_uuid``.
    """
    aip_dir_name = "{}-{}".format(context.transfer_name, context.sip_uuid)
    readme_file = "{}/data/README.html".format(aip_dir_name)
    extracted_file = utils.download_file(context, readme_file)
    # NOTE(review): the return value is ignored; presumably
    # is_valid_download asserts/raises on an invalid download -- confirm.
    utils.is_valid_download(extracted_file)
@then("the AIP contains a METS.xml file in the data directory")
def step_impl(context):
    """Download the AIP METS file and validate the download."""
    mets_download = utils.download_mets(context)
    # NOTE(review): the return value is ignored; presumably
    # is_valid_download asserts/raises on an invalid download -- confirm.
    utils.is_valid_download(mets_download)
@then("the AIP conforms to expected content and structure")
def step_impl(context):
    """Download and extract the AIP, then check its expected directories.

    Asserts that ``data/objects``, ``data/logs`` and
    ``data/objects/submissionDocumentation`` exist as directories inside
    the extracted AIP. The downloaded AIP location is stored on
    ``context.aip_location``.
    """
    context.aip_location = utils.download_aip(context)
    extracted_aip_dir = utils.extract_aip(context)
    for directory in ("objects", "logs", "objects/submissionDocumentation"):
        relative_path = "data/{}".format(directory)
        expected_dir = os.path.join(extracted_aip_dir, relative_path)
        assert os.path.isdir(expected_dir)
@then(
    "the fileSec of the AIP METS will record every file in the objects "
    "and metadata directories of the AIP"
)
def step_impl(context):
    """Check that every fileSec entry points at a file in the extracted AIP.

    Downloads and extracts the AIP, parses the METS at
    ``context.aip_mets_location`` and, for each ``mets:file`` in the
    fileSec, asserts that the path in its ``mets:FLocat`` ``xlink:href``
    exists under the AIP's ``data`` directory.

    Raises:
        AssertionError: if a file entry has no ``mets:FLocat`` child or the
            referenced path does not exist in the extracted AIP.
    """
    context.aip_location = utils.download_aip(context)
    extracted_aip_dir = utils.extract_aip(context)
    tree = etree.parse(context.aip_mets_location)
    filesec_files = tree.findall(
        "mets:fileSec//mets:file", namespaces=context.mets_nsmap
    )
    for filesec_file in filesec_files:
        flocat = filesec_file.find("mets:FLocat", namespaces=context.mets_nsmap)
        # find() returns None when there is no match; fail clearly instead
        # of hitting an AttributeError on the next line.
        assert flocat is not None, "fileSec file {} has no FLocat element".format(
            filesec_file.get("ID")
        )
        href = flocat.attrib["{http://www.w3.org/1999/xlink}href"]
        path = "data/{}".format(href)
        assert os.path.exists(
            os.path.join(extracted_aip_dir, path)
        ), "File {} recorded in the fileSec does not exist in the AIP".format(path)
@then(
    "the physical structMap of the AIP METS accurately reflects "
    "the physical layout of the AIP"
)
def step_impl(context):
    """Check the physical structMap against the extracted AIP.

    Downloads and extracts the AIP, locates the physical structMap in the
    METS at ``context.aip_mets_location`` and the ``Directory`` div labelled
    ``<transfer_name>-<sip_uuid>`` inside it, then passes each child of that
    div to ``utils.assert_structmap_item_path_exists`` together with the
    AIP's ``data`` directory.

    Raises:
        AssertionError: if the physical structMap or the transfer directory
            div is missing from the METS.
    """
    context.aip_location = utils.download_aip(context)
    extracted_aip_dir = utils.extract_aip(context)
    root_path = "{}/data".format(extracted_aip_dir)
    tree = etree.parse(context.aip_mets_location)
    structmap = tree.find(
        'mets:structMap[@TYPE="physical"]', namespaces=context.mets_nsmap
    )
    # find() returns None when there is no match; fail clearly instead of
    # raising AttributeError/TypeError further down.
    assert structmap is not None, "AIP METS has no physical structMap"
    transfer_label = "{}-{}".format(context.transfer_name, context.sip_uuid)
    transfer_dir = structmap.find(
        'mets:div[@LABEL="{}-{}"][@TYPE="Directory"]'.format(
            context.transfer_name, context.sip_uuid
        ),
        namespaces=context.mets_nsmap,
    )
    assert (
        transfer_dir is not None
    ), "Physical structMap has no Directory div labelled {}".format(transfer_label)
    for item in transfer_dir:
        utils.assert_structmap_item_path_exists(item, root_path)
@then("every object in the AIP has been assigned a UUID in the AIP METS")
def step_impl(context):
    """Check that each fileSec file's UUID matches its PREMIS object identifier.

    For every ``mets:file`` in the fileSec, the UUID is taken from its
    ``ID`` attribute (after the ``file-`` prefix) and compared with the
    ``premis:objectIdentifierValue`` found in the amdSec referenced by the
    file's ``ADMID`` attribute.

    Raises:
        AssertionError: if the referenced amdSec is missing or the two
            identifiers differ.
    """
    tree = etree.parse(context.aip_mets_location)
    filesec_files = tree.findall(
        "mets:fileSec//mets:file", namespaces=context.mets_nsmap
    )
    for filesec_file in filesec_files:
        file_id = filesec_file.attrib["ID"]
        # remove the 'file-' prefix from the UUID of the file
        file_uuid = file_id.split("file-")[-1]
        amdsec_id = filesec_file.attrib["ADMID"]
        amdsec = tree.find(
            'mets:amdSec[@ID="{}"]'.format(amdsec_id), namespaces=context.mets_nsmap
        )
        # find() returns None when there is no match; fail clearly instead
        # of hitting an AttributeError on findtext().
        assert amdsec is not None, "No amdSec with ID {} found for file {}".format(
            amdsec_id, file_id
        )
        object_uuid = amdsec.findtext(
            "mets:techMD/mets:mdWrap/mets:xmlData/premis:object/"
            "premis:objectIdentifier/premis:objectIdentifierValue",
            namespaces=context.mets_nsmap,
        )
        assert (
            object_uuid == file_uuid
        ), "PREMIS object identifier {} does not match file UUID {}".format(
            object_uuid, file_uuid
        )
@then("every object in the objects and metadata directories has an amdSec")
def step_impl(context):
    """Check that each fileSec file entry resolves to an amdSec.

    For every ``mets:file`` in the fileSec of the METS at
    ``context.aip_mets_location``, asserts that it carries an ``ADMID``
    attribute and that an ``mets:amdSec`` with that ID exists.

    Raises:
        AssertionError: if a file has no ``ADMID`` attribute or the
            referenced amdSec is not present.
    """
    tree = etree.parse(context.aip_mets_location)
    filesec_files = tree.findall(
        "mets:fileSec//mets:file", namespaces=context.mets_nsmap
    )
    for filesec_file in filesec_files:
        file_id = filesec_file.get("ID")
        # A missing ADMID is exactly what this step is meant to detect, so
        # report it as an assertion failure rather than a KeyError.
        amdsec_id = filesec_file.get("ADMID")
        assert amdsec_id is not None, "File {} has no ADMID attribute".format(file_id)
        amdsec = tree.find(
            'mets:amdSec[@ID="{}"]'.format(amdsec_id), namespaces=context.mets_nsmap
        )
        assert amdsec is not None, "No amdSec with ID {} found for file {}".format(
            amdsec_id, file_id
        )
@then(
    "every PREMIS event recorded in the AIP METS records the logged-in "
    "user, the organization and the software as PREMIS agents"
)
def step_impl(context):
    """Check the linking agent types of every PREMIS event in the METS.

    For each ``premis:event`` found in the METS at
    ``context.aip_mets_location``, the set of its
    ``premis:linkingAgentIdentifierType`` values must equal the expected
    set of three agent types.
    """
    nsmap = context.mets_nsmap
    expected_agent_types = {
        "Archivematica user pk",
        "repository code",
        "preservation system",
    }
    tree = etree.parse(context.aip_mets_location)
    premis_events = tree.findall(
        'mets:amdSec/mets:techMD/mets:mdWrap[@MDTYPE="PREMIS:EVENT"]/'
        "mets:xmlData/premis:event",
        namespaces=nsmap,
    )
    for event in premis_events:
        event_agent_types = {
            agent.findtext("premis:linkingAgentIdentifierType", namespaces=nsmap)
            for agent in event.findall(
                "premis:linkingAgentIdentifier", namespaces=nsmap
            )
        }
        assert event_agent_types == expected_agent_types
# The next step needs a regular expression ("a"/"an", free-form event type),
# so switch matchers for it and restore the "parse" matcher afterwards.
use_step_matcher("re")


@then("there is a.? (?P<event_type>.*) event for each original object in the AIP METS")
def step_impl(context, event_type):
    """Check that each original file has exactly one event of the given type.

    Args:
        context: behave context; ``context.aip_mets_location`` is read.
        event_type: event type as written in the feature file; it is mapped
            to the event type name that AM outputs in the METS.

    Raises:
        AssertionError: if ``event_type`` is not one of the supported types,
            or if an original file does not have exactly one such event.
    """
    # map the event types as written in the feature file
    # to what AM outputs in the METS
    types = {
        "file format identification": "format identification",
        "ingestion": "ingestion",
        "message digest calculation": "message digest calculation",
        "virus scanning": "virus check",
    }
    # Fail with a readable message rather than a bare KeyError when a
    # feature file uses an event type this step does not know about.
    if event_type not in types:
        raise AssertionError(
            "Unsupported event type {!r}; expected one of: {}".format(
                event_type, ", ".join(sorted(types))
            )
        )
    mets_event_type = types[event_type]
    mets = metsrw.METSDocument.fromfile(context.aip_mets_location)
    original_files = [
        fsentry for fsentry in mets.all_files() if fsentry.use == "original"
    ]
    for fsentry in original_files:
        events = utils.get_premis_events_by_type(fsentry, mets_event_type)
        error = "Expected one {} event in the METS for file {}".format(
            event_type, fsentry.path
        )
        assert len(events) == 1, error


use_step_matcher("parse")