-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvalidate-cocina
executable file
·88 lines (74 loc) · 3.19 KB
/
validate-cocina
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env ruby
# frozen_string_literal: true
# Validates cocina model changes by loading each object into cocina to determine if an error is raised.
# Also, transforms the cocina model to MODS.
# This should be run on sdr-deploy since it requires using a version of cocina models that is not the latest release.
# To select the cocina model version to test, adjust the Gemfile.
# For example, gem 'cocina-models', github: 'sul-dlss/cocina-models', branch: 'test_me'
require_relative '../config/environment'
require 'optparse'
require 'tty-progressbar'
# Add Cocina::Models::ValidationError to the exceptions Honeybadger ignores
# (presumably because validation failures are what this script is collecting -- confirm).
Honeybadger.configure do |config|
  exception_settings = config.exceptions
  exception_settings.ignore = exception_settings.ignore + [Cocina::Models::ValidationError]
end
# Command-line options. :processes defaults to 4; :sample stays unset unless given.
options = { processes: 4 }
parser = OptionParser.new do |option_parser|
  option_parser.banner = 'Usage: bin/validate-cocina [options]'
  option_parser.on('-pPROCESSES', '--processes PROCESSES', Integer,
                   "Number of processes. Default is #{options[:processes]}.")
  # The sample is one overall limit on the repository objects loaded, not a limit per object type.
  option_parser.on('-sSAMPLE', '--sample SAMPLE', Integer,
                   'Sample size (number of objects), otherwise all objects.')
  option_parser.on('-h', '--help', 'Displays help.') do
    puts option_parser
    exit
  end
end
# parse! stores each parsed value in options under the option's long name (:processes, :sample).
parser.parse!(into: options)
# How many druids make up a single advance unit of the progress bar:
# 1 when there are fewer than 100 druids, otherwise count / 100.
def num_for_progress_advance(count)
  count < 100 ? 1 : count / 100
end
# Moves the progress bar forward by the number of entries in +results+.
# Returns whatever progress_bar.advance returns.
def on_finish(results, progress_bar)
  completed = results.size
  progress_bar.advance(completed)
end
# Builds the progress bar displayed while validating; +count+ becomes the bar's total.
# NOTE(review): confirm that TTY::ProgressBar honors an `advance:` option -- it may be
# silently ignored, in which case only the explicit advance calls move the bar.
def tty_progress_bar(count)
  template = 'Validating [:bar] (:percent (:current/:total), rate: :rate/s, mean rate: :mean_rate/s, :elapsed total, ETA: :eta_time)'
  settings = {
    bar_format: :box,
    advance: num_for_progress_advance(count),
    total: count
  }
  TTY::ProgressBar.new(template, **settings)
end
# Loads every repository object version that has a cocina_version into cocina and
# transforms its description to MODS, working in parallel over slices of 100 object ids.
#
# @param sample_size [Integer, nil] maximum number of repository objects to check; nil checks all
# @param processes [Integer] number of worker processes passed to Parallel
# @return [Array<Array>] one row per version: [druid, version, nil] when it validated, or
#   [druid, version, object_type, collection, message] when an error was raised
def validate(sample_size, processes)
  scope = sample_size ? RepositoryObject.limit(sample_size) : RepositoryObject
  obj_ids = scope.ids
  progress_bar = tty_progress_bar(obj_ids.size)
  progress_bar.start
  Parallel.map(obj_ids.each_slice(100),
               in_processes: processes,
               finish: ->(_, _, results) { on_finish(results, progress_bar) }) do |slice_obj_ids|
    RepositoryObject.find(slice_obj_ids).map do |repository_object|
      repository_object.versions.where.not(cocina_version: nil).map do |repository_object_version|
        validate_version(repository_object, repository_object_version)
      end
    end
  end.flatten(2) # slices -> objects -> versions: two levels up leaves one row per version
end

# Validates a single version and returns its result row (see #validate for the row shapes).
# Any StandardError raised while loading or transforming is captured in the row, not re-raised.
def validate_version(repository_object, repository_object_version)
  cocina_obj = repository_object_version.to_cocina
  Cocina::Models::Mapping::ToMods::Description.transform(cocina_obj.description, cocina_obj.externalIdentifier)
  [repository_object.external_identifier, repository_object_version.version, nil]
rescue StandardError => e
  [repository_object.external_identifier, repository_object_version.version, repository_object.object_type,
   collections_for(repository_object, repository_object_version), e.message]
end

# Space-separated isMemberOf values for a 'dro' version, '' for every other object type.
# Runs inside the rescue path, so it must not raise: a version that failed validation may
# well have no structural data or no isMemberOf entry, and both yield ''.
def collections_for(repository_object, repository_object_version)
  return '' unless repository_object.object_type == 'dro'

  structural = repository_object_version.structural || {}
  Array(structural['isMemberOf']).join(' ')
end
# Run the validation, then write only the rows that carry an error message to validate-cocina.csv.
results = validate(options[:sample], options[:processes])
CSV.open('validate-cocina.csv', 'w') do |csv|
  csv << %w[druid version type collection message]
  results.each do |row|
    druid, version, type, collection, message = row
    # Rows for versions that validated have no message and are left out of the report.
    next unless message

    csv << [druid, version, type, collection, message]
  end
end