diff --git a/bin/export-collections b/bin/export-collections
new file mode 100755
index 000000000..dc8fb64ae
--- /dev/null
+++ b/bin/export-collections
@@ -0,0 +1,65 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# Exports every Hydrus collection as one JSON document per line on stdout.
+# Progress and warnings are written with `warn`.
+#
+# Usage:
+#   RAILS_ENV=production bin/export-collections > collections.jsonl
+
+# Collections whose admin_policy_object_id is this druid are skipped silently.
+GRAVEYARD_APO = 'druid:kg712km1576'
+# Collections whose admin_policy_object_id is this druid are skipped with a warning.
+UNIVERSITY_ARCHIVES_APO = 'druid:yf767bj4831'
+
+require_relative '../config/environment'
+list = Hydrus::Collection.all
+
+# Builds the export attributes for a single collection.
+#
+# @param coll [Hydrus::Collection] the collection to export
+# @return [Hash, nil] attributes to emit as JSON, or nil when the collection
+#   is in one of the skipped APOs
+def serialize(coll)
+  return if coll.admin_policy_object_id == GRAVEYARD_APO
+
+  if coll.admin_policy_object_id == UNIVERSITY_ARCHIVES_APO
+    warn "Collection #{coll.id} is in the University Archives APO, which is not a Hydrus::APO"
+    return
+  end
+
+  # Read the creator only after the skip checks, so skipped collections are not queried for it.
+  creator = coll.events.ng_xml.xpath('//event[text()="Collection created"]/@who').to_s
+  apo = coll.apo
+  # Keep only the present title/url of each link, then drop links left empty.
+  related_items = coll.related_items.map { |rel| { link_title: rel.title.presence, url: rel.url.presence }.compact }.reject(&:blank?)
+
+  {
+    druid: coll.id,
+    version: coll.current_version.to_i,
+    creator: { sunetid: creator },
+    name: coll.title,
+    description: coll.abstract,
+    contact_email: coll.contact,
+    visibility_option: coll.visibility_option,
+    visibility: coll.visibility,
+    embargo_option: coll.embargo_option,
+    embargo_terms: coll.embargo_terms,
+    requires_human_approval: coll.requires_human_approval,
+    license_option: coll.license_option,
+    license: coll.license,
+    object_status: coll.object_status,
+    managers: apo.persons_with_role('hydrus-collection-manager'),
+    depositors: apo.persons_with_role('hydrus-collection-item-depositor') + apo.persons_with_role('hydrus-collection-depositor'),
+    reviewers: apo.persons_with_role('hydrus-collection-reviewer'),
+    related_items: related_items,
+    created_at: coll.create_date,
+    updated_at: coll.modified_date
+  }
+end
+
+warn "Exporting #{list.count} collections"
+
+# Drop skipped (nil) collections before converting to JSON: nil.to_json is the
+# string "null", so compacting after to_json would never remove anything.
+puts list.map { |collection| serialize(collection) }.compact.map(&:to_json)
diff --git a/bin/export-items b/bin/export-items
new file mode 100755
index 
000000000..32d6b1e4b
--- /dev/null
+++ b/bin/export-items
@@ -0,0 +1,74 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# Writes one JSON document per Hydrus item to stdout. Progress counts and
+# per-item errors are written with `warn`.
+#
+# Usage:
+#   RAILS_ENV=production bin/export-items > items.jsonl
+
+require_relative '../config/environment'
+list = Hydrus::Item.all
+
+# Collects an item's contributors as hashes, omitting those with a blank name.
+def contributors(item)
+  named = item.contributors.map do |person|
+    { full_name: person.name, role: person.role, name_type: person.name_type }
+  end
+  named.reject { |entry| entry[:full_name].blank? }
+end
+
+# Describes each of an item's files by its resolved path and its label.
+def files(item)
+  item.files.map do |object_file|
+    resolved_path = File.realdirpath(object_file.current_path)
+    { path: resolved_path, label: object_file.label }
+  end
+end
+
+# Builds the hash of export attributes for a single item.
+def serialize(item)
+  sunetid = item.roleMetadata.ng_xml.xpath('//role/person/identifier').text
+  links = item.related_items.map do |rel|
+    { link_title: rel.title.presence, url: rel.url.presence }.compact
+  end
+  links = links.reject(&:blank?)
+
+  {
+    druid: item.id,
+    version: item.current_version.to_i,
+    creator: { sunetid: sunetid },
+    title: item.title,
+    abstract: item.abstract,
+    contact_email: item.contact,
+    collection: item.collection_id,
+    visibility: item.visibility.first,
+    license: item.license,
+    embargo_release_date: item.rmd_embargo_release_date,
+    date_created: item.date_created,
+    object_status: item.object_status,
+    item_type: item.item_type,
+    citation: item.preferred_citation,
+    related_items: links,
+    related_citations: item.related_citation,
+    contributors: contributors(item),
+    keywords: item.keywords,
+    files: files(item),
+    created_at: item.create_date,
+    updated_at: item.modified_date
+  }
+end
+
+warn "Exporting #{list.count} items"
+processed = 0
+list.find_each do |item|
+  processed += 1
+  warn processed
+  begin
+    # Attributes whose value is nil are left out of the JSON document.
+    puts serialize(item).compact.to_json
+  rescue StandardError => error
+    # Report the failure and carry on with the next item.
+    warn "Error with #{item.pid}. #{error.message}"
+  end
+end