diff --git a/app/importers/langmuir_preprocessor.rb b/app/importers/langmuir_preprocessor.rb
new file mode 100644
index 000000000..7d7aa772b
--- /dev/null
+++ b/app/importers/langmuir_preprocessor.rb
@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+require 'csv'
+
+##
+# Utility service and methods that restructure the rows of a Langmuir CSV Pull List
+# into a format suitable for ingest by the curate CSV importer
+
+class LangmuirPreprocessor
+  attr_accessor :processed_csv
+
+  # Finds the page number and the derivative kind in a name such as
+  # MSS1218_B071_I205_P0001_ARCH.tif
+  FILENAME_PATTERN = /P(?<page>\d+)_(?<kind>ARCH|PROD)/
+
+  ##
+  # Initialize a preprocessor instance by supplying
+  # @param [String] csv the path to a CSV file containing the expected Pull List metadata
+  def initialize(csv)
+    @source_csv = CSV.read(csv, headers: true)
+    directory = File.dirname(csv)
+    extension = File.extname(csv)
+    filename = File.basename(csv, extension)
+    # the output is written next to the source, e.g. manifest.csv => manifest-processed.csv
+    @processed_csv = File.join(directory, "#{filename}-processed.csv")
+    @tree = {}
+  end
+
+  # @return [Integer] the number of rows read from the source csv, blank rows included
+  def record_count
+    @source_csv.count
+  end
+
+  # @return [Array<String>] the columns written in front of the source columns
+  def additional_headers
+    ['source_row', 'deduplication_key', 'type', 'fileset_label', 'preservation_master_file', 'intermediate_file']
+  end
+
+  # process_source_rows builds
+  # and
+  # output_work_tree writes
+  # a hash of hashes:
+  # { work_id => {
+  #     :metadata => CSV::Row,
+  #     :filesets => {
+  #       index1 => CSV::Row,
+  #       index2 => CSV::Row,
+  #       etc. for remaining sides/pages
+  #     }
+  #   }
+  # }
+  def merge
+    process_source_rows
+    output_work_tree
+  end
+
+  # Files every row that names a parent work under that work.
+  # Rows are numbered as in a spreadsheet: the header is row 1, so the first record is row 2.
+  def process_source_rows
+    @source_csv.each.with_index do |row, row_num|
+      process_row(row, row_num + 2) if row['Digital Object - Parent Identifier'] # skip blank rows in the source csv
+    end
+  end
+
+  # Writes the header row, then each work row followed by its fileset rows in page order.
+  # @raise [ArgumentError] when a work has no page 1 ARCH row to supply its metadata
+  def output_work_tree
+    # check before opening the file, so that a bad pull list does not leave a partial output behind
+    incomplete = @tree.select { |_key, work| work[:metadata].nil? }.keys
+    raise ArgumentError, "No page 1 ARCH row found for: #{incomplete.join(', ')}" unless incomplete.empty?
+
+    # the block form closes the output file even if writing a row raises
+    CSV.open(@processed_csv, 'w+', headers: true, write_headers: true) do |merge_csv|
+      merge_csv << additional_headers + @source_csv.headers
+      @tree.each_value do |work|
+        merge_csv << work[:metadata]
+        two_sided = work[:filesets].count <= 2
+        work[:filesets].keys.sort.each do |fileset_index|
+          fileset = work[:filesets][fileset_index]
+          fileset['fileset_label'] = make_label(fileset_index, two_sided)
+          merge_csv << fileset
+        end
+      end
+    end
+    nil
+  end
+
+  # Files one source row under its parent work: a page 1 ARCH row supplies the work metadata,
+  # and every row adds its Filename to the fileset for its page.
+  # @param [CSV::Row] row a row of the source csv
+  # @param [Integer] source_row the spreadsheet row number of that row
+  def process_row(row, source_row)
+    deduplication_key = row['Digital Object - Parent Identifier']
+    sequence_number, target_file, metadata_row = extract_structure(row)
+    # create a placeholder if we don't have one for this key
+    work = (@tree[deduplication_key] ||= { metadata: nil, filesets: {} })
+    work[:metadata] = extract_metadata(row, source_row) if metadata_row
+    work[:filesets][sequence_number] ||= CSV::Row.new(additional_headers, [source_row, deduplication_key, 'fileset'])
+    work[:filesets][sequence_number][target_file] = row['Filename']
+  end
+
+  # Reads the page number and the derivative kind out of the row's Filename.
+  # @param [CSV::Row] row a row of the source csv
+  # @return [Array] page number, target column name, and whether the row supplies the work metadata
+  # @raise [ArgumentError] when the Filename is blank or has no P<number>_ARCH or P<number>_PROD part
+  def extract_structure(row)
+    filename = row['Filename']
+    match = FILENAME_PATTERN.match(filename.to_s)
+    raise ArgumentError, "Cannot read a page number from Filename #{filename.inspect}" unless match
+
+    p_number = match[:page].to_i
+    # use the matched suffix: an 'ARCH' elsewhere in the name must not turn a PROD file into a master
+    target_file = match[:kind] == 'ARCH' ? 'preservation_master_file' : 'intermediate_file'
+    metadata_row = p_number == 1 && target_file == 'preservation_master_file'
+    [p_number, target_file, metadata_row]
+  end
+
+  # Builds the work row: the additional columns followed by every column of the source row.
+  # @param [CSV::Row] row the source row that supplies the work metadata
+  # @param [Integer] source_row the spreadsheet row number of that row
+  # @return [CSV::Row]
+  def extract_metadata(row, source_row)
+    deduplication_key = row['Digital Object - Parent Identifier']
+    processed_row = CSV::Row.new(additional_headers, [source_row, deduplication_key, 'work'])
+    processed_row << row.to_hash
+  end
+
+  # @param [Integer] side the page number of the fileset
+  # @param [Boolean] two_sided whether the work has two filesets or fewer
+  # @return [String] 'Front' or 'Back' for a two-sided work, "Side <n>" otherwise
+  def make_label(side, two_sided)
+    if two_sided
+      side == 1 ? 'Front' : 'Back'
+    else
+      "Side #{side}"
+    end
+  end
+end
diff --git a/lib/tasks/curate_langmuir.rake b/lib/tasks/curate_langmuir.rake
new file mode 100644
index 000000000..59d101571
--- /dev/null
+++ b/lib/tasks/curate_langmuir.rake
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+namespace :curate do
+  desc "Langmuir preprocessing"
+  task langmuir: :environment do
+    # rake exports NAME=VALUE arguments to ENV, so csv=manifest.csv arrives as ENV['csv']
+    langmuir_csv = ENV['CSV'] || ENV['csv'] || ''
+    # validate the path itself, so the file is accepted whether it was named after the task or set in the environment
+    valid_args =
+      File.extname(langmuir_csv).downcase == '.csv' &&
+      File.file?(langmuir_csv)
+    if valid_args
+      preprocessor = LangmuirPreprocessor.new(langmuir_csv)
+      preprocessor.merge
+      puts 'Rows processed: ' + preprocessor.record_count.to_s
+      puts 'Processed file: ' + File.basename(preprocessor.processed_csv)
+    else
+      puts <<~HEREDOC
+        Langmuir preprocessor
+
+        USAGE:
+          rake curate:langmuir csv=manifest.csv
+
+        RETURNS:
+          manifest-processed.csv in the same folder as manifest.csv
+
+      HEREDOC
+    end
+  end
+end
diff --git a/spec/fixtures/csv_import/before_processing/langmuir-unprocessed.csv b/spec/fixtures/csv_import/before_processing/langmuir-unprocessed.csv
new file mode 100644
index 000000000..3d36aaa79
--- /dev/null
+++ b/spec/fixtures/csv_import/before_processing/langmuir-unprocessed.csv
@@ -0,0 +1,29 @@
+Original Order,Batch,other_identifiers,abstract,administrative_unit,local_call_number,contact_information,creator,date_created,Desc - Date Created - Date Precision,date_issued,Desc - Date Published - Date Precision,content_genres,holding_repository,institution,primary_language,Desc - Legacy
Identifier,notes,place_of_production,publisher,emory_rights_statements,rights_statement,subject_names,subject_geo,keywords,subject_topics,uniform_title,title,content_type,data_classifications,Digital Object - Migration Note,Digital Object - Parent Identifier,visibility,Digital Object - Visibility,Directory Path,File Size,Filename,Path,Rights - Access Basis,Rights - Access Basis - Review Date,Rights - Access Basis Note,Rights - Access Level,copyright_date,rights_holders,Rights - Digitization Basis,Rights - Digitization Basis - Review Date,Rights - Digitization Basis - Reviewer,Rights - Digitization Basis Note,internal_rights_note,legacy_rights,sensitive_material,sensitive_material_note,extent,sublocation,date_digitized,transfer_engineer +15653,4,dams:165572|MSS1218_B071_I205,"Recto: Copyright, 1904, Lamson, City gates, St. Augustine, Florida; Verso: Post Card, Place the stamp here, one cent for United States, and island possessions, Cuba, Canada and Mexico, two cents for foreign, H.C. Leighton Co., Portland, Me., manufacturers of postal cards, Made in Germany, No. 363, This side for the address","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu",Lamson.,1904,,,,postcards,"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B071_I205,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. 
If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/NoC-US/1.0/,,Saint Augustine (Fla.),"Places, Florida",Cows.|African American men.,,"City gates, St. Augustine, Florida",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B071_I205,Public,Public,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:,72565546,MSS1218_B071_I205_P0001_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:MSS1218_B071_I205_P0001_ARCH.tif,,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",,,03.76 x 05.71 inches ,Box 71,, +37821,4,dams:187792|MSS1218_B071_I205,,,,,,XXXX,,,,,,,,MSS1218_B071_I205,,,,,,,,,,,,,,,MSS1218_B071_I205,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:,,MSS1218_B071_I205_P0001_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:MSS1218_B071_I205_P0001_PROD.tif,,,,,,,,,,,,,,,,,, +15410,4,dams:165326|MSS1218_B071_I205,"Recto: Copyright, 1904, Lamson, City gates, St. Augustine, Florida; Verso: Post Card, Place the stamp here, one cent for United States, and island possessions, Cuba, Canada and Mexico, two cents for foreign, H.C. Leighton Co., Portland, Me., manufacturers of postal cards, Made in Germany, No. 
363, This side for the address","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu",Lamson.,1904,,,,postcards,"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B071_I205,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/NoC-US/1.0/,,Saint Augustine (Fla.),"Places, Florida",Cows.|African American men.,,"City gates, St. Augustine, Florida",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B071_I205,Public,Public,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:,71196526,MSS1218_B071_I205_P0002_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:MSS1218_B071_I205_P0002_ARCH.tif,,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. 
If you have information regarding this photograph, please contact marbl@emory.edu.",,,03.76 x 05.71 inches ,Box 71,, +37822,4,dams:188720|MSS1218_B071_I205,,,,,,XXXX,,,,,,,,MSS1218_B071_I205,,,,,,,,,,,,,,,MSS1218_B071_I205,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:,,MSS1218_B071_I205_P0002_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:MSS1218_B071_I205_P0002_PROD.tif,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +15796,4,dams:165715|MSS1218_B071_I206,"Verso: A disappearing mode of transportation, Palm Beach, Florida, Dukane Scenics, Hollywood, Florida, 33020, Post Card, Place stamp here, DO1260, [copyright] 1970 Dukane Press, Hollywood, Florida, Printed in U.S.A.","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu",Dukane Press.,1970,,,,photographic postcards,"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B071_I206,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. 
If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/InC/1.0/,,Palm Beach (Fla.),"Places, Florida",Bicycle racing.|African American men.|Bicycles.,,"A disappearing mode of transportation, Palm Beach, Florida",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B071_I206,Emory Low Download,Emory Network,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:,71883756,MSS1218_B071_I206_P0001_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:MSS1218_B071_I206_P0001_ARCH.tif,Administrative Signoff,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",,,03.72 x 05.77 inches ,Box 71,, +37823,4,dams:188405|MSS1218_B071_I206,,,,,,XXXX,,,,,,,,MSS1218_B071_I206,,,,,,,,,,,,,,,MSS1218_B071_I206,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:,,MSS1218_B071_I206_P0001_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:MSS1218_B071_I206_P0001_PROD.tif,,,,,,,,,,,,,,,,,, +14914,4,dams:164830|MSS1218_B071_I206,"Verso: A disappearing mode of transportation, Palm Beach, Florida, Dukane Scenics, Hollywood, Florida, 33020, Post Card, Place stamp here, DO1260, [copyright] 1970 Dukane Press, Hollywood, Florida, Printed in U.S.A.","Stuart A. 
Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu",Dukane Press.,1970,,,,photographic postcards,"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B071_I206,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/InC/1.0/,,Palm Beach (Fla.),"Places, Florida",Bicycle racing.|African American men.|Bicycles.,,"A disappearing mode of transportation, Palm Beach, Florida",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B071_I206,Emory Low Download,Emory Network,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:,72947578,MSS1218_B071_I206_P0002_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:MSS1218_B071_I206_P0002_ARCH.tif,Administrative Signoff,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. 
If you have information regarding this photograph, please contact marbl@emory.edu.",,,03.72 x 05.77 inches ,Box 71,, +37824,4,dams:188639|MSS1218_B071_I206,,,,,,XXXX,,,,,,,,MSS1218_B071_I206,,,,,,,,,,,,,,,MSS1218_B071_I206,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:,,MSS1218_B071_I206_P0002_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:MSS1218_B071_I206_P0002_PROD.tif,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +15698,4,dams:165615|MSS1218_B071_I207,"Verso: Florida Souvenir Co., St. Augustine, Florida, S.A. 55, The old city gate, St. Augustine, Fla., Was once the main entrance to St. Augustine. These pillars, built in 1808, are the only remnants of an earthwork first built in 1704. Now part of Castillo de San Marcos National Monument, Lusterchrome Reg. U.S. Pat. Off., made only by Tichor Bros., Inc., Boston 15, Mass., Post Card, Place stamp here, Made in U.S.A., K-15819","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu","Florida Souvenir Co. (St. Augustine, Fla.)",XXXX,Unknown,,,photographic postcards,"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B071_I207,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. 
If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/InC/1.0/,,Saint Augustine (Fla.),"Places, Florida",Men.|Horse-drawn vehicles.,,"The old city gate, St. Augustine, Fla.",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B071_I207,Public Low View,Public,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:,73970214,MSS1218_B071_I207_P0001_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:MSS1218_B071_I207_P0001_ARCH.tif,,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",,,03.78 x 05.79 inches ,Box 71,, +37825,4,dams:188684|MSS1218_B071_I207,,,,,,XXXX,,,,,,,,MSS1218_B071_I207,,,,,,,,,,,,,,,MSS1218_B071_I207,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:,,MSS1218_B071_I207_P0001_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:MSS1218_B071_I207_P0001_PROD.tif,,,,,,,,,,,,,,,,,, +15479,4,dams:165398|MSS1218_B071_I207,"Verso: Florida Souvenir Co., St. Augustine, Florida, S.A. 55, The old city gate, St. Augustine, Fla., Was once the main entrance to St. Augustine. These pillars, built in 1808, are the only remnants of an earthwork first built in 1704. 
Now part of Castillo de San Marcos National Monument, Lusterchrome Reg. U.S. Pat. Off., made only by Tichor Bros., Inc., Boston 15, Mass., Post Card, Place stamp here, Made in U.S.A., K-15819","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu","Florida Souvenir Co. (St. Augustine, Fla.)",XXXX,Unknown,,,photographic postcards,"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B071_I207,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/InC/1.0/,,Saint Augustine (Fla.),"Places, Florida",Men.|Horse-drawn vehicles.,,"The old city gate, St. Augustine, Fla.",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B071_I207,Public Low View,Public,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:,70005834,MSS1218_B071_I207_P0002_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:MSS1218_B071_I207_P0002_ARCH.tif,,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. 
Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",,,03.78 x 05.79 inches ,Box 71,, +37827,4,dams:188547|MSS1218_B071_I207,,,,,,XXXX,,,,,,,,MSS1218_B071_I207,,,,,,,,,,,,,,,MSS1218_B071_I207,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:,,MSS1218_B071_I207_P0002_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:MSS1218_B071_I207_P0002_PROD.tif,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +15299,4,dams:165215|MSS1218_B071_I208,"Verso: Oldest wooden school house, St. Augustine, Florida, The oldest wooden school house, under three flags stands on quaint old St. George Street, just within the old city gates. As evidence a Spanish map of 1788 shows this cedar red house in fair condition. It has been used in recent exhibitions as contrast to the most modern types of school architecture, K, 61242, Florida Souvenir Co., 72 Spanish Street, St. Augustine, Florida, Post Card, Address, Place stamp here","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu","Florida Souvenir Co. (St. Augustine, Fla.)",1788,,,,photographic postcards,"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B071_I208,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. 
We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/InC/1.0/,,Saint Augustine (Fla.),"Places, Florida",Horse-drawn vehicles.|Tourists.|Fountains.|Schools.,,"Oldest wooden school house, St. Augustine, Florida",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B071_I208,Public Low View,Public,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:,72576174,MSS1218_B071_I208_P0001_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:MSS1218_B071_I208_P0001_ARCH.tif,,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",,,03.76 x 05.75 inches ,Box 71,, +37826,4,dams:188226|MSS1218_B071_I208,,,,,,XXXX,,,,,,,,MSS1218_B071_I208,,,,,,,,,,,,,,,MSS1218_B071_I208,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:,,MSS1218_B071_I208_P0001_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:MSS1218_B071_I208_P0001_PROD.tif,,,,,,,,,,,,,,,,,, +15264,4,dams:165182|MSS1218_B071_I208,"Verso: Oldest wooden school house, St. Augustine, Florida, The oldest wooden school house, under three flags stands on quaint old St. George Street, just within the old city gates. 
As evidence a Spanish map of 1788 shows this cedar red house in fair condition. It has been used in recent exhibitions as contrast to the most modern types of school architecture, K, 61242, Florida Souvenir Co., 72 Spanish Street, St. Augustine, Florida, Post Card, Address, Place stamp here","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu","Florida Souvenir Co. (St. Augustine, Fla.)",1788,,,,photographic postcards,"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B071_I208,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/InC/1.0/,,Saint Augustine (Fla.),"Places, Florida",Horse-drawn vehicles.|Tourists.|Fountains.|Schools.,,"Oldest wooden school house, St. Augustine, Florida",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B071_I208,Public Low View,Public,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:,71927786,MSS1218_B071_I208_P0002_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B071:MSS1218_B071_I208_P0002_ARCH.tif,,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. 
This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",,,03.76 x 05.75 inches ,Box 71,, +37828,4,dams:188709|MSS1218_B071_I208,,,,,,XXXX,,,,,,,,MSS1218_B071_I208,,,,,,,,,,,,,,,MSS1218_B071_I208,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:,,MSS1218_B071_I208_P0002_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B071:MSS1218_B071_I208_P0002_PROD.tif,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +29264,2,dams:179460|MSS1218_B028_I091,,,,,,XXXX,,,,,,,,MSS1218_B028_I091,,,,,,,,,,,,,,,MSS1218_B028_I091,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B028:,,MSS1218_B028_I091_P0004_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B028:MSS1218_B028_I091_P0004_PROD.tif,,,,,,,,,,,,,,,,,, +6477,2,dams:156374|MSS1218_B028_I091,"Recto: Rafael's, gay 'n frisky, the spot to have a hell of a good time! 590 Pacific Street, international settlement, San Francisco; Verso: Miss Ardena Head, [Ardena Head] second front right; Verso 2: My souvenir from San Francisco's famous gay 'n frisky, San Francisco Bay Bridge, cable cars and Chinatown, Old Mission Dolores, Bay Meadows Race Track, Candid Night Club Photo Studios, 557 Pacific Avenue, San Francisco 11, California","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu",,XXXX,Unknown,,,card photographs (photographs),"Stuart A. 
Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B028_I091,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/InC/1.0/,Rafael's.,,Nightclubs,Clubs.|Advertising.,,"Advertising : Rafael's, gay 'n frisky",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B028_I091,Public Low View,Public,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B028:,49798666,MSS1218_B028_I091_P0004_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B028:MSS1218_B028_I091_P0004_ARCH.tif,,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. 
If you have information regarding this photograph, please contact marbl@emory.edu.",,,07.59 x 08.92 inches ,Box 28,, +29262,2,dams:179463|MSS1218_B028_I091,,,,,,XXXX,,,,,,,,MSS1218_B028_I091,,,,,,,,,,,,,,,MSS1218_B028_I091,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B028:,,MSS1218_B028_I091_P0003_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B028:MSS1218_B028_I091_P0003_PROD.tif,,,,,,,,,,,,,,,,,, +6607,2,dams:156505|MSS1218_B028_I091,"Recto: Rafael's, gay 'n frisky, the spot to have a hell of a good time! 590 Pacific Street, international settlement, San Francisco; Verso: Miss Ardena Head, [Ardena Head] second front right; Verso 2: My souvenir from San Francisco's famous gay 'n frisky, San Francisco Bay Bridge, cable cars and Chinatown, Old Mission Dolores, Bay Meadows Race Track, Candid Night Club Photo Studios, 557 Pacific Avenue, San Francisco 11, California","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu",,XXXX,Unknown,,,card photographs (photographs),"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B028_I091,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. 
If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/InC/1.0/,Rafael's.,,Nightclubs,Clubs.|Advertising.,,"Advertising : Rafael's, gay 'n frisky",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B028_I091,Public Low View,Public,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B028:,51291212,MSS1218_B028_I091_P0003_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B028:MSS1218_B028_I091_P0003_ARCH.tif,,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",,,07.59 x 08.92 inches ,Box 28,, +29261,2,dams:179346|MSS1218_B028_I091,,,,,,XXXX,,,,,,,,MSS1218_B028_I091,,,,,,,,,,,,,,,MSS1218_B028_I091,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B028:,,MSS1218_B028_I091_P0002_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B028:MSS1218_B028_I091_P0002_PROD.tif,,,,,,,,,,,,,,,,,, +6542,2,dams:156440|MSS1218_B028_I091,"Recto: Rafael's, gay 'n frisky, the spot to have a hell of a good time! 
590 Pacific Street, international settlement, San Francisco; Verso: Miss Ardena Head, [Ardena Head] second front right; Verso 2: My souvenir from San Francisco's famous gay 'n frisky, San Francisco Bay Bridge, cable cars and Chinatown, Old Mission Dolores, Bay Meadows Race Track, Candid Night Club Photo Studios, 557 Pacific Avenue, San Francisco 11, California","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu",,XXXX,Unknown,,,card photographs (photographs),"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B028_I091,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/InC/1.0/,Rafael's.,,Nightclubs,Clubs.|Advertising.,,"Advertising : Rafael's, gay 'n frisky",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B028_I091,Public Low View,Public,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B028:,51925684,MSS1218_B028_I091_P0002_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B028:MSS1218_B028_I091_P0002_ARCH.tif,,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. 
This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",,,07.59 x 08.92 inches ,Box 28,, +29260,2,dams:179325|MSS1218_B028_I091,,,,,,XXXX,,,,,,,,MSS1218_B028_I091,,,,,,,,,,,,,,,MSS1218_B028_I091,,,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B028:,,MSS1218_B028_I091_P0001_PROD.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:PROD:B028:MSS1218_B028_I091_P0001_PROD.tif,,,,,,,,,,,,,,,,,, +6512,2,dams:156411|MSS1218_B028_I091,"Recto: Rafael's, gay 'n frisky, the spot to have a hell of a good time! 590 Pacific Street, international settlement, San Francisco; Verso: Miss Ardena Head, [Ardena Head] second front right; Verso 2: My souvenir from San Francisco's famous gay 'n frisky, San Francisco Bay Bridge, cable cars and Chinatown, Old Mission Dolores, Bay Meadows Race Track, Candid Night Club Photo Studios, 557 Pacific Avenue, San Francisco 11, California","Stuart A. Rose Manuscript, Archives, and Rare Book Library",MSS 1218,"Stuart A. Rose Manuscript, Archives, and Rare Book Library rose.library@emory.edu",,XXXX,Unknown,,,card photographs (photographs),"Stuart A. Rose Manuscript, Archives, and Rare Book Library",Emory University,,MSS1218_B028_I091,,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. 
Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. If you have information regarding this photograph, please contact marbl@emory.edu.",http://rightsstatements.org/vocab/InC/1.0/,Rafael's.,,Nightclubs,Clubs.|Advertising.,,"Advertising : Rafael's, gay 'n frisky",http://id.loc.gov/vocabulary/resourceTypes/img,Confidential,"Migrated to Cor repository from Extensis Portfolio DAMS during Phase 1 Migrations, 2019",MSS1218_B028_I091,Public Low View,Public,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B028:,240500606,MSS1218_B028_I091_P0001_ARCH.tif,::nasn2dmz.cc.emory.edu:dmfiles:MARBL:Manuscripts:MSS_1218_Langmuir:ARCH:B028:MSS1218_B028_I091_P0001_ARCH.tif,,,,,,,Administrative Signoff,2016-03-01,Scholarly Communications Office,,,"Emory University does not control copyright for this image. This image is made available for individual viewing and reference for educational purposes only such as personal study, preparation for teaching, and research. Your reproduction, distribution, public display or other re-use of any content beyond a fair use as codified in section 107 of US Copyright Law is at your own risk. We are always interested in learning more about our collections. 
If you have information regarding this photograph, please contact marbl@emory.edu.",,,07.59 x 08.92 inches ,Box 28,, diff --git a/spec/importers/langmuir_preprocessor_spec.rb b/spec/importers/langmuir_preprocessor_spec.rb new file mode 100644 index 000000000..7ebcf55aa --- /dev/null +++ b/spec/importers/langmuir_preprocessor_spec.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true +require 'rails_helper' + +RSpec.describe LangmuirPreprocessor do + before :all do + # running #merge is expensive, only set it up and run it once and then check the results + langmuir_sample = File.join(fixture_path, 'csv_import', 'before_processing', 'langmuir-unprocessed.csv') + preprocessor = described_class.new(langmuir_sample) + preprocessor.merge + end + + after :all do + test_csv = File.join(fixture_path, 'csv_import', 'before_processing', 'langmuir-unprocessed-processed.csv') + File.delete(test_csv) if File.exist?(test_csv) + end + + # each test inspects the output of the pre-processor, read into the import_rows CSV::Table object + let(:import_rows) { CSV.read(File.join(fixture_path, 'csv_import', 'before_processing', 'langmuir-unprocessed-processed.csv'), headers: true).by_row! } + + it 'outputs the expected number of rows' do + expect(import_rows.length).to eq(17) + end + + # this just repeats the csv data, but shows the testing pattern and titles used in subsequent tests + it 'processes the expected works' do + expect(import_rows[0]['title']).to eq('City gates, St. Augustine, Florida') # City gates, St. Augustine, Florida + expect(import_rows[3]['title']).to eq('A disappearing mode of transportation, Palm Beach, Florida') # A disappearing mode of transportation, Palm Beach, Florida + expect(import_rows[6]['title']).to eq('The old city gate, St. Augustine, Fla.') # The old city gate, St. Augustine, Fla. + expect(import_rows[9]['title']).to eq('Oldest wooden school house, St. Augustine, Florida') # Oldest wooden school house, St. 
Augustine, Florida + expect(import_rows[12]['title']).to eq('Advertising : Rafael\'s, gay \'n frisky') # Advertising : Rafael's, gay 'n frisky + end + + it 'identifies the source row from the original csv' do + expect(import_rows[0]['source_row']).to eq('2') # City gates, St. Augustine, Florida + end + + it 'skips blank lines in the original csv & lists works in proper order' do + expect(import_rows[12]['source_row']).to eq('29') # Advertising : Rafael's, gay 'n frisky - work metadata + end + + it 'creates a deduplication_key' do + expect(import_rows[0]['deduplication_key']).to eq('MSS1218_B071_I205') # City gates, St. Augustine, Florida + end + + it 'sets the row type' do + expect(import_rows[0]['type']).to eq('work') # City gates, St. Augustine, Florida + end + + it 'creates a fileset row for each side/part' do + expect(import_rows[13]['type']).to eq('fileset') # Advertising : Rafael's, gay 'n frisky + end + + it 'uses Front/Back as fileset labels for two sided works' do + expect(import_rows[1]['fileset_label']).to eq('Front') # City gates, St. Augustine, Florida + expect(import_rows[2]['fileset_label']).to eq('Back') # City gates, St. Augustine, Florida + end + + it 'uses Side # as fileset labels for multi-sided works' do + expect(import_rows[13]['fileset_label']).to eq('Side 1') # Advertising : Rafael's, gay 'n frisky + expect(import_rows[16]['fileset_label']).to eq('Side 4') # Advertising : Rafael's, gay 'n frisky + end + + it 'attaches the ARCH file as the preservation_master_file' do + expect(import_rows[2]['preservation_master_file']).to match(/ARCH/) # City gates, St. Augustine, Florida + end + + it 'attaches the PROD file as the intermediate_file' do + expect(import_rows[2]['intermediate_file']).to match(/PROD/) # City gates, St. 
Augustine, Florida + end + + it 'attaches the expected files to the expected filesets in the expected order' do # Advertising : Rafael's, gay 'n frisky + expect(import_rows[14]['fileset_label']).to eq('Side 2') # P0002 + expect(import_rows[14]['preservation_master_file']).to eq('MSS1218_B028_I091_P0002_ARCH.tif') # ARCH + expect(import_rows[15]['fileset_label']).to eq('Side 3') # P0003 + expect(import_rows[15]['intermediate_file']).to eq('MSS1218_B028_I091_P0003_PROD.tif') # PROD + end +end