Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust indexing so all MO metadata values get added to proxy #5425

Merged
merged 8 commits into from
Nov 15, 2023
39 changes: 18 additions & 21 deletions app/controllers/catalog_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ class CatalogController < ApplicationController
config.add_facet_field 'avalon_resource_type_ssim', label: 'Format', limit: 5, collapse: false
config.add_facet_field 'creator_ssim', label: 'Main contributor', limit: 5
config.add_facet_field 'date_sim', label: 'Date', limit: 5
config.add_facet_field 'genre_sim', label: 'Genres', limit: 5
config.add_facet_field 'genre_ssim', label: 'Genres', limit: 5
config.add_facet_field 'series_ssim', label: 'Series', limit: 5
config.add_facet_field 'collection_ssim', label: 'Collection', limit: 5
config.add_facet_field 'unit_ssim', label: 'Unit', limit: 5
config.add_facet_field 'language_sim', label: 'Language', limit: 5
config.add_facet_field 'language_ssim', label: 'Language', limit: 5
# Hide these facets if not a Collection Manager
config.add_facet_field 'workflow_published_sim', label: 'Published', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"
config.add_facet_field 'avalon_uploader_ssi', label: 'Created by', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"
Expand All @@ -97,8 +97,8 @@ class CatalogController < ApplicationController
private: { label: "Private", fq: "has_model_ssim:MediaObject AND NOT read_access_group_ssim:#{Hydra::AccessControls::AccessRight::PERMISSION_TEXT_VALUE_PUBLIC} AND NOT read_access_group_ssim:#{Hydra::AccessControls::AccessRight::PERMISSION_TEXT_VALUE_AUTHENTICATED}" }
}
config.add_facet_field 'read_access_virtual_group_ssim', label: 'External Group', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow", helper_method: :vgroup_display
config.add_facet_field 'date_digitized_sim', label: 'Date Digitized', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"#, partial: 'blacklight/hierarchy/facet_hierarchy'
config.add_facet_field 'date_ingested_sim', label: 'Date Ingested', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"
config.add_facet_field 'date_digitized_ssim', label: 'Date Digitized', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"#, partial: 'blacklight/hierarchy/facet_hierarchy'
config.add_facet_field 'date_ingested_ssim', label: 'Date Ingested', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"

# Have BL send all facet field names to Solr, which has been the default
# previously. Simply remove these lines if you'd rather use Solr request
Expand All @@ -111,27 +111,24 @@ class CatalogController < ApplicationController
# solr fields to be displayed in the index (search results) view
# The ordering of the field names is the order of the display
config.add_index_field 'title_tesi', label: 'Title', if: Proc.new {|context, _field_config, _document| context.request.format == :json }
config.add_index_field 'date_ssi', label: 'Date', helper_method: :combined_display_date
config.add_index_field 'date_issued_ssi', label: 'Date', helper_method: :combined_display_date
config.add_index_field 'creator_ssim', label: 'Main contributors', helper_method: :contributor_index_display
config.add_index_field 'summary_ssi', label: 'Summary', helper_method: :description_index_display
config.add_index_field 'abstract_ssi', label: 'Summary', helper_method: :description_index_display
config.add_index_field 'duration_ssi', label: 'Duration', if: Proc.new {|context, _field_config, _document| context.request.format == :json }
config.add_index_field 'section_id_ssim', label: 'Sections', if: Proc.new {|context, _field_config, _document| context.request.format == :json }, helper_method: :section_id_json_index_display

# solr fields to be displayed in the show (single result) view
# The ordering of the field names is the order of the display
config.add_show_field 'title_tesi', label: 'Title'
config.add_show_field 'format_sim', label: 'Format'
config.add_show_field 'creator_sim', label: 'Creator'
config.add_show_field 'language_sim', label: 'Language'
config.add_show_field 'date_ssi', label: 'Date'
config.add_show_field 'abstract_sim', label: 'Abstract'
config.add_show_field 'location_sim', label: 'Locations'
config.add_show_field 'time_period_sim', label: 'Time periods'
config.add_show_field 'contributor_sim', label: 'Contributors'
config.add_show_field 'publisher_sim', label: 'Publisher'
config.add_show_field 'genre_sim', label: 'Genre'
config.add_show_field 'publication_location_sim', label: 'Place of publication'
config.add_show_field 'terms_sim', label: 'Terms'
config.add_show_field 'resource_type_ssim', label: 'Format'
config.add_show_field 'creator_ssim', label: 'Main Contributors'
config.add_show_field 'language_ssim', label: 'Language'
config.add_show_field 'date_issued_ssi', label: 'Date'
config.add_show_field 'abstract_ssim', label: 'Abstract'
config.add_show_field 'location_ssim', label: 'Locations'
config.add_show_field 'contributor_ssim', label: 'Contributors'
config.add_show_field 'publisher_ssim', label: 'Publisher'
config.add_show_field 'genre_ssim', label: 'Genre'

# "fielded" search configuration. Used by pulldown among other places.
# For supported keys in hash, see rdoc for Blacklight::SearchFields
Expand Down Expand Up @@ -191,10 +188,10 @@ class CatalogController < ApplicationController
# label in pulldown is followed by the name of the SOLR field to sort by and
# whether the sort is ascending or descending (it must be asc or desc
# except in the relevancy case).
config.add_sort_field 'score desc, title_ssort asc, date_ssi desc', label: 'Relevance'
config.add_sort_field 'date_ssi desc, title_ssort asc', label: 'Date'
config.add_sort_field 'score desc, title_ssort asc, date_issued_ssi desc', label: 'Relevance'
config.add_sort_field 'date_issued_ssi desc, title_ssort asc', label: 'Date'
config.add_sort_field 'creator_ssort asc, title_ssort asc', label: 'Main contributor'
config.add_sort_field 'title_ssort asc, date_ssi desc', label: 'Title'
config.add_sort_field 'title_ssort asc, date_issued_ssi desc', label: 'Title'
config.add_sort_field 'timestamp desc', label: 'Recently Updated', if: false

# If there are more than this many search results, no spelling ("did you
Expand Down
4 changes: 3 additions & 1 deletion app/models/iiif_manifest_presenter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,12 @@ def display_unit(media_object)
end

# Lists the distinct display text of a media object's languages.
#
# @param media_object [#language] object whose +language+ entries are indexed with +[:text]+
# @return [Array, nil] the unique +:text+ values in first-seen order, or nil
#   when +language+ is not present
def display_language(media_object)
  if media_object.language.present?
    media_object.language.map { |entry| entry[:text] }.uniq
  end
end

# Renders each related item of a media object as an HTML anchor string.
#
# @param media_object [#related_item_url] object whose +related_item_url+
#   entries are indexed with +[:url]+ and +[:label]+
# @return [Array<String>, nil] one "<a href='…'>…</a>" string per entry, or
#   nil when +related_item_url+ is not present
def display_related_item(media_object)
  if media_object.related_item_url.present?
    # NOTE(review): url and label are interpolated without HTML escaping —
    # confirm the consumer sanitises these values.
    media_object.related_item_url.map do |item|
      "<a href='#{item[:url]}'>#{item[:label]}</a>"
    end
  end
end

Expand Down Expand Up @@ -180,7 +182,7 @@ def iiif_metadata_fields
metadata_field('Contributor', media_object.contributor),
metadata_field('Publisher', media_object.publisher),
metadata_field('Genre', media_object.genre),
metadata_field('Subject', media_object.subject),
metadata_field('Subject', media_object.topical_subject),
metadata_field('Time period', media_object.temporal_subject),
metadata_field('Location', media_object.geographic_subject),
metadata_field('Collection', display_collection(media_object)),
Expand Down
29 changes: 16 additions & 13 deletions app/models/media_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,10 @@ def section_physical_descriptions
def fill_in_solr_fields_that_need_master_files(solr_doc)
solr_doc['section_id_ssim'] = ordered_master_file_ids
solr_doc["other_identifier_sim"] += master_files.collect {|mf| mf.identifier.to_a }.flatten
solr_doc["date_digitized_sim"] = master_files.collect {|mf| mf.date_digitized }.compact.map {|t| Time.parse(t).strftime "%F" }
solr_doc["date_digitized_ssim"] = master_files.collect {|mf| mf.date_digitized }.compact.map {|t| Time.parse(t).strftime "%F" }
solr_doc["section_label_tesim"] = section_labels
solr_doc['section_physical_description_ssim'] = section_physical_descriptions
solr_doc['all_comments_sim'] = all_comments
solr_doc['all_comments_ssim'] = all_comments
end

# Enqueue background job to do a full indexing including more costly fields that read from children
Expand All @@ -257,9 +257,12 @@ def to_solr(include_child_fields: false)
solr_doc[Hydra.config.permissions.read.group] += solr_doc['read_access_ip_group_ssim']
solr_doc["title_ssort"] = self.title
solr_doc["creator_ssort"] = Array(self.creator).join(', ')
solr_doc["date_ingested_sim"] = self.create_date.strftime "%F" if self.create_date.present?
solr_doc["date_ingested_ssim"] = self.create_date.strftime "%F" if self.create_date.present?
solr_doc['avalon_resource_type_ssim'] = self.avalon_resource_type.map(&:titleize)
solr_doc['identifier_ssim'] = self.identifier.map(&:downcase)
solr_doc['note_ssm'] = self.note.collect { |n| n.to_json }
solr_doc['other_identifier_ssm'] = self.other_identifier.collect { |oi| oi.to_json }
solr_doc['related_item_url_ssm'] = self.related_item_url.collect { |r| r.to_json }
if include_child_fields
fill_in_solr_fields_that_need_master_files(solr_doc)
elsif id.present? # avoid error in test suite
Expand All @@ -272,21 +275,21 @@ def to_solr(include_child_fields: false)
all_text_values = []
all_text_values << solr_doc["title_tesi"]
all_text_values << solr_doc["creator_ssim"]
all_text_values << solr_doc["contributor_sim"]
all_text_values << solr_doc["contributor_ssim"]
all_text_values << solr_doc["unit_ssim"]
all_text_values << solr_doc["collection_ssim"]
all_text_values << solr_doc["summary_ssi"]
all_text_values << solr_doc["publisher_sim"]
all_text_values << solr_doc["subject_topic_sim"]
all_text_values << solr_doc["subject_geographic_sim"]
all_text_values << solr_doc["subject_temporal_sim"]
all_text_values << solr_doc["genre_sim"]
all_text_values << solr_doc["language_sim"]
all_text_values << solr_doc["physical_description_sim"]
all_text_values << solr_doc["abstract_ssi"]
all_text_values << solr_doc["publisher_ssim"]
all_text_values << solr_doc["topical_subject_ssim"]
all_text_values << solr_doc["geographic_subject_ssim"]
all_text_values << solr_doc["temporal_subject_ssim"]
all_text_values << solr_doc["genre_ssim"]
all_text_values << solr_doc["language_ssim"]
all_text_values << solr_doc["physical_description_ssim"]
all_text_values << solr_doc["series_ssim"]
all_text_values << solr_doc["date_sim"]
all_text_values << solr_doc["notes_sim"]
all_text_values << solr_doc["table_of_contents_sim"]
all_text_values << solr_doc["table_of_contents_ssim"]
all_text_values << solr_doc["other_identifier_sim"]
solr_doc["all_text_timv"] = all_text_values.flatten
solr_doc.each_pair { |k,v| solr_doc[k] = v.is_a?(Array) ? v.select { |e| e =~ /\S/ } : v }
Expand Down
60 changes: 32 additions & 28 deletions app/models/mods_behaviors.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Copyright 2011-2023, The Trustees of Indiana University and Northwestern
# University. Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
#
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
Expand Down Expand Up @@ -39,51 +39,51 @@ def to_solr(solr_doc = Hash.new, opts = {})
solr_doc['creator_ssim'] = gather_terms(self.find_by_terms(:creator))
# solr_doc['creator_ssi'] = self.find_by_terms(:creator).text
# Individual fields
solr_doc['summary_ssi'] = self.find_by_terms(:abstract).text
solr_doc['publisher_sim'] = gather_terms(self.find_by_terms(:publisher))
solr_doc['contributor_sim'] = gather_terms(self.find_by_terms(:contributor))
solr_doc['subject_sim'] = gather_terms(self.find_by_terms(:subject))
solr_doc['genre_sim'] = gather_terms(self.find_by_terms(:genre))
solr_doc['abstract_ssi'] = self.find_by_terms(:abstract).text
solr_doc['publisher_ssim'] = gather_terms(self.find_by_terms(:publisher))
solr_doc['contributor_ssim'] = gather_terms(self.find_by_terms(:contributor))
solr_doc['subject_ssim'] = gather_terms(self.find_by_terms(:subject))
solr_doc['genre_ssim'] = gather_terms(self.find_by_terms(:genre))
# solr_doc['physical_dtl_sim'] = gather_terms(self.find_by_terms(:format))
# solr_doc['contents_sim'] = gather_terms(self.find_by_terms(:parts_list))
solr_doc['notes_sim'] = gather_terms(self.find_by_terms(:note))
solr_doc['table_of_contents_sim'] = gather_terms(self.find_by_terms(:table_of_contents))
solr_doc['access_sim'] = gather_terms(self.find_by_terms(:usage))
solr_doc['table_of_contents_ssim'] = gather_terms(self.find_by_terms(:table_of_contents))
solr_doc['usage_ssim'] = gather_terms(self.find_by_terms(:usage))
# solr_doc['collection_sim'] = gather_terms(self.find_by_terms(:archival_collection))
solr_doc['series_ssim'] = gather_terms(self.find_by_terms(:series))
#filter formats based upon whitelist
solr_doc['format_sim'] = (gather_terms(self.find_by_terms(:resource_type)) & ['moving image', 'sound recording' ]).map(&:titleize)
solr_doc['location_sim'] = gather_terms(self.find_by_terms(:geographic_subject))
solr_doc['resource_type_ssim'] = (gather_terms(self.find_by_terms(:resource_type)) & ['moving image', 'sound recording' ]).map(&:titleize)
solr_doc['location_ssim'] = gather_terms(self.find_by_terms(:geographic_subject))

# Blacklight facets - these are the same facet fields used in our Blacklight app
# for consistency and so they'll show up when we export records from Hydra into BL:
solr_doc['material_sim'] = "Digital"
solr_doc['subject_topic_sim'] = gather_terms(self.find_by_terms(:topical_subject))
solr_doc['subject_geographic_sim'] = gather_terms(self.find_by_terms(:geographic_subject))
solr_doc['subject_temporal_sim'] = gather_terms(self.find_by_terms(:temporal_subject))
solr_doc['subject_occupation_sim'] = gather_terms(self.find_by_terms(:occupation_subject))
solr_doc['subject_person_sim'] = gather_terms(self.find_by_terms(:person_subject))
solr_doc['subject_corporate_sim'] = gather_terms(self.find_by_terms(:corporate_subject))
solr_doc['subject_family_sim'] = gather_terms(self.find_by_terms(:family_subject))
solr_doc['subject_title_sim'] = gather_terms(self.find_by_terms(:title_subject))
solr_doc['time_sim'] = gather_terms(self.find_by_terms(:temporal_subject))
solr_doc['material_ssim'] = "Digital"
solr_doc['topical_subject_ssim'] = gather_terms(self.find_by_terms(:topical_subject))
solr_doc['geographic_subject_ssim'] = gather_terms(self.find_by_terms(:geographic_subject))
solr_doc['temporal_subject_ssim'] = gather_terms(self.find_by_terms(:temporal_subject))
solr_doc['occupation_subject_ssim'] = gather_terms(self.find_by_terms(:occupation_subject))
solr_doc['person_subject_ssim'] = gather_terms(self.find_by_terms(:person_subject))
solr_doc['corporate_subject_ssim'] = gather_terms(self.find_by_terms(:corporate_subject))
solr_doc['family_subject_ssim'] = gather_terms(self.find_by_terms(:family_subject))
solr_doc['title_subject_ssim'] = gather_terms(self.find_by_terms(:title_subject))
solr_doc['time_ssim'] = gather_terms(self.find_by_terms(:temporal_subject))

# TODO: map PBcore's three-letter language codes to full language names
# Right now, everything's English.
solr_doc['language_sim'] = gather_terms(self.find_by_terms(:language_text))
solr_doc['language_code_sim'] = gather_terms(self.find_by_terms(:language_code))
solr_doc['physical_description_sim'] = gather_terms(self.find_by_terms(:physical_description))
solr_doc['language_ssim'] = gather_terms(self.find_by_terms(:language_text))
solr_doc['language_code_ssim'] = gather_terms(self.find_by_terms(:language_code))
solr_doc['physical_description_ssim'] = gather_terms(self.find_by_terms(:physical_description))
solr_doc['related_item_url_sim'] = gather_terms(self.find_by_terms(:related_item_url))
solr_doc['related_item_label_sim'] = gather_terms(self.find_by_terms(:related_item_label))
solr_doc['terms_of_use_si'] = (self.find_by_terms(:terms_of_use) - self.find_by_terms(:rights_statement)).text
solr_doc['terms_of_use_ssi'] = (self.find_by_terms(:terms_of_use) - self.find_by_terms(:rights_statement)).text
solr_doc['rights_statement_ssi'] = self.find_by_terms(:rights_statement).text
solr_doc['other_identifier_sim'] = gather_terms(self.find_by_terms(:other_identifier))

# Extract 4-digit year for creation date facet in Hydra and pub_date facet in Blacklight
solr_doc['date_ssi'] = self.find_by_terms(:date_issued).text
solr_doc['date_issued_ssi'] = self.find_by_terms(:date_issued).text
solr_doc['date_created_ssi'] = self.find_by_terms(:date_created).text
# Put both publication date and creation date into the date facet
solr_doc['date_sim'] = gather_years(solr_doc['date_ssi'])
solr_doc['date_sim'] = gather_years(solr_doc['date_issued_ssi'])
solr_doc['date_sim'] += gather_years(solr_doc['date_created_ssi']) if solr_doc['date_created_ssi'].present?

# For full text, we stuff it into the mods_tesim field which is already configured for Mods documents
Expand Down Expand Up @@ -185,6 +185,10 @@ def gather_terms(terms)
terms.collect { |r| r.text }.compact.uniq
end

# Collects the value of one named attribute from each of the given terms.
#
# @param terms [Enumerable] objects responding to +attribute(name)+
#   (presumably the XML nodes returned by +find_by_terms+ — TODO confirm)
# @param attribute [String] name of the attribute to read from every term
# @return [Array] one entry per term, in the original order; the entry is nil
#   for a term whose +attribute+ lookup returns nil
def gather_attribute(terms, attribute)
  # A term without the requested attribute yields nil from +attribute+; safe
  # navigation keeps that single term from raising NoMethodError and aborting
  # the whole collection.
  terms.map { |term| term.attribute(attribute)&.value }
end

def gather_years(date)
parsed = Date.edtf(date)
return Array.new if parsed.nil?
Expand Down
Loading