Skip to content

Commit

Permalink
Merge pull request #2813 from sciencehistory/box_folder_indexing
Browse files Browse the repository at this point in the history
Box and folder indexing.
  • Loading branch information
eddierubeiz authored Dec 20, 2024
2 parents 6414cb6 + f6ce7ab commit f7710c9
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 0 deletions.
17 changes: 17 additions & 0 deletions app/controllers/catalog_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,23 @@ def active_sort_fields
# case for a BL "search field", which is really a dismax aggregate
# of Solr search fields.


# See https://github.com/sciencehistory/scihist_digicoll/issues/2585
# Note that a Work can be associated with two consecutive boxes, in which case the box number is something like "34-35".
# Fielded search limited to the Solr field holding a work's box value.
config.add_search_field('box') do |box_field|
  box_field.label = 'Box'
  # Passed through to Solr as query params: match against box_tsi only.
  box_field.solr_parameters = { qf: 'box_tsi' }
end

# Fielded search limited to the Solr field holding a work's folder value.
config.add_search_field('folder') do |folder_field|
  folder_field.label = 'Folder'
  # Passed through to Solr as query params: match against folder_tsi only.
  folder_field.solr_parameters = { qf: 'folder_tsi' }
end

# config.add_search_field('title') do |field|
# # solr_parameters hash are sent to Solr as ordinary url query params.
# field.solr_parameters = {
Expand Down
6 changes: 6 additions & 0 deletions app/indexers/work_indexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ class WorkIndexer < Kithe::Indexer
acc.concat(DateIndexHelper.new(record).expanded_years)
end

# Box / folder search fields: the physical container's box and folder values,
# indexed verbatim. Blank values yield nil instead of an empty string.
to_field "box_tsi", obj_extract("physical_container"), transform(->(container) { container.box.presence })
to_field "folder_tsi", obj_extract("physical_container"), transform(->(container) { container.folder.presence })

# Box / folder sort fields: only the first run of digits in the value, so a
# box recorded as "34-35" sorts as 34. A value with no digits yields nil.
# NOTE(review): nil results appear to be dropped rather than indexed (the
# indexer spec expects no sort value for non-numeric input) -- confirm.
to_field "box_sort", obj_extract("physical_container"), transform(->(container) { container.box.presence&.slice(/\d+/) })
to_field "folder_sort", obj_extract("physical_container"), transform(->(container) { container.folder.presence&.slice(/\d+/) })


# For sorting by oldest first
to_field "earliest_date" do |record, acc|
# for Solr, we need in "xml schema" format, with 00:00:00 time, and UTC timezone
Expand Down
6 changes: 6 additions & 0 deletions solr/config/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,12 @@
<field name="latest_date" type="date_sortmissinglast" stored="true" indexed="true" multiValued="false"/>
<field name="earliest_date" type="date_sortmissinglast" stored="true" indexed="true" multiValued="false"/>


<!-- Box and folder sort fields. Added by Science History Institute.
     Single-valued integer sort keys. WorkIndexer fills each one with the
     first run of digits found in the work's box / folder value, so works
     whose value contains no digits have no value in these fields.
     NOTE(review): the int_sortmissinglast type is defined elsewhere in this
     schema; its name suggests documents without a value sort last - confirm. -->
<field name="box_sort" type="int_sortmissinglast" stored="true" indexed="true" multiValued="false"/>
<field name="folder_sort" type="int_sortmissinglast" stored="true" indexed="true" multiValued="false"/>


<!-- Single-value title sort field. Used to sort collections of serials by title.
See https://github.com/sciencehistory/scihist_digicoll/issues/2494
If we can use the first value of (multivalued) text1_tesim instead, let's consider doing that.
Expand Down
26 changes: 26 additions & 0 deletions spec/indexers/work_indexer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,32 @@
end
end

# See https://github.com/sciencehistory/scihist_digicoll/issues/2585
describe "box and folder" do
  # Solr field names WorkIndexer writes for physical-container data.
  let(:box_search_field) { 'box_tsi' }
  let(:box_sort_field) { 'box_sort' }
  let(:folder_search_field) { 'folder_tsi' }
  let(:folder_sort_field) { 'folder_sort' }

  # `work` is supplied by a `let` outside this block. The expectations in the
  # first example imply its box / folder values are the non-numeric strings
  # "Box" and "Folder" -- confirm against the enclosing spec / factory.
  let(:work_2) { create(:work, physical_container: Work::PhysicalContainer.new({"box"=>"1", "folder"=>"3"})) }
  let(:work_3) { create(:work, physical_container: Work::PhysicalContainer.new({"box"=>"12, 34, 56", "folder"=>"56, 78, 10"})) }
  let(:output_hash) { WorkIndexer.new.map_record(work) }
  let(:output_hash_2) { WorkIndexer.new.map_record(work_2) }
  let(:output_hash_3) { WorkIndexer.new.map_record(work_3) }

  it "indexes non-numeric values for search only, leaving the sort fields empty" do
    expect(output_hash[box_search_field]).to eq ['Box']
    expect(output_hash[folder_search_field]).to eq ['Folder']
    expect(output_hash[box_sort_field]).to be_nil
    expect(output_hash[folder_sort_field]).to be_nil
  end

  # Previously work_2 / output_hash_2 were defined but never asserted, so the
  # plain single-number case was not covered.
  it "indexes a single number into both the search and the sort fields" do
    expect(output_hash_2[box_search_field]).to eq ['1']
    expect(output_hash_2[folder_search_field]).to eq ['3']
    expect(output_hash_2[box_sort_field]).to eq ['1']
    expect(output_hash_2[folder_sort_field]).to eq ['3']
  end

  it "puts the first consecutive digits, if found, into the sort fields, and everything into the search fields" do
    expect(output_hash_3[box_search_field]).to eq ['12, 34, 56']
    expect(output_hash_3[folder_search_field]).to eq ['56, 78, 10']
    expect(output_hash_3[box_sort_field]).to eq ['12']
    expect(output_hash_3[folder_sort_field]).to eq ['56']
  end
end

describe "oral history" do
let(:work) { create(:oral_history_work, format: ['text']) }

Expand Down

0 comments on commit f7710c9

Please sign in to comment.