From d1e8558dc57210fcb902d359a8df9208365ca954 Mon Sep 17 00:00:00 2001 From: carolyncole <1599081+carolyncole@users.noreply.github.com> Date: Wed, 5 Mar 2025 15:12:52 -0500 Subject: [PATCH] Remove Dataspace Communities (#762) refs #685 --- app/lib/research_data_collection.rb | 20 ---- app/models/dataspace_communities.rb | 135 ---------------------- app/models/dataspace_community.rb | 57 --------- spec/lib/research_data_collection_spec.rb | 26 ----- spec/models/dataspace_communities_spec.rb | 44 ------- 5 files changed, 282 deletions(-) delete mode 100644 app/lib/research_data_collection.rb delete mode 100644 app/models/dataspace_communities.rb delete mode 100644 app/models/dataspace_community.rb delete mode 100644 spec/lib/research_data_collection_spec.rb delete mode 100644 spec/models/dataspace_communities_spec.rb diff --git a/app/lib/research_data_collection.rb b/app/lib/research_data_collection.rb deleted file mode 100644 index 0309ff33..00000000 --- a/app/lib/research_data_collection.rb +++ /dev/null @@ -1,20 +0,0 @@ -# frozen_string_literal: true - -require 'csv' - -## -# A DataSpace collection that contains research data and should be harvested. -class ResearchDataCollection - attr_reader :parent_community, :community, :collection_name, :handle, :collection_id, :item_count - - ## - # Take a CSV::Row and parse it out into the values we'll need at indexing time - def initialize(csv_row) - @parent_community = csv_row["ParentCommunity"] - @community = csv_row["Community"] - @collection_name = csv_row["CollectionName"] - @handle = csv_row["Handle"] - @collection_id = csv_row["CollectionID"] - @item_count = csv_row["ItemCount"].to_i - end -end diff --git a/app/models/dataspace_communities.rb b/app/models/dataspace_communities.rb deleted file mode 100644 index ef48edbc..00000000 --- a/app/models/dataspace_communities.rb +++ /dev/null @@ -1,135 +0,0 @@ -# frozen_string_literal: true - -require "httparty" -require "json" - -# Fetches information about DataSpace Communities (and collections) -# and handles the nested structure of Communities. -# -# rubocop:disable Style/Next -class DataspaceCommunities - attr_reader :tree - - # @param filename [] File name with communities information cached from DataSpace (used for testing). - # Fetches data from DataSpace directly when no filename is provided. - def initialize(filename = nil) - @tree = [] - @flat_list = nil - if filename.blank? - load_from_dataspace - else - load_from_file(filename) - end - end - - # Returns community information for a given ID - # @param id [] ID of the community. - def find_by_id(id) - flat_list.find { |community| community.id == id } - end - - # Returns the name of the root community for given community ID. - # @param id [] ID of the community. - def find_root_name(id) - root_id = find_path_ids(id, []).last - found = find_by_id(root_id) - return if found.nil? - - found.name - end - - # Returns an array with the names (from root to sub-community) to the given community ID. - # @param id [] ID of the community. - def find_path_name(id) - ids = find_path(id) - ids.map { |path_id| find_by_id(path_id).name } - end - - private - - # Loads community information straight from Dataspace API - # See DSpace API reference: https://dataspace.princeton.edu/rest/ - def load_from_dataspace - @tree = [] - communities_url = "#{Rails.configuration.pdc_discovery.dataspace_url}/rest/communities?expand=all" - Rails.logger.info "Fetching communities information from #{communities_url}" - response = HTTParty.get(communities_url) - d_communities = JSON.parse(response.body) - d_communities.each do |d_community| - root_community = d_community['parentCommunity'].nil? - if root_community - node = DataspaceCommunity.new(d_community, true) - @tree << node - end - end - @tree - end - - # Loads community information from a pre-saved file with the information - def load_from_file(filename) - @tree = [] - Rails.logger.info "Loading communities information from #{filename}" - content = File.read(filename) - d_communities = JSON.parse(content) - d_communities.each do |d_community| - root_community = d_community['parentCommunity'].nil? - if root_community - node = DataspaceCommunity.new(d_community, false) - @tree << node - end - end - @tree - end - - # Returns an array with all the DataspaceCommunity as a flat array. - # This array is used inernally to perform searches by ID (it's faster to search a flat array - # than to search a nested structure.) - def flat_list - @flat_list ||= begin - nodes = [] - @tree.each do |node| - nodes += flat_node(node) - end - nodes - end - end - - # Returns an array of all the communities and sub-communities for a given community. - # @param community [] a community object. - def flat_node(community) - list = [community] - community.subcommunities.each do |sub| - list += flat_node(sub) - end - list - end - - # Returns the path (from root to sub-community) to the community as an array of IDs. - # @param id [] ID of the community. - def find_path(id) - find_path_ids(id, []).reverse - end - - # Returns the ID of the root community for a given community. - # @param id [] ID of the community. - # @note No testing coverage but not called and private - def find_root(id) - find_path_ids(id, []).last - end - - # Returns the path (community to parent to grandparent) to the community as an array of IDs. - # @param id [] ID of the community. - # @param path [Array] Array of IDs. - def find_path_ids(id, path) - community = find_by_id(id) - return [] if community.nil? - - path << id - if community.parent_id.nil? - path - else - find_path_ids(community.parent_id, path) - end - end -end -# rubocop:enable Style/Next diff --git a/app/models/dataspace_community.rb b/app/models/dataspace_community.rb deleted file mode 100644 index d825eb57..00000000 --- a/app/models/dataspace_community.rb +++ /dev/null @@ -1,57 +0,0 @@ -# frozen_string_literal: true - -require "httparty" -require "json" - -# rubocop:disable Rails/Delegate -class DataspaceCommunity - attr_accessor :id, :name, :handle, :collections, :subcommunities, :parent_id - - # d_community is a hash with DSpace community information - def initialize(d_community, fetch_from_dataspace = false) - @id = d_community['id'] - @name = d_community['name'] - @handle = d_community['handle'] - @parent_id = fetch_from_dataspace ? d_community.dig('parentCommunity', 'id') : d_community['parent_id'] - - @collections = [] - d_community['collections'].each do |d_collection| - @collections << { id: d_collection['id'], name: d_collection['name'] } - end - - @subcommunities = [] - d_community['subcommunities'].each do |d_sub_community| - if fetch_from_dataspace - # Fetch data from dataspace since the subcommunity information does not come on the community response - # by default. - sub_community_url = "#{Rails.configuration.pdc_discovery.dataspace_url}/rest/communities/#{d_sub_community['id']}?expand=all" - response = HTTParty.get(sub_community_url) - d_sub_community = JSON.parse(response.body) - @subcommunities << DataspaceCommunity.new(d_sub_community, true) - else - # Use the data that we got as-is. - # This is used when building the nodes from cache where we have all the information already available. - @subcommunities << DataspaceCommunity.new(d_sub_community, false) - end - end - end - - # @note Not covered in testing but not called - def to_hash - hash = { - id: @id, - name: @name, - handle: @handle, - collections: @collections, - subcommunities: @subcommunities, - parent_id: @parent_id - } - hash - end - - # @note Not covered in testing but not called - def to_json(opts) - to_hash.to_json(opts) - end -end -# rubocop:enable Rails/Delegate diff --git a/spec/lib/research_data_collection_spec.rb b/spec/lib/research_data_collection_spec.rb deleted file mode 100644 index bfadfbdb..00000000 --- a/spec/lib/research_data_collection_spec.rb +++ /dev/null @@ -1,26 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe ResearchDataCollection do - let(:csv_data) do - { - "ParentCommunity" => "Princeton Plasma Physics Laboratory", - "Community" => "Advanced Projects", - "CollectionName" => "Socio Economic", - "Handle" => "88435/dsp01sf268746p", - "CollectionID" => "1305", - "ItemCount" => "5", - nil => nil - } - end - let(:csv_row) { CSV::Row.new(csv_data.keys, csv_data.values) } - let(:rdc) { described_class.new(csv_row) } - - it "takes a CSV::Row as an argument" do - expect(rdc.parent_community).to eq "Princeton Plasma Physics Laboratory" - expect(rdc.community).to eq "Advanced Projects" - expect(rdc.collection_name).to eq "Socio Economic" - expect(rdc.handle).to eq "88435/dsp01sf268746p" - expect(rdc.collection_id).to eq "1305" - expect(rdc.item_count).to eq 5 - end -end diff --git a/spec/models/dataspace_communities_spec.rb b/spec/models/dataspace_communities_spec.rb deleted file mode 100644 index f495b2eb..00000000 --- a/spec/models/dataspace_communities_spec.rb +++ /dev/null @@ -1,44 +0,0 @@ -# frozen_string_literal: true - -require 'rails_helper' - -RSpec.describe DataspaceCommunities do - let(:communities) { described_class.new('./spec/fixtures/files/dataspace_communities.json') } - let(:communities_blank) { described_class.new('') } - - describe "#find_by_id" do - it "finds a root community" do - astrophysical = communities.find_by_id(186) - expect(astrophysical.name).to eq "Astrophysical Sciences" - expect(astrophysical.parent_id).to be nil - expect(astrophysical.collections.count).to eq 1 - end - - it "finds a subcommunity" do - adv_projects = communities.find_by_id(347) - expect(adv_projects.name).to eq "Advanced Projects" - expect(adv_projects.parent_id).to eq 346 - end - end - - describe "#find_root_name" do - it "reports root name for a root community" do - # We expect root communities to report their name as root. - pppl_id = 346 - expect(communities.find_root_name(pppl_id)).to eq "Princeton Plasma Physics Laboratory" - end - it "reports root name for a subcommunity" do - pppl_adv_projects_id = 347 - expect(communities.find_root_name(pppl_adv_projects_id)).to eq "Princeton Plasma Physics Laboratory" - end - end - - describe "#find_path_name" do - it "finds path for a root community" do - expect(communities.find_path_name(346)).to eq ["Princeton Plasma Physics Laboratory"] - end - it "finds path for a subcommunity" do - expect(communities.find_path_name(347)).to eq ["Princeton Plasma Physics Laboratory", "Advanced Projects"] - end - end -end