From d6d201be11ad9207113a26026a9143c252046e47 Mon Sep 17 00:00:00 2001 From: Mike Virata-Stone Date: Mon, 12 Oct 2015 11:41:06 -0700 Subject: [PATCH 1/3] Avoid filesystem limits using a trie Thanks to @pcarranza --- lib/gemstash/storage.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/gemstash/storage.rb b/lib/gemstash/storage.rb index 9d574756..bbe13bf1 100644 --- a/lib/gemstash/storage.rb +++ b/lib/gemstash/storage.rb @@ -40,7 +40,8 @@ class Resource def initialize(folder, name) @base_path = folder @name = name - @folder = File.join(@base_path, @name) + trie_parents = @name[0...3].split("") + @folder = File.join(@base_path, *trie_parents, @name) end def exist? From 398b88ff548cc645893f87fe4a0c64b9235bfe24 Mon Sep 17 00:00:00 2001 From: Mike Virata-Stone Date: Mon, 12 Oct 2015 13:47:42 -0700 Subject: [PATCH 2/3] Handle case sensitivity with a digest --- lib/gemstash/storage.rb | 9 +++++++-- spec/gemstash/storage_spec.rb | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/lib/gemstash/storage.rb b/lib/gemstash/storage.rb index bbe13bf1..1a0c8c30 100644 --- a/lib/gemstash/storage.rb +++ b/lib/gemstash/storage.rb @@ -1,4 +1,5 @@ require "gemstash" +require "digest" require "pathname" require "fileutils" require "yaml" @@ -36,12 +37,16 @@ def path_valid?(path) #:nodoc: class Resource - attr_accessor :name + attr_accessor :name, :folder def initialize(folder, name) @base_path = folder @name = name trie_parents = @name[0...3].split("") - @folder = File.join(@base_path, *trie_parents, @name) + # The digest is included in case the name differs only by case + # Some file systems are case insensitive, so such collisions will be a problem + digest = Digest::MD5.hexdigest(@name) + child_folder = "#{@name}-#{digest}" + @folder = File.join(@base_path, *trie_parents, child_folder) end def exist? diff --git a/spec/gemstash/storage_spec.rb b/spec/gemstash/storage_spec.rb index dd88d661..e1c5a9be 100644 --- a/spec/gemstash/storage_spec.rb +++ b/spec/gemstash/storage_spec.rb @@ -73,5 +73,23 @@ expect(resource.content).to eq(content) end end + + context "with resource name that is unique by case only" do + let(:first_resource) { "SomeResource" } + let(:second_resource) { "someresource" } + + it "stores the content separately" do + storage.resource(first_resource).save("first content") + storage.resource(second_resource).save("second content") + expect(storage.resource(first_resource).load.content).to eq("first content") + expect(storage.resource(second_resource).load.content).to eq("second content") + end + + it "uses different downcased paths to avoid issues with case insensitive file systems" do + first = storage.resource(first_resource) + second = storage.resource(second_resource) + expect(first.folder.downcase).to_not eq(second.folder.downcase) + end + end end end From 6f3eebd7fc00bf0403f54823390e09836baa0d03 Mon Sep 17 00:00:00 2001 From: Mike Virata-Stone Date: Mon, 12 Oct 2015 14:02:57 -0700 Subject: [PATCH 3/3] Only allow limited characters in paths Downcase trie to avoid case insensitive file system issues --- lib/gemstash/storage.rb | 8 ++++++-- spec/gemstash/storage_spec.rb | 31 ++++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/lib/gemstash/storage.rb b/lib/gemstash/storage.rb index 1a0c8c30..dcc19d26 100644 --- a/lib/gemstash/storage.rb +++ b/lib/gemstash/storage.rb @@ -41,11 +41,15 @@ class Resource def initialize(folder, name) @base_path = folder @name = name - trie_parents = @name[0...3].split("") + # Avoid odd characters in paths, in case of issues with the file system + safe_name = @name.gsub(/[^a-zA-Z0-9_]/, "_") + # Use a trie structure to avoid file system limits causing too many files in 1 folder + # Downcase to avoid issues with case insensitive file systems + trie_parents = safe_name[0...3].downcase.split("") # The digest is included in case the name differs only by case # Some file systems are case insensitive, so such collisions will be a problem digest = Digest::MD5.hexdigest(@name) - child_folder = "#{@name}-#{digest}" + child_folder = "#{safe_name}-#{digest}" @folder = File.join(@base_path, *trie_parents, child_folder) end diff --git a/spec/gemstash/storage_spec.rb b/spec/gemstash/storage_spec.rb index e1c5a9be..150631bd 100644 --- a/spec/gemstash/storage_spec.rb +++ b/spec/gemstash/storage_spec.rb @@ -75,20 +75,33 @@ end context "with resource name that is unique by case only" do - let(:first_resource) { "SomeResource" } - let(:second_resource) { "someresource" } + let(:first_resource_id) { "SomeResource" } + let(:second_resource_id) { "someresource" } it "stores the content separately" do - storage.resource(first_resource).save("first content") - storage.resource(second_resource).save("second content") - expect(storage.resource(first_resource).load.content).to eq("first content") - expect(storage.resource(second_resource).load.content).to eq("second content") + storage.resource(first_resource_id).save("first content") + storage.resource(second_resource_id).save("second content") + expect(storage.resource(first_resource_id).load.content).to eq("first content") + expect(storage.resource(second_resource_id).load.content).to eq("second content") end it "uses different downcased paths to avoid issues with case insensitive file systems" do - first = storage.resource(first_resource) - second = storage.resource(second_resource) - expect(first.folder.downcase).to_not eq(second.folder.downcase) + first_resource = storage.resource(first_resource_id) + second_resource = storage.resource(second_resource_id) + expect(first_resource.folder.downcase).to_not eq(second_resource.folder.downcase) + end + end + + context "with resource name that includes odd characters" do + let(:resource_id) { ".=$&resource" } + + it "stores and retrieves the data" do + storage.resource(resource_id).save("odd name content") + expect(storage.resource(resource_id).load.content).to eq("odd name content") + end + + it "doesn't include the odd characters in the path" do + expect(storage.resource(resource_id).folder).to_not match(/[.=$&]/) end end end