From 5c4788d88c282c0f09bbb9a70c5cbffeb24ad33c Mon Sep 17 00:00:00 2001
From: Yves
Date: Mon, 20 Jan 2025 17:24:29 +0100
Subject: [PATCH] Create dedicated cache FS

Give HTTPFileCache its own LocalCacheFileSystem member and create cache
entries against it instead of db->GetFileSystem().
---
NOTE(review): this copy was rebuilt from a whitespace-collapsed paste.
 - Tab indentation and blank context lines are assumed; confirm against the tree.
 - Template arguments (<CachedFile>, <DatabaseInstance>) were missing from the
   paste and are restored by inference; the 80-column hunk heading only fits
   with shared_ptr<CachedFile>. Confirm before applying.
 - The author e-mail is missing from the From: header.
 - The header's post-image blob id on the index line predates the added
   comments and public: specifier; regenerate with git format-patch.

 third_party/cached_httpfs/http_file_cache.cpp         |  3 ++-
 third_party/cached_httpfs/include/http_file_cache.hpp | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/third_party/cached_httpfs/http_file_cache.cpp b/third_party/cached_httpfs/http_file_cache.cpp
index 813db57f..216fb951 100644
--- a/third_party/cached_httpfs/http_file_cache.cpp
+++ b/third_party/cached_httpfs/http_file_cache.cpp
@@ -97,7 +97,8 @@ shared_ptr<CachedFile> HTTPFileCache::GetCachedFile(const string &cache_dir, con
 	if (it != cached_files.end()) {
 		return it->second;
 	}
-	auto cache_entry = make_shared_ptr<CachedFile>(cache_dir, db->GetFileSystem(), key, cache_file);
+
+	auto cache_entry = make_shared_ptr<CachedFile>(cache_dir, cache_fs, key, cache_file);
 	if (cache_entry->Initialized() || cache_file) {
 		cached_files[key] = cache_entry;
 		return cache_entry;
diff --git a/third_party/cached_httpfs/include/http_file_cache.hpp b/third_party/cached_httpfs/include/http_file_cache.hpp
index 0bd431c2..e97c8638 100644
--- a/third_party/cached_httpfs/include/http_file_cache.hpp
+++ b/third_party/cached_httpfs/include/http_file_cache.hpp
@@ -1,10 +1,12 @@
 #pragma once
 
 #include "duckdb/main/client_data.hpp"
+#include "duckdb/common/local_file_system.hpp"
 
 namespace duckdb {
 
 class CachedFileHandle;
+class LocalFileSystem;
 
 //! Represents a file that is intended to be fully downloaded, then used in parallel by multiple threads
 class CachedFile : public enable_shared_from_this<CachedFile> {
@@ -78,6 +80,15 @@ class CachedFileHandle {
 	shared_ptr<CachedFile> file;
 };
 
+//! Local file system reserved for the HTTP file cache; identifies itself as "LocalCacheFileSystem"
+class LocalCacheFileSystem : public LocalFileSystem {
+public:
+	// TODO: we could lock down the LocalFileSystem to only allow paths that are in the cache directory
+	std::string GetName() const override {
+		return "LocalCacheFileSystem";
+	}
+};
+
 class HTTPFileCache : public ClientContextState {
 public:
 	explicit HTTPFileCache(ClientContext &context) {
@@ -88,6 +99,9 @@ class HTTPFileCache : public ClientContextState {
 	shared_ptr<CachedFile> GetCachedFile(const string &cache_dir, const string &key, bool create_cache);
 
 private:
+	//! File system handed to newly created cache entries (see GetCachedFile)
+	LocalCacheFileSystem cache_fs;
+
 	//! Database Instance
 	shared_ptr<DatabaseInstance> db;
 	//! Mutex to lock when getting the cached file (Parallel Only)