From f2e100c9d1a3e44c1d86d6253e757968ef94fc9d Mon Sep 17 00:00:00 2001
From: Brooks
Date: Mon, 11 Sep 2023 15:55:48 -0400
Subject: [PATCH 1/2] Shares accounts hash cache data between full and
 incremental (#33164)

(cherry picked from commit 6298c6c31e6f5cf9306b54cc62960da80afd3882)

# Conflicts:
#	runtime/src/accounts_db.rs
#	runtime/src/cache_hash_data.rs
---
 runtime/src/accounts_db.rs     | 65 ++++++++++++++++++++++++++++++----
 runtime/src/cache_hash_data.rs | 28 +++++++++++++--
 2 files changed, 83 insertions(+), 10 deletions(-)

diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs
index cca30c0c867fa0..17eaf5f0f2cbc8 100644
--- a/runtime/src/accounts_db.rs
+++ b/runtime/src/accounts_db.rs
@@ -1396,9 +1396,13 @@ pub struct AccountsDb {
     /// Set of storage paths to pick from
     pub(crate) paths: Vec<PathBuf>,
 
+<<<<<<< HEAD:runtime/src/accounts_db.rs
     /// Directories for account hash calculations, within base_working_path
     full_accounts_hash_cache_path: PathBuf,
     incremental_accounts_hash_cache_path: PathBuf,
+=======
+    accounts_hash_cache_path: PathBuf,
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
     transient_accounts_hash_cache_path: PathBuf,
 
     // used by tests
@@ -2374,6 +2378,19 @@ impl AccountsDb {
             (cache_path, Some(temp_dir))
         };
 
+<<<<<<< HEAD:runtime/src/accounts_db.rs
+=======
+        let accounts_hash_cache_path = accounts_hash_cache_path.unwrap_or_else(|| {
+            let accounts_hash_cache_path =
+                base_working_path.join(Self::DEFAULT_ACCOUNTS_HASH_CACHE_DIR);
+            if !accounts_hash_cache_path.exists() {
+                fs_err::create_dir(&accounts_hash_cache_path)
+                    .expect("create accounts hash cache dir");
+            }
+            accounts_hash_cache_path
+        });
+
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
         let mut bank_hash_stats = HashMap::new();
         bank_hash_stats.insert(0, BankHashStats::default());
 
@@ -2402,10 +2419,17 @@ impl AccountsDb {
             write_cache_limit_bytes: None,
             write_version: AtomicU64::new(0),
             paths: vec![],
+<<<<<<< HEAD:runtime/src/accounts_db.rs
             full_accounts_hash_cache_path: accounts_hash_cache_path.join("full"),
             incremental_accounts_hash_cache_path: accounts_hash_cache_path.join("incremental"),
             transient_accounts_hash_cache_path: accounts_hash_cache_path.join("transient"),
             temp_accounts_hash_cache_path,
+=======
+            base_working_path,
+            base_working_temp_dir,
+            transient_accounts_hash_cache_path: accounts_hash_cache_path.join("transient"),
+            accounts_hash_cache_path,
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
             shrink_paths: RwLock::new(None),
             temp_paths: None,
             file_size: DEFAULT_FILE_SIZE,
@@ -7517,18 +7541,20 @@ impl AccountsDb {
     fn get_cache_hash_data(
         accounts_hash_cache_path: PathBuf,
         config: &CalcAccountsHashConfig<'_>,
+        kind: CalcAccountsHashKind,
         slot: Slot,
     ) -> CacheHashData {
-        if !config.store_detailed_debug_info_on_failure {
-            CacheHashData::new(accounts_hash_cache_path)
+        let accounts_hash_cache_path = if !config.store_detailed_debug_info_on_failure {
+            accounts_hash_cache_path
         } else {
             // this path executes when we are failing with a hash mismatch
             let failed_dir = accounts_hash_cache_path
                 .join("failed_calculate_accounts_hash_cache")
                 .join(slot.to_string());
-            let _ = std::fs::remove_dir_all(&failed_dir);
-            CacheHashData::new(failed_dir)
-        }
+            _ = std::fs::remove_dir_all(&failed_dir);
+            failed_dir
+        };
+        CacheHashData::new(accounts_hash_cache_path, kind == CalcAccountsHashKind::Full)
     }
 
     // modeled after calculate_accounts_delta_hash
@@ -7543,8 +7569,12 @@ impl AccountsDb {
             config,
             storages,
             stats,
+<<<<<<< HEAD:runtime/src/accounts_db.rs
             CalcAccountsHashFlavor::Full,
             self.full_accounts_hash_cache_path.clone(),
+=======
+            CalcAccountsHashKind::Full,
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
         )?;
         let AccountsHashEnum::Full(accounts_hash) = accounts_hash else {
             panic!("calculate_accounts_hash_from_storages must return a FullAccountsHash");
         };
@@ -7571,8 +7601,12 @@ impl AccountsDb {
             config,
             storages,
             stats,
+<<<<<<< HEAD:runtime/src/accounts_db.rs
             CalcAccountsHashFlavor::Incremental,
             self.incremental_accounts_hash_cache_path.clone(),
+=======
+            CalcAccountsHashKind::Incremental,
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
         )?;
         let AccountsHashEnum::Incremental(incremental_accounts_hash) = accounts_hash else {
             panic!("calculate_incremental_accounts_hash must return an IncrementalAccountsHash");
         };
@@ -7585,9 +7619,15 @@ impl AccountsDb {
         config: &CalcAccountsHashConfig<'_>,
         storages: &SortedStorages<'_>,
         mut stats: HashStats,
+<<<<<<< HEAD:runtime/src/accounts_db.rs
         flavor: CalcAccountsHashFlavor,
         accounts_hash_cache_path: PathBuf,
     ) -> Result<(AccountsHashEnum, u64), AccountsHashVerificationError> {
+=======
+        kind: CalcAccountsHashKind,
+    ) -> Result<(AccountsHashKind, u64), AccountsHashVerificationError> {
+        let total_time = Measure::start("");
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
         let _guard = self.active_stats.activate(ActiveStatItem::Hash);
 
         stats.oldest_root = storages.range().start;
@@ -7595,8 +7635,19 @@ impl AccountsDb {
 
         let slot = storages.max_slot_inclusive();
         let use_bg_thread_pool = config.use_bg_thread_pool;
+        let accounts_hash_cache_path = self.accounts_hash_cache_path.clone();
         let scan_and_hash = || {
+<<<<<<< HEAD:runtime/src/accounts_db.rs
             let cache_hash_data = Self::get_cache_hash_data(accounts_hash_cache_path, config, slot);
+=======
+            let (cache_hash_data, cache_hash_data_us) = measure_us!(Self::get_cache_hash_data(
+                accounts_hash_cache_path,
+                config,
+                kind,
+                slot
+            ));
+            stats.cache_hash_data_us += cache_hash_data_us;
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
 
             let bounds = Range {
                 start: 0,
@@ -9592,7 +9643,7 @@ pub mod tests {
         let temp_dir = TempDir::new().unwrap();
         let accounts_hash_cache_path = temp_dir.path().to_path_buf();
         self.scan_snapshot_stores_with_cache(
-            &CacheHashData::new(accounts_hash_cache_path),
+            &CacheHashData::new(accounts_hash_cache_path, true),
            storage,
            stats,
            bins,
@@ -10691,7 +10742,7 @@ pub mod tests {
         };
 
        let result = accounts_db.scan_account_storage_no_bank(
-            &CacheHashData::new(accounts_hash_cache_path),
+            &CacheHashData::new(accounts_hash_cache_path, true),
            &CalcAccountsHashConfig::default(),
            &get_storage_refs(&[storage]),
            test_scan,
diff --git a/runtime/src/cache_hash_data.rs b/runtime/src/cache_hash_data.rs
index 9a0a742f2a3181..717f197fdb7885 100644
--- a/runtime/src/cache_hash_data.rs
+++ b/runtime/src/cache_hash_data.rs
@@ -140,30 +140,48 @@ impl CacheHashDataFile {
     }
 }
 
-pub type PreExistingCacheFiles = HashSet<PathBuf>;
 pub struct CacheHashData {
     cache_dir: PathBuf,
+<<<<<<< HEAD:runtime/src/cache_hash_data.rs
     pre_existing_cache_files: Arc<Mutex<PreExistingCacheFiles>>,
     pub stats: Arc<Mutex<CacheHashDataStats>>,
+=======
+    pre_existing_cache_files: Arc<Mutex<HashSet<PathBuf>>>,
+    should_delete_old_cache_files_on_drop: bool,
+    pub stats: Arc<CacheHashDataStats>,
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/cache_hash_data.rs
 }
 
 impl Drop for CacheHashData {
     fn drop(&mut self) {
+<<<<<<< HEAD:runtime/src/cache_hash_data.rs
         self.delete_old_cache_files();
         self.stats.lock().unwrap().report();
+=======
+        if self.should_delete_old_cache_files_on_drop {
+            self.delete_old_cache_files();
+        }
+        self.stats.report();
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/cache_hash_data.rs
     }
 }
 
 impl CacheHashData {
-    pub fn new(cache_dir: PathBuf) -> CacheHashData {
+    pub fn new(cache_dir: PathBuf, should_delete_old_cache_files_on_drop: bool) -> CacheHashData {
         std::fs::create_dir_all(&cache_dir).unwrap_or_else(|err| {
             panic!("error creating cache dir {}: {err}", cache_dir.display())
         });
 
         let result = CacheHashData {
             cache_dir,
+<<<<<<< HEAD:runtime/src/cache_hash_data.rs
             pre_existing_cache_files: Arc::new(Mutex::new(PreExistingCacheFiles::default())),
             stats: Arc::new(Mutex::new(CacheHashDataStats::default())),
+=======
+            pre_existing_cache_files: Arc::new(Mutex::new(HashSet::default())),
+            should_delete_old_cache_files_on_drop,
+            stats: Arc::default(),
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/cache_hash_data.rs
         };
 
         result.get_cache_files();
@@ -270,6 +288,10 @@ impl CacheHashData {
 
         stats.total_entries = entries;
         stats.cache_file_size += capacity as usize;
+<<<<<<< HEAD:runtime/src/cache_hash_data.rs
+=======
+    fn pre_existing_cache_file_will_be_used(&self, file_name: impl AsRef<Path>) {
+>>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/cache_hash_data.rs
         self.pre_existing_cache_files
             .lock()
             .unwrap()
@@ -386,7 +408,7 @@ pub mod tests {
                     data_this_pass.push(this_bin_data);
                 }
             }
-            let cache = CacheHashData::new(cache_dir.clone());
+            let cache = CacheHashData::new(cache_dir.clone(), true);
             let file_name = PathBuf::from("test");
             cache.save(&file_name, &data_this_pass).unwrap();
             cache.get_cache_files();

From d5424867f4b7c862bdb5d4fb37f04ef5a9e2ff94 Mon Sep 17 00:00:00 2001
From: brooks
Date: Mon, 11 Sep 2023 16:44:40 -0400
Subject: [PATCH 2/2] fix merge conflicts

---
 runtime/src/accounts_db.rs     | 68 +++++-----------------------------
 runtime/src/cache_hash_data.rs | 25 ++-----------
 2 files changed, 12 insertions(+), 81 deletions(-)

diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs
index 17eaf5f0f2cbc8..e1cac348215122 100644
--- a/runtime/src/accounts_db.rs
+++ b/runtime/src/accounts_db.rs
@@ -1396,13 +1396,7 @@ pub struct AccountsDb {
     /// Set of storage paths to pick from
     pub(crate) paths: Vec<PathBuf>,
 
-<<<<<<< HEAD:runtime/src/accounts_db.rs
-    /// Directories for account hash calculations, within base_working_path
-    full_accounts_hash_cache_path: PathBuf,
-    incremental_accounts_hash_cache_path: PathBuf,
-=======
     accounts_hash_cache_path: PathBuf,
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
     transient_accounts_hash_cache_path: PathBuf,
 
     // used by tests
@@ -2374,23 +2368,10 @@ impl AccountsDb {
             (accounts_hash_cache_path, None)
         } else {
             let temp_dir = TempDir::new().expect("new tempdir");
-            let cache_path = temp_dir.path().join(Self::DEFAULT_ACCOUNTS_HASH_CACHE_DIR);
+            let cache_path = temp_dir.path().to_path_buf();
             (cache_path, Some(temp_dir))
         };
 
-<<<<<<< HEAD:runtime/src/accounts_db.rs
-=======
-        let accounts_hash_cache_path = accounts_hash_cache_path.unwrap_or_else(|| {
-            let accounts_hash_cache_path =
-                base_working_path.join(Self::DEFAULT_ACCOUNTS_HASH_CACHE_DIR);
-            if !accounts_hash_cache_path.exists() {
-                fs_err::create_dir(&accounts_hash_cache_path)
-                    .expect("create accounts hash cache dir");
-            }
-            accounts_hash_cache_path
-        });
-
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
         let mut bank_hash_stats = HashMap::new();
         bank_hash_stats.insert(0, BankHashStats::default());
 
@@ -2419,17 +2400,9 @@ impl AccountsDb {
             write_cache_limit_bytes: None,
             write_version: AtomicU64::new(0),
             paths: vec![],
-<<<<<<< HEAD:runtime/src/accounts_db.rs
-            full_accounts_hash_cache_path: accounts_hash_cache_path.join("full"),
-            incremental_accounts_hash_cache_path: accounts_hash_cache_path.join("incremental"),
-            transient_accounts_hash_cache_path: accounts_hash_cache_path.join("transient"),
-            temp_accounts_hash_cache_path,
-=======
-            base_working_path,
-            base_working_temp_dir,
             transient_accounts_hash_cache_path: accounts_hash_cache_path.join("transient"),
             accounts_hash_cache_path,
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
+            temp_accounts_hash_cache_path,
             shrink_paths: RwLock::new(None),
             temp_paths: None,
             file_size: DEFAULT_FILE_SIZE,
@@ -7541,7 +7514,7 @@ impl AccountsDb {
     fn get_cache_hash_data(
         accounts_hash_cache_path: PathBuf,
         config: &CalcAccountsHashConfig<'_>,
-        kind: CalcAccountsHashKind,
+        flavor: CalcAccountsHashFlavor,
         slot: Slot,
     ) -> CacheHashData {
         let accounts_hash_cache_path = if !config.store_detailed_debug_info_on_failure {
@@ -7554,7 +7527,10 @@ impl AccountsDb {
                 .join("failed_calculate_accounts_hash_cache")
                 .join(slot.to_string());
             _ = std::fs::remove_dir_all(&failed_dir);
             failed_dir
         };
-        CacheHashData::new(accounts_hash_cache_path, kind == CalcAccountsHashKind::Full)
+        CacheHashData::new(
+            accounts_hash_cache_path,
+            flavor == CalcAccountsHashFlavor::Full,
+        )
     }
 
     // modeled after calculate_accounts_delta_hash
@@ -7569,12 +7545,7 @@ impl AccountsDb {
             config,
             storages,
             stats,
-<<<<<<< HEAD:runtime/src/accounts_db.rs
             CalcAccountsHashFlavor::Full,
-            self.full_accounts_hash_cache_path.clone(),
-=======
-            CalcAccountsHashKind::Full,
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
         )?;
         let AccountsHashEnum::Full(accounts_hash) = accounts_hash else {
             panic!("calculate_accounts_hash_from_storages must return a FullAccountsHash");
         };
@@ -7601,12 +7572,7 @@ impl AccountsDb {
             config,
             storages,
             stats,
-<<<<<<< HEAD:runtime/src/accounts_db.rs
             CalcAccountsHashFlavor::Incremental,
-            self.incremental_accounts_hash_cache_path.clone(),
-=======
-            CalcAccountsHashKind::Incremental,
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
         )?;
         let AccountsHashEnum::Incremental(incremental_accounts_hash) = accounts_hash else {
             panic!("calculate_incremental_accounts_hash must return an IncrementalAccountsHash");
         };
@@ -7619,15 +7585,8 @@ impl AccountsDb {
         config: &CalcAccountsHashConfig<'_>,
         storages: &SortedStorages<'_>,
         mut stats: HashStats,
-<<<<<<< HEAD:runtime/src/accounts_db.rs
         flavor: CalcAccountsHashFlavor,
-        accounts_hash_cache_path: PathBuf,
     ) -> Result<(AccountsHashEnum, u64), AccountsHashVerificationError> {
-=======
-        kind: CalcAccountsHashKind,
-    ) -> Result<(AccountsHashKind, u64), AccountsHashVerificationError> {
-        let total_time = Measure::start("");
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
         let _guard = self.active_stats.activate(ActiveStatItem::Hash);
 
         stats.oldest_root = storages.range().start;
@@ -7637,17 +7596,8 @@ impl AccountsDb {
         let use_bg_thread_pool = config.use_bg_thread_pool;
         let accounts_hash_cache_path = self.accounts_hash_cache_path.clone();
         let scan_and_hash = || {
-<<<<<<< HEAD:runtime/src/accounts_db.rs
-            let cache_hash_data = Self::get_cache_hash_data(accounts_hash_cache_path, config, slot);
-=======
-            let (cache_hash_data, cache_hash_data_us) = measure_us!(Self::get_cache_hash_data(
-                accounts_hash_cache_path,
-                config,
-                kind,
-                slot
-            ));
-            stats.cache_hash_data_us += cache_hash_data_us;
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/accounts_db.rs
+            let cache_hash_data =
+                Self::get_cache_hash_data(accounts_hash_cache_path, config, flavor, slot);
 
             let bounds = Range {
                 start: 0,
diff --git a/runtime/src/cache_hash_data.rs b/runtime/src/cache_hash_data.rs
index 717f197fdb7885..3e3f203509b496 100644
--- a/runtime/src/cache_hash_data.rs
+++ b/runtime/src/cache_hash_data.rs
@@ -142,27 +142,17 @@ impl CacheHashDataFile {
 
 pub struct CacheHashData {
     cache_dir: PathBuf,
-<<<<<<< HEAD:runtime/src/cache_hash_data.rs
-    pre_existing_cache_files: Arc<Mutex<PreExistingCacheFiles>>,
-    pub stats: Arc<Mutex<CacheHashDataStats>>,
-=======
     pre_existing_cache_files: Arc<Mutex<HashSet<PathBuf>>>,
     should_delete_old_cache_files_on_drop: bool,
-    pub stats: Arc<CacheHashDataStats>,
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/cache_hash_data.rs
+    pub stats: Arc<Mutex<CacheHashDataStats>>,
 }
 
 impl Drop for CacheHashData {
     fn drop(&mut self) {
-<<<<<<< HEAD:runtime/src/cache_hash_data.rs
-        self.delete_old_cache_files();
-        self.stats.lock().unwrap().report();
-=======
         if self.should_delete_old_cache_files_on_drop {
             self.delete_old_cache_files();
         }
-        self.stats.report();
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/cache_hash_data.rs
+        self.stats.lock().unwrap().report();
     }
 }
 
@@ -174,14 +164,9 @@ impl CacheHashData {
 
         let result = CacheHashData {
             cache_dir,
-<<<<<<< HEAD:runtime/src/cache_hash_data.rs
-            pre_existing_cache_files: Arc::new(Mutex::new(PreExistingCacheFiles::default())),
-            stats: Arc::new(Mutex::new(CacheHashDataStats::default())),
-=======
             pre_existing_cache_files: Arc::new(Mutex::new(HashSet::default())),
             should_delete_old_cache_files_on_drop,
-            stats: Arc::default(),
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/cache_hash_data.rs
+            stats: Arc::new(Mutex::new(CacheHashDataStats::default())),
         };
 
         result.get_cache_files();
@@ -288,10 +273,6 @@ impl CacheHashData {
 
         stats.total_entries = entries;
        stats.cache_file_size += capacity as usize;
-<<<<<<< HEAD:runtime/src/cache_hash_data.rs
-=======
-    fn pre_existing_cache_file_will_be_used(&self, file_name: impl AsRef<Path>) {
->>>>>>> 6298c6c31e (Shares accounts hash cache data between full and incremental (#33164)):accounts-db/src/cache_hash_data.rs
         self.pre_existing_cache_files
             .lock()
             .unwrap()
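
Note appended by the editor, not part of the patch series above: the backported change makes the
full and incremental accounts-hash calculations share one accounts hash cache directory, and
CacheHashData::new() now takes a second argument, should_delete_old_cache_files_on_drop. The full
calculation passes true (flavor == CalcAccountsHashFlavor::Full), so it prunes stale cache files
when it is dropped; the incremental calculation passes false, so it can reuse cache files written
by a full calculation without ever deleting them. The sketch below is a self-contained toy model
of that ownership pattern only; the ToyCache type and its fields are invented names, not the
crate's real API.

    // Toy illustration (hypothetical names), using only the Rust standard library.
    // It mimics "shared cache dir, only the full pass deletes stale files on drop";
    // it is not the AccountsDb/CacheHashData implementation.
    use std::collections::HashSet;
    use std::fs;
    use std::path::PathBuf;

    struct ToyCache {
        cache_dir: PathBuf,
        // Files already present when the cache was opened and not yet reused;
        // these are the candidates for deletion on drop.
        pre_existing_cache_files: HashSet<PathBuf>,
        // true for a "full" calculation, false for an "incremental" one.
        should_delete_old_cache_files_on_drop: bool,
    }

    impl ToyCache {
        fn new(cache_dir: PathBuf, should_delete_old_cache_files_on_drop: bool) -> std::io::Result<Self> {
            fs::create_dir_all(&cache_dir)?;
            let pre_existing_cache_files = fs::read_dir(&cache_dir)?
                .flatten()
                .map(|entry| entry.path())
                .collect();
            Ok(Self {
                cache_dir,
                pre_existing_cache_files,
                should_delete_old_cache_files_on_drop,
            })
        }

        // Mark a pre-existing file as reused so it is not treated as stale.
        fn mark_used(&mut self, file_name: &str) {
            self.pre_existing_cache_files
                .remove(&self.cache_dir.join(file_name));
        }
    }

    impl Drop for ToyCache {
        fn drop(&mut self) {
            if self.should_delete_old_cache_files_on_drop {
                for stale in &self.pre_existing_cache_files {
                    let _ = fs::remove_file(stale);
                }
            }
        }
    }

    fn main() -> std::io::Result<()> {
        let dir = std::env::temp_dir().join("toy_accounts_hash_cache");

        // "Incremental" pass: may reuse what is there, but never deletes.
        let mut incremental = ToyCache::new(dir.clone(), false)?;
        incremental.mark_used("bin_0"); // harmless even if bin_0 does not exist
        drop(incremental);

        // "Full" pass over the same directory: prunes whatever it did not reuse.
        let full = ToyCache::new(dir, true)?;
        drop(full);
        Ok(())
    }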