diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index 6843ddeeb28e76..a61b5a4a049e7b 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -46,9 +46,18 @@ use { }, solana_measure::{measure, measure::Measure}, solana_runtime::{ + account_storage::meta::StoredAccountMeta, accounts::Accounts, - accounts_db::CalcAccountsHashDataSource, - accounts_index::ScanConfig, + accounts_background_service::{ + AbsRequestHandlers, AbsRequestSender, AccountsBackgroundService, + PrunedBanksRequestHandler, SnapshotRequestHandler, + }, + accounts_db::{ + AccountsDb, AccountsDbConfig, CalcAccountsHashDataSource, FillerAccountsConfig, + }, + accounts_index::{AccountsIndexConfig, IndexLimitMb, ScanConfig}, + accounts_update_notifier_interface::AccountsUpdateNotifier, + append_vec::AppendVec, bank::{Bank, RewardCalculationEvent, TotalAccountsStats}, bank_forks::BankForks, hardened_unpack::MAX_GENESIS_ARCHIVE_UNPACKED_SIZE, @@ -59,6 +68,7 @@ use { self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION, SUPPORTED_ARCHIVE_COMPRESSION, }, + tiered_storage::{hot::HOT_FORMAT, TieredStorage}, }, solana_sdk::{ account::{AccountSharedData, ReadableAccount, WritableAccount}, @@ -2643,6 +2653,7 @@ fn main() { exit_signal.store(true, Ordering::Relaxed); system_monitor_service.join().unwrap(); } + /* ("graph", Some(arg_matches)) => { let output_file = value_t_or_exit!(arg_matches, "graph_filename", String); let graph_config = GraphConfig { @@ -2705,6 +2716,7 @@ fn main() { } } } + */ ("create-snapshot", Some(arg_matches)) => { let is_incremental = arg_matches.is_present("incremental"); let is_minimized = arg_matches.is_present("minimized"); diff --git a/runtime/src/account_storage.rs b/runtime/src/account_storage.rs index c3f6563b31027e..72a71d73e30b9e 100644 --- a/runtime/src/account_storage.rs +++ b/runtime/src/account_storage.rs @@ -270,6 +270,8 @@ pub(crate) mod tests { use {super::*, std::path::Path}; #[test] + /// The new cold storage will fail this test as it does not have + /// the concept of capacity yet. fn test_shrink_in_progress() { // test that we check in order map then shrink_in_progress_map let storage = AccountStorage::default(); @@ -283,13 +285,13 @@ pub(crate) mod tests { let store_file_size = 4000; let store_file_size2 = store_file_size * 2; // 2 append vecs with same id, but different sizes - let entry = Arc::new(AccountStorageEntry::new( + let entry = Arc::new(AccountStorageEntry::new_av( common_store_path, slot, id, store_file_size, )); - let entry2 = Arc::new(AccountStorageEntry::new( + let entry2 = Arc::new(AccountStorageEntry::new_av( common_store_path, slot, id, diff --git a/runtime/src/account_storage/meta.rs b/runtime/src/account_storage/meta.rs index dba672292310f5..0269d390d260a1 100644 --- a/runtime/src/account_storage/meta.rs +++ b/runtime/src/account_storage/meta.rs @@ -1,5 +1,9 @@ use { - crate::{append_vec::AppendVecStoredAccountMeta, storable_accounts::StorableAccounts}, + crate::{ + append_vec::AppendVecStoredAccountMeta, + storable_accounts::StorableAccounts, + tiered_storage::{hot::HotAccountMeta, readable::TieredReadableAccount}, + }, solana_sdk::{account::ReadableAccount, hash::Hash, pubkey::Pubkey, stake_history::Epoch}, std::{borrow::Borrow, marker::PhantomData}, }; @@ -12,6 +16,13 @@ pub struct StoredAccountInfo { pub size: usize, } +lazy_static! { + pub static ref DEFAULT_ACCOUNT_HASH: Hash = Hash::default(); +} + +pub const DEFAULT_WRITE_VERSION: StoredMetaWriteVersion = 0; +pub const DEFAULT_RENT_EPOCH: Epoch = Epoch::MAX; + /// Goal is to eliminate copies and data reshaping given various code paths that store accounts. /// This struct contains what is needed to store accounts to a storage /// 1. account & pubkey (StorableAccounts) @@ -100,66 +111,88 @@ impl<'a: 'b, 'b, T: ReadableAccount + Sync + 'b, U: StorableAccounts<'a, T>, V: #[derive(PartialEq, Eq, Debug)] pub enum StoredAccountMeta<'storage> { AppendVec(AppendVecStoredAccountMeta<'storage>), + // Cold(TieredReadableAccount<'storage, ColdAccountMeta>), + Hot(TieredReadableAccount<'storage, HotAccountMeta>), } impl<'storage> StoredAccountMeta<'storage> { pub fn pubkey(&self) -> &'storage Pubkey { match self { Self::AppendVec(av) => av.pubkey(), + // Self::Cold(cs) => cs.pubkey(), + Self::Hot(hs) => hs.address(), } } pub fn hash(&self) -> &'storage Hash { match self { Self::AppendVec(av) => av.hash(), + // Self::Cold(cs) => cs.hash(), + Self::Hot(hs) => hs.hash().unwrap_or(&DEFAULT_ACCOUNT_HASH), } } pub fn stored_size(&self) -> usize { match self { Self::AppendVec(av) => av.stored_size(), + // Self::Cold(cs) => cs.stored_size(), + Self::Hot(hs) => hs.stored_size(), } } pub fn offset(&self) -> usize { match self { Self::AppendVec(av) => av.offset(), + // Self::Cold(cs) => cs.offset(), + Self::Hot(hs) => hs.index(), } } pub fn data(&self) -> &'storage [u8] { match self { Self::AppendVec(av) => av.data(), + // Self::Cold(cs) => cs.data(), + Self::Hot(hs) => hs.data(), } } pub fn data_len(&self) -> u64 { match self { Self::AppendVec(av) => av.data_len(), + // Self::Cold(cs) => cs.data_len(), + Self::Hot(hs) => hs.data().len() as u64, } } pub fn write_version(&self) -> StoredMetaWriteVersion { match self { Self::AppendVec(av) => av.write_version(), + // Self::Cold(cs) => cs.write_version(), + Self::Hot(hs) => hs.write_version().unwrap_or(DEFAULT_WRITE_VERSION), } } pub fn meta(&self) -> &StoredMeta { match self { Self::AppendVec(av) => av.meta(), + // Self::Cold(_) => unreachable!(), + Self::Hot(_) => unreachable!(), } } pub fn set_meta(&mut self, meta: &'storage StoredMeta) { match self { Self::AppendVec(av) => av.set_meta(meta), + // Self::Cold(_) => unreachable!(), + Self::Hot(_) => unreachable!(), } } pub(crate) fn sanitize(&self) -> bool { match self { Self::AppendVec(av) => av.sanitize(), + // Self::Cold(_) => unimplemented!(), + Self::Hot(_) => unreachable!(), } } } @@ -168,26 +201,36 @@ impl<'storage> ReadableAccount for StoredAccountMeta<'storage> { fn lamports(&self) -> u64 { match self { Self::AppendVec(av) => av.lamports(), + // Self::Cold(cs) => cs.lamports(), + Self::Hot(hs) => hs.lamports(), } } fn data(&self) -> &[u8] { match self { Self::AppendVec(av) => av.data(), + // Self::Cold(cs) => cs.data(), + Self::Hot(hs) => hs.data(), } } fn owner(&self) -> &Pubkey { match self { Self::AppendVec(av) => av.owner(), + // Self::Cold(cs) => cs.owner(), + Self::Hot(hs) => hs.owner(), } } fn executable(&self) -> bool { match self { Self::AppendVec(av) => av.executable(), + // Self::Cold(cs) => cs.executable(), + Self::Hot(hs) => hs.executable(), } } fn rent_epoch(&self) -> Epoch { match self { Self::AppendVec(av) => av.rent_epoch(), + // Self::Cold(cs) => cs.rent_epoch(), + Self::Hot(hs) => hs.rent_epoch(), } } } diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index a8ff9812f53da3..730b7017fd1866 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -67,6 +67,7 @@ use { snapshot_utils::create_accounts_run_and_snapshot_dirs, sorted_storages::SortedStorages, storable_accounts::StorableAccounts, + tiered_storage::{hot::HOT_FORMAT, TieredStorage}, verify_accounts_hash_in_background::VerifyAccountsHashInBackground, }, blake3::traits::digest::Digest, @@ -1048,6 +1049,11 @@ pub struct AccountStorageEntry { impl AccountStorageEntry { pub fn new(path: &Path, slot: Slot, id: AppendVecId, file_size: u64) -> Self { + // Self::new_cold(path, slot, id, file_size) + Self::new_hot(path, slot, id, file_size) + } + + pub fn new_av(path: &Path, slot: Slot, id: AppendVecId, file_size: u64) -> Self { let tail = AccountsFile::file_name(slot, id); let path = Path::new(path).join(tail); let accounts = AccountsFile::AppendVec(AppendVec::new(&path, true, file_size as usize)); @@ -1062,6 +1068,40 @@ impl AccountStorageEntry { } } + /* + pub fn new_cold(path: &Path, slot: Slot, id: AppendVecId, _file_size: u64) -> Self { + let tail = AccountsFile::file_name(slot, id); + let path = Path::new(path).join(tail); + let accounts = AccountsFile::Tiered(TieredStorage::new(&path, Some(&COLD_FORMAT))); + info!("[Cold]: Create new cold storage at path {:?}", path); + + Self { + id: AtomicAppendVecId::new(id), + slot: AtomicU64::new(slot), + accounts, + count_and_status: RwLock::new((0, AccountStorageStatus::Available)), + approx_store_count: AtomicUsize::new(0), + alive_bytes: AtomicUsize::new(0), + } + } + */ + + pub fn new_hot(path: &Path, slot: Slot, id: AppendVecId, _file_size: u64) -> Self { + let tail = AccountsFile::file_name(slot, id); + let path = Path::new(path).join(tail); + let accounts = AccountsFile::Tiered(TieredStorage::new(&path, Some(&HOT_FORMAT))); + info!("[Hot]: Create new hot storage at path {:?}", path); + + Self { + id: AtomicAppendVecId::new(id), + slot: AtomicU64::new(slot), + accounts, + count_and_status: RwLock::new((0, AccountStorageStatus::Available)), + approx_store_count: AtomicUsize::new(0), + alive_bytes: AtomicUsize::new(0), + } + } + pub(crate) fn new_existing( slot: Slot, id: AppendVecId, @@ -3967,6 +4007,7 @@ impl AccountsDb { .fetch_add(time.as_us(), Ordering::Relaxed); } + /// Perform shrink on the speficied slot if the slot is eligible for shrink. fn do_shrink_slot_store(&self, slot: Slot, store: &Arc) { if self.accounts_cache.contains(slot) { // It is not correct to shrink a slot while it is in the write cache until flush is complete and the slot is removed from the write cache. @@ -3993,6 +4034,7 @@ impl AccountsDb { self.shrink_stats .skipped_shrink .fetch_add(1, Ordering::Relaxed); + // comment(YH): undo the unref to the locked entry in the accounts_index. for pubkey in shrink_collect.unrefed_pubkeys { if let Some(locked_entry) = self.accounts_index.get_account_read_entry(pubkey) { // pubkeys in `unrefed_pubkeys` were unref'd in `shrink_collect` above under the assumption that we would shrink everything. @@ -4151,6 +4193,7 @@ impl AccountsDb { slot: Slot, aligned_total: u64, ) -> ShrinkInProgress<'_> { + /* let shrunken_store = self .try_recycle_store(slot, aligned_total, aligned_total + 1024) .unwrap_or_else(|| { @@ -4161,6 +4204,13 @@ impl AccountsDb { .unwrap_or_else(|| (&self.paths, "shrink")); self.create_store(slot, aligned_total, from, shrink_paths) }); + */ + let maybe_shrink_paths = self.shrink_paths.read().unwrap(); + let (shrink_paths, from) = maybe_shrink_paths + .as_ref() + .map(|paths| (paths, "shrink-w-path")) + .unwrap_or_else(|| (&self.paths, "shrink")); + let shrunken_store = self.create_store(slot, aligned_total, from, shrink_paths); self.storage.shrinking_in_progress(slot, shrunken_store) } diff --git a/runtime/src/accounts_file.rs b/runtime/src/accounts_file.rs index d2a37f34151fce..975176e8ca8507 100644 --- a/runtime/src/accounts_file.rs +++ b/runtime/src/accounts_file.rs @@ -5,7 +5,7 @@ use { }, append_vec::{AppendVec, AppendVecError, MatchAccountOwnerError}, storable_accounts::StorableAccounts, - tiered_storage::error::TieredStorageError, + tiered_storage::{error::TieredStorageError, hot::HOT_FORMAT, TieredStorage}, }, solana_sdk::{account::ReadableAccount, clock::Slot, hash::Hash, pubkey::Pubkey}, std::{ @@ -46,6 +46,7 @@ pub type Result = std::result::Result; /// under different formats. pub enum AccountsFile { AppendVec(AppendVec), + Tiered(TieredStorage), } impl AccountsFile { @@ -54,58 +55,84 @@ impl AccountsFile { /// The second element of the returned tuple is the number of accounts in the /// accounts file. pub fn new_from_file(path: impl AsRef, current_len: usize) -> Result<(Self, usize)> { - let (av, num_accounts) = AppendVec::new_from_file(path, current_len)?; + if let Ok((ts, num_accounts)) = TieredStorage::new_from_file(path.as_ref()) { + log::info!("YH: Open {:?} as tiered storage", path.as_ref()); + return Ok((Self::Tiered(ts), num_accounts)); + } + + let (av, num_accounts) = AppendVec::new_from_file(path.as_ref(), current_len)?; Ok((Self::AppendVec(av), num_accounts)) } + /* + pub fn new_cold_entry(file_path: &Path, create: bool) -> Self { + Self::Tiered(TieredStorage::new( + file_path, + create.then_some(&COLD_FORMAT), + )) + } + */ + + pub fn new_hot_entry(file_path: &Path, create: bool) -> Self { + Self::Tiered(TieredStorage::new(file_path, create.then_some(&HOT_FORMAT))) + } + /// By default, all AccountsFile will remove its underlying file on /// drop. Calling this function to disable such behavior for this /// instance. pub fn set_no_remove_on_drop(&mut self) { match self { Self::AppendVec(av) => av.set_no_remove_on_drop(), + Self::Tiered(ts) => ts.set_no_remove_on_drop(), } } pub fn flush(&self) -> Result<()> { match self { Self::AppendVec(av) => av.flush(), + Self::Tiered(..) => Ok(()), } } pub fn reset(&self) { match self { Self::AppendVec(av) => av.reset(), + Self::Tiered(..) => {} } } pub fn remaining_bytes(&self) -> u64 { match self { Self::AppendVec(av) => av.remaining_bytes(), + Self::Tiered(ts) => ts.remaining_bytes(), } } pub fn len(&self) -> usize { match self { Self::AppendVec(av) => av.len(), + Self::Tiered(ts) => ts.len(), } } pub fn is_empty(&self) -> bool { match self { Self::AppendVec(av) => av.is_empty(), + Self::Tiered(ts) => ts.is_empty(), } } pub fn capacity(&self) -> u64 { match self { Self::AppendVec(av) => av.capacity(), + Self::Tiered(ts) => ts.capacity(), } } pub fn is_recyclable(&self) -> bool { match self { Self::AppendVec(_) => true, + Self::Tiered(_) => false, } } @@ -119,6 +146,7 @@ impl AccountsFile { pub fn get_account(&self, index: usize) -> Option<(StoredAccountMeta<'_>, usize)> { match self { Self::AppendVec(av) => av.get_account(index), + Self::Tiered(ts) => ts.get_account(index), } } @@ -129,6 +157,7 @@ impl AccountsFile { ) -> std::result::Result { match self { Self::AppendVec(av) => av.account_matches_owners(offset, owners), + Self::Tiered(ts) => ts.account_matches_owners(offset, owners), } } @@ -136,6 +165,7 @@ impl AccountsFile { pub fn get_path(&self) -> PathBuf { match self { Self::AppendVec(av) => av.get_path(), + Self::Tiered(ts) => ts.get_path(), } } @@ -148,6 +178,7 @@ impl AccountsFile { pub fn accounts(&self, offset: usize) -> Vec { match self { Self::AppendVec(av) => av.accounts(offset), + Self::Tiered(ts) => ts.accounts(offset), } } @@ -171,6 +202,7 @@ impl AccountsFile { ) -> Option> { match self { Self::AppendVec(av) => av.append_accounts(accounts, skip), + Self::Tiered(ts) => ts.append_accounts(accounts, skip), } } } @@ -209,6 +241,7 @@ pub mod tests { pub(crate) fn set_current_len_for_tests(&self, len: usize) { match self { Self::AppendVec(av) => av.set_current_len_for_tests(len), + Self::Tiered(..) => todo!(), } } } diff --git a/runtime/src/ancient_append_vecs.rs b/runtime/src/ancient_append_vecs.rs index 6d9c7f8a624677..446cdd668ff97c 100644 --- a/runtime/src/ancient_append_vecs.rs +++ b/runtime/src/ancient_append_vecs.rs @@ -899,6 +899,9 @@ pub fn get_ancient_append_vec_capacity() -> u64 { pub fn is_ancient(storage: &AccountsFile) -> bool { match storage { AccountsFile::AppendVec(storage) => storage.capacity() >= get_ancient_append_vec_capacity(), + AccountsFile::Tiered(..) => { + return false; + } } } diff --git a/runtime/src/append_vec.rs b/runtime/src/append_vec.rs index ccc69eb21f5e9c..3ff2b5a783d6a2 100644 --- a/runtime/src/append_vec.rs +++ b/runtime/src/append_vec.rs @@ -599,6 +599,7 @@ impl AppendVec { let mut offsets = Vec::with_capacity(len); for i in skip..len { let (account, pubkey, hash, write_version_obsolete) = accounts.get(i); + println!("av hash = {}", hash); let account_meta = account .map(|account| AccountMeta { lamports: account.lamports(), @@ -675,7 +676,10 @@ pub mod tests { self.current_len.store(len, Ordering::Release); } - fn append_account_test(&self, data: &(StoredMeta, AccountSharedData)) -> Option { + pub(crate) fn append_account_test( + &self, + data: &(StoredMeta, AccountSharedData), + ) -> Option { let slot_ignored = Slot::MAX; let accounts = [(&data.0.pubkey, &data.1)]; let slice = &accounts[..]; @@ -697,6 +701,8 @@ pub mod tests { pub(crate) fn ref_executable_byte(&self) -> &u8 { match self { Self::AppendVec(av) => av.ref_executable_byte(), + // Self::Cold(_) => unimplemented!(), + Self::Hot(_) => unimplemented!(), } } } @@ -1180,20 +1186,23 @@ pub mod tests { av.append_account_test(&create_test_account(10)).unwrap(); let accounts = av.accounts(0); - let StoredAccountMeta::AppendVec(account) = accounts.first().unwrap(); - account.set_data_len_unsafe(crafted_data_len); - assert_eq!(account.data_len(), crafted_data_len); + if let StoredAccountMeta::AppendVec(account) = accounts.first().unwrap() { + account.set_data_len_unsafe(crafted_data_len); + assert_eq!(account.data_len(), crafted_data_len); - // Reload accounts and observe crafted_data_len - let accounts = av.accounts(0); - let account = accounts.first().unwrap(); - assert_eq!(account.data_len(), crafted_data_len); + // Reload accounts and observe crafted_data_len + let accounts = av.accounts(0); + let account = accounts.first().unwrap(); + assert_eq!(account.data_len(), crafted_data_len); - av.flush().unwrap(); - let accounts_len = av.len(); - drop(av); - let result = AppendVec::new_from_file(path, accounts_len); - assert_matches!(result, Err(ref message) if message.to_string().contains("incorrect layout/length/data")); + av.flush().unwrap(); + let accounts_len = av.len(); + drop(av); + let result = AppendVec::new_from_file(path, accounts_len); + assert_matches!(result, Err(ref message) if message.to_string().contains("incorrect layout/length/data")); + } else { + unreachable!(); + } } #[test] @@ -1207,19 +1216,22 @@ pub mod tests { av.append_account_test(&create_test_account(10)).unwrap(); let accounts = av.accounts(0); - let StoredAccountMeta::AppendVec(account) = accounts.first().unwrap(); - account.set_data_len_unsafe(too_large_data_len); - assert_eq!(account.data_len(), too_large_data_len); + if let StoredAccountMeta::AppendVec(account) = accounts.first().unwrap() { + account.set_data_len_unsafe(too_large_data_len); + assert_eq!(account.data_len(), too_large_data_len); - // Reload accounts and observe no account with bad offset - let accounts = av.accounts(0); - assert_matches!(accounts.first(), None); + // Reload accounts and observe no account with bad offset + let accounts = av.accounts(0); + assert_matches!(accounts.first(), None); - av.flush().unwrap(); - let accounts_len = av.len(); - drop(av); - let result = AppendVec::new_from_file(path, accounts_len); - assert_matches!(result, Err(ref message) if message.to_string().contains("incorrect layout/length/data")); + av.flush().unwrap(); + let accounts_len = av.len(); + drop(av); + let result = AppendVec::new_from_file(path, accounts_len); + assert_matches!(result, Err(ref message) if message.to_string().contains("incorrect layout/length/data")); + } else { + unreachable!(); + } } #[test] @@ -1242,46 +1254,52 @@ pub mod tests { assert_eq!(*accounts[0].ref_executable_byte(), 0); assert_eq!(*accounts[1].ref_executable_byte(), 1); - let StoredAccountMeta::AppendVec(account) = &accounts[0]; - let crafted_executable = u8::max_value() - 1; + let crafted_executable: u8; + if let StoredAccountMeta::AppendVec(account) = &accounts[0] { + crafted_executable = u8::max_value() - 1; - account.set_executable_as_byte(crafted_executable); + account.set_executable_as_byte(crafted_executable); + } else { + unreachable!(); + } // reload crafted accounts let accounts = av.accounts(0); - let StoredAccountMeta::AppendVec(account) = accounts.first().unwrap(); + if let StoredAccountMeta::AppendVec(account) = accounts.first().unwrap() { + // upper 7-bits are not 0, so sanitization should fail + assert!(!account.sanitize_executable()); - // upper 7-bits are not 0, so sanitization should fail - assert!(!account.sanitize_executable()); - - // we can observe crafted value by ref - { - let executable_bool: &bool = &account.account_meta.executable; - // Depending on use, *executable_bool can be truthy or falsy due to direct memory manipulation - // assert_eq! thinks *executable_bool is equal to false but the if condition thinks it's not, contradictorily. - assert!(!*executable_bool); - #[cfg(not(target_arch = "aarch64"))] + // we can observe crafted value by ref { - const FALSE: bool = false; // keep clippy happy - if *executable_bool == FALSE { - panic!("This didn't occur if this test passed."); + let executable_bool: &bool = &account.account_meta.executable; + // Depending on use, *executable_bool can be truthy or falsy due to direct memory manipulation + // assert_eq! thinks *executable_bool is equal to false but the if condition thinks it's not, contradictorily. + assert!(!*executable_bool); + #[cfg(not(target_arch = "aarch64"))] + { + const FALSE: bool = false; // keep clippy happy + if *executable_bool == FALSE { + panic!("This didn't occur if this test passed."); + } } + assert_eq!(*account.ref_executable_byte(), crafted_executable); } - assert_eq!(*account.ref_executable_byte(), crafted_executable); - } - // we can NOT observe crafted value by value - { - let executable_bool: bool = account.executable(); - assert!(!executable_bool); - assert_eq!(account.get_executable_byte(), 0); // Wow, not crafted_executable! - } + // we can NOT observe crafted value by value + { + let executable_bool: bool = account.executable(); + assert!(!executable_bool); + assert_eq!(account.get_executable_byte(), 0); // Wow, not crafted_executable! + } - av.flush().unwrap(); - let accounts_len = av.len(); - drop(av); - let result = AppendVec::new_from_file(path, accounts_len); - assert_matches!(result, Err(ref message) if message.to_string().contains("incorrect layout/length/data")); + av.flush().unwrap(); + let accounts_len = av.len(); + drop(av); + let result = AppendVec::new_from_file(path, accounts_len); + assert_matches!(result, Err(ref message) if message.to_string().contains("incorrect layout/length/data")); + } else { + unreachable!(); + } } #[test] diff --git a/runtime/src/append_vec/test_utils.rs b/runtime/src/append_vec/test_utils.rs index 252044e755d06c..04e72703bc81e9 100644 --- a/runtime/src/append_vec/test_utils.rs +++ b/runtime/src/append_vec/test_utils.rs @@ -45,3 +45,17 @@ pub fn create_test_account(sample: usize) -> (StoredMeta, AccountSharedData) { }; (stored_meta, account) } + +pub fn create_test_account_from_len(data_len: usize) -> (StoredMeta, AccountSharedData) { + let mut account = AccountSharedData::new(data_len as u64, 0, &Pubkey::new_unique()); + let data_byte: u8 = (data_len % 256) as u8; + account.set_data((0..data_len).map(|_| data_byte).collect()); + account.executable = data_len % 2 > 0; + account.rent_epoch = if data_len % 3 > 0 { data_len as u64 } else { 0 }; + let stored_meta = StoredMeta { + write_version_obsolete: data_len as u64, + pubkey: Pubkey::new_unique(), + data_len: data_len as u64, + }; + (stored_meta, account) +} diff --git a/runtime/src/tiered_storage.rs b/runtime/src/tiered_storage.rs index 23c97f7c59688e..5db0016e6b0e22 100644 --- a/runtime/src/tiered_storage.rs +++ b/runtime/src/tiered_storage.rs @@ -1,4 +1,5 @@ pub mod byte_block; +pub mod cold; pub mod error; pub mod file; pub mod footer; @@ -7,7 +8,575 @@ pub mod index; pub mod meta; pub mod mmap_utils; pub mod readable; +pub mod writer; -use crate::tiered_storage::error::TieredStorageError; +use { + crate::{ + account_storage::meta::{ + StorableAccountsWithHashesAndWriteVersions, StoredAccountInfo, StoredAccountMeta, + }, + append_vec::MatchAccountOwnerError, + storable_accounts::StorableAccounts, + }, + error::TieredStorageError, + footer::TieredFileFormat, + log::log_enabled, + once_cell::sync::OnceCell, + readable::TieredAccountsFileReader, + solana_sdk::{account::ReadableAccount, hash::Hash, pubkey::Pubkey}, + std::{ + borrow::Borrow, + fs::{remove_file, OpenOptions}, + path::{Path, PathBuf}, + }, + writer::TieredStorageWriter, +}; pub type TieredStorageResult = Result; + +pub const ACCOUNT_DATA_BLOCK_SIZE: usize = 4096; +pub const ACCOUNTS_DATA_STORAGE_FORMAT_VERSION: u64 = 1; + +lazy_static! { + pub static ref HASH_DEFAULT: Hash = Hash::default(); +} + +#[derive(Debug)] +pub struct TieredStorage { + reader: OnceCell, + // This format will be used when creating new data + format: Option<&'static TieredFileFormat>, + path: PathBuf, + remove_on_drop: bool, +} + +impl Drop for TieredStorage { + fn drop(&mut self) { + if self.remove_on_drop { + if let Err(_e) = remove_file(&self.path) { + // promote this to panic soon. + // disabled due to many false positive warnings while running tests. + // blocked by rpc's upgrade to jsonrpc v17 + //error!("AppendVec failed to remove {:?}: {:?}", &self.path, e); + inc_new_counter_info!("append_vec_drop_fail", 1); + } + } + } +} + +impl TieredStorage { + pub fn new(file_path: &Path, format: Option<&'static TieredFileFormat>) -> Self { + if format.is_some() { + let _ignored = remove_file(file_path); + Self { + reader: OnceCell::::new(), + format: format, + path: file_path.to_path_buf(), + remove_on_drop: true, + } + } else { + let (accounts_file, _) = Self::new_from_file(file_path).unwrap(); + return accounts_file; + } + } + + pub fn new_from_file>(path: P) -> TieredStorageResult<(Self, usize)> { + let reader = TieredAccountsFileReader::new_from_path(path.as_ref())?; + let count = reader.num_accounts(); + let reader_cell = OnceCell::::new(); + reader_cell.set(reader).unwrap(); + Ok(( + Self { + reader: reader_cell, + format: None, + path: path.as_ref().to_path_buf(), + remove_on_drop: true, + }, + count, + )) + } + + pub fn account_matches_owners( + &self, + multiplied_index: usize, + owners: &[&Pubkey], + ) -> Result { + if let Some(reader) = self.reader.get() { + return reader.account_matches_owners(multiplied_index, owners); + } + + Err(MatchAccountOwnerError::UnableToLoad) + } + + pub fn get_account<'a>( + &'a self, + multiplied_index: usize, + ) -> Option<(StoredAccountMeta<'a>, usize)> { + if multiplied_index % 1024 == 0 { + log::info!( + "TieredStorage::get_account(): fetch {} account at file {:?}", + multiplied_index, + self.path + ); + } + if let Some(reader) = self.reader.get() { + return reader.get_account(multiplied_index); + } + None + } + + pub fn get_path(&self) -> PathBuf { + self.path.clone() + } + + pub fn accounts(&self, mut multiplied_index: usize) -> Vec { + log::info!( + "TieredStorage::accounts(): fetch all accounts after {} at file {:?}", + multiplied_index, + self.path + ); + let mut accounts = vec![]; + while let Some((account, next)) = self.get_account(multiplied_index) { + accounts.push(account); + multiplied_index = next; + } + accounts + } + + // Returns the Vec of offsets corresponding to the input accounts to later + // construct AccountInfo + pub fn append_accounts< + 'a, + 'b, + T: ReadableAccount + Sync, + U: StorableAccounts<'a, T>, + V: Borrow, + >( + &self, + accounts: &StorableAccountsWithHashesAndWriteVersions<'a, 'b, T, U, V>, + skip: usize, + ) -> Option> { + log::info!("TieredStorage::append_accounts(): file {:?}", self.path); + if self.is_read_only() { + log::error!("TieredStorage::append_accounts(): attempt to append accounts to read only file {:?}", self.path); + return None; + } + + let result: Option>; + { + let writer = TieredStorageWriter::new(&self.path, self.format.unwrap()); + result = writer.append_accounts(accounts, skip); + } + + if self + .reader + .set(TieredAccountsFileReader::new_from_path(&self.path).unwrap()) + .is_err() + { + panic!( + "TieredStorage::append_accounts(): unable to create reader for file {:?}", + self.path + ); + } + log::info!( + "TieredStorage::append_accounts(): successfully appended {} accounts to file {:?}", + accounts.len() - skip, + self.path + ); + result + } + + pub fn file_size(&self) -> TieredStorageResult { + let file = OpenOptions::new() + .read(true) + .create(false) + .open(self.path.to_path_buf())?; + Ok(file.metadata()?.len()) + } + + pub fn is_read_only(&self) -> bool { + self.reader.get().is_some() + } + + /* + pub fn write_from_append_vec(&self, append_vec: &AppendVec) -> TieredStorageResult<()> { + let writer = TieredStorageWriter::new(&self.path, self.format.unwrap()); + writer.write_from_append_vec(&append_vec)?; + + self.reader + .set(TieredAccountsFileReader::new_from_path(&self.path)?) + .map_err(|_| TieredStorageError::ReaderInitializationFailure()) + }*/ + + /////////////////////////////////////////////////////////////////////////////// + + pub fn set_no_remove_on_drop(&mut self) { + self.remove_on_drop = false; + } + + pub fn remaining_bytes(&self) -> u64 { + if self.is_read_only() { + return 0; + } + std::u64::MAX + } + + pub fn len(&self) -> usize { + self.file_size().unwrap_or(0).try_into().unwrap() + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /////////////////////////////////////////////////////////////////////////////// + // unimplemented + + pub fn flush(&self) -> TieredStorageResult<()> { + Ok(()) + } + + pub fn reset(&self) {} + + pub fn capacity(&self) -> u64 { + if self.is_read_only() { + return self.len().try_into().unwrap(); + } + self.len().try_into().unwrap() + } + + pub fn is_ancient(&self) -> bool { + false + } +} + +#[cfg(test)] +pub mod tests { + use { + crate::{ + account_storage::meta::{StorableAccountsWithHashesAndWriteVersions, StoredMeta}, + append_vec::{ + test_utils::{create_test_account_from_len, get_append_vec_path, TempFile}, + AppendVec, + }, + tiered_storage::{ + // cold::COLD_FORMAT, + footer::{TieredFileFormat, TieredStorageFooter, FOOTER_SIZE}, + hot::HOT_FORMAT, + index::AccountIndexFormat, + readable::TieredAccountsFileReader, + TieredStorage, + ACCOUNTS_DATA_STORAGE_FORMAT_VERSION, + ACCOUNT_DATA_BLOCK_SIZE, + }, + }, + once_cell::sync::OnceCell, + solana_sdk::{ + account::{AccountSharedData, ReadableAccount}, + clock::Slot, + hash::Hash, + pubkey::Pubkey, + }, + std::{collections::HashMap, mem, path::Path}, + }; + + impl TieredStorage { + fn new_for_test(file_path: &Path, format: &'static TieredFileFormat) -> Self { + Self { + reader: OnceCell::::new(), + format: Some(format), + path: file_path.to_path_buf(), + remove_on_drop: false, + } + } + + fn footer(&self) -> Option<&TieredStorageFooter> { + if let Some(reader) = self.reader.get() { + return Some(reader.footer()); + } + None + } + } + + impl TieredAccountsFileReader { + fn footer(&self) -> &TieredStorageFooter { + match self { + // Self::Cold(cs) => &cs.footer, + Self::Hot(hs) => hs.footer(), + } + } + } + + fn create_test_append_vec( + path: &str, + data_sizes: &[usize], + ) -> (HashMap, AppendVec) { + let av_path = get_append_vec_path(path); + let av = AppendVec::new(&av_path.path, true, 100 * 1024 * 1024); + let mut test_accounts: HashMap = HashMap::new(); + + for size in data_sizes { + let account = create_test_account_from_len(*size); + let index = av.append_account_test(&account).unwrap(); + assert_eq!(av.get_account_test(index).unwrap(), account); + test_accounts.insert(account.0.pubkey, account); + } + + (test_accounts, av) + } + + fn ads_writer_test_help( + path_prefix: &str, + account_data_sizes: &[usize], + format: &'static TieredFileFormat, + ) { + /* + write_from_append_vec_test_helper( + &(path_prefix.to_owned() + "_from_append_vec"), + account_data_sizes, + ); + */ + append_accounts_test_helper( + &(path_prefix.to_owned() + "_append_accounts"), + account_data_sizes, + format, + ); + } + + fn append_accounts_test_helper( + path_prefix: &str, + account_data_sizes: &[usize], + format: &'static TieredFileFormat, + ) { + let account_count = account_data_sizes.len(); + let (test_accounts, _av) = + create_test_append_vec(&(path_prefix.to_owned() + "_av"), account_data_sizes); + + let slot_ignored = Slot::MAX; + let accounts: Vec<(Pubkey, AccountSharedData)> = test_accounts + .clone() + .into_iter() + .map(|(pubkey, acc)| (pubkey, acc.1)) + .collect(); + let mut accounts_ref: Vec<(&Pubkey, &AccountSharedData)> = Vec::new(); + + for (x, y) in &accounts { + accounts_ref.push((&x, &y)); + } + + let slice = &accounts_ref[..]; + let account_data = (slot_ignored, slice); + let mut write_versions = Vec::new(); + + for (_pubkey, acc) in &test_accounts { + write_versions.push(acc.0.write_version_obsolete); + } + + let mut hashes = Vec::new(); + let mut hashes_ref = Vec::new(); + let mut hashes_map = HashMap::new(); + + for _ in 0..write_versions.len() { + hashes.push(Hash::new_unique()); + } + for i in 0..write_versions.len() { + hashes_ref.push(&hashes[i]); + } + for i in 0..write_versions.len() { + hashes_map.insert(accounts[i].0, &hashes[i]); + } + + let storable_accounts = + StorableAccountsWithHashesAndWriteVersions::new_with_hashes_and_write_versions( + &account_data, + hashes_ref, + write_versions, + ); + + let ads_path = get_append_vec_path(&(path_prefix.to_owned() + "_ads")); + { + let ads = TieredStorage::new_for_test(&ads_path.path, format); + ads.append_accounts(&storable_accounts, 0); + } + + verify_account_data_storage( + account_count, + &test_accounts, + &ads_path, + &hashes_map, + format, + ); + } + + /* + fn write_from_append_vec_test_helper( + path_prefix: &str, + account_data_sizes: &[usize], + format: &'static TieredFileFormat, + ) { + let account_count = account_data_sizes.len(); + let (test_accounts, av) = + create_test_append_vec(&(path_prefix.to_owned() + "_av"), account_data_sizes); + + let ads_path = get_append_vec_path(&(path_prefix.to_owned() + "_ads")); + { + let ads = TieredStorage::new_for_test(&ads_path.path, format); + ads.write_from_append_vec(&av).unwrap(); + } + + verify_account_data_storage( + account_count, + &test_accounts, + &ads_path, + &HashMap::new(), + format, + ); + } + */ + + fn verify_account_data_storage( + account_count: usize, + test_accounts: &HashMap, + ads_path: &TempFile, + hashes_map: &HashMap, + format: &'static TieredFileFormat, + ) { + let ads = TieredStorage::new(&ads_path.path, None); + let footer = ads.footer().unwrap(); + let indexer = AccountIndexFormat::AddressAndOffset; + + let expected_footer = TieredStorageFooter { + account_meta_format: format.account_meta_format.clone(), + owners_block_format: format.owners_block_format.clone(), + account_index_format: format.account_index_format.clone(), + account_block_format: format.account_block_format.clone(), + account_entry_count: account_count as u32, + account_meta_entry_size: format.meta_entry_size as u32, + account_block_size: ACCOUNT_DATA_BLOCK_SIZE as u64, + owner_count: account_count as u32, + owner_entry_size: mem::size_of::() as u32, + // This number should be the total compressed account data size. + account_index_offset: footer.account_index_offset, + owners_offset: footer.account_index_offset + + (account_count * indexer.entry_size()) as u64, + // TODO(yhchiang): reach out Brooks on how to obtain the new hash + hash: footer.hash, + // TODO(yhchiang): fix this + min_account_address: Pubkey::default(), + max_account_address: Pubkey::default(), + format_version: ACCOUNTS_DATA_STORAGE_FORMAT_VERSION, + footer_size: FOOTER_SIZE as u64, + }; + assert_eq!(*footer, expected_footer); + + let mut index = 0; + let mut count_from_ads = 0; + while let Some((account, next)) = ads.get_account(index) { + index = next; + count_from_ads += 1; + let expected_account = &test_accounts[account.pubkey()]; + assert_eq!(account.to_account_shared_data(), expected_account.1); + + if hashes_map.len() > 0 { + let expected_hash = &hashes_map[account.pubkey()]; + assert_eq!(account.hash(), *expected_hash); + } + + let stored_meta_from_storage = StoredMeta { + write_version_obsolete: account.write_version(), + pubkey: *account.pubkey(), + data_len: account.data_len(), + }; + assert_eq!(stored_meta_from_storage, expected_account.0); + } + assert_eq!(&count_from_ads, &account_count); + } + + #[test] + fn test_write_from_append_vec_one_small() { + ads_writer_test_help( + "test_write_from_append_vec_one_small_hot", + &[255], + &HOT_FORMAT, + ); + ads_writer_test_help( + "test_write_from_append_vec_one_small_cold", + &[255], + &HOT_FORMAT, //&COLD_FORMAT YHCHIANG + ); + } + + #[test] + fn test_write_from_append_vec_one_big() { + ads_writer_test_help( + "test_write_from_append_vec_one_big_hot", + &[25500], + &HOT_FORMAT, + ); + ads_writer_test_help( + "test_write_from_append_vec_one_big_cold", + &[25500], + &HOT_FORMAT, //&COLD_FORMAT YHCHIANG + ); + } + + #[test] + fn test_write_from_append_vec_one_10_mb() { + ads_writer_test_help( + "test_write_from_append_vec_one_10_mb_hot", + &[10 * 1024 * 1024], + &HOT_FORMAT, + ); + ads_writer_test_help( + "test_write_from_append_vec_one_10_mb_cold", + &[10 * 1024 * 1024], + &HOT_FORMAT, //&COLD_FORMAT YHCHIANG + ); + } + + #[test] + fn test_write_from_append_vec_multiple_blobs() { + ads_writer_test_help( + "test_write_from_append_vec_multiple_blobs_hot", + &[5000, 6000, 7000, 8000, 5500, 10241023, 9999], + &HOT_FORMAT, + ); + ads_writer_test_help( + "test_write_from_append_vec_multiple_blobs_cold", + &[5000, 6000, 7000, 8000, 5500, 10241023, 9999], + &HOT_FORMAT, //&COLD_FORMAT YHCHIANG + ); + } + + #[test] + fn test_write_from_append_vec_one_data_block() { + ads_writer_test_help( + "test_write_from_append_vec_one_data_block_hot", + &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + &HOT_FORMAT, + ); + ads_writer_test_help( + "test_write_from_append_vec_one_data_block_cold", + &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + &HOT_FORMAT, //&COLD_FORMAT YHCHIANG + ); + } + + #[test] + fn test_write_from_append_vec_mixed_block() { + ads_writer_test_help( + "test_write_from_append_vec_mixed_block_hot", + &[ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1000, 2000, 3000, 4000, 9, 8, 7, 6, 5, 4, 3, 2, 1, + ], + &HOT_FORMAT, + ); + ads_writer_test_help( + "test_write_from_append_vec_mixed_block_cold", + &[ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1000, 2000, 3000, 4000, 9, 8, 7, 6, 5, 4, 3, 2, 1, + ], + &HOT_FORMAT, //&COLD_FORMAT YHCHIANG + ); + } +} diff --git a/runtime/src/tiered_storage/cold.rs b/runtime/src/tiered_storage/cold.rs new file mode 100644 index 00000000000000..1c7cbdf1bdfa16 --- /dev/null +++ b/runtime/src/tiered_storage/cold.rs @@ -0,0 +1,526 @@ +/* +use { + crate::{ + account_storage::meta::{StoredAccountMeta, StoredMetaWriteVersion}, + accounts_file::ALIGN_BOUNDARY_OFFSET, + append_vec::MatchAccountOwnerError, + tiered_storage::{ + byte_block::ByteBlockReader, + file::TieredStorageFile, + footer::{ + AccountBlockFormat, AccountIndexFormat, AccountMetaFormat, OwnersBlockFormat, + TieredFileFormat, TieredStorageFooter, + }, + meta::{ + get_compressed_block_size, AccountMetaFlags, TieredAccountMeta, + ACCOUNT_DATA_ENTIRE_BLOCK, DEFAULT_ACCOUNT_HASH, + }, + readable::TieredReadableAccount, + TieredStorageResult, + }, + }, + log::*, + solana_sdk::{hash::Hash, pubkey::Pubkey, stake_history::Epoch}, + std::{collections::HashMap, path::Path}, +}; + +pub static COLD_FORMAT: TieredFileFormat = TieredFileFormat { + meta_entry_size: std::mem::size_of::(), + account_meta_format: AccountMetaFormat::Cold, + owners_block_format: OwnersBlockFormat::LocalIndex, + account_index_format: AccountIndexFormat::AddressAndOffset, + account_block_format: AccountBlockFormat::Lz4, +}; + +#[derive(Debug)] +pub struct ColdStorageReader { + pub(crate) footer: TieredStorageFooter, + pub(crate) metas: Vec, + accounts: Vec, + owners: Vec, + data_blocks: HashMap>, +} + +impl ColdStorageReader { + pub fn new_from_file(file_path: impl AsRef) -> TieredStorageResult { + let storage = TieredStorageFile::new_writable(file_path.as_ref()); + let footer = ColdReaderBuilder::read_footer_block(&storage)?; + + let metas = ColdReaderBuilder::read_account_metas_block(&storage, &footer)?; + let accounts = ColdReaderBuilder::read_account_addresses_block(&storage, &footer)?; + let owners = ColdReaderBuilder::read_owners_block(&storage, &footer)?; + let data_blocks = ColdReaderBuilder::read_data_blocks(&storage, &footer, &metas)?; + + info!( + "[Cold] Opening cold storage from {}. Footer: {:?}", + file_path.as_ref().display(), + footer + ); + + Ok(ColdStorageReader { + footer, + metas, + accounts, + owners, + data_blocks, + }) + } + + pub fn num_accounts(&self) -> usize { + self.footer.account_entry_count.try_into().unwrap() + } + + fn multiplied_index_to_index(multiplied_index: usize) -> usize { + // This is a temporary workaround to work with existing AccountInfo + // implementation that ties to AppendVec with the assumption that the offset + // is a multiple of ALIGN_BOUNDARY_OFFSET, while tiered storage actually talks + // about index instead of offset. + multiplied_index / ALIGN_BOUNDARY_OFFSET + } + + pub fn account_matches_owners( + &self, + multiplied_index: usize, + owners: &[&Pubkey], + ) -> Result { + let index = Self::multiplied_index_to_index(multiplied_index); + if index >= self.metas.len() { + return Err(MatchAccountOwnerError::UnableToLoad); + } + + owners + .iter() + .position(|entry| &&self.owners[self.metas[index].owner_local_id() as usize] == entry) + .ok_or(MatchAccountOwnerError::NoMatch) + } + + pub fn get_account<'a>( + &'a self, + multiplied_index: usize, + ) -> Option<(StoredAccountMeta<'a>, usize)> { + let index = Self::multiplied_index_to_index(multiplied_index); + if index >= self.metas.len() { + return None; + } + if let Some(data_block) = self.data_blocks.get(&self.metas[index].block_offset()) { + return Some(( + StoredAccountMeta::Cold(TieredReadableAccountMeta { + meta: &self.metas[index], + pubkey: &self.accounts[index], + owner: &self.owners[self.metas[index].owner_local_id() as usize], + index: multiplied_index, + data_block: data_block, + }), + multiplied_index + ALIGN_BOUNDARY_OFFSET, + )); + } + None + } +} + +pub(crate) struct ColdReaderBuilder {} + +impl ColdReaderBuilder { + fn read_footer_block(storage: &TieredStorageFile) -> TieredStorageResult { + TieredStorageFooter::new_from_footer_block(&storage) + } + + fn read_account_metas_block( + storage: &TieredStorageFile, + footer: &TieredStorageFooter, + ) -> TieredStorageResult> { + let mut metas: Vec = + Vec::with_capacity(footer.account_entry_count as usize); + + (&storage).seek(0)?; + + for _ in 0..footer.account_entry_count { + metas.push(ColdAccountMeta::new_from_file(&storage)?); + } + + Ok(metas) + } + + fn read_account_addresses_block( + storage: &TieredStorageFile, + footer: &TieredStorageFooter, + ) -> TieredStorageResult> { + Self::read_pubkeys_block( + storage, + footer.account_index_offset, + footer.account_entry_count, + ) + } + + fn read_owners_block( + storage: &TieredStorageFile, + footer: &TieredStorageFooter, + ) -> TieredStorageResult> { + Self::read_pubkeys_block(storage, footer.owners_offset, footer.owner_count) + } + + fn read_pubkeys_block( + storage: &TieredStorageFile, + offset: u64, + count: u32, + ) -> TieredStorageResult> { + let mut addresses: Vec = Vec::with_capacity(count as usize); + (&storage).seek(offset)?; + for _ in 0..count { + let mut pubkey = Pubkey::default(); + (&storage).read_type(&mut pubkey)?; + addresses.push(pubkey); + } + + Ok(addresses) + } + + pub fn read_data_blocks( + storage: &TieredStorageFile, + footer: &TieredStorageFooter, + metas: &Vec, + ) -> TieredStorageResult>> { + let count = footer.account_entry_count as usize; + let mut data_blocks = HashMap::>::new(); + for i in 0..count { + Self::update_data_block_map(&mut data_blocks, storage, footer, metas, i)?; + } + Ok(data_blocks) + } + + fn update_data_block_map( + data_blocks: &mut HashMap>, + storage: &TieredStorageFile, + footer: &TieredStorageFooter, + metas: &Vec, + index: usize, + ) -> TieredStorageResult<()> { + let block_offset = &metas[index].block_offset(); + if !data_blocks.contains_key(&block_offset) { + let data_block = Self::read_data_block(storage, footer, metas, index).unwrap(); + + data_blocks.insert(metas[index].block_offset(), data_block); + } + Ok(()) + } + + pub fn read_data_block( + storage: &TieredStorageFile, + footer: &TieredStorageFooter, + metas: &Vec, + index: usize, + ) -> TieredStorageResult> { + let compressed_block_size = get_compressed_block_size(footer, metas, index) as usize; + + (&storage).seek(metas[index].block_offset())?; + + let mut buffer: Vec = vec![0; compressed_block_size]; + (&storage).read_bytes(&mut buffer)?; + + // TODO(yhchiang): encoding from footer + Ok(ByteBlockReader::decode( + AccountBlockFormat::Lz4, + &buffer[..], + )?) + } +} + +#[derive(Debug, PartialEq, Eq)] +#[repr(C)] +pub struct ColdAccountMeta { + lamports: u64, + block_offset: u64, + uncompressed_data_size: u16, + intra_block_offset: u16, + owner_local_id: u32, + flags: AccountMetaFlags, + // It will still be 32 bytes even without this field. + _unused: u32, +} + +impl TieredAccountMeta for ColdAccountMeta { + fn new() -> Self { + Self { + ..ColdAccountMeta::default() + } + } + + fn is_blob_account_data(data_len: u64) -> bool { + data_len > ACCOUNT_DATA_BLOCK_SIZE as u64 + } + + fn with_lamports(&mut self, lamports: u64) -> &mut Self { + self.lamports = lamports; + self + } + + fn with_block_offset(&mut self, offset: u64) -> &mut Self { + self.block_offset = offset; + self + } + + fn with_data_tailing_paddings(&mut self, _paddings: u8) -> &mut Self { + // cold storage never has paddings. + self + } + + fn with_owner_local_id(&mut self, local_id: u32) -> &mut Self { + self.owner_local_id = local_id; + self + } + + fn with_uncompressed_data_size(&mut self, data_size: u64) -> &mut Self { + assert!(ACCOUNT_DATA_ENTIRE_BLOCK <= std::u16::MAX); + if data_size >= ACCOUNT_DATA_ENTIRE_BLOCK as u64 { + self.uncompressed_data_size = ACCOUNT_DATA_ENTIRE_BLOCK; + } else { + self.uncompressed_data_size = data_size as u16; + } + self + } + + fn with_intra_block_offset(&mut self, offset: u16) -> &mut Self { + self.intra_block_offset = offset; + self + } + + fn with_flags(&mut self, flags: &AccountMetaFlags) -> &mut Self { + self.flags = *flags; + self + } + + fn support_shared_byte_block() -> bool { + true + } + + fn lamports(&self) -> u64 { + self.lamports + } + + fn block_offset(&self) -> u64 { + self.block_offset + } + + fn set_block_offset(&mut self, offset: u64) { + self.block_offset = offset; + } + + fn padding_bytes(&self) -> u8 { + 0u8 + } + + fn intra_block_offset(&self) -> u16 { + self.intra_block_offset + } + + fn owner_local_id(&self) -> u32 { + self.owner_local_id + } + + fn flags(&self) -> &AccountMetaFlags { + &self.flags + } + + fn uncompressed_data_size(&self) -> usize { + self.uncompressed_data_size as usize + } + + fn rent_epoch(&self, data_block: &[u8]) -> Option { + let offset = self.optional_fields_offset(data_block); + if self.flags.has_rent_epoch() { + unsafe { + let unaligned = + std::ptr::addr_of!(data_block[offset..offset + std::mem::size_of::()]) + as *const Epoch; + return Some(std::ptr::read_unaligned(unaligned)); + } + } + None + } + + fn account_hash<'a>(&self, data_block: &'a [u8]) -> &'a Hash { + let mut offset = self.optional_fields_offset(data_block); + if self.flags.has_rent_epoch() { + offset += std::mem::size_of::(); + } + if self.flags.has_account_hash() { + unsafe { + let raw_ptr = std::slice::from_raw_parts( + data_block[offset..offset + std::mem::size_of::()].as_ptr() as *const u8, + std::mem::size_of::(), + ); + let ptr: *const Hash = raw_ptr.as_ptr() as *const Hash; + return &*ptr; + } + } + return &DEFAULT_ACCOUNT_HASH; + } + + fn write_version(&self, data_block: &[u8]) -> Option { + let mut offset = self.optional_fields_offset(data_block); + if self.flags.has_rent_epoch() { + offset += std::mem::size_of::(); + } + if self.flags.has_account_hash() { + offset += std::mem::size_of::(); + } + if self.flags.has_write_version() { + unsafe { + let unaligned = std::ptr::addr_of!( + data_block[offset..offset + std::mem::size_of::()] + ) as *const StoredMetaWriteVersion; + return Some(std::ptr::read_unaligned(unaligned)); + } + } + None + } + + fn data_len(&self, data_block: &[u8]) -> usize { + self.optional_fields_offset(data_block) + .saturating_sub(self.intra_block_offset as usize) + } + + fn optional_fields_offset<'a>(&self, data_block: &'a [u8]) -> usize { + if self.is_blob_account() { + return data_block.len().saturating_sub(self.optional_fields_size()); + } + (self.intra_block_offset + self.uncompressed_data_size) as usize + } + + fn account_data<'a>(&self, data_block: &'a [u8]) -> &'a [u8] { + &data_block[(self.intra_block_offset as usize)..self.optional_fields_offset(data_block)] + } + + fn is_blob_account(&self) -> bool { + self.uncompressed_data_size == ACCOUNT_DATA_ENTIRE_BLOCK && self.intra_block_offset == 0 + } + + fn stored_size( + footer: &TieredStorageFooter, + metas: &Vec, + i: usize, + ) -> usize { + let compressed_block_size = get_compressed_block_size(footer, metas, i); + + let data_size = if metas[i].is_blob_account() { + compressed_block_size + } else { + let compression_rate: f64 = + compressed_block_size as f64 / Self::get_raw_block_size(metas, i) as f64; + + ((metas[i].uncompressed_data_size() as usize + metas[i].optional_fields_size()) as f64 + / compression_rate) as usize + }; + + return std::mem::size_of::() + data_size; + } +} + +impl ColdAccountMeta { + fn new_from_file(file: &TieredStorageFile) -> TieredStorageResult { + let mut entry = ColdAccountMeta::new(); + file.read_type(&mut entry)?; + + Ok(entry) + } + + pub fn get_raw_block_size(metas: &Vec, index: usize) -> usize { + let mut block_size = 0; + + for i in index..metas.len() { + if metas[i].block_offset() == metas[index].block_offset() { + block_size += metas[i].uncompressed_data_size(); + } else { + break; + } + } + + block_size.try_into().unwrap() + } +} + +impl Default for ColdAccountMeta { + fn default() -> Self { + Self { + lamports: 0, + block_offset: 0, + owner_local_id: 0, + uncompressed_data_size: 0, + intra_block_offset: 0, + flags: AccountMetaFlags::new(), + _unused: 0, + } + } +} + +#[cfg(test)] +pub mod tests { + use { + crate::{ + account_storage::meta::StoredMetaWriteVersion, + append_vec::test_utils::get_append_vec_path, + tiered_storage::{ + cold::ColdAccountMeta, + file::TieredStorageFile, + meta::{AccountMetaFlags, AccountMetaOptionalFields, TieredAccountMeta}, + }, + }, + ::solana_sdk::{hash::Hash, stake_history::Epoch}, + memoffset::offset_of, + }; + + #[test] + fn test_account_meta_entry() { + let path = get_append_vec_path("test_account_meta_entry"); + + const TEST_LAMPORT: u64 = 7; + const BLOCK_OFFSET: u64 = 56987; + const OWNER_LOCAL_ID: u32 = 54; + const UNCOMPRESSED_LENGTH: u64 = 255; + const LOCAL_OFFSET: u16 = 82; + const TEST_RENT_EPOCH: Epoch = 7; + const TEST_WRITE_VERSION: StoredMetaWriteVersion = 0; + + let optional_fields = AccountMetaOptionalFields { + rent_epoch: Some(TEST_RENT_EPOCH), + account_hash: Some(Hash::new_unique()), + write_version: Some(TEST_WRITE_VERSION), + }; + + let mut expected_entry = ColdAccountMeta::new(); + let mut flags = AccountMetaFlags::new_from(&optional_fields); + flags.set_executable(true); + expected_entry + .with_lamports(TEST_LAMPORT) + .with_block_offset(BLOCK_OFFSET) + .with_owner_local_id(OWNER_LOCAL_ID) + .with_uncompressed_data_size(UNCOMPRESSED_LENGTH) + .with_intra_block_offset(LOCAL_OFFSET) + .with_flags(&flags); + + { + let ads_file = TieredStorageFile::new_writable(&path.path); + ads_file.write_type(&expected_entry); + } + + let mut file = TieredStorageFile::new_readonly(&path.path); + let entry = ColdAccountMeta::new_from_file(&mut file).unwrap(); + + assert_eq!(expected_entry, entry); + assert!(entry.flags.executable()); + assert!(entry.flags.has_rent_epoch()); + } + + #[test] + fn test_cold_account_meta_layout() { + const COLD_META_SIZE_BYTES: usize = 32; + assert_eq!(offset_of!(ColdAccountMeta, lamports), 0x00); + assert_eq!(offset_of!(ColdAccountMeta, block_offset), 0x08); + assert_eq!(offset_of!(ColdAccountMeta, uncompressed_data_size), 0x10); + assert_eq!(offset_of!(ColdAccountMeta, intra_block_offset), 0x12); + assert_eq!(offset_of!(ColdAccountMeta, owner_local_id), 0x14); + assert_eq!(offset_of!(ColdAccountMeta, flags), 0x18); + assert_eq!(std::mem::size_of::(), COLD_META_SIZE_BYTES); + } +} +*/ diff --git a/runtime/src/tiered_storage/error.rs b/runtime/src/tiered_storage/error.rs index 74fa247506f2c5..226abc98dde6f5 100644 --- a/runtime/src/tiered_storage/error.rs +++ b/runtime/src/tiered_storage/error.rs @@ -7,4 +7,7 @@ pub enum TieredStorageError { #[error("MagicNumberMismatch: expected {0}, found {1}")] MagicNumberMismatch(u64, u64), + + #[error("unable to initialize reader")] + ReaderInitializationFailure(), } diff --git a/runtime/src/tiered_storage/footer.rs b/runtime/src/tiered_storage/footer.rs index d8684b04399506..da4adcd1f664dd 100644 --- a/runtime/src/tiered_storage/footer.rs +++ b/runtime/src/tiered_storage/footer.rs @@ -47,7 +47,7 @@ impl Default for TieredStorageMagicNumber { pub enum AccountMetaFormat { #[default] Hot = 0, - Cold = 1, + // Cold = 1, } #[repr(u16)] @@ -85,6 +85,15 @@ pub enum OwnersBlockFormat { LocalIndex = 0, } +#[derive(Debug)] +pub struct TieredFileFormat { + pub meta_entry_size: usize, + pub account_meta_format: AccountMetaFormat, + pub owners_block_format: OwnersBlockFormat, + pub account_index_format: AccountIndexFormat, + pub account_block_format: AccountBlockFormat, +} + #[derive(Debug, PartialEq, Eq, Clone)] #[repr(C)] pub struct TieredStorageFooter { diff --git a/runtime/src/tiered_storage/hot.rs b/runtime/src/tiered_storage/hot.rs index dd3491516f9167..b304e012a18980 100644 --- a/runtime/src/tiered_storage/hot.rs +++ b/runtime/src/tiered_storage/hot.rs @@ -3,12 +3,17 @@ use { crate::{ - account_storage::meta::StoredMetaWriteVersion, + account_storage::meta::{StoredAccountMeta, StoredMetaWriteVersion}, accounts_file::ALIGN_BOUNDARY_OFFSET, append_vec::MatchAccountOwnerError, tiered_storage::{ byte_block, - footer::TieredStorageFooter, + file::TieredStorageFile, + footer::{ + AccountBlockFormat, AccountMetaFormat, OwnersBlockFormat, TieredFileFormat, + TieredStorageFooter, + }, + index::AccountIndexFormat, meta::{AccountMetaFlags, AccountMetaOptionalFields, TieredAccountMeta}, mmap_utils::{get_slice, get_type}, readable::TieredReadableAccount, @@ -21,6 +26,14 @@ use { std::{fs::OpenOptions, option::Option, path::Path}, }; +pub static HOT_FORMAT: TieredFileFormat = TieredFileFormat { + meta_entry_size: std::mem::size_of::(), + account_meta_format: AccountMetaFormat::Hot, + owners_block_format: OwnersBlockFormat::LocalIndex, + account_index_format: AccountIndexFormat::AddressAndOffset, + account_block_format: AccountBlockFormat::AlignedRaw, +}; + /// The maximum number of padding bytes used in a hot account entry. const MAX_HOT_PADDING: u8 = 7; @@ -58,6 +71,16 @@ pub struct HotAccountMeta { flags: AccountMetaFlags, } +impl HotAccountMeta { + #[allow(dead_code)] + fn new_from_file(ads_file: &TieredStorageFile) -> TieredStorageResult { + let mut entry = HotAccountMeta::new(); + ads_file.read_type(&mut entry)?; + + Ok(entry) + } +} + impl TieredAccountMeta for HotAccountMeta { /// Construct a HotAccountMeta instance. fn new() -> Self { @@ -193,8 +216,16 @@ impl TieredAccountMeta for HotAccountMeta { fn account_data<'a>(&self, account_block: &'a [u8]) -> &'a [u8] { &account_block[..self.account_data_size(account_block)] } -} + fn stored_size( + _footer: &TieredStorageFooter, + _metas: &Vec, + _i: usize, + ) -> usize { + // TODO(yhchiang): need a new way to obtain data size + std::mem::size_of::() + } +} #[derive(Debug)] pub struct HotAccountsFileReader { map: Mmap, @@ -349,7 +380,7 @@ impl HotAccountsFileReader { pub fn get_account<'a>( &'a self, multiplied_index: usize, - ) -> Option<(TieredReadableAccount<'a, HotAccountMeta>, usize)> { + ) -> Option<(StoredAccountMeta<'a>, usize)> { let index = Self::multiplied_index_to_index(multiplied_index); if index >= self.footer.account_entry_count as usize { return None; @@ -366,13 +397,13 @@ impl HotAccountsFileReader { let account_block: &'a [u8] = self.get_account_block(meta_offset, index).unwrap(); return Some(( - TieredReadableAccount { + StoredAccountMeta::Hot(TieredReadableAccount { meta: meta, address: address, owner: owner, index: multiplied_index, account_block: account_block, - }, + }), multiplied_index + ALIGN_BOUNDARY_OFFSET, )); } @@ -384,6 +415,7 @@ pub mod tests { super::*, crate::{ account_storage::meta::StoredMetaWriteVersion, + append_vec::test_utils::get_append_vec_path, tiered_storage::{ byte_block::ByteBlockWriter, file::TieredStorageFile, diff --git a/runtime/src/tiered_storage/meta.rs b/runtime/src/tiered_storage/meta.rs index 15a4d7aefbfef0..90163b0aff64f7 100644 --- a/runtime/src/tiered_storage/meta.rs +++ b/runtime/src/tiered_storage/meta.rs @@ -1,7 +1,9 @@ #![allow(dead_code)] //! The account meta and related structs for the tiered storage. use { - crate::account_storage::meta::StoredMetaWriteVersion, + crate::{ + account_storage::meta::StoredMetaWriteVersion, tiered_storage::footer::TieredStorageFooter, + }, ::solana_sdk::{hash::Hash, stake_history::Epoch}, modular_bitfield::prelude::*, }; @@ -17,8 +19,10 @@ pub struct AccountMetaFlags { pub has_account_hash: bool, /// whether the account meta has write version pub has_write_version: bool, + /// executable flag + pub executable: bool, /// the reserved bits. - reserved: B29, + reserved: B28, } /// A trait that allows different implementations of the account meta that @@ -85,6 +89,12 @@ pub trait TieredAccountMeta: Sized { /// Returns the data associated to this account based on the specified /// account block. fn account_data<'a>(&self, _account_block: &'a [u8]) -> &'a [u8]; + + fn stored_size( + _footer: &TieredStorageFooter, + _metas: &Vec, + _i: usize, + ) -> usize; } impl AccountMetaFlags { diff --git a/runtime/src/tiered_storage/readable.rs b/runtime/src/tiered_storage/readable.rs index d2cf4ea7663994..52983a1999954e 100644 --- a/runtime/src/tiered_storage/readable.rs +++ b/runtime/src/tiered_storage/readable.rs @@ -5,7 +5,6 @@ use { tiered_storage::{ footer::{AccountMetaFormat, TieredStorageFooter}, hot::HotAccountsFileReader, - index::AccountIndexFormat, meta::TieredAccountMeta, TieredStorageResult, }, @@ -82,7 +81,7 @@ impl<'accounts_file, M: TieredAccountMeta> ReadableAccount /// Temporarily unimplemented!() as program runtime v2 will use /// a different API for executable. fn executable(&self) -> bool { - unimplemented!(); + self.meta.flags().executable() } /// Returns the epoch that this account will next owe rent by parsing @@ -111,15 +110,9 @@ impl TieredAccountsFileReader { /// Creates a reader for the specified tiered storage accounts file. pub fn new_from_path>(path: P) -> TieredStorageResult { let footer = TieredStorageFooter::new_from_path(&path)?; - let indexer = match footer.account_index_format { - AccountIndexFormat::AddressAndOffset => AccountIndexFormat::AddressAndOffset, - }; - match footer.account_meta_format { // AccountMetaFormat::Cold => Ok(Self::Cold(ColdStorageReader::new_from_file(path)?)), - AccountMetaFormat::Hot => Ok(Self::Hot(HotAccountsFileReader::new_from_path( - path, indexer, - )?)), + AccountMetaFormat::Hot => Ok(Self::Hot(HotAccountsFileReader::new_from_path(path)?)), } } diff --git a/runtime/src/tiered_storage/writer.rs b/runtime/src/tiered_storage/writer.rs new file mode 100644 index 00000000000000..af0e411f3c52c7 --- /dev/null +++ b/runtime/src/tiered_storage/writer.rs @@ -0,0 +1,615 @@ +//! docs/src/proposals/append-vec-storage.md + +use { + crate::{ + account_storage::meta::{ + StorableAccountsWithHashesAndWriteVersions, StoredAccountInfo, StoredMetaWriteVersion, + DEFAULT_ACCOUNT_HASH, DEFAULT_RENT_EPOCH, DEFAULT_WRITE_VERSION, + }, + accounts_file::ALIGN_BOUNDARY_OFFSET, + storable_accounts::StorableAccounts, + tiered_storage::{ + byte_block::ByteBlockWriter, + file::TieredStorageFile, + footer::{AccountMetaFormat, TieredFileFormat, TieredStorageFooter}, + hot::HotAccountMeta, + index::{AccountIndexFormat, AccountIndexWriterEntry}, + meta::{AccountMetaFlags, AccountMetaOptionalFields, TieredAccountMeta}, + TieredStorageResult, + }, + }, + log::*, + solana_sdk::{ + account::{Account, ReadableAccount}, + hash::Hash, + pubkey::Pubkey, + stake_history::Epoch, + }, + std::{borrow::Borrow, collections::HashMap, fs::remove_file, mem, path::Path}, +}; + +pub const ACCOUNT_DATA_BLOCK_SIZE: usize = 4096; +pub const ACCOUNTS_DATA_STORAGE_FORMAT_VERSION: u64 = 1; +lazy_static! { + pub static ref ACCOUNT_DEFAULT: Account = Account { + lamports: 0, + data: Vec::new(), + owner: Pubkey::from([0u8; 32]), + executable: false, + rent_epoch: u64::MAX, + }; +} + +lazy_static! { + pub static ref HASH_DEFAULT: Hash = Hash::default(); +} + +pub(crate) struct AccountOwnerTable { + pub owners_vec: Vec, + pub owners_map: HashMap, +} + +impl AccountOwnerTable { + pub fn new() -> Self { + Self { + owners_vec: vec![], + owners_map: HashMap::new(), + } + } + pub fn check_and_add(&mut self, pubkey: &Pubkey) -> u32 { + if let Some(index) = self.owners_map.get(pubkey) { + return index.clone(); + } + let index: u32 = self.owners_vec.len().try_into().unwrap(); + self.owners_vec.push(*pubkey); + self.owners_map.insert(*pubkey, index); + + index + } +} + +#[derive(Debug)] +pub struct TieredStorageWriter { + storage: TieredStorageFile, + format: &'static TieredFileFormat, +} + +impl TieredStorageWriter { + /// Create a new accounts-state-file + #[allow(dead_code)] + pub fn new(file_path: &Path, format: &'static TieredFileFormat) -> Self { + let _ignored = remove_file(file_path); + Self { + storage: TieredStorageFile::new_writable(file_path), + format: format, + } + } + + fn append_accounts_impl< + 'a, + 'b, + T: ReadableAccount + Sync, + U: StorableAccounts<'a, T>, + V: Borrow, + W: TieredAccountMeta, + >( + &self, + accounts: &StorableAccountsWithHashesAndWriteVersions<'a, 'b, T, U, V>, + mut footer: TieredStorageFooter, + mut account_metas: Vec, + skip: usize, + ) -> Option> { + let mut cursor = 0; + let mut account_pubkeys: Vec<&Pubkey> = vec![]; + let mut owners_table = AccountOwnerTable::new(); + let mut dummy_hash: Hash = Hash::new_unique(); + + let mut data_block_writer = self.new_data_block_writer(&footer); + footer.account_block_size = ACCOUNT_DATA_BLOCK_SIZE as u64; + footer.account_meta_entry_size = std::mem::size_of::() as u32; + + let mut buffered_account_metas = Vec::::new(); + let mut buffered_account_pubkeys: Vec<&Pubkey> = vec![]; + + let len = accounts.accounts.len(); + let mut input_pubkey_map: HashMap<&Pubkey, usize> = HashMap::with_capacity(len); + let mut account_index_entries = Vec::::new(); + + let default_data = [0u8; 0]; + let default_pubkey = Pubkey::default(); + + for i in skip..len { + let (account, pubkey, hash, write_version) = accounts.get(i); + input_pubkey_map.insert(pubkey, i); + if let Some(account) = account { + data_block_writer = self + .write_single_account( + account.lamports(), + account.rent_epoch(), + account.data(), + account.owner(), + account.executable(), + pubkey, + hash, + write_version, + &mut cursor, + &mut footer, + &mut account_metas, + &mut account_pubkeys, + &mut owners_table, + data_block_writer, + &mut buffered_account_metas, + &mut buffered_account_pubkeys, + &mut dummy_hash, + &mut account_index_entries, + ) + .unwrap(); + } else { + data_block_writer = self + .write_single_account( + 0, // lamports + u64::MAX, // rent_epoch, + &default_data, + &default_pubkey, + false, // executable + pubkey, + hash, + write_version, + &mut cursor, + &mut footer, + &mut account_metas, + &mut account_pubkeys, + &mut owners_table, + data_block_writer, + &mut buffered_account_metas, + &mut buffered_account_pubkeys, + &mut dummy_hash, + &mut account_index_entries, + ) + .unwrap(); + } + } + + // Persist the last block if any + if buffered_account_metas.len() > 0 { + self.flush_account_block( + &mut cursor, + &mut footer, + &mut account_metas, + &mut account_pubkeys, + &mut buffered_account_metas, + &mut buffered_account_pubkeys, + data_block_writer, + ) + .ok()?; + } + + assert_eq!(buffered_account_metas.len(), 0); + assert_eq!(buffered_account_pubkeys.len(), 0); + assert_eq!(footer.account_entry_count, account_metas.len() as u32); + + self.write_account_pubkeys_block(&mut cursor, &mut footer, &account_index_entries) + .ok()?; + + self.write_owners_block(&mut cursor, &mut footer, &owners_table.owners_vec) + .ok()?; + + footer.write_footer_block(&self.storage).ok()?; + + assert_eq!(account_metas.len(), account_pubkeys.len()); + assert_eq!(account_metas.len(), len - skip); + + let mut stored_accounts_info: Vec = Vec::with_capacity(len); + for _ in skip..len { + stored_accounts_info.push(StoredAccountInfo { offset: 0, size: 0 }); + } + for i in 0..account_metas.len() { + let index = input_pubkey_map.get(&account_pubkeys[i]).unwrap(); + + // of ALIGN_BOUNDARY_OFFSET, while cold storage actually talks about index + // instead of offset. + stored_accounts_info[*index].offset = i * ALIGN_BOUNDARY_OFFSET; + stored_accounts_info[*index].size = W::stored_size(&footer, &account_metas, i); + } + match footer.account_meta_format { + AccountMetaFormat::Hot => info!( + "[Hot] append_accounts successfully completed. Footer: {:?}", + footer + ), + /* + AccountMetaFormat::Cold => info!( + "[Cold] append_accounts successfully completed. Footer: {:?}", + footer + ), + */ + } + + Some(stored_accounts_info) + } + + pub fn append_accounts< + 'a, + 'b, + T: ReadableAccount + Sync, + U: StorableAccounts<'a, T>, + V: Borrow, + >( + &self, + accounts: &StorableAccountsWithHashesAndWriteVersions<'a, 'b, T, U, V>, + skip: usize, + ) -> Option> { + let mut footer = TieredStorageFooter::default(); + // TODO(yhchiang): make it configerable + footer.account_meta_format = self.format.account_meta_format.clone(); + footer.account_block_format = self.format.account_block_format.clone(); + footer.format_version = ACCOUNTS_DATA_STORAGE_FORMAT_VERSION; + match footer.account_meta_format { + AccountMetaFormat::Hot => { + info!( + "[Hot] Appending {} accounts to hot storage.", + accounts.len() - skip + ); + self.append_accounts_impl(accounts, footer, Vec::::new(), skip) + } /* + AccountMetaFormat::Cold => { + info!( + "[Cold] Appending {} accounts to cold storage.", + accounts.len() - skip + ); + self.append_accounts_impl(accounts, footer, Vec::::new(), skip) + }*/ + } + } + + fn new_data_block_writer(&self, footer: &TieredStorageFooter) -> ByteBlockWriter { + return ByteBlockWriter::new(footer.account_block_format); + } + + pub(crate) fn write_account_pubkeys_block( + &self, + cursor: &mut u64, + footer: &mut TieredStorageFooter, + index_entries: &Vec, + ) -> TieredStorageResult<()> { + footer.account_index_offset = *cursor; + // TODO(yhchiang): check whether footer has the right index setting + let indexer = match footer.account_meta_format { + AccountMetaFormat::Hot => AccountIndexFormat::AddressAndOffset, + }; + *cursor += indexer.write_index_block(&self.storage, index_entries)? as u64; + Ok(()) + } + + fn write_owners_block( + &self, + cursor: &mut u64, + footer: &mut TieredStorageFooter, + pubkeys: &Vec, + ) -> TieredStorageResult<()> { + footer.owners_offset = *cursor; + footer.owner_count = pubkeys.len() as u32; + footer.owner_entry_size = mem::size_of::() as u32; + + self.write_pubkeys_block(cursor, pubkeys) + } + + fn write_pubkeys_block( + &self, + cursor: &mut u64, + pubkeys: &Vec, + ) -> TieredStorageResult<()> { + for pubkey in pubkeys { + *cursor += self.storage.write_type(pubkey)? as u64; + } + + Ok(()) + } + + fn flush_account_block<'a, T: TieredAccountMeta>( + &self, + cursor: &mut u64, + footer: &mut TieredStorageFooter, + account_metas: &mut Vec, + account_pubkeys: &mut Vec<&'a Pubkey>, + input_metas: &mut Vec, + input_pubkeys: &mut Vec<&'a Pubkey>, + data_block_writer: ByteBlockWriter, + ) -> TieredStorageResult<()> { + // Persist the current block + let encoded_data = data_block_writer.finish()?; + self.storage.write_bytes(&encoded_data)?; + + assert_eq!(input_metas.len(), input_pubkeys.len()); + + /* + for input_meta in &mut input_metas.into_iter() { + input_meta.set_block_offset(*cursor); + } + for input_meta in &mut input_metas.into_iter() { + assert_eq!(input_meta.block_offset(), *cursor); + } + */ + + footer.account_entry_count += input_metas.len() as u32; + account_metas.append(input_metas); + account_pubkeys.append(input_pubkeys); + + *cursor += encoded_data.len() as u64; + assert_eq!(input_metas.len(), 0); + assert_eq!(input_pubkeys.len(), 0); + + Ok(()) + } + + fn write_single_account<'a, T: TieredAccountMeta>( + &self, + lamports: u64, + rent_epoch: Epoch, + account_data: &[u8], + owner: &Pubkey, + executable: bool, + address: &'a Pubkey, + hash: &Hash, + write_version: StoredMetaWriteVersion, + cursor: &mut u64, + footer: &mut TieredStorageFooter, + account_metas: &mut Vec, + account_pubkeys: &mut Vec<&'a Pubkey>, + owners_table: &mut AccountOwnerTable, + mut data_block: ByteBlockWriter, + buffered_account_metas: &mut Vec, + buffered_account_pubkeys: &mut Vec<&'a Pubkey>, + _hash: &mut Hash, + account_index_entries: &mut Vec>, + ) -> TieredStorageResult { + let optional_fields = AccountMetaOptionalFields { + rent_epoch: (rent_epoch != u64::MAX).then(|| rent_epoch), + account_hash: (*hash != Hash::default()).then(|| *hash), + // TODO(yhchiang): free to kill the write_version + write_version: (write_version != u64::MAX).then(|| write_version), + }; + + let account_raw_size = + std::mem::size_of::() + account_data.len() + optional_fields.size(); + + if !T::supports_shared_account_block() || account_raw_size > ACCOUNT_DATA_BLOCK_SIZE { + account_index_entries.push( + self.write_blob_account_block( + cursor, + footer, + account_metas, + account_pubkeys, + owners_table, + lamports, + rent_epoch, + account_data, + owner, + executable, + address, + hash, + write_version, + ) + .unwrap(), + ); + return Ok(data_block); + } + + // If the current data cannot fit in the current block, then + // persist the current block. + + if data_block.raw_len() + account_raw_size > ACCOUNT_DATA_BLOCK_SIZE { + self.flush_account_block( + cursor, + footer, + account_metas, + account_pubkeys, + buffered_account_metas, + buffered_account_pubkeys, + data_block, + ) + .unwrap(); + data_block = self.new_data_block_writer(footer); + } + + let owner_index = owners_table.check_and_add(owner); + let local_offset = data_block.raw_len(); + + account_index_entries.push(AccountIndexWriterEntry { + address, + block_offset: *cursor, + intra_block_offset: local_offset as u64, + }); + + let mut flags = AccountMetaFlags::new_from(&optional_fields); + flags.set_executable(executable); + let meta = T::new() + .with_lamports(lamports) + .with_owner_index(owner_index) + .with_account_data_size(account_data.len() as u64) + .with_account_data_padding(((8 - (account_data.len() % 8)) % 8).try_into().unwrap()) + .with_flags(&flags); + + // account meta first, then data, then optional fields. + data_block.write_type(&meta)?; + data_block.write(account_data)?; + data_block.write_optional_fields(&optional_fields)?; + + buffered_account_metas.push(meta); + buffered_account_pubkeys.push(address); + + Ok(data_block) + } + + fn write_blob_account_block<'a, T: TieredAccountMeta>( + &self, + cursor: &mut u64, + footer: &mut TieredStorageFooter, + account_metas: &mut Vec, + account_pubkeys: &mut Vec<&'a Pubkey>, + owners_table: &mut AccountOwnerTable, + lamports: u64, + rent_epoch: Epoch, + account_data: &[u8], + owner: &Pubkey, + executable: bool, + address: &'a Pubkey, + hash: &Hash, + write_version: StoredMetaWriteVersion, + ) -> TieredStorageResult> { + let owner_index = owners_table.check_and_add(owner); + let optional_fields = AccountMetaOptionalFields { + rent_epoch: (rent_epoch != DEFAULT_RENT_EPOCH).then(|| rent_epoch), + account_hash: (*hash != *DEFAULT_ACCOUNT_HASH).then(|| *hash), + write_version: (write_version != DEFAULT_WRITE_VERSION).then(|| write_version), + }; + + let index_entry = AccountIndexWriterEntry { + address, + block_offset: *cursor, + intra_block_offset: 0, + }; + + let mut flags = AccountMetaFlags::new_from(&optional_fields); + flags.set_executable(executable); + let meta = T::new() + .with_lamports(lamports) + .with_owner_index(owner_index) + .with_account_data_size(account_data.len() as u64) + .with_account_data_padding(((8 - (account_data.len() % 8)) % 8).try_into().unwrap()) + .with_flags(&flags); + + let mut writer = ByteBlockWriter::new(footer.account_block_format); + writer.write_type(&meta)?; + writer.write(account_data)?; + if meta.account_data_padding() > 0 { + let padding = [0u8; 8]; + writer.write(&padding[0..meta.account_data_padding() as usize])?; + } + writer.write_optional_fields(&optional_fields)?; + + let data = writer.finish().unwrap(); + let compressed_length = data.len(); + self.storage.write_bytes(&data)?; + + account_metas.push(meta); + account_pubkeys.push(address); + + *cursor += compressed_length as u64; + footer.account_entry_count += 1; + + Ok(index_entry) + } + /* + + #[allow(dead_code)] + pub fn write_from_append_vec(&self, append_vec: &AppendVec) -> TieredStorageResult<()> { + let mut footer = TieredStorageFooter::default(); + // TODO(yhchiang): make it configerable + footer.account_meta_format = self.format.account_meta_format.clone(); + footer.account_block_format = self.format.account_block_format.clone(); + footer.format_version = ACCOUNTS_DATA_STORAGE_FORMAT_VERSION; + let mut cursor = 0; + let mut account_pubkeys: Vec = vec![]; + let mut owners_table = AccountOwnerTable::new(); + let mut hash: Hash = Hash::new_unique(); + let mut account_index_entries = Vec::::new(); + + match footer.account_meta_format { + AccountMetaFormat::Hot => { + let mut account_metas = Vec::::new(); + self.write_account_blocks( + &mut cursor, + &mut footer, + &mut account_metas, + &mut account_pubkeys, + &mut owners_table, + &mut hash, + &mut account_index_entries, + &append_vec, + )?; + footer.account_meta_entry_size = std::mem::size_of::() as u32; + } /* + AccountMetaFormat::Cold => { + let mut account_metas = Vec::::new(); + self.write_account_blocks( + &mut cursor, + &mut footer, + &mut account_metas, + &mut account_pubkeys, + &mut owners_table, + &mut hash, + &mut account_index_entries, + &append_vec, + )?; + footer.account_meta_entry_size = std::mem::size_of::() as u32; + }*/ + } + self.write_account_pubkeys_block(&mut cursor, &mut footer, &account_index_entries)?; + self.write_owners_block(&mut cursor, &mut footer, &owners_table.owners_vec)?; + + assert_eq!( + self.format.meta_entry_size as u32, + footer.account_meta_entry_size + ); + footer.write_footer_block(&self.storage)?; + + Ok(()) + } + + fn write_account_blocks( + &self, + cursor: &mut u64, + footer: &mut TieredStorageFooter, + account_metas: &mut Vec, + account_pubkeys: &mut Vec, + owners_table: &mut AccountOwnerTable, + // TODO(yhchiang): update hash + _hash: &mut Hash, + account_index_entries: &mut Vec, + append_vec: &AppendVec, + ) -> TieredStorageResult<()> { + let mut offset = 0; + footer.account_block_size = ACCOUNT_DATA_BLOCK_SIZE as u64; + + let mut buffered_account_metas = Vec::::new(); + let mut buffered_account_pubkeys: Vec = vec![]; + let mut data_block_writer = self.new_data_block_writer(footer); + + while let Some((account, next_offset)) = append_vec.get_account(offset) { + offset = next_offset; + data_block_writer = self.write_stored_account_meta( + &account, + cursor, + footer, + account_metas, + account_pubkeys, + owners_table, + data_block_writer, + &mut buffered_account_metas, + &mut buffered_account_pubkeys, + _hash, + account_index_entries, + )?; + } + + // Persist the last block if any + if buffered_account_metas.len() > 0 { + self.flush_account_block( + cursor, + footer, + account_metas, + account_pubkeys, + &mut buffered_account_metas, + &mut buffered_account_pubkeys, + data_block_writer, + )?; + } + + assert_eq!(buffered_account_metas.len(), 0); + assert_eq!(buffered_account_pubkeys.len(), 0); + assert_eq!(footer.account_entry_count, account_metas.len() as u32); + + Ok(()) + } + */ +} diff --git a/sdk/src/account.rs b/sdk/src/account.rs index 4d1bbf3e755b26..6a5e0b841083e5 100644 --- a/sdk/src/account.rs +++ b/sdk/src/account.rs @@ -105,15 +105,15 @@ impl Serialize for AccountSharedData { #[serde(from = "Account")] pub struct AccountSharedData { /// lamports in the account - lamports: u64, + pub lamports: u64, /// data held in this account - data: Arc>, + pub data: Arc>, /// the program that owns this account. If executable, the program that loads this account. - owner: Pubkey, + pub owner: Pubkey, /// this account's data contains a loaded program (and is now read-only) - executable: bool, + pub executable: bool, /// the epoch at which this account will next owe rent - rent_epoch: Epoch, + pub rent_epoch: Epoch, } /// Compares two ReadableAccounts