Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

7. feat(db): Add a transparent address transaction index #4038

Merged
merged 3 commits into from
Apr 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion zebra-state/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY;
pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1;

/// The database format version, incremented each time the database format changes.
pub const DATABASE_FORMAT_VERSION: u32 = 21;
pub const DATABASE_FORMAT_VERSION: u32 = 22;

/// The maximum number of blocks to check for NU5 transactions,
/// before we assume we are on a pre-NU5 legacy chain.
Expand Down
4 changes: 4 additions & 0 deletions zebra-state/src/service/finalized_state/disk_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,10 @@ impl DiskDb {
"utxo_loc_by_transparent_addr_loc",
db_options.clone(),
),
rocksdb::ColumnFamilyDescriptor::new(
"tx_loc_by_transparent_addr_loc",
db_options.clone(),
),
// Sprout
rocksdb::ColumnFamilyDescriptor::new("sprout_nullifiers", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("sprout_anchors", db_options.clone()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ pub const TRANSACTION_LOCATION_DISK_BYTES: usize = HEIGHT_DISK_BYTES + TX_INDEX_
any(test, feature = "proptest-impl"),
derive(Arbitrary, Serialize, Deserialize)
)]
pub struct TransactionIndex(u16);
pub struct TransactionIndex(pub(super) u16);

impl TransactionIndex {
/// Creates a transaction index from the inner type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ use crate::service::finalized_state::{
disk_format::{
block::MAX_ON_DISK_HEIGHT,
transparent::{
AddressBalanceLocation, AddressLocation, AddressUnspentOutput, OutputLocation,
AddressBalanceLocation, AddressLocation, AddressTransaction, AddressUnspentOutput,
OutputLocation,
},
IntoDisk, TransactionLocation,
},
Expand Down Expand Up @@ -191,6 +192,20 @@ fn roundtrip_address_unspent_output() {
);
}

/// Property test for the on-disk format of [`AddressTransaction`].
///
/// Both heights inside each generated value are clamped into
/// `Height(0)..=MAX_ON_DISK_HEIGHT` before the value is checked.
#[test]
fn roundtrip_address_transaction() {
    zebra_test::init();

    proptest!(|(mut val in any::<AddressTransaction>())| {
        // Clamp the height stored in the address location.
        let address_height = val.address_location().height();
        *val.address_location_mut().height_mut() =
            address_height.clamp(Height(0), MAX_ON_DISK_HEIGHT);

        // Clamp the height stored in the transaction location.
        let transaction_height = val.transaction_location().height;
        val.transaction_location_mut().height =
            transaction_height.clamp(Height(0), MAX_ON_DISK_HEIGHT);

        assert_value_properties(val)
    });
}

#[test]
fn roundtrip_amount() {
zebra_test::init();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ expression: cf_names
"tip_chain_value_pool",
"tx_by_hash",
"tx_by_loc",
"tx_loc_by_transparent_addr_loc",
"utxo_by_outpoint",
"utxo_loc_by_transparent_addr_loc",
]
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ expression: empty_column_families
"sprout_anchors: no entries",
"sprout_nullifiers: no entries",
"tip_chain_value_pool: no entries",
"tx_loc_by_transparent_addr_loc: no entries",
"utxo_by_outpoint: no entries",
"utxo_loc_by_transparent_addr_loc: no entries",
]
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ expression: empty_column_families
"tip_chain_value_pool: no entries",
"tx_by_hash: no entries",
"tx_by_loc: no entries",
"tx_loc_by_transparent_addr_loc: no entries",
"utxo_by_outpoint: no entries",
"utxo_loc_by_transparent_addr_loc: no entries",
]
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ expression: empty_column_families
"sprout_anchors: no entries",
"sprout_nullifiers: no entries",
"tip_chain_value_pool: no entries",
"tx_loc_by_transparent_addr_loc: no entries",
"utxo_by_outpoint: no entries",
"utxo_loc_by_transparent_addr_loc: no entries",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
KV(
k: "00000100000000010000020000",
v: "",
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
KV(
k: "00000100000000010000020000",
v: "",
),
]
121 changes: 121 additions & 0 deletions zebra-state/src/service/finalized_state/disk_format/transparent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,96 @@ impl AddressUnspentOutput {
}
}

/// A single transaction sent to a [`transparent::Address`].
///
/// We store both the address location key and transaction location value
/// in the RocksDB column family key. This improves insert and delete performance.
///
/// This requires 8 extra bytes for each transaction location,
/// because we repeat the key for each value.
/// But RocksDB compression reduces the duplicate data size on disk.
///
/// The derived ordering compares fields in declaration order:
/// `address_location` first, then `transaction_location`.
/// The [`IntoDisk`] implementation serializes the fields in the same order.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
#[cfg_attr(
any(test, feature = "proptest-impl"),
derive(Arbitrary, Serialize, Deserialize)
)]
pub struct AddressTransaction {
/// The location of the first [`transparent::Output`] sent to the address.
///
/// This location identifies the address, and can be used to look it up.
address_location: AddressLocation,

/// The location of the transaction sent to the address.
transaction_location: TransactionLocation,
}

impl AddressTransaction {
    /// Create a new [`AddressTransaction`] from an address location,
    /// and a transaction location.
    pub fn new(
        address_location: AddressLocation,
        transaction_location: TransactionLocation,
    ) -> AddressTransaction {
        AddressTransaction {
            address_location,
            transaction_location,
        }
    }

    /// Create an [`AddressTransaction`] which starts iteration for the supplied address.
    /// Used to look up the first transaction with [`ReadDisk::zs_next_key_value_from`].
    ///
    /// The transaction location is before all transaction locations in the index.
    /// It is always invalid, due to the genesis consensus rules. But this is not an issue
    /// since [`ReadDisk::zs_next_key_value_from`] will fetch the next existing (valid) value.
    pub fn address_iterator_start(address_location: AddressLocation) -> AddressTransaction {
        // Iterating from the lowest possible transaction location gets us the first transaction.
        let zero_transaction_location = TransactionLocation::from_usize(Height(0), 0);

        Self::new(address_location, zero_transaction_location)
    }

    /// Update the transaction location to the next possible transaction for the supplied address.
    /// Used to look up the next transaction with [`ReadDisk::zs_next_key_value_from`].
    ///
    /// The updated transaction location may be invalid, which is not an issue
    /// since [`ReadDisk::zs_next_key_value_from`] will fetch the next existing (valid) value.
    ///
    /// # Panics
    ///
    /// If the transaction index is already at its maximum value.
    pub fn address_iterator_next(&mut self) {
        // Iterating from the next possible transaction location gets us the next transaction,
        // even if it is in a later block.
        //
        // Consensus: the block size limit is 2MB, which is much lower than the index range.
        //
        // The overflow check is explicit, so that release builds can't silently wrap
        // back to index zero and restart iteration from an earlier location.
        let next_index = self
            .transaction_location
            .index
            .0
            .checked_add(1)
            .expect("transaction index is limited by the block size, so it can't overflow");

        self.transaction_location.index.0 = next_index;
    }

    /// The location of the first [`transparent::Output`] sent to the address of this transaction.
    ///
    /// This can be used to look up the address.
    pub fn address_location(&self) -> AddressLocation {
        self.address_location
    }

    /// The location of this transaction.
    pub fn transaction_location(&self) -> TransactionLocation {
        self.transaction_location
    }

    /// Allows tests to modify the address location.
    #[cfg(any(test, feature = "proptest-impl"))]
    #[allow(dead_code)]
    pub fn address_location_mut(&mut self) -> &mut AddressLocation {
        &mut self.address_location
    }

    /// Allows tests to modify the transaction location.
    #[cfg(any(test, feature = "proptest-impl"))]
    #[allow(dead_code)]
    pub fn transaction_location_mut(&mut self) -> &mut TransactionLocation {
        &mut self.transaction_location
    }
}

// Transparent trait impls

/// Returns a byte representing the [`transparent::Address`] variant.
Expand Down Expand Up @@ -547,3 +637,34 @@ impl FromDisk for AddressUnspentOutput {
AddressUnspentOutput::new(address_location, unspent_output_location)
}
}

impl IntoDisk for AddressTransaction {
    type Bytes = [u8; OUTPUT_LOCATION_DISK_BYTES + TRANSACTION_LOCATION_DISK_BYTES];

    /// Serializes this value as the address location bytes,
    /// immediately followed by the transaction location bytes.
    fn as_bytes(&self) -> Self::Bytes {
        let address_location_bytes: [u8; OUTPUT_LOCATION_DISK_BYTES] =
            self.address_location().as_bytes();
        let transaction_location_bytes: [u8; TRANSACTION_LOCATION_DISK_BYTES] =
            self.transaction_location().as_bytes();

        // Fill a fixed-size array on the stack, rather than collecting into
        // a temporary `Vec` and converting it back into an array.
        let mut disk_bytes = [0_u8; OUTPUT_LOCATION_DISK_BYTES + TRANSACTION_LOCATION_DISK_BYTES];
        let (address_part, transaction_part) =
            disk_bytes.split_at_mut(OUTPUT_LOCATION_DISK_BYTES);

        // The lengths always match: each part is sized by the same constant
        // as the array that is copied into it.
        address_part.copy_from_slice(&address_location_bytes);
        transaction_part.copy_from_slice(&transaction_location_bytes);

        disk_bytes
    }
}

impl FromDisk for AddressTransaction {
    /// Deserializes a value from the address location bytes,
    /// followed by the transaction location bytes.
    ///
    /// The first `OUTPUT_LOCATION_DISK_BYTES` bytes are parsed as the address location,
    /// and all the remaining bytes are parsed as the transaction location.
    fn from_bytes(disk_bytes: impl AsRef<[u8]>) -> Self {
        let raw_bytes = disk_bytes.as_ref();
        let (address_part, transaction_part) = raw_bytes.split_at(OUTPUT_LOCATION_DISK_BYTES);

        let address_location = AddressLocation::from_bytes(address_part);
        let transaction_location = TransactionLocation::from_bytes(transaction_part);

        AddressTransaction::new(address_location, transaction_location)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -432,15 +432,15 @@ fn snapshot_block_and_transaction_data(state: &FinalizedState) {

/// Snapshot transparent address data, using `cargo insta` and RON serialization.
fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
// TODO: transactions for each address (#3951)

let balance_by_transparent_addr = state.cf_handle("balance_by_transparent_addr").unwrap();
let utxo_loc_by_transparent_addr_loc =
state.cf_handle("utxo_loc_by_transparent_addr_loc").unwrap();
let tx_loc_by_transparent_addr_loc = state.cf_handle("tx_loc_by_transparent_addr_loc").unwrap();

let mut stored_address_balances = Vec::new();
let mut stored_address_utxo_locations = Vec::new();
let mut stored_address_utxos = Vec::new();
let mut stored_address_transaction_locations = Vec::new();

// Correctness: Multi-key iteration causes hangs in concurrent code, but seems ok in tests.
let addresses =
Expand All @@ -451,6 +451,12 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
rocksdb::IteratorMode::Start,
)
.count();
let transaction_address_location_count = state
.full_iterator_cf(
&tx_loc_by_transparent_addr_loc,
rocksdb::IteratorMode::Start,
)
.count();

let addresses: Vec<transparent::Address> = addresses
.map(|(key, _value)| transparent::Address::from_bytes(key))
Expand All @@ -463,6 +469,7 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
if height == 0 {
assert_eq!(addresses.len(), 0);
assert_eq!(utxo_address_location_count, 0);
assert_eq!(transaction_address_location_count, 0);
return;
}

Expand All @@ -487,21 +494,37 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
stored_utxos.push(utxo);
}

let mut stored_transaction_locations = Vec::new();
for transaction_location in state.address_transaction_locations(stored_address_location) {
assert_eq!(
transaction_location.address_location(),
stored_address_location
);

stored_transaction_locations.push(transaction_location.transaction_location());
}

// Check that the lists are in chain order
//
// TODO: check that the transaction list is in chain order (#3951)
assert!(
is_sorted(&stored_utxo_locations),
"unsorted: {:?}\n\
for address: {:?}",
stored_utxo_locations,
address,
);
assert!(
is_sorted(&stored_transaction_locations),
"unsorted: {:?}\n\
for address: {:?}",
stored_transaction_locations,
address,
);

// The default raw data serialization is very verbose, so we hex-encode the bytes.
stored_address_balances.push((address.to_string(), stored_address_balance_location));
stored_address_utxo_locations.push((stored_address_location, stored_utxo_locations));
stored_address_utxos.push((address, stored_utxos));
stored_address_transaction_locations.push((address, stored_transaction_locations));
}

// We want to snapshot the order in the database,
Expand All @@ -511,6 +534,10 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
// TODO: change these names to address_utxo_locations and address_utxos
insta::assert_ron_snapshot!("address_utxos", stored_address_utxo_locations);
insta::assert_ron_snapshot!("address_utxo_data", stored_address_utxos);
insta::assert_ron_snapshot!(
"address_transaction_locations",
stored_address_transaction_locations
);
}

/// Return true if `list` is sorted in ascending order.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
expression: stored_address_transaction_locations
---
[
("t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd", [
TransactionLocation(
height: Height(1),
index: TransactionIndex(0),
),
]),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
expression: stored_address_transaction_locations
---
[
("t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd", [
TransactionLocation(
height: Height(1),
index: TransactionIndex(0),
),
TransactionLocation(
height: Height(2),
index: TransactionIndex(0),
),
]),
]
Loading