Skip to content

Commit

Permalink
Conserve disk space by raising default SPRP (#3137)
Browse files Browse the repository at this point in the history
## Proposed Changes

Increase the default `--slots-per-restore-point` to 8192 for a 4x reduction in freezer DB disk usage.

Existing nodes that use the previous default of 2048 will be left unchanged. Newly synced nodes (with or without checkpoint sync) will use the new 8192 default. 

Long-term we could do away with the freezer DB entirely for validator-only nodes, but this change is much simpler and grants us some extra space in the short term. We can also roll it out gradually across our nodes by purging databases one by one, while keeping the Ansible config the same.

## Additional Info

We ignore a change from 2048 to 8192 if the user hasn't set 8192 explicitly. A debug log is emitted whenever the change is ignored:

```
DEBG Ignoring slots-per-restore-point config in favour of on-disk value, on_disk: 2048, config: 8192
```
  • Loading branch information
michaelsproul committed Apr 1, 2022
1 parent 414197b commit 375e2b4
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 17 deletions.
2 changes: 2 additions & 0 deletions beacon_node/http_api/src/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ pub fn info<T: BeaconChainTypes>(
) -> Result<DatabaseInfo, warp::Rejection> {
let store = &chain.store;
let split = store.get_split_info();
let config = store.get_config().clone();
let anchor = store.get_anchor_info();

Ok(DatabaseInfo {
schema_version: CURRENT_SCHEMA_VERSION.as_u64(),
config,
split,
anchor,
})
Expand Down
17 changes: 12 additions & 5 deletions beacon_node/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,9 @@ pub fn get_config<E: EthSpec>(
client_config.freezer_db_path = Some(PathBuf::from(freezer_dir));
}

client_config.store.slots_per_restore_point = get_slots_per_restore_point::<E>(cli_args)?;
let (sprp, sprp_explicit) = get_slots_per_restore_point::<E>(cli_args)?;
client_config.store.slots_per_restore_point = sprp;
client_config.store.slots_per_restore_point_set_explicitly = sprp_explicit;

if let Some(block_cache_size) = cli_args.value_of("block-cache-size") {
client_config.store.block_cache_size = block_cache_size
Expand Down Expand Up @@ -813,15 +815,20 @@ pub fn get_data_dir(cli_args: &ArgMatches) -> PathBuf {
}

/// Get the `slots_per_restore_point` value to use for the database.
pub fn get_slots_per_restore_point<E: EthSpec>(cli_args: &ArgMatches) -> Result<u64, String> {
///
/// Return `(sprp, set_explicitly)` where `set_explicitly` is `true` if the user provided the value.
pub fn get_slots_per_restore_point<E: EthSpec>(
cli_args: &ArgMatches,
) -> Result<(u64, bool), String> {
if let Some(slots_per_restore_point) =
clap_utils::parse_optional(cli_args, "slots-per-restore-point")?
{
Ok(slots_per_restore_point)
Ok((slots_per_restore_point, true))
} else {
Ok(std::cmp::min(
let default = std::cmp::min(
E::slots_per_historical_root() as u64,
store::config::DEFAULT_SLOTS_PER_RESTORE_POINT,
))
);
Ok((default, false))
}
}
6 changes: 5 additions & 1 deletion beacon_node/store/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@ use ssz::{Decode, Encode};
use ssz_derive::{Decode, Encode};
use types::{EthSpec, MinimalEthSpec};

pub const DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 2048;
pub const PREV_DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 2048;
pub const DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 8192;
pub const DEFAULT_BLOCK_CACHE_SIZE: usize = 5;

/// Database configuration parameters.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct StoreConfig {
/// Number of slots to wait between storing restore points in the freezer database.
pub slots_per_restore_point: u64,
/// Flag indicating whether the `slots_per_restore_point` was set explicitly by the user.
pub slots_per_restore_point_set_explicitly: bool,
/// Maximum number of blocks to store in the in-memory block cache.
pub block_cache_size: usize,
/// Whether to compact the database on initialization.
Expand All @@ -36,6 +39,7 @@ impl Default for StoreConfig {
Self {
// Safe default for tests, shouldn't ever be read by a CLI node.
slots_per_restore_point: MinimalEthSpec::slots_per_historical_root() as u64,
slots_per_restore_point_set_explicitly: false,
block_cache_size: DEFAULT_BLOCK_CACHE_SIZE,
compact_on_init: false,
compact_on_prune: true,
Expand Down
35 changes: 32 additions & 3 deletions beacon_node/store/src/hot_cold_store.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use crate::chunked_vector::{
store_updated_vector, BlockRoots, HistoricalRoots, RandaoMixes, StateRoots,
};
use crate::config::{OnDiskStoreConfig, StoreConfig};
use crate::config::{
OnDiskStoreConfig, StoreConfig, DEFAULT_SLOTS_PER_RESTORE_POINT,
PREV_DEFAULT_SLOTS_PER_RESTORE_POINT,
};
use crate::forwards_iter::{HybridForwardsBlockRootsIterator, HybridForwardsStateRootsIterator};
use crate::impls::beacon_state::{get_full_state, store_full_state};
use crate::iter::{ParentRootBlockIterator, StateRootsIterator};
Expand Down Expand Up @@ -150,7 +153,7 @@ impl<E: EthSpec> HotColdDB<E, LevelDB<E>, LevelDB<E>> {
) -> Result<Arc<Self>, Error> {
Self::verify_slots_per_restore_point(config.slots_per_restore_point)?;

let db = Arc::new(HotColdDB {
let mut db = HotColdDB {
split: RwLock::new(Split::default()),
anchor_info: RwLock::new(None),
cold_db: LevelDB::open(cold_path)?,
Expand All @@ -160,10 +163,31 @@ impl<E: EthSpec> HotColdDB<E, LevelDB<E>, LevelDB<E>> {
spec,
log,
_phantom: PhantomData,
});
};

// Allow the slots-per-restore-point value to stay at the previous default if the config
// uses the new default. Don't error on a failed read because the config itself may need
// migrating.
if let Ok(Some(disk_config)) = db.load_config() {
if !db.config.slots_per_restore_point_set_explicitly
&& disk_config.slots_per_restore_point == PREV_DEFAULT_SLOTS_PER_RESTORE_POINT
&& db.config.slots_per_restore_point == DEFAULT_SLOTS_PER_RESTORE_POINT
{
debug!(
db.log,
"Ignoring slots-per-restore-point config in favour of on-disk value";
"config" => db.config.slots_per_restore_point,
"on_disk" => disk_config.slots_per_restore_point,
);

// Mutate the in-memory config so that it's compatible.
db.config.slots_per_restore_point = PREV_DEFAULT_SLOTS_PER_RESTORE_POINT;
}
}

// Ensure that the schema version of the on-disk database matches the software.
// If the version is mismatched, an automatic migration will be attempted.
let db = Arc::new(db);
if let Some(schema_version) = db.load_schema_version()? {
debug!(
db.log,
Expand Down Expand Up @@ -1108,6 +1132,11 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
.map_or(self.spec.genesis_slot, |anchor| anchor.oldest_block_slot)
}

/// Return the in-memory configuration used by the database.
///
/// This is a borrow of `self.config`. Note that its `slots_per_restore_point` may have been
/// reset to `PREV_DEFAULT_SLOTS_PER_RESTORE_POINT` while the database was being opened (to stay
/// compatible with the on-disk config), so it can differ from the config originally supplied.
pub fn get_config(&self) -> &StoreConfig {
&self.config
}

/// Load previously-stored config from disk.
fn load_config(&self) -> Result<Option<OnDiskStoreConfig>, Error> {
self.hot_db.get(&CONFIG_KEY)
Expand Down
24 changes: 18 additions & 6 deletions book/src/advanced_database.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,39 @@ states to slow down dramatically. A lower _slots per restore point_ value (SPRP)
frequent restore points, while a higher SPRP corresponds to less frequent ones. The table below shows
some example values.

| Use Case | SPRP | Yearly Disk Usage | Load Historical State |
| ---------------------- | -------------- | ----------------- | --------------------- |
| Block explorer/analysis | 32 | 1.4 TB | 155 ms |
| Default | 2048 | 23.1 GB | 10.2 s |
| Validator only | 8192 | 5.7 GB | 41 s |
| Use Case | SPRP | Yearly Disk Usage | Load Historical State |
| ---------------------- | -------------- | ----------------- | --------------------- |
| Block explorer/analysis | 32 | 1.4 TB | 155 ms |
| Hobbyist (prev. default) | 2048 | 23.1 GB | 10.2 s |
| Validator only (default) | 8192 | 5.7 GB | 41 s |

As you can see, it's a high-stakes trade-off! Disk usage is inversely proportional to SPRP, while
historical state load time is directly proportional to it – doubling SPRP halves disk usage and
doubles load time. The minimum SPRP is 32, and the maximum is 8192.

The default value is 8192 for databases synced from scratch using Lighthouse v2.2.0 or later, or
2048 for prior versions. Please see the section on [Defaults](#defaults) below.

The values shown in the table are approximate, calculated using a simple heuristic: each
`BeaconState` consumes around 18 MB of disk space, and each block replayed takes around 5 ms. The
**Yearly Disk Usage** column shows the approximate size of the freezer DB _alone_ (hot DB not included),
and the **Load Historical State** time is the worst-case load time for a state in the last slot
before a restore point.

### Defaults

As of Lighthouse v2.2.0, the default slots-per-restore-point value has been increased from 2048
to 8192 in order to conserve disk space. Existing nodes will continue to use SPRP=2048 unless
re-synced. Note that it is currently not possible to change the SPRP without re-syncing, although
fast re-syncing may be achieved with [Checkpoint Sync](./checkpoint-sync.md).

### CLI Configuration

To configure your Lighthouse node's database with a non-default SPRP, run your Beacon Node with
the `--slots-per-restore-point` flag:

```bash
lighthouse beacon_node --slots-per-restore-point 8192
lighthouse beacon_node --slots-per-restore-point 32
```

## Glossary
Expand Down
6 changes: 6 additions & 0 deletions book/src/api-lighthouse.md
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,12 @@ curl "http://localhost:5052/lighthouse/database/info" | jq
```json
{
"schema_version": 5,
"config": {
"slots_per_restore_point": 2048,
"slots_per_restore_point_set_explicitly": false,
"block_cache_size": 5,
"compact_on_init": false,
"compact_on_prune": true
},
"split": {
"slot": "2034912",
"state_root": "0x11c8516aa7d4d1613e84121e3a557ceca34618b4c1a38f05b66ad045ff82b33b"
Expand Down
3 changes: 2 additions & 1 deletion common/eth2/src/lighthouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use reqwest::IntoUrl;
use serde::{Deserialize, Serialize};
use ssz::four_byte_option_impl;
use ssz_derive::{Decode, Encode};
use store::{AnchorInfo, Split};
use store::{AnchorInfo, Split, StoreConfig};

pub use attestation_performance::{
AttestationPerformance, AttestationPerformanceQuery, AttestationPerformanceStatistics,
Expand Down Expand Up @@ -334,6 +334,7 @@ impl Eth1Block {
/// Database information returned by the HTTP API's database `info` handler
/// (the `/lighthouse/database/info` endpoint).
#[derive(Debug, Serialize, Deserialize)]
pub struct DatabaseInfo {
/// Database schema version; the handler fills this from `CURRENT_SCHEMA_VERSION`.
pub schema_version: u64,
/// Copy of the store's in-memory configuration, as returned by `store.get_config()`.
pub config: StoreConfig,
/// Split information, as returned by `store.get_split_info()`.
pub split: Split,
/// Anchor information, as returned by `store.get_anchor_info()`; may be absent.
pub anchor: Option<AnchorInfo>,
}
Expand Down
4 changes: 3 additions & 1 deletion database_manager/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ fn parse_client_config<E: EthSpec>(
client_config.freezer_db_path = Some(freezer_dir);
}

client_config.store.slots_per_restore_point = get_slots_per_restore_point::<E>(cli_args)?;
let (sprp, sprp_explicit) = get_slots_per_restore_point::<E>(cli_args)?;
client_config.store.slots_per_restore_point = sprp;
client_config.store.slots_per_restore_point_set_explicitly = sprp_explicit;

Ok(client_config)
}
Expand Down
34 changes: 34 additions & 0 deletions lighthouse/tests/beacon_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,40 @@ fn slots_per_restore_point_flag() {
.run_with_zero_port()
.with_config(|config| assert_eq!(config.store.slots_per_restore_point, 64));
}
// Regression test for the change of the default slots-per-restore-point (SPRP): a datadir that
// was initialised with the previous default (2048) must still be usable when the beacon node is
// restarted without the flag, i.e. with the new default (8192) coming from the CLI config.
#[test]
fn slots_per_restore_point_update_prev_default() {
use beacon_node::beacon_chain::store::config::{
DEFAULT_SLOTS_PER_RESTORE_POINT, PREV_DEFAULT_SLOTS_PER_RESTORE_POINT,
};

// First run: create the datadir with SPRP explicitly set to 2048.
CommandLineTest::new()
.flag("slots-per-restore-point", Some("2048"))
.run_with_zero_port()
.with_config_and_dir(|config, dir| {
// Check that 2048 is the previous default.
assert_eq!(
config.store.slots_per_restore_point,
PREV_DEFAULT_SLOTS_PER_RESTORE_POINT
);

// Restart the BN with the same datadir and the new default SPRP. It should
// allow this.
// Second run: no `slots-per-restore-point` flag, so the CLI config uses the default.
CommandLineTest::new()
.flag("datadir", Some(&dir.path().display().to_string()))
.flag("zero-ports", None)
.run_with_no_datadir()
.with_config(|config| {
// The dumped config will have the new default 8192 value, but the fact that
// the BN started and ran (with the same datadir) means that the override
// was successful.
assert_eq!(
config.store.slots_per_restore_point,
DEFAULT_SLOTS_PER_RESTORE_POINT
);
});
})
}

#[test]
fn block_cache_size_flag() {
CommandLineTest::new()
Expand Down

0 comments on commit 375e2b4

Please sign in to comment.