-
Notifications
You must be signed in to change notification settings - Fork 253
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Speed up altair block processing 2x #3115
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -237,7 +237,7 @@ proc initialize_beacon_state_from_eth1*( | |
deposits.len) | ||
state.eth1_deposit_index = deposits.lenu64 | ||
|
||
var pubkeyToIndex = initTable[ValidatorPubKey, int]() | ||
var pubkeyToIndex = initTable[ValidatorPubKey, ValidatorIndex]() | ||
for idx, deposit in deposits: | ||
let | ||
pubkey = deposit.pubkey | ||
|
@@ -249,7 +249,7 @@ proc initialize_beacon_state_from_eth1*( | |
do: | ||
if skipBlsValidation in flags or | ||
verify_deposit_signature(cfg, deposit): | ||
pubkeyToIndex[pubkey] = state.validators.len | ||
pubkeyToIndex[pubkey] = ValidatorIndex(state.validators.len) | ||
if not state.validators.add(get_validator_from_deposit(deposit)): | ||
raiseAssert "too many validators" | ||
if not state.balances.add(amount): | ||
|
@@ -707,6 +707,9 @@ func get_next_sync_committee_keys(state: altair.BeaconState | merge.BeaconState) | |
## Return the sequence of sync committee indices (which may include | ||
## duplicate indices) for the next sync committee, given a ``state`` at a | ||
## sync committee period boundary. | ||
# The sync committe depends on seed and effective balance - it can | ||
# thus only be computed for the current epoch of the state, after balance | ||
# updates have been performed | ||
|
||
let epoch = get_current_epoch(state) + 1 | ||
|
||
|
@@ -744,9 +747,9 @@ proc get_next_sync_committee*(state: altair.BeaconState | merge.BeaconState): | |
# see signatures_batch, TODO shouldn't be here | ||
# Deposit processing ensures all keys are valid | ||
var attestersAgg: AggregatePublicKey | ||
attestersAgg.init(res.pubkeys.data[0].loadWithCache().get) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's wrong with using the cache here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The cache would only be used once per day for the 512 lookups and uses quite a lot of memory (absolute minimum 144 bytes per validator + hashset and threadvar overhead which is significant) - thanks to the index cache (about 4kb fixed size), the cost no longer matches the benefit. There are no other uses of this cache outside of test cases (I'd generally remove it). There's an additional optimization that can be done to pre-seed the statecache with the index cache as long as the states line up, but that's for a separate PR - the main benefit is during replays where many blocks need to be applied - the creation of the index mapping is the elephant in the room. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously, the keys would end up in the cache anyway because the cache was also used in gossip validation. I guess it's now possible to introduce a more precise eviction of the cache around the update points of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we have two caches of the same data, where one would be enough: |
||
attestersAgg.init(res.pubkeys.data[0].load().get) | ||
for i in 1 ..< res.pubkeys.data.len: | ||
attestersAgg.aggregate(res.pubkeys.data[i].loadWithCache().get) | ||
attestersAgg.aggregate(res.pubkeys.data[i].load().get) | ||
|
||
res.aggregate_pubkey = finish(attestersAgg).toPubKey() | ||
res | ||
|
@@ -922,3 +925,36 @@ func latest_block_root*(state: ForkyBeaconState, state_root: Eth2Digest): Eth2Di | |
|
||
func latest_block_root*(state: ForkyHashedBeaconState): Eth2Digest = | ||
latest_block_root(state.data, state.root) | ||
|
||
func get_sync_committee_cache*( | ||
state: altair.BeaconState | merge.BeaconState, cache: var StateCache): | ||
SyncCommitteeCache = | ||
let period = state.slot.sync_committee_period() | ||
|
||
cache.sync_committees.withValue(period, v) do: | ||
return v[] | ||
|
||
var | ||
s = toHashSet(state.current_sync_committee.pubkeys.data) | ||
|
||
for pk in state.next_sync_committee.pubkeys.data: | ||
s.incl(pk) | ||
|
||
var pubkeyIndices: Table[ValidatorPubKey, ValidatorIndex] | ||
for i, v in state.validators: | ||
if v.pubkey in s: | ||
pubkeyIndices[v.pubkey] = i.ValidatorIndex | ||
|
||
var res: SyncCommitteeCache | ||
try: | ||
for i in 0..<res.current_sync_committee.len(): | ||
res.current_sync_committee[i] = | ||
pubkeyIndices[state.current_sync_committee.pubkeys[i]] | ||
res.next_sync_committee[i] = | ||
pubkeyIndices[state.next_sync_committee.pubkeys[i]] | ||
except KeyError: | ||
raiseAssert "table constructed just above" | ||
|
||
cache.sync_committees[period] = res | ||
|
||
res |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -414,4 +414,48 @@ proc collectSignatureSets*( | |
volex.message.epoch, | ||
DOMAIN_VOLUNTARY_EXIT) | ||
|
||
block: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For documentation it would be nice to have a comment section There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 8e445dc - there's not much to write though, like proposer signature, it's just a single field with a signature in it |
||
# 7. SyncAggregate | ||
# ---------------------------------------------------- | ||
withState(state): | ||
when stateFork >= BeaconStateFork.Altair and | ||
(signed_block is altair.SignedBeaconBlock or | ||
signed_block is merge.SignedBeaconBlock): | ||
let | ||
current_sync_committee = | ||
state.data.get_sync_committee_cache(cache).current_sync_committee | ||
|
||
var inited = false | ||
var attestersAgg{.noInit.}: AggregatePublicKey | ||
for i in 0 ..< current_sync_committee.len: | ||
if signed_block.message.body.sync_aggregate.sync_committee_bits[i]: | ||
let key = validatorKeys.load(current_sync_committee[i]) | ||
if not key.isSome(): | ||
return err("Invalid key cache") | ||
|
||
if not inited: # first iteration | ||
attestersAgg.init(key.get()) | ||
inited = true | ||
else: | ||
attestersAgg.aggregate(key.get()) | ||
|
||
if not inited: | ||
if signed_block.message.body.sync_aggregate.sync_committee_signature != | ||
default(CookedSig).toValidatorSig(): | ||
return err("process_sync_aggregate: empty sync aggregates need signature of point at infinity") | ||
else: | ||
let | ||
attesters = finish(attestersAgg) | ||
previous_slot = max(state.data.slot, Slot(1)) - 1 | ||
|
||
sigs.addSignatureSet( | ||
attesters, | ||
get_block_root_at_slot(state.data, previous_slot), | ||
signed_block.message.body.sync_aggregate.sync_committee_signature.loadOrExit( | ||
"process_sync_aggregate: cannot load signature"), | ||
state.data.fork, | ||
state.data.genesis_validators_root, | ||
previous_slot.epoch, | ||
DOMAIN_SYNC_COMMITTEE) | ||
|
||
ok() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The previous use of the cache here was quite important for performance. @mratsim, is
dag.validatorKey(index)
considered fast enough? (it's based internally onloadValid
which in turn is based onfromBytesKnownOnCurve
).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
block_sim
is a good benchmark for this, because it simulates the full network traffic around sync committees and their aggregation. Thencli_db bench
just replays blocks, so it covers only the sync aggregate verification code.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ImmutableValidatorData2
stores uncompressed pubkeys that are quite fast to load - this was introduced a while ago, giving a decent performance boost: d859bc1We're using this same approach for attestations and everything else, pretty much - if we're going to update it, we should update it across the board by changing the backend storage for
validatorKey
- sync message validation was the outlier here, andloadWithCache
is in general a problematic construct.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Posting before/after
block_sim
results will be a convincing evidence that there is no performance regression.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've added a benchmark for pubkeys and signature deserialization.
On a 11th gen Intel it's 14 µs per key.
status-im/nim-blscurve#126
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that said, it's likely a valid optimization point regardless, to load the data into cooked pubkeys on startup.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure 13440 is relevant here - different data set, multiple flaws in methodology
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, if loading uncompressed keys is so fast, loading them on start-up shouldn't be a problem either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
separate PR though, since it affects attestations significantly
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#3122