-
Notifications
You must be signed in to change notification settings - Fork 5
proper synchronized mining #143
base: master
Are you sure you want to change the base?
Changes from 2 commits
8695873
193d78f
4b0929d
d3a593f
072e022
1f2903e
486f936
89c07dc
0dc62c4
c467b32
2e3de49
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Testground composition for the "sync-mining" case of the lotus-soup plan.
# Runs 4 instances locally: 1 bootstrapper, 2 miners, 1 client.
[metadata]
name = "lotus-soup"
author = "raulk"

[global]
plan = "lotus-soup"
case = "sync-mining"
# NOTE: must equal the sum of the group instance counts declared below (1+2+1).
total_instances = 4
builder = "exec:go"
runner = "local:exec"

build = { selectors = ["testground"] }
build_config = { enable_go_build_cache = true }
# Presumably pprof (6060) and the lotus node/miner APIs — confirm against the plan code.
run_config = { exposed_ports = ["6060", "1234", "2345"] }

# Parameters shared by every group; individual groups only override `role`.
[global.run.test_params]
clients = "1"
miners = "2"
genesis_timestamp_offset = "0"
balance = "2000000000"
sectors = "10"
# Use the mock beacon so epochs are driven by the synchronized mock clock.
random_beacon_type = "mock"
mining_mode = "synchronized"

[[groups]]
id = "bootstrapper"
instances = { count = 1 }
[groups.run.test_params]
role = "bootstrapper"

[[groups]]
id = "miners"
instances = { count = 2 }
[groups.run.test_params]
role = "miner"

[[groups]]
id = "clients"
instances = { count = 1 }
[groups.run.test_params]
role = "client"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package main | ||
|
||
import ( | ||
"github.com/filecoin-project/oni/lotus-soup/testkit" | ||
) | ||
|
||
// syncMining is the test-case entry point for the "sync-mining" scenario.
// It delegates to the default role dispatcher, which routes the instance to
// its bootstrapper/miner/client behavior based on the "role" test parameter.
func syncMining(t *testkit.TestEnvironment) error {
	return testkit.HandleDefaultRole(t)
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
package testkit | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"time" | ||
|
||
"github.com/filecoin-project/lotus/build" | ||
"github.com/filecoin-project/specs-actors/actors/abi" | ||
"github.com/raulk/clock" | ||
"github.com/testground/sdk-go/sync" | ||
) | ||
|
||
// ClockSyncTopic is the topic where clock synchronization events are exchanged.
// Every miner and client publishes a ClockSyncMsg here each time it advances
// its local mock clock by one epoch.
var ClockSyncTopic = sync.NewTopic("clock_sync", &ClockSyncMsg{})
// ClockSyncMsg is a clock synchronization message, announcing that the
// sending instance has advanced its local clock to the given epoch.
type ClockSyncMsg struct {
	ID    string         // sender identity, formatted as ${groupname}_${groupseq}
	Epoch abi.ChainEpoch // the epoch the sender has just advanced to
}
||
// SynchronizeClock synchronizes this instance's clock with the clocks of all
// miner and client nodes participating in this test scenario.
//
// This function takes a channel (localEpochStartCh) where we expect to receive
// local signals to advance the clock to the next epoch. Such signals should
// correspond to some local event that implies readiness to advance, such as
// "mining round completed" events from synchronized mining.
//
// Upon receiving a local advance signal, it forwards the clock by build.BlockDelaySecs,
// thus progressing one chain epoch. It then signals this fact to all other
// nodes participating in the test via the synchronization service.
//
// Once all miners and clients have advanced, this function releases the new
// abi.ChainEpoch on the `globalEpochStartCh`, thus signalling that all nodes
// are ready to commence that chain epoch.
//
// Essentially this behaviour can be assimilated to a distributed semaphore,
// where all miners and clients wait until they have locally advanced their
// clocks. Once they all signal that fact via the relevant sync topic, the
// semaphore fires and allows them all to proceed at once.
//
// All of this happens in a background goroutine.
func (n *LotusNode) SynchronizeClock(ctx context.Context, genesisTime time.Time, localEpochStartCh <-chan abi.ChainEpoch, globalEpochStartCh chan<- abi.ChainEpoch) {
	// replace the clock, setting it to the genesis timestamp + 1 epoch
	// we are now ready to mine (and receive) the first block.
	n.MockClock = clock.NewMock()
	build.Clock = n.MockClock

	// start at genesis + 100ms.
	// NOTE(review): the 100ms skew past genesis presumably avoids "block from
	// the future" rejections at epoch 0 — confirm against lotus validation.
	n.MockClock.Set(genesisTime.Add(100 * time.Millisecond))

	var (
		// our identity on the clock-sync topic: ${groupname}_${groupseq}.
		id = fmt.Sprintf("%s_%d", n.t.TestGroupID, n.t.GroupSeq)

		minerCnt  = n.t.IntParam("miners")
		clientCnt = n.t.IntParam("clients")
		totalCnt  = minerCnt + clientCnt // miner is running a worker and a mining node.

		epoch = abi.ChainEpoch(0) // current epoch

		// one chain epoch's worth of wall-clock time.
		epochInterval = time.Duration(build.BlockDelaySecs) * time.Second

		// IDs of instances that have announced their advance for the epoch
		// currently being assembled; reset once all totalCnt have reported.
		ready []string

		// buffered so slow draining doesn't block the sync-service delivery.
		signalCh = make(chan *ClockSyncMsg, 128)
		sub      = n.t.SyncClient.MustSubscribe(ctx, ClockSyncTopic, signalCh)
	)

	go func() {
		// closing the channel tells consumers no further epochs will start.
		defer close(globalEpochStartCh)

		for {
			select {
			case <-localEpochStartCh:
				// move the local clock forward, and announce that fact to all other instances.
				n.MockClock.Add(epochInterval)
				epoch++
				n.t.RecordMessage("advanced local clock: %v [epoch=%d]", n.MockClock.Now(), epoch)
				n.t.SyncClient.MustPublish(ctx, ClockSyncTopic, &ClockSyncMsg{
					ID:    id,
					Epoch: epoch,
				})

			case s := <-signalCh:
				// this is a clock event from another instance (or ourselves).
				// NOTE(review): s.Epoch is never inspected and `ready` is a plain
				// slice — if any instance ever runs an epoch ahead (or a message
				// is duplicated), announcements from different epochs would be
				// conflated into one count. Relies on all instances staying in
				// lockstep; confirm, or key the bookkeeping by (ID, Epoch).
				ready = append(ready, s.ID)
				if len(ready) != totalCnt {
					continue
				}
				// release the new epoch and reset the state.
				// `epoch` here is our local counter; with lockstep instances it
				// matches the epoch everyone just announced.
				globalEpochStartCh <- epoch
				ready = ready[0:0]

			case <-ctx.Done():
				// test context cancelled: stop synchronizing.
				return

			case <-sub.Done():
				// the sync subscription should outlive the test; dying early is
				// a fatal harness failure.
				panic("global clock synchronization subscription died")
			}
		}
	}()
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to sort this out. Synchronised mining doesn't need to reduce the block delay and propagation delay.