Skip to content
This repository has been archived by the owner on Nov 26, 2020. It is now read-only.

proper synchronized mining #143

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions lotus-soup/_compositions/sync-mining.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
[metadata]
name = "lotus-soup"
author = "raulk"

[global]
plan = "lotus-soup"
case = "sync-mining"
total_instances = 4
builder = "exec:go"
runner = "local:exec"

build = { selectors = ["testground"] }
build_config = { enable_go_build_cache = true }
run_config = { exposed_ports = ["6060", "1234", "2345"] }

[global.run.test_params]
clients = "1"
miners = "2"
genesis_timestamp_offset = "0"
balance = "2000000000"
sectors = "10"
random_beacon_type = "mock"
mining_mode = "synchronized"

[[groups]]
id = "bootstrapper"
instances = { count = 1 }
[groups.run.test_params]
role = "bootstrapper"

[[groups]]
id = "miners"
instances = { count = 2 }
[groups.run.test_params]
role = "miner"

[[groups]]
id = "clients"
instances = { count = 1 }
[groups.run.test_params]
role = "client"
6 changes: 5 additions & 1 deletion lotus-soup/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ require (
github.com/filecoin-project/go-fil-markets v0.4.0
github.com/filecoin-project/go-jsonrpc v0.1.1-0.20200602181149-522144ab4e24
github.com/filecoin-project/go-storedcounter v0.0.0-20200421200003-1c99c62e8a5b
github.com/filecoin-project/lotus v0.4.2-0.20200708212122-7d841dbfa81c
github.com/filecoin-project/lotus v0.4.2-0.20200713102953-973dbd6a1371
github.com/filecoin-project/specs-actors v0.7.1
github.com/gorilla/mux v1.7.4
github.com/hashicorp/go-multierror v1.1.0
github.com/influxdata/influxdb v1.8.0 // indirect
github.com/ipfs/go-cid v0.0.6
github.com/ipfs/go-datastore v0.4.4
Expand All @@ -28,8 +29,11 @@ require (
github.com/libp2p/go-libp2p-pubsub-tracer v0.0.0-20200626141350-e730b32bf1e6
github.com/multiformats/go-multiaddr v0.2.2
github.com/multiformats/go-multiaddr-net v0.1.5
github.com/prometheus/common v0.10.0
github.com/raulk/clock v1.1.0
github.com/testground/sdk-go v0.2.3-0.20200706132230-6a65ddac2d8c
go.opencensus.io v0.22.4
go.uber.org/fx v1.9.0
)

// This will work in all build modes: docker:go, exec:go, and local go build.
Expand Down
10 changes: 6 additions & 4 deletions lotus-soup/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -259,12 +259,12 @@ github.com/filecoin-project/go-statestore v0.1.0 h1:t56reH59843TwXHkMcwyuayStBIi
github.com/filecoin-project/go-statestore v0.1.0/go.mod h1:LFc9hD+fRxPqiHiaqUEZOinUJB4WARkRfNl10O7kTnI=
github.com/filecoin-project/go-storedcounter v0.0.0-20200421200003-1c99c62e8a5b h1:fkRZSPrYpk42PV3/lIXiL0LHetxde7vyYYvSsttQtfg=
github.com/filecoin-project/go-storedcounter v0.0.0-20200421200003-1c99c62e8a5b/go.mod h1:Q0GQOBtKf1oE10eSXSlhN45kDBdGvEcVOqMiffqX+N8=
github.com/filecoin-project/lotus v0.4.2-0.20200708212122-7d841dbfa81c h1:eqLJHtS/fTYeYX90iUYU79x5FNlVihvYr2TNGRuDFYk=
github.com/filecoin-project/lotus v0.4.2-0.20200708212122-7d841dbfa81c/go.mod h1:q1K9ZdcmxhZJhzFpFtg/peJVoMMXIUE1rRDurQToI7Q=
github.com/filecoin-project/lotus v0.4.2-0.20200713102953-973dbd6a1371 h1:tRcxTxZ9Vf6/zZ4sH/1f8kC584up8fWhPwqJXS9xuEA=
github.com/filecoin-project/lotus v0.4.2-0.20200713102953-973dbd6a1371/go.mod h1:Q6rc5AyzGOKcHsjNGN5c4v4tsV4UPXB/zu199QYuAZo=
github.com/filecoin-project/sector-storage v0.0.0-20200615154852-728a47ab99d6/go.mod h1:M59QnAeA/oV+Z8oHFLoNpGMv0LZ8Rll+vHVXX7GirPM=
github.com/filecoin-project/sector-storage v0.0.0-20200625154333-98ef8e4ef246/go.mod h1:8f0hWDzzIi1hKs4IVKH9RnDsO4LEHVz8BNat0okDOuY=
github.com/filecoin-project/sector-storage v0.0.0-20200708195134-e3b9ba01c287 h1:lMzTlms23AnWKx7+Bq0nMqSCnuE1tcMkMwtT8zfS7gE=
github.com/filecoin-project/sector-storage v0.0.0-20200708195134-e3b9ba01c287/go.mod h1:salgVdX7qeXFo/xaiEQE29J4pPkjn71T0kt0n+VDBzo=
github.com/filecoin-project/sector-storage v0.0.0-20200709184611-f0dae546b517 h1:bnYiOUDKxJtFk6jtGWSiXSYq3TeSKGx6BOjb2wa1SwQ=
github.com/filecoin-project/sector-storage v0.0.0-20200709184611-f0dae546b517/go.mod h1:salgVdX7qeXFo/xaiEQE29J4pPkjn71T0kt0n+VDBzo=
github.com/filecoin-project/specs-actors v0.0.0-20200210130641-2d1fbd8672cf/go.mod h1:xtDZUB6pe4Pksa/bAJbJ693OilaC5Wbot9jMhLm3cZA=
github.com/filecoin-project/specs-actors v0.3.0/go.mod h1:nQYnFbQ7Y0bHZyq6HDEuVlCPR+U3z5Q3wMOQ+2aiV+Y=
github.com/filecoin-project/specs-actors v0.6.0/go.mod h1:dRdy3cURykh2R8O/DKqy8olScl70rmIS7GrB4hB1IDY=
Expand Down Expand Up @@ -1249,6 +1249,8 @@ github.com/prometheus/procfs v0.0.11/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4
github.com/prometheus/procfs v0.1.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/procfs v0.1.3 h1:F0+tqvhOksq22sc6iCHF5WGlWjdwj92p0udFh1VFBS8=
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/raulk/clock v1.1.0 h1:dpb29+UKMbLqiU/jqIJptgLR1nn23HLgMY0sTCDza5Y=
github.com/raulk/clock v1.1.0/go.mod h1:3MpVxdZ/ODBQDxbN+kzshf5OSZwPjtMDx6BBXBmOeY0=
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 h1:MkV+77GLUNo5oJ0jf870itWm3D0Sjh7+Za9gazKc5LQ=
github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
Expand Down
4 changes: 2 additions & 2 deletions lotus-soup/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ import (
)

func init() {
build.BlockDelaySecs = 2
build.PropagationDelaySecs = 1
// build.BlockDelaySecs = 2
// build.PropagationDelaySecs = 1
Comment on lines +18 to +19
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to sort this out. Synchronised mining doesn't need to reduce the block delay and propagation delay.


_ = log.SetLogLevel("*", "WARN")
_ = log.SetLogLevel("dht/RtRefreshManager", "ERROR") // noisy
Expand Down
1 change: 1 addition & 0 deletions lotus-soup/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ var cases = map[string]interface{}{
"deals-stress": testkit.WrapTestEnvironment(dealsStress),
"drand-halting": testkit.WrapTestEnvironment(dealsE2E),
"paych-stress": testkit.WrapTestEnvironment(paych.Stress),
"sync-mining": testkit.WrapTestEnvironment(syncMining),
}

func main() {
Expand Down
29 changes: 29 additions & 0 deletions lotus-soup/manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,32 @@ instances = { min = 1, max = 100, default = 5 }
# ********** Test-case specific **********
increments = { type = "int", default = "100", desc = "increments in which to send payment vouchers" }
lane_count = { type = "int", default = "256", desc = "lanes to open; vouchers will be distributed across these lanes in round-robin fashion" }


[[testcases]]
name = "sync-mining"
instances = { min = 1, max = 100, default = 5 }

[testcases.params]
clients = { type = "int", default = 1 }
miners = { type = "int", default = 1 }
balance = { type = "float", default = 1 }
sectors = { type = "int", default = 1 }
role = { type = "string" }
genesis_timestamp_offset = { type = "int", default = 0 }

random_beacon_type = { type = "enum", default = "local-drand", options = ["mock", "local-drand", "external-drand"] }

# Params relevant to drand nodes. drand nodes should have role="drand", and must all be
# in the same composition group. There must be at least threshold drand nodes.
# To get lotus nodes to actually use the drand nodes, you must set random_beacon_type="local-drand"
# for the lotus node groups.
drand_period = { type = "duration", default="10s" }
drand_threshold = { type = "int", default = 2 }
drand_gossip_relay = { type = "bool", default = true }
drand_log_level = { type = "string", default="info" }
suspend_events = { type = "string", default="", desc = "a sequence of halt/resume/wait events separated by '->'" }

# Params relevant to pubsub tracing
enable_pubsub_tracer = { type = "bool", default = false } # Mining Mode: synchronized -vs- natural time
mining_mode = { type = "enum", default = "synchronized", options = ["synchronized", "natural"] }
9 changes: 9 additions & 0 deletions lotus-soup/sync_mining.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package main

import (
"github.com/filecoin-project/oni/lotus-soup/testkit"
)

func syncMining(t *testkit.TestEnvironment) error {
return testkit.HandleDefaultRole(t)
}
16 changes: 16 additions & 0 deletions lotus-soup/testkit/lotus_opts.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@ package testkit
import (
"fmt"

"github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/node"
"github.com/filecoin-project/lotus/node/config"
"github.com/filecoin-project/lotus/node/modules"
"github.com/filecoin-project/lotus/node/modules/dtypes"
"github.com/filecoin-project/lotus/node/modules/lp2p"
"github.com/filecoin-project/lotus/node/repo"
"github.com/filecoin-project/specs-actors/actors/runtime"
"github.com/prometheus/common/log"
"go.uber.org/fx"

"github.com/libp2p/go-libp2p-core/peer"
ma "github.com/multiformats/go-multiaddr"
Expand Down Expand Up @@ -65,3 +69,15 @@ func withApiEndpoint(addr string) node.Option {
return lr.SetAPIEndpoint(apima)
})
}

// withChainStore takes a double pointer to a ChainStore, creates a new
// ChainStore, and sets it under the pointer.
func withChainStore(cs **store.ChainStore) node.Option {
return node.Override(new(*store.ChainStore), func(_ fx.Lifecycle, bs dtypes.ChainBlockstore, ds dtypes.MetadataDS, syscalls runtime.Syscalls) *store.ChainStore {
*cs = store.NewChainStore(bs, ds, syscalls)
if err := (*cs).Load(); err != nil {
log.Warnf("loading chain state from disk: %s", err)
}
return *cs
})
}
18 changes: 13 additions & 5 deletions lotus-soup/testkit/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@ import (
"sort"
"time"

"github.com/filecoin-project/go-address"
"github.com/filecoin-project/lotus/api"
"github.com/filecoin-project/lotus/chain/beacon"
"github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/wallet"
"github.com/filecoin-project/lotus/metrics"
"github.com/filecoin-project/lotus/node"
"github.com/filecoin-project/lotus/node/modules/dtypes"
modtest "github.com/filecoin-project/lotus/node/modules/testing"
tstats "github.com/filecoin-project/lotus/tools/stats"
"github.com/raulk/clock"

"github.com/kpacha/opencensus-influxdb"
ma "github.com/multiformats/go-multiaddr"
Expand All @@ -27,11 +30,16 @@ import (
var PrepareNodeTimeout = time.Minute

type LotusNode struct {
FullApi api.FullNode
MinerApi api.StorageMiner
StopFn node.StopFunc
Wallet *wallet.Key
MineOne func(context.Context, func(bool, error)) error
t *TestEnvironment

FullApi api.FullNode
MinerApi api.StorageMiner
Addresses []address.Address // clients have only one; miners have two addresses (worker, miner)
StopFn node.StopFunc
ChainStore *store.ChainStore
Wallet *wallet.Key
MockClock *clock.Mock
MineOne func(context.Context, func(bool, error)) error
}

func (n *LotusNode) setWallet(ctx context.Context, walletKey *wallet.Key) error {
Expand Down
103 changes: 103 additions & 0 deletions lotus-soup/testkit/node_time.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package testkit

import (
"context"
"fmt"
"time"

"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/specs-actors/actors/abi"
"github.com/raulk/clock"
"github.com/testground/sdk-go/sync"
)

// ClockSyncTopic is the topic where clock synchronization events are exchanged.
var ClockSyncTopic = sync.NewTopic("clock_sync", &ClockSyncMsg{})

// ClockSyncMsg is a clock synchronization message.
type ClockSyncMsg struct {
ID string // ${groupname}_${groupseq}
Epoch abi.ChainEpoch
}

// SynchronizeClock synchronizes this instance's clock with the clocks of all
// miner and client nodes participating in this test scenario.
//
// This function takes a channel (localEpochStartCh) where we expect to receive
// local signals to advance the clock to the next epoch. Such signals should
// correspond to some local event that implies readiness to advance, such as
// "mining round completed" events from synchronized mining.
//
// Upon receiving a local advance signal, it forwards the clock by build.BlockDelaySecs,
// thus progressing one chain epoch. It then signals this fact to all other
// nodes participating in the test via the synchronization service.
//
// Once all miners and clients have advanced, this function releases the new
// abi.ChainEpoch on the `globalEpochStartCh`, thus signalling that all nodes
// are ready to commence that chain epoch.
//
// Essentially this behaviour can be assimilated to a distributed sempaphore,
// where all miners and clients wait until they have locally advanced their
// clocks. Once they all signal that fact via the relevant sync topic, the
// semaphore fires and allows them all to proceed at once.
//
// All of this happens in a background goroutine.
func (n *LotusNode) SynchronizeClock(ctx context.Context, genesisTime time.Time, localEpochStartCh <-chan abi.ChainEpoch, globalEpochStartCh chan<- abi.ChainEpoch) {
// replace the clock, setting it to the genesis timestamp + 1 epoch
raulk marked this conversation as resolved.
Show resolved Hide resolved
// we are now ready to mine (and receive) the first block.
n.MockClock = clock.NewMock()
build.Clock = n.MockClock

// start at genesis + 100ms.
n.MockClock.Set(genesisTime.Add(100 * time.Millisecond))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still triggers (after last set of patches) the mined block in the past and related. In total we are advancing BlockDelaySecs plus this slight drift, putting us outside of the mining epoch window: the miner only advances BlockDelaySecs from its tipset and sees that it lost the mining window by 100ms. Ideally this should be a subtraction but in my local tests this breaks other assumptions (understandably because I'm drifting before genesis time). Maybe we should jump a smaller amount than BlockDelaySecs the first time (to remain inside the mining window when adding the full BlockDelaySecs amount in subsequent rounds).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the simplest solution here would be to advance the clock after the mining round and not before as it seems to be done now. (I think the change would translate to something like moving the clock advance logic from the local to the global case in SynchronizeClock.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would make sense. I did try different variants and in some cases I ended up hitting the "got a block from the future" error. I think we do need to get this absolutely right, so I'm willing to invest time in nailing the state machine right now so that we're all happy and we can build on a solid base.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think advancing the clock just before the mining round won't work because of races between miners. Miners who advance their clock sooner will end up propagating what appear to be "blocks from the future" to miners who still haven't advanced their clocks. That's why I opted for this approach, as we have the assurance that all miners are in the right epoch when mining commences in concert.

After removing the drift, and testing atop this patch, we no longer have the spurious logs.

The only one we're left with is the occasional:

WARN	sub	sub/incoming.go:81	Received block with large delay 25 from miner t01001	{"req_id": "a305c648"}

Which I think has to do with gossipsub propagation, and not worth investigating right now IMO.


var (
id = fmt.Sprintf("%s_%d", n.t.TestGroupID, n.t.GroupSeq)

minerCnt = n.t.IntParam("miners")
clientCnt = n.t.IntParam("clients")
totalCnt = minerCnt + clientCnt // miner is running a worker and a mining node.

epoch = abi.ChainEpoch(0) // current epoch
raulk marked this conversation as resolved.
Show resolved Hide resolved
epochInterval = time.Duration(build.BlockDelaySecs) * time.Second

ready []string
raulk marked this conversation as resolved.
Show resolved Hide resolved

signalCh = make(chan *ClockSyncMsg, 128)
sub = n.t.SyncClient.MustSubscribe(ctx, ClockSyncTopic, signalCh)
)

go func() {
defer close(globalEpochStartCh)

for {
select {
case <-localEpochStartCh:
// move the local clock forward, and announce that fact to all other instances.
n.MockClock.Add(epochInterval)
schomatis marked this conversation as resolved.
Show resolved Hide resolved
epoch++
n.t.RecordMessage("advanced local clock: %v [epoch=%d]", n.MockClock.Now(), epoch)
n.t.SyncClient.MustPublish(ctx, ClockSyncTopic, &ClockSyncMsg{
ID: id,
Epoch: epoch,
})

case s := <-signalCh:
// this is a clock event from another instance (or ourselves).
ready = append(ready, s.ID)
if len(ready) != totalCnt {
continue
}
// release the new epoch and reset the state.
globalEpochStartCh <- epoch
ready = ready[0:0]

case <-ctx.Done():
return

case <-sub.Done():
panic("global clock synchronization subscription died")
}
}
}()
}
2 changes: 1 addition & 1 deletion lotus-soup/testkit/retrieval.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (

func RetrieveData(t *TestEnvironment, ctx context.Context, client api.FullNode, fcid cid.Cid, carExport bool, data []byte) {
t1 := time.Now()
offers, err := client.ClientFindData(ctx, fcid)
offers, err := client.ClientFindData(ctx, fcid, nil)
if err != nil {
panic(err)
}
Expand Down
12 changes: 8 additions & 4 deletions lotus-soup/testkit/role_bootstrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,12 @@ func PrepareBootstrapper(t *TestEnvironment) (*Bootstrapper, error) {
genesisMiners = append(genesisMiners, pm.Miner)
}

genesisTimestamp := uint64(time.Now().Unix()) - uint64(t.IntParam("genesis_timestamp_offset"))

genesisTemplate := genesis.Template{
Accounts: genesisActors,
Miners: genesisMiners,
Timestamp: uint64(time.Now().Unix()) - uint64(t.IntParam("genesis_timestamp_offset")), // this needs to be in the past
Timestamp: genesisTimestamp,
}

// dump the genesis block
Expand All @@ -114,7 +116,7 @@ func PrepareBootstrapper(t *TestEnvironment) (*Bootstrapper, error) {

bootstrapperIP := t.NetClient.MustGetDataNetworkIP().String()

n := &LotusNode{}
n := &LotusNode{t: t}
stop, err := node.New(context.Background(),
node.FullAPI(&n.FullApi),
node.Online(),
Expand All @@ -138,6 +140,7 @@ func PrepareBootstrapper(t *TestEnvironment) (*Bootstrapper, error) {
stop(context.TODO())
return nil, err
}

for _, a := range bootstrapperAddrs.Addrs {
ip, err := a.ValueForProtocol(ma.P_IP4)
if err != nil {
Expand All @@ -162,8 +165,9 @@ func PrepareBootstrapper(t *TestEnvironment) (*Bootstrapper, error) {
}

genesisMsg := &GenesisMsg{
Genesis: genesisBuffer.Bytes(),
Bootstrapper: bootstrapperAddr.Bytes(),
Genesis: genesisBuffer.Bytes(),
Bootstrapper: bootstrapperAddr.Bytes(),
TimestampUnix: genesisTimestamp,
}
t.SyncClient.MustPublish(ctx, GenesisTopic, genesisMsg)

Expand Down
Loading