Skip to content

Commit

Permalink
Merge pull request #4685 from influxdb/heal-raft-cluster
Browse files Browse the repository at this point in the history
Heal raft cluster
  • Loading branch information
corylanou committed Nov 11, 2015
2 parents 615f47b + a4c54cb commit 8ec4d04
Show file tree
Hide file tree
Showing 12 changed files with 561 additions and 205 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
- [#4721](https://github.com/influxdb/influxdb/pull/4721): Export tsdb.InterfaceValues
- [#4681](https://github.com/influxdb/influxdb/pull/4681): Increase default buffer size for collectd and graphite listeners
- [#4659](https://github.com/influxdb/influxdb/pull/4659): Support IF EXISTS for DROP DATABASE
- [#4685](https://github.com/influxdb/influxdb/pull/4685): Automatically promote node to raft peer if drop server results in removing a raft peer.

### Bugfixes
- [#4715](https://github.com/influxdb/influxdb/pull/4715): Fix panic during Raft-close. Fix [issue #4707](https://github.com/influxdb/influxdb/issues/4707). Thanks @oiooj
Expand Down
8 changes: 8 additions & 0 deletions etc/config.sample.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ reporting-disabled = false
heartbeat-timeout = "1s"
leader-lease-timeout = "500ms"
commit-timeout = "50ms"
cluster-tracing = false

# If enabled, when a Raft cluster loses a peer due to a `DROP SERVER` command,
# the leader will automatically ask a non-raft peer node to promote to a raft
# peer. This only happens if there is a non-raft peer node available to promote.
# This setting only affects the local node, so to ensure it operates correctly, be sure to set
# it in the config of every node.
raft-promotion-enabled = true

###
### [data]
Expand Down
39 changes: 22 additions & 17 deletions meta/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,36 @@ const (

// DefaultCommitTimeout is the default commit timeout for the store.
DefaultCommitTimeout = 50 * time.Millisecond

// DefaultRaftPromotionEnabled is the default for automatically promoting a node to a raft peer when needed.
DefaultRaftPromotionEnabled = true
)

// Config represents the meta configuration.
// Each field carries a `toml` struct tag naming the configuration key it is
// associated with; Peers is tagged "-".
type Config struct {
Dir string `toml:"dir"`
Hostname string `toml:"hostname"`
BindAddress string `toml:"bind-address"`
Peers []string `toml:"-"`
RetentionAutoCreate bool `toml:"retention-autocreate"`
ElectionTimeout toml.Duration `toml:"election-timeout"`
HeartbeatTimeout toml.Duration `toml:"heartbeat-timeout"`
LeaderLeaseTimeout toml.Duration `toml:"leader-lease-timeout"`
CommitTimeout toml.Duration `toml:"commit-timeout"`
ClusterTracing bool `toml:"cluster-tracing"`
Dir string `toml:"dir"`
Hostname string `toml:"hostname"`
BindAddress string `toml:"bind-address"`
Peers []string `toml:"-"`
RetentionAutoCreate bool `toml:"retention-autocreate"`
ElectionTimeout toml.Duration `toml:"election-timeout"`
HeartbeatTimeout toml.Duration `toml:"heartbeat-timeout"`
LeaderLeaseTimeout toml.Duration `toml:"leader-lease-timeout"`
CommitTimeout toml.Duration `toml:"commit-timeout"`
ClusterTracing bool `toml:"cluster-tracing"`
// RaftPromotionEnabled toggles automatic promotion of a node to a raft
// peer when needed; NewConfig defaults it to DefaultRaftPromotionEnabled.
RaftPromotionEnabled bool `toml:"raft-promotion-enabled"`
}

// NewConfig builds a new configuration with default values.
// Hostname, BindAddress, the four timeouts, and RaftPromotionEnabled are taken
// from the package's Default* values; RetentionAutoCreate is set to true
// directly. Fields not named in the literal keep their zero values.
func NewConfig() *Config {
return &Config{
Hostname: DefaultHostname,
BindAddress: DefaultBindAddress,
RetentionAutoCreate: true,
ElectionTimeout: toml.Duration(DefaultElectionTimeout),
HeartbeatTimeout: toml.Duration(DefaultHeartbeatTimeout),
LeaderLeaseTimeout: toml.Duration(DefaultLeaderLeaseTimeout),
CommitTimeout: toml.Duration(DefaultCommitTimeout),
Hostname: DefaultHostname,
BindAddress: DefaultBindAddress,
RetentionAutoCreate: true,
ElectionTimeout: toml.Duration(DefaultElectionTimeout),
HeartbeatTimeout: toml.Duration(DefaultHeartbeatTimeout),
LeaderLeaseTimeout: toml.Duration(DefaultLeaderLeaseTimeout),
CommitTimeout: toml.Duration(DefaultCommitTimeout),
RaftPromotionEnabled: DefaultRaftPromotionEnabled,
}
}
3 changes: 3 additions & 0 deletions meta/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ election-timeout = "10s"
heartbeat-timeout = "20s"
leader-lease-timeout = "30h"
commit-timeout = "40m"
raft-promotion-enabled = false
`, &c); err != nil {
t.Fatal(err)
}
Expand All @@ -32,5 +33,7 @@ commit-timeout = "40m"
t.Fatalf("unexpected leader lease timeout: %v", c.LeaderLeaseTimeout)
} else if time.Duration(c.CommitTimeout) != 40*time.Minute {
t.Fatalf("unexpected commit timeout: %v", c.CommitTimeout)
} else if c.RaftPromotionEnabled {
t.Fatalf("unexpected raft promotion enabled: %v", c.RaftPromotionEnabled)
}
}
7 changes: 7 additions & 0 deletions meta/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,13 @@ func (ni *NodeInfo) unmarshal(pb *internal.NodeInfo) {
ni.Host = pb.GetHost()
}

// NodeInfos is a slice of NodeInfo used for sorting. Its Len, Swap, and Less
// methods match the shape of sort.Interface and order elements by ascending ID.
type NodeInfos []NodeInfo

// Len returns the number of elements in the slice.
func (n NodeInfos) Len() int {
	return len(n)
}

// Swap exchanges the elements at indexes i and j.
func (n NodeInfos) Swap(i, j int) {
	n[j], n[i] = n[i], n[j]
}

// Less reports whether the element at index i has a smaller ID than the
// element at index j.
func (n NodeInfos) Less(i, j int) bool {
	return n[j].ID > n[i].ID
}

// DatabaseInfo represents information about a database in the system.
type DatabaseInfo struct {
Name string
Expand Down
Loading

0 comments on commit 8ec4d04

Please sign in to comment.