Skip to content

Commit

Permalink
etcd: add a read/write timeout to server
Browse files Browse the repository at this point in the history
The default is for connections to last forever[1]. This leads to fds
leaking. I set the timeout so high by default so that watches don't have
to keep retrying but perhaps we should set it slower.

Tested on a cluster with lots of clients and it seems to have relieved
the problem.

[1] https://groups.google.com/forum/#!msg/golang-nuts/JFhGwh1q9xU/heh4J8pul3QJ
  • Loading branch information
Brandon Philips committed Jul 7, 2014
1 parent 8a0266a commit 084dcb5
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 13 deletions.
6 changes: 6 additions & 0 deletions Documentation/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ The full documentation is contained in the [API docs](https://github.com/coreos/

* `-addr` - The advertised public hostname:port for client communication. Defaults to `127.0.0.1:4001`.
* `-discovery` - A URL to use for discovering the peer list. (i.e `"https://discovery.etcd.io/your-unique-key"`).
* `-http-read-timeout` - The number of seconds before an HTTP read operation is timed out.
* `-http-write-timeout` - The number of seconds before an HTTP write operation is timed out.
* `-bind-addr` - The listening hostname for client communication. Defaults to advertised IP.
* `-peers` - A comma separated list of peers in the cluster (i.e `"203.0.113.101:7001,203.0.113.102:7001"`).
* `-peers-file` - The file path containing a comma separated list of peers in the cluster.
Expand Down Expand Up @@ -73,6 +75,8 @@ cors = []
cpu_profile_file = ""
data_dir = "."
discovery = "http://etcd.local:4001/v2/keys/_etcd/registry/examplecluster"
http_read_timeout = 10
http_write_timeout = 10
key_file = ""
peers = []
peers_file = ""
Expand Down Expand Up @@ -108,6 +112,8 @@ sync_interval = 5.0
* `ETCD_CPU_PROFILE_FILE`
* `ETCD_DATA_DIR`
* `ETCD_DISCOVERY`
* `ETCD_CLUSTER_HTTP_READ_TIMEOUT`
* `ETCD_CLUSTER_HTTP_WRITE_TIMEOUT`
* `ETCD_KEY_FILE`
* `ETCD_PEERS`
* `ETCD_PEERS_FILE`
Expand Down
7 changes: 7 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ type Config struct {
Discovery string `toml:"discovery" env:"ETCD_DISCOVERY"`
Force bool
KeyFile string `toml:"key_file" env:"ETCD_KEY_FILE"`
HTTPReadTimeout float64 `toml:"http_read_timeout" env:"ETCD_HTTP_READ_TIMEOUT"`
HTTPWriteTimeout float64 `toml:"http_write_timeout" env:"ETCD_HTTP_WRITE_TIMEOUT"`
Peers []string `toml:"peers" env:"ETCD_PEERS"`
PeersFile string `toml:"peers_file" env:"ETCD_PEERS_FILE"`
MaxResultBuffer int `toml:"max_result_buffer" env:"ETCD_MAX_RESULT_BUFFER"`
Expand Down Expand Up @@ -98,6 +100,8 @@ func New() *Config {
c := new(Config)
c.SystemPath = DefaultSystemConfigPath
c.Addr = "127.0.0.1:4001"
c.HTTPReadTimeout = server.DefaultReadTimeout
c.HTTPWriteTimeout = server.DefaultWriteTimeout
c.MaxResultBuffer = 1024
c.MaxRetryAttempts = 3
c.RetryInterval = 10.0
Expand Down Expand Up @@ -255,6 +259,9 @@ func (c *Config) LoadFlags(arguments []string) error {
f.StringVar(&c.Peer.CertFile, "peer-cert-file", c.Peer.CertFile, "")
f.StringVar(&c.Peer.KeyFile, "peer-key-file", c.Peer.KeyFile, "")

f.Float64Var(&c.HTTPReadTimeout, "http-read-timeout", c.HTTPReadTimeout, "")
f.Float64Var(&c.HTTPWriteTimeout, "http-write-timeout", c.HTTPReadTimeout, "")

f.StringVar(&c.DataDir, "data-dir", c.DataDir, "")
f.IntVar(&c.MaxResultBuffer, "max-result-buffer", c.MaxResultBuffer, "")
f.IntVar(&c.MaxRetryAttempts, "max-retry-attempts", c.MaxRetryAttempts, "")
Expand Down
23 changes: 12 additions & 11 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ func TestConfigTOML(t *testing.T) {
max_result_buffer = 512
max_retry_attempts = 5
name = "test-name"
http_read_timeout = 2.34
snapshot = true
verbose = true
very_verbose = true
http_write_timeout = 1.23
[peer]
addr = "127.0.0.1:7002"
Expand All @@ -52,6 +54,8 @@ func TestConfigTOML(t *testing.T) {
assert.Equal(t, c.CorsOrigins, []string{"*"}, "")
assert.Equal(t, c.DataDir, "/tmp/data", "")
assert.Equal(t, c.Discovery, "http://example.com/foobar", "")
assert.Equal(t, c.HTTPReadTimeout, 2.34, "")
assert.Equal(t, c.HTTPWriteTimeout, 1.23, "")
assert.Equal(t, c.KeyFile, "/tmp/file.key", "")
assert.Equal(t, c.BindAddr, "127.0.0.1:4003", "")
assert.Equal(t, c.Peers, []string{"coreos.com:4001", "coreos.com:4002"}, "")
Expand Down Expand Up @@ -80,6 +84,8 @@ func TestConfigEnv(t *testing.T) {
os.Setenv("ETCD_CORS", "localhost:4001,localhost:4002")
os.Setenv("ETCD_DATA_DIR", "/tmp/data")
os.Setenv("ETCD_DISCOVERY", "http://example.com/foobar")
os.Setenv("ETCD_HTTP_READ_TIMEOUT", "2.34")
os.Setenv("ETCD_HTTP_WRITE_TIMEOUT", "1.23")
os.Setenv("ETCD_KEY_FILE", "/tmp/file.key")
os.Setenv("ETCD_BIND_ADDR", "127.0.0.1:4003")
os.Setenv("ETCD_PEERS", "coreos.com:4001,coreos.com:4002")
Expand Down Expand Up @@ -107,6 +113,8 @@ func TestConfigEnv(t *testing.T) {
assert.Equal(t, c.CorsOrigins, []string{"localhost:4001", "localhost:4002"}, "")
assert.Equal(t, c.DataDir, "/tmp/data", "")
assert.Equal(t, c.Discovery, "http://example.com/foobar", "")
assert.Equal(t, c.HTTPReadTimeout, 2.34, "")
assert.Equal(t, c.HTTPWriteTimeout, 1.23, "")
assert.Equal(t, c.KeyFile, "/tmp/file.key", "")
assert.Equal(t, c.BindAddr, "127.0.0.1:4003", "")
assert.Equal(t, c.Peers, []string{"coreos.com:4001", "coreos.com:4002"}, "")
Expand Down Expand Up @@ -553,19 +561,12 @@ func TestConfigClusterRemoveDelayFlag(t *testing.T) {
assert.Equal(t, c.Cluster.RemoveDelay, 100.0, "")
}

// Ensures that the cluster sync interval can be parsed from the environment.
func TestConfigClusterSyncIntervalEnv(t *testing.T) {
withEnv("ETCD_CLUSTER_SYNC_INTERVAL", "10", func(c *Config) {
assert.Nil(t, c.LoadEnv(), "")
assert.Equal(t, c.Cluster.SyncInterval, 10.0, "")
})
}

// Ensures that the cluster sync interval flag can be parsed.
func TestConfigClusterSyncIntervalFlag(t *testing.T) {
c := New()
assert.Nil(t, c.LoadFlags([]string{"-cluster-sync-interval", "10"}), "")
assert.Equal(t, c.Cluster.SyncInterval, 10.0, "")
assert.Nil(t, c.LoadFlags([]string{"-http-read-timeout", "2.34"}), "")
assert.Equal(t, c.HTTPReadTimeout, 2.34, "")
assert.Nil(t, c.LoadFlags([]string{"-http-write-timeout", "1.23"}), "")
assert.Equal(t, c.HTTPWriteTimeout, 1.23, "")
}

// Ensures that a system config field is overridden by a custom config field.
Expand Down
13 changes: 11 additions & 2 deletions etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,19 @@ func (e *Etcd) Run() {

log.Infof("etcd server [name %s, listen on %s, advertised url %s]", e.Server.Name, e.Config.BindAddr, e.Server.URL())
listener := server.NewListener(e.Config.EtcdTLSInfo().Scheme(), e.Config.BindAddr, etcdTLSConfig)
e.server = &http.Server{Handler: &ModeHandler{e, serverHTTPHandler, standbyServerHTTPHandler}}

e.server = &http.Server{Handler: &ModeHandler{e, serverHTTPHandler, standbyServerHTTPHandler},
ReadTimeout: time.Duration(e.Config.HTTPReadTimeout) * time.Second,
WriteTimeout: time.Duration(e.Config.HTTPWriteTimeout) * time.Second,
}

log.Infof("peer server [name %s, listen on %s, advertised url %s]", e.PeerServer.Config.Name, e.Config.Peer.BindAddr, e.PeerServer.Config.URL)
peerListener := server.NewListener(e.Config.PeerTLSInfo().Scheme(), e.Config.Peer.BindAddr, peerTLSConfig)
e.peerServer = &http.Server{Handler: &ModeHandler{e, peerServerHTTPHandler, http.NotFoundHandler()}}

e.peerServer = &http.Server{Handler: &ModeHandler{e, peerServerHTTPHandler, http.NotFoundHandler()},
ReadTimeout: time.Duration(server.DefaultReadTimeout) * time.Second,
WriteTimeout: time.Duration(server.DefaultWriteTimeout) * time.Second,
}

wg := sync.WaitGroup{}
wg.Add(2)
Expand Down
6 changes: 6 additions & 0 deletions server/listener.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@ package server
import (
"crypto/tls"
"net"
"time"

"github.com/coreos/etcd/log"
)

const (
DefaultReadTimeout = float64((5 * time.Minute) / time.Second)
DefaultWriteTimeout = float64((5 * time.Minute) / time.Second)
)

// TLSServerConfig generates tls configuration based on TLSInfo
// If any error happens, this function will call log.Fatal
func TLSServerConfig(info *TLSInfo) *tls.Config {
Expand Down

0 comments on commit 084dcb5

Please sign in to comment.