Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pkg/srv, embed, etcdmain: Support multiple clusters in the same DNS domain #8690

Merged
merged 2 commits into from
Jan 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Documentation/op-guide/clustering.md
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,13 @@ If `_etcd-client-ssl._tcp.example.com` is found, clients will attempt to communi

If etcd is using TLS without a custom certificate authority, the discovery domain (e.g., example.com) must match the SRV record domain (e.g., infra1.example.com). This is to mitigate attacks that forge SRV records to point to a different domain; the domain would have a valid certificate under PKI but be controlled by an unknown third party.

The `-discovery-srv-name` flag additionally configures a suffix to the SRV name that is queried during discovery.
Use this flag to differentiate between multiple etcd clusters under the same domain.
For example, if `-discovery-srv=example.com` and `-discovery-srv-name=foo` are set, the following DNS SRV queries are made:

* _etcd-server-ssl-foo._tcp.example.com
* _etcd-server-foo._tcp.example.com

#### Create DNS SRV records

```
Expand Down
5 changes: 5 additions & 0 deletions Documentation/op-guide/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ To start etcd automatically using custom settings at startup in Linux, using a [
+ default: ""
+ env variable: ETCD_DISCOVERY_SRV

### --discovery-srv-name
+ Suffix to the DNS SRV name queried when bootstrapping using DNS.
+ default: ""
+ env variable: ETCD_DISCOVERY_SRV_NAME

### --discovery-fallback
+ Expected behavior ("exit" or "proxy") when the discovery service fails. "proxy" supports v2 API only.
+ default: "proxy"
Expand Down
7 changes: 7 additions & 0 deletions Documentation/v2/clustering.md
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,13 @@ To help clients discover the etcd cluster, the following DNS SRV records are loo

If `_etcd-client-ssl._tcp.example.com` is found, clients will attempt to communicate with the etcd cluster over SSL.

The `-discovery-srv-name` flag additionally configures a suffix to the SRV name that is queried during discovery.
Use this flag to differentiate between multiple etcd clusters under the same domain.
For example, if `-discovery-srv=example.com` and `-discovery-srv-name=foo` are set, the following DNS SRV queries are made:

* _etcd-server-ssl-foo._tcp.example.com
* _etcd-server-foo._tcp.example.com

#### Create DNS SRV records

```
Expand Down
5 changes: 5 additions & 0 deletions Documentation/v2/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ To start etcd automatically using custom settings at startup in Linux, using a [
+ default: none
+ env variable: ETCD_DISCOVERY_SRV

### --discovery-srv-name
+ Suffix to the DNS SRV name queried when bootstrapping using DNS.
+ default: none
+ env variable: ETCD_DISCOVERY_SRV_NAME

### --discovery-fallback
+ Expected behavior ("exit" or "proxy") when the discovery service fails.
+ default: "proxy"
Expand Down
44 changes: 34 additions & 10 deletions embed/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,16 @@ type Config struct {

// clustering

APUrls, ACUrls []url.URL
ClusterState string `json:"initial-cluster-state"`
DNSCluster string `json:"discovery-srv"`
Dproxy string `json:"discovery-proxy"`
Durl string `json:"discovery"`
InitialCluster string `json:"initial-cluster"`
InitialClusterToken string `json:"initial-cluster-token"`
StrictReconfigCheck bool `json:"strict-reconfig-check"`
EnableV2 bool `json:"enable-v2"`
APUrls, ACUrls []url.URL
ClusterState string `json:"initial-cluster-state"`
DNSCluster string `json:"discovery-srv"`
DNSClusterServiceName string `json:"discovery-srv-name"`
Dproxy string `json:"discovery-proxy"`
Durl string `json:"discovery"`
InitialCluster string `json:"initial-cluster"`
InitialClusterToken string `json:"initial-cluster-token"`
StrictReconfigCheck bool `json:"strict-reconfig-check"`
EnableV2 bool `json:"enable-v2"`

// security

Expand Down Expand Up @@ -463,7 +464,8 @@ func (cfg *Config) PeerURLsMapAndToken(which string) (urlsmap types.URLsMap, tok
urlsmap[cfg.Name] = cfg.APUrls
token = cfg.Durl
case cfg.DNSCluster != "":
clusterStrs, cerr := srv.GetCluster("etcd-server", cfg.Name, cfg.DNSCluster, cfg.APUrls)
clusterStrs, cerr := cfg.GetDNSClusterNames()

if cerr != nil {
plog.Errorf("couldn't resolve during SRV discovery (%v)", cerr)
return nil, "", cerr
Expand All @@ -490,6 +492,28 @@ func (cfg *Config) PeerURLsMapAndToken(which string) (urlsmap types.URLsMap, tok
return urlsmap, token, err
}

// GetDNSClusterNames uses DNS SRV records to get a list of initial nodes for cluster bootstrapping.
//
// Two SRV services are queried under cfg.DNSCluster: "etcd-server-ssl" with
// the "https" scheme and "etcd-server" with the "http" scheme. When
// cfg.DNSClusterServiceName is non-empty, "-" followed by that name is
// appended to both service names.
//
// The entries of every lookup that succeeds are combined, with the TLS entries
// first. An error is returned only when both lookups fail; in that case it is
// the error from the "etcd-server-ssl" lookup.
func (cfg *Config) GetDNSClusterNames() ([]string, error) {
	var serviceNameSuffix string
	if cfg.DNSClusterServiceName != "" {
		serviceNameSuffix = "-" + cfg.DNSClusterServiceName
	}

	// Use both etcd-server-ssl and etcd-server for discovery. Combine the results if both are available.
	sslClusterStrs, sslErr := srv.GetCluster("https", "etcd-server-ssl"+serviceNameSuffix, cfg.Name, cfg.DNSCluster, cfg.APUrls)
	httpClusterStrs, httpErr := srv.GetCluster("http", "etcd-server"+serviceNameSuffix, cfg.Name, cfg.DNSCluster, cfg.APUrls)

	// Discovery fails only when neither service could be resolved; a cluster
	// that publishes just one of the two record sets must still bootstrap.
	if sslErr != nil && httpErr != nil {
		return nil, sslErr
	}

	clusterStrs := make([]string, 0, len(sslClusterStrs)+len(httpClusterStrs))
	if sslErr == nil {
		clusterStrs = append(clusterStrs, sslClusterStrs...)
	}
	// Merge the plain-HTTP entries when that lookup SUCCEEDED (the previous
	// check was inverted and discarded them).
	if httpErr == nil {
		clusterStrs = append(clusterStrs, httpClusterStrs...)
	}
	return clusterStrs, nil
}

func (cfg Config) InitialClusterFromName(name string) (ret string) {
if len(cfg.APUrls) == 0 {
return ""
Expand Down
3 changes: 2 additions & 1 deletion etcdmain/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ func newConfig() *config {

fs.StringVar(&cfg.ec.Dproxy, "discovery-proxy", cfg.ec.Dproxy, "HTTP proxy to use for traffic to discovery service.")
fs.StringVar(&cfg.ec.DNSCluster, "discovery-srv", cfg.ec.DNSCluster, "DNS domain used to bootstrap initial cluster.")
fs.StringVar(&cfg.ec.DNSClusterServiceName, "discovery-srv-name", cfg.ec.DNSClusterServiceName, "Service name to query when using DNS discovery.")
fs.StringVar(&cfg.ec.InitialCluster, "initial-cluster", cfg.ec.InitialCluster, "Initial cluster configuration for bootstrapping.")
fs.StringVar(&cfg.ec.InitialClusterToken, "initial-cluster-token", cfg.ec.InitialClusterToken, "Initial cluster token for the etcd cluster during bootstrap.")
fs.Var(cfg.cf.clusterState, "initial-cluster-state", "Initial cluster state ('new' or 'existing').")
Expand Down Expand Up @@ -285,7 +286,7 @@ func (cfg *config) configFromCmdLine() error {
}

// disable default initial-cluster if discovery is set
if (cfg.ec.Durl != "" || cfg.ec.DNSCluster != "") && !flags.IsSet(cfg.cf.flagSet, "initial-cluster") {
if (cfg.ec.Durl != "" || cfg.ec.DNSCluster != "" || cfg.ec.DNSClusterServiceName != "") && !flags.IsSet(cfg.cf.flagSet, "initial-cluster") {
cfg.ec.InitialCluster = ""
}

Expand Down
17 changes: 3 additions & 14 deletions pkg/srv/srv.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ var (

// GetCluster gets the cluster information via DNS discovery.
// Also sees each entry as a separate instance.
func GetCluster(service, name, dns string, apurls types.URLs) ([]string, error) {
func GetCluster(serviceScheme, service, name, dns string, apurls types.URLs) ([]string, error) {
tempName := int(0)
tcp2ap := make(map[string]url.URL)

Expand Down Expand Up @@ -83,20 +83,9 @@ func GetCluster(service, name, dns string, apurls types.URLs) ([]string, error)
return nil
}

failCount := 0
err := updateNodeMap(service+"-ssl", "https")
srvErr := make([]string, 2)
err := updateNodeMap(service, serviceScheme)
if err != nil {
srvErr[0] = fmt.Sprintf("error querying DNS SRV records for _%s-ssl %s", service, err)
failCount++
}
err = updateNodeMap(service, "http")
if err != nil {
srvErr[1] = fmt.Sprintf("error querying DNS SRV records for _%s %s", service, err)
failCount++
}
if failCount == 2 {
return nil, fmt.Errorf("srv: too many errors querying DNS SRV records (%q, %q)", srvErr[0], srvErr[1])
return nil, fmt.Errorf("error querying DNS SRV records for _%s %s", service, err)
}
return stringParts, nil
}
Expand Down
32 changes: 13 additions & 19 deletions pkg/srv/srv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,51 +44,51 @@ func TestSRVGetCluster(t *testing.T) {
}

tests := []struct {
withSSL []*net.SRV
withoutSSL []*net.SRV
urls []string
scheme string
records []*net.SRV
urls []string

expected string
}{
{
[]*net.SRV{},
"https",
[]*net.SRV{},
nil,

"",
},
{
"https",
srvAll,
[]*net.SRV{},
nil,

"0=https://1.example.com:2480,1=https://2.example.com:2480,2=https://3.example.com:2480",
},
{
"http",
srvAll,
[]*net.SRV{{Target: "4.example.com.", Port: 2380}},
nil,

"0=https://1.example.com:2480,1=https://2.example.com:2480,2=https://3.example.com:2480,3=http://4.example.com:2380",
"0=http://1.example.com:2480,1=http://2.example.com:2480,2=http://3.example.com:2480",
},
{
"https",
srvAll,
[]*net.SRV{{Target: "4.example.com.", Port: 2380}},
[]string{"https://10.0.0.1:2480"},

"dnsClusterTest=https://1.example.com:2480,0=https://2.example.com:2480,1=https://3.example.com:2480,2=http://4.example.com:2380",
"dnsClusterTest=https://1.example.com:2480,0=https://2.example.com:2480,1=https://3.example.com:2480",
},
// matching local member with resolved addr and return unresolved hostnames
{
"https",
srvAll,
nil,
[]string{"https://10.0.0.1:2480"},

"dnsClusterTest=https://1.example.com:2480,0=https://2.example.com:2480,1=https://3.example.com:2480",
},
// reject if apurls are TLS but SRV is only http
{
nil,
"http",
srvAll,
[]string{"https://10.0.0.1:2480"},

Expand All @@ -109,16 +109,10 @@ func TestSRVGetCluster(t *testing.T) {

for i, tt := range tests {
lookupSRV = func(service string, proto string, domain string) (string, []*net.SRV, error) {
if service == "etcd-server-ssl" {
return "", tt.withSSL, nil
}
if service == "etcd-server" {
return "", tt.withoutSSL, nil
}
return "", nil, errors.New("Unknown service in mock")
return "", tt.records, nil
}
urls := testutil.MustNewURLs(t, tt.urls)
str, err := GetCluster("etcd-server", name, "example.com", urls)
str, err := GetCluster(tt.scheme, "etcd-server", name, "example.com", urls)
if err != nil {
t.Fatalf("%d: err: %#v", i, err)
}
Expand Down