From 7306c38cdd8417b5d8c6220cb1d09a6cdbdba05b Mon Sep 17 00:00:00 2001 From: Luke Addison Date: Wed, 1 Aug 2018 14:41:02 +0100 Subject: [PATCH] Recreate bastion when failed --- .../tarmak/data_source_bastion_instance.go | 4 -- .../tarmak/resource_vault_cluster.go | 10 +++- .../tarmak/resource_vault_instance_role.go | 28 ++++++++- .../tarmak/rpc/bastion_instance_status.go | 59 +++++++++++++++---- .../tarmak/rpc/vault_cluster_status.go | 37 ++++++++---- terraform/amazon/modules/bastion/bastion.tf | 8 ++- terraform/amazon/modules/bastion/outputs.tf | 21 ++----- terraform/amazon/modules/kubernetes/inputs.tf | 8 +-- terraform/amazon/modules/kubernetes/vault.tf | 16 +---- terraform/amazon/modules/vault/cluster.tf | 7 +++ terraform/amazon/modules/vault/inputs.tf | 4 +- terraform/amazon/modules/vault/outputs.tf | 12 ++-- .../amazon/templates/modules.tf.template | 6 +- .../templates/vault_instances.tf.template | 8 --- 14 files changed, 137 insertions(+), 91 deletions(-) create mode 100644 terraform/amazon/modules/vault/cluster.tf diff --git a/pkg/terraform/providers/tarmak/data_source_bastion_instance.go b/pkg/terraform/providers/tarmak/data_source_bastion_instance.go index 65587f068b..2757c3c5eb 100644 --- a/pkg/terraform/providers/tarmak/data_source_bastion_instance.go +++ b/pkg/terraform/providers/tarmak/data_source_bastion_instance.go @@ -19,10 +19,6 @@ func dataSourceBastionInstance() *schema.Resource { Type: schema.TypeString, Required: true, }, - "instance_id": { - Type: schema.TypeString, - Optional: true, - }, "username": { Type: schema.TypeString, Required: true, diff --git a/pkg/terraform/providers/tarmak/resource_vault_cluster.go b/pkg/terraform/providers/tarmak/resource_vault_cluster.go index ece053078e..dae5c21b79 100644 --- a/pkg/terraform/providers/tarmak/resource_vault_cluster.go +++ b/pkg/terraform/providers/tarmak/resource_vault_cluster.go @@ -18,6 +18,7 @@ func resourceTarmakVaultCluster() *schema.Resource { Create: resourceTarmakVaultClusterCreate, 
Read: resourceTarmakVaultClusterRead, Delete: resourceTarmakVaultClusterDelete, + Update: resourceTarmakVaultClusterCreate, Schema: map[string]*schema.Schema{ "internal_fqdns": { @@ -43,6 +44,10 @@ func resourceTarmakVaultCluster() *schema.Resource { Required: true, ForceNew: true, }, + "bastion_status": { + Type: schema.TypeString, + Required: true, + }, "status": { Type: schema.TypeString, Computed: true, @@ -52,12 +57,10 @@ func resourceTarmakVaultCluster() *schema.Resource { } func resourceTarmakVaultClusterCreate(d *schema.ResourceData, meta interface{}) (err error) { + client := meta.(*rpc.Client) vaultInternalFQDNs := []string{} - - //return fmt.Errorf("DEBUG: %#v", d.Get("internal_fqdns").([]interface{})[0]) - for _, internalFQDN := range d.Get("internal_fqdns").([]interface{}) { vaultInternalFQDNs = append(vaultInternalFQDNs, internalFQDN.(string)) } @@ -94,6 +97,7 @@ func resourceTarmakVaultClusterCreate(d *schema.ResourceData, meta interface{}) } func resourceTarmakVaultClusterRead(d *schema.ResourceData, meta interface{}) (err error) { + client := meta.(*rpc.Client) vaultInternalFQDNs := []string{} diff --git a/pkg/terraform/providers/tarmak/resource_vault_instance_role.go b/pkg/terraform/providers/tarmak/resource_vault_instance_role.go index 1e87efde90..134fb6bdf5 100644 --- a/pkg/terraform/providers/tarmak/resource_vault_instance_role.go +++ b/pkg/terraform/providers/tarmak/resource_vault_instance_role.go @@ -2,7 +2,6 @@ package tarmak import ( - "fmt" "log" "net/rpc" @@ -16,6 +15,7 @@ func resourceTarmakVaultInstanceRole() *schema.Resource { Create: resourceTarmakVaultInstanceRoleCreate, Read: resourceTarmakVaultInstanceRoleRead, Delete: resourceTarmakVaultInstanceRoleDelete, + Update: resourceTarmakVaultInstanceRoleCreate, Schema: map[string]*schema.Schema{ "role_name": { @@ -41,6 +41,10 @@ func resourceTarmakVaultInstanceRole() *schema.Resource { Required: true, ForceNew: true, }, + "vault_status": { + Type: schema.TypeString, + Required: true, + 
}, "init_token": { Type: schema.TypeString, Computed: true, @@ -52,6 +56,13 @@ func resourceTarmakVaultInstanceRole() *schema.Resource { func resourceTarmakVaultInstanceRoleCreate(d *schema.ResourceData, meta interface{}) (err error) { client := meta.(*rpc.Client) + vaultStatus := d.Get("vault_status").(string) + if vaultStatus != tarmakRPC.VaultStatusReady { + log.Print("vault is not ready") + d.SetId("") + return nil + } + roleName := d.Get("role_name").(string) clusterName := d.Get("vault_cluster_name").(string) vaultInternalFQDNs := []string{} @@ -72,12 +83,15 @@ func resourceTarmakVaultInstanceRoleCreate(d *schema.ResourceData, meta interfac var reply tarmakRPC.VaultInstanceRoleReply err = client.Call(tarmakRPC.VaultInstanceRole, args, &reply) if err != nil { + log.Printf("call to %s failed: %s", tarmakRPC.VaultInstanceRole, err) d.SetId("") - return fmt.Errorf("call to %s failed: %s", tarmakRPC.VaultInstanceRole, err) + return nil } if err = d.Set("init_token", reply.InitToken); err != nil { - return fmt.Errorf("failed to set init token: %s", err) + log.Printf("failed to set init token: %s", err) + d.SetId("") + return } d.SetId(reply.InitToken) @@ -88,6 +102,13 @@ func resourceTarmakVaultInstanceRoleCreate(d *schema.ResourceData, meta interfac func resourceTarmakVaultInstanceRoleRead(d *schema.ResourceData, meta interface{}) (err error) { client := meta.(*rpc.Client) + vaultStatus := d.Get("vault_status").(string) + if vaultStatus != tarmakRPC.VaultStatusReady { + log.Printf("vault is not ready") + d.SetId("") + return nil + } + roleName := d.Get("role_name").(string) clusterName := d.Get("vault_cluster_name").(string) vaultInternalFQDNs := []string{} @@ -108,6 +129,7 @@ func resourceTarmakVaultInstanceRoleRead(d *schema.ResourceData, meta interface{ var reply tarmakRPC.VaultInstanceRoleReply err = client.Call(tarmakRPC.VaultInstanceRole, args, &reply) if err != nil { + log.Printf("call to %s failed: %s", tarmakRPC.VaultInstanceRole, err) d.SetId("") return 
nil } diff --git a/pkg/terraform/providers/tarmak/rpc/bastion_instance_status.go b/pkg/terraform/providers/tarmak/rpc/bastion_instance_status.go index 359c741845..afbedeb8ae 100644 --- a/pkg/terraform/providers/tarmak/rpc/bastion_instance_status.go +++ b/pkg/terraform/providers/tarmak/rpc/bastion_instance_status.go @@ -8,6 +8,13 @@ import ( cluster "github.com/jetstack/tarmak/pkg/apis/cluster/v1alpha1" ) +const ( + bastionVerifyTimeoutSeconds = 180 + BastionStatusUnknown = "unknown" + BastionStatusReady = "ready" + BastionStatusDown = "down" +) + var ( BastionInstanceStatusCall = fmt.Sprintf("%s.BastionInstanceStatus", RPCName) ) @@ -25,23 +32,53 @@ func (r *tarmakRPC) BastionInstanceStatus(args *BastionInstanceStatusArgs, resul r.tarmak.Log().Debug("received rpc bastion status") if r.cluster.GetState() == cluster.StateDestroy { - result.Status = "unknown" + result.Status = BastionStatusUnknown return nil } - var err error - for i := 1; i <= Retries; i++ { - if err = r.cluster.Environment().VerifyBastionAvailable(); err != nil { - r.tarmak.Log().Error(err) - time.Sleep(time.Second) - } else { - break + // check if bastion instance exists + instances, err := r.cluster.Environment().Provider().ListHosts(r.cluster.Environment().Hub()) + if err != nil { + r.tarmak.Log().Debugf("failed to list instances in hub: %s", err) + result.Status = BastionStatusUnknown + return nil + } + bastionExists := false + for _, instance := range instances { + for _, role := range instance.Roles() { + if role == cluster.InstancePoolTypeBastion { + bastionExists = true + } } } - if err != nil { - return fmt.Errorf("bastion instance is not ready: %s", err) + if !bastionExists { + r.tarmak.Log().Debug("bastion instance does not exist") + result.Status = BastionStatusDown + return nil + } + + // verify bastion responsiveness; the buffered channel and deadline-bounded loop keep the goroutine from outliving the timeout + verifyChannel := make(chan bool, 1) + go func() { + for deadline := time.Now().Add(bastionVerifyTimeoutSeconds * time.Second); time.Now().Before(deadline); { + if err := r.cluster.Environment().VerifyBastionAvailable(); err != nil { + r.tarmak.Log().Error(err) + 
time.Sleep(time.Second) + continue + } + verifyChannel <- true + return + } + }() + + select { + case <-verifyChannel: + case <-time.After(bastionVerifyTimeoutSeconds * time.Second): + r.tarmak.Log().Debug("failed to verify bastion instance") + result.Status = BastionStatusDown + return nil } - result.Status = "ready" + result.Status = BastionStatusReady return nil } diff --git a/pkg/terraform/providers/tarmak/rpc/vault_cluster_status.go b/pkg/terraform/providers/tarmak/rpc/vault_cluster_status.go index 4e1a0c1f86..8b87822f85 100644 --- a/pkg/terraform/providers/tarmak/rpc/vault_cluster_status.go +++ b/pkg/terraform/providers/tarmak/rpc/vault_cluster_status.go @@ -10,6 +10,11 @@ import ( cluster "github.com/jetstack/tarmak/pkg/apis/cluster/v1alpha1" ) +const ( + VaultStatusUnknown = "unknown" + VaultStatusReady = "ready" +) + var ( VaultClusterStatusCall = fmt.Sprintf("%s.VaultClusterStatus", RPCName) VaultClusterInitStatusCall = fmt.Sprintf("%s.VaultClusterInitStatus", RPCName) @@ -30,7 +35,7 @@ func (r *tarmakRPC) VaultClusterStatus(args *VaultClusterStatusArgs, result *Vau r.tarmak.Log().Debug("received rpc vault cluster status") if r.tarmak.Cluster().GetState() == cluster.StateDestroy { - result.Status = "unknown" + result.Status = VaultStatusUnknown return nil } @@ -40,14 +45,16 @@ func (r *tarmakRPC) VaultClusterStatus(args *VaultClusterStatusArgs, result *Vau if err != nil { err = fmt.Errorf("failed to initialise vault cluster: %s", err) r.tarmak.Log().Error(err) - return err + result.Status = VaultStatusUnknown + return nil } vaultTunnel, err := vault.TunnelFromFQDNs(args.VaultInternalFQDNs, args.VaultCA) if err != nil { err = fmt.Errorf("failed to create vault tunnel: %s", err) r.tarmak.Log().Error(err) - return err + result.Status = VaultStatusUnknown + return nil } defer vaultTunnel.Stop() @@ -57,7 +64,8 @@ func (r *tarmakRPC) VaultClusterStatus(args *VaultClusterStatusArgs, result *Vau if err != nil { err = fmt.Errorf("failed to retrieve vault root 
token: %s", err) r.tarmak.Log().Error(err) - return err + result.Status = VaultStatusUnknown + return nil } vaultClient.SetToken(vaultRootToken) @@ -68,10 +76,11 @@ func (r *tarmakRPC) VaultClusterStatus(args *VaultClusterStatusArgs, result *Vau if err := k.Ensure(); err != nil { err = fmt.Errorf("vault cluster is not ready: %s", err) r.tarmak.Log().Error(err) - return err + result.Status = VaultStatusUnknown + return nil } - result.Status = "ready" + result.Status = VaultStatusReady return nil } @@ -79,7 +88,7 @@ func (r *tarmakRPC) VaultClusterInitStatus(args *VaultClusterStatusArgs, result r.tarmak.Log().Debug("received rpc vault cluster status") if r.tarmak.Cluster().GetState() == cluster.StateDestroy { - result.Status = "unknown" + result.Status = VaultStatusUnknown return nil } @@ -89,7 +98,8 @@ func (r *tarmakRPC) VaultClusterInitStatus(args *VaultClusterStatusArgs, result if err != nil { err = fmt.Errorf("failed to create vault tunnel: %s", err) r.tarmak.Log().Error(err) - return err + result.Status = VaultStatusUnknown + return nil } defer vaultTunnel.Stop() @@ -99,7 +109,8 @@ func (r *tarmakRPC) VaultClusterInitStatus(args *VaultClusterStatusArgs, result if err != nil { err = fmt.Errorf("failed to retrieve vault root token: %s", err) r.tarmak.Log().Error(err) - return err + result.Status = VaultStatusUnknown + return nil } vaultClient.SetToken(vaultRootToken) @@ -117,14 +128,16 @@ func (r *tarmakRPC) VaultClusterInitStatus(args *VaultClusterStatusArgs, result if err != nil { err = fmt.Errorf("failed to retrieve init status: %s", err) r.tarmak.Log().Error(err) - return err + result.Status = VaultStatusUnknown + return nil } if !up { err = fmt.Errorf("failed to initialised vault cluster") r.tarmak.Log().Error(err) - return err + result.Status = VaultStatusUnknown + return nil } - result.Status = "ready" + result.Status = VaultStatusReady return nil } diff --git a/terraform/amazon/modules/bastion/bastion.tf b/terraform/amazon/modules/bastion/bastion.tf index 
b8988b85e3..db56a7740e 100644 --- a/terraform/amazon/modules/bastion/bastion.tf +++ b/terraform/amazon/modules/bastion/bastion.tf @@ -24,8 +24,14 @@ resource "aws_security_group" "bastion" { } } +data "tarmak_bastion_instance" "bastion" { + hostname = "bastion" + username = "centos" + + depends_on = ["aws_instance.bastion"] +} + resource "aws_instance" "bastion" { - count = 1 ami = "${var.bastion_ami}" instance_type = "${var.bastion_instance_type}" subnet_id = "${var.public_subnet_ids[0]}" diff --git a/terraform/amazon/modules/bastion/outputs.tf b/terraform/amazon/modules/bastion/outputs.tf index 9dc4d01400..821edc97f6 100644 --- a/terraform/amazon/modules/bastion/outputs.tf +++ b/terraform/amazon/modules/bastion/outputs.tf @@ -1,24 +1,11 @@ -output "bastion_instance_id" { - value = "${element(concat(aws_instance.bastion.*.id, list("")), 0)}" -} - - -output "bastion_fqdn" { - value = "${aws_route53_record.bastion.fqdn}" -} - -output "bastion_private_ip" { - value = "${aws_eip.bastion.public_ip}" -} - -output "bastion_ip" { - value = "${aws_eip.bastion.public_ip}" +output "bastion_status" { + value = "${data.tarmak_bastion_instance.bastion.status}" } output "bastion_security_group_id" { value = "${element(concat(aws_security_group.bastion.*.id, list("")), 0)}" } -output "remote_admin_security_group_id" { - value = "${aws_security_group.remote_admin.id}" +output "bastion_instance_id" { + value = "${element(concat(aws_instance.bastion.*.id, list("")), 0)}" } \ No newline at end of file diff --git a/terraform/amazon/modules/kubernetes/inputs.tf b/terraform/amazon/modules/kubernetes/inputs.tf index 8fde238dbf..65aaf1b351 100644 --- a/terraform/amazon/modules/kubernetes/inputs.tf +++ b/terraform/amazon/modules/kubernetes/inputs.tf @@ -47,10 +47,6 @@ variable "internal_fqdns" { type = "list" } -variable "vault_kms_key_id" {} - -variable "vault_unseal_key_name" {} - # template variables variable "availability_zones" { type = "list" @@ -76,4 +72,6 @@ variable 
"public_zone_id" {} variable "vault_security_group_id" {} -variable "bastion_security_group_id" {} \ No newline at end of file +variable "bastion_security_group_id" {} + +variable "vault_status" {} diff --git a/terraform/amazon/modules/kubernetes/vault.tf b/terraform/amazon/modules/kubernetes/vault.tf index 7fe8cfa094..3ef1d6a0d3 100644 --- a/terraform/amazon/modules/kubernetes/vault.tf +++ b/terraform/amazon/modules/kubernetes/vault.tf @@ -1,17 +1,9 @@ -resource "tarmak_vault_cluster" "vault" { - internal_fqdns = ["${var.internal_fqdns}"] - vault_ca = "${var.vault_ca}" - vault_kms_key_id = "${var.vault_kms_key_id}" - vault_unseal_key_name = "${var.vault_unseal_key_name}" -} - resource "tarmak_vault_instance_role" "master" { role_name = "master" vault_cluster_name = "${var.vault_cluster_name}" internal_fqdns = ["${var.internal_fqdns}"] vault_ca = "${var.vault_ca}" - - depends_on = ["tarmak_vault_cluster.vault"] + vault_status = "${var.vault_status}" } resource "tarmak_vault_instance_role" "worker" { @@ -19,8 +11,7 @@ resource "tarmak_vault_instance_role" "worker" { vault_cluster_name = "${var.vault_cluster_name}" internal_fqdns = ["${var.internal_fqdns}"] vault_ca = "${var.vault_ca}" - - depends_on = ["tarmak_vault_cluster.vault"] + vault_status = "${var.vault_status}" } resource "tarmak_vault_instance_role" "etcd" { @@ -28,6 +19,5 @@ resource "tarmak_vault_instance_role" "etcd" { vault_cluster_name = "${var.vault_cluster_name}" internal_fqdns = ["${var.internal_fqdns}"] vault_ca = "${var.vault_ca}" - - depends_on = ["tarmak_vault_cluster.vault"] + vault_status = "${var.vault_status}" } \ No newline at end of file diff --git a/terraform/amazon/modules/vault/cluster.tf b/terraform/amazon/modules/vault/cluster.tf new file mode 100644 index 0000000000..d13147f280 --- /dev/null +++ b/terraform/amazon/modules/vault/cluster.tf @@ -0,0 +1,7 @@ +resource "tarmak_vault_cluster" "vault" { + bastion_status = "${var.bastion_status}" + internal_fqdns = 
["${aws_route53_record.per-instance.*.fqdn}"] + vault_ca = "${element(concat(tls_self_signed_cert.ca.*.cert_pem, list("")), 0)}" + vault_kms_key_id = "${element(split("/", var.secrets_kms_arn), 1)}" + vault_unseal_key_name = "${data.template_file.vault_unseal_key_name.rendered}" +} \ No newline at end of file diff --git a/terraform/amazon/modules/vault/inputs.tf b/terraform/amazon/modules/vault/inputs.tf index 648bca34d4..a57cd88b4b 100644 --- a/terraform/amazon/modules/vault/inputs.tf +++ b/terraform/amazon/modules/vault/inputs.tf @@ -72,8 +72,6 @@ variable "bastion_security_group_id" {} # data.terraform_remote_state.network.vpc_id variable "vpc_id" {} -variable "bastion_instance_id" {} - variable "vault_cluster_name" {} data "template_file" "stack_name" { @@ -84,4 +82,4 @@ data "template_file" "vault_unseal_key_name" { template = "vault-${var.environment}-" } - +variable "bastion_status" {} diff --git a/terraform/amazon/modules/vault/outputs.tf b/terraform/amazon/modules/vault/outputs.tf index 4bd0f17922..8f5c5b7cae 100644 --- a/terraform/amazon/modules/vault/outputs.tf +++ b/terraform/amazon/modules/vault/outputs.tf @@ -6,14 +6,6 @@ output "vault_url" { value = "https://${element(concat(aws_route53_record.endpoint.*.fqdn, list("")), 0)}:8200" } -output "vault_kms_key_id" { - value = "${element(split("/", var.secrets_kms_arn), 1)}" -} - -output "vault_unseal_key_name" { - value = "${data.template_file.vault_unseal_key_name.rendered}" -} - output "instance_fqdns" { value = ["${aws_route53_record.per-instance.*.fqdn}"] } @@ -24,4 +16,8 @@ output "vault_security_group_id" { output "vault_aws_caller_identity_current_account_id" { value = "${data.aws_caller_identity.current.account_id}" +} + +output "vault_status" { + value = "${tarmak_vault_cluster.vault.status}" } \ No newline at end of file diff --git a/terraform/amazon/templates/modules.tf.template b/terraform/amazon/templates/modules.tf.template index 7d1ed03ba3..9d7392b84d 100644 --- 
a/terraform/amazon/templates/modules.tf.template +++ b/terraform/amazon/templates/modules.tf.template @@ -128,8 +128,8 @@ module "vault" { availability_zones = ["${var.availability_zones}"] bastion_security_group_id = "${module.bastion.bastion_security_group_id}" vpc_id = "${module.network.vpc_id}" - bastion_instance_id = "${module.bastion.bastion_instance_id}" vault_cluster_name = "${var.vault_cluster_name}" + bastion_status = "${module.bastion.bastion_status}" } {{end}} @@ -160,8 +160,6 @@ module "kubernetes" { private_subnet_ids = ["${module.network.private_subnet_ids}"] public_subnet_ids = ["${module.network.public_subnet_ids}"] internal_fqdns = ["${module.vault.instance_fqdns}"] - vault_kms_key_id = "${module.vault.vault_kms_key_id}" - vault_unseal_key_name = "${module.vault.vault_unseal_key_name}" # template variables availability_zones = ["${module.network.availability_zones}"] vpc_id = "${module.network.vpc_id}" @@ -173,6 +171,7 @@ module "kubernetes" { public_zone_id = "${module.state.public_zone_id}" vault_security_group_id = "${module.vault.vault_security_group_id}" bastion_security_group_id = "${module.bastion.bastion_security_group_id}" + vault_status = "${module.vault.vault_status}" } {{end}} @@ -226,5 +225,6 @@ module "kubernetes" { vault_ca = "${data.terraform_remote_state.hub_state.vault_vault_ca}" vault_url = "${data.terraform_remote_state.hub_state.vault_vault_url}" vault_security_group_id = "${data.terraform_remote_state.hub_state.vault_vault_security_group_id}" + vault_status = "${module.vault.vault_status}" } {{end}} diff --git a/terraform/amazon/templates/vault_instances.tf.template b/terraform/amazon/templates/vault_instances.tf.template index 2dd0551b35..141c99d8f0 100644 --- a/terraform/amazon/templates/vault_instances.tf.template +++ b/terraform/amazon/templates/vault_instances.tf.template @@ -49,12 +49,6 @@ resource "aws_cloudwatch_metric_alarm" "vault-autorecover" { } } -data "tarmak_bastion_instance" "bastion" { - hostname = "bastion" 
- username = "centos" - instance_id = "${var.bastion_instance_id}" -} - resource "aws_instance" "vault" { ami = "${var.vault_ami}" instance_type = "${var.vault_instance_type}" @@ -86,8 +80,6 @@ resource "aws_instance" "vault" { lifecycle { ignore_changes = ["volume_tags"] } - - depends_on = ["data.tarmak_bastion_instance.bastion"] } resource "aws_ebs_volume" "vault" {