diff --git a/cmd/kops/toolbox_dump.go b/cmd/kops/toolbox_dump.go index 2198af700be4b..c5e423f757857 100644 --- a/cmd/kops/toolbox_dump.go +++ b/cmd/kops/toolbox_dump.go @@ -210,7 +210,14 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer, return fmt.Errorf("adding key to SSH agent: %w", err) } - dumper := dump.NewLogDumper(cluster.ObjectMeta.Name, sshConfig, keyRing, options.Dir) + // look for a bastion instance and use it if exists + bastionAddress := "" + for _, instance := range d.Instances { + if strings.Contains(instance.Name, "bastion") { + bastionAddress = instance.PublicAddresses[0] + } + } + dumper := dump.NewLogDumper(bastionAddress, sshConfig, keyRing, options.Dir) var additionalIPs []string var additionalPrivateIPs []string @@ -224,7 +231,7 @@ func RunToolboxDump(ctx context.Context, f commandutils.Factory, out io.Writer, } } - if err := dumper.DumpAllNodes(ctx, nodes, additionalIPs, additionalPrivateIPs); err != nil { + if err := dumper.DumpAllNodes(ctx, nodes, options.MaxNodes, additionalIPs, additionalPrivateIPs); err != nil { return fmt.Errorf("error dumping nodes: %v", err) } diff --git a/pkg/dump/dumper.go b/pkg/dump/dumper.go index d1452d55f4247..3a5e26c101b6d 100644 --- a/pkg/dump/dumper.go +++ b/pkg/dump/dumper.go @@ -34,11 +34,6 @@ import ( "k8s.io/klog/v2" ) -const ( - // MaxNodesToDump is the maximum number of nodes to dump - MaxNodesToDump = 500 -) - // logDumper gets all the nodes from a kubernetes cluster and dumps a well-known set of logs type logDumper struct { sshClientFactory sshClientFactory @@ -51,12 +46,15 @@ type logDumper struct { } // NewLogDumper is the constructor for a logDumper -func NewLogDumper(clusterName string, sshConfig *ssh.ClientConfig, keyRing agent.Agent, artifactsDir string) *logDumper { +func NewLogDumper(bastionAddress string, sshConfig *ssh.ClientConfig, keyRing agent.Agent, artifactsDir string) *logDumper { sshClientFactory := &sshClientFactoryImplementation{ - bastion: "bastion." + clusterName, keyRing: keyRing, sshConfig: sshConfig, } + if bastionAddress != "" { + log.Printf("detected a bastion instance, with the address: %s", bastionAddress) + sshClientFactory.bastion = bastionAddress + } d := &logDumper{ sshClientFactory: sshClientFactory, @@ -106,9 +104,10 @@ func NewLogDumper(clusterName string, sshConfig *ssh.ClientConfig, keyRing agent // if the IPs are not found from kubectl get nodes, then these will be dumped also. // This allows for dumping log on nodes even if they don't register as a kubernetes // node, or if a node fails to register, or if the whole cluster fails to start. -func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, additionalIPs, additionalPrivateIPs []string) error { +func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, maxNodesToDump int, additionalIPs, additionalPrivateIPs []string) error { var special, regular, dumped []*corev1.Node + log.Printf("starting to dump %d nodes fetched through the Kubernetes APIs", len(nodes.Items)) for i := range nodes.Items { node := &nodes.Items[i] @@ -139,8 +138,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add } for i := range regular { - if len(dumped) >= MaxNodesToDump { - log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump) + if len(dumped) >= maxNodesToDump { + log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump) return nil } node := regular[i] @@ -154,8 +153,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add notDumped := findInstancesNotDumped(additionalIPs, dumped) for _, ip := range notDumped { - if len(dumped) >= MaxNodesToDump { - log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump) + if len(dumped) >= maxNodesToDump { + log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump) return nil } err := d.dumpNotRegistered(ctx, ip, false) @@ -166,8 +165,8 @@ func (d *logDumper) DumpAllNodes(ctx context.Context, nodes corev1.NodeList, add notDumped = findInstancesNotDumped(additionalPrivateIPs, dumped) for _, ip := range notDumped { - if len(dumped) >= MaxNodesToDump { - log.Printf("stopping dumping nodes: %d nodes dumped", MaxNodesToDump) + if len(dumped) >= maxNodesToDump { + log.Printf("stopping dumping nodes: %d nodes dumped", maxNodesToDump) return nil } err := d.dumpNotRegistered(ctx, ip, true) diff --git a/pkg/resources/gce/dump.go b/pkg/resources/gce/dump.go index 89090a5b87714..2977dcc224fe1 100644 --- a/pkg/resources/gce/dump.go +++ b/pkg/resources/gce/dump.go @@ -63,6 +63,12 @@ func DumpManagedInstance(op *resources.DumpOperation, r *resources.Resource) err klog.Warningf("instance %q not found", instance.Instance) } else { for _, ni := range instanceDetails.NetworkInterfaces { + if ni.NetworkIP != "" { + i.PrivateAddresses = append(i.PrivateAddresses, ni.NetworkIP) + } + if ni.Ipv6Address != "" { + i.PrivateAddresses = append(i.PrivateAddresses, ni.Ipv6Address) + } for _, ac := range ni.AccessConfigs { if ac.NatIP != "" { i.PublicAddresses = append(i.PublicAddresses, ac.NatIP) diff --git a/tests/e2e/kubetest2-kops/deployer/common.go b/tests/e2e/kubetest2-kops/deployer/common.go index 230694916b6f6..aae96c6964b0b 100644 --- a/tests/e2e/kubetest2-kops/deployer/common.go +++ b/tests/e2e/kubetest2-kops/deployer/common.go @@ -100,6 +100,8 @@ func (d *deployer) initialize() error { d.SSHPublicKeyPath = publicKey } d.createBucket = true + } else if d.SSHPrivateKeyPath == "" && os.Getenv("KUBE_SSH_KEY_PATH") != "" { + d.SSHPrivateKeyPath = os.Getenv("KUBE_SSH_KEY_PATH") } } diff --git a/tests/e2e/kubetest2-kops/deployer/deployer.go b/tests/e2e/kubetest2-kops/deployer/deployer.go index 67a929bb8f845..c5787bfee0e61 100644 --- a/tests/e2e/kubetest2-kops/deployer/deployer.go +++ b/tests/e2e/kubetest2-kops/deployer/deployer.go @@ -68,6 +68,7 @@ type deployer struct { ValidationWait time.Duration `flag:"validation-wait" desc:"time to wait for newly created cluster to pass validation"` ValidationCount int `flag:"validation-count" desc:"how many times should a validation pass"` ValidationInterval time.Duration `flag:"validation-interval" desc:"time in duration to wait between validation attempts"` + MaxNodesToDump string `flag:"max-nodes-to-dump" desc:"max number of nodes to dump logs from, helpful to set when running scale tests"` TemplatePath string `flag:"template-path" desc:"The path to the manifest template used for cluster creation"` diff --git a/tests/e2e/kubetest2-kops/deployer/dumplogs.go b/tests/e2e/kubetest2-kops/deployer/dumplogs.go index 0f16e0b12e88c..ab0d72940da31 100644 --- a/tests/e2e/kubetest2-kops/deployer/dumplogs.go +++ b/tests/e2e/kubetest2-kops/deployer/dumplogs.go @@ -44,6 +44,10 @@ func (d *deployer) DumpClusterLogs() error { "--private-key", d.SSHPrivateKeyPath, "--ssh-user", d.SSHUser, } + + if d.MaxNodesToDump != "" { + args = append(args, "--max-nodes", d.MaxNodesToDump) + } klog.Info(strings.Join(args, " ")) cmd := exec.Command(args[0], args[1:]...) cmd.SetEnv(append(d.env(), "KOPS_TOOLBOX_DUMP_K8S_RESOURCES=1")...)