Skip to content

Commit b690ffe

Browse files
committed
test: improve DNS resolver test stability
Run a health check before the test, as the test depends on CoreDNS being healthy, and previous tests might disturb the cluster. Also refactor by using watch instead of retries, and make pods terminate fast. Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
1 parent 5aa0299 commit b690ffe

File tree

3 files changed

+87
-21
lines changed

3 files changed

+87
-21
lines changed

internal/integration/api/common.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"strings"
1212
"time"
1313

14+
"github.com/siderolabs/go-pointer"
1415
corev1 "k8s.io/api/core/v1"
1516
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1617

@@ -103,6 +104,7 @@ file locks (-x) unlimited
103104
},
104105
},
105106
},
107+
TerminationGracePeriodSeconds: pointer.To[int64](0),
106108
},
107109
}, metav1.CreateOptions{})
108110

@@ -122,6 +124,11 @@ file locks (-x) unlimited
122124

123125
// TestDNSResolver verifies that external DNS resolving works from a pod.
124126
func (suite *CommonSuite) TestDNSResolver() {
127+
if suite.Cluster != nil {
128+
// cluster should be healthy for kube-dns resolving to work
129+
suite.AssertClusterHealthy(suite.ctx)
130+
}
131+
125132
const (
126133
namespace = "default"
127134
pod = "dns-test"
@@ -143,6 +150,7 @@ func (suite *CommonSuite) TestDNSResolver() {
143150
},
144151
},
145152
},
153+
TerminationGracePeriodSeconds: pointer.To[int64](0),
146154
},
147155
}, metav1.CreateOptions{})
148156

@@ -151,7 +159,7 @@ func (suite *CommonSuite) TestDNSResolver() {
151159
defer suite.Clientset.CoreV1().Pods(namespace).Delete(suite.ctx, pod, metav1.DeleteOptions{}) //nolint:errcheck
152160

153161
// wait for the pod to be ready
154-
suite.Require().NoError(suite.WaitForPodToBeRunning(suite.ctx, 10*time.Minute, namespace, pod))
162+
suite.Require().NoError(suite.WaitForPodToBeRunning(suite.ctx, time.Minute, namespace, pod))
155163

156164
stdout, stderr, err := suite.ExecuteCommandInPod(suite.ctx, namespace, pod, "wget https://www.google.com/")
157165
suite.Require().NoError(err)

internal/integration/base/api.go

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,12 @@ func (apiSuite *APISuite) SetupSuite() {
5858
apiSuite.Talosconfig, err = clientconfig.Open(apiSuite.TalosConfig)
5959
apiSuite.Require().NoError(err)
6060

61-
opts := []client.OptionFunc{
62-
client.WithConfig(apiSuite.Talosconfig),
63-
}
64-
6561
if apiSuite.Endpoint != "" {
66-
opts = append(opts, client.WithEndpoints(apiSuite.Endpoint))
62+
apiSuite.Client = apiSuite.GetClientWithEndpoints(apiSuite.Endpoint)
63+
} else {
64+
apiSuite.Client = apiSuite.GetClientWithEndpoints()
6765
}
6866

69-
apiSuite.Client, err = client.New(context.TODO(), opts...)
70-
apiSuite.Require().NoError(err)
71-
7267
// clear any connection refused errors left after the previous tests
7368
nodes := apiSuite.DiscoverNodeInternalIPs(context.TODO())
7469

@@ -78,6 +73,19 @@ func (apiSuite *APISuite) SetupSuite() {
7873
}
7974
}
8075

76+
// GetClientWithEndpoints returns Talos API client with provided endpoints.
77+
func (apiSuite *APISuite) GetClientWithEndpoints(endpoints ...string) *client.Client {
78+
opts := []client.OptionFunc{
79+
client.WithConfig(apiSuite.Talosconfig),
80+
client.WithEndpoints(endpoints...),
81+
}
82+
83+
cli, err := client.New(context.TODO(), opts...)
84+
apiSuite.Require().NoError(err)
85+
86+
return cli
87+
}
88+
8189
// DiscoverNodes provides list of Talos nodes in the cluster.
8290
//
8391
// As there's no way to provide this functionality via Talos API, it works the following way:
@@ -590,6 +598,9 @@ func (apiSuite *APISuite) ResetNode(ctx context.Context, node string, resetSpec
590598

591599
nodeCtx := client.WithNode(ctx, node)
592600

601+
nodeClient := apiSuite.GetClientWithEndpoints(node)
602+
defer nodeClient.Close() //nolint:errcheck
603+
593604
// any reset should lead to a reboot, so read boot_id before reboot
594605
bootIDBefore, err := apiSuite.ReadBootID(nodeCtx)
595606
apiSuite.Require().NoError(err)
@@ -612,15 +623,15 @@ func (apiSuite *APISuite) ResetNode(ctx context.Context, node string, resetSpec
612623
preReset, err := apiSuite.HashKubeletCert(ctx, node)
613624
apiSuite.Require().NoError(err)
614625

615-
resp, err := apiSuite.Client.ResetGenericWithResponse(nodeCtx, resetSpec)
626+
resp, err := nodeClient.ResetGenericWithResponse(nodeCtx, resetSpec)
616627
apiSuite.Require().NoError(err)
617628

618629
actorID := resp.Messages[0].ActorId
619630

620631
eventCh := make(chan client.EventResult)
621632

622633
// watch for events
623-
apiSuite.Require().NoError(apiSuite.Client.EventsWatchV2(nodeCtx, eventCh, client.WithActorID(actorID), client.WithTailEvents(-1)))
634+
apiSuite.Require().NoError(nodeClient.EventsWatchV2(nodeCtx, eventCh, client.WithActorID(actorID), client.WithTailEvents(-1)))
624635

625636
waitTimer := time.NewTimer(5 * time.Minute)
626637
defer waitTimer.Stop()

internal/integration/base/k8s.go

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -195,20 +195,67 @@ func (k8sSuite *K8sSuite) WaitForEventExists(ctx context.Context, ns string, che
195195

196196
// WaitForPodToBeRunning waits for the pod with the given namespace and name to be running.
197197
func (k8sSuite *K8sSuite) WaitForPodToBeRunning(ctx context.Context, timeout time.Duration, namespace, podName string) error {
198-
return retry.Constant(timeout, retry.WithUnits(time.Second*10)).Retry(
199-
func() error {
200-
pod, err := k8sSuite.Clientset.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
201-
if err != nil {
202-
return retry.ExpectedErrorf("error getting pod: %s", err)
198+
ctx, cancel := context.WithTimeout(ctx, timeout)
199+
defer cancel()
200+
201+
watcher, err := k8sSuite.Clientset.CoreV1().Pods(namespace).Watch(ctx, metav1.ListOptions{
202+
FieldSelector: fields.OneTermEqualSelector("metadata.name", podName).String(),
203+
})
204+
if err != nil {
205+
return err
206+
}
207+
208+
defer watcher.Stop()
209+
210+
for {
211+
select {
212+
case <-ctx.Done():
213+
return ctx.Err()
214+
case event := <-watcher.ResultChan():
215+
if event.Type == watch.Error {
216+
return fmt.Errorf("error watching pod: %v", event.Object)
203217
}
204218

205-
if pod.Status.Phase != corev1.PodRunning {
206-
return retry.ExpectedErrorf("pod is not running yet: %s", pod.Status.Phase)
219+
pod, ok := event.Object.(*corev1.Pod)
220+
if !ok {
221+
continue
207222
}
208223

209-
return nil
210-
},
211-
)
224+
if pod.Name == podName && pod.Status.Phase == corev1.PodRunning {
225+
return nil
226+
}
227+
}
228+
}
229+
}
230+
231+
// WaitForPodToBeDeleted waits for the pod with the given namespace and name to be deleted.
232+
func (k8sSuite *K8sSuite) WaitForPodToBeDeleted(ctx context.Context, timeout time.Duration, namespace, podName string) error {
233+
ctx, cancel := context.WithTimeout(ctx, timeout)
234+
defer cancel()
235+
236+
watcher, err := k8sSuite.Clientset.CoreV1().Pods(namespace).Watch(ctx, metav1.ListOptions{
237+
FieldSelector: fields.OneTermEqualSelector("metadata.name", podName).String(),
238+
})
239+
if err != nil {
240+
return err
241+
}
242+
243+
defer watcher.Stop()
244+
245+
for {
246+
select {
247+
case <-ctx.Done():
248+
return ctx.Err()
249+
case event := <-watcher.ResultChan():
250+
if event.Type == watch.Deleted {
251+
return nil
252+
}
253+
254+
if event.Type == watch.Error {
255+
return fmt.Errorf("error watching pod: %v", event.Object)
256+
}
257+
}
258+
}
212259
}
213260

214261
// ExecuteCommandInPod executes the given command in the pod with the given namespace and name.

0 commit comments

Comments (0)