Skip to content

Commit c5b59df

Browse files
committed
fix: wait for devices to be discovered before probing filesystems
With Talos 1.7+, more storage drivers are split out as modules, so devices might not have been discovered yet by the time the platform config is loaded. Explicitly wait for udevd to settle before trying to probe a CD.

Fixes #8625

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
1 parent 0821b9c commit c5b59df

File tree

10 files changed

+95
-14
lines changed

10 files changed

+95
-14
lines changed

internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils/netutils.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818

1919
"github.com/siderolabs/talos/pkg/machinery/constants"
2020
"github.com/siderolabs/talos/pkg/machinery/resources/network"
21+
"github.com/siderolabs/talos/pkg/machinery/resources/runtime"
2122
)
2223

2324
// Wait for the network to be ready to interact with platform metadata services.
@@ -39,11 +40,21 @@ func WaitInterfaces(ctx context.Context, r state.State) error {
3940
return fmt.Errorf("error listing host interfaces: %w", err)
4041
}
4142

42-
if hostInterfaces.Len() != 0 {
43+
numPhysical := 0
44+
45+
for iter := hostInterfaces.Iterator(); iter.Next(); {
46+
iface := iter.Value()
47+
48+
if iface.TypedSpec().Physical() {
49+
numPhysical++
50+
}
51+
}
52+
53+
if numPhysical > 0 {
4354
return nil
4455
}
4556

46-
log.Printf("waiting for network interface appearse...")
57+
log.Printf("waiting for physical network interfaces to appear...")
4758

4859
interval := backoff.NextBackOff()
4960

@@ -57,6 +68,13 @@ func WaitInterfaces(ctx context.Context, r state.State) error {
5768
return nil
5869
}
5970

71+
// WaitForDevicesReady waits for devices to be ready.
72+
func WaitForDevicesReady(ctx context.Context, r state.State) error {
73+
log.Printf("waiting for devices to be ready...")
74+
75+
return runtime.NewDevicesStatusCondition(r).Wait(ctx)
76+
}
77+
6078
// RetryFetch retries fetching from metadata service.
6179
func RetryFetch(ctx context.Context, f func(ctx context.Context) (string, error)) (string, error) {
6280
var (

internal/app/machined/pkg/runtime/v1alpha1/platform/metal/metal.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func (m *Metal) Configuration(ctx context.Context, r state.State) ([]byte, error
7676

7777
switch *option {
7878
case constants.MetalConfigISOLabel:
79-
return readConfigFromISO()
79+
return readConfigFromISO(ctx, r)
8080
default:
8181
if err := netutils.Wait(ctx, r); err != nil {
8282
return nil, err
@@ -119,7 +119,11 @@ func (m *Metal) Mode() runtime.Mode {
119119
return runtime.ModeMetal
120120
}
121121

122-
func readConfigFromISO() ([]byte, error) {
122+
func readConfigFromISO(ctx context.Context, r state.State) ([]byte, error) {
123+
if err := netutils.WaitForDevicesReady(ctx, r); err != nil {
124+
return nil, fmt.Errorf("failed to wait for devices: %w", err)
125+
}
126+
123127
dev, err := probe.GetDevWithFileSystemLabel(constants.MetalConfigISOLabel)
124128
if err != nil {
125129
return nil, fmt.Errorf("failed to find %s iso: %w", constants.MetalConfigISOLabel, err)

internal/app/machined/pkg/runtime/v1alpha1/platform/nocloud/metadata.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,12 @@ func (n *Nocloud) configFromNetwork(ctx context.Context, metaBaseURL string, r s
144144
return metaConfig, networkConfig, machineConfig, err
145145
}
146146

147-
func (n *Nocloud) configFromCD() (metaConfig []byte, networkConfig []byte, machineConfig []byte, err error) {
147+
//nolint:gocyclo
148+
func (n *Nocloud) configFromCD(ctx context.Context, r state.State) (metaConfig []byte, networkConfig []byte, machineConfig []byte, err error) {
149+
if err := netutils.WaitForDevicesReady(ctx, r); err != nil {
150+
return nil, nil, nil, fmt.Errorf("failed to wait for devices: %w", err)
151+
}
152+
148153
var dev *probe.ProbedBlockDevice
149154

150155
dev, err = probe.GetDevWithFileSystemLabel(strings.ToLower(configISOLabel))
@@ -244,7 +249,7 @@ func (n *Nocloud) acquireConfig(ctx context.Context, r state.State) (metadataCon
244249
if networkSource && metaBaseURL != "" {
245250
metadataConfigDl, metadataNetworkConfigDl, machineConfigDl, err = n.configFromNetwork(ctx, metaBaseURL, r)
246251
} else {
247-
metadataConfigDl, metadataNetworkConfigDl, machineConfigDl, err = n.configFromCD()
252+
metadataConfigDl, metadataNetworkConfigDl, machineConfigDl, err = n.configFromCD(ctx, r)
248253
}
249254

250255
metadata = &MetadataConfig{}

internal/app/machined/pkg/runtime/v1alpha1/platform/nocloud/nocloud_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,20 @@ func TestParseMetadata(t *testing.T) {
5858

5959
eth0 := network.NewLinkStatus(network.NamespaceName, "eth0")
6060
eth0.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x68, 0x05, 0xca, 0xb8, 0xf1, 0xf7}
61+
eth0.TypedSpec().Type = nethelpers.LinkEther
62+
eth0.TypedSpec().Kind = ""
6163
require.NoError(t, st.Create(context.TODO(), eth0))
6264

6365
eth1 := network.NewLinkStatus(network.NamespaceName, "eth1")
6466
eth1.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x68, 0x05, 0xca, 0xb8, 0xf1, 0xf8}
67+
eth1.TypedSpec().Type = nethelpers.LinkEther
68+
eth1.TypedSpec().Kind = ""
6569
require.NoError(t, st.Create(context.TODO(), eth1))
6670

6771
eth2 := network.NewLinkStatus(network.NamespaceName, "eth2")
6872
eth2.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x68, 0x05, 0xca, 0xb8, 0xf1, 0xf9}
73+
eth2.TypedSpec().Type = nethelpers.LinkEther
74+
eth2.TypedSpec().Kind = ""
6975
require.NoError(t, st.Create(context.TODO(), eth2))
7076

7177
var m nocloud.NetworkConfig

internal/app/machined/pkg/runtime/v1alpha1/platform/opennebula/metadata.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,20 @@
66
package opennebula
77

88
import (
9+
"context"
910
"fmt"
1011
"log"
1112
"os"
1213
"path/filepath"
1314
"strings"
1415

16+
"github.com/cosi-project/runtime/pkg/state"
1517
"github.com/siderolabs/go-blockdevice/blockdevice/filesystem"
1618
"github.com/siderolabs/go-blockdevice/blockdevice/probe"
1719
"golang.org/x/sys/unix"
1820

1921
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
22+
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
2023
)
2124

2225
const (
@@ -25,7 +28,11 @@ const (
2528
mnt = "/mnt"
2629
)
2730

28-
func (o *OpenNebula) contextFromCD() (oneContext []byte, err error) {
31+
func (o *OpenNebula) contextFromCD(ctx context.Context, r state.State) (oneContext []byte, err error) {
32+
if err := netutils.WaitForDevicesReady(ctx, r); err != nil {
33+
return nil, fmt.Errorf("failed to wait for devices: %w", err)
34+
}
35+
2936
var dev *probe.ProbedBlockDevice
3037

3138
dev, err = probe.GetDevWithFileSystemLabel(strings.ToLower(configISOLabel))

internal/app/machined/pkg/runtime/v1alpha1/platform/opennebula/opennebula.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ func (o *OpenNebula) ParseMetadata(st state.State, oneContextPlain []byte) (*run
196196

197197
// Configuration implements the runtime.Platform interface.
198198
func (o *OpenNebula) Configuration(ctx context.Context, r state.State) (machineConfig []byte, err error) {
199-
oneContextPlain, err := o.contextFromCD()
199+
oneContextPlain, err := o.contextFromCD(ctx, r)
200200
if err != nil {
201201
return nil, err
202202
}
@@ -234,7 +234,7 @@ func (o *OpenNebula) KernelArgs(string) procfs.Parameters {
234234

235235
// NetworkConfiguration implements the runtime.Platform interface.
236236
func (o *OpenNebula) NetworkConfiguration(ctx context.Context, st state.State, ch chan<- *runtime.PlatformNetworkConfig) error {
237-
oneContext, err := o.contextFromCD()
237+
oneContext, err := o.contextFromCD(ctx, st)
238238
if stderrors.Is(err, errors.ErrNoConfigSource) {
239239
err = nil
240240
}

internal/app/machined/pkg/runtime/v1alpha1/platform/opennebula/opennebula_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// This Source Code Form is subject to the terms of the Mozilla Public
22
// License, v. 2.0. If a copy of the MPL was not distributed with this
33
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4-
// go test -v ./internal/app/machined/pkg/runtime/v1alpha1/platform/opennebula
4+
55
package opennebula_test
66

77
import (

internal/app/machined/pkg/runtime/v1alpha1/platform/openstack/metadata.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ import (
1212
"os"
1313
"path/filepath"
1414

15+
"github.com/cosi-project/runtime/pkg/state"
1516
"github.com/siderolabs/go-blockdevice/blockdevice/filesystem"
1617
"github.com/siderolabs/go-blockdevice/blockdevice/probe"
1718
"golang.org/x/sys/unix"
1819

1920
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
21+
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
2022
"github.com/siderolabs/talos/pkg/download"
2123
)
2224

@@ -108,7 +110,12 @@ func (o *Openstack) configFromNetwork(ctx context.Context) (metaConfig []byte, n
108110
return metaConfig, networkConfig, machineConfig, err
109111
}
110112

111-
func (o *Openstack) configFromCD() (metaConfig []byte, networkConfig []byte, machineConfig []byte, err error) {
113+
//nolint:gocyclo
114+
func (o *Openstack) configFromCD(ctx context.Context, r state.State) (metaConfig []byte, networkConfig []byte, machineConfig []byte, err error) {
115+
if err := netutils.WaitForDevicesReady(ctx, r); err != nil {
116+
return nil, nil, nil, fmt.Errorf("failed to wait for devices: %w", err)
117+
}
118+
112119
var dev *probe.ProbedBlockDevice
113120

114121
dev, err = probe.GetDevWithFileSystemLabel(configISOLabel)

internal/app/machined/pkg/runtime/v1alpha1/platform/openstack/openstack.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ func (o *Openstack) ParseMetadata(
351351

352352
// Configuration implements the runtime.Platform interface.
353353
func (o *Openstack) Configuration(ctx context.Context, r state.State) (machineConfig []byte, err error) {
354-
_, _, machineConfig, err = o.configFromCD()
354+
_, _, machineConfig, err = o.configFromCD(ctx, r)
355355
if err != nil {
356356
if err = netutils.Wait(ctx, r); err != nil {
357357
return nil, err
@@ -389,7 +389,7 @@ func (o *Openstack) KernelArgs(string) procfs.Parameters {
389389
func (o *Openstack) NetworkConfiguration(ctx context.Context, st state.State, ch chan<- *runtime.PlatformNetworkConfig) error {
390390
networkSource := false
391391

392-
metadataConfigDl, metadataNetworkConfigDl, _, err := o.configFromCD()
392+
metadataConfigDl, metadataNetworkConfigDl, _, err := o.configFromCD(ctx, st)
393393
if err != nil {
394394
metadataConfigDl, metadataNetworkConfigDl, _, err = o.configFromNetwork(ctx)
395395
if stderrors.Is(err, errors.ErrNoConfigSource) {

pkg/machinery/resources/runtime/condition.go

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ type ExtensionServiceConfigStatusCondition struct {
6363
serviceName string
6464
}
6565

66-
// NewExtensionServiceConfigStatusCondition builds a coondition which waits for extension service config to be available.
66+
// NewExtensionServiceConfigStatusCondition builds a condition which waits for extension service config to be available.
6767
func NewExtensionServiceConfigStatusCondition(state state.State, serviceName string) *ExtensionServiceConfigStatusCondition {
6868
return &ExtensionServiceConfigStatusCondition{
6969
state: state,
@@ -85,3 +85,37 @@ func (condition *ExtensionServiceConfigStatusCondition) Wait(ctx context.Context
8585

8686
return err
8787
}
88+
89+
// DevicesStatusCondition implements condition which waits for devices to be ready.
90+
type DevicesStatusCondition struct {
91+
state state.State
92+
}
93+
94+
// NewDevicesStatusCondition builds a condition which waits for devices to be ready.
95+
func NewDevicesStatusCondition(state state.State) *DevicesStatusCondition {
96+
return &DevicesStatusCondition{
97+
state: state,
98+
}
99+
}
100+
101+
func (condition *DevicesStatusCondition) String() string {
102+
return "devices to be ready"
103+
}
104+
105+
// Wait implements condition interface.
106+
func (condition *DevicesStatusCondition) Wait(ctx context.Context) error {
107+
_, err := condition.state.WatchFor(
108+
ctx,
109+
resource.NewMetadata(NamespaceName, DevicesStatusType, DevicesID, resource.VersionUndefined),
110+
state.WithEventTypes(state.Created, state.Updated),
111+
state.WithCondition(func(r resource.Resource) (bool, error) {
112+
if resource.IsTombstone(r) {
113+
return false, nil
114+
}
115+
116+
return r.(*DevicesStatus).TypedSpec().Ready, nil
117+
}),
118+
)
119+
120+
return err
121+
}

0 commit comments

Comments
 (0)