Skip to content

Commit 4dd0aa7

Browse files
committed
feat: implement PCI device bus enumeration
Fixes #8826

From the QEMU VM:

```shell
$ talosctl -n 172.20.0.5 get pcidevice
NODE         NAMESPACE   TYPE        ID             VERSION   CLASS                       SUBCLASS                    VENDOR              PRODUCT
172.20.0.5   hardware    PCIDevice   0000:00:00.0   1         Bridge                      Host bridge                 Intel Corporation   82G33/G31/P35/P31 Express DRAM Controller
172.20.0.5   hardware    PCIDevice   0000:00:01.0   1         Display controller          VGA compatible controller
172.20.0.5   hardware    PCIDevice   0000:00:02.0   1         Network controller          Ethernet controller         Red Hat, Inc.       Virtio network device
172.20.0.5   hardware    PCIDevice   0000:00:03.0   1         Unclassified device                                     Red Hat, Inc.       Virtio RNG
172.20.0.5   hardware    PCIDevice   0000:00:04.0   1         Unclassified device                                     Red Hat, Inc.       Virtio memory balloon
172.20.0.5   hardware    PCIDevice   0000:00:05.0   1         Communication controller    Communication controller    Red Hat, Inc.       Virtio console
172.20.0.5   hardware    PCIDevice   0000:00:06.0   1         Generic system peripheral   System peripheral           Intel Corporation   6300ESB Watchdog Timer
172.20.0.5   hardware    PCIDevice   0000:00:07.0   1         Mass storage controller     SCSI storage controller     Red Hat, Inc.       Virtio block device
172.20.0.5   hardware    PCIDevice   0000:00:1f.0   1         Bridge                      ISA bridge                  Intel Corporation   82801IB (ICH9) LPC Interface Controller
172.20.0.5   hardware    PCIDevice   0000:00:1f.2   1         Mass storage controller     SATA controller             Intel Corporation   82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode]
172.20.0.5   hardware    PCIDevice   0000:00:1f.3   1         Serial bus controller       SMBus                       Intel Corporation   82801I (ICH9 Family) SMBus Controller
```

```yaml
node: 172.20.0.5
metadata:
    namespace: hardware
    type: PCIDevices.hardware.talos.dev
    id: 0000:00:1f.3
    version: 1
    owner: hardware.PCIDevicesController
    phase: running
    created: 2024-05-30T12:09:05Z
    updated: 2024-05-30T12:09:05Z
spec:
    class: Serial bus controller
    subclass: SMBus
    vendor: Intel Corporation
    product: 82801I (ICH9 Family) SMBus Controller
    class_id: "0x0c"
    subclass_id: "0x05"
    vendor_id: "0x8086"
    product_id: "0x2930"
```

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
1 parent b0466e0 commit 4dd0aa7

File tree

15 files changed

+990
-64
lines changed

15 files changed

+990
-64
lines changed

api/resource/definitions/hardware/hardware.proto

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,18 @@ message MemoryModuleSpec {
1616
string product_name = 8;
1717
}
1818

19+
// PCIDeviceSpec represents a single PCI device.
20+
message PCIDeviceSpec {
21+
string class = 1;
22+
string subclass = 2;
23+
string vendor = 3;
24+
string product = 4;
25+
string class_id = 5;
26+
string subclass_id = 6;
27+
string vendor_id = 7;
28+
string product_id = 8;
29+
}
30+
1931
// ProcessorSpec represents a single processor.
2032
message ProcessorSpec {
2133
string socket = 1;

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ require (
137137
github.com/siderolabs/go-kubeconfig v0.1.0
138138
github.com/siderolabs/go-kubernetes v0.2.9
139139
github.com/siderolabs/go-loadbalancer v0.3.3
140-
github.com/siderolabs/go-pcidb v0.2.0
140+
github.com/siderolabs/go-pcidb v0.3.0
141141
github.com/siderolabs/go-pointer v1.0.0
142142
github.com/siderolabs/go-procfs v0.1.2
143143
github.com/siderolabs/go-retry v0.3.3

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -678,8 +678,8 @@ github.com/siderolabs/go-kubernetes v0.2.9 h1:EtaOcni9P0etJz+UDlIKQkgsTjCg2MWI2p
678678
github.com/siderolabs/go-kubernetes v0.2.9/go.mod h1:AAydnLZrqG+MJrKTa82AszkWIytkqwDBt7PL+bfbupI=
679679
github.com/siderolabs/go-loadbalancer v0.3.3 h1:D6ONnP9Erlh4TS6kV9L7ocnfrNYCA/58i6ZF0QweLJk=
680680
github.com/siderolabs/go-loadbalancer v0.3.3/go.mod h1:7j4Q9peU/UFuTNSFfwhKLQ028CNkyMkAdGnSi1Dm7Jw=
681-
github.com/siderolabs/go-pcidb v0.2.0 h1:ZCkF1cz6UjoEIHpP7+aeTI5BwmSxE627Jl1Wy2VZAwU=
682-
github.com/siderolabs/go-pcidb v0.2.0/go.mod h1:XstZrp8xnganxzIc3UQKfCs1fQFgYWH2lqtWeqBwRok=
681+
github.com/siderolabs/go-pcidb v0.3.0 h1:jR4w1YLNY8Cv1o5jnoQ2Q+pbxcosO2FVFrAAp1RURnw=
682+
github.com/siderolabs/go-pcidb v0.3.0/go.mod h1:4XYdmnR/o9kSzMe8dKK17wLBhPNIsisjqmU3QD1FjRk=
683683
github.com/siderolabs/go-pointer v1.0.0 h1:6TshPKep2doDQJAAtHUuHWXbca8ZfyRySjSBT/4GsMU=
684684
github.com/siderolabs/go-pointer v1.0.0/go.mod h1:HTRFUNYa3R+k0FFKNv11zgkaCLzEkWVzoYZ433P3kHc=
685685
github.com/siderolabs/go-procfs v0.1.2 h1:bDs9hHyYGE2HO1frpmUsD60yg80VIEDrx31fkbi4C8M=

hack/release.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ machine:
4949
```
5050
5151
Please note that on a running cluster you will have to kill CoreDNS pods for this change to apply.
52+
"""
53+
54+
[notes.lspci]
55+
title = "PCI Devices"
56+
description = """\
57+
A list of PCI devices can now be obtained via the `PCIDevices` resource, e.g. `talosctl get pcidevices`.
5258
"""
5359

5460
[make_deps]
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
package hardware
6+
7+
import (
8+
"bytes"
9+
"context"
10+
"fmt"
11+
"os"
12+
"path/filepath"
13+
"strconv"
14+
15+
"github.com/cosi-project/runtime/pkg/controller"
16+
"github.com/cosi-project/runtime/pkg/safe"
17+
"github.com/siderolabs/go-pcidb/pkg/pcidb"
18+
"go.uber.org/zap"
19+
20+
runtimetalos "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
21+
"github.com/siderolabs/talos/pkg/machinery/resources/hardware"
22+
)
23+
24+
// PCIDevicesController populates PCI device information.
25+
type PCIDevicesController struct {
26+
V1Alpha1Mode runtimetalos.Mode
27+
}
28+
29+
// Name implements controller.Controller interface.
30+
func (ctrl *PCIDevicesController) Name() string {
31+
return "hardware.PCIDevicesController"
32+
}
33+
34+
// Inputs implements controller.Controller interface.
35+
func (ctrl *PCIDevicesController) Inputs() []controller.Input {
36+
return nil
37+
}
38+
39+
// Outputs implements controller.Controller interface.
40+
func (ctrl *PCIDevicesController) Outputs() []controller.Output {
41+
return []controller.Output{
42+
{
43+
Type: hardware.PCIDeviceType,
44+
Kind: controller.OutputExclusive,
45+
},
46+
}
47+
}
48+
49+
// Run implements controller.Controller interface.
50+
//
51+
//nolint:gocyclo
52+
func (ctrl *PCIDevicesController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
53+
// PCI device info doesn't make sense inside a container, so skip the controller
54+
if ctrl.V1Alpha1Mode == runtimetalos.ModeContainer {
55+
return nil
56+
}
57+
58+
// [TODO]: a single run for now, need to figure out how to trigger rescan
59+
for {
60+
select {
61+
case <-ctx.Done():
62+
return nil
63+
case <-r.EventCh():
64+
}
65+
66+
deviceIDs, err := os.ReadDir("/sys/bus/pci/devices")
67+
if err != nil {
68+
return fmt.Errorf("error scanning devices: %w", err)
69+
}
70+
71+
logger.Debug("found PCI devices", zap.Int("count", len(deviceIDs)))
72+
73+
r.StartTrackingOutputs()
74+
75+
for _, deviceID := range deviceIDs {
76+
class, err := readHexPCIInfo(deviceID.Name(), "class")
77+
if err != nil {
78+
if os.IsNotExist(err) {
79+
continue
80+
}
81+
82+
return fmt.Errorf("error parsing device %s class: %w", deviceID.Name(), err)
83+
}
84+
85+
vendor, err := readHexPCIInfo(deviceID.Name(), "vendor")
86+
if err != nil {
87+
if os.IsNotExist(err) {
88+
continue
89+
}
90+
91+
return fmt.Errorf("error parsing device %s vendor: %w", deviceID.Name(), err)
92+
}
93+
94+
product, err := readHexPCIInfo(deviceID.Name(), "device")
95+
if err != nil {
96+
if os.IsNotExist(err) {
97+
continue
98+
}
99+
100+
return fmt.Errorf("error parsing device %s product: %w", deviceID.Name(), err)
101+
}
102+
103+
classID := pcidb.Class((class >> 16) & 0xff)
104+
subclassID := pcidb.Subclass((class >> 8) & 0xff)
105+
vendorID := pcidb.Vendor(vendor)
106+
productID := pcidb.Product(product)
107+
108+
if err := safe.WriterModify(ctx, r, hardware.NewPCIDeviceInfo(deviceID.Name()), func(r *hardware.PCIDevice) error {
109+
r.TypedSpec().ClassID = fmt.Sprintf("0x%02x", classID)
110+
r.TypedSpec().SubclassID = fmt.Sprintf("0x%02x", subclassID)
111+
r.TypedSpec().VendorID = fmt.Sprintf("0x%04x", vendorID)
112+
r.TypedSpec().ProductID = fmt.Sprintf("0x%04x", productID)
113+
114+
r.TypedSpec().Class, _ = pcidb.LookupClass(classID)
115+
r.TypedSpec().Subclass, _ = pcidb.LookupSubclass(classID, subclassID)
116+
r.TypedSpec().Vendor, _ = pcidb.LookupVendor(vendorID)
117+
r.TypedSpec().Product, _ = pcidb.LookupProduct(vendorID, productID)
118+
119+
return nil
120+
}); err != nil {
121+
return fmt.Errorf("error modifying output resource: %w", err)
122+
}
123+
}
124+
125+
if err = safe.CleanupOutputs[*hardware.PCIDevice](ctx, r); err != nil {
126+
return err
127+
}
128+
}
129+
}
130+
131+
func readHexPCIInfo(deviceID, info string) (uint64, error) {
132+
contents, err := os.ReadFile(filepath.Join("/sys/bus/pci/devices", deviceID, info))
133+
if err != nil {
134+
return 0, err
135+
}
136+
137+
return strconv.ParseUint(string(bytes.TrimSpace(contents)), 0, 64)
138+
}

internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error
133133
EtcPath: "/etc",
134134
ShadowPath: constants.SystemEtcPath,
135135
},
136+
&hardware.PCIDevicesController{
137+
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
138+
},
136139
&hardware.SystemInfoController{
137140
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
138141
},

internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_state.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,9 @@ func NewState() (*State, error) {
111111
&etcd.Member{},
112112
&files.EtcFileSpec{},
113113
&files.EtcFileStatus{},
114-
&hardware.Processor{},
115114
&hardware.MemoryModule{},
115+
&hardware.PCIDevice{},
116+
&hardware.Processor{},
116117
&hardware.SystemInformation{},
117118
&k8s.AdmissionControlConfig{},
118119
&k8s.AuditPolicyConfig{},

internal/integration/api/hardware.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
//go:build integration_api
6+
7+
package api
8+
9+
import (
10+
"context"
11+
"time"
12+
13+
"github.com/cosi-project/runtime/pkg/resource"
14+
"github.com/cosi-project/runtime/pkg/safe"
15+
"github.com/google/uuid"
16+
17+
"github.com/siderolabs/talos/internal/integration/base"
18+
"github.com/siderolabs/talos/pkg/machinery/client"
19+
"github.com/siderolabs/talos/pkg/machinery/resources/hardware"
20+
)
21+
22+
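// HardwareSuite verifies that hardware resources (system information, memory modules, processors, PCI devices) are populated.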
23+
type HardwareSuite struct {
24+
base.APISuite
25+
26+
ctx context.Context //nolint:containedctx
27+
ctxCancel context.CancelFunc
28+
}
29+
30+
// SuiteName returns the name of the suite.
31+
func (suite *HardwareSuite) SuiteName() string {
32+
return "api.HardwareSuite"
33+
}
34+
35+
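// SetupTest skips the test when the cluster doesn't run the Talos kernel, otherwise creates a test context with a 15-second timeout.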
36+
func (suite *HardwareSuite) SetupTest() {
37+
if !suite.Capabilities().RunsTalosKernel {
38+
suite.T().Skipf("doesn't run Talos kernel, skipping")
39+
}
40+
41+
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 15*time.Second)
42+
}
43+
44+
// TearDownTest cancels the test context if one was created.
45+
func (suite *HardwareSuite) TearDownTest() {
46+
if suite.ctxCancel != nil {
47+
suite.ctxCancel()
48+
}
49+
}
50+
51+
// TestSystemInformation tests that SystemInformation is populated.
52+
func (suite *HardwareSuite) TestSystemInformation() {
53+
node := suite.RandomDiscoveredNodeInternalIP()
54+
55+
sysInfo, err := safe.StateGetByID[*hardware.SystemInformation](client.WithNode(suite.ctx, node), suite.Client.COSI, hardware.SystemInformationID)
56+
suite.Require().NoError(err)
57+
58+
suite.Assert().NotEmpty(sysInfo.TypedSpec().UUID)
59+
suite.Assert().NotEqual((uuid.UUID{}).String(), sysInfo.TypedSpec().UUID)
60+
}
61+
62+
// TestHardwareInfo tests that hardware info is populated.
63+
func (suite *HardwareSuite) TestHardwareInfo() {
64+
node := suite.RandomDiscoveredNodeInternalIP()
65+
66+
for _, resourceType := range []resource.Type{
67+
hardware.MemoryModuleType,
68+
hardware.ProcessorType,
69+
hardware.PCIDeviceType,
70+
} {
71+
items, err := suite.Client.COSI.List(client.WithNode(suite.ctx, node), resource.NewMetadata(hardware.NamespaceName, resourceType, "", resource.VersionUndefined))
72+
suite.Require().NoError(err)
73+
74+
suite.Assert().NotEmpty(items.Items, "resource type %s is not populated", resourceType)
75+
}
76+
}
77+
78+
func init() {
79+
allSuites = append(allSuites, new(HardwareSuite))
80+
}

0 commit comments

Comments
 (0)