Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pci address lock for allocated devices #220

Merged
merged 2 commits into from
Nov 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cmd/sriov/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,12 @@ func cmdAdd(args *skel.CmdArgs) error {
return fmt.Errorf("error saving NetConf %q", err)
}

allocator := utils.NewPCIAllocator(config.DefaultCNIDir)
// Mark the pci address as in used
if err = allocator.SaveAllocatedPCI(netConf.DeviceID, args.Netns); err != nil {
return fmt.Errorf("error saving the pci allocation for vf pci address %s: %v", netConf.DeviceID, err)
}

return types.PrintResult(result, netConf.CNIVersion)
}

Expand Down Expand Up @@ -231,6 +237,12 @@ func cmdDel(args *skel.CmdArgs) error {
return fmt.Errorf("cmdDel() error reseting VF: %q", err)
}

// Mark the pci address as released
adrianchiris marked this conversation as resolved.
Show resolved Hide resolved
allocator := utils.NewPCIAllocator(config.DefaultCNIDir)
if err = allocator.DeleteAllocatedPCI(netConf.DeviceID); err != nil {
return fmt.Errorf("error cleaning the pci allocation for vf pci address %s: %v", netConf.DeviceID, err)
}

return nil
}

Expand Down
14 changes: 14 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,20 @@ func LoadConf(bytes []byte) (*sriovtypes.NetConf, error) {
return nil, fmt.Errorf("LoadConf(): VF pci addr is required")
}

allocator := utils.NewPCIAllocator(DefaultCNIDir)
SchSeba marked this conversation as resolved.
Show resolved Hide resolved
// Check if the device is already allocated.
// This is to prevent issues where kubelet request to delete a pod and in the same time a new pod using the same
// vf is started. we can have an issue where the cmdDel of the old pod is called AFTER the cmdAdd of the new one
// This will block the new pod creation until the cmdDel is done.
isAllocated, err := allocator.IsAllocated(n.DeviceID)
if err != nil {
return n, err
}

if isAllocated {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the verification from the main to here so we produce a better error when a PCI address is still assign.
instead of this error

Failed to create pod sandbox: rpc error: code = Unknown desc = failed to create pod network sandbox k8s_client_seba_65fe3641-ebd8-4f8b-9b17-785f07b3ad3b_0(0140bb9890307e2f3f8804a06dd0f70a0970f7b8ee1254227eb5c0043ddf2f77): error adding pod seba_client to CNI network "multus-cni-network": plugin type="multus" name="multus-cni-network" failed (add): [seba/client/65fe3641-ebd8-4f8b-9b17-785f07b3ad3b:sriov-network-2]: error adding container to network "sriov-network-2": SRIOV-CNI failed to load netconf: LoadConf(): the VF 0000:d8:0b.0 does not have a interface name or a dpdk driver

we will return error that the PCI is still allocated

return n, fmt.Errorf("pci address %s is already allocated", n.DeviceID)
}

// Assuming VF is netdev interface; Get interface name(s)
hostIFNames, err := utils.GetVFLinkNames(n.DeviceID)
if err != nil || hostIFNames == "" {
Expand Down
46 changes: 46 additions & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package config

import (
"github.com/containernetworking/plugins/pkg/testutils"
"github.com/k8snetworkplumbingwg/sriov-cni/pkg/utils"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"os"
)

var _ = Describe("Config", func() {
Expand Down Expand Up @@ -61,6 +64,49 @@ var _ = Describe("Config", func() {
_, err := LoadConf(conf)
Expect(err).To(HaveOccurred())
})

It("Assuming device is allocated", func() {
conf := []byte(`{
"name": "mynet",
"type": "sriov",
"deviceID": "0000:af:06.1",
"vf": 0,
"ipam": {
"type": "host-local",
"subnet": "10.55.206.0/26",
"routes": [
{ "dst": "0.0.0.0/0" }
],
"gateway": "10.55.206.1"
}
}`)

tmpdir, err := os.MkdirTemp("/tmp", "sriovplugin-testfiles-")
Expect(err).ToNot(HaveOccurred())
originCNIDir := DefaultCNIDir
DefaultCNIDir = tmpdir
defer func() {
DefaultCNIDir = originCNIDir
}()

targetNetNS, err := testutils.NewNS()
Expect(err).NotTo(HaveOccurred())
defer func() {
if targetNetNS != nil {
targetNetNS.Close()
err = testutils.UnmountNS(targetNetNS)
}
}()

allocator := utils.NewPCIAllocator(tmpdir)
err = allocator.SaveAllocatedPCI("0000:af:06.1", targetNetNS.Path())
Expect(err).ToNot(HaveOccurred())

_, err = LoadConf(conf)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("pci address 0000:af:06.1 is already allocated"))
})

})
Context("Checking getVfInfo function", func() {
It("Assuming existing PF", func() {
Expand Down
81 changes: 81 additions & 0 deletions pkg/utils/mocks/pci_allocator_mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

87 changes: 87 additions & 0 deletions pkg/utils/pci_allocator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package utils

import (
"fmt"
"github.com/containernetworking/plugins/pkg/ns"
"os"
"path/filepath"
)

type PCIAllocation interface {
SaveAllocatedPCI(string, string) error
DeleteAllocatedPCI(string) error
IsAllocated(string) error
}

type PCIAllocator struct {
adrianchiris marked this conversation as resolved.
Show resolved Hide resolved
dataDir string
}

// NewPCIAllocator returns a new PCI allocator
// it will use the <dataDir>/pci folder to store the information about allocated PCI addresses
func NewPCIAllocator(dataDir string) *PCIAllocator {
return &PCIAllocator{dataDir: filepath.Join(dataDir, "pci")}
}

// SaveAllocatedPCI creates a file with the pci address as a name and the network namespace as the content
// return error if the file was not created
func (p *PCIAllocator) SaveAllocatedPCI(pciAddress, ns string) error {
if err := os.MkdirAll(p.dataDir, 0600); err != nil {
return fmt.Errorf("failed to create the sriov data directory(%q): %v", p.dataDir, err)
}

path := filepath.Join(p.dataDir, pciAddress)
err := os.WriteFile(path, []byte(ns), 0600)
if err != nil {
return fmt.Errorf("failed to write used PCI address lock file in the path(%q): %v", path, err)
}

return err
}

// DeleteAllocatedPCI Remove the allocated PCI file
// return error if the file doesn't exist
func (p *PCIAllocator) DeleteAllocatedPCI(pciAddress string) error {
path := filepath.Join(p.dataDir, pciAddress)
if err := os.Remove(path); err != nil {
return fmt.Errorf("error removing PCI address lock file %s: %v", path, err)
}
return nil
}

// IsAllocated checks if the PCI address file exist
// if it exists we also check the network namespace still exist if not we delete the allocation
// The function will return an error if the pci is still allocated to a running pod
func (p *PCIAllocator) IsAllocated(pciAddress string) (bool, error) {
path := filepath.Join(p.dataDir, pciAddress)
_, err := os.Stat(path)
if err != nil {
if os.IsNotExist(err) {
return false, nil
}

return false, fmt.Errorf("failed to check for pci address file for %s: %v", path, err)
}

dat, err := os.ReadFile(path)
if err != nil {
return false, fmt.Errorf("failed to read for pci address file for %s: %v", path, err)
}

// To prevent a locking of a PCI address for every pciAddress file we also add the netns path where it's been used
// This way if for some reason the cmdDel command was not called but the pod namespace doesn't exist anymore
// we release the PCI address
networkNamespace, err := ns.GetNS(string(dat))
if err != nil {
err = p.DeleteAllocatedPCI(pciAddress)
if err != nil {
return false, fmt.Errorf("error deleting the pci allocation for vf pci address %s: %v", pciAddress, err)
}

return false, nil
}

// Close the network namespace
networkNamespace.Close()
return true, nil
}
60 changes: 60 additions & 0 deletions pkg/utils/pci_allocator_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package utils

import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"

"github.com/containernetworking/plugins/pkg/ns"
"github.com/containernetworking/plugins/pkg/testutils"
)

var _ = Describe("PCIAllocator", func() {
var targetNetNS ns.NetNS
var err error

AfterEach(func() {
if targetNetNS != nil {
targetNetNS.Close()
err = testutils.UnmountNS(targetNetNS)
}
})

Context("IsAllocated", func() {
It("Assuming is not allocated", func() {
allocator := NewPCIAllocator(ts.dirRoot)
isAllocated, err := allocator.IsAllocated("0000:af:00.1")
Expect(err).ToNot(HaveOccurred())
Expect(isAllocated).To(BeFalse())
})

It("Assuming is allocated and namespace exist", func() {
targetNetNS, err = testutils.NewNS()
Expect(err).NotTo(HaveOccurred())
allocator := NewPCIAllocator(ts.dirRoot)

err = allocator.SaveAllocatedPCI("0000:af:00.1", targetNetNS.Path())
Expect(err).ToNot(HaveOccurred())

isAllocated, err := allocator.IsAllocated("0000:af:00.1")
Expect(err).ToNot(HaveOccurred())
Expect(isAllocated).To(BeTrue())
})

It("Assuming is allocated and namespace doesn't exist", func() {
targetNetNS, err = testutils.NewNS()
Expect(err).NotTo(HaveOccurred())

allocator := NewPCIAllocator(ts.dirRoot)
err = allocator.SaveAllocatedPCI("0000:af:00.1", targetNetNS.Path())
Expect(err).ToNot(HaveOccurred())
err = targetNetNS.Close()
Expect(err).ToNot(HaveOccurred())
err = testutils.UnmountNS(targetNetNS)
Expect(err).ToNot(HaveOccurred())

isAllocated, err := allocator.IsAllocated("0000:af:00.1")
Expect(err).ToNot(HaveOccurred())
Expect(isAllocated).To(BeFalse())
})
})
})
4 changes: 2 additions & 2 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func GetPfName(vf string) (string, error) {
return strings.TrimSpace(files[0].Name()), nil
}

// GetPciAddress takes in a interface(ifName) and VF id and returns returns its pci addr as string
// GetPciAddress takes in a interface(ifName) and VF id and returns its pci addr as string
func GetPciAddress(ifName string, vf int) (string, error) {
var pciaddr string
vfDir := filepath.Join(NetDirectory, ifName, "device", fmt.Sprintf("virtfn%d", vf))
Expand Down Expand Up @@ -282,7 +282,7 @@ func ReadScratchNetConf(cRefPath string) ([]byte, error) {
// CleanCachedNetConf removed cached NetConf from disk
func CleanCachedNetConf(cRefPath string) error {
if err := os.Remove(cRefPath); err != nil {
return fmt.Errorf("error removing NetConf file %s: %q", cRefPath, err)
return fmt.Errorf("error removing NetConf file %s: %v", cRefPath, err)
}
return nil
}
Expand Down