Skip to content

Commit

Permalink
Add pci address lock for allocated devices (k8snetworkplumbingwg#220)
Browse files Browse the repository at this point in the history
* Create allocation interface and implementation.
This is needed to lock the allocation of the same PCI address until the cmdDel is called or the kernel remove the network namespace.

Signed-off-by: Sebastian Sch <sebassch@gmail.com>

* Use the allocator interface to prevent allocation of still in used pci addresses

Fixes: k8snetworkplumbingwg#219

Signed-off-by: Sebastian Sch <sebassch@gmail.com>

Signed-off-by: Sebastian Sch <sebassch@gmail.com>
  • Loading branch information
SchSeba authored and cgoncalves committed Dec 7, 2022
1 parent d4c7caf commit 90509b6
Show file tree
Hide file tree
Showing 7 changed files with 302 additions and 2 deletions.
12 changes: 12 additions & 0 deletions cmd/sriov/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,12 @@ func cmdAdd(args *skel.CmdArgs) error {
return fmt.Errorf("error saving NetConf %q", err)
}

allocator := utils.NewPCIAllocator(config.DefaultCNIDir)
// Mark the pci address as in used
if err = allocator.SaveAllocatedPCI(netConf.DeviceID, args.Netns); err != nil {
return fmt.Errorf("error saving the pci allocation for vf pci address %s: %v", netConf.DeviceID, err)
}

return types.PrintResult(result, netConf.CNIVersion)
}

Expand Down Expand Up @@ -231,6 +237,12 @@ func cmdDel(args *skel.CmdArgs) error {
return fmt.Errorf("cmdDel() error reseting VF: %q", err)
}

// Mark the pci address as released
allocator := utils.NewPCIAllocator(config.DefaultCNIDir)
if err = allocator.DeleteAllocatedPCI(netConf.DeviceID); err != nil {
return fmt.Errorf("error cleaning the pci allocation for vf pci address %s: %v", netConf.DeviceID, err)
}

return nil
}

Expand Down
14 changes: 14 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,20 @@ func LoadConf(bytes []byte) (*sriovtypes.NetConf, error) {
return nil, fmt.Errorf("LoadConf(): VF pci addr is required")
}

allocator := utils.NewPCIAllocator(DefaultCNIDir)
// Check if the device is already allocated.
// This is to prevent issues where kubelet request to delete a pod and in the same time a new pod using the same
// vf is started. we can have an issue where the cmdDel of the old pod is called AFTER the cmdAdd of the new one
// This will block the new pod creation until the cmdDel is done.
isAllocated, err := allocator.IsAllocated(n.DeviceID)
if err != nil {
return n, err
}

if isAllocated {
return n, fmt.Errorf("pci address %s is already allocated", n.DeviceID)
}

// Assuming VF is netdev interface; Get interface name(s)
hostIFNames, err := utils.GetVFLinkNames(n.DeviceID)
if err != nil || hostIFNames == "" {
Expand Down
46 changes: 46 additions & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package config

import (
"github.com/containernetworking/plugins/pkg/testutils"
"github.com/k8snetworkplumbingwg/sriov-cni/pkg/utils"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"os"
)

var _ = Describe("Config", func() {
Expand Down Expand Up @@ -61,6 +64,49 @@ var _ = Describe("Config", func() {
_, err := LoadConf(conf)
Expect(err).To(HaveOccurred())
})

It("Assuming device is allocated", func() {
conf := []byte(`{
"name": "mynet",
"type": "sriov",
"deviceID": "0000:af:06.1",
"vf": 0,
"ipam": {
"type": "host-local",
"subnet": "10.55.206.0/26",
"routes": [
{ "dst": "0.0.0.0/0" }
],
"gateway": "10.55.206.1"
}
}`)

tmpdir, err := os.MkdirTemp("/tmp", "sriovplugin-testfiles-")
Expect(err).ToNot(HaveOccurred())
originCNIDir := DefaultCNIDir
DefaultCNIDir = tmpdir
defer func() {
DefaultCNIDir = originCNIDir
}()

targetNetNS, err := testutils.NewNS()
Expect(err).NotTo(HaveOccurred())
defer func() {
if targetNetNS != nil {
targetNetNS.Close()
err = testutils.UnmountNS(targetNetNS)
}
}()

allocator := utils.NewPCIAllocator(tmpdir)
err = allocator.SaveAllocatedPCI("0000:af:06.1", targetNetNS.Path())
Expect(err).ToNot(HaveOccurred())

_, err = LoadConf(conf)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("pci address 0000:af:06.1 is already allocated"))
})

})
Context("Checking getVfInfo function", func() {
It("Assuming existing PF", func() {
Expand Down
81 changes: 81 additions & 0 deletions pkg/utils/mocks/pci_allocator_mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

87 changes: 87 additions & 0 deletions pkg/utils/pci_allocator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package utils

import (
"fmt"
"github.com/containernetworking/plugins/pkg/ns"
"os"
"path/filepath"
)

type PCIAllocation interface {
SaveAllocatedPCI(string, string) error
DeleteAllocatedPCI(string) error
IsAllocated(string) error
}

type PCIAllocator struct {
dataDir string
}

// NewPCIAllocator returns a new PCI allocator
// it will use the <dataDir>/pci folder to store the information about allocated PCI addresses
func NewPCIAllocator(dataDir string) *PCIAllocator {
return &PCIAllocator{dataDir: filepath.Join(dataDir, "pci")}
}

// SaveAllocatedPCI creates a file with the pci address as a name and the network namespace as the content
// return error if the file was not created
func (p *PCIAllocator) SaveAllocatedPCI(pciAddress, ns string) error {
if err := os.MkdirAll(p.dataDir, 0600); err != nil {
return fmt.Errorf("failed to create the sriov data directory(%q): %v", p.dataDir, err)
}

path := filepath.Join(p.dataDir, pciAddress)
err := os.WriteFile(path, []byte(ns), 0600)
if err != nil {
return fmt.Errorf("failed to write used PCI address lock file in the path(%q): %v", path, err)
}

return err
}

// DeleteAllocatedPCI Remove the allocated PCI file
// return error if the file doesn't exist
func (p *PCIAllocator) DeleteAllocatedPCI(pciAddress string) error {
path := filepath.Join(p.dataDir, pciAddress)
if err := os.Remove(path); err != nil {
return fmt.Errorf("error removing PCI address lock file %s: %v", path, err)
}
return nil
}

// IsAllocated checks if the PCI address file exist
// if it exists we also check the network namespace still exist if not we delete the allocation
// The function will return an error if the pci is still allocated to a running pod
func (p *PCIAllocator) IsAllocated(pciAddress string) (bool, error) {
path := filepath.Join(p.dataDir, pciAddress)
_, err := os.Stat(path)
if err != nil {
if os.IsNotExist(err) {
return false, nil
}

return false, fmt.Errorf("failed to check for pci address file for %s: %v", path, err)
}

dat, err := os.ReadFile(path)
if err != nil {
return false, fmt.Errorf("failed to read for pci address file for %s: %v", path, err)
}

// To prevent a locking of a PCI address for every pciAddress file we also add the netns path where it's been used
// This way if for some reason the cmdDel command was not called but the pod namespace doesn't exist anymore
// we release the PCI address
networkNamespace, err := ns.GetNS(string(dat))
if err != nil {
err = p.DeleteAllocatedPCI(pciAddress)
if err != nil {
return false, fmt.Errorf("error deleting the pci allocation for vf pci address %s: %v", pciAddress, err)
}

return false, nil
}

// Close the network namespace
networkNamespace.Close()
return true, nil
}
60 changes: 60 additions & 0 deletions pkg/utils/pci_allocator_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package utils

import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"

"github.com/containernetworking/plugins/pkg/ns"
"github.com/containernetworking/plugins/pkg/testutils"
)

var _ = Describe("PCIAllocator", func() {
var targetNetNS ns.NetNS
var err error

AfterEach(func() {
if targetNetNS != nil {
targetNetNS.Close()
err = testutils.UnmountNS(targetNetNS)
}
})

Context("IsAllocated", func() {
It("Assuming is not allocated", func() {
allocator := NewPCIAllocator(ts.dirRoot)
isAllocated, err := allocator.IsAllocated("0000:af:00.1")
Expect(err).ToNot(HaveOccurred())
Expect(isAllocated).To(BeFalse())
})

It("Assuming is allocated and namespace exist", func() {
targetNetNS, err = testutils.NewNS()
Expect(err).NotTo(HaveOccurred())
allocator := NewPCIAllocator(ts.dirRoot)

err = allocator.SaveAllocatedPCI("0000:af:00.1", targetNetNS.Path())
Expect(err).ToNot(HaveOccurred())

isAllocated, err := allocator.IsAllocated("0000:af:00.1")
Expect(err).ToNot(HaveOccurred())
Expect(isAllocated).To(BeTrue())
})

It("Assuming is allocated and namespace doesn't exist", func() {
targetNetNS, err = testutils.NewNS()
Expect(err).NotTo(HaveOccurred())

allocator := NewPCIAllocator(ts.dirRoot)
err = allocator.SaveAllocatedPCI("0000:af:00.1", targetNetNS.Path())
Expect(err).ToNot(HaveOccurred())
err = targetNetNS.Close()
Expect(err).ToNot(HaveOccurred())
err = testutils.UnmountNS(targetNetNS)
Expect(err).ToNot(HaveOccurred())

isAllocated, err := allocator.IsAllocated("0000:af:00.1")
Expect(err).ToNot(HaveOccurred())
Expect(isAllocated).To(BeFalse())
})
})
})
4 changes: 2 additions & 2 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func GetPfName(vf string) (string, error) {
return strings.TrimSpace(files[0].Name()), nil
}

// GetPciAddress takes in a interface(ifName) and VF id and returns returns its pci addr as string
// GetPciAddress takes in a interface(ifName) and VF id and returns its pci addr as string
func GetPciAddress(ifName string, vf int) (string, error) {
var pciaddr string
vfDir := filepath.Join(NetDirectory, ifName, "device", fmt.Sprintf("virtfn%d", vf))
Expand Down Expand Up @@ -282,7 +282,7 @@ func ReadScratchNetConf(cRefPath string) ([]byte, error) {
// CleanCachedNetConf removed cached NetConf from disk
func CleanCachedNetConf(cRefPath string) error {
if err := os.Remove(cRefPath); err != nil {
return fmt.Errorf("error removing NetConf file %s: %q", cRefPath, err)
return fmt.Errorf("error removing NetConf file %s: %v", cRefPath, err)
}
return nil
}
Expand Down

0 comments on commit 90509b6

Please sign in to comment.