diff --git a/cmd/sriov/main.go b/cmd/sriov/main.go index 3ba469832..06dd05697 100644 --- a/cmd/sriov/main.go +++ b/cmd/sriov/main.go @@ -221,6 +221,14 @@ func cmdDel(args *skel.CmdArgs) error { sm := sriov.NewSriovManager() + /* ResetVFConfig resets a VF administratively. We must run ResetVFConfig + before ReleaseVF because some drivers will error out if we try to + reset netdev VF with trust off. So, reset VF MAC address via PF first. + */ + if err := sm.ResetVFConfig(netConf); err != nil { + return fmt.Errorf("cmdDel() error reseting VF: %q", err) + } + if !netConf.DPDKMode { netns, err := ns.GetNS(args.Netns) if err != nil { @@ -243,10 +251,6 @@ func cmdDel(args *skel.CmdArgs) error { } } - if err := sm.ResetVFConfig(netConf); err != nil { - return fmt.Errorf("cmdDel() error reseting VF: %q", err) - } - // Mark the pci address as released allocator := utils.NewPCIAllocator(config.DefaultCNIDir) if err = allocator.DeleteAllocatedPCI(netConf.DeviceID); err != nil { diff --git a/pkg/sriov/sriov.go b/pkg/sriov/sriov.go index 70160bdae..deaad0b98 100644 --- a/pkg/sriov/sriov.go +++ b/pkg/sriov/sriov.go @@ -3,6 +3,7 @@ package sriov import ( "fmt" "net" + "time" "github.com/containernetworking/plugins/pkg/ns" @@ -91,7 +92,20 @@ func (s *sriovManager) SetupVF(conf *sriovtypes.NetConf, podifName string, cid s // Save the original effective MAC address before overriding it conf.OrigVfState.EffectiveMAC = linkObj.Attrs().HardwareAddr.String() - if err = s.nLink.LinkSetHardwareAddr(linkObj, hwaddr); err != nil { + /* Some NIC drivers (i.e. i40e/iavf) set VF MAC address asynchronously + via PF. This means that while the PF could already show the VF with + the desired MAC address, the netdev VF may still have the original + one. If in this window we issue a netdev VF MAC address set, the driver + will return an error and the pod will fail to create. + Other NICs (Mellanox) require explicit netdev VF MAC address so we + cannot skip this part. + Retry up to 5 times; wait 200 milliseconds between retries + */ + err = utils.Retry(5, 200*time.Millisecond, func() error { + return s.nLink.LinkSetHardwareAddr(linkObj, hwaddr) + }) + + if err != nil { return "", fmt.Errorf("failed to set netlink MAC address to %s: %v", hwaddr, err) } macAddress = conf.MAC @@ -332,7 +346,20 @@ func (s *sriovManager) ResetVFConfig(conf *sriovtypes.NetConf) error { if err != nil { return fmt.Errorf("failed to parse original administrative MAC address %s: %v", conf.OrigVfState.AdminMAC, err) } - if err = s.nLink.LinkSetVfHardwareAddr(pfLink, conf.VFID, hwaddr); err != nil { + + /* Some NIC drivers (i.e. i40e/iavf) set VF MAC address asynchronously + via PF. This means that while the PF could already show the VF with + the desired MAC address, the netdev VF may still have the original + one. If in this window we issue a netdev VF MAC address set, the driver + will return an error and the pod will fail to create. + Other NICs (Mellanox) require explicit netdev VF MAC address so we + cannot skip this part. + Retry up to 5 times; wait 200 milliseconds between retries + */ + err = utils.Retry(5, 200*time.Millisecond, func() error { + return s.nLink.LinkSetVfHardwareAddr(pfLink, conf.VFID, hwaddr) + }) + if err != nil { return fmt.Errorf("failed to restore original administrative MAC address %s: %v", hwaddr, err) } } diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 9d4ccae7b..24178da19 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -9,6 +9,7 @@ import ( "path/filepath" "strconv" "strings" + "time" ) var ( @@ -314,3 +315,16 @@ func IsIPv4(ip net.IP) bool { func IsIPv6(ip net.IP) bool { return ip.To4() == nil && ip.To16() != nil } + +// Retry retries a given function until no return error; times out after retries*sleep +func Retry(retries int, sleep time.Duration, f func() error) error { + err := error(nil) + for retry := 0; retry < retries; retry++ { + err = f() + if err == nil { + return nil + } + time.Sleep(sleep) + } + return err +} diff --git a/pkg/utils/utils_test.go b/pkg/utils/utils_test.go index 3896e573f..0bb159030 100644 --- a/pkg/utils/utils_test.go +++ b/pkg/utils/utils_test.go @@ -1,6 +1,9 @@ package utils import ( + "errors" + "time" + . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" ) @@ -78,4 +81,14 @@ var _ = Describe("Utils", func() { Expect(err).To(HaveOccurred(), "Not existing VF should return an error") }) }) + Context("Checking Retry functon", func() { + It("Assuming calling function fails", func() { + err := Retry(5, 10*time.Millisecond, func() error { return errors.New("") }) + Expect(err).To((HaveOccurred()), "Retry should return an error") + }) + It("Assuming calling function does not fail", func() { + err := Retry(5, 10*time.Millisecond, func() error { return nil }) + Expect(err).NotTo((HaveOccurred()), "Retry should not return an error") + }) + }) })