-
Notifications
You must be signed in to change notification settings - Fork 114
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement a rebind to default driver as a w/a #233
Merged
pliurh
merged 2 commits into
k8snetworkplumbingwg:master
from
SchSeba:add_intel_driver_wa
Feb 10, 2022
+58
−17
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -272,7 +272,7 @@ func configSriovDevice(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetwor | |
for _, addr := range vfAddrs { | ||
var group sriovnetworkv1.VfGroup | ||
i := 0 | ||
var driver string | ||
var dpdkDriver string | ||
var isRdma bool | ||
vfID, err := dputils.GetVFID(addr) | ||
for i, group = range iface.VfGroups { | ||
|
@@ -282,22 +282,50 @@ func configSriovDevice(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetwor | |
if sriovnetworkv1.IndexInRange(vfID, group.VfRange) { | ||
isRdma = group.IsRdma | ||
if sriovnetworkv1.StringInArray(group.DeviceType, DpdkDrivers) { | ||
driver = group.DeviceType | ||
dpdkDriver = group.DeviceType | ||
} | ||
break | ||
} | ||
} | ||
if strings.EqualFold(iface.LinkType, "IB") { | ||
if err = setVfGuid(addr, pfLink); err != nil { | ||
return err | ||
|
||
// only set GUID and MAC for VF with default driver | ||
// for userspace drivers like vfio we configure the vf mac using the kernel nic mac address | ||
// before we switch to the userspace driver | ||
if yes, d := hasDriver(addr); yes && !sriovnetworkv1.StringInArray(d, DpdkDrivers) { | ||
if strings.EqualFold(iface.LinkType, "IB") { | ||
if err = setVfGuid(addr, pfLink); err != nil { | ||
return err | ||
} | ||
} else { | ||
vfLink, err := vfIsReady(addr) | ||
if err != nil { | ||
glog.Errorf("configSriovDevice(): VF link is not ready for device %s %q", addr, err) | ||
err = RebindVfToDefaultDriver(addr) | ||
if err != nil { | ||
glog.Errorf("configSriovDevice(): failed to rebind VF %s %q", addr, err) | ||
return err | ||
} | ||
|
||
// Try to check the VF status again | ||
vfLink, err = vfIsReady(addr) | ||
if err != nil { | ||
glog.Errorf("configSriovDevice(): VF link is not ready for device %s %q", addr, err) | ||
return err | ||
} | ||
|
||
} | ||
if err = setVfAdminMac(addr, pfLink, vfLink); err != nil { | ||
glog.Errorf("configSriovDevice(): fail to configure VF admin mac address for device %s %q", addr, err) | ||
return err | ||
} | ||
} | ||
} else if err = setVfAdminMac(addr, pfLink); err != nil { | ||
return err | ||
} | ||
|
||
if err = unbindDriverIfNeeded(addr, isRdma); err != nil { | ||
return err | ||
} | ||
if driver == "" { | ||
|
||
if dpdkDriver == "" { | ||
if err := BindDefaultDriver(addr); err != nil { | ||
glog.Warningf("configSriovDevice(): fail to bind default driver for device %s", addr) | ||
return err | ||
|
@@ -310,8 +338,8 @@ func configSriovDevice(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetwor | |
} | ||
} | ||
} else { | ||
if err := BindDpdkDriver(addr, driver); err != nil { | ||
glog.Warningf("configSriovDevice(): fail to bind driver %s for device %s", driver, addr) | ||
if err := BindDpdkDriver(addr, dpdkDriver); err != nil { | ||
glog.Warningf("configSriovDevice(): fail to bind driver %s for device %s", dpdkDriver, addr) | ||
return err | ||
} | ||
} | ||
|
@@ -541,7 +569,7 @@ func vfIsReady(pciAddr string) (netlink.Link, error) { | |
glog.Infof("vfIsReady(): VF device %s", pciAddr) | ||
var err error | ||
var vfLink netlink.Link | ||
err = wait.PollImmediate(time.Second, 5*time.Second, func() (bool, error) { | ||
err = wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do you need to increase the timeout? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the timeout was too short and was generating spurious errors. |
||
vfName := tryGetInterfaceName(pciAddr) | ||
vfLink, err = netlink.LinkByName(vfName) | ||
if err != nil { | ||
|
@@ -555,19 +583,15 @@ func vfIsReady(pciAddr string) (netlink.Link, error) { | |
return vfLink, nil | ||
} | ||
|
||
func setVfAdminMac(vfAddr string, pfLink netlink.Link) error { | ||
func setVfAdminMac(vfAddr string, pfLink, vfLink netlink.Link) error { | ||
glog.Infof("setVfAdminMac(): VF %s", vfAddr) | ||
|
||
vfID, err := dputils.GetVFID(vfAddr) | ||
if err != nil { | ||
glog.Errorf("setVfAdminMac(): unable to get VF id %+v %q", vfAddr, err) | ||
return err | ||
} | ||
vfLink, err := vfIsReady(vfAddr) | ||
if err != nil { | ||
glog.Errorf("setVfAdminMac(): VF link is not ready for device %+v %q", vfAddr, err) | ||
return err | ||
} | ||
|
||
if err := netlink.LinkSetVfHardwareAddr(pfLink, vfID, vfLink.Attrs().HardwareAddr); err != nil { | ||
return err | ||
} | ||
|
@@ -722,3 +746,20 @@ func hasMellanoxInterfacesInSpec(newState *sriovnetworkv1.SriovNetworkNodeState) | |
} | ||
return false | ||
} | ||
|
||
// RebindVfToDefaultDriver is a workaround for the case where the VF default driver is stuck and is not able to create the VF kernel interface. | ||
// It calls Unbind on the VF and then BindDefaultDriver to bind it again, returning the first error encountered or nil on success. | ||
// bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2045087 | ||
func RebindVfToDefaultDriver(vfAddr string) error { | ||
glog.Infof("RebindVfToDefaultDriver(): VF %s", vfAddr) | ||
// An Unbind failure is returned to the caller without an additional log entry here. | ||
if err := Unbind(vfAddr); err != nil { | ||
return err | ||
} | ||
if err := BindDefaultDriver(vfAddr); err != nil { | ||
glog.Errorf("RebindVfToDefaultDriver(): fail to bind default driver for device %s", vfAddr) | ||
return err | ||
} | ||
|
||
// Logged at warning level even though the rebind succeeded. | ||
glog.Warningf("RebindVfToDefaultDriver(): workaround implemented for VF %s", vfAddr) | ||
return nil | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/cc @adrianchiris @mskrocki @pliurh @zshi-redhat
I added this validation instead of
if iface.NumVfs != ifaceStatus.NumVfs
that was proposed under #245. I think this is a better solution because we are inside a for loop going over all the VFs. If there is an issue with one of the VFs (for example, the Intel driver gets stuck), we exit the
configSriovDevice
function with an error. Then, in the next reconcile, iface.NumVfs will be equal to ifaceStatus.NumVfs because the VFs were already created here (https://github.com/k8snetworkplumbingwg/sriov-network-operator/pull/233/files#diff-81ddbadfb415ccbb9c7af84f11668d1aa5e53c34025bf86d4702f16b4e42f045R246), but we didn't actually finish allocating the GUID or the MAC address to all the VFs. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
agree.