Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve GPU SR-IOV allocation #559

Merged
merged 3 commits into from
Feb 29, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 50 additions & 81 deletions internal/server/device/gpu_sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@ package device

import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"sync"

"github.com/lxc/incus/internal/linux"
"github.com/lxc/incus/internal/revert"
Expand All @@ -17,6 +14,9 @@ import (
"github.com/lxc/incus/shared/util"
)

// sriovMu serializes GPU SR-IOV VF allocation. Start takes it before selecting
// a VF and releases it when it returns, so that concurrent starts cannot be
// handed the same VF.
var sriovMu sync.Mutex

// gpuSRIOV is the GPU SR-IOV device. Its Start method allocates a free virtual
// function (VF) from a matching GPU. It embeds deviceCommon.
type gpuSRIOV struct {
deviceCommon
}
Expand Down Expand Up @@ -81,53 +81,22 @@ func (d *gpuSRIOV) Start() (*deviceConfig.RunConfig, error) {
runConf := deviceConfig.RunConfig{}
saveData := make(map[string]string)

// Get SRIOV parent, i.e. the actual GPU.
parentPCIAddresses, err := d.getParentPCIAddresses()
if err != nil {
return nil, err
}

var parentPCIAddress string
var pciParentDev pcidev.Device
vfID := -1

// Make sure that vfio-pci is loaded.
err = linux.LoadModule("vfio-pci")
if err != nil {
return nil, fmt.Errorf("Error loading %q module: %w", "vfio-pci", err)
}

// Since there might be multiple GPUs, we iterate through them and get the first free
// virtual function.
for _, parentPCIAddress = range parentPCIAddresses {
// Get PCI information about the GPU device.
devicePath := filepath.Join("/sys/bus/pci/devices", parentPCIAddress)

pciParentDev, err = pcidev.ParseUeventFile(filepath.Join(devicePath, "uevent"))
if err != nil {
err = fmt.Errorf("Failed to get PCI device info for GPU %q: %w", parentPCIAddress, err)
continue
}

vfID, err = d.findFreeVirtualFunction(pciParentDev)
if err != nil {
err = fmt.Errorf("Failed to find free virtual function: %w", err)
continue
}

if vfID > -1 {
break
}
}
// Get global SR-IOV lock to prevent concurrent allocations of the VF.
sriovMu.Lock()
defer sriovMu.Unlock()

// Get SRIOV VF.
parentPCIAddress, vfID, err := d.getVF()
if err != nil {
return nil, err
}

if vfID == -1 {
return nil, fmt.Errorf("All virtual functions on parent device seem to be in use")
}

vfPCIDev, err := d.setupSriovParent(parentPCIAddress, vfID, saveData)
if err != nil {
return nil, err
Expand All @@ -146,29 +115,62 @@ func (d *gpuSRIOV) Start() (*deviceConfig.RunConfig, error) {
return &runConf, nil
}

// getParentPCIAddresses returns the PCI addresses of parent GPUs.
func (d *gpuSRIOV) getParentPCIAddresses() ([]string, error) {
// getVF returns the parent PCI address and VF id for a matching GPU.
//
// Among the cards accepted by gpuSelected that expose SR-IOV, it picks the one
// with the highest ratio of free VFs to CurrentVFs and returns that card's PCI
// address together with the id of a free VF on it. A VF is considered free when
// its Driver field is empty. When no card qualifies, it returns ("", -1, err).
func (d *gpuSRIOV) getVF() (string, int, error) {
// List all the GPUs.
gpus, err := resources.GetGPU()
if err != nil {
return nil, err
return "", -1, err
}

var parentPCIAddresses []string
// Locate a suitable VF from the least loaded suitable card.
// cardAvailable and cardTotal record the free VF count and CurrentVFs of the
// best candidate seen so far; both start at zero.
var pciAddress string
var vfID int
var cardTotal int
var cardAvailable int

for _, gpu := range gpus.Cards {
// Skip any cards that are not selected.
if !gpuSelected(d.Config(), gpu) {
continue
}

parentPCIAddresses = append(parentPCIAddresses, gpu.PCIAddress)
// Skip any card without SR-IOV.
if gpu.SRIOV == nil {
continue
}

// Find available VFs (those with no driver bound).
vfs := []int{}

for id, vf := range gpu.SRIOV.VFs {
if vf.Driver == "" {
vfs = append(vfs, id)
}
}

// Skip if no available VFs.
if len(vfs) == 0 {
continue
}

// Check if current card is less busy, i.e. has a strictly higher free/total
// ratio than the best candidate so far. On a tie the earlier card is kept.
// NOTE(review): for the first candidate cardTotal is still 0, so the right-hand
// side is 0/0. This relies on that evaluating to NaN (IEEE-754), which makes the
// comparison false and accepts the first candidate. Consider an explicit
// cardTotal == 0 check instead.
if (float64(len(vfs)) / float64(gpu.SRIOV.CurrentVFs)) <= (float64(cardAvailable) / float64(cardTotal)) {
continue
}

// Record this card as the new best candidate, using the first free VF collected above.
pciAddress = gpu.PCIAddress
vfID = vfs[0]
cardAvailable = len(vfs)
cardTotal = int(gpu.SRIOV.CurrentVFs)
}

if len(parentPCIAddresses) == 0 {
return nil, fmt.Errorf("Failed to detect requested GPU device")
// Check if any physical GPU was found to match.
if pciAddress == "" {
return "", -1, fmt.Errorf("Couldn't find a matching GPU with available VFs")
}

return parentPCIAddresses, nil
return pciAddress, vfID, nil
}

// setupSriovParent configures a SR-IOV virtual function (VF) device on parent and stores original properties of
Expand Down Expand Up @@ -220,39 +222,6 @@ func (d *gpuSRIOV) getVFDevicePCISlot(parentPCIAddress string, vfID string) (pci
return pciDev, nil
}

// findFreeVirtualFunction walks the virtual functions currently enabled on
// parentDev, as reported by its sriov_numvfs sysfs attribute, and returns the
// index of the first one that has no driver bound. It returns -1 with a nil
// error when every enabled VF has a driver, and 0 with a non-nil error when a
// sysfs read or parse fails.
func (d *gpuSRIOV) findFreeVirtualFunction(parentDev pcidev.Device) (int, error) {
	// Read how many VFs are currently enabled on the parent.
	raw, err := os.ReadFile(fmt.Sprintf("/sys/bus/pci/devices/%s/sriov_numvfs", parentDev.SlotName))
	if err != nil {
		return 0, err
	}

	enabled, err := strconv.Atoi(strings.TrimSpace(string(raw)))
	if err != nil {
		return 0, err
	}

	for id := 0; id < enabled; id++ {
		vf, err := pcidev.ParseUeventFile(fmt.Sprintf("/sys/bus/pci/devices/%s/virtfn%d/uevent", parentDev.SlotName, id))
		if err != nil {
			return 0, err
		}

		// A virtual function with no driver bound is assumed to be free.
		if vf.Driver == "" {
			return id, nil
		}
	}

	// Every enabled VF already has a driver.
	return -1, nil
}

// Stop is run when the device is removed from the instance.
func (d *gpuSRIOV) Stop() (*deviceConfig.RunConfig, error) {
runConf := deviceConfig.RunConfig{
Expand Down
Loading