Skip to content

Commit

Permalink
VM live migrate (#3767)
Browse files Browse the repository at this point in the history
optimize vm live migrate

---------

Signed-off-by: bobz965 <zhangbingbing2_yewu@cmss.chinamobile.com>
Co-authored-by: Oilbeater <liumengxinfly@gmail.com>
Signed-off-by: bobz965 <zhangbingbing2_yewu@cmss.chinamobile.com>
  • Loading branch information
zbb88888 and oilbeater committed Nov 11, 2024
1 parent 42f99a7 commit ee560e8
Show file tree
Hide file tree
Showing 7 changed files with 342 additions and 7 deletions.
84 changes: 84 additions & 0 deletions mocks/pkg/ovs/interface.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions pkg/controller/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -582,9 +582,7 @@ func (c *Controller) updateProviderNetworkForNodeDeletion(pn *kubeovnv1.Provider
}
}
if changed {
if newPn == nil {
newPn = pn.DeepCopy()
}
newPn = pn.DeepCopy()
newPn.Spec.CustomInterfaces = customInterfaces
}
if newPn != nil {
Expand Down
110 changes: 108 additions & 2 deletions pkg/controller/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,6 @@ func (c *Controller) handleAddOrUpdatePod(key string) (err error) {
return nil
}
pod = cachedPod.DeepCopy()
// check if allocate subnet is need. also allocate subnet when hotplug nic
needAllocatePodNets := needAllocateSubnets(pod, podNets)
if len(needAllocatePodNets) != 0 {
if cachedPod, err = c.reconcileAllocateSubnets(cachedPod, pod, needAllocatePodNets); err != nil {
Expand Down Expand Up @@ -580,6 +579,16 @@ func (c *Controller) reconcileAllocateSubnets(cachedPod, pod *v1.Pod, needAlloca
podName := c.getNameByPod(pod)
// todo: isVmPod, getPodType, getNameByPod has duplicated logic

var err error
var isMigrate, migrated, migratedFail bool
var vmKey, srcNodeName, targetNodeName string
if isVMPod && c.config.EnableKeepVMIP {
vmKey = fmt.Sprintf("%s/%s", namespace, vmName)
if isMigrate, migrated, migratedFail, srcNodeName, targetNodeName, err = c.migrateVM(pod, vmKey); err != nil {
klog.Error(err)
return nil, err
}
}
// Avoid create lsp for already running pod in ovn-nb when controller restart
for _, podNet := range needAllocatePodNets {
// the subnet may changed when alloc static ip from the latter subnet after ns supports multi subnets
Expand Down Expand Up @@ -663,6 +672,24 @@ func (c *Controller) reconcileAllocateSubnets(cachedPod, pod *v1.Pod, needAlloca
return nil, err
}

if isMigrate {
if migrated {
klog.Infof("migrate end reset options for lsp %s from %s to %s, migrated fail: %t", portName, srcNodeName, targetNodeName, migratedFail)
if err := c.OVNNbClient.ResetLogicalSwitchPortMigrateOptions(portName, srcNodeName, targetNodeName, migratedFail); err != nil {
err = fmt.Errorf("failed to clean migrate options for lsp %s, %v", portName, err)
klog.Error(err)
return nil, err
}
} else {
klog.Infof("migrate start set options for lsp %s from %s to %s", portName, srcNodeName, targetNodeName)
if err := c.OVNNbClient.SetLogicalSwitchPortMigrateOptions(portName, srcNodeName, targetNodeName); err != nil {
err = fmt.Errorf("failed to set migrate options for lsp %s, %v", portName, err)
klog.Error(err)
return nil, err
}
}
}

if pod.Annotations[fmt.Sprintf(util.Layer2ForwardAnnotationTemplate, podNet.ProviderName)] == "true" {
if err := c.OVNNbClient.EnablePortLayer2forward(portName); err != nil {
c.recorder.Eventf(pod, v1.EventTypeWarning, "SetOVNPortL2ForwardFailed", err.Error())
Expand Down Expand Up @@ -943,6 +970,19 @@ func (c *Controller) handleDeletePod(key string) error {
}
isVMPod, vmName := isVMPod(pod)
if isVMPod && c.config.EnableKeepVMIP {
ports, err := c.OVNNbClient.ListNormalLogicalSwitchPorts(true, map[string]string{"pod": podKey})
if err != nil {
klog.Errorf("failed to list lsps of pod '%s', %v", pod.Name, err)
return err
}
for _, port := range ports {
klog.Infof("clean migrate options for vm lsp %s", port.Name)
if err := c.OVNNbClient.CleanLogicalSwitchPortMigrateOptions(port.Name); err != nil {
err = fmt.Errorf("failed to clean migrate options for vm lsp %s, %v", port.Name, err)
klog.Error(err)
return err
}
}
vmToBeDel := c.isVMToDel(pod, vmName)
isDelete, err := appendCheckPodToDel(c, pod, vmName, util.VMInstance)
if pod.DeletionTimestamp != nil {
Expand Down Expand Up @@ -1263,6 +1303,9 @@ func getNextHopByTunnelIP(gw []net.IP) string {
}

func needAllocateSubnets(pod *v1.Pod, nets []*kubeovnNet) []*kubeovnNet {
// check whether allocation from a subnet is needed.
// allocate when the subnet is changed to hotplug a nic
// allocate when migrating a vm
if !isPodAlive(pod) {
return nil
}
Expand All @@ -1271,9 +1314,15 @@ func needAllocateSubnets(pod *v1.Pod, nets []*kubeovnNet) []*kubeovnNet {
return nets
}

migrate := false
if job, ok := pod.Annotations[util.MigrationJobAnnotation]; ok {
klog.Infof("pod %s/%s is in the migration job %s", pod.Namespace, pod.Name, job)
migrate = true
}

result := make([]*kubeovnNet, 0, len(nets))
for _, n := range nets {
if pod.Annotations[fmt.Sprintf(util.AllocatedAnnotationTemplate, n.ProviderName)] != "true" {
if migrate || pod.Annotations[fmt.Sprintf(util.AllocatedAnnotationTemplate, n.ProviderName)] != "true" {
result = append(result, n)
}
}
Expand Down Expand Up @@ -1940,3 +1989,60 @@ func getPodType(pod *v1.Pod) string {
}
return ""
}

// migrateVM reads the live-migration annotations on a VM pod and tells the
// caller how the migrate options of the pod's logical switch ports should be
// handled.
//
// Results, in order:
//   - migrate: true when OVN migrate options need to be set for this pod
//   - migrated: true when the migration has ended (succeeded or failed)
//   - fail: true when the migration has ended in failure, i.e. the options
//     must be set back to the source node instead of the target node
//   - source node name, taken from the migration source node annotation
//   - target node name, taken from pod.Spec.NodeName
//   - error: always nil; this is a best-effort optimization, so incomplete
//     migration metadata is only logged as a warning
//
// Only the target pod of a migration job yields migrate == true; every other
// pod gets the all-zero result.
func (c *Controller) migrateVM(pod *v1.Pod, vmKey string) (bool, bool, bool, string, string, error) {
	annotations := pod.Annotations

	// Pods that do not belong to a migration job need no handling.
	if _, inJob := annotations[util.MigrationJobAnnotation]; !inJob {
		return false, false, false, "", "", nil
	}
	// The source pod is only logged; migrate options are driven by the target pod.
	if _, isSource := annotations[util.MigrationSourceAnnotation]; isSource {
		klog.Infof("will migrate out vm %s pod %s from source node %s", vmKey, pod.Name, pod.Spec.NodeName)
		return false, false, false, "", "", nil
	}
	// ovn set migrator only in the process of target vm pod
	if _, isTarget := annotations[util.MigrationTargetAnnotation]; !isTarget {
		return false, false, false, "", "", nil
	}

	// A missing annotation reads as "", so one emptiness check covers both cases.
	from := annotations[util.MigrationSourceNodeAnnotation]
	if from == "" {
		klog.Warningf("vm %s migration source node is not set", vmKey)
		return false, false, false, "", "", nil
	}
	to := pod.Spec.NodeName
	if to == "" {
		klog.Warningf("vm %s migration target node is not set", vmKey)
		return false, false, false, "", "", nil
	}

	phase, hasPhase := annotations[util.MigrationPhaseAnnotation]
	switch {
	case !hasPhase:
		klog.Warningf("vm %s migration phase is not set", vmKey)
	case phase == "":
		klog.Warningf("vm %s migration phase is empty", vmKey)
	case phase == util.MigrationPhaseStarted:
		klog.Infof("start to migrate src vm %s from %s to %s", vmKey, from, to)
		return true, false, false, from, to, nil
	case phase == util.MigrationPhaseSucceeded:
		klog.Infof("succeed to migrate src vm %s from %s to %s", vmKey, from, to)
		return true, true, false, from, to, nil
	case phase == util.MigrationPhaseFailed:
		klog.Infof("failed to migrate src vm %s from %s to %s", vmKey, from, to)
		return true, true, true, from, to, nil
	}

	// Missing, empty or unrecognized phase: nothing to do.
	return false, false, false, "", "", nil
}
4 changes: 4 additions & 0 deletions pkg/ovs/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ type LogicalSwitchPort interface {
ListLogicalSwitchPortsWithLegacyExternalIDs() ([]ovnnb.LogicalSwitchPort, error)
GetLogicalSwitchPort(lspName string, ignoreNotFound bool) (*ovnnb.LogicalSwitchPort, error)
LogicalSwitchPortExists(name string) (bool, error)
// vm live migrate
SetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, targetNodeName string) error
ResetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, targetNodeName string, migratedFail bool) error
CleanLogicalSwitchPortMigrateOptions(lspName string) error
}

type LoadBalancer interface {
Expand Down
1 change: 0 additions & 1 deletion pkg/ovs/ovn-nb-load_balancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ func (c *OVNNbClient) LoadBalancerDeleteVip(lbName, vipEndpoint string, ignoreHe
klog.Errorf("failed to delete lb ip port mapping: %v", err)
return err
}

if err = c.LoadBalancerDeleteHealthCheck(lbName, lbhc.UUID); err != nil {
klog.Errorf("failed to delete lb health check: %v", err)
return err
Expand Down
Loading

0 comments on commit ee560e8

Please sign in to comment.