Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make connecting to OVN more reliable #1089

Merged
merged 2 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/incusd/api_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,9 @@ func clusterPutJoin(d *Daemon, r *http.Request, req api.ClusterPut) response.Res
// Refresh the state.
s = d.State()

// Re-connect OVN if needed.
_ = d.setupOVN()

// Start up networks so any post-join changes can be applied now that we have a Node ID.
logger.Debug("Starting networks after cluster join")
err = networkStartup(s)
Expand Down
21 changes: 16 additions & 5 deletions cmd/incusd/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ type Daemon struct {
// OVN clients.
ovnnb *ovn.NB
ovnsb *ovn.SB
ovnMu sync.Mutex

// API info.
apiExtensions int
Expand Down Expand Up @@ -559,8 +560,7 @@ func (d *Daemon) State() *state.State {
InstanceTypes: instanceTypes,
LocalConfig: localConfig,
OS: d.os,
OVNNB: d.ovnnb,
OVNSB: d.ovnsb,
OVN: d.getOVN,
Proxy: d.proxy,
ServerCert: d.serverCert,
ServerClustered: d.serverClustered,
Expand Down Expand Up @@ -1467,9 +1467,6 @@ func (d *Daemon) init() error {
logger.Info("Started BGP server")
}

// Attempt to setup OVN clients.
_ = d.setupOVN()

// Setup DNS listener.
d.dns = dns.NewServer(d.db.Cluster, func(name string, full bool) (*dns.Zone, error) {
// Fetch the zone.
Expand Down Expand Up @@ -2535,6 +2532,9 @@ func (d *Daemon) nodeRefreshTask(heartbeatData *cluster.APIHeartbeat, isLeader b
}

func (d *Daemon) setupOVN() error {
d.ovnMu.Lock()
defer d.ovnMu.Unlock()

// Clear any existing clients.
d.ovnnb = nil
d.ovnsb = nil
Expand Down Expand Up @@ -2597,3 +2597,14 @@ func (d *Daemon) setupOVN() error {

return nil
}

// getOVN returns the daemon's OVN northbound and southbound clients,
// attempting to (re-)connect through setupOVN when either one is missing.
//
// The client fields are only read while holding ovnMu. setupOVN takes the
// same mutex and clears both fields before reconnecting, so an unguarded
// read could race with it and observe the cleared (nil) clients.
//
// An error is returned when setupOVN fails; in that case both clients are nil.
func (d *Daemon) getOVN() (*ovn.NB, *ovn.SB, error) {
	// Snapshot the current clients under the lock.
	d.ovnMu.Lock()
	nb, sb := d.ovnnb, d.ovnsb
	d.ovnMu.Unlock()

	if nb != nil && sb != nil {
		return nb, sb, nil
	}

	// The lock must not be held here: setupOVN acquires ovnMu itself and
	// sync.Mutex is not reentrant.
	err := d.setupOVN()
	if err != nil {
		return nil, nil, fmt.Errorf("Failed to connect to OVN: %w", err)
	}

	// Re-read under the lock so a concurrent setupOVN call can't be observed
	// half-way through replacing the clients.
	d.ovnMu.Lock()
	defer d.ovnMu.Unlock()

	return d.ovnnb, d.ovnsb, nil
}
17 changes: 12 additions & 5 deletions internal/server/device/nic_ovn.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ type nicOVN struct {
deviceCommon

network ovnNet // Populated in validateConfig().

ovnnb *ovn.NB
ovnsb *ovn.SB
}

// CanHotPlug returns whether the device can be managed whilst the instance is running.
Expand Down Expand Up @@ -383,10 +386,14 @@ func (d *nicOVN) validateEnvironment() error {

func (d *nicOVN) init(inst instance.Instance, s *state.State, name string, conf deviceConfig.Device, volatileGet VolatileGetter, volatileSet VolatileSetter) error {
// Check that OVN is available.
if s.OVNNB == nil {
return fmt.Errorf("OVN isn't currently available")
ovnnb, ovnsb, err := s.OVN()
if err != nil {
return err
}

d.ovnnb = ovnnb
d.ovnsb = ovnsb

return d.deviceCommon.init(inst, s, name, conf, volatileGet, volatileSet)
}

Expand Down Expand Up @@ -658,7 +665,7 @@ func (d *nicOVN) Start() (*deviceConfig.RunConfig, error) {

// Add post start hook for setting logical switch port chassis once instance has been started.
runConf.PostHooks = append(runConf.PostHooks, func() error {
err := d.state.OVNNB.UpdateLogicalSwitchPortOptions(context.TODO(), logicalPortName, map[string]string{"requested-chassis": chassisID})
err := d.ovnnb.UpdateLogicalSwitchPortOptions(context.TODO(), logicalPortName, map[string]string{"requested-chassis": chassisID})
if err != nil {
return fmt.Errorf("Failed setting logical switch port chassis ID: %w", err)
}
Expand Down Expand Up @@ -803,7 +810,7 @@ func (d *nicOVN) Update(oldDevices deviceConfig.Devices, isRunning bool) error {
}

if len(removedACLs) > 0 {
err := acl.OVNPortGroupDeleteIfUnused(d.state, d.logger, d.state.OVNNB, d.network.Project(), d.inst, d.name, newACLs...)
err := acl.OVNPortGroupDeleteIfUnused(d.state, d.logger, d.ovnnb, d.network.Project(), d.inst, d.name, newACLs...)
if err != nil {
return fmt.Errorf("Failed removing unused OVN port groups: %w", err)
}
Expand Down Expand Up @@ -1006,7 +1013,7 @@ func (d *nicOVN) Remove() error {
// Check for port groups that will become unused (and need deleting) as this NIC is deleted.
securityACLs := util.SplitNTrimSpace(d.config["security.acls"], ",", -1, true)
if len(securityACLs) > 0 {
err := acl.OVNPortGroupDeleteIfUnused(d.state, d.logger, d.state.OVNNB, d.network.Project(), d.inst, d.name)
err := acl.OVNPortGroupDeleteIfUnused(d.state, d.logger, d.ovnnb, d.network.Project(), d.inst, d.name)
if err != nil {
return fmt.Errorf("Failed removing unused OVN port groups: %w", err)
}
Expand Down
9 changes: 5 additions & 4 deletions internal/server/network/acl/driver_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,8 +648,9 @@ func (d *common) Update(config *api.NetworkACLPut, clientType request.ClientType
// This way we won't apply the same changes multiple times for each cluster member.
if len(aclOVNNets) > 0 && clientType == request.ClientTypeNormal {
// Check that OVN is available.
if d.state.OVNNB == nil {
return fmt.Errorf("OVN isn't currently available")
ovnnb, _, err := d.state.OVN()
if err != nil {
return err
}

var aclNameIDs map[string]int64
Expand All @@ -670,7 +671,7 @@ func (d *common) Update(config *api.NetworkACLPut, clientType request.ClientType
// apply those rules to each network affected by the ACL, so pass the full list of OVN networks
// affected by this ACL (either because the ACL is assigned directly or because it is assigned to
// an OVN NIC in an instance or profile).
cleanup, err := OVNEnsureACLs(d.state, d.logger, d.state.OVNNB, d.projectName, aclNameIDs, aclOVNNets, []string{d.info.Name}, true)
cleanup, err := OVNEnsureACLs(d.state, d.logger, ovnnb, d.projectName, aclNameIDs, aclOVNNets, []string{d.info.Name}, true)
if err != nil {
return fmt.Errorf("Failed ensuring ACL is configured in OVN: %w", err)
}
Expand All @@ -679,7 +680,7 @@ func (d *common) Update(config *api.NetworkACLPut, clientType request.ClientType

// Run unused port group cleanup in case any formerly referenced ACL in this ACL's rules means that
// an ACL port group is now considered unused.
err = OVNPortGroupDeleteIfUnused(d.state, d.logger, d.state.OVNNB, d.projectName, nil, "", d.info.Name)
err = OVNPortGroupDeleteIfUnused(d.state, d.logger, ovnnb, d.projectName, nil, "", d.info.Name)
if err != nil {
return fmt.Errorf("Failed removing unused OVN port groups: %w", err)
}
Expand Down
Loading
Loading