Skip to content

Commit

Permalink
networking: Inject implicit constraints on CNI plugins when using bri…
Browse files Browse the repository at this point in the history
…dge mode (#15473)

This PR adds a job mutator which injects constraints on the job taskgroups
that make use of bridge networking. Creating a bridge network makes use of the
CNI plugins: bridge, firewall, host-local, loopback, and portmap. Starting
with Nomad 1.5 these plugins are fingerprinted on each node, and as such we
can ensure jobs are correctly scheduled only on nodes where they are available,
when needed.
  • Loading branch information
shoenig authored Mar 27, 2024
1 parent 9c22860 commit 6ad648b
Show file tree
Hide file tree
Showing 7 changed files with 141 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .changelog/15473.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
networking: Inject constraints on CNI plugins when using bridge networking
```
3 changes: 2 additions & 1 deletion client/allocrunner/networking_bridge_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ func buildNomadBridgeNetConfig(b bridgeNetworkConfigurator) []byte {
}

// Update website/content/docs/networking/cni.mdx when the bridge configuration
// is modified.
// is modified. If CNI plugins are added or versions need to be updated for new
// fields, add a new constraint to nomad/job_endpoint_hooks.go
const nomadCNIConfigTemplate = `{
"cniVersion": "0.4.0",
"name": "nomad",
Expand Down
68 changes: 67 additions & 1 deletion nomad/job_endpoint_hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,24 @@ import (
"golang.org/x/exp/maps"
)

// Node attributes acquired via fingerprinting. Each value is the interpolation
// expression used as the left-hand target of an implicit constraint.
const (
	// Service and agent attributes.
	attrVaultVersion      = "${attr.vault.version}"
	attrConsulVersion     = "${attr.consul.version}"
	attrNomadVersion      = "${attr.nomad.version}"
	attrNomadServiceDisco = "${attr.nomad.service_discovery}"

	// Versions of the CNI plugins that bridge networking relies on.
	attrBridgeCNI    = "${attr.plugins.cni.version.bridge}"
	attrFirewallCNI  = "${attr.plugins.cni.version.firewall}"
	attrHostLocalCNI = "${attr.plugins.cni.version.host-local}"
	attrLoopbackCNI  = "${attr.plugins.cni.version.loopback}"
	attrPortMapCNI   = "${attr.plugins.cni.version.portmap}"
)

// cniMinVersion is the version expression for the minimum CNI version supported
// for the CNI container-networking plugins. Support was added at v0.4.0, so
// we set the minimum to that.
//
// It is used as the right-hand target of the implicit semver constraints
// (cniBridgeConstraint and its siblings) that are evaluated against the
// fingerprinted plugin version attributes.
const cniMinVersion = ">= 0.4.0"

var (
// vaultConstraint is the implicit constraint added to jobs requesting a
// Vault token
Expand Down Expand Up @@ -78,6 +89,51 @@ var (
RTarget: "linux",
Operand: "=",
}

// cniBridgeConstraint is implicitly added to task groups that use bridge
// networking mode. It requires the node's fingerprinted "bridge" CNI plugin
// version to satisfy cniMinVersion.
cniBridgeConstraint = &structs.Constraint{
	LTarget: attrBridgeCNI,
	Operand: structs.ConstraintSemver,
	RTarget: cniMinVersion,
}

// cniFirewallConstraint is implicitly added to task groups that use bridge
// networking mode. It requires the node's fingerprinted "firewall" CNI plugin
// version to satisfy cniMinVersion.
cniFirewallConstraint = &structs.Constraint{
	LTarget: attrFirewallCNI,
	Operand: structs.ConstraintSemver,
	RTarget: cniMinVersion,
}

// cniHostLocalConstraint is implicitly added to task groups that use bridge
// networking mode. It requires the node's fingerprinted "host-local" CNI
// plugin version to satisfy cniMinVersion.
cniHostLocalConstraint = &structs.Constraint{
	LTarget: attrHostLocalCNI,
	Operand: structs.ConstraintSemver,
	RTarget: cniMinVersion,
}

// cniLoopbackConstraint is implicitly added to task groups that use bridge
// networking mode. It requires the node's fingerprinted "loopback" CNI plugin
// version to satisfy cniMinVersion.
cniLoopbackConstraint = &structs.Constraint{
	LTarget: attrLoopbackCNI,
	Operand: structs.ConstraintSemver,
	RTarget: cniMinVersion,
}

// cniPortMapConstraint is implicitly added to task groups that use bridge
// networking mode. It requires the node's fingerprinted "portmap" CNI plugin
// version to satisfy cniMinVersion.
cniPortMapConstraint = &structs.Constraint{
	LTarget: attrPortMapCNI,
	Operand: structs.ConstraintSemver,
	RTarget: cniMinVersion,
}
)

type admissionController interface {
Expand Down Expand Up @@ -192,12 +248,14 @@ func (jobImpliedConstraints) Mutate(j *structs.Job) (*structs.Job, []error, erro
// Identify which task groups are utilizing NUMA resources.
numaTaskGroups := j.RequiredNUMA()

bridgeNetworkingTaskGroups := j.RequiredBridgeNetwork()

// Hot path where none of our things require constraints.
//
// [UPDATE THIS] if you are adding a new constraint thing!
if len(signals) == 0 && len(vaultBlocks) == 0 &&
nativeServiceDisco.Empty() && len(consulServiceDisco) == 0 &&
numaTaskGroups.Empty() {
numaTaskGroups.Empty() && bridgeNetworkingTaskGroups.Empty() {
return j, nil, nil
}

Expand Down Expand Up @@ -254,6 +312,14 @@ func (jobImpliedConstraints) Mutate(j *structs.Job) (*structs.Job, []error, erro
}
}
}

if bridgeNetworkingTaskGroups.Contains(tg.Name) {
mutateConstraint(constraintMatcherLeft, tg, cniBridgeConstraint)
mutateConstraint(constraintMatcherLeft, tg, cniFirewallConstraint)
mutateConstraint(constraintMatcherLeft, tg, cniHostLocalConstraint)
mutateConstraint(constraintMatcherLeft, tg, cniLoopbackConstraint)
mutateConstraint(constraintMatcherLeft, tg, cniPortMapConstraint)
}
}

return j, nil, nil
Expand Down
34 changes: 34 additions & 0 deletions nomad/job_endpoint_hooks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,40 @@ func Test_jobImpliedConstraints_Mutate(t *testing.T) {
expectedOutputWarnings: nil,
expectedOutputError: nil,
},
{
inputJob: &structs.Job{
Name: "example",
TaskGroups: []*structs.TaskGroup{
{
Name: "group-with-bridge",
Networks: []*structs.NetworkResource{
{Mode: "bridge"},
},
},
},
},
expectedOutputJob: &structs.Job{
Name: "example",
TaskGroups: []*structs.TaskGroup{
{
Name: "group-with-bridge",
Networks: []*structs.NetworkResource{
{Mode: "bridge"},
},
Constraints: []*structs.Constraint{
cniBridgeConstraint,
cniFirewallConstraint,
cniHostLocalConstraint,
cniLoopbackConstraint,
cniPortMapConstraint,
},
},
},
},
expectedOutputWarnings: nil,
expectedOutputError: nil,
name: "task group with bridge network",
},
}

for _, tc := range testCases {
Expand Down
12 changes: 12 additions & 0 deletions nomad/structs/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,15 @@ func (j *Job) RequiredNUMA() set.Collection[string] {
}
return result
}

// RequiredBridgeNetwork returns the names of the task groups in the job that
// declare at least one network whose mode is "bridge". The returned set is
// empty when no task group requests bridge networking.
func (j *Job) RequiredBridgeNetwork() set.Collection[string] {
	bridged := set.New[string](len(j.TaskGroups))
	for _, group := range j.TaskGroups {
		// Skip groups that never ask for a bridge network.
		if !group.Networks.Modes().Contains("bridge") {
			continue
		}
		bridged.Insert(group.Name)
	}
	return bridged
}
7 changes: 7 additions & 0 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -3006,6 +3006,13 @@ func (ns Networks) NetIndex(n *NetworkResource) int {
return -1
}

// Modes collects the Mode value of every NetworkResource in ns into a set,
// so each distinct mode appears once regardless of how many blocks use it.
func (ns Networks) Modes() *set.Set[string] {
	// modeOf projects a network block onto its configured mode.
	modeOf := func(network *NetworkResource) string {
		return network.Mode
	}
	return set.FromFunc(ns, modeOf)
}

// RequestedDevice is used to request a device for a task.
type RequestedDevice struct {
// Name is the request name. The possible values are as follows:
Expand Down
18 changes: 16 additions & 2 deletions website/content/docs/upgrade/upgrade-specific.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,26 @@ their upgrades as a result of new features or changed behavior. This page is
used to document those details separately from the standard upgrade flow.

## Nomad 1.8.0

#### Deprecated Disconnect Fields

Nomad 1.8.0 introduces a `disconnect` block meant to group all the configuration
options related to disconnected client's and server's behavior, causing the
deprecation of the fields `stop_after_client_disconnect`, `max_client_disconnect`
options related to disconnected client's and server's behavior, causing the
deprecation of the fields `stop_after_client_disconnect`, `max_client_disconnect`
and `prevent_reschedule_on_lost`. This block also introduces new options for
allocations reconciliation if the client regains connectivity.

#### CNI Constraints

In Nomad 1.8.0, jobs that use `bridge` networking have constraints added at job
submission that require the CNI plugins to be present on the node. Nodes have
fingerprinted the available CNI plugins since Nomad 1.5.0.

If you are upgrading from Nomad 1.5.0 or later to 1.8.0 or later, there's
nothing additional for you to do. Skipping more than 2 versions of Nomad in a
single upgrade is not recommended, but if you do upgrade from a version earlier
than 1.5.0 to 1.8.0 or later, you will need to ensure that clients have been
upgraded before submitting any jobs that use `bridge` networking.

#### Removal of `raw_exec` option `no_cgroups`

Expand Down

0 comments on commit 6ad648b

Please sign in to comment.