Skip to content

Commit

Permalink
vault: add new nomad setup vault -check command (#19720)
Browse files Browse the repository at this point in the history
The new `nomad setup vault -check` command can be used to retrieve
information about the changes required before a cluster is migrated from
the deprecated legacy authentication flow with Vault to use only
workload identities.
  • Loading branch information
lgfa29 authored Jan 12, 2024
1 parent 5b7f474 commit e1e80f3
Show file tree
Hide file tree
Showing 16 changed files with 1,070 additions and 15 deletions.
3 changes: 3 additions & 0 deletions .changelog/19720.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
cli: Add new option `nomad setup vault -check` to help cluster operators migrate to workload identities for Vault
```
60 changes: 60 additions & 0 deletions api/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -411,3 +411,63 @@ type LeadershipTransferResponse struct {

WriteMeta
}

// VaultWorkloadIdentityUpgradeCheck is the result of verifying if the cluster
// is ready to switch to workload identities for Vault.
//
// Each field lists one category of outstanding items. The Ready method
// reports true only when all three lists are empty.
type VaultWorkloadIdentityUpgradeCheck struct {
// JobsWithoutVaultIdentity is the list of jobs that have a `vault` block
// but do not have an `identity` for Vault.
JobsWithoutVaultIdentity []*JobListStub

// OutdatedNodes is the list of nodes running a version of Nomad that does
// not support workload identities for Vault.
OutdatedNodes []*NodeListStub

// VaultTokens is the list of Vault ACL token accessors that Nomad created
// and will no longer manage after the cluster is migrated to workload
// identities.
VaultTokens []*VaultAccessor
}

// Ready reports whether the cluster can be migrated to workload identities
// with Vault, meaning there are no Vault tokens, outdated nodes, or jobs
// without a Vault identity left to deal with. A nil receiver is never ready.
func (v *VaultWorkloadIdentityUpgradeCheck) Ready() bool {
	if v == nil {
		return false
	}

	// Lengths are never negative, so the sum is zero only when every list is
	// empty.
	pending := len(v.JobsWithoutVaultIdentity) +
		len(v.OutdatedNodes) +
		len(v.VaultTokens)
	return pending == 0
}

// VaultAccessor is a Vault ACL token created by Nomad for a task to access
// Vault using the legacy authentication flow.
//
// It identifies the token by its accessor ID and records which allocation,
// task, and node the token was requested for.
type VaultAccessor struct {
// AllocID is the ID of the allocation that requested this token.
AllocID string

// Task is the name of the task that requested this token.
Task string

// NodeID is the ID of the node running the allocation that requested this
// token.
NodeID string

// Accessor is the Vault ACL token accessor ID.
Accessor string

// CreationTTL is the TTL set when the token was created.
// NOTE(review): the unit is not stated here; presumably seconds — confirm.
CreationTTL int

// CreateIndex is the Raft index when the token was created.
CreateIndex uint64
}

// UpgradeCheckVaultWorkloadIdentity asks the Nomad API whether the cluster is
// ready to migrate to workload identities with Vault.
//
// It issues a query against the
// /v1/operator/upgrade-check/vault-workload-identity endpoint using the
// supplied query options and returns the decoded check result together with
// the query metadata. If the query fails, both are nil and the error is
// returned.
func (op *Operator) UpgradeCheckVaultWorkloadIdentity(q *QueryOptions) (*VaultWorkloadIdentityUpgradeCheck, *QueryMeta, error) {
	const endpoint = "/v1/operator/upgrade-check/vault-workload-identity"

	check := new(VaultWorkloadIdentityUpgradeCheck)
	meta, err := op.c.query(endpoint, check, q)
	if err != nil {
		return nil, nil, err
	}

	return check, meta, nil
}
1 change: 1 addition & 0 deletions command/agent/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
s.mux.HandleFunc("/v1/operator/autopilot/configuration", s.wrap(s.OperatorAutopilotConfiguration))
s.mux.HandleFunc("/v1/operator/autopilot/health", s.wrap(s.OperatorServerHealth))
s.mux.HandleFunc("/v1/operator/snapshot", s.wrap(s.SnapshotRequest))
s.mux.HandleFunc("/v1/operator/upgrade-check/", s.wrap(s.UpgradeCheckRequest))

s.mux.HandleFunc("/v1/system/gc", s.wrap(s.GarbageCollectRequest))
s.mux.HandleFunc("/v1/system/reconcile/summaries", s.wrap(s.ReconcileJobSummaries))
Expand Down
29 changes: 29 additions & 0 deletions command/agent/operator_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -521,3 +521,32 @@ func (s *HTTPServer) snapshotRestoreRequest(resp http.ResponseWriter, req *http.

return nil, codedErr
}

// UpgradeCheckRequest routes requests made under /v1/operator/upgrade-check
// to the handler for the specific upgrade check named by the rest of the path.
// Paths that do not match a known upgrade check produce a 404 coded error.
func (s *HTTPServer) UpgradeCheckRequest(resp http.ResponseWriter, req *http.Request) (any, error) {
	check := strings.TrimPrefix(req.URL.Path, "/v1/operator/upgrade-check")

	if strings.HasSuffix(check, "/vault-workload-identity") {
		return s.upgradeCheckVaultWorkloadIdentity(resp, req)
	}

	notFound := fmt.Sprintf("Path %s not found", req.URL.Path)
	return nil, CodedError(http.StatusNotFound, notFound)
}

// upgradeCheckVaultWorkloadIdentity handles GET requests for the Vault
// workload identity upgrade check.
//
// Any other HTTP method yields a 405 coded error. The request is parsed into
// the RPC arguments, forwarded to the Operator.UpgradeCheckVaultWorkloadIdentity
// RPC, and the RPC reply is returned after its query metadata has been written
// to the response through setMeta.
func (s *HTTPServer) upgradeCheckVaultWorkloadIdentity(resp http.ResponseWriter, req *http.Request) (any, error) {
	if req.Method != http.MethodGet {
		return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod)
	}

	var (
		rpcArgs  structs.UpgradeCheckVaultWorkloadIdentityRequest
		rpcReply structs.UpgradeCheckVaultWorkloadIdentityResponse
	)

	// A true result from parse means there is nothing further to do here.
	if done := s.parse(resp, req, &rpcArgs.Region, &rpcArgs.QueryOptions); done {
		return nil, nil
	}

	err := s.agent.RPC("Operator.UpgradeCheckVaultWorkloadIdentity", &rpcArgs, &rpcReply)
	if err != nil {
		return nil, err
	}

	setMeta(resp, &rpcReply.QueryMeta)
	return rpcReply, nil
}
39 changes: 39 additions & 0 deletions command/agent/operator_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -661,3 +661,42 @@ func TestOperator_SnapshotRequests(t *testing.T) {
require.True(t, jobExists())
})
}

// TestOperator_UpgradeCheckRequest_VaultWorkloadIdentity registers a job that
// uses Vault without a workload identity and verifies that the upgrade check
// HTTP endpoint reports that job and sets the query metadata headers.
func TestOperator_UpgradeCheckRequest_VaultWorkloadIdentity(t *testing.T) {
ci.Parallel(t)
// Enable the first Vault configuration of the test agent and name it
// "default" so it matches the cluster referenced by the job below.
httpTest(t, func(c *Config) {
c.Vaults[0].Enabled = pointer.Of(true)
c.Vaults[0].Name = "default"
}, func(s *TestAgent) {
// Create a test job with a Vault block but without an identity.
job := mock.Job()
job.TaskGroups[0].Tasks[0].Vault = &structs.Vault{
Cluster: "default",
Policies: []string{"test"},
}

// Register the job through the agent RPC so it exists before the check
// is queried.
args := structs.JobRegisterRequest{
Job: job,
WriteRequest: structs.WriteRequest{Region: "global"},
}
var resp structs.JobRegisterResponse
err := s.Agent.RPC("Job.Register", &args, &resp)
must.NoError(t, err)

// Make HTTP request to retrieve the upgrade check status.
req, err := http.NewRequest(http.MethodGet, "/v1/operator/upgrade-check/vault-workload-identity", nil)
must.NoError(t, err)
respW := httptest.NewRecorder()

// Call the handler directly and verify the query metadata headers.
obj, err := s.Server.UpgradeCheckRequest(respW, req)
must.NoError(t, err)
must.NotEq(t, "", respW.Header().Get("X-Nomad-Index"))
must.NotEq(t, "", respW.Header().Get("X-Nomad-LastContact"))
must.Eq(t, "true", respW.Header().Get("X-Nomad-KnownLeader"))

// Only the job registered above should be reported, and no Vault tokens
// are expected.
upgradeCheck := obj.(structs.UpgradeCheckVaultWorkloadIdentityResponse)
must.Len(t, 1, upgradeCheck.JobsWithoutVaultIdentity)
must.Len(t, 0, upgradeCheck.VaultTokens)
must.Eq(t, job.ID, upgradeCheck.JobsWithoutVaultIdentity[0].ID)
})
}
180 changes: 178 additions & 2 deletions command/setup_vault.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"slices"
"strings"

"github.com/dustin/go-humanize/english"
"github.com/hashicorp/vault/api"
"github.com/mitchellh/cli"
"github.com/posener/complete"
Expand Down Expand Up @@ -48,6 +49,12 @@ type SetupVaultCommand struct {

destroy bool
autoYes bool

// Options for -check.
check bool
json bool
tmpl string
verbose bool
}

// Help satisfies the cli.Command Help function.
Expand All @@ -62,6 +69,10 @@ Usage: nomad setup vault [options]
VAULT_TOKEN, VAULT_ADDR, and other Vault-related environment variables
as documented in https://developer.hashicorp.com/vault/docs/commands#environment-variables.
The -check option can be used to verify if the Nomad cluster is ready to
migrate to use Workload Identities with Vault. This option requires
operator:read permission for Nomad.
WARNING: This command is an experimental feature and may change its behavior
in future versions of Nomad.
Expand All @@ -79,7 +90,22 @@ Setup Vault options:
Automatically answers "yes" to all the questions, making the setup
non-interactive. Defaults to "false".
`
-check
Verify if the Nomad cluster is ready to migrate to Workload Identities.
Setup Vault options when using -check:
-json
Output migration status information in its JSON format.
-t
Format and display migration status information using a Go template.
-verbose
Display full information.
` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace)

return strings.TrimSpace(helpText)
}

Expand All @@ -89,6 +115,12 @@ func (s *SetupVaultCommand) AutocompleteFlags() complete.Flags {
"-jwks-url": complete.PredictAnything,
"-destroy": complete.PredictSet("true", "false"),
"-y": complete.PredictSet("true", "false"),

// Options for -check.
"-check": complete.PredictSet("true", "false"),
"-json": complete.PredictSet("true", "false"),
"-verbose": complete.PredictSet("true", "false"),
"-t": complete.PredictAnything,
})
}

Expand All @@ -110,6 +142,13 @@ func (s *SetupVaultCommand) Run(args []string) int {
flags.BoolVar(&s.destroy, "destroy", false, "")
flags.BoolVar(&s.autoYes, "y", false, "")
flags.StringVar(&s.jwksURL, "jwks-url", "http://localhost:4646/.well-known/jwks.json", "")

// Options for -check.
flags.BoolVar(&s.check, "check", false, "")
flags.BoolVar(&s.json, "json", false, "")
flags.BoolVar(&s.verbose, "verbose", false, "")
flags.StringVar(&s.tmpl, "t", "", "")

if err := flags.Parse(args); err != nil {
return 1
}
Expand All @@ -121,6 +160,32 @@ func (s *SetupVaultCommand) Run(args []string) int {
return 1
}

if s.check {
return s.checkUpgrade()
} else {
// Verify that -check flags are not set.
var invalid []string
if s.json {
invalid = append(invalid, "-json")
}
if s.verbose {
invalid = append(invalid, "-verbose")
}
if s.tmpl != "" {
invalid = append(invalid, "-t")
}

if len(invalid) > 0 {
s.Ui.Error(fmt.Sprintf(
"The %s %s can only be used with -check",
english.OxfordWordSeries(invalid, "and"),
english.PluralWord(len(invalid), "option", "options"),
))
s.Ui.Error(commandErrorText(s))
return 1
}
}

if !isTty() && !s.autoYes {
s.Ui.Error("This command requires -y option when running in non-interactive mode")
return 1
Expand Down Expand Up @@ -216,7 +281,7 @@ a namespace %q and create all configuration within that namespace.
*/
s.Ui.Output(`
We will now enable the JWT credential backend and create a JWT auth method that
Nomad workloads will use.
Nomad workloads will use.
`)

if s.authMethodExists() {
Expand Down Expand Up @@ -606,6 +671,117 @@ func (s *SetupVaultCommand) removeConfiguredComponents() int {
return exitCode
}

// checkUpgrade implements `nomad setup vault -check`. It queries the Nomad
// operator API for the Vault workload identity upgrade check and prints the
// result.
//
// When -json or -t is set, the response is rendered with Format and nothing
// else is printed. Otherwise, if the cluster is ready a single confirmation
// line is printed; if not, a table is printed for each non-empty category in
// the response: jobs without a Vault identity, outdated nodes, and Vault
// tokens created by Nomad. Job, node, and allocation IDs are passed through
// limit using shortId, or fullId when -verbose is set.
//
// It returns 1 if the client cannot be created, the query fails, or the
// output cannot be formatted, and 0 otherwise, including when the cluster is
// not yet ready to migrate.
func (s *SetupVaultCommand) checkUpgrade() int {
	length := shortId
	if s.verbose {
		length = fullId
	}

	client, err := s.Meta.Client()
	if err != nil {
		s.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	resp, _, err := client.Operator().UpgradeCheckVaultWorkloadIdentity(nil)
	if err != nil {
		// Name the request that actually failed so operators are not sent
		// looking at the unrelated scheduler configuration endpoint.
		s.Ui.Error(fmt.Sprintf("Error querying Vault workload identity upgrade check: %s", err))
		return 1
	}

	// Output formatted option if requested.
	if s.json || len(s.tmpl) > 0 {
		out, err := Format(s.json, s.tmpl, resp)
		if err != nil {
			s.Ui.Error(err.Error())
			return 1
		}

		s.Ui.Output(out)
		return 0
	}

	if resp.Ready() {
		s.Ui.Output("Nomad cluster is ready to use workload identities with Vault.")
		return 0
	}

	if len(resp.JobsWithoutVaultIdentity) != 0 {
		s.Ui.Output(s.Colorize().Color(`
[bold]Jobs Without Workload Identity for Vault[reset]
The following jobs access Vault but are not configured for workload identity.
You should redeploy them before fully migrating to workload identities with
Vault to prevent unexpected errors if their tokens need to be recreated.
Refer to https://developer.hashicorp.com/nomad/s/vault-workload-identity-migration
for more information.
`))
		// The first row is the table header, so the slice holds one entry
		// more than the number of jobs.
		out := make([]string, len(resp.JobsWithoutVaultIdentity)+1)
		out[0] = "ID|Namespace|Type|Status"
		for i, job := range resp.JobsWithoutVaultIdentity {
			out[i+1] = fmt.Sprintf("%s|%s|%s|%s",
				limit(job.ID, length),
				job.Namespace,
				job.Type,
				job.Status,
			)
		}
		s.Ui.Output(formatList(out))
	}

	if len(resp.OutdatedNodes) != 0 {
		s.Ui.Output(s.Colorize().Color(`
[bold]Outdated Nodes[reset]
The following nodes are running a version of Nomad that does not support using
workload identities with Vault.
You should upgrade them to Nomad 1.7 before fully migrating to workload
identities with Vault to prevent unexpected errors if they receive allocations
for jobs that use Vault.
Refer to https://developer.hashicorp.com/nomad/s/vault-workload-identity-migration
for more information.
`))
		out := make([]string, len(resp.OutdatedNodes)+1)
		out[0] = "ID|Name|Address|Version|Drain|Eligibility|Status"
		for i, node := range resp.OutdatedNodes {
			out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s|%s",
				limit(node.ID, length),
				node.Name,
				node.Address,
				node.Version,
				node.Drain,
				node.SchedulingEligibility,
				node.Status,
			)
		}
		s.Ui.Output(formatList(out))
	}

	if len(resp.VaultTokens) != 0 {
		s.Ui.Output(s.Colorize().Color(`
[bold]Vault Tokens[reset]
The following Vault ACL tokens were created by Nomad but will not be
automatically revoked after migrating to workload identities. They will expire
once their TTL reaches zero.
`))
		out := make([]string, len(resp.VaultTokens)+1)
		out[0] = "Accessor ID|Allocation ID|Node ID|Configured TTL"
		for i, token := range resp.VaultTokens {
			// The accessor is printed in full; only the allocation and node
			// IDs go through limit.
			out[i+1] = fmt.Sprintf("%s|%s|%s|%d",
				token.Accessor,
				limit(token.AllocID, length),
				limit(token.NodeID, length),
				token.CreationTTL,
			)
		}
		s.Ui.Output(formatList(out))
	}

	return 0
}

func printMapOfStrings(m map[string]string) string {
var output string

Expand Down
Loading

0 comments on commit e1e80f3

Please sign in to comment.