From c69a5569e5701ffd2f30178061b74ee2cf873d40 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 11 Oct 2023 12:03:35 -0400 Subject: [PATCH] auth: add client-only ACL The RPC handlers expect to see `nil` ACL objects whenever ACLs are disabled. By using `nil` as a sentinel value, we have the risk of nil pointer exceptions and improper handling of `nil` when returned from our various auth methods that can lead to privilege escalation bugs. This is the third in a series to eliminate the use of `nil` ACLs as a sentinel value for when ACLs are disabled. This patch involves creating a new "virtual" ACL object for checking permissions on client operations and a matching `AuthenticateClientOnly` method for client-only RPCs that can produce that ACL. Unlike the server ACLs PR, this also includes a special case for "legacy" client RPCs where the client was not previously sending the secret as it should (leaning on mTLS only). Those client RPCs were fixed in Nomad 1.6.0, but it'll take a while before we can guarantee they'll be present during upgrades. Ref: https://github.com/hashicorp/nomad-enterprise/pull/1218 Ref: https://github.com/hashicorp/nomad/pull/18703 Ref: https://github.com/hashicorp/nomad/pull/18715 Ref: https://github.com/hashicorp/nomad/pull/16799 --- .semgrep/rpc_endpoint.yml | 29 +++- acl/acl.go | 44 +++++- acl/acl_test.go | 3 + acl/virtual.go | 12 ++ nomad/acl.go | 12 +- nomad/alloc_endpoint.go | 17 +-- nomad/alloc_endpoint_test.go | 10 +- nomad/auth/auth.go | 97 ++++++++++++ nomad/auth/auth_test.go | 159 ++++++++++++++++++++ nomad/csi_endpoint.go | 11 +- nomad/node_endpoint.go | 37 +++-- nomad/node_endpoint_test.go | 7 +- nomad/operator_endpoint_test.go | 16 +- nomad/rpc_test.go | 2 +- nomad/service_registration_endpoint.go | 80 +++------- nomad/service_registration_endpoint_test.go | 6 +- 16 files changed, 419 insertions(+), 123 deletions(-) diff --git a/.semgrep/rpc_endpoint.yml b/.semgrep/rpc_endpoint.yml index 5fc8f22ebbd6..c4ad201d9f3e 100644 --- a/.semgrep/rpc_endpoint.yml +++ b/.semgrep/rpc_endpoint.yml @@ -40,17 +40,38 @@ rules: } ... - # Pattern used by endpoints that are used by both ACLs and Clients. - # These endpoints will always have a ctx passed to Authenticate + + # Pattern used by endpoints that are used only for client-to-server. + # Authorization can be done after forwarding, but must check the + # AllowClientOp policy - pattern-not-inside: | - authErr := $A.$B.Authenticate($A.ctx, args) + aclObj, err := $A.$B.AuthenticateClientOnly($A.ctx, args) + ... + if done, err := $A.$B.forward($METHOD, ...); done { + return err + } + ... + if !aclObj.AllowClientOp() { + return structs.ErrPermissionDenied + } + ... + + + # Pattern used by endpoints that are used only for client-to-server. + # Authorization can be done after forwarding, but must check the + # AllowClientOp policy. This should not be added to any new endpoints. + - pattern-not-inside: | + aclObj, err := $A.$B.AuthenticateClientOnlyLegacy($A.ctx, args) ... if done, err := $A.$B.forward($METHOD, ...); done { return err } ... - ... := $A.$B.ResolveClientOrACL(...) + if !aclObj.AllowClientOp() { + return structs.ErrPermissionDenied + } ... + # Pattern used by ACL endpoints that need to interact with the token directly - pattern-not-inside: | authErr := $A.$B.Authenticate($A.ctx, args) diff --git a/acl/acl.go b/acl/acl.go index a791dc576c1a..44c807b1c755 100644 --- a/acl/acl.go +++ b/acl/acl.go @@ -78,6 +78,7 @@ type ACL struct { // The attributes below detail a virtual policy that we never expose // directly to the end user. + client string server string isLeader bool } @@ -301,6 +302,7 @@ func NewACL(management bool, policies []*Policy) (*ACL, error) { acl.variables = svTxn.Commit() acl.wildcardVariables = wsvTxn.Commit() + acl.client = PolicyDeny acl.server = PolicyDeny acl.isLeader = false @@ -332,6 +334,11 @@ func (a *ACL) AllowNamespaceOperation(ns string, op string) bool { return true } + // Clients need to be able to read their namespaced objects + if a.client != PolicyDeny { + return true + } + // If using the all namespaces wildcard, allow if any namespace allows the // operation. if ns == AllNamespacesSentinel && a.anyNamespaceAllowsOp(op) { @@ -731,6 +738,9 @@ func (a *ACL) AllowNodeRead() bool { return true case a.node == PolicyRead: return true + case a.client == PolicyRead, + a.client == PolicyWrite: + return true case a.server == PolicyRead, a.server == PolicyWrite: return true @@ -813,6 +823,9 @@ func (a *ACL) AllowPluginRead() bool { return true case a.management: return true + case a.client == PolicyRead, + a.client == PolicyWrite: + return true case a.plugin == PolicyRead: return true default: @@ -828,6 +841,9 @@ func (a *ACL) AllowPluginList() bool { return true case a.management: return true + case a.client == PolicyRead, + a.client == PolicyWrite: + return true case a.plugin == PolicyList: return true case a.plugin == PolicyRead: @@ -837,6 +853,15 @@ func (a *ACL) AllowPluginList() bool { } } +func (a *ACL) AllowServiceRegistrationReadList(ns string, isWorkload bool) bool { + if a == nil { + // ACL is nil only if ACLs are disabled + // TODO(tgross): return false when there are no nil ACLs + return true + } + return isWorkload || a.AllowNsOp(ns, NamespaceCapabilityReadJob) +} + // AllowServerOp checks if server-only operations are allowed func (a *ACL) AllowServerOp() bool { if a == nil { @@ -847,6 +872,15 @@ func (a *ACL) AllowServerOp() bool { return a.server != PolicyDeny || a.isLeader } +func (a *ACL) AllowClientOp() bool { + if a == nil { + // ACL is nil only if ACLs are disabled + // TODO(tgross): return false when there are no nil ACLs + return true + } + return a.client != PolicyDeny +} + // IsManagement checks if this represents a management token func (a *ACL) IsManagement() bool { return a.management @@ -856,14 +890,18 @@ func (a *ACL) IsManagement() bool { // a list of operations. Returns true (allowed) if acls are disabled or if // *any* capabilities match. func NamespaceValidator(ops ...string) func(*ACL, string) bool { - return func(acl *ACL, ns string) bool { + return func(a *ACL, ns string) bool { // Always allow if ACLs are disabled. - if acl == nil { + if a == nil { + return true + } + // Clients need to be able to read namespaced objects + if a.client != PolicyDeny { return true } for _, op := range ops { - if acl.AllowNamespaceOperation(ns, op) { + if a.AllowNamespaceOperation(ns, op) { // An operation is allowed, return true return true } diff --git a/acl/acl_test.go b/acl/acl_test.go index 260daa10067b..a0332f6559c2 100644 --- a/acl/acl_test.go +++ b/acl/acl_test.go @@ -100,6 +100,7 @@ func TestACLManagement(t *testing.T) { must.True(t, acl.AllowQuotaRead()) must.True(t, acl.AllowQuotaWrite()) must.True(t, acl.AllowServerOp()) + must.True(t, acl.AllowClientOp()) } func TestACLMerge(t *testing.T) { @@ -143,6 +144,7 @@ func TestACLMerge(t *testing.T) { must.True(t, acl.AllowQuotaRead()) must.True(t, acl.AllowQuotaWrite()) must.False(t, acl.AllowServerOp()) + must.False(t, acl.AllowClientOp()) // Merge read + blank p3, err := Parse("") @@ -178,6 +180,7 @@ func TestACLMerge(t *testing.T) { must.True(t, acl.AllowQuotaRead()) must.False(t, acl.AllowQuotaWrite()) must.False(t, acl.AllowServerOp()) + must.False(t, acl.AllowClientOp()) // Merge read + deny p4, err := Parse(denyAll) diff --git a/acl/virtual.go b/acl/virtual.go index e9fd58813a69..fa136a3cf799 100644 --- a/acl/virtual.go +++ b/acl/virtual.go @@ -3,8 +3,20 @@ package acl +var ClientACL = initClientACL() var ServerACL = initServerACL() +func initClientACL() *ACL { + aclObj, err := NewACL(false, []*Policy{}) + if err != nil { + panic(err) + } + aclObj.client = PolicyWrite + aclObj.agent = PolicyRead + aclObj.server = PolicyRead + return aclObj +} + func initServerACL() *ACL { aclObj, err := NewACL(false, []*Policy{}) if err != nil { diff --git a/nomad/acl.go b/nomad/acl.go index 22cb7c65e692..39157d3e3f1b 100644 --- a/nomad/acl.go +++ b/nomad/acl.go @@ -16,6 +16,14 @@ func (srv *Server) AuthenticateServerOnly(ctx *RPCContext, args structs.RequestW return srv.auth.AuthenticateServerOnly(ctx, args) } +func (srv *Server) AuthenticateClientOnly(ctx *RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) { + return srv.auth.AuthenticateClientOnly(ctx, args) +} + +func (srv *Server) AuthenticateClientOnlyLegacy(ctx *RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) { + return srv.auth.AuthenticateClientOnlyLegacy(ctx, args) +} + func (srv *Server) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) { return srv.auth.ResolveACL(args) } @@ -28,10 +36,6 @@ func (srv *Server) ResolveToken(secretID string) (*acl.ACL, error) { return srv.auth.ResolveToken(secretID) } -func (srv *Server) ResolveClientOrACL(args structs.RequestWithIdentity) (*acl.ACL, error) { - return srv.auth.ResolveClientOrACL(args) -} - func (srv *Server) ResolvePoliciesForClaims(claims *structs.IdentityClaims) ([]*structs.ACLPolicy, error) { return srv.auth.ResolvePoliciesForClaims(claims) } diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index ee0d3d52703b..bdd1f5801e68 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -155,7 +155,7 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest, // Check namespace read-job permissions before performing blocking query. allowNsOp := acl.NamespaceValidator(acl.NamespaceCapabilityReadJob) - aclObj, err := a.srv.ResolveClientOrACL(args) + aclObj, err := a.srv.ResolveACL(args) if err != nil { return err } @@ -200,21 +200,20 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest, func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest, reply *structs.AllocsGetResponse) error { - authErr := a.srv.Authenticate(a.ctx, args) - - // Ensure the connection was initiated by a client if TLS is used. - err := validateTLSCertificateLevel(a.srv, a.ctx, tlsCertificateLevelClient) + aclObj, err := a.srv.AuthenticateClientOnly(a.ctx, args) + a.srv.MeasureRPCRate("alloc", structs.RateMetricWrite, args) if err != nil { - return err + return structs.ErrPermissionDenied } + if done, err := a.srv.forward("Alloc.GetAllocs", args, args, reply); done { return err } - a.srv.MeasureRPCRate("alloc", structs.RateMetricList, args) - if authErr != nil { + defer metrics.MeasureSince([]string{"nomad", "alloc", "get_allocs"}, time.Now()) + + if !aclObj.AllowClientOp() { return structs.ErrPermissionDenied } - defer metrics.MeasureSince([]string{"nomad", "alloc", "get_allocs"}, time.Now()) allocs := make([]*structs.Allocation, len(args.AllocIDs)) diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index e5d6d99405e6..dbbdda87f16c 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -948,11 +948,15 @@ func TestAllocEndpoint_GetAllocs(t *testing.T) { t.Fatalf("err: %v", err) } + node := mock.Node() + state.UpsertNode(structs.MsgTypeTestSetup, 1001, node) + // Lookup the allocs get := &structs.AllocsGetRequest{ AllocIDs: []string{alloc.ID, alloc2.ID}, QueryOptions: structs.QueryOptions{ - Region: "global", + Region: "global", + AuthToken: node.SecretID, }, } var resp structs.AllocsGetResponse @@ -986,6 +990,9 @@ func TestAllocEndpoint_GetAllocs_Blocking(t *testing.T) { codec := rpcClient(t, s1) testutil.WaitForLeader(t, s1.RPC) + node := mock.Node() + state.UpsertNode(structs.MsgTypeTestSetup, 50, node) + // Create the allocs alloc1 := mock.Alloc() alloc2 := mock.Alloc() @@ -1014,6 +1021,7 @@ func TestAllocEndpoint_GetAllocs_Blocking(t *testing.T) { QueryOptions: structs.QueryOptions{ Region: "global", MinQueryIndex: 150, + AuthToken: node.SecretID, }, } var resp structs.AllocsGetResponse diff --git a/nomad/auth/auth.go b/nomad/auth/auth.go index 69756839a8d9..12c8a5422bee 100644 --- a/nomad/auth/auth.go +++ b/nomad/auth/auth.go @@ -249,6 +249,103 @@ func (s *Authenticator) AuthenticateServerOnly(ctx RPCContext, args structs.Requ return acl.ServerACL, nil } +// AuthenticateClientOnly returns an ACL object for use *only* with internal +// RPCs originating from clients (including those forwarded). This should never +// be used for RPCs that serve HTTP endpoints to avoid confused deputy attacks +// by making a request to a client that's forwarded. It should also not be used +// with Node.Register, which should use AuthenticateClientTOFU +// +// The returned ACL object is always a acl.ClientACL but in the future this +// could be extended to allow clients access only to their own pool and +// associated namespaces, etc. +func (s *Authenticator) AuthenticateClientOnly(ctx RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) { + + remoteIP, err := ctx.GetRemoteIP() // capture for metrics + if err != nil { + s.logger.Error("could not determine remote address", "error", err) + } + + identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP} + defer args.SetIdentity(identity) // always set the identity, even on errors + + if s.tlsEnabled && !ctx.IsStatic() { + tlsCert := ctx.Certificate() + if tlsCert == nil { + return nil, errors.New("missing certificate information") + } + + // set on the identity whether or not its valid for server RPC, so we + // can capture it for metrics + identity.TLSName = tlsCert.Subject.CommonName + + expected := fmt.Sprintf("client.%s.nomad", s.region) + _, err := validateCertificateForName(tlsCert, expected) + if err != nil { + expected := fmt.Sprintf("server.%s.nomad", s.region) + _, err := validateCertificateForName(tlsCert, expected) + if err != nil { + return nil, err + } + } + } + + secretID := args.GetAuthToken() + if secretID == "" { + return nil, structs.ErrPermissionDenied + } + + // Otherwise, see if the secret ID belongs to a node. We should + // reach this point only on first connection. + node, err := s.getState().NodeBySecretID(nil, secretID) + if err != nil { + // this is a go-memdb error; shouldn't happen + return nil, fmt.Errorf("could not resolve node secret: %w", err) + } + if node == nil { + return nil, structs.ErrPermissionDenied + } + identity.ClientID = node.ID + return acl.ClientACL, nil +} + +// AuthenticateClientOnlyLegacy is a version of AuthenticateClientOnly that's +// used by a few older RPCs that did not properly enforce node secrets. +// COMPAT(1.8.0): In Nomad 1.6.0 we starting sending those node secrets, so we +// can remove this in Nomad 1.8.0. +func (s *Authenticator) AuthenticateClientOnlyLegacy(ctx RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) { + + remoteIP, err := ctx.GetRemoteIP() // capture for metrics + if err != nil { + s.logger.Error("could not determine remote address", "error", err) + } + + identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP} + defer args.SetIdentity(identity) // always set the identity, even on errors + + if s.tlsEnabled && !ctx.IsStatic() { + tlsCert := ctx.Certificate() + if tlsCert == nil { + return nil, errors.New("missing certificate information") + } + + // set on the identity whether or not its valid for server RPC, so we + // can capture it for metrics + identity.TLSName = tlsCert.Subject.CommonName + + expected := fmt.Sprintf("client.%s.nomad", s.region) + _, err := validateCertificateForName(tlsCert, expected) + if err != nil { + expected := fmt.Sprintf("server.%s.nomad", s.region) + _, err := validateCertificateForName(tlsCert, expected) + if err != nil { + return nil, err + } + } + } + + return acl.ClientACL, nil +} + // validateCertificateForName returns true if the certificate is valid // for the given domain name. func validateCertificateForName(cert *x509.Certificate, expectedName string) (bool, error) { diff --git a/nomad/auth/auth_test.go b/nomad/auth/auth_test.go index 8184c449eba3..bcac38976fe7 100644 --- a/nomad/auth/auth_test.go +++ b/nomad/auth/auth_test.go @@ -414,6 +414,165 @@ func TestAuthenticateServerOnly(t *testing.T) { } +func TestAuthenticateClientOnly(t *testing.T) { + ci.Parallel(t) + + testAuthenticator := func(t *testing.T, store *state.StateStore, + hasACLs, hasTLS bool) *Authenticator { + leaderACL := uuid.Generate() + + return NewAuthenticator(&AuthenticatorConfig{ + StateFn: func() *state.StateStore { return store }, + Logger: testlog.HCLogger(t), + GetLeaderACLFn: func() string { return leaderACL }, + AclsEnabled: hasACLs, + TLSEnabled: hasTLS, + Region: "global", + Encrypter: nil, + }) + } + + testCases := []struct { + name string + testFn func(*testing.T, *state.StateStore, *structs.Node) + }{ + { + name: "no mTLS or ACLs but no node secret", + testFn: func(t *testing.T, store *state.StateStore, node *structs.Node) { + ctx := newTestContext(t, noTLSCtx, "192.168.1.1") + args := &structs.GenericRequest{} + args.AuthToken = "" + + auth := testAuthenticator(t, store, false, false) + + aclObj, err := auth.AuthenticateClientOnly(ctx, args) + must.ErrorIs(t, err, structs.ErrPermissionDenied) + must.Eq(t, ":192.168.1.1", args.GetIdentity().String()) + must.Nil(t, aclObj) + }, + }, + { + name: "no mTLS or ACLs but with node secret", + testFn: func(t *testing.T, store *state.StateStore, node *structs.Node) { + ctx := newTestContext(t, noTLSCtx, "192.168.1.1") + args := &structs.GenericRequest{} + args.AuthToken = node.SecretID + + auth := testAuthenticator(t, store, false, false) + + aclObj, err := auth.AuthenticateClientOnly(ctx, args) + must.NoError(t, err) + must.NotNil(t, aclObj) + must.Eq(t, "client:"+node.ID, args.GetIdentity().String()) + }, + }, + { + name: "no mTLS but with ACLs", + testFn: func(t *testing.T, store *state.StateStore, node *structs.Node) { + ctx := newTestContext(t, noTLSCtx, "192.168.1.1") + args := &structs.GenericRequest{} + args.AuthToken = node.SecretID + + auth := testAuthenticator(t, store, true, false) + + aclObj, err := auth.AuthenticateClientOnly(ctx, args) + must.NoError(t, err) + must.NotNil(t, aclObj) + must.Eq(t, "client:"+node.ID, args.GetIdentity().String()) + must.True(t, aclObj.AllowClientOp()) + }, + }, + { + name: "no mTLS but with ACLs and bad secret", + testFn: func(t *testing.T, store *state.StateStore, node *structs.Node) { + ctx := newTestContext(t, noTLSCtx, "192.168.1.1") + args := &structs.GenericRequest{} + args.AuthToken = uuid.Generate() + + auth := testAuthenticator(t, store, true, false) + + aclObj, err := auth.AuthenticateClientOnly(ctx, args) + must.ErrorIs(t, err, structs.ErrPermissionDenied) + must.Eq(t, ":192.168.1.1", args.GetIdentity().String()) + must.Nil(t, aclObj) + }, + }, + { + name: "with mTLS and ACLs but CLI cert", + testFn: func(t *testing.T, store *state.StateStore, node *structs.Node) { + ctx := newTestContext(t, "cli.global.nomad", "192.168.1.1") + args := &structs.GenericRequest{} + + auth := testAuthenticator(t, store, true, true) + + aclObj, err := auth.AuthenticateClientOnly(ctx, args) + must.EqError(t, err, + "invalid certificate, server.global.nomad not in cli.global.nomad") + must.Eq(t, "cli.global.nomad:192.168.1.1", args.GetIdentity().String()) + must.Nil(t, aclObj) + }, + }, + { + name: "with mTLS and ACLs with server cert but bad token", + testFn: func(t *testing.T, store *state.StateStore, node *structs.Node) { + ctx := newTestContext(t, "server.global.nomad", "192.168.1.1") + args := &structs.GenericRequest{} + args.AuthToken = uuid.Generate() + + auth := testAuthenticator(t, store, true, true) + + aclObj, err := auth.AuthenticateClientOnly(ctx, args) + must.ErrorIs(t, err, structs.ErrPermissionDenied) + must.Eq(t, "server.global.nomad:192.168.1.1", args.GetIdentity().String()) + must.Nil(t, aclObj) + }, + }, + { + name: "with mTLS and ACLs with server cert and valid token", + testFn: func(t *testing.T, store *state.StateStore, node *structs.Node) { + ctx := newTestContext(t, "server.global.nomad", "192.168.1.1") + args := &structs.GenericRequest{} + args.AuthToken = node.SecretID + + auth := testAuthenticator(t, store, true, true) + + aclObj, err := auth.AuthenticateClientOnly(ctx, args) + must.NoError(t, err) + + must.Eq(t, "client:"+node.ID, args.GetIdentity().String()) + must.NotNil(t, aclObj) + must.True(t, aclObj.AllowClientOp()) + }, + }, + { + name: "with mTLS and ACLs with client cert", + testFn: func(t *testing.T, store *state.StateStore, node *structs.Node) { + ctx := newTestContext(t, "client.global.nomad", "192.168.1.1") + args := &structs.GenericRequest{} + args.AuthToken = node.SecretID + + auth := testAuthenticator(t, store, true, true) + + aclObj, err := auth.AuthenticateClientOnly(ctx, args) + must.NoError(t, err) + must.Eq(t, "client:"+node.ID, args.GetIdentity().String()) + must.NotNil(t, aclObj) + must.True(t, aclObj.AllowClientOp()) + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + node := mock.Node() + store := testStateStore(t) + store.UpsertNode(structs.MsgTypeTestSetup, 100, node) + tc.testFn(t, store, node) + }) + } + +} + func TestResolveACLToken(t *testing.T) { ci.Parallel(t) diff --git a/nomad/csi_endpoint.go b/nomad/csi_endpoint.go index 3ffb41e0e252..8d32efd155a7 100644 --- a/nomad/csi_endpoint.go +++ b/nomad/csi_endpoint.go @@ -189,7 +189,7 @@ func (v *CSIVolume) Get(args *structs.CSIVolumeGetRequest, reply *structs.CSIVol allowCSIAccess := acl.NamespaceValidator(acl.NamespaceCapabilityCSIReadVolume, acl.NamespaceCapabilityCSIMountVolume, acl.NamespaceCapabilityReadJob) - aclObj, err := v.srv.ResolveClientOrACL(args) + aclObj, err := v.srv.ResolveACL(args) if err != nil { return err } @@ -452,14 +452,13 @@ func (v *CSIVolume) Claim(args *structs.CSIVolumeClaimRequest, reply *structs.CS return structs.ErrPermissionDenied } + defer metrics.MeasureSince([]string{"nomad", "volume", "claim"}, time.Now()) + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityCSIMountVolume) - aclObj, err := v.srv.ResolveClientOrACL(args) + aclObj, err := v.srv.ResolveACL(args) if err != nil { return err } - - defer metrics.MeasureSince([]string{"nomad", "volume", "claim"}, time.Now()) - if !allowVolume(aclObj, args.RequestNamespace()) || !aclObj.AllowPluginRead() { return structs.ErrPermissionDenied } @@ -694,7 +693,7 @@ func (v *CSIVolume) Unpublish(args *structs.CSIVolumeUnpublishRequest, reply *st defer metrics.MeasureSince([]string{"nomad", "volume", "unpublish"}, time.Now()) allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityCSIMountVolume) - aclObj, err := v.srv.ResolveClientOrACL(args) + aclObj, err := v.srv.ResolveACL(args) if err != nil { return err } diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 824da2c93ff7..485be68c237a 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -1021,7 +1021,7 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest, defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now()) // Check node read permissions - aclObj, err := n.srv.ResolveClientOrACL(args) + aclObj, err := n.srv.ResolveACL(args) if err != nil { return err } @@ -1317,23 +1317,21 @@ func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest, // - The node status is down or disconnected. Clients must call the // UpdateStatus method to update its status in the server. func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error { - - authErr := n.srv.Authenticate(n.ctx, args) - - // Ensure the connection was initiated by another client if TLS is used. - err := validateTLSCertificateLevel(n.srv, n.ctx, tlsCertificateLevelClient) + // COMPAT(1.9.0): move to AuthenticateClientOnly + aclObj, err := n.srv.AuthenticateClientOnlyLegacy(n.ctx, args) + n.srv.MeasureRPCRate("node", structs.RateMetricWrite, args) if err != nil { - return err + return structs.ErrPermissionDenied } + if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done { return err } - n.srv.MeasureRPCRate("node", structs.RateMetricWrite, args) - if authErr != nil { - return structs.ErrPermissionDenied - } defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now()) + if !aclObj.AllowClientOp() { + return structs.ErrPermissionDenied + } // Ensure at least a single alloc if len(args.Alloc) == 0 { @@ -2253,22 +2251,21 @@ func taskUsesConnect(task *structs.Task) bool { } func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error { - - authErr := n.srv.Authenticate(n.ctx, args) - - // Ensure the connection was initiated by another client if TLS is used. - err := validateTLSCertificateLevel(n.srv, n.ctx, tlsCertificateLevelClient) + // COMPAT(1.9.0): move to AuthenticateClientOnly + aclObj, err := n.srv.AuthenticateClientOnlyLegacy(n.ctx, args) + n.srv.MeasureRPCRate("node", structs.RateMetricWrite, args) if err != nil { - return err + return structs.ErrPermissionDenied } + if done, err := n.srv.forward("Node.EmitEvents", args, args, reply); done { return err } - n.srv.MeasureRPCRate("node", structs.RateMetricWrite, args) - if authErr != nil { + defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now()) + + if !aclObj.AllowClientOp() { return structs.ErrPermissionDenied } - defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now()) if len(args.NodeEvents) == 0 { return fmt.Errorf("no node events given") diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index c67fa2806728..c9b273efd417 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -4514,8 +4514,11 @@ func TestClientEndpoint_UpdateAlloc_Evals_ByTrigger(t *testing.T) { } updateReq := &structs.AllocUpdateRequest{ - Alloc: []*structs.Allocation{clientAlloc}, - WriteRequest: structs.WriteRequest{Region: "global"}, + Alloc: []*structs.Allocation{clientAlloc}, + WriteRequest: structs.WriteRequest{ + Region: "global", + AuthToken: node.SecretID, + }, } var nodeAllocResp structs.NodeAllocsResponse diff --git a/nomad/operator_endpoint_test.go b/nomad/operator_endpoint_test.go index 0608d8794100..02d5c932f6b8 100644 --- a/nomad/operator_endpoint_test.go +++ b/nomad/operator_endpoint_test.go @@ -487,7 +487,7 @@ func TestOperator_TransferLeadershipToServerAddress_ACL(t *testing.T) { // Create ACL token invalidToken := mock.CreatePolicyAndToken(t, state, 1001, "test-invalid", mock.NodePolicy(acl.PolicyWrite)) - arg := structs.RaftPeerRequest{ + arg := &structs.RaftPeerRequest{ RaftIDAddress: structs.RaftIDAddress{Address: addr}, WriteRequest: structs.WriteRequest{Region: s1.config.Region}, } @@ -496,7 +496,7 @@ func TestOperator_TransferLeadershipToServerAddress_ACL(t *testing.T) { t.Run("no-token", func(t *testing.T) { // Try with no token and expect permission denied - err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", arg, &reply) must.Error(t, err) must.ErrorIs(t, err, rpcPermDeniedErr) }) @@ -504,7 +504,7 @@ func TestOperator_TransferLeadershipToServerAddress_ACL(t *testing.T) { t.Run("invalid-token", func(t *testing.T) { // Try with an invalid token and expect permission denied arg.AuthToken = invalidToken.SecretID - err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", arg, &reply) must.Error(t, err) must.ErrorIs(t, err, rpcPermDeniedErr) }) @@ -512,7 +512,7 @@ func TestOperator_TransferLeadershipToServerAddress_ACL(t *testing.T) { t.Run("good-token", func(t *testing.T) { // Try with a management token arg.AuthToken = tc.token.SecretID - err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", arg, &reply) must.NoError(t, err) // Is the expected leader the new one? @@ -543,7 +543,7 @@ func TestOperator_TransferLeadershipToServerID_ACL(t *testing.T) { // Create ACL token invalidToken := mock.CreatePolicyAndToken(t, state, 1001, "test-invalid", mock.NodePolicy(acl.PolicyWrite)) - arg := structs.RaftPeerRequest{ + arg := &structs.RaftPeerRequest{ RaftIDAddress: structs.RaftIDAddress{ ID: tgtID, }, @@ -554,7 +554,7 @@ func TestOperator_TransferLeadershipToServerID_ACL(t *testing.T) { t.Run("no-token", func(t *testing.T) { // Try with no token and expect permission denied - err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", arg, &reply) must.Error(t, err) must.ErrorIs(t, err, rpcPermDeniedErr) }) @@ -562,7 +562,7 @@ func TestOperator_TransferLeadershipToServerID_ACL(t *testing.T) { t.Run("invalid-token", func(t *testing.T) { // Try with an invalid token and expect permission denied arg.AuthToken = invalidToken.SecretID - err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", arg, &reply) must.Error(t, err) must.ErrorIs(t, err, rpcPermDeniedErr) }) @@ -570,7 +570,7 @@ func TestOperator_TransferLeadershipToServerID_ACL(t *testing.T) { t.Run("good-token", func(t *testing.T) { // Try with a management token arg.AuthToken = tc.token.SecretID - err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", &arg, &reply) + err := msgpackrpc.CallWithCodec(codec, "Operator.TransferLeadershipToPeer", arg, &reply) must.NoError(t, err) // Is the expected leader the new one? diff --git a/nomad/rpc_test.go b/nomad/rpc_test.go index 4ab9717865a2..a33c69b698df 100644 --- a/nomad/rpc_test.go +++ b/nomad/rpc_test.go @@ -1257,7 +1257,7 @@ func TestRPC_TLS_Enforcement_RPC(t *testing.T) { name: "other region server/clients only rpc", cn: "server.other.nomad", rpcs: localClientsOnlyRPCs, - expectErr: "(certificate|broken pipe)", + expectErr: "(Permission denied|broken pipe)", }, // Other region client. { diff --git a/nomad/service_registration_endpoint.go b/nomad/service_registration_endpoint.go index 68bbf54d564c..b5c9ac6cf86a 100644 --- a/nomad/service_registration_endpoint.go +++ b/nomad/service_registration_endpoint.go @@ -40,20 +40,20 @@ func (s *ServiceRegistration) Upsert( args *structs.ServiceRegistrationUpsertRequest, reply *structs.ServiceRegistrationUpsertResponse) error { - authErr := s.srv.Authenticate(s.ctx, args) - - // Ensure the connection was initiated by a client if TLS is used. - if err := validateTLSCertificateLevel(s.srv, s.ctx, tlsCertificateLevelClient); err != nil { - return err + aclObj, err := s.srv.AuthenticateClientOnly(s.ctx, args) + s.srv.MeasureRPCRate("service_registration", structs.RateMetricWrite, args) + if err != nil { + return structs.ErrPermissionDenied } + if done, err := s.srv.forward(structs.ServiceRegistrationUpsertRPCMethod, args, args, reply); done { return err } - s.srv.MeasureRPCRate("service_registration", structs.RateMetricWrite, args) - if authErr != nil { + defer metrics.MeasureSince([]string{"nomad", "service_registration", "upsert"}, time.Now()) + + if !aclObj.AllowClientOp() { return structs.ErrPermissionDenied } - defer metrics.MeasureSince([]string{"nomad", "service_registration", "upsert"}, time.Now()) // Nomad service registrations can only be used once all servers, in the // local region, have been upgraded to 1.3.0 or greater. @@ -62,17 +62,6 @@ func (s *ServiceRegistration) Upsert( minNomadServiceRegistrationVersion) } - // This endpoint is only callable by nodes in the cluster. Therefore, - // perform a node lookup using the secret ID to confirm the caller is a - // known node. - node, err := s.srv.fsm.State().NodeBySecretID(nil, args.AuthToken) - if err != nil { - return err - } - if node == nil { - return structs.ErrTokenNotFound - } - // Use a multierror, so we can capture all validation errors and pass this // back so fixing in a single swoop. var mErr multierror.Error @@ -124,41 +113,10 @@ func (s *ServiceRegistration) DeleteByID( minNomadServiceRegistrationVersion) } - // Perform the ACL token resolution. - aclObj, err := s.srv.ResolveACL(args) - - switch err { - case nil: - // If ACLs are enabled, ensure the caller has the submit-job namespace - // capability. - if aclObj != nil { - hasSubmitJob := aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) - if !hasSubmitJob { - return structs.ErrPermissionDenied - } - } - default: - // This endpoint is generally called by Nomad nodes, so we want to - // perform this check, unless the token resolution gave us a terminal - // error. - if err != structs.ErrTokenNotFound { - return err - } - - // Attempt to lookup AuthToken as a Node.SecretID and return any error - // wrapped along with the original. - node, stateErr := s.srv.fsm.State().NodeBySecretID(nil, args.AuthToken) - if stateErr != nil { - var mErr multierror.Error - mErr.Errors = append(mErr.Errors, err, stateErr) - return mErr.ErrorOrNil() - } - - // At this point, we do not have a valid ACL token, nor are we being - // called, or able to confirm via the state store, by a node. - if node == nil { - return structs.ErrTokenNotFound - } + if aclObj, err := s.srv.ResolveACL(args); err != nil { + return structs.ErrPermissionDenied + } else if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) { + return structs.ErrPermissionDenied } // Update via Raft. @@ -216,12 +174,12 @@ func (s *ServiceRegistration) List( return s.listAllServiceRegistrations(args, reply) } - aclObj, err := s.srv.ResolveClientOrACL(args) + aclObj, err := s.srv.ResolveACL(args) if err != nil { - return structs.ErrPermissionDenied + return err } - if args.GetIdentity().Claims == nil && - !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { + if aclObj != nil && !aclObj.AllowServiceRegistrationReadList(args.RequestNamespace(), + args.GetIdentity().Claims != nil) { return structs.ErrPermissionDenied } @@ -381,12 +339,12 @@ func (s *ServiceRegistration) GetService( } defer metrics.MeasureSince([]string{"nomad", "service_registration", "get_service"}, time.Now()) - aclObj, err := s.srv.ResolveClientOrACL(args) + aclObj, err := s.srv.ResolveACL(args) if err != nil { return structs.ErrPermissionDenied } - if args.GetIdentity().Claims == nil && - !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { + if aclObj != nil && !aclObj.AllowServiceRegistrationReadList(args.RequestNamespace(), + args.GetIdentity().Claims != nil) { return structs.ErrPermissionDenied } diff --git a/nomad/service_registration_endpoint_test.go b/nomad/service_registration_endpoint_test.go index 094f27f07bad..3fbe01aaa3d9 100644 --- a/nomad/service_registration_endpoint_test.go +++ b/nomad/service_registration_endpoint_test.go @@ -52,8 +52,7 @@ func TestServiceRegistration_Upsert(t *testing.T) { var serviceRegResp structs.ServiceRegistrationUpsertResponse err := msgpackrpc.CallWithCodec( codec, structs.ServiceRegistrationUpsertRPCMethod, serviceRegReq, &serviceRegResp) - require.Error(t, err) - require.Contains(t, err.Error(), "node lookup by SecretID failed") + must.EqError(t, err, structs.ErrPermissionDenied.Error()) // Generate a node and retry the upsert. node := mock.Node() @@ -135,8 +134,7 @@ func TestServiceRegistration_Upsert(t *testing.T) { var serviceRegResp structs.ServiceRegistrationUpsertResponse err := msgpackrpc.CallWithCodec( codec, structs.ServiceRegistrationUpsertRPCMethod, serviceRegReq, &serviceRegResp) - require.Error(t, err) - require.Contains(t, err.Error(), "node lookup by SecretID failed") + must.EqError(t, err, structs.ErrPermissionDenied.Error()) // Generate a node and retry the upsert. node := mock.Node()