Skip to content

Commit

Permalink
auth: add client-only ACL
Browse files Browse the repository at this point in the history
The RPC handlers expect to see `nil` ACL objects whenever ACLs are disabled. By
using `nil` as a sentinel value, we have the risk of nil pointer exceptions and
improper handling of `nil` when returned from our various auth methods that can
lead to privilege escalation bugs. This is the third in a series to eliminate
the use of `nil` ACLs as a sentinel value for when ACLs are disabled.

This patch involves creating a new "virtual" ACL object for checking permissions
on client operations and a matching `AuthenticateClientOnly` method for
client-only RPCs that can produce that ACL.

Unlike the server ACLs PR, this also includes a special case for "legacy" client
RPCs where the client was not previously sending the secret as it
should (leaning on mTLS only). Those client RPCs were fixed in Nomad 1.6.0, but
it'll take a while before we can guarantee they'll be present during upgrades.

Ref: hashicorp/nomad-enterprise#1218
Ref: #18703
Ref: #18715
Ref: #16799
  • Loading branch information
tgross committed Oct 11, 2023
1 parent a92461c commit 0e7e683
Show file tree
Hide file tree
Showing 13 changed files with 398 additions and 110 deletions.
21 changes: 19 additions & 2 deletions .semgrep/rpc_endpoint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,24 @@ rules:
}
...
# Pattern used by endpoints that are used by both ACLs and Clients.
# These endpoints will always have a ctx passed to Authenticate
# Pattern used by endpoints that are used only for client-to-server.
# Authorization can be done after forwarding, but must check the
# AllowClientOp policy
- pattern-not-inside: |
aclObj, err := $A.$B.AuthenticateClientOnly($A.ctx, args)
...
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
if !aclObj.AllowClientOp() {
return structs.ErrPermissionDenied
}
...
# Pattern used by ACL endpoints that need to interact with the token
# directly.
- pattern-not-inside: |
authErr := $A.$B.Authenticate($A.ctx, args)
...
Expand All @@ -51,6 +67,7 @@ rules:
...
... := $A.$B.ResolveClientOrACL(...)
...
# Pattern used by ACL endpoints that need to interact with the token directly
- pattern-not-inside: |
authErr := $A.$B.Authenticate($A.ctx, args)
Expand Down
44 changes: 41 additions & 3 deletions acl/acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ type ACL struct {

// The attributes below detail a virtual policy that we never expose
// directly to the end user.
client string
server string
isLeader bool
}
Expand Down Expand Up @@ -301,6 +302,7 @@ func NewACL(management bool, policies []*Policy) (*ACL, error) {
acl.variables = svTxn.Commit()
acl.wildcardVariables = wsvTxn.Commit()

acl.client = PolicyDeny
acl.server = PolicyDeny
acl.isLeader = false

Expand Down Expand Up @@ -332,6 +334,11 @@ func (a *ACL) AllowNamespaceOperation(ns string, op string) bool {
return true
}

// Clients need to be able to read their namespaced objects
if a.client != PolicyDeny {
return true
}

// If using the all namespaces wildcard, allow if any namespace allows the
// operation.
if ns == AllNamespacesSentinel && a.anyNamespaceAllowsOp(op) {
Expand Down Expand Up @@ -731,6 +738,9 @@ func (a *ACL) AllowNodeRead() bool {
return true
case a.node == PolicyRead:
return true
case a.client == PolicyRead,
a.client == PolicyWrite:
return true
case a.server == PolicyRead,
a.server == PolicyWrite:
return true
Expand Down Expand Up @@ -813,6 +823,9 @@ func (a *ACL) AllowPluginRead() bool {
return true
case a.management:
return true
case a.client == PolicyRead,
a.client == PolicyWrite:
return true
case a.plugin == PolicyRead:
return true
default:
Expand All @@ -828,6 +841,9 @@ func (a *ACL) AllowPluginList() bool {
return true
case a.management:
return true
case a.client == PolicyRead,
a.client == PolicyWrite:
return true
case a.plugin == PolicyList:
return true
case a.plugin == PolicyRead:
Expand All @@ -837,6 +853,15 @@ func (a *ACL) AllowPluginList() bool {
}
}

func (a *ACL) AllowServiceRegistrationReadList(ns string, isWorkload bool) bool {
if a == nil {
// ACL is nil only if ACLs are disabled
// TODO(tgross): return false when there are no nil ACLs
return true
}
return isWorkload || a.AllowNsOp(ns, NamespaceCapabilityReadJob)
}

// AllowServerOp checks if server-only operations are allowed
func (a *ACL) AllowServerOp() bool {
if a == nil {
Expand All @@ -847,6 +872,15 @@ func (a *ACL) AllowServerOp() bool {
return a.server != PolicyDeny || a.isLeader
}

func (a *ACL) AllowClientOp() bool {
if a == nil {
// ACL is nil only if ACLs are disabled
// TODO(tgross): return false when there are no nil ACLs
return true
}
return a.client != PolicyDeny
}

// IsManagement checks if this represents a management token
func (a *ACL) IsManagement() bool {
return a.management
Expand All @@ -856,14 +890,18 @@ func (a *ACL) IsManagement() bool {
// a list of operations. Returns true (allowed) if acls are disabled or if
// *any* capabilities match.
func NamespaceValidator(ops ...string) func(*ACL, string) bool {
return func(acl *ACL, ns string) bool {
return func(a *ACL, ns string) bool {
// Always allow if ACLs are disabled.
if acl == nil {
if a == nil {
return true
}
// Clients need to be able to read namespaced objects
if a.client != PolicyDeny {
return true
}

for _, op := range ops {
if acl.AllowNamespaceOperation(ns, op) {
if a.AllowNamespaceOperation(ns, op) {
// An operation is allowed, return true
return true
}
Expand Down
3 changes: 3 additions & 0 deletions acl/acl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ func TestACLManagement(t *testing.T) {
must.True(t, acl.AllowQuotaRead())
must.True(t, acl.AllowQuotaWrite())
must.True(t, acl.AllowServerOp())
must.True(t, acl.AllowClientOp())
}

func TestACLMerge(t *testing.T) {
Expand Down Expand Up @@ -143,6 +144,7 @@ func TestACLMerge(t *testing.T) {
must.True(t, acl.AllowQuotaRead())
must.True(t, acl.AllowQuotaWrite())
must.False(t, acl.AllowServerOp())
must.False(t, acl.AllowClientOp())

// Merge read + blank
p3, err := Parse("")
Expand Down Expand Up @@ -178,6 +180,7 @@ func TestACLMerge(t *testing.T) {
must.True(t, acl.AllowQuotaRead())
must.False(t, acl.AllowQuotaWrite())
must.False(t, acl.AllowServerOp())
must.False(t, acl.AllowClientOp())

// Merge read + deny
p4, err := Parse(denyAll)
Expand Down
12 changes: 12 additions & 0 deletions acl/virtual.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,20 @@

package acl

var ClientACL = initClientACL()
var ServerACL = initServerACL()

func initClientACL() *ACL {
aclObj, err := NewACL(false, []*Policy{})
if err != nil {
panic(err)
}
aclObj.client = PolicyWrite
aclObj.agent = PolicyRead
aclObj.server = PolicyRead
return aclObj
}

func initServerACL() *ACL {
aclObj, err := NewACL(false, []*Policy{})
if err != nil {
Expand Down
14 changes: 11 additions & 3 deletions nomad/acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ func (srv *Server) AuthenticateServerOnly(ctx *RPCContext, args structs.RequestW
return srv.auth.AuthenticateServerOnly(ctx, args)
}

func (srv *Server) AuthenticateClientOnly(ctx *RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) {
return srv.auth.AuthenticateClientOnly(ctx, args)
}

func (srv *Server) AuthenticateClientOnlyLegacy(ctx *RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) {
return srv.auth.AuthenticateClientOnlyLegacy(ctx, args)
}

func (srv *Server) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
return srv.auth.ResolveACL(args)
}
Expand All @@ -28,9 +36,9 @@ func (srv *Server) ResolveToken(secretID string) (*acl.ACL, error) {
return srv.auth.ResolveToken(secretID)
}

func (srv *Server) ResolveClientOrACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
return srv.auth.ResolveClientOrACL(args)
}
// func (srv *Server) ResolveClientOrACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
// return srv.auth.ResolveClientOrACL(args)
// }

func (srv *Server) ResolvePoliciesForClaims(claims *structs.IdentityClaims) ([]*structs.ACLPolicy, error) {
return srv.auth.ResolvePoliciesForClaims(claims)
Expand Down
17 changes: 8 additions & 9 deletions nomad/alloc_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest,

// Check namespace read-job permissions before performing blocking query.
allowNsOp := acl.NamespaceValidator(acl.NamespaceCapabilityReadJob)
aclObj, err := a.srv.ResolveClientOrACL(args)
aclObj, err := a.srv.ResolveACL(args)
if err != nil {
return err
}
Expand Down Expand Up @@ -200,21 +200,20 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest,
func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest,
reply *structs.AllocsGetResponse) error {

authErr := a.srv.Authenticate(a.ctx, args)

// Ensure the connection was initiated by a client if TLS is used.
err := validateTLSCertificateLevel(a.srv, a.ctx, tlsCertificateLevelClient)
aclObj, err := a.srv.AuthenticateClientOnly(a.ctx, args)
a.srv.MeasureRPCRate("alloc", structs.RateMetricWrite, args)
if err != nil {
return err
return structs.ErrPermissionDenied
}

if done, err := a.srv.forward("Alloc.GetAllocs", args, args, reply); done {
return err
}
a.srv.MeasureRPCRate("alloc", structs.RateMetricList, args)
if authErr != nil {
defer metrics.MeasureSince([]string{"nomad", "alloc", "get_allocs"}, time.Now())

if !aclObj.AllowClientOp() {
return structs.ErrPermissionDenied
}
defer metrics.MeasureSince([]string{"nomad", "alloc", "get_allocs"}, time.Now())

allocs := make([]*structs.Allocation, len(args.AllocIDs))

Expand Down
97 changes: 97 additions & 0 deletions nomad/auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,103 @@ func (s *Authenticator) AuthenticateServerOnly(ctx RPCContext, args structs.Requ
return acl.ServerACL, nil
}

// AuthenticateClientOnly returns an ACL object for use *only* with internal
// RPCs originating from clients (including those forwarded). This should never
// be used for RPCs that serve HTTP endpoints to avoid confused deputy attacks
// by making a request to a client that's forwarded. It should also not be used
// with Node.Register, which should use AuthenticateClientTOFU
//
// The returned ACL object is always a acl.ClientACL but in the future this
// could be extended to allow clients access only to their own pool and
// associated namespaces, etc.
func (s *Authenticator) AuthenticateClientOnly(ctx RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) {

remoteIP, err := ctx.GetRemoteIP() // capture for metrics
if err != nil {
s.logger.Error("could not determine remote address", "error", err)
}

identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP}
defer args.SetIdentity(identity) // always set the identity, even on errors

if s.tlsEnabled && !ctx.IsStatic() {
tlsCert := ctx.Certificate()
if tlsCert == nil {
return nil, errors.New("missing certificate information")
}

// set on the identity whether or not its valid for server RPC, so we
// can capture it for metrics
identity.TLSName = tlsCert.Subject.CommonName

expected := fmt.Sprintf("client.%s.nomad", s.region)
_, err := validateCertificateForName(tlsCert, expected)
if err != nil {
expected := fmt.Sprintf("server.%s.nomad", s.region)
_, err := validateCertificateForName(tlsCert, expected)
if err != nil {
return nil, err
}
}
}

secretID := args.GetAuthToken()
if secretID == "" {
return nil, structs.ErrPermissionDenied
}

// Otherwise, see if the secret ID belongs to a node. We should
// reach this point only on first connection.
node, err := s.getState().NodeBySecretID(nil, secretID)
if err != nil {
// this is a go-memdb error; shouldn't happen
return nil, fmt.Errorf("could not resolve node secret: %w", err)
}
if node == nil {
return nil, structs.ErrPermissionDenied
}
identity.ClientID = node.ID
return acl.ClientACL, nil
}

// AuthenticateClientOnlyLegacy is a version of AuthenticateClientOnly that's
// used by a few older RPCs that did not properly enforce node secrets.
// COMPAT(1.8.0): In Nomad 1.6.0 we starting sending those node secrets, so we
// can remove this in Nomad 1.8.0.
func (s *Authenticator) AuthenticateClientOnlyLegacy(ctx RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) {

remoteIP, err := ctx.GetRemoteIP() // capture for metrics
if err != nil {
s.logger.Error("could not determine remote address", "error", err)
}

identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP}
defer args.SetIdentity(identity) // always set the identity, even on errors

if s.tlsEnabled && !ctx.IsStatic() {
tlsCert := ctx.Certificate()
if tlsCert == nil {
return nil, errors.New("missing certificate information")
}

// set on the identity whether or not its valid for server RPC, so we
// can capture it for metrics
identity.TLSName = tlsCert.Subject.CommonName

expected := fmt.Sprintf("client.%s.nomad", s.region)
_, err := validateCertificateForName(tlsCert, expected)
if err != nil {
expected := fmt.Sprintf("server.%s.nomad", s.region)
_, err := validateCertificateForName(tlsCert, expected)
if err != nil {
return nil, err
}
}
}

return acl.ClientACL, nil
}

// validateCertificateForName returns true if the certificate is valid
// for the given domain name.
func validateCertificateForName(cert *x509.Certificate, expectedName string) (bool, error) {
Expand Down
Loading

0 comments on commit 0e7e683

Please sign in to comment.