From 034cc4718589fa6d111e34dc752520f25cbce9fe Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Fri, 18 Aug 2023 06:09:44 +0000 Subject: [PATCH 01/14] Add jitconfig model field Signed-off-by: Gabriel Adrian Samfira --- database/sql/instances.go | 16 ++++++++++++++++ database/sql/models.go | 1 + database/sql/util.go | 11 +++++++++++ params/params.go | 11 ++++++----- params/requests.go | 1 + 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/database/sql/instances.go b/database/sql/instances.go index a85a22e09..d5d2eb87c 100644 --- a/database/sql/instances.go +++ b/database/sql/instances.go @@ -17,8 +17,10 @@ package sql import ( "context" "encoding/json" + "fmt" runnerErrors "github.com/cloudbase/garm-provider-common/errors" + "github.com/cloudbase/garm-provider-common/util" "github.com/cloudbase/garm/params" "github.com/google/uuid" @@ -42,6 +44,19 @@ func (s *sqlDatabase) CreateInstance(ctx context.Context, poolID string, param p } } + var secret []byte + if len(param.JitConfiguration) > 0 { + jitConfig, err := json.Marshal(param.JitConfiguration) + if err != nil { + return params.Instance{}, errors.Wrap(err, "marshalling jit config") + } + + secret, err = util.Seal(jitConfig, []byte(s.cfg.Passphrase)) + if err != nil { + return params.Instance{}, fmt.Errorf("failed to encrypt jitconfig: %w", err) + } + } + newInstance := Instance{ Pool: pool, Name: param.Name, @@ -52,6 +67,7 @@ func (s *sqlDatabase) CreateInstance(ctx context.Context, poolID string, param p CallbackURL: param.CallbackURL, MetadataURL: param.MetadataURL, GitHubRunnerGroup: param.GitHubRunnerGroup, + JitConfiguration: secret, AditionalLabels: labels, } q := s.conn.Create(&newInstance) diff --git a/database/sql/models.go b/database/sql/models.go index ac41f0318..f33fe9c8d 100644 --- a/database/sql/models.go +++ b/database/sql/models.go @@ -155,6 +155,7 @@ type Instance struct { ProviderFault []byte `gorm:"type:longblob"` CreateAttempt int TokenFetched bool + JitConfiguration []byte `gorm:"type:longblob"` GitHubRunnerGroup string AditionalLabels datatypes.JSON diff --git a/database/sql/util.go b/database/sql/util.go index 3f91c5731..3930d9c43 100644 --- a/database/sql/util.go +++ b/database/sql/util.go @@ -41,6 +41,16 @@ func (s *sqlDatabase) sqlToParamsInstance(instance Instance) (params.Instance, e } } + var jitConfig map[string]string + if len(instance.JitConfiguration) > 0 { + decrypted, err := util.Unseal(instance.JitConfiguration, []byte(s.cfg.Passphrase)) + if err != nil { + return params.Instance{}, errors.Wrap(err, "decrypting jit config") + } + if err := json.Unmarshal(decrypted, &jitConfig); err != nil { + return params.Instance{}, errors.Wrap(err, "unmarshalling jit config") + } + } ret := params.Instance{ ID: instance.ID.String(), ProviderID: id, @@ -59,6 +69,7 @@ func (s *sqlDatabase) sqlToParamsInstance(instance Instance) (params.Instance, e CreateAttempt: instance.CreateAttempt, UpdatedAt: instance.UpdatedAt, TokenFetched: instance.TokenFetched, + JitConfiguration: jitConfig, GitHubRunnerGroup: instance.GitHubRunnerGroup, AditionalLabels: labels, } diff --git a/params/params.go b/params/params.go index 8844a3e5e..eab4a1735 100644 --- a/params/params.go +++ b/params/params.go @@ -156,11 +156,12 @@ type Instance struct { GitHubRunnerGroup string `json:"github-runner-group"` // Do not serialize sensitive info. - CallbackURL string `json:"-"` - MetadataURL string `json:"-"` - CreateAttempt int `json:"-"` - TokenFetched bool `json:"-"` - AditionalLabels []string `json:"-"` + CallbackURL string `json:"-"` + MetadataURL string `json:"-"` + CreateAttempt int `json:"-"` + TokenFetched bool `json:"-"` + AditionalLabels []string `json:"-"` + JitConfiguration map[string]string `json:"-"` } func (i Instance) GetName() string { diff --git a/params/requests.go b/params/requests.go index 8b3336628..12a2acc3e 100644 --- a/params/requests.go +++ b/params/requests.go @@ -138,6 +138,7 @@ type CreateInstanceParams struct { GitHubRunnerGroup string CreateAttempt int `json:"-"` AditionalLabels []string + JitConfiguration map[string]string } type CreatePoolParams struct { From de17fb04b4511eec6b616137296ab0eee8a70b9b Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sat, 19 Aug 2023 16:31:02 +0000 Subject: [PATCH 02/14] Add helper functions for marshaling and sealing Signed-off-by: Gabriel Adrian Samfira --- database/sql/instances.go | 35 ++++++++++++++++++++++++++++------- database/sql/util.go | 8 ++------ params/requests.go | 13 +++++++------ 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/database/sql/instances.go b/database/sql/instances.go index d5d2eb87c..0c8e4a487 100644 --- a/database/sql/instances.go +++ b/database/sql/instances.go @@ -17,7 +17,6 @@ package sql import ( "context" "encoding/json" - "fmt" runnerErrors "github.com/cloudbase/garm-provider-common/errors" "github.com/cloudbase/garm-provider-common/util" @@ -30,6 +29,25 @@ import ( "gorm.io/gorm/clause" ) +func (s *sqlDatabase) marshalAndSeal(data interface{}) ([]byte, error) { + enc, err := json.Marshal(data) + if err != nil { + return nil, errors.Wrap(err, "marshalling data") + } + return util.Seal(enc, []byte(s.cfg.Passphrase)) +} + +func (s *sqlDatabase) unsealAndUnmarshal(data []byte, target interface{}) error { + decrypted, err := util.Unseal(data, []byte(s.cfg.Passphrase)) + if err != nil { + return errors.Wrap(err, "decrypting data") + } + if err := json.Unmarshal(decrypted, target); err != nil { + return errors.Wrap(err, "unmarshalling data") + } + return nil +} + func (s *sqlDatabase) CreateInstance(ctx context.Context, poolID string, param params.CreateInstanceParams) (params.Instance, error) { pool, err := s.getPoolByID(ctx, poolID) if err != nil { @@ -46,15 +64,10 @@ func (s *sqlDatabase) CreateInstance(ctx context.Context, poolID string, param p var secret []byte if len(param.JitConfiguration) > 0 { - jitConfig, err := json.Marshal(param.JitConfiguration) + secret, err = s.marshalAndSeal(param.JitConfiguration) if err != nil { return params.Instance{}, errors.Wrap(err, "marshalling jit config") } - - secret, err = util.Seal(jitConfig, []byte(s.cfg.Passphrase)) - if err != nil { - return params.Instance{}, fmt.Errorf("failed to encrypt jitconfig: %w", err) - } } newInstance := Instance{ @@ -251,6 +264,14 @@ func (s *sqlDatabase) UpdateInstance(ctx context.Context, instanceID string, par instance.TokenFetched = *param.TokenFetched } + if param.JitConfiguration != nil { + secret, err := s.marshalAndSeal(param.JitConfiguration) + if err != nil { + return params.Instance{}, errors.Wrap(err, "marshalling jit config") + } + instance.JitConfiguration = secret + } + instance.ProviderFault = param.ProviderFault q := s.conn.Save(&instance) diff --git a/database/sql/util.go b/database/sql/util.go index 3930d9c43..ad0a4d8b3 100644 --- a/database/sql/util.go +++ b/database/sql/util.go @@ -43,12 +43,8 @@ func (s *sqlDatabase) sqlToParamsInstance(instance Instance) (params.Instance, e var jitConfig map[string]string if len(instance.JitConfiguration) > 0 { - decrypted, err := util.Unseal(instance.JitConfiguration, []byte(s.cfg.Passphrase)) - if err != nil { - return params.Instance{}, errors.Wrap(err, "decrypting jit config") - } - if err := json.Unmarshal(decrypted, &jitConfig); err != nil { - return params.Instance{}, errors.Wrap(err, "unmarshalling jit config") + if err := s.unsealAndUnmarshal(instance.JitConfiguration, &jitConfig); err != nil { + return params.Instance{}, errors.Wrap(err, "unmarshalling jit configuration") } } ret := params.Instance{ diff --git a/params/requests.go b/params/requests.go index 12a2acc3e..0293223eb 100644 --- a/params/requests.go +++ b/params/requests.go @@ -199,12 +199,13 @@ type UpdateInstanceParams struct { // for this instance. Addresses []commonParams.Address `json:"addresses,omitempty"` // Status is the status of the instance inside the provider (eg: running, stopped, etc) - Status commonParams.InstanceStatus `json:"status,omitempty"` - RunnerStatus RunnerStatus `json:"runner_status,omitempty"` - ProviderFault []byte `json:"provider_fault,omitempty"` - AgentID int64 `json:"-"` - CreateAttempt int `json:"-"` - TokenFetched *bool `json:"-"` + Status commonParams.InstanceStatus `json:"status,omitempty"` + RunnerStatus RunnerStatus `json:"runner_status,omitempty"` + ProviderFault []byte `json:"provider_fault,omitempty"` + AgentID int64 `json:"-"` + CreateAttempt int `json:"-"` + TokenFetched *bool `json:"-"` + JitConfiguration map[string]string `json:"-"` } type UpdateUserParams struct { From 09d2f1b061eb27c19a6b624e9f583b0c6811b378 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sat, 19 Aug 2023 16:49:41 +0000 Subject: [PATCH 03/14] Add Jit functions to GH client interface Signed-off-by: Gabriel Adrian Samfira --- runner/common/mocks/GithubClient.go | 70 +++++++++++++++++++++++++++++ runner/common/util.go | 4 ++ 2 files changed, 74 insertions(+) diff --git a/runner/common/mocks/GithubClient.go b/runner/common/mocks/GithubClient.go index f351b58e5..918c60ffb 100644 --- a/runner/common/mocks/GithubClient.go +++ b/runner/common/mocks/GithubClient.go @@ -206,6 +206,76 @@ func (_m *GithubClient) DeleteRepoHook(ctx context.Context, owner string, repo s return r0, r1 } +// GenerateOrgJITConfig provides a mock function with given fields: ctx, owner, request +func (_m *GithubClient) GenerateOrgJITConfig(ctx context.Context, owner string, request *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error) { + ret := _m.Called(ctx, owner, request) + + var r0 *github.JITRunnerConfig + var r1 *github.Response + var r2 error + if rf, ok := ret.Get(0).(func(context.Context, string, *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error)); ok { + return rf(ctx, owner, request) + } + if rf, ok := ret.Get(0).(func(context.Context, string, *github.GenerateJITConfigRequest) *github.JITRunnerConfig); ok { + r0 = rf(ctx, owner, request) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*github.JITRunnerConfig) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, string, *github.GenerateJITConfigRequest) *github.Response); ok { + r1 = rf(ctx, owner, request) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(*github.Response) + } + } + + if rf, ok := ret.Get(2).(func(context.Context, string, *github.GenerateJITConfigRequest) error); ok { + r2 = rf(ctx, owner, request) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + +// GenerateRepoJITConfig provides a mock function with given fields: ctx, owner, repo, request +func (_m *GithubClient) GenerateRepoJITConfig(ctx context.Context, owner string, repo string, request *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error) { + ret := _m.Called(ctx, owner, repo, request) + + var r0 *github.JITRunnerConfig + var r1 *github.Response + var r2 error + if rf, ok := ret.Get(0).(func(context.Context, string, string, *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error)); ok { + return rf(ctx, owner, repo, request) + } + if rf, ok := ret.Get(0).(func(context.Context, string, string, *github.GenerateJITConfigRequest) *github.JITRunnerConfig); ok { + r0 = rf(ctx, owner, repo, request) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*github.JITRunnerConfig) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, string, string, *github.GenerateJITConfigRequest) *github.Response); ok { + r1 = rf(ctx, owner, repo, request) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(*github.Response) + } + } + + if rf, ok := ret.Get(2).(func(context.Context, string, string, *github.GenerateJITConfigRequest) error); ok { + r2 = rf(ctx, owner, repo, request) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + // GetOrgHook provides a mock function with given fields: ctx, org, id func (_m *GithubClient) GetOrgHook(ctx context.Context, org string, id int64) (*github.Hook, *github.Response, error) { ret := _m.Called(ctx, org, id) diff --git a/runner/common/util.go b/runner/common/util.go index 42f1cc081..7637fb1c3 100644 --- a/runner/common/util.go +++ b/runner/common/util.go @@ -41,6 +41,8 @@ type GithubClient interface { RemoveRunner(ctx context.Context, owner, repo string, runnerID int64) (*github.Response, error) // CreateRegistrationToken creates a runner registration token for one repository. CreateRegistrationToken(ctx context.Context, owner, repo string) (*github.RegistrationToken, *github.Response, error) + // GenerateRepoJITConfig generates a just-in-time configuration for a repository. + GenerateRepoJITConfig(ctx context.Context, owner, repo string, request *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error) // ListOrganizationRunners lists all runners within an organization. ListOrganizationRunners(ctx context.Context, owner string, opts *github.ListOptions) (*github.Runners, *github.Response, error) @@ -51,6 +53,8 @@ type GithubClient interface { RemoveOrganizationRunner(ctx context.Context, owner string, runnerID int64) (*github.Response, error) // CreateOrganizationRegistrationToken creates a runner registration token for an organization. CreateOrganizationRegistrationToken(ctx context.Context, owner string) (*github.RegistrationToken, *github.Response, error) + // GenerateOrgJITConfig generate a just-in-time configuration for an organization. + GenerateOrgJITConfig(ctx context.Context, owner string, request *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error) } type GithubEnterpriseClient interface { From 591641a8a3be658aeff1938f33bff27d4a43f8ea Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sun, 20 Aug 2023 13:45:16 +0000 Subject: [PATCH 04/14] Add temporary redirect to go-github fork Signed-off-by: Gabriel Adrian Samfira --- auth/context.go | 14 ++++++++++++++ go.mod | 2 ++ runner/common/util.go | 2 ++ runner/runner.go | 19 ++++++++----------- vendor/modules.txt | 1 + 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/auth/context.go b/auth/context.go index 642735305..bce9f25e8 100644 --- a/auth/context.go +++ b/auth/context.go @@ -39,6 +39,7 @@ const ( instanceEntityKey contextFlags = "entity" instanceRunnerStatus contextFlags = "status" instanceTokenFetched contextFlags = "tokenFetched" + instanceHasJITConfig contextFlags = "hasJITConfig" instanceParams contextFlags = "instanceParams" ) @@ -66,6 +67,18 @@ func InstanceTokenFetched(ctx context.Context) bool { return elem.(bool) } +func SetInstanceHasJITConfig(ctx context.Context, cfg map[string]string) context.Context { + return context.WithValue(ctx, instanceHasJITConfig, len(cfg) > 0) +} + +func InstanceHasJITConfig(ctx context.Context) bool { + elem := ctx.Value(instanceHasJITConfig) + if elem == nil { + return false + } + return elem.(bool) +} + func SetInstanceParams(ctx context.Context, instance params.Instance) context.Context { return context.WithValue(ctx, instanceParams, instance) } @@ -149,6 +162,7 @@ func PopulateInstanceContext(ctx context.Context, instance params.Instance) cont ctx = SetInstancePoolID(ctx, instance.PoolID) ctx = SetInstanceRunnerStatus(ctx, instance.RunnerStatus) ctx = SetInstanceTokenFetched(ctx, instance.TokenFetched) + ctx = SetInstanceHasJITConfig(ctx, instance.JitConfiguration) ctx = SetInstanceParams(ctx, instance) return ctx } diff --git a/go.mod b/go.mod index 98cee70f4..064844bcc 100644 --- a/go.mod +++ b/go.mod @@ -36,6 +36,8 @@ require ( gorm.io/gorm v1.24.6 ) +replace github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230820134006-4b55d56a9b7f + require ( github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/beorn7/perks v1.0.1 // indirect diff --git a/runner/common/util.go b/runner/common/util.go index 7637fb1c3..c9989a9da 100644 --- a/runner/common/util.go +++ b/runner/common/util.go @@ -67,4 +67,6 @@ type GithubEnterpriseClient interface { // ListRunnerApplicationDownloads returns a list of github runner application downloads for the // various supported operating systems and architectures. ListRunnerApplicationDownloads(ctx context.Context, enterprise string) ([]*github.RunnerApplicationDownload, *github.Response, error) + + GenerateEnterpriseJITConfig(ctx context.Context, enterprise string, request *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error) } diff --git a/runner/runner.go b/runner/runner.go index 76563c6dc..0aba8823a 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -859,15 +859,11 @@ func (r *Runner) AddInstanceStatusMessage(ctx context.Context, param params.Inst } func (r *Runner) GetInstanceGithubRegistrationToken(ctx context.Context) (string, error) { - instanceName := auth.InstanceName(ctx) - if instanceName == "" { - return "", runnerErrors.ErrUnauthorized - } - - // Check if this instance already fetched a registration token. We only allow an instance to - // fetch one token. If the instance fails to bootstrap after a token is fetched, we reset the - // token fetched field when re-queueing the instance. - if auth.InstanceTokenFetched(ctx) { + // Check if this instance already fetched a registration token or if it was configured using + // the new Just In Time runner feature. If we're still using the old way of configuring a runner, + // we only allow an instance to fetch one token. If the instance fails to bootstrap after a token + // is fetched, we reset the token fetched field when re-queueing the instance. + if auth.InstanceTokenFetched(ctx) || auth.InstanceHasJITConfig(ctx) { return "", runnerErrors.ErrUnauthorized } @@ -876,9 +872,10 @@ func (r *Runner) GetInstanceGithubRegistrationToken(ctx context.Context) (string return "", runnerErrors.ErrUnauthorized } - instance, err := r.store.GetInstanceByName(ctx, instanceName) + instance, err := auth.InstanceParams(ctx) if err != nil { - return "", errors.Wrap(err, "fetching instance") + log.Printf("failed to get instance params: %s", err) + return "", runnerErrors.ErrUnauthorized } poolMgr, err := r.getPoolManagerFromInstance(ctx, instance) diff --git a/vendor/modules.txt b/vendor/modules.txt index 2106e6b5b..2eae13451 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -439,3 +439,4 @@ gorm.io/gorm/logger gorm.io/gorm/migrator gorm.io/gorm/schema gorm.io/gorm/utils +# github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230820134006-4b55d56a9b7f From d5f8cf079ef018f79c4936b0459b62611e7adc7c Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sun, 20 Aug 2023 14:27:58 +0000 Subject: [PATCH 05/14] Ignore instances that are still being created from reaping When using JIT runners, we register the runner on GitHub before we get a chance to spin up the instance in the provider. In such cases, we end up with a runner in "offline" state while we're creating the actual resource that will embody the runner. This change will give runners a chance to come online before garm tries to clean them up. Signed-off-by: Gabriel Adrian Samfira --- runner/pool/pool.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/runner/pool/pool.go b/runner/pool/pool.go index a74c44c49..8e1e661ab 100644 --- a/runner/pool/pool.go +++ b/runner/pool/pool.go @@ -527,6 +527,16 @@ func (r *basePoolManager) cleanupOrphanedGithubRunners(runners []*github.Runner) // already marked for deletion or is in the process of being deleted. // Let consolidate take care of it. continue + case commonParams.InstancePendingCreate, commonParams.InstanceCreating: + // instance is still being created. We give it a chance to finish. + r.log("instance %s is still being created, give it a chance to finish", dbInstance.Name) + continue + case commonParams.InstanceRunning: + if time.Since(dbInstance.UpdatedAt).Minutes() < 5 { + // instance was updated recently. We give it a chance to register itself in github. + r.log("instance %s was updated recently, skipping check", dbInstance.Name) + continue + } } pool, err := r.helper.GetPoolByID(dbInstance.PoolID) From 6dea1c1937995d7b778b652904ca78cbe94f7538 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Mon, 21 Aug 2023 11:57:52 +0000 Subject: [PATCH 06/14] Add temporary replace to fork Signed-off-by: Gabriel Adrian Samfira --- go.mod | 2 +- runner/common/util.go | 6 +++++- runner/pool/pool.go | 24 +++++++++++++++++------- vendor/modules.txt | 2 +- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index 064844bcc..8c5abdf6f 100644 --- a/go.mod +++ b/go.mod @@ -36,7 +36,7 @@ require ( gorm.io/gorm v1.24.6 ) -replace github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230820134006-4b55d56a9b7f +replace github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230821112832-bbb536ee5a3a require ( github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect diff --git a/runner/common/util.go b/runner/common/util.go index c9989a9da..e9bc1fdc3 100644 --- a/runner/common/util.go +++ b/runner/common/util.go @@ -55,6 +55,8 @@ type GithubClient interface { CreateOrganizationRegistrationToken(ctx context.Context, owner string) (*github.RegistrationToken, *github.Response, error) // GenerateOrgJITConfig generate a just-in-time configuration for an organization. GenerateOrgJITConfig(ctx context.Context, owner string, request *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error) + // ListOrganizationRunnerGroups lists all runner groups within an organization. + ListOrganizationRunnerGroups(ctx context.Context, org string, opts *github.ListOrgRunnerGroupOptions) (*github.RunnerGroups, *github.Response, error) } type GithubEnterpriseClient interface { @@ -67,6 +69,8 @@ type GithubEnterpriseClient interface { // ListRunnerApplicationDownloads returns a list of github runner application downloads for the // various supported operating systems and architectures. ListRunnerApplicationDownloads(ctx context.Context, enterprise string) ([]*github.RunnerApplicationDownload, *github.Response, error) - + // GenerateEnterpriseJITConfig generate a just-in-time configuration for an enterprise. GenerateEnterpriseJITConfig(ctx context.Context, enterprise string, request *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error) + // ListRunnerGroups lists all self-hosted runner groups configured in an enterprise. + ListRunnerGroups(ctx context.Context, enterprise string, opts *github.ListEnterpriseRunnerGroupOptions) (*github.EnterpriseRunnerGroups, *github.Response, error) } diff --git a/runner/pool/pool.go b/runner/pool/pool.go index 8e1e661ab..fdfb6e8b9 100644 --- a/runner/pool/pool.go +++ b/runner/pool/pool.go @@ -532,6 +532,8 @@ func (r *basePoolManager) cleanupOrphanedGithubRunners(runners []*github.Runner) r.log("instance %s is still being created, give it a chance to finish", dbInstance.Name) continue case commonParams.InstanceRunning: + // this check is not strictly needed, but can help avoid unnecessary strain on the provider. + // At worst, we will have a runner that is offline in github for 5 minutes before we reap it. if time.Since(dbInstance.UpdatedAt).Minutes() < 5 { // instance was updated recently. We give it a chance to register itself in github. r.log("instance %s was updated recently, skipping check", dbInstance.Name) @@ -716,6 +718,10 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona return errors.Wrap(err, "creating instance") } + // labels := r.getLabelsForInstance(pool) + + // // Attempt to create JIT config + return nil } @@ -744,6 +750,16 @@ func (r *basePoolManager) setPoolRunningState(isRunning bool, failureReason stri r.mux.Unlock() } +func (r *basePoolManager) getLabelsForInstance(pool params.Pool) []string { + labels := []string{} + for _, tag := range pool.Tags { + labels = append(labels, tag.Name) + } + labels = append(labels, r.controllerLabel()) + labels = append(labels, r.poolLabel(pool.ID)) + return labels +} + func (r *basePoolManager) addInstanceToProvider(instance params.Instance) error { pool, err := r.helper.GetPoolByID(instance.PoolID) if err != nil { @@ -755,13 +771,7 @@ func (r *basePoolManager) addInstanceToProvider(instance params.Instance) error return fmt.Errorf("unknown provider %s for pool %s", pool.ProviderName, pool.ID) } - labels := []string{} - for _, tag := range pool.Tags { - labels = append(labels, tag.Name) - } - labels = append(labels, r.controllerLabel()) - labels = append(labels, r.poolLabel(pool.ID)) - + labels := r.getLabelsForInstance(pool) jwtValidity := pool.RunnerTimeout() entity := r.helper.String() diff --git a/vendor/modules.txt b/vendor/modules.txt index 2eae13451..24d7e7ab5 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -439,4 +439,4 @@ gorm.io/gorm/logger gorm.io/gorm/migrator gorm.io/gorm/schema gorm.io/gorm/utils -# github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230820134006-4b55d56a9b7f +# github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230821112832-bbb536ee5a3a From 5214aca22803b715234f22e8deb57f501f942b47 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Mon, 21 Aug 2023 15:33:30 +0000 Subject: [PATCH 07/14] Add jit config for new runner Signed-off-by: Gabriel Adrian Samfira --- runner/pool/enterprise.go | 87 +++++++++++++++++++++++++++++++++++++ runner/pool/interfaces.go | 2 + runner/pool/organization.go | 86 ++++++++++++++++++++++++++++++++++++ runner/pool/pool.go | 40 +++++++++++++++-- runner/pool/repository.go | 48 ++++++++++++++++++++ 5 files changed, 259 insertions(+), 4 deletions(-) diff --git a/runner/pool/enterprise.go b/runner/pool/enterprise.go index 6ad0d54ba..fe9a18312 100644 --- a/runner/pool/enterprise.go +++ b/runner/pool/enterprise.go @@ -2,7 +2,10 @@ package pool import ( "context" + "encoding/base64" + "encoding/json" "fmt" + "log" "net/http" "strings" "sync" @@ -65,6 +68,90 @@ type enterprise struct { mux sync.Mutex } +func (r *enterprise) findRunnerGroupByName(ctx context.Context, name string) (*github.EnterpriseRunnerGroup, error) { + // TODO(gabriel-samfira): implement caching + opts := github.ListEnterpriseRunnerGroupOptions{ + ListOptions: github.ListOptions{ + PerPage: 100, + }, + } + + for { + runnerGroups, ghResp, err := r.ghcEnterpriseCli.ListRunnerGroups(r.ctx, r.cfg.Name, &opts) + if err != nil { + if ghResp != nil && ghResp.StatusCode == http.StatusUnauthorized { + return nil, errors.Wrap(runnerErrors.ErrUnauthorized, "fetching runners") + } + return nil, errors.Wrap(err, "fetching runners") + } + for _, runnerGroup := range runnerGroups.RunnerGroups { + if runnerGroup.Name != nil && *runnerGroup.Name == name { + return runnerGroup, nil + } + } + if ghResp.NextPage == 0 { + break + } + opts.Page = ghResp.NextPage + } + + return nil, errors.Wrap(runnerErrors.ErrNotFound, "runner group not found") +} + +func (r *enterprise) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) { + if instance.AgentID != 0 { + return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest) + } + + if instance.JitConfiguration != nil { + return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest) + } + + var rg int64 = 1 + if pool.GitHubRunnerGroup != "" { + runnerGroup, err := r.findRunnerGroupByName(ctx, pool.GitHubRunnerGroup) + if err != nil { + return nil, nil, fmt.Errorf("failed to find runner group: %w", err) + } + rg = *runnerGroup.ID + } + + req := github.GenerateJITConfigRequest{ + Name: instance.Name, + RunnerGroupID: rg, + Labels: labels, + // TODO(gabriel-samfira): Should we make this configurable? + WorkFolder: github.String("_work"), + } + jitConfig, resp, err := r.ghcEnterpriseCli.GenerateEnterpriseJITConfig(ctx, r.cfg.Name, &req) + if err != nil { + if resp != nil && resp.StatusCode == http.StatusUnauthorized { + return nil, nil, fmt.Errorf("failed to get JIT config: %w", err) + } + return nil, nil, fmt.Errorf("failed to get JIT config: %w", err) + } + + runner = jitConfig.Runner + defer func() { + if err != nil && runner != nil { + _, innerErr := r.ghcEnterpriseCli.RemoveRunner(r.ctx, r.cfg.Name, runner.GetID()) + log.Printf("failed to remove runner: %v", innerErr) + } + }() + + decoded, err := base64.StdEncoding.DecodeString(*jitConfig.EncodedJITConfig) + if err != nil { + return nil, nil, fmt.Errorf("failed to decode JIT config: %w", err) + } + + var ret map[string]string + if err := json.Unmarshal(decoded, &ret); err != nil { + return nil, nil, fmt.Errorf("failed to unmarshal JIT config: %w", err) + } + + return ret, jitConfig.Runner, nil +} + func (r *enterprise) GithubCLI() common.GithubClient { return r.ghcli } diff --git a/runner/pool/interfaces.go b/runner/pool/interfaces.go index ef84abdae..d40fc39eb 100644 --- a/runner/pool/interfaces.go +++ b/runner/pool/interfaces.go @@ -37,6 +37,8 @@ type poolHelper interface { GithubCLI() common.GithubClient + GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (map[string]string, *github.Runner, error) + FetchDbInstances() ([]params.Instance, error) ListPools() ([]params.Pool, error) GithubURL() string diff --git a/runner/pool/organization.go b/runner/pool/organization.go index dc2206e02..0c9eb1466 100644 --- a/runner/pool/organization.go +++ b/runner/pool/organization.go @@ -16,6 +16,8 @@ package pool import ( "context" + "encoding/base64" + "encoding/json" "fmt" "log" "net/http" @@ -84,6 +86,90 @@ type organization struct { mux sync.Mutex } +func (r *organization) findRunnerGroupByName(ctx context.Context, name string) (*github.RunnerGroup, error) { + // TODO(gabriel-samfira): implement caching + opts := github.ListOrgRunnerGroupOptions{ + ListOptions: github.ListOptions{ + PerPage: 100, + }, + } + + for { + runnerGroups, ghResp, err := r.ghcli.ListOrganizationRunnerGroups(r.ctx, r.cfg.Name, &opts) + if err != nil { + if ghResp != nil && ghResp.StatusCode == http.StatusUnauthorized { + return nil, errors.Wrap(runnerErrors.ErrUnauthorized, "fetching runners") + } + return nil, errors.Wrap(err, "fetching runners") + } + for _, runnerGroup := range runnerGroups.RunnerGroups { + if runnerGroup.Name != nil && *runnerGroup.Name == name { + return runnerGroup, nil + } + } + if ghResp.NextPage == 0 { + break + } + opts.Page = ghResp.NextPage + } + + return nil, errors.Wrap(runnerErrors.ErrNotFound, "runner group not found") +} + +func (r *organization) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) { + if instance.AgentID != 0 { + return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest) + } + + if instance.JitConfiguration != nil { + return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest) + } + + var rg int64 = 1 + if pool.GitHubRunnerGroup != "" { + runnerGroup, err := r.findRunnerGroupByName(ctx, pool.GitHubRunnerGroup) + if err != nil { + return nil, nil, fmt.Errorf("failed to find runner group: %w", err) + } + rg = *runnerGroup.ID + } + + req := github.GenerateJITConfigRequest{ + Name: instance.Name, + RunnerGroupID: rg, + Labels: labels, + // TODO(gabriel-samfira): Should we make this configurable? + WorkFolder: github.String("_work"), + } + jitConfig, resp, err := r.ghcli.GenerateOrgJITConfig(ctx, r.cfg.Name, &req) + if err != nil { + if resp != nil && resp.StatusCode == http.StatusUnauthorized { + return nil, nil, fmt.Errorf("failed to get JIT config: %w", err) + } + return nil, nil, fmt.Errorf("failed to get JIT config: %w", err) + } + + runner = jitConfig.Runner + defer func() { + if err != nil && runner != nil { + _, innerErr := r.ghcli.RemoveOrganizationRunner(r.ctx, r.cfg.Name, runner.GetID()) + log.Printf("failed to remove runner: %v", innerErr) + } + }() + + decoded, err := base64.StdEncoding.DecodeString(*jitConfig.EncodedJITConfig) + if err != nil { + return nil, nil, fmt.Errorf("failed to decode JIT config: %w", err) + } + + var ret map[string]string + if err := json.Unmarshal(decoded, &ret); err != nil { + return nil, nil, fmt.Errorf("failed to unmarshal JIT config: %w", err) + } + + return ret, jitConfig.Runner, nil +} + func (r *organization) GithubCLI() common.GithubClient { return r.ghcli } diff --git a/runner/pool/pool.go b/runner/pool/pool.go index fdfb6e8b9..0730f07b8 100644 --- a/runner/pool/pool.go +++ b/runner/pool/pool.go @@ -692,7 +692,7 @@ func (r *basePoolManager) setInstanceStatus(runnerName string, status commonPara return instance, nil } -func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditionalLabels []string) error { +func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditionalLabels []string) (err error) { pool, err := r.helper.GetPoolByID(poolID) if err != nil { return errors.Wrap(err, "fetching pool") @@ -713,14 +713,43 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona AditionalLabels: aditionalLabels, } - _, err = r.store.CreateInstance(r.ctx, poolID, createParams) + instance, err := r.store.CreateInstance(r.ctx, poolID, createParams) if err != nil { return errors.Wrap(err, "creating instance") } - // labels := r.getLabelsForInstance(pool) + defer func() { + if err != nil && instance.ID != "" { + if err := r.ForceDeleteRunner(instance); err != nil { + r.log("failed to cleanup instance: %s", instance.Name) + } + } + }() - // // Attempt to create JIT config + labels := r.getLabelsForInstance(pool) + // Attempt to create JIT config + jitConfig, runner, err := r.helper.GetJITConfig(ctx, instance, pool, labels) + if err == nil { + updateParams := params.UpdateInstanceParams{ + AgentID: runner.GetID(), + // We're using a JIT config. Setting the TokenFetched will disable the registration token + // metadata endpoint. + TokenFetched: github.Bool(true), + JitConfiguration: jitConfig, + } + instance, err = r.updateInstance(instance.Name, updateParams) + if err != nil { + // The agent ID is not recorded in the instance, so the deferred ForceDeleteRunner() will not + // attempt to clean it up. We need to do it here. + _, runnerCleanupErr := r.helper.RemoveGithubRunner(runner.GetID()) + if err != nil { + log.Printf("failed to remove runner %d: %s", runner.GetID(), runnerCleanupErr) + } + return errors.Wrap(err, "updating instance") + } + } else { + r.log("failed to get JIT config, falling back to registration token: %s", err) + } return nil } @@ -771,6 +800,8 @@ func (r *basePoolManager) addInstanceToProvider(instance params.Instance) error return fmt.Errorf("unknown provider %s for pool %s", pool.ProviderName, pool.ID) } + // We still need the labels here for situations where we don't have a JIT config generated. + // This can happen if GARM is used against an instance of GHES older than version 3.10. labels := r.getLabelsForInstance(pool) jwtValidity := pool.RunnerTimeout() @@ -796,6 +827,7 @@ func (r *basePoolManager) addInstanceToProvider(instance params.Instance) error PoolID: instance.PoolID, CACertBundle: r.credsDetails.CABundle, GitHubRunnerGroup: instance.GitHubRunnerGroup, + // JitConfigEnabled: len(instance.JitConfiguration) > 0, } var instanceIDToDelete string diff --git a/runner/pool/repository.go b/runner/pool/repository.go index b12c87aa3..ec5c92e1a 100644 --- a/runner/pool/repository.go +++ b/runner/pool/repository.go @@ -16,6 +16,8 @@ package pool import ( "context" + "encoding/base64" + "encoding/json" "fmt" "log" "net/http" @@ -86,6 +88,52 @@ type repository struct { mux sync.Mutex } +func (r *repository) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) { + if instance.AgentID != 0 { + return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest) + } + + if instance.JitConfiguration != nil { + return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest) + } + + req := github.GenerateJITConfigRequest{ + Name: instance.Name, + // At the repository level we only have the default runner group. + RunnerGroupID: 1, + Labels: labels, + // TODO(gabriel-samfira): Should we make this configurable? + WorkFolder: github.String("_work"), + } + jitConfig, resp, err := r.ghcli.GenerateRepoJITConfig(ctx, r.cfg.Owner, r.cfg.Name, &req) + if err != nil { + if resp != nil && resp.StatusCode == http.StatusUnauthorized { + return nil, nil, fmt.Errorf("failed to get JIT config: %w", err) + } + return nil, nil, fmt.Errorf("failed to get JIT config: %w", err) + } + runner = jitConfig.Runner + + defer func() { + if err != nil && runner != nil { + _, innerErr := r.ghcli.RemoveRunner(r.ctx, r.cfg.Owner, r.cfg.Name, runner.GetID()) + log.Printf("failed to remove runner: %v", innerErr) + } + }() + + decoded, err := base64.StdEncoding.DecodeString(*jitConfig.EncodedJITConfig) + if err != nil { + return nil, nil, fmt.Errorf("failed to decode JIT config: %w", err) + } + + var ret map[string]string + if err := json.Unmarshal(decoded, &ret); err != nil { + return nil, nil, fmt.Errorf("failed to unmarshal JIT config: %w", err) + } + + return ret, jitConfig.Runner, nil +} + func (r *repository) GithubCLI() common.GithubClient { return r.ghcli } From 1268507ce6fa0e1e3176203e6952a84d814e984c Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Tue, 22 Aug 2023 11:58:52 +0000 Subject: [PATCH 08/14] Add jit config routes Signed-off-by: Gabriel Adrian Samfira --- apiserver/controllers/metadata.go | 69 +++++++++++++ apiserver/routers/routers.go | 8 ++ go.mod | 5 +- go.sum | 2 - runner/metadata.go | 160 ++++++++++++++++++++++++++++++ runner/pool/pool.go | 18 ++-- runner/runner.go | 46 --------- vendor/modules.txt | 3 +- 8 files changed, 255 insertions(+), 56 deletions(-) diff --git a/apiserver/controllers/metadata.go b/apiserver/controllers/metadata.go index c88e68cb5..0f2f7b77f 100644 --- a/apiserver/controllers/metadata.go +++ b/apiserver/controllers/metadata.go @@ -16,8 +16,12 @@ package controllers import ( "encoding/json" + "fmt" "log" "net/http" + + "github.com/cloudbase/garm/apiserver/params" + "github.com/gorilla/mux" ) func (a *APIController) InstanceGithubRegistrationTokenHandler(w http.ResponseWriter, r *http.Request) { @@ -36,6 +40,71 @@ func (a *APIController) InstanceGithubRegistrationTokenHandler(w http.ResponseWr } } +func (a *APIController) JITCredentialsFileHandler(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + vars := mux.Vars(r) + fileName, ok := vars["fileName"] + if !ok { + w.WriteHeader(http.StatusNotFound) + if err := json.NewEncoder(w).Encode(params.APIErrorResponse{ + Error: "Not Found", + Details: "Not Found", + }); err != nil { + log.Printf("failed to encode response: %q", err) + } + return + } + + data, err := a.r.GetJITConfigFile(ctx, fileName) + if err != nil { + handleError(w, err) + return + } + + // Note the leading dot in the filename + name := fmt.Sprintf("attachment; filename=.%s", fileName) + w.Header().Set("Content-Disposition", name) + w.Header().Set("Content-Type", "octet-stream") + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) + w.WriteHeader(http.StatusOK) + if _, err := w.Write(data); err != nil { + log.Printf("failed to encode response: %q", err) + } +} + +func (a *APIController) SystemdServiceNameHandler(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + serviceName, err := a.r.GetRunnerServiceName(ctx) + if err != nil { + handleError(w, err) + return + } + + w.Header().Set("Content-Type", "text/plain") + w.WriteHeader(http.StatusOK) + if _, err := w.Write([]byte(serviceName)); err != nil { + log.Printf("failed to encode response: %q", err) + } +} + +func (a *APIController) SystemdUnitFileHandler(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + runAsUser := r.URL.Query().Get("runAsUser") + + data, err := a.r.GenerateSystemdUnitFile(ctx, runAsUser) + if err != nil { + handleError(w, err) + return + } + + w.Header().Set("Content-Type", "text/plain") + w.WriteHeader(http.StatusOK) + if _, err := w.Write(data); err != nil { + log.Printf("failed to encode response: %q", err) + } +} + func (a *APIController) RootCertificateBundleHandler(w http.ResponseWriter, r *http.Request) { ctx := r.Context() diff --git a/apiserver/routers/routers.go b/apiserver/routers/routers.go index 244992fe8..88c2e7539 100644 --- a/apiserver/routers/routers.go +++ b/apiserver/routers/routers.go @@ -117,6 +117,14 @@ func NewAPIRouter(han *controllers.APIController, logWriter io.Writer, authMiddl // Registration token metadataRouter.Handle("/runner-registration-token/", http.HandlerFunc(han.InstanceGithubRegistrationTokenHandler)).Methods("GET", "OPTIONS") metadataRouter.Handle("/runner-registration-token", http.HandlerFunc(han.InstanceGithubRegistrationTokenHandler)).Methods("GET", "OPTIONS") + // JIT credential files + metadataRouter.Handle("/credentials/{fileName}/", http.HandlerFunc(han.JITCredentialsFileHandler)).Methods("GET", "OPTIONS") + metadataRouter.Handle("/credentials/{fileName}", http.HandlerFunc(han.JITCredentialsFileHandler)).Methods("GET", "OPTIONS") + // Systemd files + metadataRouter.Handle("/systemd/service-name/", http.HandlerFunc(han.SystemdServiceNameHandler)).Methods("GET", "OPTIONS") + metadataRouter.Handle("/systemd/service-name/", http.HandlerFunc(han.SystemdServiceNameHandler)).Methods("GET", "OPTIONS") + metadataRouter.Handle("/systemd/runner-service/", http.HandlerFunc(han.SystemdUnitFileHandler)).Methods("GET", "OPTIONS") + metadataRouter.Handle("/systemd/runner-service", http.HandlerFunc(han.SystemdUnitFileHandler)).Methods("GET", "OPTIONS") metadataRouter.Handle("/system/cert-bundle/", http.HandlerFunc(han.RootCertificateBundleHandler)).Methods("GET", "OPTIONS") metadataRouter.Handle("/system/cert-bundle", http.HandlerFunc(han.RootCertificateBundleHandler)).Methods("GET", "OPTIONS") diff --git a/go.mod b/go.mod index 8c5abdf6f..9709b78e8 100644 --- a/go.mod +++ b/go.mod @@ -36,7 +36,10 @@ require ( gorm.io/gorm v1.24.6 ) -replace github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230821112832-bbb536ee5a3a +replace ( + github.com/cloudbase/garm-provider-common => /home/ubuntu/garm-provider-common + github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230821112832-bbb536ee5a3a +) require ( github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect diff --git a/go.sum b/go.sum index 0a3bf1425..815edd0af 100644 --- a/go.sum +++ b/go.sum @@ -22,8 +22,6 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cloudbase/garm-provider-common v0.0.0-20230924074517-dd3e26769a05 h1:V9TQBCnwTeKX+gpmlEtZAQ5gNbYrdGelAA3jgnGde1c= -github.com/cloudbase/garm-provider-common v0.0.0-20230924074517-dd3e26769a05/go.mod h1:NgR629o2NYWTffZt3uURmu3orjMDQ2vh6KJ5ytwh7Qw= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/runner/metadata.go b/runner/metadata.go index 9d0a2769c..bbbc0e475 100644 --- a/runner/metadata.go +++ b/runner/metadata.go @@ -1,15 +1,175 @@ package runner import ( + "bytes" "context" + "encoding/base64" + "fmt" + "html/template" "log" + "strings" + "github.com/cloudbase/garm-provider-common/defaults" runnerErrors "github.com/cloudbase/garm-provider-common/errors" "github.com/cloudbase/garm/auth" "github.com/cloudbase/garm/params" "github.com/pkg/errors" ) +var systemdUnitTemplate = `[Unit] +Description=GitHub Actions Runner ({{.ServiceName}}) +After=network.target + +[Service] +ExecStart=/home/{{.RunAsUser}}/actions-runner/runsvc.sh +User=runner +WorkingDirectory=/home/{{.RunAsUser}}/actions-runner +KillMode=process +KillSignal=SIGTERM +TimeoutStopSec=5min + +[Install] +WantedBy=multi-user.target +` + +func validateInstanceState(ctx context.Context) (params.Instance, error) { + if !auth.InstanceHasJITConfig(ctx) { + return params.Instance{}, fmt.Errorf("instance not configured for JIT: %w", runnerErrors.ErrNotFound) + } + + status := auth.InstanceRunnerStatus(ctx) + if status != params.RunnerPending && status != params.RunnerInstalling { + return params.Instance{}, runnerErrors.ErrUnauthorized + } + + instance, err := auth.InstanceParams(ctx) + if err != nil { + log.Printf("failed to get instance params: %s", err) + return params.Instance{}, runnerErrors.ErrUnauthorized + } + return instance, nil +} + +func (r *Runner) GetRunnerServiceName(ctx context.Context) (string, error) { + instance, err := validateInstanceState(ctx) + if err != nil { + return "", runnerErrors.ErrUnauthorized + } + + pool, err := r.store.GetPoolByID(r.ctx, instance.PoolID) + if err != nil { + return "", errors.Wrap(err, "fetching pool") + } + + tpl := "actions.runner.%s.%s" + var serviceName string + switch pool.PoolType() { + case params.EnterprisePool: + serviceName = fmt.Sprintf(tpl, pool.EnterpriseName, instance.Name) + case params.OrganizationPool: + serviceName = fmt.Sprintf(tpl, pool.OrgName, instance.Name) + case params.RepositoryPool: + serviceName = fmt.Sprintf(tpl, strings.Replace(pool.RepoName, "/", "-", -1), instance.Name) + } + return serviceName, nil +} + +func (r *Runner) GenerateSystemdUnitFile(ctx context.Context, runAsUser string) ([]byte, error) { + serviceName, err := r.GetRunnerServiceName(ctx) + if err != nil { + return nil, errors.Wrap(err, "fetching runner service name") + } + + unitTemplate, err := template.New("").Parse(systemdUnitTemplate) + if err != nil { + return nil, errors.Wrap(err, "parsing template") + } + + if runAsUser == "" { + runAsUser = defaults.DefaultUser + } + + data := struct { + ServiceName string + RunAsUser string + }{ + ServiceName: serviceName, + RunAsUser: runAsUser, + } + + var unitFile bytes.Buffer + if err := unitTemplate.Execute(&unitFile, data); err != nil { + return nil, errors.Wrap(err, "executing template") + } + return unitFile.Bytes(), nil +} + +func (r *Runner) GetJITConfigFile(ctx context.Context, file string) ([]byte, error) { + instance, err := validateInstanceState(ctx) + if err != nil { + log.Printf("failed to get instance params: %s", err) + return nil, runnerErrors.ErrUnauthorized + } + jitConfig := instance.JitConfiguration + contents, ok := jitConfig[file] + if !ok { + return nil, fmt.Errorf("file not found: %w", runnerErrors.ErrNotFound) + } + + decoded, err := base64.StdEncoding.DecodeString(contents) + if err != nil { + return nil, errors.Wrap(err, "decoding file contents") + } + + return decoded, nil +} + +func (r *Runner) GetInstanceGithubRegistrationToken(ctx context.Context) (string, error) { + // Check if this instance already fetched a registration token or if it was configured using + // the new Just In Time runner feature. If we're still using the old way of configuring a runner, + // we only allow an instance to fetch one token. If the instance fails to bootstrap after a token + // is fetched, we reset the token fetched field when re-queueing the instance. + if auth.InstanceTokenFetched(ctx) || auth.InstanceHasJITConfig(ctx) { + return "", runnerErrors.ErrUnauthorized + } + + status := auth.InstanceRunnerStatus(ctx) + if status != params.RunnerPending && status != params.RunnerInstalling { + return "", runnerErrors.ErrUnauthorized + } + + instance, err := auth.InstanceParams(ctx) + if err != nil { + log.Printf("failed to get instance params: %s", err) + return "", runnerErrors.ErrUnauthorized + } + + poolMgr, err := r.getPoolManagerFromInstance(ctx, instance) + if err != nil { + return "", errors.Wrap(err, "fetching pool manager for instance") + } + + token, err := poolMgr.GithubRunnerRegistrationToken() + if err != nil { + return "", errors.Wrap(err, "fetching runner token") + } + + tokenFetched := true + updateParams := params.UpdateInstanceParams{ + TokenFetched: &tokenFetched, + } + + if _, err := r.store.UpdateInstance(r.ctx, instance.ID, updateParams); err != nil { + return "", errors.Wrap(err, "setting token_fetched for instance") + } + + if err := r.store.AddInstanceEvent(ctx, instance.ID, params.FetchTokenEvent, params.EventInfo, "runner registration token was retrieved"); err != nil { + return "", errors.Wrap(err, "recording event") + } + + return token, nil +} + func (r *Runner) GetRootCertificateBundle(ctx context.Context) (params.CertificateBundle, error) { instance, err := auth.InstanceParams(ctx) if err != nil { diff --git a/runner/pool/pool.go b/runner/pool/pool.go index 0730f07b8..842747d4c 100644 --- a/runner/pool/pool.go +++ b/runner/pool/pool.go @@ -800,9 +800,6 @@ func (r *basePoolManager) addInstanceToProvider(instance params.Instance) error return fmt.Errorf("unknown provider %s for pool %s", pool.ProviderName, pool.ID) } - // We still need the labels here for situations where we don't have a JIT config generated. - // This can happen if GARM is used against an instance of GHES older than version 3.10. - labels := r.getLabelsForInstance(pool) jwtValidity := pool.RunnerTimeout() entity := r.helper.String() @@ -811,6 +808,8 @@ func (r *basePoolManager) addInstanceToProvider(instance params.Instance) error return errors.Wrap(err, "fetching instance jwt token") } + hasJITConfig := len(instance.JitConfiguration) > 0 + bootstrapArgs := commonParams.BootstrapInstance{ Name: instance.Name, Tools: r.tools, @@ -823,11 +822,17 @@ func (r *basePoolManager) addInstanceToProvider(instance params.Instance) error Flavor: pool.Flavor, Image: pool.Image, ExtraSpecs: pool.ExtraSpecs, - Labels: labels, PoolID: instance.PoolID, CACertBundle: r.credsDetails.CABundle, GitHubRunnerGroup: instance.GitHubRunnerGroup, - // JitConfigEnabled: len(instance.JitConfiguration) > 0, + JitConfigEnabled: hasJITConfig, + } + + if !hasJITConfig { + // We still need the labels here for situations where we don't have a JIT config generated. + // This can happen if GARM is used against an instance of GHES older than version 3.10. + // The labels field should be ignored by providers if JIT config is enabled. + bootstrapArgs.Labels = r.getLabelsForInstance(pool) } var instanceIDToDelete string @@ -1162,11 +1167,12 @@ func (r *basePoolManager) retryFailedInstancesForOnePool(ctx context.Context, po // TODO(gabriel-samfira): Incrementing CreateAttempt should be done within a transaction. // It's fairly safe to do here (for now), as there should be no other code path that updates // an instance in this state. - var tokenFetched bool = false + var tokenFetched bool = len(instance.JitConfiguration) > 0 updateParams := params.UpdateInstanceParams{ CreateAttempt: instance.CreateAttempt + 1, TokenFetched: &tokenFetched, Status: commonParams.InstancePendingCreate, + RunnerStatus: params.RunnerPending, } r.log("queueing previously failed instance %s for retry", instance.Name) // Set instance to pending create and wait for retry. diff --git a/runner/runner.go b/runner/runner.go index 0aba8823a..aa4aedbe7 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -858,52 +858,6 @@ func (r *Runner) AddInstanceStatusMessage(ctx context.Context, param params.Inst return nil } -func (r *Runner) GetInstanceGithubRegistrationToken(ctx context.Context) (string, error) { - // Check if this instance already fetched a registration token or if it was configured using - // the new Just In Time runner feature. If we're still using the old way of configuring a runner, - // we only allow an instance to fetch one token. If the instance fails to bootstrap after a token - // is fetched, we reset the token fetched field when re-queueing the instance. - if auth.InstanceTokenFetched(ctx) || auth.InstanceHasJITConfig(ctx) { - return "", runnerErrors.ErrUnauthorized - } - - status := auth.InstanceRunnerStatus(ctx) - if status != params.RunnerPending && status != params.RunnerInstalling { - return "", runnerErrors.ErrUnauthorized - } - - instance, err := auth.InstanceParams(ctx) - if err != nil { - log.Printf("failed to get instance params: %s", err) - return "", runnerErrors.ErrUnauthorized - } - - poolMgr, err := r.getPoolManagerFromInstance(ctx, instance) - if err != nil { - return "", errors.Wrap(err, "fetching pool manager for instance") - } - - token, err := poolMgr.GithubRunnerRegistrationToken() - if err != nil { - return "", errors.Wrap(err, "fetching runner token") - } - - tokenFetched := true - updateParams := params.UpdateInstanceParams{ - TokenFetched: &tokenFetched, - } - - if _, err := r.store.UpdateInstance(r.ctx, instance.ID, updateParams); err != nil { - return "", errors.Wrap(err, "setting token_fetched for instance") - } - - if err := r.store.AddInstanceEvent(ctx, instance.ID, params.FetchTokenEvent, params.EventInfo, "runner registration token was retrieved"); err != nil { - return "", errors.Wrap(err, "recording event") - } - - return token, nil -} - func (r *Runner) getPoolManagerFromInstance(ctx context.Context, instance params.Instance) (common.PoolManager, error) { pool, err := r.store.GetPoolByID(ctx, instance.PoolID) if err != nil { diff --git a/vendor/modules.txt b/vendor/modules.txt index 24d7e7ab5..7789a712b 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -14,7 +14,7 @@ github.com/cespare/xxhash/v2 # github.com/chzyer/readline v1.5.1 ## explicit; go 1.15 github.com/chzyer/readline -# github.com/cloudbase/garm-provider-common v0.0.0-20230924074517-dd3e26769a05 +# github.com/cloudbase/garm-provider-common v0.0.0-20230924074517-dd3e26769a05 => /home/ubuntu/garm-provider-common ## explicit; go 1.20 github.com/cloudbase/garm-provider-common/cloudconfig github.com/cloudbase/garm-provider-common/defaults @@ -439,4 +439,5 @@ gorm.io/gorm/logger gorm.io/gorm/migrator gorm.io/gorm/schema gorm.io/gorm/utils +# github.com/cloudbase/garm-provider-common => /home/ubuntu/garm-provider-common # github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230821112832-bbb536ee5a3a From e53c2713375df961973777215f1d88cd2f74fb1f Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Tue, 22 Aug 2023 15:08:58 +0000 Subject: [PATCH 09/14] Add metadata URLs Signed-off-by: Gabriel Adrian Samfira --- apiserver/controllers/metadata.go | 7 +++++-- apiserver/routers/routers.go | 8 ++++---- runner/metadata.go | 4 +++- vendor/modules.txt | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/apiserver/controllers/metadata.go b/apiserver/controllers/metadata.go index 0f2f7b77f..664d46a63 100644 --- a/apiserver/controllers/metadata.go +++ b/apiserver/controllers/metadata.go @@ -55,14 +55,17 @@ func (a *APIController) JITCredentialsFileHandler(w http.ResponseWriter, r *http return } - data, err := a.r.GetJITConfigFile(ctx, fileName) + dotFileName := fmt.Sprintf(".%s", fileName) + + data, err := a.r.GetJITConfigFile(ctx, dotFileName) if err != nil { + log.Printf("getting JIT config file: %s", err) handleError(w, err) return } // Note the leading dot in the filename - name := fmt.Sprintf("attachment; filename=.%s", fileName) + name := fmt.Sprintf("attachment; filename=%s", dotFileName) w.Header().Set("Content-Disposition", name) w.Header().Set("Content-Type", "octet-stream") w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) diff --git a/apiserver/routers/routers.go b/apiserver/routers/routers.go index 88c2e7539..6644a4bae 100644 --- a/apiserver/routers/routers.go +++ b/apiserver/routers/routers.go @@ -121,10 +121,10 @@ func NewAPIRouter(han *controllers.APIController, logWriter io.Writer, authMiddl metadataRouter.Handle("/credentials/{fileName}/", http.HandlerFunc(han.JITCredentialsFileHandler)).Methods("GET", "OPTIONS") metadataRouter.Handle("/credentials/{fileName}", http.HandlerFunc(han.JITCredentialsFileHandler)).Methods("GET", "OPTIONS") // Systemd files - metadataRouter.Handle("/systemd/service-name/", http.HandlerFunc(han.SystemdServiceNameHandler)).Methods("GET", "OPTIONS") - metadataRouter.Handle("/systemd/service-name/", http.HandlerFunc(han.SystemdServiceNameHandler)).Methods("GET", "OPTIONS") - metadataRouter.Handle("/systemd/runner-service/", http.HandlerFunc(han.SystemdUnitFileHandler)).Methods("GET", "OPTIONS") - metadataRouter.Handle("/systemd/runner-service", http.HandlerFunc(han.SystemdUnitFileHandler)).Methods("GET", "OPTIONS") + metadataRouter.Handle("/system/service-name/", http.HandlerFunc(han.SystemdServiceNameHandler)).Methods("GET", "OPTIONS") + metadataRouter.Handle("/system/service-name", http.HandlerFunc(han.SystemdServiceNameHandler)).Methods("GET", "OPTIONS") + metadataRouter.Handle("/systemd/unit-file/", http.HandlerFunc(han.SystemdUnitFileHandler)).Methods("GET", "OPTIONS") + metadataRouter.Handle("/systemd/unit-file", http.HandlerFunc(han.SystemdUnitFileHandler)).Methods("GET", "OPTIONS") metadataRouter.Handle("/system/cert-bundle/", http.HandlerFunc(han.RootCertificateBundleHandler)).Methods("GET", "OPTIONS") metadataRouter.Handle("/system/cert-bundle", http.HandlerFunc(han.RootCertificateBundleHandler)).Methods("GET", "OPTIONS") diff --git a/runner/metadata.go b/runner/metadata.go index bbbc0e475..7fa8f782f 100644 --- a/runner/metadata.go +++ b/runner/metadata.go @@ -53,11 +53,13 @@ func validateInstanceState(ctx context.Context) (params.Instance, error) { func (r *Runner) GetRunnerServiceName(ctx context.Context) (string, error) { instance, err := validateInstanceState(ctx) if err != nil { + log.Printf("failed to get instance params: %s", err) return "", runnerErrors.ErrUnauthorized } pool, err := r.store.GetPoolByID(r.ctx, instance.PoolID) if err != nil { + log.Printf("failed to get pool: %s", err) return "", errors.Wrap(err, "fetching pool") } @@ -113,7 +115,7 @@ func (r *Runner) GetJITConfigFile(ctx context.Context, file string) ([]byte, err jitConfig := instance.JitConfiguration contents, ok := jitConfig[file] if !ok { - return nil, fmt.Errorf("file not found: %w", runnerErrors.ErrNotFound) + return nil, errors.Wrap(runnerErrors.ErrNotFound, "retrieving file") } decoded, err := base64.StdEncoding.DecodeString(contents) diff --git a/vendor/modules.txt b/vendor/modules.txt index 7789a712b..8a00e86ea 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -439,5 +439,5 @@ gorm.io/gorm/logger gorm.io/gorm/migrator gorm.io/gorm/schema gorm.io/gorm/utils -# github.com/cloudbase/garm-provider-common => /home/ubuntu/garm-provider-common # github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230821112832-bbb536ee5a3a +# github.com/cloudbase/garm-provider-common => /home/ubuntu/garm-provider-common From e238f847811c2139a2556306c39d3e3f5c1f61bf Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Tue, 22 Aug 2023 19:00:00 +0000 Subject: [PATCH 10/14] update modules Signed-off-by: Gabriel Adrian Samfira --- go.mod | 5 ----- go.sum | 2 ++ vendor/modules.txt | 4 +--- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 9709b78e8..98cee70f4 100644 --- a/go.mod +++ b/go.mod @@ -36,11 +36,6 @@ require ( gorm.io/gorm v1.24.6 ) -replace ( - github.com/cloudbase/garm-provider-common => /home/ubuntu/garm-provider-common - github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230821112832-bbb536ee5a3a -) - require ( github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/beorn7/perks v1.0.1 // indirect diff --git a/go.sum b/go.sum index 815edd0af..0a3bf1425 100644 --- a/go.sum +++ b/go.sum @@ -22,6 +22,8 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cloudbase/garm-provider-common v0.0.0-20230924074517-dd3e26769a05 h1:V9TQBCnwTeKX+gpmlEtZAQ5gNbYrdGelAA3jgnGde1c= +github.com/cloudbase/garm-provider-common v0.0.0-20230924074517-dd3e26769a05/go.mod h1:NgR629o2NYWTffZt3uURmu3orjMDQ2vh6KJ5ytwh7Qw= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/vendor/modules.txt b/vendor/modules.txt index 8a00e86ea..2106e6b5b 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -14,7 +14,7 @@ github.com/cespare/xxhash/v2 # github.com/chzyer/readline v1.5.1 ## explicit; go 1.15 github.com/chzyer/readline -# github.com/cloudbase/garm-provider-common v0.0.0-20230924074517-dd3e26769a05 => /home/ubuntu/garm-provider-common +# github.com/cloudbase/garm-provider-common v0.0.0-20230924074517-dd3e26769a05 ## explicit; go 1.20 github.com/cloudbase/garm-provider-common/cloudconfig github.com/cloudbase/garm-provider-common/defaults @@ -439,5 +439,3 @@ gorm.io/gorm/logger gorm.io/gorm/migrator gorm.io/gorm/schema gorm.io/gorm/utils -# github.com/google/go-github/v54 => github.com/gabriel-samfira/go-github/v54 v54.0.0-20230821112832-bbb536ee5a3a -# github.com/cloudbase/garm-provider-common => /home/ubuntu/garm-provider-common From 5f2cb19503e1297f70f24d9c96f9c04bfa109886 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Wed, 23 Aug 2023 14:21:51 +0000 Subject: [PATCH 11/14] Use accessors when getting response values Signed-off-by: Gabriel Adrian Samfira --- runner/pool/organization.go | 10 +++++----- runner/pool/repository.go | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/runner/pool/organization.go b/runner/pool/organization.go index 0c9eb1466..5e88f73e2 100644 --- a/runner/pool/organization.go +++ b/runner/pool/organization.go @@ -103,7 +103,7 @@ func (r *organization) findRunnerGroupByName(ctx context.Context, name string) ( return nil, errors.Wrap(err, "fetching runners") } for _, runnerGroup := range runnerGroups.RunnerGroups { - if runnerGroup.Name != nil && *runnerGroup.Name == name { + if runnerGroup.GetName() == name { return runnerGroup, nil } } @@ -131,7 +131,7 @@ func (r *organization) GetJITConfig(ctx context.Context, instance params.Instanc if err != nil { return nil, nil, fmt.Errorf("failed to find runner group: %w", err) } - rg = *runnerGroup.ID + rg = runnerGroup.GetID() } req := github.GenerateJITConfigRequest{ @@ -149,7 +149,7 @@ func (r *organization) GetJITConfig(ctx context.Context, instance params.Instanc return nil, nil, fmt.Errorf("failed to get JIT config: %w", err) } - runner = jitConfig.Runner + runner = jitConfig.GetRunner() defer func() { if err != nil && runner != nil { _, innerErr := r.ghcli.RemoveOrganizationRunner(r.ctx, r.cfg.Name, runner.GetID()) @@ -157,7 +157,7 @@ func (r *organization) GetJITConfig(ctx context.Context, instance params.Instanc } }() - decoded, err := base64.StdEncoding.DecodeString(*jitConfig.EncodedJITConfig) + decoded, err := base64.StdEncoding.DecodeString(jitConfig.GetEncodedJITConfig()) if err != nil { return nil, nil, fmt.Errorf("failed to decode JIT config: %w", err) } @@ -167,7 +167,7 @@ func (r *organization) GetJITConfig(ctx context.Context, instance params.Instanc return nil, nil, fmt.Errorf("failed to unmarshal JIT config: %w", err) } - return ret, jitConfig.Runner, nil + return ret, runner, nil } func (r *organization) GithubCLI() common.GithubClient { diff --git a/runner/pool/repository.go b/runner/pool/repository.go index ec5c92e1a..e09081d3d 100644 --- a/runner/pool/repository.go +++ b/runner/pool/repository.go @@ -121,7 +121,7 @@ func (r *repository) GetJITConfig(ctx context.Context, instance params.Instance, } }() - decoded, err := base64.StdEncoding.DecodeString(*jitConfig.EncodedJITConfig) + decoded, err := base64.StdEncoding.DecodeString(jitConfig.GetEncodedJITConfig()) if err != nil { return nil, nil, fmt.Errorf("failed to decode JIT config: %w", err) } @@ -131,7 +131,7 @@ func (r *repository) GetJITConfig(ctx context.Context, instance params.Instance, return nil, nil, fmt.Errorf("failed to unmarshal JIT config: %w", err) } - return ret, jitConfig.Runner, nil + return ret, runner, nil } func (r *repository) GithubCLI() common.GithubClient { From 4bedb1dd6377bb731e68e870b7ff358f26f8df72 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Wed, 23 Aug 2023 22:50:04 +0000 Subject: [PATCH 12/14] Fix URLs for enterprises Signed-off-by: Gabriel Adrian Samfira --- runner/pool/enterprise.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/runner/pool/enterprise.go b/runner/pool/enterprise.go index fe9a18312..904fa8412 100644 --- a/runner/pool/enterprise.go +++ b/runner/pool/enterprise.go @@ -47,6 +47,12 @@ func NewEnterprisePoolManager(ctx context.Context, cfg params.Enterprise, cfgInt store: store, providers: providers, controllerID: cfgInternal.ControllerID, + urls: urls{ + webhookURL: cfgInternal.BaseWebhookURL, + callbackURL: cfgInternal.InstanceCallbackURL, + metadataURL: cfgInternal.InstanceMetadataURL, + controllerWebhookURL: cfgInternal.ControllerWebhookURL, + }, quit: make(chan struct{}), helper: helper, credsDetails: cfgInternal.GithubCredentialsDetails, From 8c507a92510280208e77b392dae2e666a48a87de Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Thu, 24 Aug 2023 13:56:35 +0000 Subject: [PATCH 13/14] Run go generate Signed-off-by: Gabriel Adrian Samfira --- runner/common/mocks/GithubClient.go | 35 ++++++++++ runner/common/mocks/GithubEnterpriseClient.go | 70 +++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/runner/common/mocks/GithubClient.go b/runner/common/mocks/GithubClient.go index 918c60ffb..b8f450e75 100644 --- a/runner/common/mocks/GithubClient.go +++ b/runner/common/mocks/GithubClient.go @@ -451,6 +451,41 @@ func (_m *GithubClient) ListOrganizationRunnerApplicationDownloads(ctx context.C return r0, r1, r2 } +// ListOrganizationRunnerGroups provides a mock function with given fields: ctx, org, opts +func (_m *GithubClient) ListOrganizationRunnerGroups(ctx context.Context, org string, opts *github.ListOrgRunnerGroupOptions) (*github.RunnerGroups, *github.Response, error) { + ret := _m.Called(ctx, org, opts) + + var r0 *github.RunnerGroups + var r1 *github.Response + var r2 error + if rf, ok := ret.Get(0).(func(context.Context, string, *github.ListOrgRunnerGroupOptions) (*github.RunnerGroups, *github.Response, error)); ok { + return rf(ctx, org, opts) + } + if rf, ok := ret.Get(0).(func(context.Context, string, *github.ListOrgRunnerGroupOptions) *github.RunnerGroups); ok { + r0 = rf(ctx, org, opts) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*github.RunnerGroups) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, string, *github.ListOrgRunnerGroupOptions) *github.Response); ok { + r1 = rf(ctx, org, opts) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(*github.Response) + } + } + + if rf, ok := ret.Get(2).(func(context.Context, string, *github.ListOrgRunnerGroupOptions) error); ok { + r2 = rf(ctx, org, opts) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + // ListOrganizationRunners provides a mock function with given fields: ctx, owner, opts func (_m *GithubClient) ListOrganizationRunners(ctx context.Context, owner string, opts *github.ListOptions) (*github.Runners, *github.Response, error) { ret := _m.Called(ctx, owner, opts) diff --git a/runner/common/mocks/GithubEnterpriseClient.go b/runner/common/mocks/GithubEnterpriseClient.go index 0a0e6bcaf..2fc1442a5 100644 --- a/runner/common/mocks/GithubEnterpriseClient.go +++ b/runner/common/mocks/GithubEnterpriseClient.go @@ -49,6 +49,41 @@ func (_m *GithubEnterpriseClient) CreateRegistrationToken(ctx context.Context, e return r0, r1, r2 } +// GenerateEnterpriseJITConfig provides a mock function with given fields: ctx, enterprise, request +func (_m *GithubEnterpriseClient) GenerateEnterpriseJITConfig(ctx context.Context, enterprise string, request *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error) { + ret := _m.Called(ctx, enterprise, request) + + var r0 *github.JITRunnerConfig + var r1 *github.Response + var r2 error + if rf, ok := ret.Get(0).(func(context.Context, string, *github.GenerateJITConfigRequest) (*github.JITRunnerConfig, *github.Response, error)); ok { + return rf(ctx, enterprise, request) + } + if rf, ok := ret.Get(0).(func(context.Context, string, *github.GenerateJITConfigRequest) *github.JITRunnerConfig); ok { + r0 = rf(ctx, enterprise, request) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*github.JITRunnerConfig) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, string, *github.GenerateJITConfigRequest) *github.Response); ok { + r1 = rf(ctx, enterprise, request) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(*github.Response) + } + } + + if rf, ok := ret.Get(2).(func(context.Context, string, *github.GenerateJITConfigRequest) error); ok { + r2 = rf(ctx, enterprise, request) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + // ListRunnerApplicationDownloads provides a mock function with given fields: ctx, enterprise func (_m *GithubEnterpriseClient) ListRunnerApplicationDownloads(ctx context.Context, enterprise string) ([]*github.RunnerApplicationDownload, *github.Response, error) { ret := _m.Called(ctx, enterprise) @@ -84,6 +119,41 @@ func (_m *GithubEnterpriseClient) ListRunnerApplicationDownloads(ctx context.Con return r0, r1, r2 } +// ListRunnerGroups provides a mock function with given fields: ctx, enterprise, opts +func (_m *GithubEnterpriseClient) ListRunnerGroups(ctx context.Context, enterprise string, opts *github.ListEnterpriseRunnerGroupOptions) (*github.EnterpriseRunnerGroups, *github.Response, error) { + ret := _m.Called(ctx, enterprise, opts) + + var r0 *github.EnterpriseRunnerGroups + var r1 *github.Response + var r2 error + if rf, ok := ret.Get(0).(func(context.Context, string, *github.ListEnterpriseRunnerGroupOptions) (*github.EnterpriseRunnerGroups, *github.Response, error)); ok { + return rf(ctx, enterprise, opts) + } + if rf, ok := ret.Get(0).(func(context.Context, string, *github.ListEnterpriseRunnerGroupOptions) *github.EnterpriseRunnerGroups); ok { + r0 = rf(ctx, enterprise, opts) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*github.EnterpriseRunnerGroups) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, string, *github.ListEnterpriseRunnerGroupOptions) *github.Response); ok { + r1 = rf(ctx, enterprise, opts) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(*github.Response) + } + } + + if rf, ok := ret.Get(2).(func(context.Context, string, *github.ListEnterpriseRunnerGroupOptions) error); ok { + r2 = rf(ctx, enterprise, opts) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + // ListRunners provides a mock function with given fields: ctx, enterprise, opts func (_m *GithubEnterpriseClient) ListRunners(ctx context.Context, enterprise string, opts *github.ListOptions) (*github.Runners, *github.Response, error) { ret := _m.Called(ctx, enterprise, opts) From 019948acbe40797785b4d7c1bf482fc3dfa18932 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sat, 26 Aug 2023 19:40:01 +0000 Subject: [PATCH 14/14] Add JIT config as part of instance create We must create the DB entry for a runner with a JIT config included. Adding it later via an update runs the risk of having the consolidate loop pick up the incomplete instance. Signed-off-by: Gabriel Adrian Samfira --- database/sql/instances.go | 1 + params/requests.go | 3 +- runner/pool/enterprise.go | 12 +----- runner/pool/interfaces.go | 2 +- runner/pool/organization.go | 12 +----- runner/pool/pool.go | 77 ++++++++++++++++--------------------- runner/pool/repository.go | 12 +----- 7 files changed, 43 insertions(+), 76 deletions(-) diff --git a/database/sql/instances.go b/database/sql/instances.go index 0c8e4a487..608d4fa6a 100644 --- a/database/sql/instances.go +++ b/database/sql/instances.go @@ -82,6 +82,7 @@ func (s *sqlDatabase) CreateInstance(ctx context.Context, poolID string, param p GitHubRunnerGroup: param.GitHubRunnerGroup, JitConfiguration: secret, AditionalLabels: labels, + AgentID: param.AgentID, } q := s.conn.Create(&newInstance) if q.Error != nil { diff --git a/params/requests.go b/params/requests.go index 0293223eb..74bfeb503 100644 --- a/params/requests.go +++ b/params/requests.go @@ -136,7 +136,8 @@ type CreateInstanceParams struct { // GithubRunnerGroup is the github runner group to which the runner belongs. // The runner group must be created by someone with access to the enterprise. GitHubRunnerGroup string - CreateAttempt int `json:"-"` + CreateAttempt int `json:"-"` + AgentID int64 `json:"-"` AditionalLabels []string JitConfiguration map[string]string } diff --git a/runner/pool/enterprise.go b/runner/pool/enterprise.go index 904fa8412..b7adad768 100644 --- a/runner/pool/enterprise.go +++ b/runner/pool/enterprise.go @@ -104,15 +104,7 @@ func (r *enterprise) findRunnerGroupByName(ctx context.Context, name string) (*g return nil, errors.Wrap(runnerErrors.ErrNotFound, "runner group not found") } -func (r *enterprise) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) { - if instance.AgentID != 0 { - return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest) - } - - if instance.JitConfiguration != nil { - return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest) - } - +func (r *enterprise) GetJITConfig(ctx context.Context, instance string, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) { var rg int64 = 1 if pool.GitHubRunnerGroup != "" { runnerGroup, err := r.findRunnerGroupByName(ctx, pool.GitHubRunnerGroup) @@ -123,7 +115,7 @@ func (r *enterprise) GetJITConfig(ctx context.Context, instance params.Instance, } req := github.GenerateJITConfigRequest{ - Name: instance.Name, + Name: instance, RunnerGroupID: rg, Labels: labels, // TODO(gabriel-samfira): Should we make this configurable? diff --git a/runner/pool/interfaces.go b/runner/pool/interfaces.go index d40fc39eb..f981992b6 100644 --- a/runner/pool/interfaces.go +++ b/runner/pool/interfaces.go @@ -37,7 +37,7 @@ type poolHelper interface { GithubCLI() common.GithubClient - GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (map[string]string, *github.Runner, error) + GetJITConfig(ctx context.Context, instanceName string, pool params.Pool, labels []string) (map[string]string, *github.Runner, error) FetchDbInstances() ([]params.Instance, error) ListPools() ([]params.Pool, error) diff --git a/runner/pool/organization.go b/runner/pool/organization.go index 5e88f73e2..539af39a6 100644 --- a/runner/pool/organization.go +++ b/runner/pool/organization.go @@ -116,15 +116,7 @@ func (r *organization) findRunnerGroupByName(ctx context.Context, name string) ( return nil, errors.Wrap(runnerErrors.ErrNotFound, "runner group not found") } -func (r *organization) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) { - if instance.AgentID != 0 { - return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest) - } - - if instance.JitConfiguration != nil { - return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest) - } - +func (r *organization) GetJITConfig(ctx context.Context, instance string, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) { var rg int64 = 1 if pool.GitHubRunnerGroup != "" { runnerGroup, err := r.findRunnerGroupByName(ctx, pool.GitHubRunnerGroup) @@ -135,7 +127,7 @@ func (r *organization) GetJITConfig(ctx context.Context, instance params.Instanc } req := github.GenerateJITConfigRequest{ - Name: instance.Name, + Name: instance, RunnerGroupID: rg, Labels: labels, // TODO(gabriel-samfira): Should we make this configurable? diff --git a/runner/pool/pool.go b/runner/pool/pool.go index 842747d4c..4ca49c4e7 100644 --- a/runner/pool/pool.go +++ b/runner/pool/pool.go @@ -388,11 +388,18 @@ func (r *basePoolManager) cleanupOrphanedProviderRunners(runners []*github.Runne continue } + pool, err := r.store.GetPoolByID(r.ctx, instance.PoolID) + if err != nil { + return errors.Wrap(err, "fetching instance pool info") + } + switch instance.RunnerStatus { case params.RunnerPending, params.RunnerInstalling: - // runner is still installing. We give it a chance to finish. - r.log("runner %s is still installing, give it a chance to finish", instance.Name) - continue + if time.Since(instance.UpdatedAt).Minutes() < float64(pool.RunnerTimeout()) { + // runner is still installing. We give it a chance to finish. + r.log("runner %s is still installing, give it a chance to finish", instance.Name) + continue + } } if time.Since(instance.UpdatedAt).Minutes() < 5 { @@ -451,20 +458,7 @@ func (r *basePoolManager) reapTimedOutRunners(runners []*github.Runner) error { // * The runner never joined github within the pool timeout // * The runner managed to join github, but the setup process failed later and the runner // never started on the instance. - // - // There are several steps in the user data that sets up the runner: - // * Download and unarchive the runner from github (or used the cached version) - // * Configure runner (connects to github). At this point the runner is seen as offline. - // * Install the service - // * Set SELinux context (if SELinux is enabled) - // * Start the service (if successful, the runner will transition to "online") - // * Get the runner ID - // - // If we fail getting the runner ID after it's started, garm will set the runner status to "failed", - // even though, technically the runner is online and fully functional. This is why we check here for - // both the runner status as reported by GitHub and the runner status as reported by the provider. - // If the runner is "offline" and marked as "failed", it should be safe to reap it. - if runner, ok := runnersByName[instance.Name]; !ok || (runner.GetStatus() == "offline" && instance.RunnerStatus == params.RunnerFailed) { + if runner, ok := runnersByName[instance.Name]; !ok || runner.GetStatus() == "offline" { r.log("reaping timed-out/failed runner %s", instance.Name) if err := r.ForceDeleteRunner(instance); err != nil { r.log("failed to update runner %s status: %s", instance.Name, err) @@ -699,6 +693,12 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona } name := fmt.Sprintf("%s-%s", pool.GetRunnerPrefix(), util.NewID()) + labels := r.getLabelsForInstance(pool) + // Attempt to create JIT config + jitConfig, runner, err := r.helper.GetJITConfig(ctx, name, pool, labels) + if err != nil { + r.log("failed to get JIT config, falling back to registration token: %s", err) + } createParams := params.CreateInstanceParams{ Name: name, @@ -711,6 +711,11 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona CreateAttempt: 1, GitHubRunnerGroup: pool.GitHubRunnerGroup, AditionalLabels: aditionalLabels, + JitConfiguration: jitConfig, + } + + if runner != nil { + createParams.AgentID = runner.GetID() } instance, err := r.store.CreateInstance(r.ctx, poolID, createParams) @@ -719,37 +724,21 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona } defer func() { - if err != nil && instance.ID != "" { - if err := r.ForceDeleteRunner(instance); err != nil { - r.log("failed to cleanup instance: %s", instance.Name) + if err != nil { + if instance.ID != "" { + if err := r.ForceDeleteRunner(instance); err != nil { + r.log("failed to cleanup instance: %s", instance.Name) + } } - } - }() - labels := r.getLabelsForInstance(pool) - // Attempt to create JIT config - jitConfig, runner, err := r.helper.GetJITConfig(ctx, instance, pool, labels) - if err == nil { - updateParams := params.UpdateInstanceParams{ - AgentID: runner.GetID(), - // We're using a JIT config. Setting the TokenFetched will disable the registration token - // metadata endpoint. - TokenFetched: github.Bool(true), - JitConfiguration: jitConfig, - } - instance, err = r.updateInstance(instance.Name, updateParams) - if err != nil { - // The agent ID is not recorded in the instance, so the deferred ForceDeleteRunner() will not - // attempt to clean it up. We need to do it here. - _, runnerCleanupErr := r.helper.RemoveGithubRunner(runner.GetID()) - if err != nil { - log.Printf("failed to remove runner %d: %s", runner.GetID(), runnerCleanupErr) + if runner != nil { + _, runnerCleanupErr := r.helper.RemoveGithubRunner(runner.GetID()) + if err != nil { + r.log("failed to remove runner %d: %s", runner.GetID(), runnerCleanupErr) + } } - return errors.Wrap(err, "updating instance") } - } else { - r.log("failed to get JIT config, falling back to registration token: %s", err) - } + }() return nil } diff --git a/runner/pool/repository.go b/runner/pool/repository.go index e09081d3d..c74452de8 100644 --- a/runner/pool/repository.go +++ b/runner/pool/repository.go @@ -88,17 +88,9 @@ type repository struct { mux sync.Mutex } -func (r *repository) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) { - if instance.AgentID != 0 { - return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest) - } - - if instance.JitConfiguration != nil { - return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest) - } - +func (r *repository) GetJITConfig(ctx context.Context, instance string, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) { req := github.GenerateJITConfigRequest{ - Name: instance.Name, + Name: instance, // At the repository level we only have the default runner group. RunnerGroupID: 1, Labels: labels,