Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for shallow cloning repos #363

Merged
merged 6 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cmd/locations.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (

func processLocations(ctx context.Context, ctr container.Container, locations []string) error {
for index, location := range locations {
if newLocation, err := maybeCloneGitUrl(ctx, ctr.RepoManager, ctr.Config.RepoRefreshInterval, location, ctr.VcsClient.Username()); err != nil {
if newLocation, err := maybeCloneGitUrl(ctx, ctr.RepoManager, ctr.Config.RepoRefreshInterval, location, ctr.VcsClient.Username(), ctr.Config.RepoShallowClone); err != nil {
return errors.Wrapf(err, "failed to clone %q", location)
} else if newLocation != "" {
locations[index] = newLocation
Expand All @@ -31,12 +31,12 @@ func processLocations(ctx context.Context, ctr container.Container, locations []
}

type cloner interface {
Clone(ctx context.Context, cloneUrl, branchName string) (*git.Repo, error)
Clone(ctx context.Context, cloneUrl, branchName string, shallow bool) (*git.Repo, error)
}

var ErrCannotUseQueryWithFilePath = errors.New("relative and absolute file paths cannot have query parameters")

func maybeCloneGitUrl(ctx context.Context, repoManager cloner, repoRefreshDuration time.Duration, location, vcsUsername string) (string, error) {
func maybeCloneGitUrl(ctx context.Context, repoManager cloner, repoRefreshDuration time.Duration, location, vcsUsername string, shallow bool) (string, error) {
result := strings.SplitN(location, "?", 2)
if !isGitURL(result[0]) {
if len(result) > 1 {
Expand All @@ -51,7 +51,7 @@ func maybeCloneGitUrl(ctx context.Context, repoManager cloner, repoRefreshDurati
}
cloneUrl := repoUrl.CloneURL(vcsUsername)

repo, err := repoManager.Clone(ctx, cloneUrl, query.Get("branch"))
repo, err := repoManager.Clone(ctx, cloneUrl, query.Get("branch"), shallow)
if err != nil {
return "", errors.Wrap(err, "failed to clone")
}
Expand Down
10 changes: 5 additions & 5 deletions cmd/locations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ type fakeCloner struct {
err error
}

func (f *fakeCloner) Clone(_ context.Context, cloneUrl, branchName string) (*git.Repo, error) {
func (f *fakeCloner) Clone(_ context.Context, cloneUrl, branchName string, shallow bool) (*git.Repo, error) {
f.cloneUrl = cloneUrl
f.branchName = branchName
return f.result, f.err
Expand All @@ -43,7 +43,7 @@ func TestMaybeCloneGitUrl_NonGitUrl(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
fc := &fakeCloner{result: nil, err: nil}
actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.NoError(t, err)
assert.Equal(t, "", fc.branchName)
assert.Equal(t, "", fc.cloneUrl)
Expand Down Expand Up @@ -137,7 +137,7 @@ func TestMaybeCloneGitUrl_HappyPath(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
fc := &fakeCloner{result: &git.Repo{Directory: testRoot}, err: nil}
actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.NoError(t, err)
assert.Equal(t, tc.expected.branch, fc.branchName)
assert.Equal(t, tc.expected.cloneUrl, fc.cloneUrl)
Expand Down Expand Up @@ -165,7 +165,7 @@ func TestMaybeCloneGitUrl_URLError(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
fc := &fakeCloner{result: &git.Repo{Directory: testRoot}, err: nil}
result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.ErrorContains(t, err, tc.expected)
require.Equal(t, "", result)
})
Expand Down Expand Up @@ -193,7 +193,7 @@ func TestMaybeCloneGitUrl_CloneError(t *testing.T) {
defer cancel()

fc := &fakeCloner{result: &git.Repo{Directory: testRoot}, err: tc.cloneError}
result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.ErrorContains(t, err, tc.expected)
require.Equal(t, "", result)
})
Expand Down
3 changes: 3 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ func init() {
newStringOpts().
withDefault("kubechecks again"))
stringSliceFlag(flags, "additional-apps-namespaces", "Additional namespaces other than the ArgoCDNamespace to monitor for applications.")
boolFlag(flags, "repo-shallow-clone", "Enable shallow cloning for all git repos.",
newBoolOpts().
withDefault(false))
stringFlag(flags, "identifier", "Identifier for the kubechecks instance. Used to differentiate between multiple kubechecks instances.",
newStringOpts().
withDefault(""))
Expand Down
1 change: 1 addition & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ The full list of supported environment variables is described below:
|`KUBECHECKS_POLICIES_LOCATION`|Sets rego policy locations to be used for every check request. Can be common path inside the repos being checked or git urls in either git or http(s) format.|`[./policies]`|
|`KUBECHECKS_REPLAN_COMMENT_MSG`|Comment message that re-triggers kubechecks on a PR.|`kubechecks again`|
|`KUBECHECKS_REPO_REFRESH_INTERVAL`|Interval between static repo refreshes (for schemas and policies).|`5m`|
|`KUBECHECKS_REPO_SHALLOW_CLONE`|Enable shallow cloning for all git repos.|`false`|
|`KUBECHECKS_SCHEMAS_LOCATION`|Sets schema locations to be used for every check request. Can be a common path on the host or git urls in either git or http(s) format.|`[]`|
|`KUBECHECKS_SHOW_DEBUG_INFO`|Set to true to print debug info to the footer of MR comments.|`false`|
|`KUBECHECKS_TIDY_OUTDATED_COMMENTS_MODE`|Sets the mode to use when tidying outdated comments. One of hide, delete.|`hide`|
Expand Down
4 changes: 3 additions & 1 deletion localdev/kubechecks/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,18 @@ configMap:
#
# KUBECHECKS_LABEL_FILTER: "test" # On your PR/MR, prefix this with "kubechecks:"
# KUBECHECKS_SCHEMAS_LOCATION: https://github.com/zapier/kubecheck-schemas.git
KUBECHECKS_REPO_REFRESH_INTERVAL: 30s
KUBECHECKS_TIDY_OUTDATED_COMMENTS_MODE: "delete"
KUBECHECKS_ENABLE_CONFTEST: "false"
KUBECHECKS_REPO_SHALLOW_CLONE: "true"
KUBECHECKS_IDENTIFIER: "test"

deployment:
annotations:
reloader.stakater.com/auto: "true"

image:
pullPolicy: Never
pullPolicy: IfNotPresent
name: "kubechecks"
tag: ""

Expand Down
1 change: 1 addition & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ type ServerConfig struct {
MonitorAllApplications bool `mapstructure:"monitor-all-applications"`
OpenAIAPIToken string `mapstructure:"openai-api-token"`
RepoRefreshInterval time.Duration `mapstructure:"repo-refresh-interval"`
RepoShallowClone bool `mapstructure:"repo-shallow-clone"`
SchemasLocations []string `mapstructure:"schemas-location"`
ShowDebugInfo bool `mapstructure:"show-debug-info"`
TidyOutdatedCommentsMode string `mapstructure:"tidy-outdated-comments-mode"`
Expand Down
4 changes: 2 additions & 2 deletions pkg/events/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ type CheckEvent struct {
}

type repoManager interface {
Clone(ctx context.Context, cloneURL, branchName string) (*git.Repo, error)
Clone(ctx context.Context, cloneURL, branchName string, shallow bool) (*git.Repo, error)
}

func generateMatcher(ce *CheckEvent, repo *git.Repo) error {
Expand Down Expand Up @@ -192,7 +192,7 @@ func (ce *CheckEvent) getRepo(ctx context.Context, cloneURL, branchName string)
return repo, nil
}

repo, err = ce.repoManager.Clone(ctx, cloneURL, branchName)
repo, err = ce.repoManager.Clone(ctx, cloneURL, branchName, ce.ctr.Config.RepoShallowClone)
if err != nil {
return nil, errors.Wrap(err, "failed to clone repo")
}
Expand Down
5 changes: 4 additions & 1 deletion pkg/git/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ func NewRepoManager(cfg config.ServerConfig) *RepoManager {
return &RepoManager{cfg: cfg}
}

func (rm *RepoManager) Clone(ctx context.Context, cloneUrl, branchName string) (*Repo, error) {
func (rm *RepoManager) Clone(ctx context.Context, cloneUrl, branchName string, shallow bool) (*Repo, error) {
repo := New(rm.cfg, cloneUrl, branchName)
if shallow {
repo.Shallow = true
}

if err := repo.Clone(ctx); err != nil {
return nil, errors.Wrap(err, "failed to clone repository")
Expand Down
97 changes: 93 additions & 4 deletions pkg/git/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type Repo struct {
BranchName string
Config config.ServerConfig
CloneURL string
Shallow bool

// exposed state
Directory string
Expand All @@ -46,11 +47,17 @@ func New(cfg config.ServerConfig, cloneUrl, branchName string) *Repo {
}

func (r *Repo) Clone(ctx context.Context) error {
if r.Shallow {
return r.shallowClone(ctx)
}

var err error

r.Directory, err = os.MkdirTemp("/tmp", "kubechecks-repo-")
if err != nil {
return errors.Wrap(err, "failed to make temp dir")
if r.Directory == "" {
r.Directory, err = os.MkdirTemp("/tmp", "kubechecks-repo-")
if err != nil {
return errors.Wrap(err, "failed to make temp dir")
}
}

log.Info().
Expand Down Expand Up @@ -85,6 +92,63 @@ func (r *Repo) Clone(ctx context.Context) error {
return nil
}

// shallowClone clones r.CloneURL into r.Directory with a history depth of one
// commit. When r.Directory is empty, a fresh temp directory is created under
// /tmp first. Unless r.BranchName is "HEAD", that ref is then fetched (also at
// depth 1) and checked out. The first failing git command aborts the sequence;
// its combined output is logged and its error is returned unchanged.
func (r *Repo) shallowClone(ctx context.Context) error {
	if r.Directory == "" {
		dir, mkErr := os.MkdirTemp("/tmp", "kubechecks-repo-")
		r.Directory = dir
		if mkErr != nil {
			return errors.Wrap(mkErr, "failed to make temp dir")
		}
	}

	log.Info().
		Str("temp-dir", r.Directory).
		Str("clone-url", r.CloneURL).
		Str("branch", r.BranchName).
		Msg("cloning git repo")

	// The span covers every git invocation below.
	_, span := tracer.Start(ctx, "ShallowCloneRepo")
	defer span.End()

	// runGit executes a single git subcommand and hands back its combined
	// stdout/stderr so failures can be logged with git's own message.
	runGit := func(gitArgs ...string) ([]byte, error) {
		return r.execGitCommand(gitArgs...).CombinedOutput()
	}

	if output, cloneErr := runGit("clone", r.CloneURL, r.Directory, "--depth", "1"); cloneErr != nil {
		log.Error().Err(cloneErr).Msgf("unable to clone repository, %s", output)
		return cloneErr
	}

	// "HEAD" means "whatever the clone checked out"; anything else must be
	// fetched explicitly before it can be checked out.
	// NOTE(review): a depth-limited clone is single-branch by default per the
	// git docs — confirm the checkout resolves for non-default branch names
	// and not only for SHAs.
	if r.BranchName != "HEAD" {
		if output, fetchErr := runGit("fetch", "origin", r.BranchName, "--depth", "1"); fetchErr != nil {
			log.Error().Err(fetchErr).Msgf("unable to fetch %s repository, %s", r.BranchName, output)
			return fetchErr
		}

		if output, checkoutErr := runGit("checkout", r.BranchName); checkoutErr != nil {
			log.Error().Err(checkoutErr).Msgf("unable to checkout branch %s repository, %s", r.BranchName, output)
			return checkoutErr
		}
	}

	// Listing the checked-out tree is diagnostic only; a walk failure is
	// logged as a warning and never fails the clone.
	if log.Trace().Enabled() {
		if walkErr := filepath.WalkDir(r.Directory, printFile); walkErr != nil {
			log.Warn().Err(walkErr).Msg("failed to walk directory")
		}
	}

	log.Info().Msg("repo has been cloned")
	return nil
}

func printFile(s string, d fs.DirEntry, err error) error {
if err != nil {
return err
Expand Down Expand Up @@ -118,8 +182,24 @@ func (r *Repo) MergeIntoTarget(ctx context.Context, ref string) error {
attribute.String("sha", ref),
))
defer span.End()
merge_command := []string{"merge", ref}
// For shallow clones, we need to pull the ref into the repo
if r.Shallow {
ref = strings.TrimPrefix(ref, "origin/")
cmd := r.execGitCommand("fetch", "origin", fmt.Sprintf("%s:%s", ref, ref), "--depth", "1")
out, err := cmd.CombinedOutput()
if err != nil {
telemetry.SetError(span, err, "fetch origin ref")
log.Error().Err(err).Msgf("unable to fetch ref %s, %s", ref, out)
return err
}
// When merging shallow clones, we need to allow unrelated histories
// and use the "theirs" strategy option to avoid conflicts.
// The downside is that the result may not be entirely accurate and may overwrite changes in the target branch.
merge_command = []string{"merge", ref, "--allow-unrelated-histories", "-X", "theirs"}
}

cmd := r.execGitCommand("merge", ref)
cmd := r.execGitCommand(merge_command...)
out, err := cmd.CombinedOutput()
if err != nil {
telemetry.SetError(span, err, "merge commit into branch")
Expand All @@ -131,6 +211,15 @@ func (r *Repo) MergeIntoTarget(ctx context.Context, ref string) error {
}

func (r *Repo) Update(ctx context.Context) error {
// Since we're shallow cloning, to update we need to wipe the directory and re-clone
if r.Shallow {
r.Wipe()
err := os.Mkdir(r.Directory, 0700)
if err != nil {
return errors.Wrap(err, "failed to create repo directory")
}
return r.Clone(ctx)
}
cmd := r.execGitCommand("pull")
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stdout
Expand Down
1 change: 1 addition & 0 deletions pkg/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ func Pointer[T interface{}](item T) *T {
}

func WipeDir(dir string) {
log.Debug().Str("path", dir).Msg("wiping path")
if err := os.RemoveAll(dir); err != nil {
log.Error().
Err(err).
Expand Down
Loading