Skip to content

Commit

Permalink
Create a go binary to wrap the shell commands to create PRs. (kubeflo…
Browse files Browse the repository at this point in the history
…w#714)

* Create a go binary to wrap the shell commands to create PRs.

To better enable GitOps we want to make it easier to create pipelines
that automatically create PRs.

This PR creates a new implementation of create_manfiests_pr.py that is
more reusable.

This PR introduces a go binary prctl that will contain subcommands
related to creating PRs.

The basic flow for automating PRs is

1. Clone the repo and create a branch on a fork repo
2. Apply some use case specific logic to modify one or more files
3. Commit and push the changes
4. Create PR from the branch

Steps 1, 3, 4 are largely the same for different use cases. With that
in mind we want to create a reusable binary so that the only part
that has to change is step 2.

There are a couple of reasons for rewriting the code in Go versus sticking
with Python:

1. Being able to build statically linked binaries makes it a lot easier
   to create easily distributable binaries

2. Longer term instead of shelling out to CLI tools like git and hub
   we can link and use appropriate go libraries

   * We should reach out to the Flux/Argo communities to see if this
     should be a part of their GitOps toolkit
     https://github.com/fluxcd/toolkit

   * GitHub's CLIs (hub and now gh) are written in go.

* Related to kubeflow/kubeflow#4789
* Related to kubeflow/code-intelligence#155

* Add a dockerfile.

* * Use Alpine as a base image and install ca-certificates
  • Loading branch information
jlewi authored Jul 4, 2020
1 parent c2af63b commit 11317c5
Show file tree
Hide file tree
Showing 7 changed files with 561 additions and 0 deletions.
67 changes: 67 additions & 0 deletions go/Dockerfile.prctl
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Build the prctl binary and package it together with git and hub.
#
# The Docker context is expected to be the directory that contains go.mod and
# cmd/prctl/ (the go/ directory of this repository), because the whole context
# is copied into the builder stage and cmd/prctl/main.go is compiled relative
# to it.
ARG GOLANG_VERSION=1.14
FROM golang:${GOLANG_VERSION} as builder

WORKDIR /workspace

COPY . /workspace

# Build a statically linked binary (CGO disabled) so it can run on the
# minimal runtime image below.
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o prctl cmd/prctl/main.go

# Throwaway stage used only to download the hub CLI; only the extracted
# binary is copied into the final image.
FROM ubuntu:18.04 as hub_builder

RUN apt-get update -y && \
    apt-get install -y curl

# Install the hub CLI for git
RUN cd /tmp && \
    curl -LO https://github.com/github/hub/releases/download/v2.11.2/hub-linux-amd64-2.11.2.tgz && \
    tar -xvf hub-linux-amd64-2.11.2.tgz && \
    cp hub-linux-amd64-2.11.2/bin/hub /usr/local/hub

# TODO(jlewi): We would really like to use a distroless image but we need to shell out to
# git. I tried running git on a base-debian10 distroless but that was missing some of the
# libraries. Ideally if prctl uses go libraries instead of shelling out to git
# we could replace this with a distroless image.
# Refer to https://github.com/GoogleContainerTools/distroless for more details
#
# NOTE(review): hub is a prebuilt release binary; confirm that it runs on
# Alpine (musl) in this image.
FROM alpine:3.11

WORKDIR /

# Alpine uses apk, not apt-get.
# Without ca-certificates we will get SSL errors with git.
# --no-cache avoids leaving the package index in the image, so no separate
# cleanup step is needed.
RUN set -ex \
    && apk add --no-cache \
        git \
        ca-certificates

# Install the hub CLI for git
COPY --from=hub_builder /usr/local/hub /usr/local/bin/
COPY --from=builder /workspace/prctl /usr/local/bin/

ENTRYPOINT ["/usr/local/bin/prctl"]
2 changes: 2 additions & 0 deletions go/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# build-prctl-image runs a skaffold build with the "releasing" profile against
# the kubeflow-releasing kube context and records the build result in
# prctl_latest_image.json.
# The target does not produce a file named after itself, so it is declared
# phony to make sure it always runs.
.PHONY: build-prctl-image
build-prctl-image:
	skaffold build -p releasing --kube-context=kubeflow-releasing -v info --file-output=prctl_latest_image.json
263 changes: 263 additions & 0 deletions go/cmd/prctl/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
package main

import (
"github.com/onrik/logrus/filename"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"os/exec"
"regexp"
)

var (
	// rootCmd is the top-level prctl command. The subcommands declared below
	// are attached to it in init.
	rootCmd = &cobra.Command{
		Use:   "prctl",
		Short: "A CLI to help with creating PRs",
		Long:  `prctl is a CLI to help create PRs as part of GitOps workflows`,
	}

	// Values of the command-line flags. They are bound to the flags of the
	// individual subcommands in init and read by the Run functions below.
	upstreamName string
	repoDir      string
	forkName     string
	fork         string
	branchName   string
	messagePath  string
	baseBranch   string

	// branchCmd implements "prctl branch".
	branchCmd = &cobra.Command{
		Use:   "branch",
		Short: "Create a branch to be used for creating PRs.",
		Long:  `prctl branch creates a branch on a remote repo to contain any changes`,
		Run: func(cmd *cobra.Command, args []string) {
			if err := branch(repoDir, upstreamName, forkName, fork, branchName); err != nil {
				log.Fatalf("branch failed; error: %+v", err)
			}
		},
	}

	// pushCmd implements "prctl push".
	pushCmd = &cobra.Command{
		Use:   "push",
		Short: "Commit changes and push them.",
		Long:  `prctl push commit changes and push them`,
		Run: func(cmd *cobra.Command, args []string) {
			if err := push(repoDir, forkName, branchName, messagePath); err != nil {
				log.Fatalf("push failed; error: %+v", err)
			}
		},
	}

	// prCmd implements "prctl pull-request".
	prCmd = &cobra.Command{
		Use:   "pull-request",
		Short: "Create a PR in GitHub.",
		Long:  `prctl pull-request creates a PR`,
		Run: func(cmd *cobra.Command, args []string) {
			if err := pr(repoDir, baseBranch, messagePath); err != nil {
				// Name the failing subcommand correctly; this used to say
				// "push failed", which pointed at the wrong step.
				log.Fatalf("pull-request failed; error: %+v", err)
			}
		},
	}
)

// init wires the subcommands into rootCmd, binds their flags to the
// package-level flag variables and installs a logrus hook that adds the
// source file name to every log entry.
func init() {
	rootCmd.AddCommand(branchCmd)
	rootCmd.AddCommand(pushCmd)
	rootCmd.AddCommand(prCmd)

	// None of the flags has a shorthand, so StringVar is used rather than
	// StringVarP with an empty shorthand.
	branchFlags := branchCmd.Flags()
	branchFlags.StringVar(&upstreamName, "upstreamName", "origin", "The name of the remote repository corresponding to the upstream URL")
	branchFlags.StringVar(&repoDir, "repoDir", "", "Directory where the code is checked out")
	branchFlags.StringVar(&forkName, "forkName", "", "Name to assign the remote repo for the fork")
	// --fork is passed to "git remote add" as the remote's URL, so describe it
	// as such instead of repeating the --forkName help text.
	branchFlags.StringVar(&fork, "fork", "", "URL of the fork repo to add as a remote")
	branchFlags.StringVar(&branchName, "branchName", "", "Name of the branch to create")

	pushFlags := pushCmd.Flags()
	pushFlags.StringVar(&repoDir, "repoDir", "", "Directory where the code is checked out")
	pushFlags.StringVar(&forkName, "forkName", "", "Name of the remote repo for the fork to push to")
	pushFlags.StringVar(&branchName, "branchName", "", "Name of the branch to push")
	pushFlags.StringVar(&messagePath, "messagePath", "", "Path to a file containing the message to use for the commit")

	prFlags := prCmd.Flags()
	prFlags.StringVar(&repoDir, "repoDir", "", "Directory where the code is checked out")
	prFlags.StringVar(&baseBranch, "baseBranch", "kubeflow:master", "Name of the branch to use as the base")
	prFlags.StringVar(&messagePath, "messagePath", "", "Path to a file containing the message to use for the pull request")

	// Add filename as one of the fields of the structured log message.
	filenameHook := filename.NewHook()
	filenameHook.Field = "filename"
	log.AddHook(filenameHook)
}

// execHelper pairs a command with a table of known failure signatures so
// that callers can tell expected failures apart from unexpected ones.
type execHelper struct {
	// cmd is the prepared command that Run executes.
	cmd *exec.Cmd
	// errorRes associates an arbitrary identifier with a list of regular
	// expressions. When the command fails and one of the expressions matches
	// its output, Run reports the identifier instead of an error.
	errorRes map[string][]string
}

// Run executes the command and classifies a failure using errorRes.
//
// Results:
//   - ("", nil) when the command succeeds.
//   - (id, nil) when the command exits with a non-zero status and its stderr
//     or stdout matches one of the regexes registered under id. If several
//     ids match, which one is returned is unspecified because map iteration
//     order is random.
//   - ("", err) when the command could not be run, a regex is invalid, or the
//     command failed without matching any registered pattern.
func (e *execHelper) Run() (string, error) {
	// Output returns whatever was written to stdout even when the command
	// fails, and fills in ExitError.Stderr as long as cmd.Stderr is unset.
	out, err := e.cmd.Output()
	if err == nil {
		log.Infof("Output of %v; %v", e.cmd.String(), string(out))
		return "", nil
	}

	exitError, ok := err.(*exec.ExitError)
	if !ok {
		// Not a non-zero exit status (e.g. the binary was not found), so
		// there is no output to classify.
		return "", errors.WithStack(err)
	}

	stderr := string(exitError.Stderr)
	stdout := string(out)
	log.Infof("Output of %v; %v", e.cmd.String(), stderr)
	if stdout != "" {
		log.Infof("Stdout of %v; %v", e.cmd.String(), stdout)
	}

	// Some tools report expected failures on stdout rather than stderr
	// (git commit prints "nothing to commit" on stdout), so both streams are
	// checked against the registered patterns.
	for eid, patterns := range e.errorRes {
		for _, re := range patterns {
			for _, text := range []string{stderr, stdout} {
				match, matchErr := regexp.MatchString(re, text)
				if matchErr != nil {
					return "", errors.WithStack(matchErr)
				}
				if match {
					return eid, nil
				}
			}
		}
	}

	// Unrecognized failure: keep the underlying error and attach stderr so
	// the cause is visible to whoever reports it.
	return "", errors.Wrapf(err, "command %v failed; stderr: %v", e.cmd.String(), stderr)
}

// branch prepares the checkout in repoDir for a pull request.
//
// It performs the following git operations inside repoDir:
//  1. "git fetch --unshallow"; a failure saying the repository is already
//     complete is tolerated.
//  2. "git checkout -b branchName upstreamName/master"; if the branch already
//     exists it is checked out instead.
//  3. "git remote add forkName forkUrl"; a failure saying the remote already
//     exists is tolerated.
//
// Parameters:
//   - repoDir: directory in which the git commands are run.
//   - upstreamName: name of the remote whose master branch is the start point.
//   - forkName: name to give the remote that changes will be pushed to.
//   - forkUrl: URL of that remote.
//   - branchName: name of the branch to create or check out.
//
// Returns a non-nil error describing the step that failed.
func branch(repoDir string, upstreamName string, forkName string, forkUrl string, branchName string) error {
	// git runs one git subcommand inside repoDir and reports the id of the
	// matching entry of errorRes, if any (see execHelper.Run).
	git := func(errorRes map[string][]string, args ...string) (string, error) {
		e := &execHelper{
			cmd:      exec.Command("git", args...),
			errorRes: errorRes,
		}
		e.cmd.Dir = repoDir
		return e.Run()
	}

	_, err := git(
		map[string][]string{"unshallow": {".*unshallow on a complete repository.*"}},
		"fetch", "--unshallow",
	)
	if err != nil {
		return errors.Wrapf(err, "There was a problem unshallowing the repo.")
	}

	// Create a new branch for the pull request.
	result, err := git(
		map[string][]string{"exists": {".*branch.*already.*exists.*"}},
		"checkout", "-b", branchName, upstreamName+"/master",
	)
	if err != nil {
		return errors.Wrapf(err, "There was a problem checking out a branch.")
	}

	if result == "exists" {
		// The branch was created by an earlier run; switch to it as is.
		if _, err := git(nil, "checkout", branchName); err != nil {
			return errors.Wrapf(err, "There was a problem checking out the branch.")
		}
	}

	// Add the remote repo where things will be pushed. Use the forkUrl
	// argument rather than the package-level flag variable so the function
	// honors what its caller passed in.
	_, err = git(
		map[string][]string{"exists": {".*remote.*already.*exists.*"}},
		"remote", "add", forkName, forkUrl,
	)
	if err != nil {
		return errors.Wrapf(err, "There was a problem adding the remote repo.")
	}
	return nil
}

// push records all changes to tracked files in repoDir as a commit and
// pushes branchName to the remote forkName.
//
// The commit message is read by git from the file at messagePath. The
// "empty" pattern asks Run to treat a "nothing to commit" failure as
// non-fatal, in which case the push is still attempted.
//
// Returns a non-nil error if either the commit or the push fails.
func push(repoDir string, forkName string, branchName string, messagePath string) error {
	commitCmd := exec.Command("git", "commit", "-a", "-F", messagePath)
	commitCmd.Dir = repoDir

	commit := &execHelper{
		cmd: commitCmd,
		errorRes: map[string][]string{
			"empty": {".*nothing to commit.*"},
		},
	}
	if _, err := commit.Run(); err != nil {
		return errors.Wrapf(err, "There was an error commiting the changes.")
	}

	pushCmd := exec.Command("git", "push", forkName, branchName)
	pushCmd.Dir = repoDir

	// No failure of the push is considered expected.
	upload := &execHelper{
		cmd:      pushCmd,
		errorRes: map[string][]string{},
	}
	if _, err := upload.Run(); err != nil {
		return errors.Wrapf(err, "There was a problem pushing the changes.")
	}

	return nil
}

// pr opens a pull request by running "hub pull-request" inside repoDir.
//
// baseBranch is passed to hub as the base (-b) and the file at messagePath
// supplies the pull request message (-F). A failure whose output matches
// "already exists" is registered as an expected outcome and is not reported
// as an error.
//
// Returns a non-nil error if hub fails for any other reason.
func pr(repoDir string, baseBranch, messagePath string) error {
	// TODO(jlewi): We might want to use gh here. gh is the new official CLI
	// https://github.com/cli/cli/blob/trunk/docs/gh-vs-hub.md. According to the FAQ
	// https://github.com/cli/cli/blob/trunk/docs/gh-vs-hub.md#should-i-use-gh-or-hub
	// hub might be better for scripting.
	// both are go so we could potentially link them in.
	hubCmd := exec.Command("hub", "pull-request", "-f", "-b", baseBranch, "-F", messagePath)
	hubCmd.Dir = repoDir

	knownFailures := map[string][]string{
		"exists": {".*already exists.*"},
	}
	helper := &execHelper{cmd: hubCmd, errorRes: knownFailures}

	if _, err := helper.Run(); err != nil {
		return errors.Wrapf(err, "There was an error creating the pull request.")
	}
	return nil
}

// main runs the prctl root command and exits with a non-zero status when
// command execution fails (for example an unknown subcommand or a bad flag),
// so that callers such as scripts and pipelines can detect the failure.
func main() {
	if err := rootCmd.Execute(); err != nil {
		log.Fatalf("prctl failed; error: %+v", err)
	}
}
10 changes: 10 additions & 0 deletions go/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module github.com/kubeflow/testing/go

go 1.13

// All of these packages are imported directly by cmd/prctl/main.go, so they
// are direct requirements and must not carry the "// indirect" marker.
require (
	github.com/onrik/logrus v0.6.0
	github.com/pkg/errors v0.9.1
	github.com/sirupsen/logrus v1.6.0
	github.com/spf13/cobra v1.0.0
)
Loading

0 comments on commit 11317c5

Please sign in to comment.