Skip to content

Commit

Permalink
chore: more rbac proxy
Browse files Browse the repository at this point in the history
  • Loading branch information
shreddedbacon committed Dec 4, 2024
1 parent 7a42d14 commit bc223b3
Show file tree
Hide file tree
Showing 9 changed files with 201 additions and 100 deletions.
27 changes: 14 additions & 13 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import (

"github.com/uselagoon/remote-controller/internal/harbor"
"github.com/uselagoon/remote-controller/internal/helpers"
"github.com/uselagoon/remote-controller/internal/metrics"
"github.com/uselagoon/remote-controller/internal/utilities/deletions"
"github.com/uselagoon/remote-controller/internal/utilities/pruner"

Expand Down Expand Up @@ -185,10 +184,10 @@ func main() {

var unauthenticatedRegistry string

flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080",
"The address the metric endpoint binds to.")
flag.BoolVar(&secureMetrics, "metrics-secure", false,
"If set the metrics endpoint is served securely")
flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+
"Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
flag.BoolVar(&secureMetrics, "metrics-secure", true,
"If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.")
flag.BoolVar(&enableHTTP2, "enable-http2", false,
"If set, HTTP/2 will be enabled for the metrics and webhook servers")

Expand Down Expand Up @@ -489,10 +488,16 @@ func main() {
tlsOpts = append(tlsOpts, disableHTTP2)
}
metricsServerOptions := metricsserver.Options{
BindAddress: metricsAddr,
SecureServing: secureMetrics,
TLSOpts: tlsOpts,
FilterProvider: filters.WithAuthenticationAndAuthorization,
BindAddress: metricsAddr,
SecureServing: secureMetrics,
TLSOpts: tlsOpts,
}
if secureMetrics {
// FilterProvider is used to protect the metrics endpoint with authn/authz.
// These configurations ensure that only authorized users and service accounts
// can access the metrics endpoint. The RBAC rules are configured in 'config/rbac/kustomization.yaml'. More info:
// https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.19.1/pkg/metrics/filters#WithAuthenticationAndAuthorization
metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization
}
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
Scheme: scheme,
Expand Down Expand Up @@ -1012,10 +1017,6 @@ func main() {
}
// +kubebuilder:scaffold:builder

setupLog.Info("starting lagoon metrics server")
m := metrics.NewServer(setupLog, ":9912")
defer m.Shutdown(context.Background())

setupLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
Expand Down
2 changes: 2 additions & 0 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ resources:
- ../crd
- ../rbac
- ../manager
# [METRICS] Expose the controller manager metrics service.
- metrics_service.yaml
patches:
- path: envs.yaml
- path: manager_auth_proxy_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ spec:
ports:
- name: https
port: 8443
targetPort: https
protocol: TCP
targetPort: 8443
selector:
control-plane: controller-manager
control-plane: controller-manager
3 changes: 2 additions & 1 deletion config/rbac/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ resources:
# More info: https://book.kubebuilder.io/reference/metrics.html
- metrics_auth_role.yaml
- metrics_auth_role_binding.yaml
- metrics_reader_role.yaml
- metrics_reader_role.yaml
- metrics_reader_role_binding.yaml
12 changes: 12 additions & 0 deletions config/rbac/metrics_reader_role_binding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: metrics-reader-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: metrics-reader
subjects:
- kind: ServiceAccount
name: controller-manager
namespace: system
164 changes: 90 additions & 74 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -1,89 +1,85 @@
package metrics

import (
"fmt"
"net/http"
"time"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// NewServer returns a *http.Server serving prometheus metrics in a new
// goroutine.
// Caller should defer Shutdown() for cleanup.
func NewServer(log logr.Logger, addr string) *http.Server {
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.Handler())
s := http.Server{
Addr: addr,
Handler: mux,
ReadTimeout: 16 * time.Second,
WriteTimeout: 16 * time.Second,
}
go func() {
if err := s.ListenAndServe(); err != http.ErrServerClosed {
log.Error(fmt.Errorf("metrics server did not shut down cleanly"), err.Error())
}
}()
return &s
}

var (
// general counters for builds
BuildsRunningGauge = promauto.NewGauge(prometheus.GaugeOpts{
Name: "lagoon_builds_running_current",
Help: "The total number of Lagoon builds running",
})
BuildsPendingGauge = promauto.NewGauge(prometheus.GaugeOpts{
Name: "lagoon_builds_pending_current",
Help: "The total number of Lagoon builds pending or queued",
})
BuildsStartedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "lagoon_builds_started_total",
Help: "The total number of Lagoon builds started",
})
BuildsCompletedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "lagoon_builds_completed_total",
Help: "The total number of Lagoon builds completed",
})
BuildsFailedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "lagoon_builds_failed_total",
Help: "The total number of Lagoon builds failed",
})
BuildsCancelledCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "lagoon_builds_cancelled_total",
Help: "The total number of Lagoon builds cancelled",
})
BuildsRunningGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "lagoon_builds_running_current",
Help: "The total number of Lagoon builds running",
},
)
BuildsPendingGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "lagoon_builds_pending_current",
Help: "The total number of Lagoon builds pending or queued",
},
)
BuildsStartedCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "lagoon_builds_started_total",
Help: "The total number of Lagoon builds started",
},
)
BuildsCompletedCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "lagoon_builds_completed_total",
Help: "The total number of Lagoon builds completed",
},
)
BuildsFailedCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "lagoon_builds_failed_total",
Help: "The total number of Lagoon builds failed",
},
)
BuildsCancelledCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "lagoon_builds_cancelled_total",
Help: "The total number of Lagoon builds cancelled",
},
)

// general counters for tasks
TasksRunningGauge = promauto.NewGauge(prometheus.GaugeOpts{
Name: "lagoon_tasks_running_current",
Help: "The total number of Lagoon tasks running",
})
TasksStartedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "lagoon_tasks_started_total",
Help: "The total number of Lagoon tasks started",
})
TasksCompletedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "lagoon_tasks_completed_total",
Help: "The total number of Lagoon tasks completed",
})
TasksFailedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "lagoon_tasks_failed_total",
Help: "The total number of Lagoon tasks failed",
})
TasksCancelledCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "lagoon_tasks_cancelled_total",
Help: "The total number of Lagoon tasks cancelled",
})
TasksRunningGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "lagoon_tasks_running_current",
Help: "The total number of Lagoon tasks running",
},
)
TasksStartedCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "lagoon_tasks_started_total",
Help: "The total number of Lagoon tasks started",
},
)
TasksCompletedCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "lagoon_tasks_completed_total",
Help: "The total number of Lagoon tasks completed",
},
)
TasksFailedCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "lagoon_tasks_failed_total",
Help: "The total number of Lagoon tasks failed",
},
)
TasksCancelledCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "lagoon_tasks_cancelled_total",
Help: "The total number of Lagoon tasks cancelled",
},
)

// buildStatus will count the build transition steps
// when the build step changes, the count is removed and the new step metric is created
// this is useful to gauge how long particular steps take in a build
BuildStatus = promauto.NewGaugeVec(prometheus.GaugeOpts{
BuildStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "lagoon_build_status",
Help: "The status of running Lagoon builds",
},
Expand All @@ -97,7 +93,7 @@ var (
// RunningStatus will count when a build or task is running
// when the build or task is complete, the count is removed
// this is useful to gauge how long a build or task runs for
BuildRunningStatus = promauto.NewGaugeVec(prometheus.GaugeOpts{
BuildRunningStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "lagoon_build_running_status",
Help: "The duration of running Lagoon builds",
},
Expand All @@ -106,7 +102,7 @@ var (
"build_namespace",
},
)
TaskRunningStatus = promauto.NewGaugeVec(prometheus.GaugeOpts{
TaskRunningStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "lagoon_task_running_status",
Help: "The duration of running Lagoon tasks",
},
Expand All @@ -116,3 +112,23 @@ var (
},
)
)

func init() {
// Register custom metrics with the global prometheus registry
metrics.Registry.MustRegister(
BuildsRunningGauge,
BuildsPendingGauge,
BuildsStartedCounter,
BuildsCompletedCounter,
BuildsFailedCounter,
BuildsCancelledCounter,
TasksRunningGauge,
TasksStartedCounter,
TasksCompletedCounter,
TasksFailedCounter,
TasksCancelledCounter,
BuildStatus,
BuildRunningStatus,
TaskRunningStatus,
)
}
47 changes: 37 additions & 10 deletions test/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,20 @@ var (

duration = 600 * time.Second
interval = 1 * time.Second

metricLabels = []string{
"lagoon_builds_cancelled_total",
"lagoon_builds_completed_total",
"lagoon_builds_failed_total",
"lagoon_builds_pending_current",
"lagoon_builds_running_current",
"lagoon_builds_started_total",
"lagoon_tasks_cancelled_total",
"lagoon_tasks_completed_total",
"lagoon_tasks_failed_total",
"lagoon_tasks_running_current",
"lagoon_tasks_started_total",
}
)

func init() {
Expand All @@ -59,16 +73,20 @@ var _ = Describe("controller", Ordered, func() {

// comment to prevent cleaning up controller namespace and local services
AfterAll(func() {
By("removing manager namespace")
cmd := exec.Command("kubectl", "delete", "ns", namespace)
_, _ = utils.Run(cmd)
By("stop metrics consumer")
utils.StartMetricsConsumer()

// By("removing manager namespace")
// cmd := exec.Command("kubectl", "delete", "ns", namespace)
// _, _ = utils.Run(cmd)

By("stop local services")
utils.StopLocalServices()
// By("stop local services")
// utils.StopLocalServices()
})

Context("Operator", func() {
It("should run successfully", func() {
// start tests
var controllerPodName string
var err error

Expand Down Expand Up @@ -116,7 +134,6 @@ var _ = Describe("controller", Ordered, func() {
controllerPodName = podNames[0]
ExpectWithOffset(2, controllerPodName).Should(ContainSubstring("controller-manager"))

// Validate pod status
cmd = exec.Command("kubectl", "get",
"pods", controllerPodName, "-o", "jsonpath={.status.phase}",
"-n", namespace,
Expand All @@ -130,6 +147,11 @@ var _ = Describe("controller", Ordered, func() {
}
EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed())

By("start metrics consumer")
Expect(utils.StartMetricsConsumer()).To(Succeed())

time.Sleep(30 * time.Second)

By("validating that lagoonbuilds are working")
for _, name := range []string{"7m5zypx", "8m5zypx", "9m5zypx", "1m5zypx"} {
if name == "9m5zypx" {
Expand Down Expand Up @@ -363,7 +385,6 @@ var _ = Describe("controller", Ordered, func() {
controllerPodName = podNames[0]
ExpectWithOffset(2, controllerPodName).Should(ContainSubstring("controller-manager"))
verifyRobotCredentialsRotate := func() error {
// Validate pod status
cmd = exec.Command("kubectl", "logs",
controllerPodName, "-c", "manager",
"-n", namespace,
Expand Down Expand Up @@ -411,9 +432,15 @@ var _ = Describe("controller", Ordered, func() {
return nil
}
EventuallyWithOffset(1, verifyNamespaceRemoved, duration, interval).Should(Succeed())

By("validating that there are metrics")
runCmd := `curl -s -k -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" https://remote-controller-controller-manager-metrics-service.remote-controller-system.svc.cluster.local:8443/metrics | grep -v "#" | grep "lagoon_"`
output, err := utils.RunCommonsCommand(namespace, runCmd)
ExpectWithOffset(2, err).NotTo(HaveOccurred())
fmt.Printf("metrics: %s", string(output))
err = utils.CheckStringContainsStrings(string(output), metricLabels)
ExpectWithOffset(2, err).NotTo(HaveOccurred())
// End tests
})
// uncomment to debug ...
// time.Sleep(5 * time.Minute)
})

})
Loading

0 comments on commit bc223b3

Please sign in to comment.