OCPVE-709: feat: allow dlv debugging for lvm operator and vgmanager #438

Merged · 1 commit · Oct 5, 2023
24 changes: 24 additions & 0 deletions CONTRIBUTING.md
@@ -86,6 +86,30 @@ Prerequisites: Ensure you have a running CRC Cluster (Step 6)
3. `oc apply -k https://github.com/kubernetes-csi/external-snapshotter//deploy/kubernetes/snapshot-controller`
4. Start again at Step 7

#### Remotely debugging LVMS inside a cluster

A typical issue for any workload that interacts with nodes in Kubernetes is that it is hard to test properly, because each node has its own specific environment and is hard to reach for debugging.
During development, you can still debug both `lvm-operator` and `vgmanager` by attaching to a debugger running inside the cluster.

For this you need two things:
1. An image built specifically to include a debugging server and debugging symbols for stack-trace information.
Run `make docker-build-debug` to build one for you.
2. A deployment that starts the operator through the debugging server.
We have the [`debug`](config/debug) kustomize target for this.
Run `make deploy-debug` after building the image to deploy the operator under the debugger.

You can now attach to the binaries in the cluster on port `2345`.
First, port-forward into the cluster:
1. Run `oc port-forward deploy/lvms-operator 2345:2345` to port-forward to the controller.
2. Run `oc port-forward pod/vgmanager-xxx 2345:2345` to port-forward to a vgmanager pod on a node.

Once the port is forwarded, you only need to connect your debugger and set breakpoints.
Here are some tutorials on attaching to a running binary, followed by a command-line alternative:

- [Visual Studio Code](https://github.com/golang/vscode-go/blob/master/docs/debugging.md#connect-to-headless-delve-with-target-specified-at-server-start-up)
- [Goland](https://www.jetbrains.com/help/go/attach-to-running-go-processes-with-debugger.html#step-3-create-the-remote-run-debug-configuration-on-the-client-computer)
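
If you prefer the command line over an IDE, you can also attach with the Delve client directly (a minimal sketch, assuming `dlv` is installed on your workstation): run `dlv connect 127.0.0.1:2345`, then set breakpoints with `break` and resume execution with `continue`.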

## Commits Per Pull Request

Pull requests should always represent a complete logical change. Where possible, pull requests should be composed of multiple commits that each make small but meaningful changes. Striking a balance between minimal commits and logically complete changes is an art as much as a science, but when it is possible and reasonable, divide your pull request into more commits.
10 changes: 9 additions & 1 deletion Makefile
@@ -182,14 +182,17 @@ ARCH ?= amd64
all: build

build: generate fmt vet ## Build manager binary.
GOOS=$(OS) GOARCH=$(ARCH) go build -o bin/lvms cmd/main.go
GOOS=$(OS) GOARCH=$(ARCH) go build -gcflags='all=-N -l' -o bin/lvms cmd/main.go

build-prometheus-alert-rules: jsonnet monitoring/mixin.libsonnet monitoring/alerts/alerts.jsonnet monitoring/alerts/*.libsonnet
$(JSONNET) -S monitoring/alerts/alerts.jsonnet > config/prometheus/prometheus_rules.yaml

docker-build: ## Build docker image with the manager.
$(IMAGE_BUILD_CMD) build --platform=${OS}/${ARCH} -t ${IMG} .

docker-build-debug: ## Build remote-debugging enabled docker image with the manager. See CONTRIBUTING.md for more information
$(IMAGE_BUILD_CMD) build -f hack/debug.Dockerfile --platform=${OS}/${ARCH} -t ${IMG} .

docker-push: ## Push docker image with the manager.
$(IMAGE_BUILD_CMD) push ${IMG}

@@ -210,6 +213,11 @@ deploy: update-mgr-env manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
cd config/webhook && $(KUSTOMIZE) edit set nameprefix ${MANAGER_NAME_PREFIX}
$(KUSTOMIZE) build config/default | kubectl apply -f -

deploy-debug: update-mgr-env manifests kustomize ## Deploy controller started through delve to the K8s cluster specified in ~/.kube/config. See CONTRIBUTING.md for more information
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} && $(KUSTOMIZE) edit set nameprefix ${MANAGER_NAME_PREFIX}
cd config/webhook && $(KUSTOMIZE) edit set nameprefix ${MANAGER_NAME_PREFIX}
$(KUSTOMIZE) build config/debug | kubectl apply -f -

undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config.
$(KUSTOMIZE) build config/default | kubectl delete -f -

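A typical end-to-end sequence for these targets (a suggested workflow, not something this change prescribes) would be `make docker-build-debug IMG=<image>`, `make docker-push IMG=<image>` if your cluster pulls the image from a registry, and then `make deploy-debug IMG=<image>`.
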
9 changes: 9 additions & 0 deletions cmd/operator/operator.go
@@ -55,13 +55,17 @@ const (
DefaultEnableLeaderElection = false
)

var DefaultVGManagerCommand = []string{"/lvms", "vgmanager"}

type Options struct {
Scheme *runtime.Scheme
SetupLog logr.Logger

diagnosticsAddr string
healthProbeAddr string
enableLeaderElection bool

vgManagerCommand []string
}

// NewCmd creates a new CLI command
@@ -88,6 +92,10 @@ func NewCmd(opts *Options) *cobra.Command {
"Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.",
)

cmd.Flags().StringSliceVar(
&opts.vgManagerCommand, "vgmanager-cmd", DefaultVGManagerCommand, "The command that should be used to start vgmanager on the node. Useful for debugging purposes but normally not changed.",
)

return cmd
}

@@ -165,6 +173,7 @@ func run(cmd *cobra.Command, _ []string, opts *Options) error {
Namespace: operatorNamespace,
TopoLVMLeaderElectionPassthrough: leaderElectionConfig,
EnableSnapshotting: enableSnapshotting,
VGManagerCommand: opts.vgManagerCommand,
}).SetupWithManager(mgr); err != nil {
return fmt.Errorf("unable to create LVMCluster controller: %w", err)
}
6 changes: 6 additions & 0 deletions config/debug/kustomization.yaml
@@ -0,0 +1,6 @@
patchesStrategicMerge:
- manager_debug_patch.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../default
27 changes: 27 additions & 0 deletions config/debug/manager_debug_patch.yaml
@@ -0,0 +1,27 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: operator
namespace: system
spec:
template:
spec:
containers:
- name: manager
command:
- "/usr/sbin/dlv"
- "exec"
- "--listen=:2345"
- "--headless=true"
- "--log=true"
- "--accept-multiclient"
- "--api-version=2"
- "--continue"
- "/usr/sbin/lvms"
- "--"
- "operator"
args:
- "--vgmanager-cmd=/usr/sbin/dlv,exec,--listen=:2345,--headless=true,--log=true,--accept-multiclient,--api-version=2,--continue,/usr/sbin/lvms,--,vgmanager"
ports:
- containerPort: 2345
name: "debug"
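The single-line `--vgmanager-cmd=...` value in the patch above relies on the operator registering the flag with `StringSliceVar`, so the comma-separated string is split into the command slice that later lands in the vgmanager DaemonSet. A minimal sketch of that parsing (not part of this PR; it uses `spf13/pflag` directly, which backs cobra's flag handling):

```go
package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

func main() {
	// Register the flag the same way cmd/operator/operator.go does:
	// a string-slice flag whose default is the plain vgmanager command.
	var vgManagerCommand []string
	fs := pflag.NewFlagSet("operator", pflag.ContinueOnError)
	fs.StringSliceVar(&vgManagerCommand, "vgmanager-cmd",
		[]string{"/lvms", "vgmanager"},
		"command used to start vgmanager on the node")

	// The value set in manager_debug_patch.yaml: one comma-separated string.
	_ = fs.Parse([]string{
		"--vgmanager-cmd=/usr/sbin/dlv,exec,--listen=:2345,--headless=true,--log=true,--accept-multiclient,--api-version=2,--continue,/usr/sbin/lvms,--,vgmanager",
	})

	// Each comma-separated element becomes one entry in the slice that the
	// LVMCluster controller hands to the vgmanager DaemonSet as its command.
	fmt.Println(vgManagerCommand)
}
```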
49 changes: 49 additions & 0 deletions hack/debug.Dockerfile
@@ -0,0 +1,49 @@
# https://docs.docker.com/engine/reference/builder/#automatic-platform-args-in-the-global-scope
ARG TARGETOS
ARG TARGETARCH
ARG TARGETPLATFORM
FROM golang:1.20 as builder

WORKDIR /workspace
# Copy the Go Modules manifests
COPY ../go.mod go.mod
COPY ../go.sum go.sum

# since we use vendoring we don't need to redownload our dependencies every time. Instead we can simply
# reuse our vendored directory and verify everything is good. If not we can abort here and ask for a revendor.
COPY ../vendor vendor/
RUN go mod verify

# Copy the go source
COPY ../api api/
COPY ../cmd cmd/
COPY ../internal internal/

ENV GOARCH=$TARGETARCH
ENV GOOS=$TARGETOS
ENV CGO_ENABLED=0

# Build
RUN go build -gcflags "all=-N -l" -mod=vendor -a -o lvms cmd/main.go

FROM golang:1.20 as dlv
RUN go install -ldflags "-s -w -extldflags '-static'" github.com/go-delve/delve/cmd/dlv@latest

# vgmanager needs 'nsenter' and other basic linux utils to correctly function
FROM --platform=$TARGETPLATFORM registry.access.redhat.com/ubi9/ubi-minimal:9.2

# Update the image to get the latest CVE updates
RUN microdnf update -y && \
microdnf install -y util-linux && \
microdnf clean all

WORKDIR /app

COPY --from=builder /workspace/lvms /usr/sbin/lvms
COPY --from=dlv /go/bin/dlv /usr/sbin/dlv

USER 65532:65532

EXPOSE 2345

ENTRYPOINT ["/usr/sbin/dlv"]
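Two design points in this Dockerfile are worth noting. The image sets `dlv` as the default entrypoint, while the debug deployment patch spells out the full `dlv exec ... /usr/sbin/lvms -- operator` invocation as the container command and forwards an equivalent `dlv exec ... -- vgmanager` command to the DaemonSet via `--vgmanager-cmd`, so both binaries run under the debugger. The `-gcflags "all=-N -l"` build disables optimizations and inlining, which keeps breakpoints and variable inspection aligned with the source.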
7 changes: 7 additions & 0 deletions internal/controllers/lvmcluster/lvmcluster_controller.go
@@ -64,6 +64,9 @@ type LVMClusterReconciler struct {
Namespace string
ImageName string

// VGManagerCommand is the command that will be used to start vgmanager
VGManagerCommand []string

// TopoLVMLeaderElectionPassthrough uses the given leaderElection when initializing TopoLVM to synchronize
// leader election configuration
TopoLVMLeaderElectionPassthrough configv1.LeaderElection
@@ -85,6 +88,10 @@ func (r *LVMClusterReconciler) SnapshotsEnabled() bool {
return r.EnableSnapshotting
}

func (r *LVMClusterReconciler) GetVGManagerCommand() []string {
return r.VGManagerCommand
}

func (r *LVMClusterReconciler) GetTopoLVMLeaderElectionPassthrough() configv1.LeaderElection {
return r.TopoLVMLeaderElectionPassthrough
}
1 change: 1 addition & 0 deletions internal/controllers/lvmcluster/resource/manager.go
@@ -14,6 +14,7 @@ type Reconciler interface {
GetNamespace() string
GetImageName() string
SnapshotsEnabled() bool
GetVGManagerCommand() []string

// GetTopoLVMLeaderElectionPassthrough uses the given leaderElection when initializing TopoLVM to synchronize
// leader election configuration
2 changes: 1 addition & 1 deletion internal/controllers/lvmcluster/resource/vgmanager.go
@@ -47,7 +47,7 @@ func (v vgManager) EnsureCreated(r Reconciler, ctx context.Context, lvmCluster *
logger := log.FromContext(ctx).WithValues("resourceManager", v.GetName())

// get desired daemonset spec
dsTemplate := newVGManagerDaemonset(lvmCluster, r.GetNamespace(), r.GetImageName())
dsTemplate := newVGManagerDaemonset(lvmCluster, r.GetNamespace(), r.GetImageName(), r.GetVGManagerCommand())

// create desired daemonset or update mutable fields on existing one
ds := &appsv1.DaemonSet{
@@ -137,16 +137,16 @@ var (
)

// newVGManagerDaemonset returns the desired vgmanager daemonset for a given LVMCluster
func newVGManagerDaemonset(lvmCluster *lvmv1alpha1.LVMCluster, namespace string, vgImage string) appsv1.DaemonSet {
func newVGManagerDaemonset(lvmCluster *lvmv1alpha1.LVMCluster, namespace, vgImage string, command []string) appsv1.DaemonSet {
// aggregate nodeSelector and tolerations from all deviceClasses
nodeSelector, tolerations := selector.ExtractNodeSelectorAndTolerations(lvmCluster)
volumes := []corev1.Volume{LVMDConfVol, DevHostDirVol, UDevHostDirVol, SysHostDirVol, MetricsCertsDirVol}
volumeMounts := []corev1.VolumeMount{LVMDConfVolMount, DevHostDirVolMount, UDevHostDirVolMount, SysHostDirVolMount, MetricsCertsDirVolMount}
privileged := true
var zero int64 = 0

command := []string{
"/lvms", "vgmanager",
}
if len(command) == 0 {
command = []string{"/lvms", "vgmanager"}
}

resourceRequirements := corev1.ResourceRequirements{