Skip to content

Commit

Permalink
Use controller-runtime to reconstruct spark operator (kubeflow#2072)
Browse files Browse the repository at this point in the history
* Use controller-runtime to reconstruct spark operator

Signed-off-by: Yi Chen <github@chenyicn.net>

* Update helm charts

Signed-off-by: Yi Chen <github@chenyicn.net>

* Update examples

Signed-off-by: Yi Chen <github@chenyicn.net>

---------

Signed-off-by: Yi Chen <github@chenyicn.net>
  • Loading branch information
ChenYi015 authored Aug 1, 2024
1 parent a3ec8f1 commit 0dc641b
Show file tree
Hide file tree
Showing 291 changed files with 20,893 additions and 18,910 deletions.
39 changes: 39 additions & 0 deletions .golangci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
run:
  # Overall time limit for the lint run. `timeout` is the documented key;
  # `deadline` is its deprecated predecessor.
  timeout: 5m

linters:
enable:
- revive
- gci
- depguard
- godot
- testifylint
- unconvert

issues:
exclude-rules:
# Disable errcheck linter for test files.
- path: _test.go
linters:
- errcheck

linters-settings:
gci:
sections:
- standard
- default
- prefix(github.com/kubeflow/spark-operator)
depguard:
Main:
files:
- $all
- "!$test"
listMode: Lax
deny:
reflect: Please don't use reflect package
Test:
files:
- $test
listMode: Lax
deny:
reflect: Please don't use reflect package
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ repos:
# Make the tool search for charts only under the `charts` directory
- --chart-search-root=charts
- --template-files=README.md.gotmpl
- --sort-values-order=file
25 changes: 9 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,26 @@

ARG SPARK_IMAGE=spark:3.5.0

FROM golang:1.22-alpine as builder
FROM golang:1.22.5 AS builder

WORKDIR /workspace

# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
# Cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer
RUN go mod download
COPY . .

# Copy the go source code
COPY main.go main.go
COPY pkg/ pkg/

# Build
ARG TARGETARCH
RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} GO111MODULE=on go build -a -o /usr/bin/spark-operator main.go

RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} GO111MODULE=on make build-operator

FROM ${SPARK_IMAGE}

USER root
COPY --from=builder /usr/bin/spark-operator /usr/bin/
RUN apt-get update --allow-releaseinfo-change \
&& apt-get update \

RUN apt-get update \
&& apt-get install -y tini \
&& rm -rf /var/lib/apt/lists/*

COPY --from=builder /workspace/bin/spark-operator /usr/bin/spark-operator

COPY entrypoint.sh /usr/bin/

ENTRYPOINT ["/usr/bin/entrypoint.sh"]
107 changes: 77 additions & 30 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,18 @@ endif
SHELL = /usr/bin/env bash -o pipefail
.SHELLFLAGS = -ec

# Version information.
# Shell-derived values use simple assignment (:=) so that each command runs once
# when the Makefile is parsed instead of on every expansion of the variable.
# VERSION is the content of the VERSION file with any leading "v" removed.
VERSION := $(shell sed "s/^v//" VERSION)
# `date -u` reports UTC, so the offset is always +00:00; writing it literally
# avoids the GNU-only `%:z` format specifier.
BUILD_DATE := $(shell date -u +"%Y-%m-%dT%H:%M:%S+00:00")
GIT_COMMIT := $(shell git rev-parse HEAD)
# GIT_TAG is only set when the work tree is clean and HEAD is exactly on a tag.
GIT_TAG := $(shell if [ -z "`git status --porcelain`" ]; then git describe --exact-match --tags HEAD 2>/dev/null; fi)
# "clean" when `git status --porcelain` prints nothing, "dirty" otherwise.
GIT_TREE_STATE := $(shell if [ -z "`git status --porcelain`" ]; then echo "clean" ; else echo "dirty"; fi)
# Falls back to the literal string "HEAD" when the short hash cannot be resolved.
GIT_SHA := $(shell git rev-parse --short HEAD || echo "HEAD")
GIT_VERSION := ${VERSION}-${GIT_SHA}

REPO=github.com/kubeflow/spark-operator
SPARK_OPERATOR_GOPATH=/go/src/github.com/kubeflow/spark-operator
SPARK_OPERATOR_CHART_PATH=charts/spark-operator-chart
OPERATOR_VERSION ?= $$(grep appVersion $(SPARK_OPERATOR_CHART_PATH)/Chart.yaml | awk '{print $$2}')
DEP_VERSION:=`grep DEP_VERSION= Dockerfile | awk -F\" '{print $$2}'`
BUILDER=`grep "FROM golang:" Dockerfile | awk '{print $$2}'`
UNAME:=`uname | tr '[:upper:]' '[:lower:]'`
Expand All @@ -27,9 +35,18 @@ UNAME:=`uname | tr '[:upper:]' '[:lower:]'`
CONTAINER_TOOL ?= docker

# Image URL to use all building/pushing image targets
IMAGE_REPOSITORY ?= docker.io/kubeflow/spark-operator
IMAGE_TAG ?= $(OPERATOR_VERSION)
OPERATOR_IMAGE ?= $(IMAGE_REPOSITORY):$(IMAGE_TAG)
IMAGE_REGISTRY ?= docker.io
IMAGE_REPOSITORY ?= kubeflow/spark-operator
IMAGE_TAG ?= $(VERSION)
IMAGE ?= $(IMAGE_REGISTRY)/$(IMAGE_REPOSITORY):$(IMAGE_TAG)

# Kind cluster
KIND_CLUSTER_NAME ?= spark-operator
KIND_CONFIG_FILE ?= charts/spark-operator-chart/ci/kind-config.yaml
KIND_KUBE_CONFIG ?= $(HOME)/.kube/config

# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.29.3

##@ General

Expand All @@ -46,7 +63,11 @@ OPERATOR_IMAGE ?= $(IMAGE_REPOSITORY):$(IMAGE_TAG)

.PHONY: help
help: ## Display this help.
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-30s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

# Prints the version derived from the VERSION file.
.PHONY: version
version: ## Print version information.
	@echo "Version: $(VERSION)"

##@ Development

Expand Down Expand Up @@ -94,20 +115,28 @@ lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes.
$(GOLANGCI_LINT) run --fix

.PHONY: unit-test
unit-test: clean ## Run go unit tests.
@echo "running unit tests"
# Runs every package except those under /e2e and writes coverage to cover.out.
# KUBEBUILDER_ASSETS must be set on the same logical recipe line as `go test`:
# each recipe line runs in its own shell, so an assignment on a separate line
# would never reach the test process.
unit-test: envtest ## Run unit tests.
	@echo "Running unit tests..."
	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
		go test $(shell go list ./... | grep -v /e2e) -coverprofile cover.out

.PHONY: e2e-test
e2e-test: clean ## Run go integration tests.
@echo "running integration tests"
go test -v ./test/e2e/ --kubeconfig "$(HOME)/.kube/config" --operator-image=docker.io/spark-operator/spark-operator:local
# Runs the Go tests under ./test/e2e/ in verbose mode with a 30 minute timeout.
# NOTE(review): this recipe does not create a cluster itself; presumably the
# test suite or a prior `kind-create-cluster` run provides it — confirm.
e2e-test: envtest ## Run the e2e tests against a Kind k8s instance that is spun up.
@echo "Running e2e tests..."
go test ./test/e2e/ -v -ginkgo.v -timeout 30m

##@ Build

# Linker flags passed to `go build` by build-operator.
# The -X entries set the version, buildDate, gitCommit and gitTreeState string
# variables in the ${REPO} package; -extldflags "-static" asks the external
# linker for a static binary.
# `override` makes the append take effect even when LDFLAGS is supplied on the
# make command line.
override LDFLAGS += \
-X ${REPO}.version=v${VERSION} \
-X ${REPO}.buildDate=${BUILD_DATE} \
-X ${REPO}.gitCommit=${GIT_COMMIT} \
-X ${REPO}.gitTreeState=${GIT_TREE_STATE} \
-extldflags "-static"

.PHONY: build-operator
build-operator: ## Build spark-operator binary.
go build -o bin/spark-operator main.go
# Compiles cmd/main.go into bin/spark-operator, stamping it with $(LDFLAGS).
build-operator: ## Build Spark operator
	go build -o bin/spark-operator -ldflags '$(LDFLAGS)' cmd/main.go

.PHONY: build-sparkctl
build-sparkctl: ## Build sparkctl binary.
Expand All @@ -117,7 +146,7 @@ build-sparkctl: ## Build sparkctl binary.
-v $$(pwd):$(SPARK_OPERATOR_GOPATH) $(BUILDER) sh -c \
"apk add --no-cache bash git && \
cd sparkctl && \
./build.sh" || true
bash build.sh" || true

.PHONY: install-sparkctl
install-sparkctl: | sparkctl/sparkctl-darwin-amd64 sparkctl/sparkctl-linux-amd64 ## Install sparkctl binary.
Expand All @@ -141,7 +170,7 @@ clean-sparkctl: ## Clean sparkctl binary.
build-api-docs: gen-crd-api-reference-docs ## Build api documentation.
$(GEN_CRD_API_REFERENCE_DOCS) \
-config hack/api-docs/config.json \
-api-dir github.com/kubeflow/spark-operator/pkg/apis/sparkoperator.k8s.io/v1beta2 \
-api-dir github.com/kubeflow/spark-operator/api/v1beta2 \
-template-dir hack/api-docs/template \
-out-file docs/api-docs.md

Expand All @@ -150,11 +179,11 @@ build-api-docs: gen-crd-api-reference-docs ## Build api documentaion.
# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
.PHONY: docker-build
docker-build: ## Build docker image with the operator.
$(CONTAINER_TOOL) build -t ${IMAGE_REPOSITORY}:${IMAGE_TAG} .
$(CONTAINER_TOOL) build -t ${IMAGE} .

.PHONY: docker-push
docker-push: ## Push docker image with the operator.
$(CONTAINER_TOOL) push ${IMAGE_REPOSITORY}:${IMAGE_TAG}
$(CONTAINER_TOOL) push ${IMAGE}

# PLATFORMS defines the target platforms for which the operator image is built, to provide support for multiple
# architectures (e.g. make docker-buildx IMAGE=myregistry/myoperator:0.0.1). To use this option you need to:
Expand All @@ -164,14 +193,11 @@ docker-push: ## Push docker image with the operator.
# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option.
PLATFORMS ?= linux/amd64,linux/arm64
.PHONY: docker-buildx
docker-buildx: ## Build and push docker image for the operator for cross-platform support.
# copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile
sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross
docker-buildx: ## Build and push docker image for the operator for cross-platform support
- $(CONTAINER_TOOL) buildx create --name spark-operator-builder
$(CONTAINER_TOOL) buildx use spark-operator-builder
- $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMAGE_REPOSITORY}:${IMAGE_TAG} -f Dockerfile.cross .
- $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMAGE} -f Dockerfile .
- $(CONTAINER_TOOL) buildx rm spark-operator-builder
rm Dockerfile.cross

##@ Helm

Expand All @@ -185,24 +211,39 @@ helm-unittest: helm-unittest-plugin ## Run Helm chart unittests.

.PHONY: helm-lint
helm-lint: ## Run Helm chart lint test.
docker run --rm --workdir /workspace --volume "$$(pwd):/workspace" quay.io/helmpack/chart-testing:latest ct lint --target-branch master
docker run --rm --workdir /workspace --volume "$$(pwd):/workspace" quay.io/helmpack/chart-testing:latest ct lint --target-branch master --validate-maintainers=false

.PHONY: helm-docs
helm-docs: ## Generates markdown documentation for helm charts from requirements and values files.
docker run --rm --volume "$$(pwd):/helm-docs" -u "$(id -u)" jnorwood/helm-docs:latest
# Runs the locally installed helm-docs binary ($(HELM_DOCS)) after making sure
# it is present via helm-docs-plugin. --sort-values-order=file presumably keeps
# values in the order they appear in the values file — confirm against helm-docs.
helm-docs: helm-docs-plugin ## Generates markdown documentation for helm charts from requirements and values files.
$(HELM_DOCS) --sort-values-order=file

##@ Deployment

ifndef ignore-not-found
ignore-not-found = false
endif

.PHONY: install-crds
install-crds: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.
$(KUSTOMIZE) build config/crd | $(KUBECTL) create -f -
# Creates the kind cluster named $(KIND_CLUSTER_NAME) unless `kind get clusters`
# already lists it. Both the check and the creation use $(KIND), so the binary
# provided by the `kind` prerequisite is the one that is actually run.
.PHONY: kind-create-cluster
kind-create-cluster: kind ## Create a kind cluster for integration tests.
	if ! $(KIND) get clusters 2>/dev/null | grep -q "^$(KIND_CLUSTER_NAME)$$"; then \
		$(KIND) create cluster --name $(KIND_CLUSTER_NAME) --config $(KIND_CONFIG_FILE) --kubeconfig $(KIND_KUBE_CONFIG); \
	fi

# Builds the operator image and loads it into the kind cluster.
# Uses $(KIND) rather than a bare `kind`, matching the other kind-* targets.
.PHONY: kind-load-image
kind-load-image: kind-create-cluster docker-build ## Load the image into the kind cluster.
	$(KIND) load docker-image --name $(KIND_CLUSTER_NAME) $(IMAGE)

# Deletes the kind cluster named $(KIND_CLUSTER_NAME) and then removes the
# kubeconfig file it was created with.
# NOTE(review): KIND_KUBE_CONFIG defaults to $(HOME)/.kube/config, so with the
# default this removes the user's entire kubeconfig — confirm this is intended.
.PHONY: kind-delete-cluster kind-delete-custer
kind-delete-cluster: kind ## Delete the created kind cluster.
	$(KIND) delete cluster --name $(KIND_CLUSTER_NAME) && \
		rm -f $(KIND_KUBE_CONFIG)

# Backward-compatible alias for the original, misspelled target name.
kind-delete-custer: kind-delete-cluster

.PHONY: uninstall-crds
uninstall-crds: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
# Builds the CRD manifests under config/crd with kustomize and applies them.
# The .PHONY declaration names the actual target, so a file called
# `install-crd` cannot mask the rule.
.PHONY: install-crd
install-crd: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.
	$(KUSTOMIZE) build config/crd | $(KUBECTL) apply -f -

# Builds the CRD manifests under config/crd with kustomize and deletes them.
# The .PHONY declaration names the actual target, so a file called
# `uninstall-crd` cannot mask the rule.
.PHONY: uninstall-crd
uninstall-crd: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
	$(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -

.PHONY: deploy
Expand Down Expand Up @@ -231,6 +272,7 @@ GOLANGCI_LINT = $(LOCALBIN)/golangci-lint-$(GOLANGCI_LINT_VERSION)
GEN_CRD_API_REFERENCE_DOCS ?= $(LOCALBIN)/gen-crd-api-reference-docs-$(GEN_CRD_API_REFERENCE_DOCS_VERSION)
HELM ?= helm
HELM_UNITTEST ?= unittest
HELM_DOCS ?= $(LOCALBIN)/helm-docs-$(HELM_DOCS_VERSION)

## Tool Versions
KUSTOMIZE_VERSION ?= v5.4.1
Expand All @@ -240,6 +282,7 @@ ENVTEST_VERSION ?= release-0.18
GOLANGCI_LINT_VERSION ?= v1.57.2
GEN_CRD_API_REFERENCE_DOCS_VERSION ?= v0.3.0
HELM_UNITTEST_VERSION ?= 0.5.1
HELM_DOCS_VERSION ?= v1.14.2

.PHONY: kustomize
kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary.
Expand Down Expand Up @@ -274,10 +317,14 @@ $(GEN_CRD_API_REFERENCE_DOCS): $(LOCALBIN)
.PHONY: helm-unittest-plugin
helm-unittest-plugin: ## Download helm unittest plugin locally if necessary.
if [ -z "$(shell helm plugin list | grep unittest)" ]; then \
echo "Installing helm unittest plugin..."; \
echo "Installing helm unittest plugin"; \
helm plugin install https://github.com/helm-unittest/helm-unittest.git --version $(HELM_UNITTEST_VERSION); \
fi

# Installs helm-docs at $(HELM_DOCS_VERSION) to the path $(HELM_DOCS) using the
# go-install-tool helper, whose arguments are: target binary path, Go package
# URL, and version.
.PHONY: helm-docs-plugin
helm-docs-plugin: ## Download helm-docs plugin locally if necessary.
$(call go-install-tool,$(HELM_DOCS),github.com/norwoodj/helm-docs/cmd/helm-docs,$(HELM_DOCS_VERSION))

# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist
# $1 - target path with name of binary (ideally with version)
# $2 - package url which can be installed
Expand Down
47 changes: 47 additions & 0 deletions PROJECT
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Code generated by tool. DO NOT EDIT.
# This file is used to track the info used to scaffold your project
# and allow the plugins properly work.
# More info: https://book.kubebuilder.io/reference/project-config.html
domain: sparkoperator.k8s.io
layout:
- go.kubebuilder.io/v4
projectName: spark-operator
repo: github.com/kubeflow/spark-operator
resources:
- api:
crdVersion: v1
namespaced: true
controller: true
domain: sparkoperator.k8s.io
kind: SparkApplication
path: github.com/kubeflow/spark-operator/api/v1beta1
version: v1beta1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: sparkoperator.k8s.io
kind: ScheduledSparkApplication
path: github.com/kubeflow/spark-operator/api/v1beta1
version: v1beta1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: sparkoperator.k8s.io
kind: SparkApplication
path: github.com/kubeflow/spark-operator/api/v1beta2
version: v1beta2
webhooks:
defaulting: true
validation: true
webhookVersion: v1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: sparkoperator.k8s.io
kind: ScheduledSparkApplication
path: github.com/kubeflow/spark-operator/api/v1beta2
version: v1beta2
version: "3"
File renamed without changes.
File renamed without changes.
36 changes: 36 additions & 0 deletions api/v1beta1/groupversion_info.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
Copyright 2024 The Kubeflow authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package v1beta1 contains API Schema definitions for the v1beta1 API group
// +kubebuilder:object:generate=true
// +groupName=sparkoperator.k8s.io
package v1beta1

import (
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/scheme"
)

var (
// GroupVersion is the group/version pair (group "sparkoperator.k8s.io",
// version "v1beta1") used to register the objects in this package.
GroupVersion = schema.GroupVersion{Group: "sparkoperator.k8s.io", Version: "v1beta1"}

// SchemeBuilder is the scheme.Builder for GroupVersion; it is used to add
// Go types to the GroupVersionKind scheme.
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

// AddToScheme adds the types in this group-version to the given scheme.
// It is SchemeBuilder's AddToScheme method.
AddToScheme = SchemeBuilder.AddToScheme
)
Loading

0 comments on commit 0dc641b

Please sign in to comment.