Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Update Charmed Katib Operators + CI to 0.12 #1717

Merged
merged 4 commits into from
Oct 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions .github/workflows/test-charmed-katib.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ jobs:

- name: Install dependencies
run: |
sudo apt-get install python3-setuptools
set -eux
sudo apt update
sudo apt install python3-setuptools
sudo pip3 install black flake8

- name: Check black
Expand All @@ -34,19 +36,18 @@ jobs:

- uses: balchua/microk8s-actions@v0.2.2
with:
channel: "1.20/stable"
channel: "1.21/stable"
addons: '["dns", "storage", "rbac"]'

- name: Install dependencies
run: |
set -eux
sudo apt update
sudo apt install -y python3-pip
sudo snap install charm --classic
sudo snap install juju --classic
sudo snap install juju-helpers --classic
sudo snap install juju-bundle --classic
sudo snap install juju-wait --classic
sudo pip3 install charmcraft==1.0.0
sudo pip3 install charmcraft==1.3.1

- name: Build Docker images
run: |
Expand All @@ -62,20 +63,20 @@ jobs:
done

- name: Deploy Katib
env:
CHARMCRAFT_DEVELOPER: "1"
run: |
set -eux
cd operators/
git clone git://git.launchpad.net/canonical-osm
cp -r canonical-osm/charms/interfaces/juju-relation-mysql mysql
sg microk8s -c 'juju bootstrap microk8s uk8s'
juju add-model kubeflow
juju bundle deploy -b bundle-edge.yaml --build
juju bundle deploy --build --destructive-mode --serial
juju wait -wvt 300

- name: Test Katib
run: |
set -eux
kubectl apply -f examples/v1beta1/hp-tuning/random.yaml
run: kubectl apply -f examples/v1beta1/hp-tuning/random.yaml

- name: Get pod statuses
run: kubectl get all -A
Expand All @@ -86,25 +87,32 @@ jobs:
if: failure()

- name: Get katib-controller workload logs
run: kubectl logs --tail 100 -nkubeflow -ljuju-app=katib-controller
run: kubectl logs --tail 100 -nkubeflow -lapp.kubernetes.io/name=katib-controller
if: failure()

- name: Get katib-controller operator logs
run: kubectl logs --tail 100 -nkubeflow -ljuju-operator=katib-controller
run: kubectl logs --tail 100 -nkubeflow -loperator.juju.is/name=katib-controller
if: failure()

- name: Get katib-ui workload logs
run: kubectl logs --tail 100 -nkubeflow -ljuju-app=katib-ui
run: kubectl logs --tail 100 -nkubeflow -lapp.kubernetes.io/name=katib-ui
if: failure()

- name: Get katib-ui operator logs
run: kubectl logs --tail 100 -nkubeflow -ljuju-operator=katib-ui
run: kubectl logs --tail 100 -nkubeflow -loperator.juju.is/name=katib-ui
if: failure()

- name: Get katib-db-manager workload logs
run: kubectl logs --tail 100 -nkubeflow -ljuju-app=katib-db-manager
run: kubectl logs --tail 100 -nkubeflow -lapp.kubernetes.io/name=katib-db-manager
if: failure()

- name: Get katib-db-manager operator logs
run: kubectl logs --tail 100 -nkubeflow -ljuju-operator=katib-db-manager
run: kubectl logs --tail 100 -nkubeflow -loperator.juju.is/name=katib-db-manager
if: failure()

- name: Upload charmcraft logs
uses: actions/upload-artifact@v2
with:
name: charmcraft-logs
path: /tmp/charmcraft-log-*
if: failure()
11 changes: 11 additions & 0 deletions operators/katib-controller/charmcraft.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Charmcraft build configuration for the katib-controller charm.
type: charm
# A single base: the charm is built on, and runs on, Ubuntu 20.04.
# The channel is quoted so it stays the string "20.04" rather than a float.
bases:
  - build-on:
      - name: "ubuntu"
        channel: "20.04"
    run-on:
      - name: "ubuntu"
        channel: "20.04"
parts:
  charm:
    # Python packages listed for the charm part; presumably installed into
    # the build environment before the charm's own requirements - confirm
    # against the Charmcraft version in use.
    charm-python-packages: [setuptools, pip]
1 change: 0 additions & 1 deletion operators/katib-controller/icon.svg

This file was deleted.

2 changes: 1 addition & 1 deletion operators/katib-controller/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ resources:
type: oci-image
description: Backing OCI image
auto-fetch: true
upstream-source: docker.io/kubeflowkatib/katib-controller:v1beta1-c6c9172
upstream-source: docker.io/kubeflowkatib/katib-controller:v0.12.0
provides:
katib-controller:
interface: http
Expand Down
44 changes: 17 additions & 27 deletions operators/katib-controller/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
from subprocess import check_call

import yaml
from oci_image import OCIImageResource, OCIImageResourceError
from ops.charm import CharmBase
from ops.framework import StoredState
from ops.main import main
from ops.model import ActiveStatus, MaintenanceStatus
from ops.framework import StoredState

from oci_image import OCIImageResource, OCIImageResourceError

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -62,29 +61,30 @@ def set_pod_spec(self, event):
"namespaces",
"persistentvolumes",
"persistentvolumeclaims",
"pods",
"pods/log",
"pods/status",
],
"verbs": ["*"],
},
{
"apiGroups": [""],
"resources": ["pods", "pods/log", "pods/status"],
"apiGroups": ["apps"],
"resources": ["deployments"],
"verbs": ["*"],
},
{
"apiGroups": ["apps"],
"resources": ["deployments"],
"apiGroups": ["rbac.authorization.k8s.io"],
"resources": [
"roles",
"rolebindings",
],
"verbs": ["*"],
},
{
"apiGroups": ["batch"],
"resources": ["jobs", "cronjobs"],
"verbs": ["*"],
},
{
"apiGroups": ["apiextensions.k8s.io"],
"resources": ["customresourcedefinitions"],
"verbs": ["create", "get"],
},
{
"apiGroups": ["kubeflow.org"],
"resources": [
Expand All @@ -97,24 +97,14 @@ def set_pod_spec(self, event):
"suggestions",
"suggestions/status",
"suggestions/finalizers",
"tfjobs",
"pytorchjobs",
"mpijobs",
"xgboostjobs",
"mxjobs",
],
"verbs": ["*"],
},
{
"apiGroups": ["kubeflow.org"],
"resources": ["tfjobs", "pytorchjobs", "mpijobs"],
"verbs": ["*"],
},
{
"apiGroups": ["tekton.dev"],
"resources": ["pipelineruns", "taskruns"],
"verbs": ["*"],
},
{
"apiGroups": ["rbac.authorization.k8s.io"],
"resources": ["roles", "rolebindings"],
"verbs": ["*"],
},
],
}
],
Expand Down
119 changes: 69 additions & 50 deletions operators/katib-controller/src/crds.yaml
Original file line number Diff line number Diff line change
@@ -1,24 +1,31 @@
---
apiVersion: apiextensions.k8s.io/v1beta1
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: experiments.kubeflow.org
spec:
additionalPrinterColumns:
- JSONPath: .status.conditions[-1:].type
name: Type
type: string
- JSONPath: .status.conditions[-1:].status
name: Status
type: string
- JSONPath: .metadata.creationTimestamp
name: Age
type: date
group: kubeflow.org
version: v1beta1
scope: Namespaced
subresources:
status: {}
versions:
- name: v1beta1
served: true
storage: true
additionalPrinterColumns:
- name: Type
type: string
jsonPath: .status.conditions[-1:].type
- name: Status
type: string
jsonPath: .status.conditions[-1:].status
- name: Age
type: date
jsonPath: .metadata.creationTimestamp
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
x-kubernetes-preserve-unknown-fields: true
names:
kind: Experiment
singular: experiment
Expand All @@ -27,34 +34,40 @@ spec:
- all
- kubeflow
- katib

---
apiVersion: apiextensions.k8s.io/v1beta1
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: suggestions.kubeflow.org
spec:
additionalPrinterColumns:
- JSONPath: .status.conditions[-1:].type
name: Type
type: string
- JSONPath: .status.conditions[-1:].status
name: Status
type: string
- JSONPath: .spec.requests
name: Requested
type: string
- JSONPath: .status.suggestionCount
name: Assigned
type: string
- JSONPath: .metadata.creationTimestamp
name: Age
type: date
group: kubeflow.org
version: v1beta1
scope: Namespaced
subresources:
status: {}
versions:
- name: v1beta1
served: true
storage: true
additionalPrinterColumns:
- name: Type
type: string
jsonPath: .status.conditions[-1:].type
- name: Status
type: string
jsonPath: .status.conditions[-1:].status
- name: Requested
type: string
jsonPath: .spec.requests
- name: Assigned
type: string
jsonPath: .status.suggestionCount
- name: Age
type: date
jsonPath: .metadata.creationTimestamp
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
x-kubernetes-preserve-unknown-fields: true
names:
kind: Suggestion
singular: suggestion
Expand All @@ -63,28 +76,34 @@ spec:
- all
- kubeflow
- katib

---
apiVersion: apiextensions.k8s.io/v1beta1
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: trials.kubeflow.org
spec:
additionalPrinterColumns:
- JSONPath: .status.conditions[-1:].type
name: Type
type: string
- JSONPath: .status.conditions[-1:].status
name: Status
type: string
- JSONPath: .metadata.creationTimestamp
name: Age
type: date
group: kubeflow.org
version: v1beta1
scope: Namespaced
subresources:
status: {}
versions:
- name: v1beta1
served: true
storage: true
additionalPrinterColumns:
- name: Type
type: string
jsonPath: .status.conditions[-1:].type
- name: Status
type: string
jsonPath: .status.conditions[-1:].status
- name: Age
type: date
jsonPath: .metadata.creationTimestamp
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
x-kubernetes-preserve-unknown-fields: true
names:
kind: Trial
singular: trial
Expand Down
3 changes: 1 addition & 2 deletions operators/katib-controller/src/early-stopping.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{
"medianstop": {
"image": "docker.io/kubeflowkatib/earlystopping-medianstop",
"imagePullPolicy": "Always"
"image": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.12.0"
}
}
6 changes: 3 additions & 3 deletions operators/katib-controller/src/metrics-collector-sidecar.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"StdOut": {
"image": "docker.io/kubeflowkatib/file-metrics-collector"
"image": "docker.io/kubeflowkatib/file-metrics-collector:v0.12.0"
},
"File": {
"image": "docker.io/kubeflowkatib/file-metrics-collector"
"image": "docker.io/kubeflowkatib/file-metrics-collector:v0.12.0"
},
"TensorFlowEvent": {
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector",
"image": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.12.0",
"resources": {
"limits": {
"memory": "1Gi"
Expand Down
2 changes: 1 addition & 1 deletion operators/katib-controller/src/pytorchJobTemplate.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
apiVersion: kubeflow.org/v1
apiVersion: "kubeflow.org/v1"
kind: PyTorchJob
spec:
pytorchReplicaSpecs:
Expand Down
Loading