Deploy rook.io with Ceph and test it with a VM
Signed-off-by: Simone Tiraboschi <stirabos@redhat.com>
tiraboschi committed Dec 2, 2021
1 parent 6f5d5fe commit 2d65140
Showing 6 changed files with 225 additions and 4 deletions.
21 changes: 21 additions & 0 deletions hack/99-worker-format-sdb.yaml
@@ -0,0 +1,21 @@
apiVersion: machineconfiguration.openshift.io/v1
kind: MachineConfig
metadata:
  generation: 1
  labels:
    machineconfiguration.openshift.io/role: worker
    machine.openshift.io/zone: us-central1-f
  name: 99-worker-format-sdb
spec:
  config:
    ignition:
      version: 3.2.0
    storage:
      disks:
        - device: "/dev/sdb"
          wipeTable: true
          partitions:
            - label: cephstorage
              number: 1
              sizeMiB: 128000
              startMiB: 0
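
Once the MachineConfig rolls out, the extra disk should show up wiped and partitioned on the worker. A quick check, assuming a worker node name taken from "oc get nodes" (the node name below is a placeholder):

oc debug node/<worker-node> -- chroot /host lsblk /dev/sdb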
41 changes: 41 additions & 0 deletions hack/deploy-hco.sh
@@ -5,6 +5,10 @@ set -euxo pipefail
TARGET_NAMESPACE=${TARGET_NAMESPACE:-kubevirt-hyperconverged}
HCO_UNSTABLE=${HCO_UNSTABLE:-false}
HCO_MANIFESTS_SUFFIX=""
ROOK_VERSION=${ROOK_VERSION:-v1.7.8}
# TODO: do this properly in the openshift-ci config with an additional step
TUNE_GCP_CONF=${TUNE_GCP_CONF:-true}
CEPH_CLUSTER=${CEPH_CLUSTER:-my-cluster}

function cleanup() {
rv=$?
@@ -20,6 +24,17 @@ trap "cleanup" INT TERM EXIT

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

if [ "$TUNE_GCP_CONF" = "true" ]; then
echo "adding 3 worker nodes with 2 disks in zone f"
oc patch -n openshift-machine-api $(oc get machinesets -n openshift-machine-api -o name | grep worker-f) --type=json -p '[ { "op": "add", "path": /spec/template/spec/providerSpec/value/disks/-, "value": {"autoDelete": true, "boot": false, "labels": null, "sizeGb": 128, "type": "pd-ssd"} }, { "op": "replace", "path": /spec/replicas, "value": 1 } ]'
echo "free the additional disk for ceph"
# TODO: check if we can start with a blank disk
oc apply -f ${SCRIPT_DIR}/99-worker-format-sdb.yaml
echo "wait for the infra to be ready"
./hack/retry.sh 360 10 "oc get -n openshift-machine-api $(oc get machinesets -n openshift-machine-api -o name | grep worker-f) -o jsonpath='{ .status.readyReplicas }' | grep '1'"
./hack/retry.sh 360 10 "oc get -n openshift-machine-api $(oc get machinesets -n openshift-machine-api -o name | grep worker-f) -o jsonpath='{ .status.availableReplicas }' | grep '1'"
fi
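# NOTE: hack/retry.sh is not part of this diff; it is assumed to take
#   retry.sh <attempts> <sleep-seconds> "<command>"
# and to loop roughly like:
#   for i in $(seq 1 "$attempts"); do eval "$cmd" && exit 0; sleep "$sleep"; done; exit 1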


echo "creating catalogsource, operator group, and subscription"
oc create namespace ${TARGET_NAMESPACE}
@@ -31,3 +46,29 @@ oc apply -n ${TARGET_NAMESPACE} -f ${SCRIPT_DIR}/../manifests/virtualization${HC

echo "waiting for HyperConverged operator to become ready"
"$SCRIPT_DIR"/wait-for-hco.sh

echo "deploy rook.io with Ceph"
git clone --single-branch --branch ${ROOK_VERSION} https://github.com/rook/rook.git
oc create -f rook/cluster/examples/kubernetes/ceph/crds.yaml -f rook/cluster/examples/kubernetes/ceph/common.yaml
oc create -f rook/cluster/examples/kubernetes/ceph/operator-openshift.yaml
# oc create -f rook/cluster/examples/kubernetes/ceph/cluster.yaml
# oc create -f rook/cluster/examples/kubernetes/ceph/csi/rbd/storageclass.yaml
# Settings for a test cluster where redundancy is not configured. Requires only a single node.
# TODO: try with a 3 nodes cluster
oc create -f rook/cluster/examples/kubernetes/ceph/cluster-test.yaml
oc create -f rook/cluster/examples/kubernetes/ceph/csi/rbd/storageclass-test.yaml

echo "waiting for rook.io with Ceph to become ready and health"
./hack/retry.sh 60 10 "oc get CephCluster -n rook-ceph ${CEPH_CLUSTER} -o jsonpath='{ .status.phase }' | grep 'Ready'"
./hack/retry.sh 60 10 "oc get CephCluster -n rook-ceph ${CEPH_CLUSTER} -o jsonpath='{ .status.state }' | grep 'Created'"
./hack/retry.sh 90 10 "oc get CephCluster -n rook-ceph ${CEPH_CLUSTER} -o jsonpath='{ .status.ceph.health }' | grep HEALTH_OK"
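# Optionally, Ceph health can also be inspected from inside the cluster
# (assumption: this requires the rook toolbox, which is not deployed above):
#   oc create -f rook/cluster/examples/kubernetes/ceph/toolbox.yaml
#   oc -n rook-ceph exec deploy/rook-ceph-tools -- ceph status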

echo "start a Fedora 35 VM with Ceph by rook.io"
oc apply -f ${SCRIPT_DIR}/vm_fedora35_rookceph.yaml
if [ "$TUNE_GCP_CONF" = "true" ]; then
oc patch vm -n default fedora-35-test --type=json -p '[ { "op": "add", "path": "/spec/template/spec/nodeSelector", "value": {"topology.kubernetes.io/zone": "us-central1-f"} } ]'
fi
oc patch vm -n default fedora-35-test --type=json -p '[ { "op": "replace", "path": "/spec/running", "value": true } ]'

./hack/retry.sh 180 10 "oc get vmi -n default fedora-35-test -o jsonpath='{ .status.phase }' | grep 'Running'"
oc get vmi -n default -o yaml fedora-35-test
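
Once the VMI reports Running, the guest can be reached over the serial console for a manual smoke test; a minimal sketch, assuming virtctl is installed and using the cloud-init credentials from the VM manifest below:

virtctl console -n default fedora-35-test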
77 changes: 77 additions & 0 deletions hack/dump-state.sh
@@ -35,14 +35,33 @@ cat <<EOF
=================================
Start of HCO state dump
=================================
EOF

if [ -n "${ARTIFACT_DIR}" ]; then
cat <<EOF
==============================
executing kubevirt-must-gather
==============================
EOF
mkdir -p ${ARTIFACT_DIR}/kubevirt-must-gather
RunCmd "${CMD} adm must-gather --image=quay.io/kubevirt/must-gather:latest --dest-dir=${ARTIFACT_DIR}/kubevirt-must-gather"
mkdir -p ${ARTIFACT_DIR}/origin-must-gather
RunCmd "${CMD} adm must-gather --image=quay.io/openshift/origin-must-gather:latest --dest-dir=${ARTIFACT_DIR}/origin-must-gather"
mkdir -p ${ARTIFACT_DIR}/rook-must-gather
RunCmd "${CMD} adm must-gather --image=quay.io/ocs-dev/ocs-must-gather:latest --dest-dir=${ARTIFACT_DIR}/rook-must-gather"
fi

cat <<EOF
==========================
summary of operator status
==========================
EOF
NAMESPACE_ARG=${1:-}
ROOK_NAMESPACE_ARG=${2:-}
HCO_NAMESPACE=${NAMESPACE_ARG:-"kubevirt-hyperconverged"}
ROOK_NAMESPACE=${ROOK_NAMESPACE_ARG:-"rook-ceph"}

RunCmd "${CMD} get pods -n ${HCO_NAMESPACE}"
@@ -166,6 +185,64 @@ EOF

RunCmd "$CMD get deployments -n ${HCO_NAMESPACE} -o json"

cat <<EOF
=================
Rook Ceph Cluster
=================
EOF

RunCmd "$CMD get CephCluster -n ${ROOK_NAMESPACE} rook-ceph -o yaml"

cat <<EOF
====================
Rook Ceph Block Pool
====================
EOF

RunCmd "$CMD get CephBlockPool -n ${ROOK_NAMESPACE} replicapool -o yaml"

cat <<EOF
======================
Rook Ceph StorageClass
======================
EOF

RunCmd "$CMD get storageclass rook-ceph-block -o yaml"

cat <<EOF
==============
Rook Ceph Pods
==============
EOF

RunCmd "$CMD get pods -n ${ROOK_NAMESPACE}"

cat <<EOF
==============
Rook Pods Logs
==============
EOF

for p in $($CMD -n ${ROOK_NAMESPACE} get pods -o jsonpath='{.items[*].metadata.name}')
do
for c in $($CMD -n ${ROOK_NAMESPACE} get pod ${p} -o jsonpath='{.spec.containers[*].name}')
do
echo "====== BEGIN logs from pod: ${p} ${c} ======"
$CMD logs -n ${ROOK_NAMESPACE} -c ${c} ${p} || true
echo "====== END logs from pod: ${p} ${c} ======"
done
done
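# NOTE: if a container restarted, its previous logs can be captured the same
# way by adding --previous to the oc logs call above.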

cat <<EOF
===============================
End of HCO state dump
===============================
EOF
82 changes: 82 additions & 0 deletions hack/vm_fedora35_rookceph.yaml
@@ -0,0 +1,82 @@
apiVersion: kubevirt.io/v1
kind: VirtualMachine
metadata:
  name: fedora-35-test
  namespace: default
spec:
  dataVolumeTemplates:
    - apiVersion: cdi.kubevirt.io/v1beta1
      kind: DataVolume
      metadata:
        creationTimestamp: null
        name: fedora-35-test
      spec:
        pvc:
          accessModes:
            - ReadWriteMany
          resources:
            requests:
              storage: 30Gi
          storageClassName: rook-ceph-block
          volumeMode: Block
        source:
          http:
            url: >-
              https://download.fedoraproject.org/pub/fedora/linux/releases/35/Cloud/x86_64/images/Fedora-Cloud-Base-35-1.2.x86_64.raw.xz
  running: false
  template:
    metadata:
      annotations:
        vm.kubevirt.io/flavor: small
        vm.kubevirt.io/os: fedora
        vm.kubevirt.io/workload: server
      creationTimestamp: null
      labels:
        flavor.template.kubevirt.io/small: 'true'
        kubevirt.io/domain: fedora-35-test
        kubevirt.io/size: small
        os.template.kubevirt.io/fedora35: 'true'
        vm.kubevirt.io/name: fedora-35-test
        workload.template.kubevirt.io/server: 'true'
    spec:
      domain:
        cpu:
          cores: 1
          sockets: 1
          threads: 1
        devices:
          disks:
            - bootOrder: 1
              disk:
                bus: virtio
              name: fedora-35-test
            - disk:
                bus: virtio
              name: cloudinitdisk
          interfaces:
            - macAddress: '02:a3:1f:00:00:00'
              masquerade: {}
              name: default
          networkInterfaceMultiqueue: true
          rng: {}
        machine:
          type: q35
        resources:
          requests:
            memory: 2Gi
      hostname: fedora-35-test
      networks:
        - name: default
          pod: {}
      terminationGracePeriodSeconds: 180
      volumes:
        - dataVolume:
            name: fedora-35-test
          name: fedora-35-test
        - cloudInitNoCloud:
            userData: |-
              #cloud-config
              user: fedora
              password: j0h2-8i82-gt1u
              chpasswd: { expire: False }
          name: cloudinitdisk
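
The OS image import into the rook-ceph-block PVC is handled by CDI through the DataVolume template above; its progress can be watched with something like the following, assuming CDI registers the usual dv short name for DataVolume:

oc get dv -n default fedora-35-test -w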
4 changes: 2 additions & 2 deletions manifests/testing/kubevirt-testing-configuration.json
@@ -1,7 +1,7 @@
 {
   "storageClassLocal": "hostpath-provisioner",
   "storageClassHostPath": "hostpath-provisioner",
-  "storageClassRhel": "hostpath-provisioner",
-  "storageClassWindows": "hostpath-provisioner",
+  "storageClassRhel": "rook-ceph-block",
+  "storageClassWindows": "rook-ceph-block",
   "manageStorageClasses": false
 }
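
Since the RHEL and Windows tests now point at rook-ceph-block, a quick sanity check that the class exists after deployment:

oc get storageclass rook-ceph-block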
4 changes: 2 additions & 2 deletions manifests/testing/kubevirt-testing-infra.yaml
@@ -184,7 +184,7 @@ data:
 {
   "storageClassLocal": "hostpath-provisioner",
   "storageClassHostPath": "hostpath-provisioner",
-  "storageClassRhel": "hostpath-provisioner",
-  "storageClassWindows": "hostpath-provisioner",
+  "storageClassRhel": "rook-ceph-block",
+  "storageClassWindows": "rook-ceph-block",
   "manageStorageClasses": false
 }
