-
Notifications
You must be signed in to change notification settings - Fork 43
132 lines (110 loc) · 4.21 KB
/
e2e_tests.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
name: e2e
on:
pull_request:
branches:
- main
- 'release-*'
paths-ignore:
- 'docs/**'
- '**.adoc'
- '**.md'
- 'LICENSE'
push:
branches:
- main
- 'release-*'
paths-ignore:
- 'docs/**'
- '**.adoc'
- '**.md'
- 'LICENSE'
concurrency:
group: ${{ github.head_ref }}-${{ github.workflow }}
cancel-in-progress: true
jobs:
kubernetes-e2e:
runs-on: ubuntu-20.04-4core-gpu
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
submodules: recursive
- name: Checkout common repo code
uses: actions/checkout@v4
with:
repository: 'project-codeflare/codeflare-common'
ref: 'main'
path: 'common'
- name: Set Go
uses: actions/setup-go@v5
with:
go-version-file: './go.mod'
- name: Set up gotestfmt
uses: gotesttools/gotestfmt-action@v2
with:
token: ${{ secrets.GITHUB_TOKEN }}
- name: Setup NVidia GPU environment for KinD
uses: ./common/github-actions/nvidia-gpu-setup
- name: Setup and start KinD cluster
id: kind-install
uses: ./common/github-actions/kind
- name: Install NVidia GPU operator for KinD
uses: ./common/github-actions/nvidia-gpu-operator
- name: Deploy CodeFlare stack
id: deploy
run: |
echo Setting up CodeFlare stack
make setup-e2e
echo Deploying CodeFlare operator
IMG=localhost/codeflare-operator:test
make image-build -e IMG="${IMG}"
podman save -o cfo.tar ${IMG}
kind load image-archive cfo.tar --name cluster --verbosity 1000
make deploy -e IMG="${IMG}" -e ENV="e2e"
kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
- name: Run e2e tests
run: |
export CODEFLARE_TEST_TIMEOUT_SHORT=3m
export CODEFLARE_TEST_TIMEOUT_MEDIUM=15m
export CODEFLARE_TEST_TIMEOUT_LONG=20m
export CODEFLARE_TEST_TIMEOUT_GPU_PROVISIONING=30m
export CODEFLARE_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }}
set -euo pipefail
go test -timeout 60m -v -skip "^Test.*Cpu$" ./test/e2e -json 2>&1 | tee ${CODEFLARE_TEST_OUTPUT_DIR}/gotest.log | gotestfmt
- name: Print CodeFlare operator logs
if: always() && steps.deploy.outcome == 'success'
run: |
echo "Printing CodeFlare operator logs"
kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log
- name: Print Kueue operator logs
if: always() && steps.deploy.outcome == 'success'
run: |
echo "Printing Kueue operator logs"
KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
- name: Print KubeRay operator logs
if: always() && steps.deploy.outcome == 'success'
run: |
echo "Printing KubeRay operator logs"
kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
- name: Export all KinD pod logs
uses: ./common/github-actions/kind-export-logs
if: always() && steps.kind-install.outcome == 'success'
with:
output-directory: ${TEMP_DIR}
- name: Upload logs
uses: actions/upload-artifact@v4
if: always() && steps.kind-install.outcome == 'success'
with:
name: logs
retention-days: 10
path: |
${{ env.TEMP_DIR }}/**/*.log
- name: Post notification about failure to a Slack channel in case of push event
if: failure() && github.event_name == 'push'
uses: slackapi/slack-github-action@v1.24.0
with:
channel-id: "codeflare-nightlies"
slack-message: "e2e test on push failed, <https://github.com/project-codeflare/codeflare-operator/actions/workflows/e2e_tests.yaml|View workflow runs>"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}