Skip to content

update codeflare-common version to remove replace for prometheus/common #1841

update codeflare-common version to remove replace for prometheus/common

update codeflare-common version to remove replace for prometheus/common #1841

Workflow file for this run

# End-to-end test workflow. Triggered by pull requests targeting, and pushes
# to, the main branch and any release-* branch.
name: e2e
on:
  pull_request:
    branches:
      - main
      - 'release-*'
    # A run is skipped when every changed path matches one of these patterns
    # (documentation, AsciiDoc/Markdown files, the license file).
    paths-ignore:
      - 'docs/**'
      - '**.adoc'
      - '**.md'
      - 'LICENSE'
  push:
    branches:
      - main
      - 'release-*'
    # Same ignore list as for pull_request above.
    paths-ignore:
      - 'docs/**'
      - '**.adoc'
      - '**.md'
      - 'LICENSE'
# Runs that share a group supersede each other: a newer run cancels the one
# still in progress. github.head_ref is only populated for pull_request
# events, so fall back to github.ref for push events; without the fallback
# every push run shares a single group and a push to one branch cancels the
# in-flight run of another branch.
concurrency:
  group: ${{ github.head_ref || github.ref }}-${{ github.workflow }}
  cancel-in-progress: true
jobs:
  # Single job: prepares a KinD cluster with the NVidia GPU pieces (per the
  # step names below), deploys the operator image built from this checkout,
  # runs the Go e2e suite, then collects and uploads logs.
  kubernetes-e2e:
    runs-on: ubuntu-20.04-4core-gpu
    steps:
      # ---- Sources and toolchain ----
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Checked out into ./common so that the composite actions it contains
      # can be referenced by local path in the steps below.
      - name: Checkout common repo code
        uses: actions/checkout@v4
        with:
          repository: project-codeflare/codeflare-common
          ref: main
          path: common

      # The Go version is taken from this repository's go.mod.
      - name: Set Go
        uses: actions/setup-go@v5
        with:
          go-version-file: ./go.mod

      - name: Set up gotestfmt
        uses: gotesttools/gotestfmt-action@v2
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      # ---- Cluster bring-up (composite actions from the common repo) ----
      - name: Setup NVidia GPU environment for KinD
        uses: ./common/github-actions/nvidia-gpu-setup

      # The id is used by the log export/upload steps to check that the
      # cluster was actually created.
      - name: Setup and start KinD cluster
        id: kind-install
        uses: ./common/github-actions/kind

      - name: Install NVidia GPU operator for KinD
        uses: ./common/github-actions/nvidia-gpu-operator

      # ---- Operator deployment ----
      # Builds the operator image locally, saves it to a tarball with podman,
      # loads that archive into the KinD cluster named "cluster", deploys it,
      # and waits up to 120s for the manager deployment to become Available.
      # The id is used by the log-printing steps further down.
      - name: Deploy CodeFlare stack
        id: deploy
        run: |
          echo Setting up CodeFlare stack
          make setup-e2e
          echo Deploying CodeFlare operator
          IMG=localhost/codeflare-operator:test
          make image-build -e IMG="${IMG}"
          podman save -o cfo.tar ${IMG}
          kind load image-archive cfo.tar --name cluster --verbosity 1000
          make deploy -e IMG="${IMG}" -e ENV="e2e"
          kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager

      # ---- Tests ----
      # NOTE(review): TEMP_DIR is not defined anywhere in this workflow file;
      # presumably it is exported by one of the composite actions above.
      # TODO confirm.
      # pipefail keeps a failing `go test` from being masked by the
      # tee/gotestfmt pipeline. Tests whose names match ^Test.*Cpu$ are
      # skipped.
      - name: Run e2e tests
        run: |
          export CODEFLARE_TEST_TIMEOUT_SHORT=3m
          export CODEFLARE_TEST_TIMEOUT_MEDIUM=15m
          export CODEFLARE_TEST_TIMEOUT_LONG=20m
          export CODEFLARE_TEST_TIMEOUT_GPU_PROVISIONING=30m
          export CODEFLARE_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }}
          set -euo pipefail
          go test -timeout 60m -v -skip "^Test.*Cpu$" ./test/e2e -json 2>&1 | tee ${CODEFLARE_TEST_OUTPUT_DIR}/gotest.log | gotestfmt

      # ---- Diagnostics ----
      # The three log-printing steps run even when an earlier step failed,
      # as long as the deploy step itself succeeded. Each one prints the full
      # log (--tail -1) and also writes a copy under TEMP_DIR.
      - name: Print CodeFlare operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing CodeFlare operator logs"
          kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log

      # The controller pod name is looked up by grepping the pod listing of
      # the kueue-system namespace.
      - name: Print Kueue operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing Kueue operator logs"
          KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
          kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log

      - name: Print KubeRay operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing KubeRay operator logs"
          kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log

      # Export and upload run whenever the KinD cluster was created,
      # regardless of the outcome of later steps.
      # NOTE(review): output-directory receives the literal text ${TEMP_DIR}
      # (shell syntax, not a workflow expression); presumably the action
      # expands it in a shell. Confirm against the action's definition.
      - name: Export all KinD pod logs
        uses: ./common/github-actions/kind-export-logs
        if: always() && steps.kind-install.outcome == 'success'
        with:
          output-directory: ${TEMP_DIR}

      # Uploads every *.log found in subdirectories of TEMP_DIR as the
      # "logs" artifact, kept for 10 days.
      - name: Upload logs
        uses: actions/upload-artifact@v4
        if: always() && steps.kind-install.outcome == 'success'
        with:
          name: logs
          retention-days: 10
          path: |
            ${{ env.TEMP_DIR }}/**/*.log

      # ---- Notification ----
      # Only failures of push-triggered runs are reported to Slack; pull
      # request failures are not.
      - name: Post notification about failure to a Slack channel in case of push event
        if: failure() && github.event_name == 'push'
        uses: slackapi/slack-github-action@v1.24.0
        with:
          channel-id: 'codeflare-nightlies'
          slack-message: 'e2e test on push failed, <https://github.com/project-codeflare/codeflare-operator/actions/workflows/e2e_tests.yaml|View workflow runs>'
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}