Skip to content

Commit

Permalink
OFED builder workflow (#1132)
Browse files Browse the repository at this point in the history
Add OFED workflow
  • Loading branch information
assumptionsandg committed Sep 27, 2024
1 parent d243298 commit d25970b
Show file tree
Hide file tree
Showing 9 changed files with 460 additions and 1 deletion.
254 changes: 254 additions & 0 deletions .github/workflows/package-build-ofed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
---
name: Build OFED packages
on:
workflow_dispatch:
inputs:
rocky9:
description: Build Rocky Linux 9
type: boolean
default: true
secrets:
KAYOBE_VAULT_PASSWORD:
required: true
CLOUDS_YAML:
required: true
OS_APPLICATION_CREDENTIAL_ID:
required: true
OS_APPLICATION_CREDENTIAL_SECRET:
required: true

env:
ANSIBLE_FORCE_COLOR: True
KAYOBE_ENVIRONMENT: ci-builder
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}
jobs:
overcloud-ofed-packages:
name: Build OFED packages
if: github.repository == 'stackhpc/stackhpc-kayobe-config'
runs-on: arc-skc-host-image-builder-runner
permissions: {}
steps:
- name: Install Package
uses: ConorMacBride/install-package@main
with:
apt: git unzip nodejs python3-pip python3-venv openssh-server openssh-client jq

- name: Start the SSH service
run: |
sudo /etc/init.d/ssh start
- name: Checkout
uses: actions/checkout@v4
with:
path: src/kayobe-config

- name: Determine OpenStack release
id: openstack_release
run: |
BRANCH=$(awk -F'=' '/defaultbranch/ {print $2}' src/kayobe-config/.gitreview)
echo "openstack_release=${BRANCH}" | sed -E "s,(stable|unmaintained)/,," >> $GITHUB_OUTPUT
- name: Clone StackHPC Kayobe repository
uses: actions/checkout@v4
with:
repository: stackhpc/kayobe
ref: refs/heads/stackhpc/${{ steps.openstack_release.outputs.openstack_release }}
path: src/kayobe

- name: Install Kayobe
run: |
mkdir -p venvs &&
pushd venvs &&
python3 -m venv kayobe &&
source kayobe/bin/activate &&
pip install -U pip &&
pip install ../src/kayobe
- name: Install terraform
uses: hashicorp/setup-terraform@v2

- name: Initialise terraform
run: terraform init
working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio

- name: Generate SSH keypair
run: ssh-keygen -f id_rsa -N ''
working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio

- name: Generate clouds.yaml
run: |
cat << EOF > clouds.yaml
${{ secrets.CLOUDS_YAML }}
EOF
working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio

- name: Output image tag
id: image_tag
run: |
echo image_tag=$(grep stackhpc_rocky_9_overcloud_host_image_version: etc/kayobe/pulp-host-image-versions.yml | awk '{print $2}') >> $GITHUB_OUTPUT
# Use the image override if set, otherwise use overcloud-os_distribution-os_release-tag
- name: Output image name
id: image_name
run: |
echo image_name=overcloud-rocky-9-${{ steps.image_tag.outputs.image_tag }} >> $GITHUB_OUTPUT
- name: Generate terraform.tfvars
run: |
cat << EOF > terraform.tfvars
ssh_public_key = "id_rsa.pub"
ssh_username = "cloud-user"
aio_vm_name = "skc-ofed-builder"
aio_vm_image = "${{ env.VM_IMAGE }}"
aio_vm_flavor = "en1.medium"
aio_vm_network = "stackhpc-ci"
aio_vm_subnet = "stackhpc-ci"
aio_vm_interface = "ens3"
EOF
working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio
env:
VM_IMAGE: ${{ steps.image_name.outputs.image_name }}

- name: Terraform Plan
run: terraform plan
working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio
env:
OS_CLOUD: "openstack"
OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}

- name: Terraform Apply
run: |
for attempt in $(seq 5); do
if terraform apply -auto-approve; then
echo "Created infrastructure on attempt $attempt"
exit 0
fi
echo "Failed to create infrastructure on attempt $attempt"
sleep 10
terraform destroy -auto-approve
sleep 60
done
echo "Failed to create infrastructure after $attempt attempts"
exit 1
working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio
env:
OS_CLOUD: "openstack"
OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}

- name: Get Terraform outputs
id: tf_outputs
run: |
terraform output -json
working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio

- name: Write Terraform outputs
run: |
cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-builder/tf-outputs.yml
${{ steps.tf_outputs.outputs.stdout }}
EOF
- name: Write Terraform network config
run: |
cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-builder/tf-network-allocation.yml
---
aio_ips:
builder: "{{ access_ip_v4.value }}"
EOF
- name: Write Terraform network interface config
run: |
mkdir -p src/kayobe-config/etc/kayobe/environments/$KAYOBE_ENVIRONMENT/inventory/group_vars/seed
rm -f src/kayobe-config/etc/kayobe/environments/$KAYOBE_ENVIRONMENT/inventory/group_vars/seed/network-interfaces
cat << EOF > src/kayobe-config/etc/kayobe/environments/$KAYOBE_ENVIRONMENT/inventory/group_vars/seed/network-interfaces
admin_interface: "{{ access_interface.value }}"
aio_interface: "{{ access_interface.value }}"
EOF
- name: Manage SSH keys
run: |
mkdir -p ~/.ssh
touch ~/.ssh/authorized_keys
cat src/kayobe-config/terraform/aio/id_rsa.pub >> ~/.ssh/authorized_keys
cp src/kayobe-config/terraform/aio/id_rsa* ~/.ssh/
- name: Bootstrap the control host
run: |
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe control host bootstrap
- name: Run growroot playbook
run: |
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe playbook run src/kayobe-config/etc/kayobe/ansible/growroot.yml
env:
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

- name: Configure the seed host (Builder VM)
run: |
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe seed host configure --skip-tags network,docker
env:
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

- name: Run a distro-sync
run: |
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe seed host command run --become --command "dnf distro-sync --refresh"
env:
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

- name: Reset BLS entries on the seed host
run: |
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe playbook run src/kayobe-config/etc/kayobe/ansible/reset-bls-entries.yml \
-e "reset_bls_host=ofed-builder"
env:
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

- name: Disable noexec in /var/tmp
run: |
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe seed host command run --become --command "sed -i 's/noexec,//g' /etc/fstab"
env:
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

- name: Reboot to apply the kernel update
run: |
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe playbook run src/kayobe-config/etc/kayobe/ansible/reboot.yml
env:
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

- name: Run OFED builder playbook
run: |
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe playbook run src/kayobe-config/etc/kayobe/ansible/build-ofed-rocky.yml
env:
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

- name: Run OFED upload playbook
run: |
source venvs/kayobe/bin/activate &&
source src/kayobe-config/kayobe-env --environment ci-builder &&
kayobe playbook run src/kayobe-config/etc/kayobe/ansible/push-ofed.yml
env:
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

- name: Destroy
run: terraform destroy -auto-approve
working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio
env:
OS_CLOUD: openstack
OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}
if: always()
1 change: 1 addition & 0 deletions doc/source/contributor/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ This guide is for contributors of the StackHPC Kayobe configuration project.
environments/index
package-updates
pre-commit
ofed
55 changes: 55 additions & 0 deletions doc/source/contributor/ofed.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
====
OFED
====

Warning: Experimental workflow subject to change

This section documents the workflow for building OFED packages for Release train integration.

The workflow builds the OFED kernel modules against the latest available kernel in Release train
(as configured in SKC) and compiles them into RPM packages to be uploaded to Ark. Addtionally,
this workflow downloads the userspace OFED packages from the Nvidia repository and uploads these
to Ark.

Workflow
========

The workflow uses workflow_dispatch to manually request an OFED build, which will deploy a builder
VM, apply kayobe config to the builder, upgrade the kernel, reboot, then run two Ansible playbooks
for building and uploading OFED to Ark.

Pre-requisites
--------------

Before building OFED packages, the workflow will ensure that:

* A full distro-sync has taken place, ensuring the kernel is upgraded.

* The bootloader has been configured to use the latest kernel

* noexec is disabled in the temporary logical volume.

build-ofed
----------

Currently we only support building Rocky Linux 9 OFED packages.

In order to setup OFED, we're required to build kernel modules for the OFED drivers as
the kernels we provide in release train are unsupported by OFED. To accomplish this we
will need to use the doca-kernel-support from the doca-extra repository.

We will need to instll dependencies in order to build the OFED kernel modules, and these
are installed at the beginning of the build playbook. We also install base and appstream
dependencies of userspace OFED packages here, this is intended to stop these dependencies
being pulled in later when we download the OFED packages from the doca-host repository.

At the end of the playbook following the kernel module build, the OFED userspace packages
are downloaded from the upstream repository in order to upload these to Ark.

push-ofed
---------

As we're not syncing OFED from any upstream source, and are instead creating our own
repository of custom packages, we will be required to setup the Pulp distribution/publication
and upload the content directly to Ark. This playbook uses the Pulp CLI to upload the RPMs
to Ark.
73 changes: 73 additions & 0 deletions etc/kayobe/ansible/build-ofed-rocky.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
---
- name: Build OFED packages
become: true
hosts: ofed-builder
gather_facts: false
tasks:
- name: Check whether noexec is enabled for /var/tmp
ansible.builtin.lineinfile:
path: "/etc/fstab"
regexp: "noexec"
state: absent
changed_when: false
check_mode: true
register: result
failed_when: result.found

- name: Install package dependencies
ansible.builtin.dnf:
name:
- kpartx
- perl
- rpm-build
- automake
- patch
- kernel
- kernel-devel
- autoconf
- pciutils
- kernel-modules-extra
- kernel-rpm-macros
- lsof
- libtool
- tk
- gcc-gfortran
- tcl
- createrepo
- cmake-filesystem
- libnl3-devel
- python3-devel
state: latest
update_cache: true

- name: Add DOCA host repository package
ansible.builtin.dnf:
name: https://developer.nvidia.com/downloads/networking/secure/doca-sdk/DOCA_2.8/doca-host-2.8.0-204000_{{ stackhpc_pulp_doca_ofed_version }}_rhel9{{ stackhpc_pulp_repo_rocky_9_minor_version }}.x86_64.rpm
disable_gpg_check: true

- name: Install DOCA extra packages
ansible.builtin.dnf:
name: doca-extra

- name: Create build directory
ansible.builtin.file:
path: /home/cloud-user/ofed
state: directory
mode: 0777

- name: Set build directory
ansible.builtin.replace:
path: /opt/mellanox/doca/tools/doca-kernel-support
regexp: 'TMP_DIR=\$1'
replace: 'TMP_DIR=/home/cloud-user/ofed'

- name: Build OFED kernel modules
ansible.builtin.shell:
cmd: |
/opt/mellanox/doca/tools/doca-kernel-support
- name: Download OFED userspace packages
ansible.builtin.dnf:
name: doca-ofed-userspace
download_only: true
download_dir: /home/cloud-user/ofed
Loading

0 comments on commit d25970b

Please sign in to comment.