Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Alma Linux 8.7 Compute and Desktop packer image definition #1599

Merged
merged 20 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/configs/images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ images:
os_type: Linux
version: 8.7

- name: azhop-desktop-almalinux-8_7
publisher: azhpc
offer: azhop-desktop
sku: almalinux-8_7
hyper_v: V2
os_type: Linux
version: 8.7

- name: azhop-compute-ubuntu-2004
publisher: azhpc
offer: azhop-compute
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/z_create_config_callable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,11 @@ jobs:
sed -i "s|azhpc:azhop-compute:centos-7_9:latest|$SHARED_SIG/azhop-compute-centos-7_9/latest|g" config.yml
sed -i "s|azhpc:azhop-desktop:centos-7_9:latest|$SHARED_SIG/azhop-desktop-centos-7_9/latest|g" config.yml
sed -i "s|azhpc:azhop-compute:almalinux-8_7:latest|$SHARED_SIG/azhop-compute-almalinux-8_7/latest|g" config.yml
sed -i "s|azhpc:azhop-desktop:almalinux-8_7:latest|$SHARED_SIG/azhop-desktop-almalinux-8_7/latest|g" config.yml
sed -i "s|azhpc:azhop-compute:ubuntu-18_04:latest|$SHARED_SIG/azhop-compute-ubuntu-1804/latest|g" config.yml
sed -i "s|azhpc:azhop-compute:ubuntu-20_04:latest|$SHARED_SIG/azhop-compute-ubuntu-2004/latest|g" config.yml

sed -i "s|__ALMA8_PLAN__|plan: almalinux:almalinux:8-gen2|g" config.yml
sed -i "s|__ALMA8_PLAN__|plan: almalinux:almalinux-hpc:8_7-hpc-gen2|g" config.yml
;;
private)
echo "Using private images"
Expand All @@ -185,10 +186,11 @@ jobs:
sed -i "s|azhpc:azhop-compute:centos-7_9:latest|$SIG/azhop-compute-centos-7_9/latest|g" config.yml
sed -i "s|azhpc:azhop-desktop:centos-7_9:latest|$SIG/azhop-desktop-centos-7_9/latest|g" config.yml
sed -i "s|azhpc:azhop-compute:almalinux-8_7:latest|$SIG/azhop-compute-almalinux-8_7/latest|g" config.yml
sed -i "s|azhpc:azhop-desktop:almalinux-8_7:latest|$SIG/azhop-desktop-almalinux-8_7/latest|g" config.yml
sed -i "s|azhpc:azhop-compute:ubuntu-18_04:latest|$SIG/azhop-compute-ubuntu-1804/latest|g" config.yml
sed -i "s|azhpc:azhop-compute:ubuntu-20_04:latest|$SIG/azhop-compute-ubuntu-2004/latest|g" config.yml

sed -i "s|__ALMA8_PLAN__|plan: almalinux:almalinux:8-gen2|g" config.yml
sed -i "s|__ALMA8_PLAN__|plan: almalinux:almalinux-hpc:8_7-hpc-gen2|g" config.yml
;;
esac

Expand Down
12 changes: 5 additions & 7 deletions packer/azhop-compute-almalinux-8_7.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@
"type": "azure-arm",
"use_azure_cli_auth": "{{user `var_use_azure_cli_auth`}}",
"image_publisher": "almalinux",
"image_offer": "almalinux",
"image_sku": "8-gen2",
"image_offer": "almalinux-hpc",
"image_sku": "8_7-hpc-gen2",
"image_version": "latest",
"plan_info": {
"plan_name": "8-gen2",
"plan_name": "8_7-hpc-gen2",
"plan_publisher": "almalinux",
"plan_product": "almalinux"
"plan_product": "almalinux-hpc"
},
"managed_image_resource_group_name": "{{user `var_resource_group`}}",
"managed_image_name": "{{user `var_image`}}",
"os_type": "Linux",
"vm_size": "Standard_NV12s_v3",
"vm_size": "Standard_D8s_v5",
"keep_os_disk": "{{user `var_keep_os_disk`}}",
"temp_os_disk_name": "{{user `var_image`}}",
"managed_image_storage_account_type": "Premium_LRS",
Expand All @@ -42,9 +42,7 @@
"execute_command": "chmod +x {{ .Path }}; {{ .Vars }} sudo -E sh '{{ .Path }}'",
"inline": [
"chmod +x /tmp/*.sh",
"/tmp/hpc_image.sh || exit 1",
"/tmp/linux-setup.sh || exit 1",
"/tmp/install_persistent_rdma.sh || exit 1",
"/tmp/openpbs.sh || exit 1",
"/tmp/slurm.sh || exit 1",
"/tmp/telegraf.sh || exit 1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@
"use_azure_cli_auth": "{{user `var_use_azure_cli_auth`}}",
"image_publisher": "almalinux",
"image_offer": "almalinux-hpc",
"image_sku": "8_5-hpc-gen2",
"image_version": "8.5.20211210",
"image_sku": "8_7-hpc-gen2",
"image_version": "latest",
"plan_info": {
"plan_name": "8_5-hpc-gen2",
"plan_name": "8_7-hpc-gen2",
"plan_publisher": "almalinux",
"plan_product": "almalinux-hpc"
},
"managed_image_resource_group_name": "{{user `var_resource_group`}}",
"managed_image_name": "{{user `var_image`}}",
"os_type": "Linux",
"vm_size": "Standard_d8s_v3",
"vm_size": "Standard_NV12s_v3",
"keep_os_disk": "{{user `var_keep_os_disk`}}",
"temp_os_disk_name": "{{user `var_image`}}",
"managed_image_storage_account_type": "Premium_LRS",
"ssh_pty": "true",
"build_resource_group_name": "{{user `var_resource_group`}}",
Expand All @@ -41,11 +43,11 @@
"inline": [
"chmod +x /tmp/*.sh",
"/tmp/linux-setup.sh || exit 1",
"/tmp/install_persistent_rdma.sh || exit 1",
"/tmp/{{user `var_queue_manager`}}.sh || exit 1",
"/tmp/interactive-desktop-3d.sh || exit 1",
"/tmp/openpbs.sh || exit 1",
"/tmp/slurm.sh || exit 1",
"/tmp/telegraf.sh || exit 1",
"/tmp/lustreclient.sh 2.12.8 || exit 1",
"/tmp/zz-compute-custom.sh || exit 1",
"/tmp/zz-desktop-custom.sh || exit 1",
"echo ' This is the end '",
"yum history sync",
"rm -rf /tmp/*.sh",
Expand Down
110 changes: 110 additions & 0 deletions packer/scripts/almalinux/interactive-desktop-3d.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/bin/bash

# Build prerequisites, installed one transaction at a time in this order:
# the EPEL repository is enabled before the remaining packages are requested.
dnf install -y epel-release
dnf install -y libglvnd-devel pkgconfig
dnf install -y dkms

# Blacklist the nouveau kernel modules through a modprobe configuration file.
printf '%s\n' \
  'blacklist nouveau' \
  'blacklist lbm-nouveau' \
  >/etc/modprobe.d/nouveau.conf

echo "################### INSTALL NVIDIA GRID DRIVERS"
echo "################### UNLOAD NVIDIA MODULES"
# Stop the NVIDIA related services one by one. errexit is not enabled at this
# point, so a service that does not exist or fails to stop is simply skipped.
for nvidia_service in nv_peer_mem.service nvidia-fabricmanager dcgm.service nvidia-dcgm.service; do
  systemctl stop "${nvidia_service}"
done

# Unload the first batch of kernel modules, in this exact order.
for kmod in gdrdrv nvidia_drm drm_kms_helper; do
  rmmod "${kmod}"
done
# Show which processes, if any, still have the GPU device open.
lsof /dev/nvidia0
# nv_hostengine_pid=$(lsof /dev/nvidia0 | tail -n 1 | cut -d' ' -f2)
# echo "Kill process $nv_hostengine_pid"
# sudo kill -9 $nv_hostengine_pid
#lsof /dev/nvidia0
# Unload the remaining modules, in this exact order.
for kmod in nvidia_modeset nvidia_uvm nvidia drm; do
  rmmod "${kmod}"
done

#init 3
# Log the modules that are still loaded.
lsmod

# remove previous kernel modules
# Print the DKMS status for the build log, then remove every entry whose
# status line mentions nvidia (the first comma-separated field is passed to 'dkms remove').
sudo /sbin/dkms status
sudo /sbin/dkms status \
  | grep nvidia \
  | cut -d',' -f1 \
  | xargs -I{} /sbin/dkms remove {} --all
#rm -f /lib/modules/$(uname -r)/kernel/drivers/video/*.ko

# Download and install the NVIDIA GRID driver for Azure, register it with DKMS
# for the running kernel, configure gridd and verify the driver with nvidia-smi.
#
# Use the direct link which contains the clear version number
# Check which latest version to use from https://github.com/Azure/azhpc-extensions/blob/master/NvidiaGPU/resources.json
# The driver version is defined once and reused for the download file name and
# for the DKMS module version so that both cannot drift apart.
# NOTE: the GUID part of the download URL has to be updated together with the version.
nvidia_grid_version="525.105.17"
nvidia_grid_url="https://download.microsoft.com/download/6/b/d/6bd2850f-5883-4e2a-9a35-edbd3dd6808c/NVIDIA-Linux-x86_64-${nvidia_grid_version}-grid-azure.run"
nvidia_grid_installer="/mnt/NVIDIA-Linux-x86_64-grid.run"

# Fail fast on a broken download instead of trying to execute an empty or partial installer.
if ! wget -O "${nvidia_grid_installer}" "${nvidia_grid_url}"; then
  echo "Failed to download NVIDIA GRID driver ${nvidia_grid_version} from ${nvidia_grid_url}" >&2
  exit 1
fi
chmod +x "${nvidia_grid_installer}"
# errexit is only enabled after the installer log has been printed, so the log
# is dumped to the build output even when the installer itself fails.
sudo "${nvidia_grid_installer}" -s --no-check-for-alternate-installs
cat /var/log/nvidia-installer.log
set -e
/sbin/dkms install --no-depmod -m nvidia -v "${nvidia_grid_version}" -k "$(uname -r)" --force
# Answers are: yes, yes, yes
sudo cp /etc/nvidia/gridd.conf.template /etc/nvidia/gridd.conf

# Append the gridd settings and drop any 'FeatureType=0' line copied from the template.
cat <<EOF >>/etc/nvidia/gridd.conf
IgnoreSP=FALSE
EnableUI=FALSE
EOF
sed -i '/FeatureType=0/d' /etc/nvidia/gridd.conf

# errexit is still active here: a failing nvidia-smi aborts the script.
echo "Test if nvidia-smi is working"
nvidia-smi
set +e

echo "################### INSTALL VirtualGL / VNC"
# Desktop environment (without its screensaver), TurboVNC and websockify.
dnf groupinstall -y xfce
dnf remove -y xfce4-screensaver
turbovnc_rpm="https://kumisystems.dl.sourceforge.net/project/turbovnc/3.0.3/turbovnc-3.0.3.x86_64.rpm"
dnf install -y "${turbovnc_rpm}"
dnf install -y python3-websockify

# NOTE(review): --no-check-certificate turns off TLS verification for a file that
# defines a package repository; confirm whether it is still required.
virtualgl_repo_url="https://virtualgl.com/pmwiki/uploads/Downloads/VirtualGL.repo"
wget --no-check-certificate "${virtualgl_repo_url}" -O /etc/yum.repos.d/VirtualGL.repo

yum install -y VirtualGL turbojpeg xorg-x11-apps
# Configuring the VirtualGL server is mandatory: stop the script with the
# command's own exit status when it fails.
/usr/bin/vglserver_config -config +s +f -t || exit
# Boot into, and immediately switch to, the graphical target.
systemctl disable firstboot-graphical
systemctl set-default graphical.target
systemctl isolate graphical.target

# Generate a helper script that rewrites /etc/X11/xorg.conf with nvidia-xconfig
# and then inserts the HardDPMS option, make it executable and run it once.
# The heredoc delimiter is quoted: the script body is written out verbatim.
busid_script=/etc/rc.d/rc3.d/busidupdate.sh
cat >"${busid_script}" <<'EOF'
#!/bin/bash
nvidia-xconfig --enable-all-gpus --allow-empty-initial-configuration -c /etc/X11/xorg.conf --virtual=1920x1200 -s
# https://virtualgl.org/Documentation/HeadlessNV
sed -i '/NVIDIA/a\ Option "HardDPMS" "false"' /etc/X11/xorg.conf
EOF
chmod +x "${busid_script}"
"${busid_script}"

# Create a vglrun alias
# The profile script below is written verbatim (quoted heredoc delimiter) and is
# evaluated later, in the shell that sources /etc/profile.d/vglrun.sh.
cat <<'EOF' >/etc/profile.d/vglrun.sh
#!/bin/bash
# Set the vglrun alias to pickup a GPU device based on the noVNC port so that each session is landing on a different GPU, modulo the number of GPU devices.
ngpu=$(lspci | grep -c NVIDIA)
# The count is "0" (not empty) when no NVIDIA device is listed, which the
# ${ngpu:-1} default below does not catch: fall back to 1 to avoid a modulo by zero.
[ "${ngpu:-0}" -gt 0 ] || ngpu=1
alias vglrun='/usr/bin/vglrun -d :0.$(( ${port:-0} % ${ngpu:-1}))'
EOF

# browser and codecs
# The RPM Fusion "free" release package is installed first, then firefox and ffmpeg.
rpmfusion_free_release="https://mirrors.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm"
dnf install -y "${rpmfusion_free_release}"
dnf install -y firefox ffmpeg ffmpeg-devel

# Install archive manager
dnf install -y file-roller
# Disabled alternative based on xarchiver, kept for reference:
#dnf -y install xarchiver
#rm -f /usr/libexec/thunar-archive-plugin/gnome-file-roller.tap
#ln -s /usr/libexec/thunar-archive-plugin/xarchiver.tap /usr/libexec/thunar-archive-plugin/gnome-file-roller.tap
#update-desktop-database /usr/share/applications

# increase buffer size
# Append the maximum receive/send socket buffer sizes to /etc/sysctl.conf.
printf '%s\n' \
  'net.core.rmem_max=2097152' \
  'net.core.wmem_max=2097152' \
  >>/etc/sysctl.conf