Skip to content

Commit

Permalink
feat: bump driver version and more flexible install method
Browse files Browse the repository at this point in the history
The NVIDIA driver install needs a gcc version matching the one used to compile the kernel. The install method had to be updated to ensure the proper version of gcc is installed.
  • Loading branch information
PierreBeucher committed Nov 7, 2024
1 parent 71f2fdc commit ea8a1bf
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 9 deletions.
14 changes: 9 additions & 5 deletions ansible/roles/nvidia-driver/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
# Old variables, to be removed in V1.
# Used to remove a potential old NVIDIA driver installation.
nvidia_driver_package_version: ''
nvidia_driver_persistence_mode_on: true
nvidia_driver_branch: "535"

# Drivers that were installed from Ubuntu packages
nvidia_driver_ubuntu_branch: "{{ nvidia_driver_branch }}"
nvidia_driver_ubuntu_packages_suffix: "-server"
# END Old variables, to be removed in V1

# NVIDIA kernel module settings (modprobe.d file path and module parameters).
# Each key is defined exactly once: duplicate keys are invalid YAML and most
# parsers silently keep only the last occurrence.
nvidia_driver_module_file: /etc/modprobe.d/nvidia.conf
nvidia_driver_module_params: ''

# Driver version installed from the NVIDIA .run file
nvidia_driver_dotrun_install_version: "550.127.05"

# Version pin applied to the nvidia-container-toolkit apt package
nvidia_container_toolkit_version: "1.17.0-1"
50 changes: 47 additions & 3 deletions ansible/roles/nvidia-driver/tasks/driver.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,64 @@
when: nvidia_nouveau_driver_blacklist.changed
reboot:

- name: install driver required packages
# The NVIDIA driver install requires gcc and make,
# and the gcc version must match the one used to compile the kernel.
# Identify the gcc version used for the kernel and install that gcc version.
# Purge the build tooling that earlier install methods put in place.
# The result is registered so a later task can reboot only when something
# was actually removed.
- name: remove packages used by previous install methods
  apt:
    # Pass the whole list at once instead of looping over single packages.
    name:
      - build-essential
      - gcc-multilib
      - dkms
    # Single `state` key: a stale `present` next to `absent` is a duplicate
    # key and only the last value would be honoured anyway.
    state: absent
    purge: true
  register: previous_install_method_package_remove

# Reboot the host, but only when the package-removal task reported a change.
- name: Reboot after previous install method package removal
  reboot:
  when: previous_install_method_package_remove.changed

# make is required by the NVIDIA driver install; gcc is handled separately
# so that its version can match the kernel's compiler.
- name: install driver required packages
  apt:
    # A one-element loop adds nothing; give the package name directly.
    name: make
    state: present

# Read the kernel banner from /proc/version; the GCC version is extracted
# from this output by a later task.
- name: Get GCC version used to compile the kernel
  command: cat /proc/version
  register: kernel_version_output
  # Read-only command: never report this task as "changed".
  changed_when: false

# Pull the major GCC version out of the `gcc-<N>` token in the kernel banner
# and expose it as the `gcc_version` fact.
# The character class is [0-9], not [1-9], so versions containing a zero
# (e.g. gcc-10) are captured whole instead of being cut at the zero.
# NOTE(review): if the banner has no `gcc-<N>` token, regex_search yields no
# match and `first` fails the task — confirm this is the desired behaviour.
- name: Extract GCC version from kernel version output
  set_fact:
    gcc_version: "{{ kernel_version_output.stdout | regex_search('gcc-([0-9]+)', '\\1') | first }}"

# Log the detected version so a wrong match is visible in the play output.
- name: Show GCC version detected for the kernel
  debug:
    msg: "Found kernel compiled with GCC version '{{ gcc_version }}'"

# Install the versioned gcc-<N> package for the detected GCC version.
- name: Install matching GCC version
  apt:
    state: present
    name: "gcc-{{ gcc_version }}"

# Point the `gcc` alternative (/usr/bin/gcc) at the versioned gcc binary.
- name: Set installed GCC version as default
  alternatives:
    name: gcc
    path: "/usr/bin/gcc-{{ gcc_version }}"
    link: /usr/bin/gcc

# Point the `cc` alternative (/usr/bin/cc) at /usr/bin/gcc.
- name: Set CC as GCC
  alternatives:
    name: cc
    path: /usr/bin/gcc
    link: /usr/bin/cc

# Read /sys/module/nvidia/version into `nvidia_driver_version_file`.
# A failure here is tolerated; later tasks check whether this task succeeded.
- name: Get current NVIDIA driver version (error expected and ignored if driver not installed yet)
  slurp:
    src: /sys/module/nvidia/version
  register: nvidia_driver_version_file
  # Declared once: the key was previously duplicated.
  ignore_errors: true  # expected that version file may not yet exist if driver not installed

- name: Set NVIDIA driver version var
when: nvidia_driver_version_file is success
Expand Down
1 change: 1 addition & 0 deletions ansible/roles/nvidia-driver/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
# Install nvidia-modprobe, refreshing the apt cache as part of the same task.
- name: install nvidia-modprobe
  apt:
    update_cache: true
    name: nvidia-modprobe

- name: install NVIDIA drivers
import_tasks: driver.yml
Expand Down
2 changes: 1 addition & 1 deletion ansible/roles/nvidia-driver/tasks/toolkit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

# Install the NVIDIA Container Toolkit at the version pinned by
# `nvidia_container_toolkit_version` (apt `package=version` syntax).
- name: Install NVIDIA Container Toolkit
  ansible.builtin.apt:
    # Single `name` key: the unpinned duplicate was shadowed by this one.
    name: "nvidia-container-toolkit={{ nvidia_container_toolkit_version }}"
    state: present

- name: Update container runtime
Expand Down

0 comments on commit ea8a1bf

Please sign in to comment.