Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pynvml compatibility by monkey-patching #143

Merged
merged 4 commits into from
Dec 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
name: "Run Tests"
on: [push, pull_request, workflow_dispatch]

defaults:
run:
shell: bash

jobs:
unit-tests:
name: "Unit Tests"
Expand All @@ -22,6 +26,9 @@ jobs:
python-version: "3.10"
- os: ubuntu-latest
python-version: "3.11"
- os: ubuntu-latest
python-version: "3.11"
pynvml-version: 11.495.46
- os: windows-latest
python-version: "3.8"
- os: windows-latest
Expand All @@ -46,6 +53,9 @@ jobs:
- name: Install dependencies
run: |
pip install -e ".[test]"
if [ -n "${{ matrix.pynvml-version }}" ]; then
pip install nvidia-ml-py==${{ matrix.pynvml-version }}
fi
python -m gpustat --version

- name: Run tests
Expand Down
6 changes: 4 additions & 2 deletions gpustat/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,9 @@ def get_process_info(nv_process):
fan_speed = None # Not supported

try:
memory = N.nvmlDeviceGetMemoryInfo(handle) # in Bytes
# memory: in Bytes
# Note that this is a compat-patched API (see gpustat.nvml)
memory = N.nvmlDeviceGetMemoryInfo(handle)
except N.NVMLError as e:
log.add_exception("memory", e)
memory = None # Not supported
Expand All @@ -479,7 +481,7 @@ def get_process_info(nv_process):
try:
utilization_dec = N.nvmlDeviceGetDecoderUtilization(handle)
except N.NVMLError as e:
log.add_exception("utilization_dnc", e)
log.add_exception("utilization_dec", e)
utilization_dec = None # Not supported

try:
Expand Down
110 changes: 105 additions & 5 deletions gpustat/nvml.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""Imports pynvml with sanity checks and custom patches."""

import textwrap
import warnings
import functools
import os


pynvml = None
import sys
import textwrap

# If this environment variable is set, we will bypass pynvml version validation
# so that legacy pynvml (nvidia-ml-py3) can be used. This would be useful
Expand All @@ -25,15 +25,18 @@
hasattr(pynvml, 'nvmlDeviceGetComputeRunningProcesses_v2')
) and not ALLOW_LEGACY_PYNVML:
raise RuntimeError("pynvml library is outdated.")

except (ImportError, SyntaxError, RuntimeError) as e:
_pynvml = sys.modules.get('pynvml', None)

raise ImportError(textwrap.dedent(
"""\
pynvml is missing or an outdated version is installed.

We require nvidia-ml-py>=11.450.129, and nvidia-ml-py3 shall not be used.
For more details, please refer to: https://github.com/wookayin/gpustat/issues/107

Your pynvml installation: """ + repr(pynvml) +
Your pynvml installation: """ + repr(_pynvml) +
"""

-----------------------------------------------------------
Expand All @@ -48,4 +51,101 @@
""")) from e


# Monkey-patch nvml due to breaking changes in pynvml.
# See #107, #141, and test_gpustat.py for more details.

# Keep references to the unpatched pynvml callables *before* they get replaced
# on the pynvml module; the wrappers in `pynvml_monkeypatch` delegate to these.
_original_nvmlGetFunctionPointer = pynvml._nvmlGetFunctionPointer
_original_nvmlDeviceGetMemoryInfo = pynvml.nvmlDeviceGetMemoryInfo


class pynvml_monkeypatch:
    """Namespace holding the compatibility wrappers installed onto pynvml.

    The class is never instantiated; it only groups the patched functions,
    the indirections to the original (unpatched) pynvml functions, and the
    small amount of state they share (``FUNCTION_FALLBACKS`` and
    ``has_memoryinfo_v2``).
    """

    @staticmethod  # Note: must be defined as a staticmethod to allow mocking.
    def original_nvmlGetFunctionPointer(name):
        """Call the unpatched ``pynvml._nvmlGetFunctionPointer``."""
        return _original_nvmlGetFunctionPointer(name)

    # Maps an NVML symbol that may be missing to the older symbol that should
    # be resolved in its place.
    FUNCTION_FALLBACKS = {
        # for pynvml._nvmlGetFunctionPointer
        'nvmlDeviceGetComputeRunningProcesses_v3': 'nvmlDeviceGetComputeRunningProcesses_v2',
        'nvmlDeviceGetGraphicsRunningProcesses_v3': 'nvmlDeviceGetGraphicsRunningProcesses_v2',
    }

    @staticmethod
    @functools.wraps(pynvml._nvmlGetFunctionPointer)
    def _nvmlGetFunctionPointer(name):
        """Our monkey-patched pynvml._nvmlGetFunctionPointer().

        Resolves ``name`` through the original function. If that raises
        ``NVMLError_FunctionNotFound`` and ``name`` has an entry in
        ``FUNCTION_FALLBACKS``, the fallback symbol is resolved instead and
        cached under ``name``; otherwise the error is re-raised.

        See also:
            test_gpustat::NvidiaDriverMock for test scenarios. See #107.
        """
        M = pynvml_monkeypatch

        try:
            return M.original_nvmlGetFunctionPointer(name)
        except pynvml.NVMLError_FunctionNotFound:  # type: ignore
            if name not in M.FUNCTION_FALLBACKS:
                # Unknown case, cannot handle. re-raise again
                raise

            # Lack of ...Processes_v3 APIs happens for
            # OLD drivers < 510.39.01 && pynvml >= 11.510, where
            # we fallback to v2 APIs. (see #107 for more details)
            ret = M.original_nvmlGetFunctionPointer(
                M.FUNCTION_FALLBACKS[name]
            )
            # populate the cache, so this handler won't get executed again
            pynvml._nvmlGetFunctionPointer_cache[name] = ret
            return ret

    @staticmethod  # Note: must be defined as a staticmethod to allow mocking.
    def original_nvmlDeviceGetMemoryInfo(*args, **kwargs):
        """Call the unpatched ``pynvml.nvmlDeviceGetMemoryInfo``."""
        return _original_nvmlDeviceGetMemoryInfo(*args, **kwargs)

    # Tri-state cache: None = not probed yet; True/False = whether the
    # nvmlDeviceGetMemoryInfo_v2 symbol could be resolved.
    has_memoryinfo_v2 = None

    @staticmethod
    @functools.wraps(pynvml.nvmlDeviceGetMemoryInfo)
    def nvmlDeviceGetMemoryInfo(handle):
        """A patched version of nvmlDeviceGetMemoryInfo.

        This tries `version=N.nvmlMemory_v2` if the nvmlDeviceGetMemoryInfo_v2
        function is available (for driver >= 515), or fallback to the legacy
        v1 API for (driver < 515) to yield a correct result. See #141.

        A warning is emitted when the v2 symbol can be resolved but the
        installed pynvml does not define ``nvmlMemory_v2``.
        """
        M = pynvml_monkeypatch

        # Probe for the v2 symbol only once, on the first call. The flag
        # starts out as None, so the check must be `is None`; with the
        # inverted check the probe never ran and the warning below was
        # unreachable.
        if M.has_memoryinfo_v2 is None:
            try:
                pynvml._nvmlGetFunctionPointer("nvmlDeviceGetMemoryInfo_v2")
                M.has_memoryinfo_v2 = True
            except pynvml.NVMLError_FunctionNotFound:  # type: ignore
                M.has_memoryinfo_v2 = False

        if hasattr(pynvml, 'nvmlMemory_v2'):  # pynvml >= 11.510.69
            try:
                memory = M.original_nvmlDeviceGetMemoryInfo(
                    handle, version=pynvml.nvmlMemory_v2)
            except pynvml.NVMLError_FunctionNotFound:  # type: ignore
                # pynvml >= 11.510 but driver is old (<515.39)
                memory = M.original_nvmlDeviceGetMemoryInfo(handle)
        else:
            if M.has_memoryinfo_v2:
                warnings.warn(
                    "Your NVIDIA driver requires a compatible version of "
                    "pynvml (>= 11.510.69) installed to display the correct "
                    "memory usage information (See #141 for more details). "
                    "Please try `pip install --upgrade pynvml`.")
            memory = M.original_nvmlDeviceGetMemoryInfo(handle)

        return memory


# Install the compatibility wrappers by rebinding the attributes on the
# pynvml module itself, so every lookup through `pynvml.<name>` sees them.
pynvml._nvmlGetFunctionPointer = pynvml_monkeypatch._nvmlGetFunctionPointer
pynvml.nvmlDeviceGetMemoryInfo = pynvml_monkeypatch.nvmlDeviceGetMemoryInfo


__all__ = ['pynvml']
Loading