Neuron + GPU: report utilization % of accelerators regardless of execution time #840

Merged
merged 12 commits on Jul 19, 2024
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -54,7 +54,7 @@ scalene = "scalene.__main__:main"
[build-system]
build-backend = "setuptools.build_meta"
requires = [
"setuptools>=65.5.1",
"setuptools>=65.5.1,<71.0", # Pin to setuptools<71.0 to avoid this bug: https://github.com/pypa/setuptools/issues/4496
"setuptools_scm>=8",
"wheel",
"cython",
2 changes: 1 addition & 1 deletion requirements.txt
@@ -9,6 +9,6 @@ packaging==20.9
psutil>=5.9.2
pyperf==2.0.0
rich>=10.7.0
setuptools>=65.5.1
setuptools>=65.5.1,<71.0 # Pin to setuptools<71.0 to avoid this bug: https://github.com/pypa/setuptools/issues/4496
nvidia-ml-py>=12.555.43; platform_system !='Darwin'
wheel~=0.38.1
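
To sanity-check that a resolved environment honors the pin, something along these lines works (packaging is already listed in requirements.txt above; this snippet is illustrative and not part of the PR):

from packaging.specifiers import SpecifierSet
from packaging.version import Version

import setuptools

# The pinned range from pyproject.toml / requirements.txt above.
spec = SpecifierSet(">=65.5.1,<71.0")
assert Version(setuptools.__version__) in spec, setuptools.__version__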
12 changes: 6 additions & 6 deletions scalene/scalene-gui/scalene-gui.js
@@ -904,7 +904,7 @@ function makeBar(python, native, system, params) {
}


function makeGPUPie(util) {
function makeGPUPie(util, gpu_device, params) {
return {
$schema: "https://vega.github.io/schema/vega-lite/v5.json",
config: {
@@ -940,7 +940,7 @@ function makeGPUPie(util) {
legend: false,
scale: { range: ["goldenrod", "#f4e6c2"] },
},
tooltip: [{ field: "c", type: "nominal", title: "GPU" }],
tooltip: [{ field: "c", type: "nominal", title: gpu_device }],
},
};
}
@@ -986,7 +986,7 @@ function makeGPUBar(util, gpu_device, params) {
legend: false,
scale: { range: ["goldenrod", "#f4e6c2"] },
},
tooltip: [{ field: "dd", type: "nominal", title: gpu_device }],
tooltip: [{ field: "dd", type: "nominal", title: gpu_device + ":" } ],
},
},
{
@@ -1004,7 +1004,7 @@
},
text: { field: "d" },
color: { value: "white" },
tooltip: [{ field: "dd", type: "nominal", title: gpu_device }],
tooltip: [{ field: "dd", type: "nominal", title: gpu_device + ":" } ],
},
},
],
@@ -1560,8 +1560,8 @@ function makeProfileLine(
s += `<td style="width: 50; vertical-align: middle" align="right" data-sort="${line.n_gpu_percent}">`;
s += `<span style="height: 20; width: 30; vertical-align: middle" id="gpu_pie${gpu_pies.length}"></span>`;
s += "</td>";
// gpu_pies.push(makeGPUPie(line.n_gpu_percent));
gpu_pies.push(makeGPUBar(line.n_gpu_percent, prof.gpu_device, { height: 20, width: 100 }));
gpu_pies.push(makeGPUPie(line.n_gpu_percent, prof.gpu_device, { height: 20, width: 100 }));
// gpu_pies.push(makeGPUBar(line.n_gpu_percent, prof.gpu_device, { height: 20, width: 100 }));
}
if (true) {
if (line.n_gpu_peak_memory_mb < 1.0 || line.n_gpu_percent < 1.0) {
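For context, the tooltip title the GUI shows now comes from the profile itself rather than a hardcoded "GPU". A small sketch of the fields involved; only the names gpu_device and n_gpu_percent come from the diff, and the nesting and values here are illustrative:

# Illustrative only: the exact profile layout is not shown in this diff.
prof = {
    "gpu": True,
    "gpu_device": "Neuron",  # or "GPU"; set by the active accelerator backend
}
line = {"n_gpu_percent": 87.5}  # per-line utilization, already a percentage

# The GUI then calls (in JavaScript):
#   makeGPUPie(line.n_gpu_percent, prof.gpu_device, { height: 20, width: 100 })
# so the hover tooltip is titled with the device name.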
7 changes: 5 additions & 2 deletions scalene/scalene_accelerator.py
@@ -1,9 +1,10 @@
from typing import Tuple
from abc import ABC, abstractmethod


# Base class for accelerators (GPUs, TPUs, etc.)
class ScaleneAccelerator(ABC):

@abstractmethod
def has_gpu(self) -> bool:
pass
@@ -20,4 +21,6 @@ def reinit(self) -> None:
def get_stats(self) -> Tuple[float, float]:
pass


@abstractmethod
def get_num_cores(self) -> int:
pass
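
With get_num_cores() added to the interface, every backend must now report a core count. A minimal sketch of a conforming backend, assuming only the methods visible in this diff and in the Apple wrapper below; the class name and all returned values are hypothetical:

from typing import Tuple

from scalene.scalene_accelerator import ScaleneAccelerator


class DummyAccelerator(ScaleneAccelerator):
    """Hypothetical backend illustrating the expanded interface."""

    def has_gpu(self) -> bool:
        return True

    def gpu_device(self) -> str:
        return "DummyGPU"  # mirrors the Apple wrapper's gpu_device()

    def reinit(self) -> None:
        pass  # nothing to reset in this sketch

    def get_stats(self) -> Tuple[float, float]:
        # (utilization %, memory in use), per the Apple wrapper's docstring
        return (50.0, 128.0)

    def get_num_cores(self) -> int:
        return 4  # illustrative core count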
7 changes: 6 additions & 1 deletion scalene/scalene_apple_gpu.py
@@ -6,6 +6,7 @@

from scalene.scalene_accelerator import ScaleneAccelerator


class ScaleneAppleGPU(ScaleneAccelerator):
"""Wrapper class for Apple integrated GPU statistics."""

@@ -22,7 +23,7 @@ def __init__(self, sampling_frequency: int = 100) -> None:

def gpu_device(self) -> str:
return "GPU"

def has_gpu(self) -> bool:
"""True iff there is a GPU"""
# Disabling Apple GPU, since it does not collect per-process statistics.
@@ -32,6 +33,10 @@ def reinit(self) -> None:
"""A NOP, here for compatibility with the nvidia wrapper."""
return

def get_num_cores(self) -> int:
# FIXME: not yet implemented
return 1

def get_stats(self) -> Tuple[float, float]:
"""Returns a tuple of (utilization%, memory in use)"""
if not self.has_gpu():
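Callers can treat any backend uniformly through this interface. A brief, hypothetical polling sketch; the loop, function name, and intervals are assumptions, not code from the PR:

import time

def sample_accelerator(accel, n_samples: int = 3, interval_s: float = 0.1) -> None:
    """Poll an accelerator a few times and print its utilization."""
    if not accel.has_gpu():
        return
    cores = accel.get_num_cores()
    for _ in range(n_samples):
        util, mem_mb = accel.get_stats()  # (utilization %, memory in use)
        print(f"{accel.gpu_device()}: {util:.1f}% across {cores} core(s), {mem_mb:.0f} MB")
        time.sleep(interval_s)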
48 changes: 33 additions & 15 deletions scalene/scalene_json.py
@@ -13,6 +13,7 @@

import numpy as np


class ScaleneJSON:
@staticmethod
def memory_consumed_str(size_in_mb: float) -> str:
@@ -65,19 +66,25 @@
self.gpu_device = ""

def rdp(self, points, epsilon):
"""
Ramer-Douglas-Peucker algorithm implementation using NumPy
"""
Ramer-Douglas-Peucker algorithm implementation using NumPy
"""

def perpendicular_distance(point, start, end):
if np.all(start == end):
return np.linalg.norm(point - start)
return np.abs(
    np.cross(end - start, start - point)
    / np.linalg.norm(end - start)
)

def recursive_rdp(points, start: int, end: int, epsilon: float):
dmax = 0.0
index = start
for i in range(start + 1, end):
d = perpendicular_distance(
    points[i], points[start], points[end]
)
if d > dmax:
index = i
dmax = d
Expand All @@ -92,9 +99,9 @@
start = 0
end = len(points) - 1
return np.array(recursive_rdp(points, start, end, epsilon))
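
compress_samples currently bypasses RDP (the `if True:` branch below), but the method can be exercised on its own. A small illustrative call, assuming ScaleneJSON() constructs without arguments; the points and epsilon are made up:

import numpy as np

from scalene.scalene_json import ScaleneJSON

writer = ScaleneJSON()  # assumption: no required constructor arguments

# Three collinear points plus one outlier. With epsilon = 0.5 the first
# interior point lies within tolerance of the simplified line and is
# dropped, while the outlier forces a split and survives.
points = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [3.0, 10.0]])
print(writer.rdp(points, epsilon=0.5))
# expected (roughly): [[ 0.  0.] [ 2.  2.] [ 3. 10.]]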

def compress_samples(
self, samples: List[Any], max_footprint: float
) -> Any:
# Try to reduce the number of samples with the
# Ramer-Douglas-Peucker algorithm, which attempts to
Expand All @@ -107,19 +114,28 @@

if True:
# FIXME: bypassing RDP for now
# return samples[:self.max_sparkline_samples]

new_samples = sorted(
    random.sample(
        list(map(tuple, samples)), self.max_sparkline_samples
    )
)
return new_samples

else:
epsilon = (len(samples) / (3 * self.max_sparkline_samples)) * 2

# Use NumPy for RDP algorithm
new_samples = self.rdp(np.array(samples), epsilon)

if len(new_samples) > self.max_sparkline_samples:
new_samples = sorted(
    random.sample(
        list(map(tuple, new_samples)),
        self.max_sparkline_samples,
    )
)

return new_samples

@@ -180,11 +196,13 @@
n_cpu_percent_c = 0
n_cpu_percent_python = 0

n_gpu_percent = n_gpu_samples * 100
# n_gpu_percent = n_gpu_samples * 100

if False:
if stats.total_gpu_samples:
n_gpu_percent = n_gpu_samples * 100 / stats.total_gpu_samples
if True:

Check warning (Code scanning / CodeQL): Constant in conditional expression or statement. Testing a constant will always give the same result.
if stats.n_gpu_samples[fname][line_no]:
n_gpu_percent = (
n_gpu_samples * 100 / stats.n_gpu_samples[fname][line_no]
) # total_gpu_samples
else:
n_gpu_percent = 0
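
This hunk is the heart of the PR: previously a line's GPU utilization was normalized by stats.total_gpu_samples for the whole run, so a line that ran briefly but kept the accelerator busy while it ran reported a tiny percentage. Normalizing by the samples attributed to that line reports utilization regardless of execution time. A numeric sketch; all counts here are invented for illustration:

# Invented numbers for illustration only.
n_gpu_samples = 4.5        # accelerator-busy samples attributed to this line
line_samples = 5.0         # all samples attributed to this line
total_gpu_samples = 900.0  # samples for the entire run

old_percent = n_gpu_samples * 100 / total_gpu_samples  # 0.5%: drowned out by run length
new_percent = n_gpu_samples * 100 / line_samples       # 90.0%: busy while this line ran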

@@ -358,7 +376,7 @@
),
"files": {},
"gpu": self.gpu,
"gpu_device" : self.gpu_device,
"gpu_device": self.gpu_device,
"memory": profile_memory,
"samples": stats.memory_footprint_samples,
"stacks": stks,