Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Apple system metrics support #3377

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.pytorch.serve.device.Accelerator;
import org.pytorch.serve.device.AcceleratorVendor;
import org.pytorch.serve.device.interfaces.IAcceleratorUtility;
Expand Down Expand Up @@ -75,15 +74,12 @@ public List<JsonObject> extractAccelerators(JsonElement rootObject) {
.getAsJsonObject() // Gets the outer object
.get("SPDisplaysDataType") // Gets the "SPDisplaysDataType" element
.getAsJsonArray();

JsonObject gpuObject = displaysArray.get(0).getAsJsonObject();
int number_of_cores = Integer.parseInt(gpuObject.get("sppci_cores").getAsString());

// add the object `number_of_cores` times to maintain the existing
// functionality
accelerators =
IntStream.range(0, number_of_cores)
.mapToObj(i -> gpuObject)
.collect(Collectors.toList());

// Create a list with only a single accelerator object, as
// M1, M2, and M3 Macs have only a single integrated GPU
accelerators = Collections.singletonList(gpuObject);

return accelerators;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1372,8 +1372,17 @@ public void testMetricManager() throws JsonParseException, InterruptedException
Assert.assertTrue(++count < 5);
}

// 7 system-level metrics + 3 gpu-specific metrics
Assert.assertEquals(metrics.size(), 7 + 3 * configManager.getNumberOfGpu());
// Determine if the device is Apple or not
String vendor = System.getProperty("os.name");
if (vendor != null) {
if (vendor.startsWith("Mac")) {
// 7 system-level metrics + 2 gpu-specific metrics (per GPU) for Apple devices
Assert.assertEquals(metrics.size(), 7 + 2 * configManager.getNumberOfGpu());
} else {
// 7 system-level metrics + 3 gpu-specific metrics (per GPU) for non-Apple devices
Assert.assertEquals(metrics.size(), 7 + 3 * configManager.getNumberOfGpu());
}
}

for (Metric metric : metrics) {
String metricName = metric.getMetricName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public void testExtractAcceleratorId() {
public void testExtractAccelerators() {
List<JsonObject> accelerators = appleUtil.extractAccelerators(sampleOutputJson);

assertEquals(accelerators.size(), 7);
assertEquals(accelerators.size(), 1);
assertEquals(accelerators.get(0).get("sppci_model").getAsString(), "Apple M1");
}

Expand All @@ -88,7 +88,7 @@ public void testSmiOutputToUpdatedAccelerators() {
ArrayList<Accelerator> updatedAccelerators =
appleUtil.smiOutputToUpdatedAccelerators(sampleOutputJson.toString(), parsedGpuIds);

assertEquals(updatedAccelerators.size(), 7);
assertEquals(updatedAccelerators.size(), 1);
Accelerator accelerator = updatedAccelerators.get(0);
assertEquals(accelerator.getAcceleratorModel(), "Apple M1");
assertEquals(accelerator.getVendor(), AcceleratorVendor.APPLE);
Expand All @@ -112,7 +112,7 @@ public String[] getUtilizationSmiCommand() {
ArrayList<Accelerator> availableAccelerators =
spyAppleUtil.getAvailableAccelerators(availableAcceleratorIds);

assertEquals(availableAccelerators.size(), 7);
assertEquals(availableAccelerators.size(), 1);
Accelerator accelerator = availableAccelerators.get(0);
assertEquals(accelerator.getAcceleratorModel(), "Apple M1");
assertEquals(accelerator.getVendor(), AcceleratorVendor.APPLE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ public void testNumGpuM1() throws ReflectiveOperationException, IOException {
String mac_arm64_cpu_only = System.getenv().getOrDefault("TS_MAC_ARM64_CPU_ONLY", "False");
if (arch.equals("aarch64")) {
if (mac_arm64_cpu_only.equals("True")) {
Assert.assertEquals(configManager.getNumberOfGpu(), 0);
// Mac M1 returns 1 accelerator device
Assert.assertEquals(configManager.getNumberOfGpu(), 1);
} else {
Assert.assertTrue(configManager.getNumberOfGpu() > 0);
}
Expand Down
14 changes: 14 additions & 0 deletions ts/metrics/system_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ def collect_gpu_metrics(num_of_gpus):
amdsmi.amdsmi_shut_down()
except amdsmi.AmdSmiException as e:
logging.error("Could not shut down AMD-SMI library.")
elif torch.backends.mps.is_available():
try:
total_memory = torch.mps.driver_allocated_memory()
mem_used = torch.mps.current_allocated_memory()
gpu_mem_utilization = (
(mem_used / total_memory * 100) if total_memory > 0 else 0
)
# Currently there is no way to calculate GPU utilization with MPS.
gpu_utilization = None
except Exception as e:
logging.error(f"Could not capture MPS memory metrics")
mem_used = 0
gpu_mem_utilization = 0
gpu_utilization = None

dimension_gpu = [
Dimension("Level", "Host"),
Expand Down
Loading