-
Notifications
You must be signed in to change notification settings - Fork 92
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add option to track RMM allocations #842
Changes from 8 commits
0c6766e
ad4f2c0
cdde589
e63f3eb
ad66c14
b63abb7
c4fbd2a
a5c5a8b
ea75dea
9196be4
6106813
085910a
d2c3771
4da1fc9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -253,3 +253,34 @@ def test_cuda_visible_devices_uuid(loop): # noqa: F811 | |
|
||
result = client.run(lambda: os.environ["CUDA_VISIBLE_DEVICES"]) | ||
assert list(result.values())[0] == gpu_uuid | ||
|
||
|
||
def test_rmm_track_allocations(loop):  # noqa: F811
    """Verify ``--rmm-track-allocations`` on ``dask-cuda-worker``.

    Starts a scheduler and a worker with an RMM pool plus allocation
    tracking enabled, then checks on every worker that the current RMM
    resource is a ``TrackingResourceAdaptor`` wrapping the requested
    ``PoolMemoryResource``.
    """
    rmm = pytest.importorskip("rmm")

    scheduler_cmd = ["dask-scheduler", "--port", "9369", "--no-dashboard"]
    worker_cmd = [
        "dask-cuda-worker",
        "127.0.0.1:9369",
        "--host",
        "127.0.0.1",
        "--rmm-pool-size",
        "2 GB",
        "--no-dashboard",
        "--rmm-track-allocations",
    ]

    # Scheduler first, then the worker that connects to it.
    with popen(scheduler_cmd), popen(worker_cmd):
        with Client("127.0.0.1:9369", loop=loop) as client:
            assert wait_workers(client, n_gpus=get_n_gpus())

            # The top-level resource on each worker must be the tracking
            # adaptor installed by --rmm-track-allocations.
            resource_types = client.run(rmm.mr.get_current_device_resource_type)
            for resource_type in resource_types.values():
                assert resource_type is rmm.mr.TrackingResourceAdaptor

            # ...and it must wrap the pool created by --rmm-pool-size.
            upstream_types = client.run(
                lambda: type(rmm.mr.get_current_device_resource().upstream_mr)
            )
            for upstream_type in upstream_types.values():
                assert upstream_type is rmm.mr.PoolMemoryResource
Comment on lines
+295
to
+323
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! I added that. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,7 @@ def __init__( | |
managed_memory, | ||
async_alloc, | ||
log_directory, | ||
track_allocations, | ||
): | ||
if initial_pool_size is None and maximum_pool_size is not None: | ||
raise ValueError( | ||
|
@@ -65,10 +66,12 @@ def __init__( | |
self.async_alloc = async_alloc | ||
self.logging = log_directory is not None | ||
self.log_directory = log_directory | ||
self.rmm_track_allocations = track_allocations | ||
|
||
def setup(self, worker=None): | ||
import rmm | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we safe to always import RMM here? If this function is always run as part of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great catch @charlesbluca , indeed I failed to realize that. Yes, we should remove it from the "main" context of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks both! I've accepted @pentschev's suggestions here. |
||
|
||
if self.async_alloc: | ||
shwina marked this conversation as resolved.
Show resolved
Hide resolved
pentschev marked this conversation as resolved.
Show resolved
Hide resolved
|
||
import rmm | ||
|
||
rmm.mr.set_current_device_resource(rmm.mr.CudaAsyncMemoryResource()) | ||
if self.logging: | ||
|
@@ -78,7 +81,6 @@ def setup(self, worker=None): | |
) | ||
) | ||
elif self.initial_pool_size is not None or self.managed_memory: | ||
pentschev marked this conversation as resolved.
Show resolved
Hide resolved
|
||
import rmm | ||
|
||
pool_allocator = False if self.initial_pool_size is None else True | ||
|
||
|
@@ -92,6 +94,9 @@ def setup(self, worker=None): | |
worker, self.logging, self.log_directory | ||
), | ||
) | ||
if self.rmm_track_allocations: | ||
shwina marked this conversation as resolved.
Show resolved
Hide resolved
|
||
mr = rmm.mr.get_current_device_resource() | ||
rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr)) | ||
|
||
|
||
def unpack_bitmask(x, mask_bits=64): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you also add docstrings for the new parameter in here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!