Skip to content

Commit

Permalink
[ci][perf][core] ray.wait & ray.get ownership check improvement (#31539)
Browse files Browse the repository at this point in the history
There was regression seen due to O(n) checking of object ownership at `ray.get` and `ray.wait` (see more in #31284) 

This PR:
1.  adds a more efficient checking that doesn't allocate `rpc::Address`. 
2. adds a microbenchmark that tests `ray.wait` with 1k reference objects in loop

TODO:
- add the microbenchmark to core dashboard.
  • Loading branch information
rickyyx authored and AmeerHajAli committed Jan 12, 2023
1 parent 0fc50ad commit e87bb6c
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 6 deletions.
8 changes: 8 additions & 0 deletions python/ray/_private/ray_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,14 @@ def get_containing_object_ref():
"single client get object containing 10k refs", get_containing_object_ref
)

def wait_multiple_refs():
num_objs = 1000
not_ready = [small_value.remote() for _ in range(num_objs)]
for _ in range(num_objs):
_ready, not_ready = ray.wait(not_ready)

results += timeit("single client wait 1k refs", wait_multiple_refs)

def small_task():
ray.get(small_value.remote())

Expand Down
12 changes: 6 additions & 6 deletions src/ray/core_worker/core_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,10 @@ CoreWorker::GetAllReferenceCounts() const {

const rpc::Address &CoreWorker::GetRpcAddress() const { return rpc_address_; }

bool CoreWorker::HasOwner(const ObjectID &object_id) const {
return reference_counter_->HasOwner(object_id);
}

rpc::Address CoreWorker::GetOwnerAddressOrDie(const ObjectID &object_id) const {
rpc::Address owner_address;
auto status = GetOwnerAddress(object_id, &owner_address);
Expand Down Expand Up @@ -1210,9 +1214,7 @@ Status CoreWorker::Get(const std::vector<ObjectID> &ids,
std::ostringstream ids_stream;

for (size_t i = 0; i < ids.size(); i++) {
rpc::Address unused_owner_address;
auto status = GetOwnerAddress(ids[i], &unused_owner_address);
if (status.IsObjectUnknownOwner()) {
if (!HasOwner(ids[i])) {
ids_stream << ids[i] << " ";
got_exception = true;
}
Expand Down Expand Up @@ -1382,9 +1384,7 @@ Status CoreWorker::Wait(const std::vector<ObjectID> &ids,
std::ostringstream ids_stream;

for (size_t i = 0; i < ids.size(); i++) {
rpc::Address unused_owner_address;
auto status = GetOwnerAddress(ids[i], &unused_owner_address);
if (status.IsObjectUnknownOwner()) {
if (!HasOwner(ids[i])) {
ids_stream << ids[i] << " ";
missing_owners += 1;
}
Expand Down
3 changes: 3 additions & 0 deletions src/ray/core_worker/core_worker.h
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,9 @@ class CoreWorker : public rpc::CoreWorkerServiceHandler {
/// Record metric for executed and owned tasks. Will be run periodically.
void RecordMetrics();

/// Check if there is an owner of the object from the ReferenceCounter.
bool HasOwner(const ObjectID &object_id) const;

/// Helper method to fill in object status reply given an object.
void PopulateObjectStatus(const ObjectID &object_id,
std::shared_ptr<RayObject> obj,
Expand Down
5 changes: 5 additions & 0 deletions src/ray/core_worker/reference_count.cc
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,11 @@ void ReferenceCounter::RemoveSubmittedTaskReferences(
}
}

bool ReferenceCounter::HasOwner(const ObjectID &object_id) const {
absl::MutexLock lock(&mutex_);
return object_id_refs_.find(object_id) != object_id_refs_.end();
}

bool ReferenceCounter::GetOwner(const ObjectID &object_id,
rpc::Address *owner_address) const {
absl::MutexLock lock(&mutex_);
Expand Down
9 changes: 9 additions & 0 deletions src/ray/core_worker/reference_count.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ class ReferenceCounter : public ReferenceCounterInterface,

/// Get the owner address of the given object.
///
/// Use `HasOwner` instead if the caller doesn't need to use owner_address for
/// performance.
///
/// \param[in] object_id The ID of the object to look up.
/// \param[out] owner_address The address of the object owner.
/// \return false if the object is out of scope or we do not yet have
Expand All @@ -233,6 +236,12 @@ class ReferenceCounter : public ReferenceCounterInterface,
bool GetOwner(const ObjectID &object_id, rpc::Address *owner_address = nullptr) const
LOCKS_EXCLUDED(mutex_);

/// Check if the object has an owner.
///
/// \param[in] object_id The ID of the object.
/// \return if the object has an owner.
bool HasOwner(const ObjectID &object_id) const LOCKS_EXCLUDED(mutex_);

/// Get the owner addresses of the given objects. The owner address
/// must be registered for these objects.
///
Expand Down

0 comments on commit e87bb6c

Please sign in to comment.