diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 7add694a04f68..6af73d8f48958 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -212,10 +212,6 @@ class AllocatorFacadePrivate { platform::CustomPlace(dev_type, dev_id)); } } - if (FLAGS_use_stream_safe_cuda_allocator) { - WrapStreamSafeCustomDeviceAllocatorForDefault(); - is_stream_safe_cuda_allocator_used_ = true; - } #endif break; } @@ -576,13 +572,14 @@ class AllocatorFacadePrivate { #endif #ifdef PADDLE_WITH_CUSTOM_DEVICE - bool HasCustomDevice(const platform::CustomPlace& place, - phi::stream::stream_t stream) { + bool HasCustomDeviceAllocator(const platform::CustomPlace& place, + phi::stream::stream_t stream) { auto it = custom_device_allocators_.find(place); if (it == custom_device_allocators_.end()) { return false; } - auto& allocator_map = it->second; + const std::map>& + allocator_map = it->second; return allocator_map.find(stream) != allocator_map.end(); } @@ -590,10 +587,15 @@ class AllocatorFacadePrivate { const platform::CustomPlace& place, phi::stream::stream_t stream, bool create_if_not_found = false) { + if (stream == GetDefaultStream(place)) { + VLOG(7) << "Get Allocator by passing in a default stream"; + return GetAllocator(place, /* A non-zero num to choose allocator_ */ 1); + } + /* shared_lock_guard */ { std::shared_lock lock_guard( custom_device_allocator_mutex_); - if (LIKELY(HasCustomDevice(place, stream))) { + if (LIKELY(HasCustomDeviceAllocator(place, stream))) { return custom_device_allocators_[place][stream]; } else { PADDLE_ENFORCE_NE(create_if_not_found, @@ -627,17 +629,11 @@ class AllocatorFacadePrivate { return iter->second; } - void RecordStream(std::shared_ptr allocation, - phi::stream::stream_t stream) { - std::shared_ptr - stream_safe_custom_device_allocation = - std::dynamic_pointer_cast( - allocation); - if (stream_safe_custom_device_allocation != nullptr) { - stream_safe_custom_device_allocation->RecordStream(stream); - } else { - VLOG(6) << "RecordStream for a non-StreamSafeCustomDeviceAllocation"; - } + phi::stream::stream_t GetDefaultStream( + const platform::CustomPlace& place) const { + const std::shared_ptr& allocator = + GetDefaultStreamSafeCustomDeviceAllocator(place); + return allocator->GetDefaultStream(); } void SetDefaultStream(const platform::CustomPlace& place, @@ -662,6 +658,34 @@ class AllocatorFacadePrivate { << ") in " << place; } + void RecordStream(std::shared_ptr allocation, + phi::stream::stream_t stream) { + std::shared_ptr + stream_safe_custom_device_allocation = + std::dynamic_pointer_cast( + allocation); + if (stream_safe_custom_device_allocation != nullptr) { + stream_safe_custom_device_allocation->RecordStream(stream); + } else { + VLOG(6) << "RecordStream for a non-StreamSafeCustomDeviceAllocation"; + } + } + + phi::stream::stream_t GetStream( + const std::shared_ptr& allocation) const { + const std::shared_ptr + stream_safe_custom_device_allocation = + std::dynamic_pointer_cast( + allocation); + if (stream_safe_custom_device_allocation != nullptr) { + return stream_safe_custom_device_allocation->GetOwningStream(); + } + + VLOG(6) << "GetStream for a non-StreamSafeCustomDeviceAllocation"; + return static_cast( + platform::DeviceContextPool::Instance().Get(allocation->place())) + ->stream(); + } #endif private: @@ -1108,10 +1132,41 @@ class AllocatorFacadePrivate { allocators_[p] = std::make_shared(p); } - void InitNaiveBestFitCustomDeviceAllocator(platform::CustomPlace p, - phi::stream::stream_t stream) { + std::shared_ptr CreateCustomDeviceAllocator( + platform::CustomPlace p) { + return std::make_shared(p); + } + + void InitStreamSafeCustomDeviceAllocator(platform::CustomPlace p, + phi::stream::stream_t stream) { + PADDLE_ENFORCE_EQ( + strategy_, + AllocatorStrategy::kAutoGrowth, + platform::errors::Unimplemented( + "Only support auto-growth strategey for " + "StreamSafeCustomDeviceAllocator, " + "the allocator strategy %d is unsupported for multi-stream", + static_cast(strategy_))); + if (LIKELY(!HasCustomDeviceAllocator(p, stream))) { + VLOG(8) << "Init StreamSafeCustomDeviceAllocator for stream " << stream + << " in place " << p; + InitAutoGrowthCustomDeviceAllocator(p, stream); + WrapStreamSafeCustomDeviceAllocator(p, stream); + } + } + + void InitAutoGrowthCustomDeviceAllocator(platform::CustomPlace p, + phi::stream::stream_t stream) { + auto chunk_size = FLAGS_auto_growth_chunk_size_in_mb << 20; + VLOG(4) << "FLAGS_auto_growth_chunk_size_in_mb is " + << FLAGS_auto_growth_chunk_size_in_mb; + + auto custom_allocator = + std::make_shared(p); + auto alignment = phi::DeviceManager::GetMinChunkSize(p); custom_device_allocators_[p][stream] = - std::make_shared(p); + std::make_shared( + custom_allocator, alignment, chunk_size, allow_free_idle_chunk_); } void InitAutoGrowthCustomDeviceAllocator(platform::CustomPlace p, @@ -1146,20 +1201,6 @@ class AllocatorFacadePrivate { } } - void InitAutoGrowthCustomDeviceAllocator(platform::CustomPlace p, - phi::stream::stream_t stream) { - auto chunk_size = FLAGS_auto_growth_chunk_size_in_mb << 20; - VLOG(4) << "FLAGS_auto_growth_chunk_size_in_mb is " - << FLAGS_auto_growth_chunk_size_in_mb; - - auto custom_allocator = - std::make_shared(p); - auto alignment = phi::DeviceManager::GetMinChunkSize(p); - custom_device_allocators_[p][stream] = - std::make_shared( - custom_allocator, alignment, chunk_size, allow_free_idle_chunk_); - } - void WrapStreamSafeCustomDeviceAllocator(platform::CustomPlace p, phi::stream::stream_t stream) { std::shared_ptr& allocator = @@ -1167,18 +1208,6 @@ class AllocatorFacadePrivate { allocator = std::make_shared(allocator, p, stream); } - - void InitStreamSafeCustomDeviceAllocator(platform::CustomPlace p, - phi::stream::stream_t stream) { - VLOG(8) << "Init CustomDevice allocator for stream " << stream - << " in place " << p; - if (strategy_ == AllocatorStrategy::kAutoGrowth) { - InitAutoGrowthCustomDeviceAllocator(p, stream); - } else { - InitNaiveBestFitCustomDeviceAllocator(p, stream); - } - WrapStreamSafeCustomDeviceAllocator(p, stream); - } #endif void InitSystemAllocators() { @@ -1419,12 +1448,20 @@ AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, const phi::Stream& stream) { #ifdef PADDLE_WITH_CUSTOM_DEVICE if (platform::is_custom_place(place)) { + if (!GetPrivate()->IsStreamSafeCUDAAllocatorUsed()) { + VLOG(6) << "Warning: StreamSafeCustomDeviceAllocator is not used!"; + return Alloc(place, size); + } platform::CustomPlace p(place); - phi::stream::stream_t s = - reinterpret_cast(stream.id()); - return GetPrivate() - ->GetAllocator(p, s, /* create_if_not_found = */ true) - ->Allocate(size); + if (LIKELY(size > 0 && FLAGS_use_system_allocator == false)) { + phi::stream::stream_t s = + reinterpret_cast(stream.id()); + return GetPrivate() + ->GetAllocator(p, s, /* create_if_not_found = */ true) + ->Allocate(size); + } else { + return GetPrivate()->GetAllocator(p, size)->Allocate(size); + } } #endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -1552,10 +1589,32 @@ void AllocatorFacade::RemoveMemoryPoolOfCUDAGraph(int64_t id) { #endif #ifdef PADDLE_WITH_CUSTOM_DEVICE +uint64_t AllocatorFacade::Release(const platform::CustomPlace& place, + phi::stream::stream_t stream) { + AllocatorFacadePrivate* m = GetPrivate(); + if (!m->IsStreamSafeCUDAAllocatorUsed()) { + VLOG(6) << "Warning: StreamSafeCustomDeviceAllocator is not used!"; + return Release(place); + } + + return m->GetAllocator(place, stream)->Release(place); +} + +void AllocatorFacade::RecordStream(std::shared_ptr allocation, + phi::stream::stream_t stream) { + GetPrivate()->RecordStream(allocation, stream); +} + const std::shared_ptr& AllocatorFacade::GetAllocator( const platform::Place& place, phi::stream::stream_t stream) { AllocatorFacadePrivate* m = GetPrivate(); + if (!m->IsStreamSafeCUDAAllocatorUsed()) { + VLOG(6) << "Warning: StreamSafeCustomDeviceAllocator is not used!"; + return GetAllocator(place); + } + + if (platform::is_custom_place(place) && FLAGS_use_system_allocator == false) { return m->GetAllocator(place, stream, /*create_if_not_found=*/true); @@ -1563,9 +1622,9 @@ const std::shared_ptr& AllocatorFacade::GetAllocator( return m->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1); } -void AllocatorFacade::RecordStream(std::shared_ptr allocation, - phi::stream::stream_t stream) { - GetPrivate()->RecordStream(allocation, stream); +phi::stream::stream_t AllocatorFacade::GetStream( + const std::shared_ptr& allocation) const { + return GetPrivate()->GetStream(allocation); } void AllocatorFacade::SetDefaultStream(const platform::CustomPlace& place, diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h index 0131d56c6f642..9d2c85eccf455 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.h +++ b/paddle/fluid/memory/allocation/allocator_facade.h @@ -97,11 +97,14 @@ class AllocatorFacade { #endif #ifdef PADDLE_WITH_CUSTOM_DEVICE + uint64_t Release(const platform::CustomPlace& place, + phi::stream::stream_t stream); + void RecordStream(std::shared_ptr allocation, + phi::stream::stream_t stream); const std::shared_ptr& GetAllocator(const platform::Place& place, phi::stream::stream_t stream); - void RecordStream(std::shared_ptr allocation, - phi::stream::stream_t stream); - + phi::stream::stream_t GetStream( + const std::shared_ptr& allocation) const; void SetDefaultStream(const platform::CustomPlace& place, phi::stream::stream_t stream); #endif