Skip to content

Commit

Permalink
Save device properties in globals (#225)
Browse files Browse the repository at this point in the history
  • Loading branch information
Duttenheim authored Sep 3, 2024
1 parent 960a47f commit 510287e
Show file tree
Hide file tree
Showing 10 changed files with 142 additions and 101 deletions.
31 changes: 31 additions & 0 deletions code/render/coregraphics/graphicsdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,37 @@ extern bool VariableRateShadingSupported;
extern bool MeshShadersSupported;
extern bool NvidiaCheckpointsSupported;

/// Cached device limits and properties. The Vulkan backend assigns these in SetupAdapter
/// from the properties of the adapter it selects; the Vulkan source of each value is noted.

/// Minimum offset alignment for read-write buffers (Vulkan: minStorageBufferOffsetAlignment)
extern uint ReadWriteBufferAlignment;
/// Minimum offset alignment for constant buffers (Vulkan: minUniformBufferOffsetAlignment)
extern uint ConstantBufferAlignment;
/// Largest range of a constant buffer binding (Vulkan: maxUniformBufferRange)
extern uint64 MaxConstantBufferSize;
/// Largest push constant range (Vulkan: maxPushConstantsSize)
extern uint MaxPushConstantSize;
/// Address space available to sparse resources (Vulkan: sparseAddressSpaceSize)
extern uint64 SparseAddressSize;

/// Per shader stage resource limits (Vulkan: maxPerStageDescriptor* limits)
extern uint MaxPerStageConstantBuffers;
extern uint MaxPerStageReadWriteBuffers;
extern uint MaxPerStageSampledImages;
extern uint MaxPerStageReadWriteImages;
extern uint MaxPerStageSamplers;
extern uint MaxPerStageInputAttachments;

/// Per resource table limits (Vulkan: maxDescriptorSet* limits)
extern uint MaxResourceTableConstantBuffers;
extern uint MaxResourceTableDynamicOffsetConstantBuffers;
extern uint MaxResourceTableReadWriteBuffers;
extern uint MaxResourceTableDynamicOffsetReadWriteBuffers;
extern uint MaxResourceTableSampledImages;
extern uint MaxResourceTableReadWriteImages;
extern uint MaxResourceTableSamplers;
extern uint MaxResourceTableInputAttachments;

extern uint MemoryRangeGranularity; // Smallest number of bytes allowed for a non-coherent memory write; flush and invalidate ranges are aligned to it (Vulkan: nonCoherentAtomSize)
/// Timestamp period of the device (Vulkan: timestampPeriod).
/// NOTE(review): the Vulkan limit is a float, so storing it in a uint drops the
/// fractional part -- confirm whether this should be a float.
extern uint TimestampPeriod;

/// Raytracing properties
/// Minimum offset alignment of acceleration structure scratch memory (Vulkan: minAccelerationStructureScratchOffsetAlignment)
extern uint AccelerationStructureScratchAlignment;
/// Base alignment of shader groups in a shader table (Vulkan: shaderGroupBaseAlignment)
extern uint ShaderGroupAlignment;
/// Size of a shader group handle (Vulkan: shaderGroupHandleSize)
extern uint64 ShaderGroupSize;
/// Maximum ray recursion depth (Vulkan: maxRayRecursionDepth)
extern uint MaxRecursionDepth;

struct GraphicsDeviceCreateInfo
{
uint64 globalConstantBufferMemorySize;
Expand Down
30 changes: 13 additions & 17 deletions code/render/coregraphics/vk/vkbuffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ CreateBuffer(const BufferCreateInfo& info)
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR,
0x0,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR
Expand Down Expand Up @@ -147,28 +147,26 @@ CreateBuffer(const BufferCreateInfo& info)
else if (info.mode == HostCached)
pool = CoreGraphics::MemoryPool_HostCached;

uint baseAlignment = 1;
if (AllBits(info.usageFlags, CoreGraphics::AccelerationStructureScratch))
baseAlignment = CoreGraphics::GetCurrentAccelerationStructureProperties().minAccelerationStructureScratchOffsetAlignment;
else if (AllBits(info.usageFlags, CoreGraphics::AccelerationStructureInstances))

VkMemoryRequirements memoryReqs;
vkGetBufferMemoryRequirements(loadInfo.dev, runtimeInfo.buf, &memoryReqs);

uint baseAlignment = memoryReqs.alignment;
if (AllBits(info.usageFlags, CoreGraphics::AccelerationStructureInstances))
baseAlignment = 16;
else if (AllBits(info.usageFlags, CoreGraphics::ShaderTable))
baseAlignment = CoreGraphics::GetCurrentRaytracingProperties().shaderGroupBaseAlignment;
if (AllBits(info.usageFlags, CoreGraphics::ShaderTable))
baseAlignment = Math::max(baseAlignment, CoreGraphics::ShaderGroupAlignment);

if (info.sparse)
{
Ids::Id32 sparseExtension = bufferSparseExtensionAllocator.Alloc();
loadInfo.sparseExtension = sparseExtension;
BufferSparsePageTable& table = bufferSparseExtensionAllocator.Get<BufferExtension_SparsePageTable>(sparseExtension);

VkMemoryRequirements memoryReqs;
vkGetBufferMemoryRequirements(loadInfo.dev, runtimeInfo.buf, &memoryReqs);

VkPhysicalDeviceProperties devProps = GetCurrentProperties();
n_assert(memoryReqs.size < devProps.limits.sparseAddressSpaceSize);
n_assert(memoryReqs.size < CoreGraphics::SparseAddressSize);

table.memoryReqs = memoryReqs;
table.bindCounts = size / memoryReqs.alignment;
table.bindCounts = size / baseAlignment;
table.pages.Resize(table.bindCounts);

SizeT offset = 0;
Expand Down Expand Up @@ -204,13 +202,11 @@ CreateBuffer(const BufferCreateInfo& info)
// if not host-local memory, we need to flush the initial update
if (info.mode == HostCached || info.mode == DeviceAndHost)
{
VkPhysicalDeviceProperties props = Vulkan::GetCurrentProperties();

VkMappedMemoryRange range;
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.pNext = nullptr;
range.offset = Math::align_down(alloc.offset, props.limits.nonCoherentAtomSize);
range.size = Math::align(alloc.size, props.limits.nonCoherentAtomSize);
range.offset = Math::align_down(alloc.offset, CoreGraphics::MemoryRangeGranularity);
range.size = Math::align(alloc.size, CoreGraphics::MemoryRangeGranularity);
range.memory = alloc.mem;
VkResult res = vkFlushMappedMemoryRanges(loadInfo.dev, 1, &range);
n_assert(res == VK_SUCCESS);
Expand Down
2 changes: 1 addition & 1 deletion code/render/coregraphics/vk/vkcommandbuffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1178,7 +1178,7 @@ void
CmdRaysDispatch(const CmdBufferId id, const RayDispatchTable& table, int dimX, int dimY, int dimZ)
{
VkStridedDeviceAddressRegionKHR genRegion, hitRegion, missRegion, callableRegion;
uint handleSize = CoreGraphics::GetCurrentRaytracingProperties().shaderGroupHandleSize;
uint handleSize = CoreGraphics::ShaderGroupSize;

auto RegionSetup = [handleSize](VkStridedDeviceAddressRegionKHR& region, const RayDispatchTable::Entry& entry)
{
Expand Down
99 changes: 66 additions & 33 deletions code/render/coregraphics/vk/vkgraphicsdevice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,36 @@ SetupAdapter(CoreGraphics::GraphicsDeviceCreateInfo::Features features)
{
n_printf("[Graphics Device] Using '%s' as primary graphics adapter", state.deviceProps[i].properties.deviceName);
state.currentDevice = i;
CoreGraphics::ReadWriteBufferAlignment = state.deviceProps[i].properties.limits.minStorageBufferOffsetAlignment;
CoreGraphics::ConstantBufferAlignment = state.deviceProps[i].properties.limits.minUniformBufferOffsetAlignment;
CoreGraphics::MaxConstantBufferSize = state.deviceProps[i].properties.limits.maxUniformBufferRange;
CoreGraphics::SparseAddressSize = state.deviceProps[i].properties.limits.sparseAddressSpaceSize;

CoreGraphics::MaxPerStageConstantBuffers = state.deviceProps[i].properties.limits.maxPerStageDescriptorUniformBuffers;
CoreGraphics::MaxPerStageReadWriteBuffers = state.deviceProps[i].properties.limits.maxPerStageDescriptorStorageBuffers;
CoreGraphics::MaxPerStageSampledImages = state.deviceProps[i].properties.limits.maxPerStageDescriptorSampledImages;
CoreGraphics::MaxPerStageReadWriteImages = state.deviceProps[i].properties.limits.maxPerStageDescriptorStorageImages;
CoreGraphics::MaxPerStageSamplers = state.deviceProps[i].properties.limits.maxPerStageDescriptorSamplers;
CoreGraphics::MaxPerStageInputAttachments = state.deviceProps[i].properties.limits.maxPerStageDescriptorInputAttachments;

CoreGraphics::MaxPushConstantSize = state.deviceProps[i].properties.limits.maxPushConstantsSize;
CoreGraphics::MaxResourceTableConstantBuffers = state.deviceProps[i].properties.limits.maxDescriptorSetUniformBuffers;
CoreGraphics::MaxResourceTableDynamicOffsetConstantBuffers = state.deviceProps[i].properties.limits.maxDescriptorSetUniformBuffersDynamic;
CoreGraphics::MaxResourceTableReadWriteBuffers = state.deviceProps[i].properties.limits.maxDescriptorSetStorageBuffers;
CoreGraphics::MaxResourceTableDynamicOffsetReadWriteBuffers = state.deviceProps[i].properties.limits.maxDescriptorSetStorageBuffersDynamic;
CoreGraphics::MaxResourceTableSampledImages = state.deviceProps[i].properties.limits.maxDescriptorSetSampledImages;
CoreGraphics::MaxResourceTableReadWriteImages = state.deviceProps[i].properties.limits.maxDescriptorSetStorageImages;
CoreGraphics::MaxResourceTableSamplers = state.deviceProps[i].properties.limits.maxDescriptorSetSamplers;
CoreGraphics::MaxResourceTableInputAttachments = state.deviceProps[i].properties.limits.maxDescriptorSetInputAttachments;

CoreGraphics::MemoryRangeGranularity = state.deviceProps[i].properties.limits.nonCoherentAtomSize;
CoreGraphics::TimestampPeriod = state.deviceProps[i].properties.limits.timestampPeriod;

CoreGraphics::AccelerationStructureScratchAlignment = state.accelerationStructureDeviceProps[i].minAccelerationStructureScratchOffsetAlignment;
CoreGraphics::ShaderGroupAlignment = state.raytracingDeviceProps[i].shaderGroupBaseAlignment;
CoreGraphics::ShaderGroupSize = state.raytracingDeviceProps[i].shaderGroupHandleSize;
CoreGraphics::MaxRecursionDepth = state.raytracingDeviceProps[i].maxRayRecursionDepth;

break;
}
}
Expand Down Expand Up @@ -373,40 +403,13 @@ GetCurrentPhysicalDevice()
return state.physicalDevices[state.currentDevice];
}

//------------------------------------------------------------------------------
/**
*/
VkPhysicalDeviceProperties
GetCurrentProperties()
{
return state.deviceProps[state.currentDevice].properties;
}

//------------------------------------------------------------------------------
/**
*/
VkPhysicalDeviceAccelerationStructurePropertiesKHR
GetCurrentAccelerationStructureProperties()
{
return state.accelerationStructureDeviceProps[state.currentDevice];
}

//------------------------------------------------------------------------------
/**
*/
VkPhysicalDeviceRayTracingPipelinePropertiesKHR
GetCurrentRaytracingProperties()
{
return state.raytracingDeviceProps[state.currentDevice];
}

//------------------------------------------------------------------------------
/**
*/
VkPhysicalDeviceFeatures
GetCurrentFeatures()
{
return state.deviceFeatures[state.currentDevice];;
return state.deviceFeatures[state.currentDevice];
}

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -715,8 +718,38 @@ bool RayTracingSupported = false;
bool DynamicVertexInputSupported = false;
bool MeshShadersSupported = false;
bool VariableRateShadingSupported = false;

bool NvidiaCheckpointsSupported = false;

// Definitions of the cached device limits. Every value starts as UINT_MAX and is
// overwritten in SetupAdapter with the properties of the selected adapter.
// NOTE(review): the uint64 globals (MaxConstantBufferSize, SparseAddressSize,
// ShaderGroupSize) also start as UINT_MAX, which is the unsigned int maximum rather
// than the 64-bit maximum -- confirm nothing compares against a 64-bit sentinel.

// Buffer alignment and size limits
uint ReadWriteBufferAlignment = UINT_MAX;
uint ConstantBufferAlignment = UINT_MAX;
uint64 MaxConstantBufferSize = UINT_MAX;
uint MaxPushConstantSize = UINT_MAX;
uint64 SparseAddressSize = UINT_MAX;

// Per shader stage resource limits
uint MaxPerStageConstantBuffers = UINT_MAX;
uint MaxPerStageReadWriteBuffers = UINT_MAX;
uint MaxPerStageSampledImages = UINT_MAX;
uint MaxPerStageReadWriteImages = UINT_MAX;
uint MaxPerStageSamplers = UINT_MAX;
uint MaxPerStageInputAttachments = UINT_MAX;

// Per resource table limits
uint MaxResourceTableConstantBuffers = UINT_MAX;
uint MaxResourceTableDynamicOffsetConstantBuffers = UINT_MAX;
uint MaxResourceTableReadWriteBuffers = UINT_MAX;
uint MaxResourceTableDynamicOffsetReadWriteBuffers = UINT_MAX;
uint MaxResourceTableSampledImages = UINT_MAX;
uint MaxResourceTableReadWriteImages = UINT_MAX;
uint MaxResourceTableSamplers = UINT_MAX;
uint MaxResourceTableInputAttachments = UINT_MAX;

// Granularity used to align flushes and invalidations of non-coherent memory
uint MemoryRangeGranularity = UINT_MAX;
// NOTE(review): assigned from the float Vulkan limit timestampPeriod, so the
// fractional part is dropped -- confirm whether this should be a float.
uint TimestampPeriod = UINT_MAX;

// Raytracing properties
uint AccelerationStructureScratchAlignment = UINT_MAX;
uint ShaderGroupAlignment = UINT_MAX;
uint64 ShaderGroupSize = UINT_MAX;
uint MaxRecursionDepth = UINT_MAX;

using namespace Vulkan;

#if NEBULA_GRAPHICS_DEBUG
Expand Down Expand Up @@ -1327,7 +1360,7 @@ CreateGraphicsDevice(const GraphicsDeviceCreateInfo& info)

CoreGraphics::BufferCreateInfo uploadInfo;
uploadInfo.name = "Global Upload Buffer";
uploadInfo.byteSize = Math::align(info.globalUploadMemorySize, state.deviceProps[state.currentDevice].properties.limits.nonCoherentAtomSize);
uploadInfo.byteSize = Math::align(info.globalUploadMemorySize, CoreGraphics::MemoryRangeGranularity);
uploadInfo.mode = CoreGraphics::BufferAccessMode::HostLocal;
uploadInfo.queueSupport = CoreGraphics::BufferQueueSupport::GraphicsQueueSupport | CoreGraphics::BufferQueueSupport::ComputeQueueSupport | CoreGraphics::BufferQueueSupport::TransferQueueSupport;
uploadInfo.usageFlags = CoreGraphics::BufferUsageFlag::TransferBufferSource;
Expand Down Expand Up @@ -1427,7 +1460,7 @@ DestroyGraphicsDevice()
VkDestroyDebugMessenger(state.instance, VkErrorDebugMessageHandle, nullptr);
#endif

vkDestroyDevice(state.devices[0], nullptr);
vkDestroyDevice(state.devices[state.currentDevice], nullptr);
vkDestroyInstance(state.instance, nullptr);
}

Expand Down Expand Up @@ -2184,7 +2217,7 @@ AllocateUpload(const SizeT numBytes, const SizeT alignment)
Threading::CriticalScope _0(&UploadLock);

// Calculate aligned upper bound
SizeT adjustedAlignment = Math::max(alignment, (SizeT)state.deviceProps[state.currentDevice].properties.limits.nonCoherentAtomSize);
SizeT adjustedAlignment = Math::max(alignment, (SizeT)CoreGraphics::MemoryRangeGranularity);
const SizeT alignedBytes = numBytes + adjustedAlignment - 1;
N_BUDGET_COUNTER_INCR(N_UPLOAD_MEMORY, alignedBytes);

Expand Down Expand Up @@ -2233,7 +2266,7 @@ FlushUploads(const Util::Array<Memory::RangeAllocation>& allocations)
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.pNext = nullptr;
range.offset = allocations[i].offset; //uploadBuffer.interval.start;
range.size = Math::align(allocations[i].size, state.deviceProps[state.currentDevice].properties.limits.nonCoherentAtomSize);// (DeviceSize)size;
range.size = Math::align(allocations[i].size, CoreGraphics::MemoryRangeGranularity);// (DeviceSize)size;
range.memory = BufferGetVkMemory(state.uploadBuffer);
}

Expand Down
6 changes: 0 additions & 6 deletions code/render/coregraphics/vk/vkgraphicsdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,6 @@ VkInstance GetInstance();
VkDevice GetCurrentDevice();
/// get the currently activated physical device
VkPhysicalDevice GetCurrentPhysicalDevice();
/// get the current device properties
VkPhysicalDeviceProperties GetCurrentProperties();
/// Get the current device acceleration structure properties
VkPhysicalDeviceAccelerationStructurePropertiesKHR GetCurrentAccelerationStructureProperties();
/// Get the current device raytracing properties
VkPhysicalDeviceRayTracingPipelinePropertiesKHR GetCurrentRaytracingProperties();
/// get the current device features
VkPhysicalDeviceFeatures GetCurrentFeatures();
/// get pipeline cache
Expand Down
15 changes: 6 additions & 9 deletions code/render/coregraphics/vk/vkmemory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,6 @@ AllocateMemory(const VkDevice dev, const VkBuffer& buf, MemoryPoolType type, uin
VkMemoryRequirements req;
vkGetBufferMemoryRequirements(dev, buf, &req);
req.alignment = Math::align(req.alignment, (DeviceSize)alignment);
VkPhysicalDeviceProperties props = Vulkan::GetCurrentProperties();

VkMemoryPropertyFlags flags = 0;

Expand All @@ -251,8 +250,8 @@ AllocateMemory(const VkDevice dev, const VkBuffer& buf, MemoryPoolType type, uin
case MemoryPool_DeviceAndHost:
flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
// Memory needs to be aligned to non coherent atom size for flushing
req.size = Math::align(req.size, props.limits.nonCoherentAtomSize);
req.alignment = Math::align(req.alignment, props.limits.nonCoherentAtomSize);
req.size = Math::align(req.size, CoreGraphics::MemoryRangeGranularity);
req.alignment = Math::align(req.alignment, CoreGraphics::MemoryRangeGranularity);
break;
default:
n_crash("AllocateMemory(): Only buffer pool types are allowed for buffer memory");
Expand Down Expand Up @@ -302,15 +301,14 @@ AllocateMemory(const VkDevice dev, VkMemoryRequirements reqs, VkDeviceSize alloc
void
Flush(const VkDevice dev, const Alloc& alloc, IndexT offset, SizeT size)
{
VkPhysicalDeviceProperties props = Vulkan::GetCurrentProperties();
CoreGraphics::MemoryPool& pool = CoreGraphics::Pools[alloc.poolIndex];
VkMappedMemoryRange range;
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.pNext = nullptr;
range.offset = Math::align_down(alloc.offset + offset, props.limits.nonCoherentAtomSize);
range.offset = Math::align_down(alloc.offset + offset, CoreGraphics::MemoryRangeGranularity);
uint flushSize = size == NEBULA_WHOLE_BUFFER_SIZE ? alloc.size : Math::min(size, (SizeT)alloc.size);
range.size = Math::min(
(VkDeviceSize)Math::align(flushSize + (alloc.offset + offset - range.offset), props.limits.nonCoherentAtomSize),
(VkDeviceSize)Math::align(flushSize + (alloc.offset + offset - range.offset), CoreGraphics::MemoryRangeGranularity),
pool.blockSize);
range.memory = alloc.mem;
VkResult res = vkFlushMappedMemoryRanges(dev, 1, &range);
Expand All @@ -323,15 +321,14 @@ Flush(const VkDevice dev, const Alloc& alloc, IndexT offset, SizeT size)
void
Invalidate(const VkDevice dev, const CoreGraphics::Alloc& alloc, IndexT offset, SizeT size)
{
VkPhysicalDeviceProperties props = Vulkan::GetCurrentProperties();
CoreGraphics::MemoryPool& pool = CoreGraphics::Pools[alloc.poolIndex];
VkMappedMemoryRange range;
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.pNext = nullptr;
range.offset = Math::align_down(alloc.offset + offset, props.limits.nonCoherentAtomSize);
range.offset = Math::align_down(alloc.offset + offset, CoreGraphics::MemoryRangeGranularity);
uint flushSize = size == NEBULA_WHOLE_BUFFER_SIZE ? alloc.size : Math::min((VkDeviceSize)size, alloc.size);
range.size = Math::min(
(VkDeviceSize)Math::align(flushSize + (alloc.offset + offset - range.offset), props.limits.nonCoherentAtomSize),
(VkDeviceSize)Math::align(flushSize + (alloc.offset + offset - range.offset), CoreGraphics::MemoryRangeGranularity),
pool.blockSize);
range.memory = alloc.mem;
VkResult res = vkInvalidateMappedMemoryRanges(dev, 1, &range);
Expand Down
Loading

0 comments on commit 510287e

Please sign in to comment.