From 4223bf3c0fc8fbd436f9e6b86032a83f7a752f0c Mon Sep 17 00:00:00 2001
From: Bob Cao
Date: Fri, 30 Dec 2022 13:42:21 -0800
Subject: [PATCH] [gfx] Update Device API: Splitting ResourceBinder into separate ShaderResourceSet & RasterResources (#6954)

Issue: #6832

### Brief Summary

ResourceBinder is now split into two structures: one controls the binding of shader-accessible resources, while the other is dedicated to rasterizer state (vertex buffers, etc.). This makes the mapping onto Vulkan DescriptorSets easier and cleans up the implementation of resource binding all over the place. In addition, these binding states are no longer attached to the program, so they can be pre-filled / pre-allocated to achieve lower overhead (a short usage sketch of the new API follows the first hunks below).

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .github/workflows/scripts/aot-demo.sh | 2 +-
 .../rhi_examples/sample_2_triangle.cpp | 16 +-
 taichi/codegen/spirv/spirv_codegen.cpp | 27 +-
 taichi/codegen/spirv/spirv_ir_builder.cpp | 6 +-
 taichi/codegen/spirv/spirv_ir_builder.h | 10 +
 taichi/rhi/cpu/cpu_device.h | 31 +-
 taichi/rhi/cuda/cuda_device.h | 30 +-
 taichi/rhi/device.h | 195 ++++--
 taichi/rhi/dx/dx_device.cpp | 77 ++-
 taichi/rhi/dx/dx_device.h | 76 ++-
 taichi/rhi/impl_support.h | 7 +
 taichi/rhi/interop/vulkan_cpu_interop.cpp | 2 +-
 taichi/rhi/metal/device.cpp | 62 +-
 taichi/rhi/opengl/opengl_device.cpp | 108 ++-
 taichi/rhi/opengl/opengl_device.h | 85 ++-
 taichi/rhi/vulkan/vulkan_api.cpp | 22 +-
 taichi/rhi/vulkan/vulkan_api.h | 10 +-
 taichi/rhi/vulkan/vulkan_device.cpp | 630 +++++++++--------
 taichi/rhi/vulkan/vulkan_device.h | 359 +++++-----
 taichi/rhi/vulkan/vulkan_device_creator.cpp | 4 +-
 taichi/runtime/gfx/runtime.cpp | 30 +-
 taichi/ui/backends/vulkan/renderable.cpp | 17 +-
 taichi/ui/backends/vulkan/renderable.h | 2 +
 .../backends/vulkan/renderables/circles.cpp | 3 +-
 .../ui/backends/vulkan/renderables/lines.cpp | 6 +-
 .../ui/backends/vulkan/renderables/mesh.cpp | 10 +-
 .../backends/vulkan/renderables/particles.cpp | 5 +-
 .../vulkan/renderables/scene_lines.cpp | 8 +-
 .../backends/vulkan/renderables/set_image.cpp | 5 +-
 .../backends/vulkan/renderables/triangles.cpp | 3 +-
 tests/python/test_ggui.py | 6 +-
 31 files changed, 1014 insertions(+), 840 deletions(-)

diff --git a/.github/workflows/scripts/aot-demo.sh b/.github/workflows/scripts/aot-demo.sh
index 7c5071a61e362..6b20e58cff1ba 100755
--- a/.github/workflows/scripts/aot-demo.sh
+++ b/.github/workflows/scripts/aot-demo.sh
@@ -4,7 +4,7 @@ set -ex
 export TI_SKIP_VERSION_CHECK=ON
 export TI_CI=1
 
-export TAICHI_AOT_DEMO_URL=https://github.com/taichi-dev/taichi-aot-demo
+export TAICHI_AOT_DEMO_URL=https://github.com/bobcao3/taichi-aot-demo
 export TAICHI_AOT_DEMO_BRANCH=master
 
 export TAICHI_UNITY2_URL=https://github.com/taichi-dev/taichi-unity2
diff --git a/cpp_examples/rhi_examples/sample_2_triangle.cpp b/cpp_examples/rhi_examples/sample_2_triangle.cpp
index 7382abeab20aa..0769b94b3a20d 100644
--- a/cpp_examples/rhi_examples/sample_2_triangle.cpp
+++ b/cpp_examples/rhi_examples/sample_2_triangle.cpp
@@ -73,6 +73,12 @@ class SampleApp : public App {
       device->unmap(*vertex_buffer);
     }
 
+    // Define the raster state
+    {
+      raster_resources = device->create_raster_resources_unique();
+      raster_resources->vertex_buffer(vertex_buffer->get_ptr(0), 0);
+    }
+
     TI_INFO("App Init Done");
   }
 
@@ -94,10 +100,7 @@ class SampleApp : public App {
     // Bind our triangle pipeline
    cmdlist->bind_pipeline(pipeline.get());
 
-    // Get the binder and bind our vertex buffer
-    auto resource_binder
= pipeline->resource_binder(); - resource_binder->vertex_buffer(vertex_buffer->get_ptr(0), 0); - cmdlist->bind_resources(resource_binder); + cmdlist->bind_raster_resources(raster_resources.get()); // Render the triangle cmdlist->draw(3, 0); // End rendering @@ -110,9 +113,10 @@ class SampleApp : public App { } public: - std::unique_ptr pipeline; + std::unique_ptr pipeline{nullptr}; + std::unique_ptr raster_resources{nullptr}; - std::unique_ptr vertex_buffer; + std::unique_ptr vertex_buffer{nullptr}; }; int main() { diff --git a/taichi/codegen/spirv/spirv_codegen.cpp b/taichi/codegen/spirv/spirv_codegen.cpp index ea40d7e82dcdc..92077abdc7f87 100644 --- a/taichi/codegen/spirv/spirv_codegen.cpp +++ b/taichi/codegen/spirv/spirv_codegen.cpp @@ -91,7 +91,7 @@ class TaskCodegen : public IRVisitor { void fill_snode_to_root() { for (int root = 0; root < compiled_structs_.size(); ++root) { - for (auto [node_id, node] : compiled_structs_[root].snode_descriptors) { + for (auto &[node_id, node] : compiled_structs_[root].snode_descriptors) { snode_to_root_[node_id] = root; } } @@ -108,9 +108,6 @@ class TaskCodegen : public IRVisitor { kernel_function_ = ir_->new_function(); // void main(); ir_->debug_name(spv::OpName, kernel_function_, "main"); - compile_args_struct(); - compile_ret_struct(); - if (task_ir_->task_type == OffloadedTaskType::serial) { generate_serial_kernel(task_ir_); } else if (task_ir_->task_type == OffloadedTaskType::range_for) { @@ -1749,22 +1746,21 @@ class TaskCodegen : public IRVisitor { std::vector buffers; if (caps_->get(DeviceCapability::spirv_version) > 0x10300) { buffers = shared_array_binds_; - std::unordered_set unique_bufs; // One buffer can be bound to different bind points but has to be unique // in OpEntryPoint interface declarations. // From Spec: before SPIR-V version 1.4, duplication of these interface id // is tolerated. Starting with version 1.4, an interface id must not // appear more than once. 
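As referenced in the Brief Summary above, here is a minimal usage sketch of the split binding API on the compute side. `device`, `cmdlist`, `pipeline`, `args_buffer`, and `data_buffer` are illustrative placeholders and not part of this patch; the member functions are the ones declared in `taichi/rhi/device.h` below.

```cpp
// Hedged usage sketch, not part of the patch: pre-allocate a resource set,
// fill it once, then bind it on a command list.
auto resources = device->create_resource_set_unique();
resources->buffer(/*binding=*/0, args_buffer)      // read-only (UBO-style) binding
    .rw_buffer(/*binding=*/1, data_buffer);        // read-write (SSBO-style) binding

cmdlist->bind_pipeline(pipeline.get());
// bind_shader_resources now returns an RhiResult instead of void.
RhiResult result = cmdlist->bind_shader_resources(resources.get(), /*set_index=*/0);
```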
+ std::unordered_set entry_point_values; for (const auto &bb : task_attribs_.buffer_binds) { - if (unique_bufs.count(bb.buffer) == 0) { - for (auto &it : buffer_value_map_) { - if (it.first.first == bb.buffer) { - buffers.push_back(it.second); - } + for (auto &it : buffer_value_map_) { + if (it.first.first == bb.buffer) { + entry_point_values.insert(it.second); } - unique_bufs.insert(bb.buffer); } } + buffers.insert(buffers.end(), entry_point_values.begin(), + entry_point_values.end()); } ir_->commit_kernel_function(kernel_function_, "main", buffers, group_size); // kernel entry @@ -2248,12 +2244,16 @@ class TaskCodegen : public IRVisitor { } if (buffer.type == BufferType::Args) { + compile_args_struct(); + buffer_binding_map_[key] = 0; buffer_value_map_[key] = args_buffer_value_; return args_buffer_value_; } if (buffer.type == BufferType::Rets) { + compile_ret_struct(); + buffer_binding_map_[key] = 1; buffer_value_map_[key] = ret_buffer_value_; return ret_buffer_value_; @@ -2537,7 +2537,7 @@ void KernelCodegen::run(TaichiKernelAttributes &kernel_attribs, size_t last_size; bool success = true; - do { + { last_size = optimized_spv.size(); bool result = false; TI_ERROR_IF( @@ -2546,9 +2546,8 @@ void KernelCodegen::run(TaichiKernelAttributes &kernel_attribs, "SPIRV optimization failed"); if (result) { success = false; - break; } - } while (last_size != optimized_spv.size()); + } TI_TRACE("SPIRV-Tools-opt: binary size, before={}, after={}", task_res.spirv_code.size(), optimized_spv.size()); diff --git a/taichi/codegen/spirv/spirv_ir_builder.cpp b/taichi/codegen/spirv/spirv_ir_builder.cpp index ae14731455e6b..7d12da0581c69 100644 --- a/taichi/codegen/spirv/spirv_ir_builder.cpp +++ b/taichi/codegen/spirv/spirv_ir_builder.cpp @@ -835,11 +835,7 @@ Value IRBuilder::fetch_texel(Value texture_var, // OpImageFetch requires operand with OpImageType // We have to extract the underlying OpImage from OpSampledImage here SType image_type = get_underlying_image_type(f32_type(), args.size()); - Value image_val = new_value(image_type, ValueKind::kNormal); - - ib_.begin(spv::OpImage) - .add_seq(image_type, image_val, sampled_image) - .commit(&function_); + Value image_val = make_value(spv::OpImage, image_type, sampled_image); Value uv; if (args.size() == 1) { diff --git a/taichi/codegen/spirv/spirv_ir_builder.h b/taichi/codegen/spirv/spirv_ir_builder.h index a7211b059a534..a981f819953bf 100644 --- a/taichi/codegen/spirv/spirv_ir_builder.h +++ b/taichi/codegen/spirv/spirv_ir_builder.h @@ -86,6 +86,16 @@ struct Value { SType stype; // Additional flags about the value ValueKind flag{ValueKind::kNormal}; + + bool operator==(const Value &rhs) const { + return id == rhs.id; + } +}; + +struct ValueHasher { + size_t operator()(const spirv::Value &v) const { + return std::hash()(v.id); + } }; // Represent the SPIRV Label diff --git a/taichi/rhi/cpu/cpu_device.h b/taichi/rhi/cpu/cpu_device.h index 840e58b47a3a4..1f44f603bd4a8 100644 --- a/taichi/rhi/cpu/cpu_device.h +++ b/taichi/rhi/cpu/cpu_device.h @@ -11,33 +11,10 @@ namespace taichi::lang { namespace cpu { -class CpuResourceBinder : public ResourceBinder { - public: - ~CpuResourceBinder() override { - } - - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override{TI_NOT_IMPLEMENTED}; - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED}; - - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override{TI_NOT_IMPLEMENTED}; - void buffer(uint32_t set, uint32_t 
binding, DeviceAllocation alloc) override{ - TI_NOT_IMPLEMENTED}; -}; - class CpuPipeline : public Pipeline { public: ~CpuPipeline() override { } - - ResourceBinder *resource_binder() override{TI_NOT_IMPLEMENTED}; }; class CpuCommandList : public CommandList { @@ -46,7 +23,11 @@ class CpuCommandList : public CommandList { } void bind_pipeline(Pipeline *p) override{TI_NOT_IMPLEMENTED}; - void bind_resources(ResourceBinder *binder) override{TI_NOT_IMPLEMENTED}; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) override{ + TI_NOT_IMPLEMENTED}; + RhiResult bind_raster_resources(RasterResources *res) override{ + TI_NOT_IMPLEMENTED}; void buffer_barrier(DevicePtr ptr, size_t size) override{TI_NOT_IMPLEMENTED}; void buffer_barrier(DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED}; void memory_barrier() override{TI_NOT_IMPLEMENTED}; @@ -91,6 +72,8 @@ class CpuDevice : public LlvmDevice { const LlvmRuntimeAllocParams ¶ms) override; void dealloc_memory(DeviceAllocation handle) override; + ShaderResourceSet *create_resource_set() override{TI_NOT_IMPLEMENTED}; + std::unique_ptr create_pipeline( const PipelineSourceDesc &src, std::string name = "Pipeline") override{TI_NOT_IMPLEMENTED}; diff --git a/taichi/rhi/cuda/cuda_device.h b/taichi/rhi/cuda/cuda_device.h index f230594e6a875..43636b1feec94 100644 --- a/taichi/rhi/cuda/cuda_device.h +++ b/taichi/rhi/cuda/cuda_device.h @@ -11,33 +11,10 @@ namespace taichi::lang { namespace cuda { -class CudaResourceBinder : public ResourceBinder { - public: - ~CudaResourceBinder() override { - } - - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override{TI_NOT_IMPLEMENTED}; - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED}; - - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override{TI_NOT_IMPLEMENTED}; - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override{ - TI_NOT_IMPLEMENTED}; -}; - class CudaPipeline : public Pipeline { public: ~CudaPipeline() override { } - - ResourceBinder *resource_binder() override{TI_NOT_IMPLEMENTED}; }; class CudaCommandList : public CommandList { @@ -46,7 +23,10 @@ class CudaCommandList : public CommandList { } void bind_pipeline(Pipeline *p) override{TI_NOT_IMPLEMENTED}; - void bind_resources(ResourceBinder *binder) override{TI_NOT_IMPLEMENTED}; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final{TI_NOT_IMPLEMENTED}; + RhiResult bind_raster_resources(RasterResources *res) final{ + TI_NOT_IMPLEMENTED}; void buffer_barrier(DevicePtr ptr, size_t size) override{TI_NOT_IMPLEMENTED}; void buffer_barrier(DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED}; void memory_barrier() override{TI_NOT_IMPLEMENTED}; @@ -104,6 +84,8 @@ class CudaDevice : public LlvmDevice { const LlvmRuntimeAllocParams ¶ms) override; void dealloc_memory(DeviceAllocation handle) override; + ShaderResourceSet *create_resource_set() final{TI_NOT_IMPLEMENTED}; + std::unique_ptr create_pipeline( const PipelineSourceDesc &src, std::string name = "Pipeline") override{TI_NOT_IMPLEMENTED}; diff --git a/taichi/rhi/device.h b/taichi/rhi/device.h index 299a01510925f..a2b59ba970d85 100644 --- a/taichi/rhi/device.h +++ b/taichi/rhi/device.h @@ -51,7 +51,6 @@ enum class BlendFactor : uint32_t { class Device; struct DeviceAllocation; struct DevicePtr; -struct LLVMRuntime; // TODO: Figure out how to support images. 
Temporary solutions is to have all // opque types such as images work as an allocation @@ -100,52 +99,93 @@ constexpr DevicePtr kDeviceNullPtr{}; // TODO: fill this with the required options struct ImageSamplerConfig {}; -class ResourceBinder { +// A set of shader resources (that is bound at once) +class TI_DLL_EXPORT ShaderResourceSet { public: - virtual ~ResourceBinder() { - } - - // In Vulkan this is called Storage Buffer (shader can store) - virtual void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) = 0; - virtual void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) = 0; - - // In Vulkan this is called Uniform Buffer (shader can only load) - virtual void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) = 0; - virtual void buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) = 0; - - virtual void image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) { - TI_NOT_IMPLEMENTED + virtual ~ShaderResourceSet() = default; + + /** + * Bind a RW subregion of a buffer resource (StorgeBuffer / SSBO) + * @params[in] binding The binding index of the resource + * @params[in] ptr The Device Pointer that is going to be bound + * @params[in] size The size of the bound region of the buffer + */ + virtual ShaderResourceSet &rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) = 0; + + /** + * Bind an entire RW buffer resource (StorgeBuffer / SSBO) + * @params[in] binding The binding index of the resource + * @params[in] alloc The Device Allocation that is going to be bound + */ + virtual ShaderResourceSet &rw_buffer(uint32_t binding, + DeviceAllocation alloc) = 0; + + /** + * Bind a read-only subregion of a buffer resource (Constants / UBO) + * @params[in] binding The binding index of the resource + * @params[in] ptr The Device Pointer that is going to be bound + * @params[in] size The size of the bound region of the buffer + */ + virtual ShaderResourceSet &buffer(uint32_t binding, + DevicePtr ptr, + size_t size) = 0; + + /** + * Bind an entire read-only buffer resource (Constants / UBO) + * @params[in] binding The binding index of the resource + * @params[in] alloc The Device Allocation that is going to be bound + */ + virtual ShaderResourceSet &buffer(uint32_t binding, + DeviceAllocation alloc) = 0; + + /** + * Bind a read-only image resource (SRV / Texture) + * @params[in] binding The binding index of the resource + * @params[in] alloc The Device Allocation that is going to be bound + * @params[in] sampler_config The texture sampling configuration + */ + virtual ShaderResourceSet &image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) { + TI_NOT_IMPLEMENTED; } - virtual void rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) { + /** + * Bind a RW image resource (UAV / Storage Image) + * @params binding The binding index of the resource + * @params alloc The Device Allocation that is going to be bound + */ + virtual ShaderResourceSet &rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) { TI_NOT_IMPLEMENTED } +}; - // Set vertex buffer (not implemented in compute only device) - virtual void vertex_buffer(DevicePtr ptr, uint32_t binding = 0) { +// A set of states / resources for rasterization +class TI_DLL_EXPORT RasterResources { + public: + virtual ~RasterResources() = default; + + /** + * Set a vertex buffer for the rasterization + * @params ptr The Device Pointer to the vertices data + * 
@params binding The binding index of the vertex buffer + */ + virtual RasterResources &vertex_buffer(DevicePtr ptr, uint32_t binding = 0) { TI_NOT_IMPLEMENTED } - // Set index buffer (not implemented in compute only device) - // index_width = 4 -> uint32 index - // index_width = 2 -> uint16 index - virtual void index_buffer(DevicePtr ptr, size_t index_width) { + /** + * Set an index buffer for the rasterization + * @params ptr The Device Pointer to the vertices data + * @params index_width The index data width (in bits). + * index_width = 32 -> uint32 index + * index_width = 16 -> uint16 index + */ + virtual RasterResources &index_buffer(DevicePtr ptr, size_t index_width) { TI_NOT_IMPLEMENTED } }; @@ -187,12 +227,10 @@ enum class BufferFormat : uint32_t { #undef PER_BUFFER_FORMAT }; -class Pipeline { +class TI_DLL_EXPORT Pipeline { public: virtual ~Pipeline() { } - - virtual ResourceBinder *resource_binder() = 0; }; enum class ImageDimension { @@ -232,13 +270,48 @@ struct ImageCopyParams { uint32_t depth{1}; }; -class CommandList { +class TI_DLL_EXPORT CommandList { public: virtual ~CommandList() { } + /** + * Bind a pipeline to the command list. + * Doing so resets all bound resources. + * @params[in] pipeline The pipeline to be bound + */ virtual void bind_pipeline(Pipeline *p) = 0; - virtual void bind_resources(ResourceBinder *binder) = 0; + + /** + * Bind a ShaderResourceSet to a set index. + * - If the set index is already bound, the previous binding will be + * overwritten. + * - A set index can only be bound with a single ShaderResourceSet. + * - If the input set is empty, this command is a no-op. + * @params[in] res The ShaderResourceSet to be bound. + * @params[in] set_index The index the resources will be bound to. + * @return The binding result code + * `success` If the binding succeded + * `invalid_usage` If `res` is incompatible with current pipeline + * `not_supported` If some bindings are not supported by the backend + * `out_of_memory` If binding failed due to OOM conditions + * `error` If binding failed due to other reasons + */ + virtual RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) = 0; + + /** + * Bind RasterResources to the command list. + * - If the input resource is empty, this command is a no-op. + * @params res The RasterResources to be bound. 
+ * @return The binding result code + * `success` If the binding succeded + * `invalid_usage` If `res` is incompatible with current pipeline + * `not_supported` If some bindings are not supported by the backend + * `error` If binding failed due to other reasons + */ + virtual RhiResult bind_raster_resources(RasterResources *res) = 0; + virtual void buffer_barrier(DevicePtr ptr, size_t size) = 0; virtual void buffer_barrier(DeviceAllocation alloc) = 0; virtual void memory_barrier() = 0; @@ -348,7 +421,7 @@ enum class AllocUsage : int { MAKE_ENUM_FLAGS(AllocUsage) -class StreamSemaphoreObject { +class TI_DLL_EXPORT StreamSemaphoreObject { public: virtual ~StreamSemaphoreObject() { } @@ -356,7 +429,7 @@ class StreamSemaphoreObject { using StreamSemaphore = std::shared_ptr; -class Stream { +class TI_DLL_EXPORT Stream { public: virtual ~Stream() { } @@ -376,7 +449,7 @@ class Stream { } }; -class Device { +class TI_DLL_EXPORT Device { DeviceCapabilityConfig caps_{}; public: @@ -423,6 +496,20 @@ class Device { // Wait for all tasks to complete (task from all streams) virtual void wait_idle() = 0; + /** + * Create a new shader resource set + * @return The new shader resource set pointer + */ + virtual ShaderResourceSet *create_resource_set() = 0; + + /** + * Create a new shader resource set (wrapped in unique ptr) + * @return The new shader resource set unique pointer + */ + inline std::unique_ptr create_resource_set_unique() { + return std::unique_ptr(this->create_resource_set()); + } + /** * Map a range within a DeviceAllocation memory into host address space. * @@ -501,7 +588,7 @@ class Device { } }; -class Surface { +class TI_DLL_EXPORT Surface { public: virtual ~Surface() { } @@ -605,6 +692,20 @@ class TI_DLL_EXPORT GraphicsDevice : public Device { virtual Stream *get_graphics_stream() = 0; + /** + * Create a new raster resources set + * @return The new RasterResources pointer + */ + virtual RasterResources *create_raster_resources() = 0; + + /** + * Create a new raster resources set (wrapped in unique ptr) + * @return The new RasterResources unique pointer + */ + inline std::unique_ptr create_raster_resources_unique() { + return std::unique_ptr(this->create_raster_resources()); + } + virtual std::unique_ptr create_surface( const SurfaceConfig &config) = 0; // You are not expected to call this directly. 
If you want to use this image diff --git a/taichi/rhi/dx/dx_device.cpp b/taichi/rhi/dx/dx_device.cpp index c942adf95640f..47cbebf26eed2 100644 --- a/taichi/rhi/dx/dx_device.cpp +++ b/taichi/rhi/dx/dx_device.cpp @@ -25,59 +25,51 @@ void check_dx_error(HRESULT hr, const char *msg) { } } -void Dx11ResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { +ShaderResourceSet &Dx11ResourceSet::rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { TI_NOT_IMPLEMENTED; + return *this; } -void Dx11ResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { +ShaderResourceSet &Dx11ResourceSet::rw_buffer(uint32_t binding, + DeviceAllocation alloc) { uav_binding_to_alloc_id_[binding] = alloc.alloc_id; + return *this; } -void Dx11ResourceBinder::buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { +ShaderResourceSet &Dx11ResourceSet::buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { TI_NOT_IMPLEMENTED; + return *this; } -void Dx11ResourceBinder::buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { +ShaderResourceSet &Dx11ResourceSet::buffer(uint32_t binding, + DeviceAllocation alloc) { // args_t now use constant buffers. // Example: // cbuffer args_t : register(b0) // { ... } cb_binding_to_alloc_id_[binding] = alloc.alloc_id; + return *this; } -void Dx11ResourceBinder::image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) { +ShaderResourceSet &Dx11ResourceSet::image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) { TI_NOT_IMPLEMENTED; + return *this; } -void Dx11ResourceBinder::rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) { +ShaderResourceSet &Dx11ResourceSet::rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) { TI_NOT_IMPLEMENTED; + return *this; } -void Dx11ResourceBinder::vertex_buffer(DevicePtr ptr, uint32_t binding) { - TI_NOT_IMPLEMENTED; -} - -void Dx11ResourceBinder::index_buffer(DevicePtr ptr, size_t index_width) { - TI_NOT_IMPLEMENTED; -} - -Dx11ResourceBinder::~Dx11ResourceBinder() { +Dx11ResourceSet::~Dx11ResourceSet() { } Dx11CommandList::Dx11CommandList(Dx11Device *ti_device) : device_(ti_device) { @@ -102,11 +94,16 @@ void Dx11CommandList::bind_pipeline(Pipeline *p) { d3d11_deferred_context_->CSSetShader(pipeline->get_program(), nullptr, 0); } -void Dx11CommandList::bind_resources(ResourceBinder *binder_) { - Dx11ResourceBinder *binder = static_cast(binder_); +RhiResult Dx11CommandList::bind_shader_resources(ShaderResourceSet *res, + int set_index) { + Dx11ResourceSet *set = static_cast(res); + if (set_index > 0) { + // TODO: Add remapping? 
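A small caller-side sketch of the result codes documented for `CommandList::bind_shader_resources`; as implemented just above, D3D11 currently reports `not_supported` for any set index other than 0. `cmdlist` and `extra_set` are illustrative placeholders, not part of this patch.

```cpp
// Hedged sketch, not part of the patch: handle the documented result codes.
RhiResult res = cmdlist->bind_shader_resources(extra_set.get(), /*set_index=*/1);
if (res == RhiResult::not_supported) {
  // e.g. repack everything into set 0 for backends without multiple sets
} else if (res != RhiResult::success) {
  // invalid_usage / out_of_memory / error: surface the failure to the caller
}
```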
+ return RhiResult::not_supported; + } // UAV - for (auto &[binding, alloc_id] : binder->uav_binding_to_alloc_id()) { + for (auto &[binding, alloc_id] : set->uav_binding_to_alloc_id()) { ID3D11UnorderedAccessView *uav = device_->alloc_id_to_uav(d3d11_deferred_context_, alloc_id); d3d11_deferred_context_->CSSetUnorderedAccessViews(binding, 1, &uav, @@ -114,7 +111,7 @@ void Dx11CommandList::bind_resources(ResourceBinder *binder_) { } // CBV - for (auto &[binding, alloc_id] : binder->cb_binding_to_alloc_id()) { + for (auto &[binding, alloc_id] : set->cb_binding_to_alloc_id()) { auto cb_buffer = device_->alloc_id_to_cb_buffer(d3d11_deferred_context_, alloc_id); @@ -122,6 +119,12 @@ void Dx11CommandList::bind_resources(ResourceBinder *binder_) { cb_slot_watermark_ = std::max(cb_slot_watermark_, int(binding)); } + + return RhiResult::success; +} + +RhiResult Dx11CommandList::bind_raster_resources(RasterResources *res) { + TI_NOT_IMPLEMENTED; } void Dx11CommandList::buffer_barrier(DevicePtr ptr, size_t size) { @@ -946,10 +949,6 @@ Dx11Pipeline::Dx11Pipeline(const PipelineSourceDesc &desc, Dx11Pipeline::~Dx11Pipeline() { } -ResourceBinder *Dx11Pipeline::resource_binder() { - return &binder_; -} - } // namespace directx11 } // namespace taichi::lang diff --git a/taichi/rhi/dx/dx_device.h b/taichi/rhi/dx/dx_device.h index 0f20f95a3427b..4779d3147a700 100644 --- a/taichi/rhi/dx/dx_device.h +++ b/taichi/rhi/dx/dx_device.h @@ -19,37 +19,23 @@ constexpr bool kD3d11ForceRef = false; void check_dx_error(HRESULT hr, const char *msg); -class Dx11ResourceBinder : public ResourceBinder { +class Dx11ResourceSet : public ShaderResourceSet { public: - ~Dx11ResourceBinder() override; - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override; - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override; - void image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) override; - void rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) override; - - // Set vertex buffer (not implemented in compute only device) - void vertex_buffer(DevicePtr ptr, uint32_t binding = 0) override; - - // Set index buffer (not implemented in compute only device) - // index_width = 4 -> uint32 index - // index_width = 2 -> uint16 index - void index_buffer(DevicePtr ptr, size_t index_width) override; + Dx11ResourceSet() = default; + ~Dx11ResourceSet() override; + + ShaderResourceSet &rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) final; + ShaderResourceSet &rw_buffer(uint32_t binding, DeviceAllocation alloc) final; + ShaderResourceSet &buffer(uint32_t binding, DevicePtr ptr, size_t size) final; + ShaderResourceSet &buffer(uint32_t binding, DeviceAllocation alloc) final; + ShaderResourceSet &image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) final; + ShaderResourceSet &rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) final; const std::unordered_map &uav_binding_to_alloc_id() { return uav_binding_to_alloc_id_; @@ -64,6 +50,20 @@ class Dx11ResourceBinder : public ResourceBinder { std::unordered_map cb_binding_to_alloc_id_; }; +class Dx11RasterResources : public RasterResources { + ~Dx11RasterResources() override = default; + + RasterResources &vertex_buffer(DevicePtr ptr, uint32_t 
binding = 0) final { + TI_NOT_IMPLEMENTED; + return *this; + } + + RasterResources &index_buffer(DevicePtr ptr, size_t index_width) final { + TI_NOT_IMPLEMENTED; + return *this; + } +}; + class Dx11Device; class Dx11Pipeline : public Pipeline { @@ -72,7 +72,7 @@ class Dx11Pipeline : public Pipeline { const std::string &name, Dx11Device *device); ~Dx11Pipeline() override; - ResourceBinder *resource_binder() override; + ID3D11ComputeShader *get_program() { return compute_shader_; } @@ -86,7 +86,6 @@ class Dx11Pipeline : public Pipeline { Dx11Device *device_{nullptr}; ID3D11ComputeShader *compute_shader_{nullptr}; - Dx11ResourceBinder binder_; std::string name_; }; @@ -114,7 +113,9 @@ class Dx11CommandList : public CommandList { ~Dx11CommandList() override; void bind_pipeline(Pipeline *p) override; - void bind_resources(ResourceBinder *binder) override; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final; + RhiResult bind_raster_resources(RasterResources *res) final; void buffer_barrier(DevicePtr ptr, size_t size) override; void buffer_barrier(DeviceAllocation alloc) override; void memory_barrier() override; @@ -174,6 +175,15 @@ class Dx11Device : public GraphicsDevice { DeviceAllocation allocate_memory(const AllocParams ¶ms) override; void dealloc_memory(DeviceAllocation handle) override; + + ShaderResourceSet *create_resource_set() final { + return new Dx11ResourceSet; + } + + RasterResources *create_raster_resources() final { + return new Dx11RasterResources; + } + std::unique_ptr create_pipeline( const PipelineSourceDesc &src, std::string name = "Pipeline") override; diff --git a/taichi/rhi/impl_support.h b/taichi/rhi/impl_support.h index 38d66ef824c07..4e6729d994fd8 100644 --- a/taichi/rhi/impl_support.h +++ b/taichi/rhi/impl_support.h @@ -148,5 +148,12 @@ class SyncedPtrStableObjectList { std::vector free_nodes_; }; +// A helper to combine hash +template +inline void hash_combine(std::size_t &seed, const T &v) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + } // namespace rhi_impl } // namespace taichi::lang diff --git a/taichi/rhi/interop/vulkan_cpu_interop.cpp b/taichi/rhi/interop/vulkan_cpu_interop.cpp index e8e4656df5bed..2c33af0a95f4d 100644 --- a/taichi/rhi/interop/vulkan_cpu_interop.cpp +++ b/taichi/rhi/interop/vulkan_cpu_interop.cpp @@ -44,7 +44,7 @@ void memcpy_cpu_to_vulkan_via_staging(DevicePtr dst, CpuDevice::AllocInfo src_alloc_info = cpu_dev->get_alloc_info(src_alloc); void *dst_ptr{nullptr}; - TI_ASSERT(vk_dev->map_range(dst, size, &dst_ptr) == RhiResult::success); + TI_ASSERT(vk_dev->map_range(staging, size, &dst_ptr) == RhiResult::success); void *src_ptr = (uint8_t *)src_alloc_info.ptr + src.offset; memcpy(dst_ptr, src_ptr, size); diff --git a/taichi/rhi/metal/device.cpp b/taichi/rhi/metal/device.cpp index 7cdd6b5e215c5..77f406f03e8ac 100644 --- a/taichi/rhi/metal/device.cpp +++ b/taichi/rhi/metal/device.cpp @@ -11,7 +11,7 @@ namespace metal { #ifdef TI_PLATFORM_OSX namespace { -class ResourceBinderImpl : public ResourceBinder { +class ShaderResourceSetImpl : public ShaderResourceSet { public: struct Binding { DeviceAllocationId alloc_id{0}; @@ -22,31 +22,32 @@ class ResourceBinderImpl : public ResourceBinder { }; using BindingMap = std::unordered_map; - explicit ResourceBinderImpl(const Device *dev) : dev_(dev) { + explicit ShaderResourceSetImpl(const Device *dev) : dev_(dev) { } // RW buffers - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override { - 
bind_buffer(set, binding, ptr, ptr.offset, /*is_constant=*/false); + ShaderResourceSet &rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) override { + bind_buffer(binding, ptr, ptr.offset, /*is_constant=*/false); + return *this; } - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override { - bind_buffer(set, binding, alloc, /*offset=*/0, /*is_constant=*/false); + ShaderResourceSet &rw_buffer(uint32_t binding, + DeviceAllocation alloc) override { + bind_buffer(binding, alloc, /*offset=*/0, /*is_constant=*/false); + return *this; } // Constant buffers - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override { - bind_buffer(set, binding, ptr, ptr.offset, /*is_constant=*/false); + ShaderResourceSet &buffer(uint32_t binding, + DevicePtr ptr, + size_t size) override { + bind_buffer(binding, ptr, ptr.offset, /*is_constant=*/false); + return *this; } - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override { - bind_buffer(set, binding, alloc, /*offset=*/0, /*is_constant=*/true); + ShaderResourceSet &buffer(uint32_t binding, DeviceAllocation alloc) override { + bind_buffer(binding, alloc, /*offset=*/0, /*is_constant=*/true); + return *this; } const BindingMap &binding_map() const { @@ -54,12 +55,10 @@ class ResourceBinderImpl : public ResourceBinder { } private: - void bind_buffer(uint32_t set, - uint32_t binding, + void bind_buffer(uint32_t binding, const DeviceAllocation &alloc, uint64_t offset, bool is_constant) { - TI_ASSERT(set == 0); TI_ASSERT(alloc.device == dev_); binding_map_[binding] = {alloc.alloc_id, offset, is_constant}; } @@ -74,11 +73,6 @@ class PipelineImpl : public Pipeline { : pipeline_state_(std::move(pipeline)) { } - ResourceBinder *resource_binder() override { - // TODO: Hmm, why do we need this interface? 
- return nullptr; - } - MTLComputePipelineState *mtl_pipeline_state() { return pipeline_state_.get(); } @@ -91,7 +85,7 @@ class CommandListImpl : public CommandList { private: struct ComputeEncoderBuilder { MTLComputePipelineState *pipeline{nullptr}; - ResourceBinderImpl::BindingMap binding_map; + ShaderResourceSetImpl::BindingMap binding_map; }; public: @@ -113,9 +107,15 @@ class CommandListImpl : public CommandList { static_cast(p)->mtl_pipeline_state(); } - void bind_resources(ResourceBinder *binder) override { + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final { get_or_make_compute_builder()->binding_map = - static_cast(binder)->binding_map(); + static_cast(res)->binding_map(); + return RhiResult::success; + } + + RhiResult bind_raster_resources(RasterResources *res) final { + TI_NOT_IMPLEMENTED; } void buffer_barrier(DevicePtr ptr, size_t size) override { @@ -322,6 +322,10 @@ class DeviceImpl : public Device, public AllocToMTLBufferMapper { return std::make_unique(std::move(pipeline)); } + ShaderResourceSet *create_resource_set() final { + return new ShaderResourceSetImpl(this); + } + RhiResult map_range(DevicePtr ptr, uint64_t size, void **mapped_ptr) final { auto *mem = find(ptr).mem; if (!mem) { diff --git a/taichi/rhi/opengl/opengl_device.cpp b/taichi/rhi/opengl/opengl_device.cpp index e7c8d71c2f6b7..759edb1637f43 100644 --- a/taichi/rhi/opengl/opengl_device.cpp +++ b/taichi/rhi/opengl/opengl_device.cpp @@ -182,62 +182,42 @@ void check_opengl_error(const std::string &msg) { } } -GLResourceBinder::~GLResourceBinder() { +GLResourceSet::~GLResourceSet() { } -void GLResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { - // FIXME: Implement ranged bind - TI_NOT_IMPLEMENTED; +GLResourceSet &GLResourceSet::rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { + ssbo_binding_map_[binding] = {GLuint(ptr.alloc_id), ptr.offset, size}; + return *this; } -void GLResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { - TI_ASSERT_INFO(set == 0, "OpenGL only supports set = 0, requested set = {}", - set); - ssbo_binding_map_[binding] = alloc.alloc_id; +GLResourceSet &GLResourceSet::rw_buffer(uint32_t binding, + DeviceAllocation alloc) { + return rw_buffer(binding, alloc.get_ptr(0), -1); } -void GLResourceBinder::buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { - // FIXME: Implement ranged bind - TI_NOT_IMPLEMENTED; +GLResourceSet &GLResourceSet::buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { + ubo_binding_map_[binding] = {GLuint(ptr.alloc_id), ptr.offset, size}; + return *this; } -void GLResourceBinder::buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { - TI_ASSERT_INFO(set == 0, "OpenGL only supports set = 0, requested set = {}", - set); - ubo_binding_map_[binding] = alloc.alloc_id; +GLResourceSet &GLResourceSet::buffer(uint32_t binding, DeviceAllocation alloc) { + return buffer(binding, alloc.get_ptr(0), -1); } -void GLResourceBinder::image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) { - TI_ASSERT_INFO(set == 0, "OpenGL only supports set = 0, requested set = {}", - set); +GLResourceSet &GLResourceSet::image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) { texture_binding_map_[binding] = alloc.alloc_id; + return *this; } -void GLResourceBinder::rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) { - 
TI_NOT_IMPLEMENTED; -} - -void GLResourceBinder::vertex_buffer(DevicePtr ptr, uint32_t binding) { - TI_NOT_IMPLEMENTED; -} - -void GLResourceBinder::index_buffer(DevicePtr ptr, size_t index_width) { +GLResourceSet &GLResourceSet::rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) { TI_NOT_IMPLEMENTED; } @@ -322,10 +302,6 @@ GLPipeline::~GLPipeline() { check_opengl_error("glDeleteShader"); } -ResourceBinder *GLPipeline::resource_binder() { - return &binder_; -} - GLCommandList::~GLCommandList() { } @@ -336,33 +312,39 @@ void GLCommandList::bind_pipeline(Pipeline *p) { recorded_commands_.push_back(std::move(cmd)); } -void GLCommandList::bind_resources(ResourceBinder *_binder) { - GLResourceBinder *binder = static_cast(_binder); - for (auto &[binding, buffer] : binder->ssbo_binding_map()) { +RhiResult GLCommandList::bind_shader_resources(ShaderResourceSet *res, + int set_index) { + GLResourceSet *set = static_cast(res); + for (auto &[binding, buffer] : set->ssbo_binding_map()) { auto cmd = std::make_unique(); - cmd->buffer = buffer; + cmd->buffer = buffer.buffer; + cmd->offset = buffer.offset; + cmd->size = buffer.size; cmd->index = binding; recorded_commands_.push_back(std::move(cmd)); } - for (auto &[binding, buffer] : binder->ubo_binding_map()) { + for (auto &[binding, buffer] : set->ubo_binding_map()) { auto cmd = std::make_unique(); - cmd->buffer = buffer; + cmd->buffer = buffer.buffer; + cmd->offset = buffer.offset; + cmd->size = buffer.size; cmd->index = binding; cmd->target = GL_UNIFORM_BUFFER; recorded_commands_.push_back(std::move(cmd)); } - for (auto &[binding, texture] : binder->texture_binding_map()) { + for (auto &[binding, texture] : set->texture_binding_map()) { auto cmd = std::make_unique(); cmd->texture = texture; cmd->index = binding; cmd->target = device_->get_image_gl_dims(texture); recorded_commands_.push_back(std::move(cmd)); } + + return RhiResult::success; } -template -std::initializer_list make_init_list(std::initializer_list &&l) { - return l; +RhiResult GLCommandList::bind_raster_resources(RasterResources *res) { + TI_NOT_IMPLEMENTED; } void GLCommandList::buffer_barrier(DevicePtr ptr, size_t size) { @@ -734,7 +716,6 @@ void GLDevice::image_to_buffer(DevicePtr dst_buf, } GLSurface::~GLSurface() { - TI_NOT_IMPLEMENTED; } StreamSemaphore GLSurface::acquire_next_image() { @@ -772,9 +753,14 @@ void GLCommandList::CmdBindPipeline::execute() { } void GLCommandList::CmdBindBufferToIndex::execute() { - check_opengl_error("before"); - glBindBufferBase(target, index, buffer); - check_opengl_error("glBindBufferBase"); + if (size == -1) { + glBindBufferBase(target, index, buffer); + check_opengl_error("glBindBufferBase"); + } else { + glBindBufferRange(target, index, buffer, GLintptr(offset), + GLsizeiptr(size)); + check_opengl_error("glBindBufferRange"); + } } void GLCommandList::CmdBindTextureToIndex::execute() { diff --git a/taichi/rhi/opengl/opengl_device.h b/taichi/rhi/opengl/opengl_device.h index f792e9be0595b..7645aaf1c002f 100644 --- a/taichi/rhi/opengl/opengl_device.h +++ b/taichi/rhi/opengl/opengl_device.h @@ -12,53 +12,37 @@ class GLDevice; void check_opengl_error(const std::string &msg = "OpenGL"); -class GLResourceBinder : public ResourceBinder { +class GLResourceSet : public ShaderResourceSet { public: - ~GLResourceBinder() override; + GLResourceSet() = default; + explicit GLResourceSet(const GLResourceSet &other) = default; - struct Bindings { - // OpenGL has no sets, default set = 0 - uint32_t binding{0}; - GLuint buffer{0}; - GLuint image{0}; + 
~GLResourceSet() override; + + GLResourceSet &rw_buffer(uint32_t binding, DevicePtr ptr, size_t size) final; + GLResourceSet &rw_buffer(uint32_t binding, DeviceAllocation alloc) final; + + GLResourceSet &buffer(uint32_t binding, DevicePtr ptr, size_t size) final; + GLResourceSet &buffer(uint32_t binding, DeviceAllocation alloc) final; + + GLResourceSet &image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) final; + GLResourceSet &rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) final; + + struct BufferBinding { + GLuint buffer; + size_t offset; + size_t size; }; - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override; - - void buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override; - - void image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) override; - void rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) override; - - // Set vertex buffer (not implemented in compute only device) - void vertex_buffer(DevicePtr ptr, uint32_t binding = 0) override; - - // Set index buffer (not implemented in compute only device) - // index_width = 4 -> uint32 index - // index_width = 2 -> uint16 index - void index_buffer(DevicePtr ptr, size_t index_width) override; - - const std::unordered_map &ssbo_binding_map() { + const std::unordered_map &ssbo_binding_map() { return ssbo_binding_map_; } - const std::unordered_map &ubo_binding_map() { + const std::unordered_map &ubo_binding_map() { return ubo_binding_map_; } @@ -67,8 +51,8 @@ class GLResourceBinder : public ResourceBinder { } private: - std::unordered_map ssbo_binding_map_; - std::unordered_map ubo_binding_map_; + std::unordered_map ssbo_binding_map_; + std::unordered_map ubo_binding_map_; std::unordered_map texture_binding_map_; }; @@ -77,15 +61,12 @@ class GLPipeline : public Pipeline { GLPipeline(const PipelineSourceDesc &desc, const std::string &name); ~GLPipeline() override; - ResourceBinder *resource_binder() override; - GLuint get_program() { return program_id_; } private: GLuint program_id_; - GLResourceBinder binder_; }; class GLCommandList : public CommandList { @@ -95,7 +76,9 @@ class GLCommandList : public CommandList { ~GLCommandList() override; void bind_pipeline(Pipeline *p) override; - void bind_resources(ResourceBinder *binder) override; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final; + RhiResult bind_raster_resources(RasterResources *res) final; void buffer_barrier(DevicePtr ptr, size_t size) override; void buffer_barrier(DeviceAllocation alloc) override; void memory_barrier() override; @@ -151,6 +134,8 @@ class GLCommandList : public CommandList { struct CmdBindBufferToIndex : public Cmd { GLuint buffer{0}; GLuint index{0}; + GLuint offset{0}; + GLuint size{0}; GLenum target{GL_SHADER_STORAGE_BUFFER}; void execute() override; }; @@ -250,6 +235,14 @@ class GLDevice : public GraphicsDevice { const PipelineSourceDesc &src, std::string name = "Pipeline") override; + ShaderResourceSet *create_resource_set() final { + return new GLResourceSet; + } + + RasterResources *create_raster_resources() final { + TI_NOT_IMPLEMENTED; + } + // Mapping can fail and will return nullptr RhiResult map_range(DevicePtr ptr, uint64_t size, void **mapped_ptr) final; 
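For context, a sketch of how the two binding paths recorded by `GLResourceSet` are reached: binding a whole allocation forwards a size of -1 and ends up in `glBindBufferBase`, while an explicit sub-range ends up in `glBindBufferRange` (see `CmdBindBufferToIndex::execute()` above). The allocation names, offset, and 256-byte size are illustrative placeholders.

```cpp
// Hedged sketch, not part of the patch: whole-buffer vs. ranged GL bindings.
auto set = device->create_resource_set_unique();
set->rw_buffer(/*binding=*/0, storage_alloc);      // whole buffer -> glBindBufferBase
set->buffer(/*binding=*/1, uniform_alloc.get_ptr(/*offset=*/0),
            /*size=*/256);                         // sub-range -> glBindBufferRange
cmdlist->bind_shader_resources(set.get());
```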
RhiResult map(DeviceAllocation alloc, void **mapped_ptr) final; diff --git a/taichi/rhi/vulkan/vulkan_api.cpp b/taichi/rhi/vulkan/vulkan_api.cpp index 949fd0618100d..ab40e35757079 100644 --- a/taichi/rhi/vulkan/vulkan_api.cpp +++ b/taichi/rhi/vulkan/vulkan_api.cpp @@ -42,6 +42,10 @@ DeviceObjVkPipeline::~DeviceObjVkPipeline() { vkDestroyPipeline(device, pipeline, nullptr); } +DeviceObjVkSampler::~DeviceObjVkSampler() { + vkDestroySampler(device, sampler, nullptr); +} + DeviceObjVkImage::~DeviceObjVkImage() { if (allocation) { vmaDestroyImage(allocator, image, allocation); @@ -146,7 +150,10 @@ IVkDescriptorPool create_descriptor_pool( obj->device = device; VkResult res = vkCreateDescriptorPool(device, create_info, nullptr, &obj->pool); - BAIL_ON_VK_BAD_RESULT_NO_RETURN(res, "failed to create descriptor pool"); + if (res != VK_SUCCESS) { + // All failure condition listed in spec are OOM + return nullptr; + } return obj; } @@ -245,7 +252,7 @@ IVkPipelineLayout create_pipeline_layout( std::vector layouts; layouts.reserve(set_layouts.size()); - for (auto l : set_layouts) { + for (auto &l : set_layouts) { layouts.push_back(l->layout); } @@ -416,6 +423,17 @@ IVkPipeline create_raytracing_pipeline( return obj; } +IVkSampler create_sampler(VkDevice device, const VkSamplerCreateInfo &info) { + IVkSampler sampler = std::make_shared(); + sampler->device = device; + + BAIL_ON_VK_BAD_RESULT_NO_RETURN( + vkCreateSampler(device, &info, nullptr, &sampler->sampler), + "failed to create texture sampler!"); + + return sampler; +} + IVkImage create_image(VkDevice device, VmaAllocator allocator, VkImageCreateInfo *image_info, diff --git a/taichi/rhi/vulkan/vulkan_api.h b/taichi/rhi/vulkan/vulkan_api.h index a20224aad4e94..39dc3f82f5684 100644 --- a/taichi/rhi/vulkan/vulkan_api.h +++ b/taichi/rhi/vulkan/vulkan_api.h @@ -65,7 +65,7 @@ struct DeviceObjVkDescriptorSet : public DeviceObj { VkDescriptorSet set{VK_NULL_HANDLE}; IVkDescriptorSetLayout ref_layout{nullptr}; IVkDescriptorPool ref_pool{nullptr}; - std::unordered_map ref_binding_objs; + std::vector ref_binding_objs; ~DeviceObjVkDescriptorSet() override; }; using IVkDescriptorSet = std::shared_ptr; @@ -172,6 +172,14 @@ IVkPipeline create_raytracing_pipeline( IVkPipelineCache cache = nullptr, IVkPipeline base_pipeline = nullptr); +// VkSampler +struct DeviceObjVkSampler : public DeviceObj { + VkSampler sampler{VK_NULL_HANDLE}; + ~DeviceObjVkSampler() override; +}; +using IVkSampler = std::shared_ptr; +IVkSampler create_sampler(VkDevice device, const VkSamplerCreateInfo &info); + // VkImage struct DeviceObjVkImage : public DeviceObj { VkImage image{VK_NULL_HANDLE}; diff --git a/taichi/rhi/vulkan/vulkan_device.cpp b/taichi/rhi/vulkan/vulkan_device.cpp index b080ae1a67457..b9ca65e42faf1 100644 --- a/taichi/rhi/vulkan/vulkan_device.cpp +++ b/taichi/rhi/vulkan/vulkan_device.cpp @@ -140,7 +140,9 @@ RhiReturn blend_factor_ti_to_vk(BlendFactor factor) { } VulkanPipeline::VulkanPipeline(const Params ¶ms) - : device_(params.device->vk_device()), name_(params.name) { + : ti_device_(*params.device), + device_(params.device->vk_device()), + name_(params.name) { create_descriptor_set_layout(params); create_shader_stages(params); create_pipeline_layout(); @@ -157,7 +159,9 @@ VulkanPipeline::VulkanPipeline( const RasterParams &raster_params, const std::vector &vertex_inputs, const std::vector &vertex_attrs) - : device_(params.device->vk_device()), name_(params.name) { + : ti_device_(*params.device), + device_(params.device->vk_device()), + name_(params.name) { 
this->graphics_pipeline_template_ = std::make_unique(); @@ -218,7 +222,7 @@ vkapi::IVkPipeline VulkanPipeline::graphics_pipeline_dynamic( color_attachment_formats.push_back(color_attachment.first); } - VkPipelineRenderingCreateInfoKHR rendering_info; + VkPipelineRenderingCreateInfoKHR rendering_info{}; rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR; rendering_info.pNext = nullptr; rendering_info.viewMask = 0; @@ -238,8 +242,6 @@ vkapi::IVkPipeline VulkanPipeline::graphics_pipeline_dynamic( } void VulkanPipeline::create_descriptor_set_layout(const Params ¶ms) { - std::unordered_set sets_used; - for (auto &code_view : params.code) { SpvReflectShaderModule module; SpvReflectResult result = @@ -255,31 +257,31 @@ void VulkanPipeline::create_descriptor_set_layout(const Params ¶ms) { RHI_ASSERT(result == SPV_REFLECT_RESULT_SUCCESS); for (SpvReflectDescriptorSet *desc_set : desc_sets) { - uint32_t set = desc_set->set; + uint32_t set_index = desc_set->set; + if (set_templates_.find(set_index) == set_templates_.end()) { + set_templates_.insert({set_index, VulkanResourceSet(&ti_device_)}); + } + VulkanResourceSet &set = set_templates_.at(set_index); + for (int i = 0; i < desc_set->binding_count; i++) { SpvReflectDescriptorBinding *desc_binding = desc_set->bindings[i]; if (desc_binding->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER) { - resource_binder_.rw_buffer(set, desc_binding->binding, kDeviceNullPtr, - 0); + set.rw_buffer(desc_binding->binding, kDeviceNullPtr, 0); } else if (desc_binding->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { - resource_binder_.buffer(set, desc_binding->binding, kDeviceNullPtr, - 0); + set.buffer(desc_binding->binding, kDeviceNullPtr, 0); } else if (desc_binding->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - resource_binder_.image(set, desc_binding->binding, - kDeviceNullAllocation, {}); + set.image(desc_binding->binding, kDeviceNullAllocation, {}); } else if (desc_binding->descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE) { - resource_binder_.rw_image(set, desc_binding->binding, - kDeviceNullAllocation, {}); + set.rw_image(desc_binding->binding, kDeviceNullAllocation, {}); } else { RHI_LOG_ERROR("Unrecognized binding ignored"); } } - sets_used.insert(set); } // Handle special vertex shaders stuff @@ -335,14 +337,21 @@ void VulkanPipeline::create_descriptor_set_layout(const Params ¶ms) { spvReflectDestroyShaderModule(&module); } - for (uint32_t set : sets_used) { - vkapi::IVkDescriptorSetLayout layout = - params.device->get_desc_set_layout(resource_binder_.get_set(set)); + // A program can have no binding sets at all. + if (set_templates_.size()) { + // We need to verify the set layouts are all continous + uint32_t max_set = 0; + for (auto &[index, layout_template] : set_templates_) { + max_set = std::max(index, max_set); + } + RHI_ASSERT(max_set + 1 == set_templates_.size() && + "Sets must be continous & start with 0"); - set_layouts_.push_back(layout); + set_layouts_.resize(set_templates_.size(), nullptr); + for (auto &[index, layout_template] : set_templates_) { + set_layouts_[index] = ti_device_.get_desc_set_layout(layout_template); + } } - - resource_binder_.lock_layout(); } void VulkanPipeline::create_shader_stages(const Params ¶ms) { @@ -381,7 +390,7 @@ void VulkanPipeline::create_graphics_pipeline( const std::vector &vertex_inputs, const std::vector &vertex_attrs) { // Use dynamic viewport state. 
These two are just dummies - VkViewport viewport; + VkViewport viewport{}; viewport.width = 1; viewport.height = 1; viewport.x = 0; @@ -389,9 +398,7 @@ void VulkanPipeline::create_graphics_pipeline( viewport.minDepth = 0.0; viewport.maxDepth = 1.0; - VkRect2D scissor; - scissor.offset = {0, 0}; - scissor.extent = {1, 1}; + VkRect2D scissor{/*offset*/ {0, 0}, /*extent*/ {1, 1}}; VkPipelineViewportStateCreateInfo &viewport_state = graphics_pipeline_template_->viewport_state; @@ -580,244 +587,218 @@ void VulkanPipeline::create_graphics_pipeline( pipeline_info.basePipelineHandle = VK_NULL_HANDLE; } -VulkanResourceBinder::VulkanResourceBinder(VkPipelineBindPoint bind_point) - : bind_point_(bind_point) { +VulkanResourceSet::VulkanResourceSet(VulkanDevice *device) : device_(device) { } -VulkanResourceBinder::~VulkanResourceBinder() { - for (auto &set_pair : sets_) { - Set &set = set_pair.second; - for (auto &binding_pair : set.bindings) { - VkSampler sampler = binding_pair.second.sampler; - if (sampler != VK_NULL_HANDLE) { - Device *dev = binding_pair.second.ptr.device; - vkDestroySampler(static_cast(dev)->vk_device(), sampler, - kNoVkAllocCallbacks); - } - } - } +VulkanResourceSet::~VulkanResourceSet() { } -VkSampler create_sampler(ImageSamplerConfig config, VkDevice device) { - VkSampler sampler = VK_NULL_HANDLE; - - // todo: fill these using the information from the ImageSamplerConfig - VkSamplerCreateInfo sampler_info{}; - sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - sampler_info.magFilter = VK_FILTER_LINEAR; - sampler_info.minFilter = VK_FILTER_LINEAR; - sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_info.anisotropyEnable = VK_FALSE; - sampler_info.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK; - sampler_info.unnormalizedCoordinates = VK_FALSE; - sampler_info.compareEnable = VK_FALSE; - sampler_info.compareOp = VK_COMPARE_OP_ALWAYS; - sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - - if (vkCreateSampler(device, &sampler_info, nullptr, &sampler) != VK_SUCCESS) { - throw std::runtime_error("failed to create texture sampler!"); - } - return sampler; -} - -#define CHECK_SET_BINDINGS \ - bool set_not_found = (sets_.find(set) == sets_.end()); \ - if (set_not_found) { \ - if (layout_locked_) { \ - return; \ - } else { \ - sets_[set] = {}; \ - } \ - } \ - auto &bindings = sets_.at(set).bindings; \ - if (layout_locked_ && bindings.find(binding) == bindings.end()) { \ - return; \ - } - -void VulkanResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { - CHECK_SET_BINDINGS; - - if (layout_locked_) { - RHI_ASSERT(bindings.at(binding).type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - } else { - if (bindings.find(binding) != bindings.end()) { - RHI_LOG_ERROR("Overriding last binding"); - } - } +ShaderResourceSet &VulkanResourceSet::rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { + dirty_ = true; - Binding new_binding = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ptr, size}; - bindings[binding] = new_binding; + vkapi::IVkBuffer buffer = + (ptr != kDeviceNullPtr) ? 
device_->get_vkbuffer(ptr) : nullptr; + bindings_[binding] = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + Buffer{buffer, ptr.offset, size}}; + return *this; } -void VulkanResourceBinder::rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { - rw_buffer(set, binding, alloc.get_ptr(0), VK_WHOLE_SIZE); +ShaderResourceSet &VulkanResourceSet::rw_buffer(uint32_t binding, + DeviceAllocation alloc) { + return rw_buffer(binding, alloc.get_ptr(0), VK_WHOLE_SIZE); } -void VulkanResourceBinder::buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) { - CHECK_SET_BINDINGS; - - if (layout_locked_) { - RHI_ASSERT(bindings.at(binding).type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); - } else { - if (bindings.find(binding) != bindings.end()) { - RHI_LOG_ERROR("Overriding last binding"); - } - } +ShaderResourceSet &VulkanResourceSet::buffer(uint32_t binding, + DevicePtr ptr, + size_t size) { + dirty_ = true; - Binding new_binding = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, ptr, size}; - bindings[binding] = new_binding; + vkapi::IVkBuffer buffer = + (ptr != kDeviceNullPtr) ? device_->get_vkbuffer(ptr) : nullptr; + bindings_[binding] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + Buffer{buffer, ptr.offset, size}}; + return *this; } -void VulkanResourceBinder::buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) { - buffer(set, binding, alloc.get_ptr(0), VK_WHOLE_SIZE); +ShaderResourceSet &VulkanResourceSet::buffer(uint32_t binding, + DeviceAllocation alloc) { + return buffer(binding, alloc.get_ptr(0), VK_WHOLE_SIZE); } -void VulkanResourceBinder::image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) { - CHECK_SET_BINDINGS - if (layout_locked_) { - RHI_ASSERT(bindings.at(binding).type == - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); - } else { - if (bindings.find(binding) != bindings.end()) { - RHI_LOG_ERROR("Overriding last binding"); - } - } - if (bindings[binding].sampler != VK_NULL_HANDLE) { - Device *dev = bindings[binding].ptr.device; - vkDestroySampler(static_cast(dev)->vk_device(), - bindings[binding].sampler, kNoVkAllocCallbacks); - } - bindings[binding] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - alloc.get_ptr(0), VK_WHOLE_SIZE}; - if (alloc.device) { - VulkanDevice *device = static_cast(alloc.device); - bindings[binding].sampler = - create_sampler(sampler_config, device->vk_device()); - } -} +ShaderResourceSet &VulkanResourceSet::image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) { + dirty_ = true; -void VulkanResourceBinder::rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) { - CHECK_SET_BINDINGS - if (layout_locked_) { - RHI_ASSERT(bindings.at(binding).type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); - } else { - if (bindings.find(binding) != bindings.end()) { - RHI_LOG_ERROR("Overriding last binding"); - } + vkapi::IVkSampler sampler = nullptr; + vkapi::IVkImageView view = nullptr; + + if (alloc != kDeviceNullAllocation) { + VkSamplerCreateInfo sampler_info{}; + sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_info.magFilter = VK_FILTER_LINEAR; + sampler_info.minFilter = VK_FILTER_LINEAR; + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.anisotropyEnable = VK_FALSE; + sampler_info.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK; + sampler_info.unnormalizedCoordinates = VK_FALSE; + 
sampler_info.compareEnable = VK_FALSE; + sampler_info.compareOp = VK_COMPARE_OP_ALWAYS; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + + sampler = vkapi::create_sampler(device_->vk_device(), sampler_info); + view = device_->get_vk_imageview(alloc); } - bindings[binding] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, alloc.get_ptr(0), - VK_WHOLE_SIZE}; -} -#undef CHECK_SET_BINDINGS + bindings_[binding] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + Texture{view, sampler}}; -void VulkanResourceBinder::vertex_buffer(DevicePtr ptr, uint32_t binding) { - vertex_buffers_[binding] = ptr; + return *this; } -void VulkanResourceBinder::index_buffer(DevicePtr ptr, size_t index_width) { - index_buffer_ = ptr; - if (index_width == 32) { - index_type_ = VK_INDEX_TYPE_UINT32; - } else if (index_width == 16) { - index_type_ = VK_INDEX_TYPE_UINT16; - } else { - RHI_LOG_ERROR("unsupported index width"); - } +ShaderResourceSet &VulkanResourceSet::rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) { + dirty_ = true; + + vkapi::IVkImageView view = (alloc != kDeviceNullAllocation) + ? device_->get_vk_lod_imageview(alloc, lod) + : nullptr; + + bindings_[binding] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, Image{view}}; + + return *this; } -void VulkanResourceBinder::write_to_set(uint32_t index, - VulkanDevice &device, - vkapi::IVkDescriptorSet set) { - std::vector buffer_infos; - std::vector image_infos; - std::vector is_image; - std::vector desc_writes; +RhiReturn VulkanResourceSet::finalize() { + if (!dirty_ && set_) { + // If nothing changed directly return the set + return {RhiResult::success, set_}; + } - for (auto &pair : sets_.at(index).bindings) { - uint32_t binding = pair.first; + if (bindings_.size() <= 0) { + // A set can't be empty + return {RhiResult::invalid_usage, nullptr}; + } - if (pair.second.ptr != kDeviceNullPtr) { - VkDescriptorBufferInfo &buffer_info = buffer_infos.emplace_back(); - VkDescriptorImageInfo &image_info = image_infos.emplace_back(); - - if (pair.second.type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || - pair.second.type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { - auto buffer = device.get_vkbuffer(pair.second.ptr); - buffer_info.buffer = buffer->buffer; - buffer_info.offset = pair.second.ptr.offset; - buffer_info.range = pair.second.size; - is_image.push_back(false); - set->ref_binding_objs[binding] = buffer; - } else if (pair.second.type == - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - auto view = std::get<1>(device.get_vk_image(pair.second.ptr)); - image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - image_info.imageView = view->view; - image_info.sampler = pair.second.sampler; - is_image.push_back(true); - set->ref_binding_objs[binding] = view; - } else if (pair.second.type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { - auto view = - device.get_vk_lod_imageview(pair.second.ptr, pair.second.image_lod); - image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - image_info.imageView = view->view; - is_image.push_back(true); - set->ref_binding_objs[binding] = view; - } else { - RHI_LOG_ERROR("Ignoring unsupported Descriptor Type"); - } + vkapi::IVkDescriptorSetLayout new_layout = + device_->get_desc_set_layout(*this); + if (new_layout != layout_) { + // Layout changed, reset `set` + set_ = nullptr; + layout_ = new_layout; + } - VkWriteDescriptorSet &write = desc_writes.emplace_back(); - write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write.pNext = nullptr; - write.dstSet = set->set; - write.dstBinding = binding; - write.dstArrayElement = 0; - 
write.descriptorCount = 1; - write.descriptorType = pair.second.type; - write.pImageInfo = nullptr; - write.pBufferInfo = nullptr; - write.pTexelBufferView = nullptr; + if (!set_) { + // If set_ is null, create a new one + auto [status, new_set] = device_->alloc_desc_set(layout_); + if (status != RhiResult::success) { + return {status, nullptr}; } + set_ = new_set; } - // Set these pointers later as std::vector resize can relocate the pointers - int i = 0; - for (auto &write : desc_writes) { - if (is_image[i]) { - write.pImageInfo = &image_infos[i]; + std::forward_list buffer_infos; + std::forward_list image_infos; + std::vector desc_writes; + + for (auto &pair : bindings_) { + uint32_t binding = pair.first; + VkDescriptorType type = pair.second.type; + auto &resource = pair.second.res; + + VkWriteDescriptorSet &write = desc_writes.emplace_back(); + write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write.pNext = nullptr; + write.dstSet = set_->set; + write.dstBinding = binding; + write.dstArrayElement = 0; + write.descriptorCount = 1; + write.descriptorType = type; + write.pImageInfo = nullptr; + write.pBufferInfo = nullptr; + write.pTexelBufferView = nullptr; + + if (Buffer *buf = std::get_if(&resource)) { + VkDescriptorBufferInfo &buffer_info = buffer_infos.emplace_front(); + buffer_info.buffer = buf->buffer ? buf->buffer->buffer : VK_NULL_HANDLE; + buffer_info.offset = buf->offset; + buffer_info.range = buf->size; + + write.pBufferInfo = &buffer_info; + if (buf->buffer) { + set_->ref_binding_objs.push_back(buf->buffer); + } + } else if (Image *img = std::get_if(&resource)) { + VkDescriptorImageInfo &image_info = image_infos.emplace_front(); + image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + image_info.imageView = img->view ? img->view->view : VK_NULL_HANDLE; + image_info.sampler = VK_NULL_HANDLE; + + write.pImageInfo = &image_info; + if (img->view) { + set_->ref_binding_objs.push_back(img->view); + } + } else if (Texture *tex = std::get_if(&resource)) { + VkDescriptorImageInfo &image_info = image_infos.emplace_front(); + image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + image_info.imageView = tex->view ? tex->view->view : VK_NULL_HANDLE; + image_info.sampler = + tex->sampler ? tex->sampler->sampler : VK_NULL_HANDLE; + + write.pImageInfo = &image_info; + if (tex->view) { + set_->ref_binding_objs.push_back(tex->view); + } + if (tex->sampler) { + set_->ref_binding_objs.push_back(tex->sampler); + } } else { - write.pBufferInfo = &buffer_infos[i]; + RHI_LOG_ERROR("Ignoring unsupported Descriptor Type"); } - i++; } - vkUpdateDescriptorSets(device.vk_device(), desc_writes.size(), + vkUpdateDescriptorSets(device_->vk_device(), desc_writes.size(), desc_writes.data(), /*descriptorCopyCount=*/0, /*pDescriptorCopies=*/nullptr); + + dirty_ = false; + + return {RhiResult::success, set_}; } -void VulkanResourceBinder::lock_layout() { - layout_locked_ = true; +RasterResources &VulkanRasterResources::vertex_buffer(DevicePtr ptr, + uint32_t binding) { + vkapi::IVkBuffer buffer = + (ptr != kDeviceNullPtr) ? device_->get_vkbuffer(ptr) : nullptr; + if (buffer == nullptr) { + vertex_buffers.erase(binding); + } else { + vertex_buffers[binding] = {buffer, ptr.offset}; + } + return *this; +} + +RasterResources &VulkanRasterResources::index_buffer(DevicePtr ptr, + size_t index_width) { + vkapi::IVkBuffer buffer = + (ptr != kDeviceNullPtr) ? 
device_->get_vkbuffer(ptr) : nullptr; + if (buffer == nullptr) { + index_binding = BufferBinding(); + index_type = VK_INDEX_TYPE_MAX_ENUM; + } else { + index_binding = {buffer, ptr.offset}; + if (index_width == 32) { + index_type = VK_INDEX_TYPE_UINT32; + } else if (index_width == 16) { + index_type = VK_INDEX_TYPE_UINT16; + } + } + return *this; } VulkanCommandList::VulkanCommandList(VulkanDevice *ti_device, @@ -866,7 +847,7 @@ void VulkanCommandList::bind_pipeline(Pipeline *p) { vkCmdBindPipeline(buffer_->buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline->pipeline); - VkViewport viewport; + VkViewport viewport{}; viewport.width = viewport_width_; viewport.height = viewport_height_; viewport.x = 0; @@ -874,9 +855,8 @@ void VulkanCommandList::bind_pipeline(Pipeline *p) { viewport.minDepth = 0.0; viewport.maxDepth = 1.0; - VkRect2D scissor; - scissor.offset = {0, 0}; - scissor.extent = {viewport_width_, viewport_height_}; + VkRect2D scissor{/*offset*/ {0, 0}, + /*extent*/ {viewport_width_, viewport_height_}}; vkCmdSetViewport(buffer_->buffer, 0, 1, &viewport); vkCmdSetScissor(buffer_->buffer, 0, 1, &scissor); @@ -892,59 +872,88 @@ void VulkanCommandList::bind_pipeline(Pipeline *p) { current_pipeline_ = pipeline; } -void VulkanCommandList::bind_resources(ResourceBinder *ti_binder) { - VulkanResourceBinder *binder = static_cast(ti_binder); +RhiResult VulkanCommandList::bind_shader_resources(ShaderResourceSet *res, + int set_index) { + VulkanResourceSet *set = static_cast(res); + if (set->get_bindings().size() <= 0) { + return RhiResult::success; + } + + auto [status, vk_set] = set->finalize(); + if (status != RhiResult::success) { + return status; + } - for (auto &pair : binder->get_sets()) { - VkPipelineLayout pipeline_layout = - current_pipeline_->pipeline_layout()->layout; + vkapi::IVkDescriptorSetLayout set_layout = set->get_layout(); - vkapi::IVkDescriptorSetLayout layout = - ti_device_->get_desc_set_layout(pair.second); + if (current_pipeline_->pipeline_layout()->ref_desc_layouts[set_index] != + set_layout) { + // WARN: we have a layout mismatch + RHI_LOG_ERROR("Layout mismatch"); - vkapi::IVkDescriptorSet set = nullptr; + auto &templates = current_pipeline_->get_resource_set_templates(); + VulkanResourceSet &set_template = templates.at(set_index); - if (currently_used_sets_.find(pair.second) != currently_used_sets_.end()) { - set = currently_used_sets_.at(pair.second); + for (const auto &template_binding : set_template.get_bindings()) { + char msg[512]; + snprintf(msg, 512, "Template binding %d: (VkDescriptorType) %d", + template_binding.first, template_binding.second.type); + RHI_LOG_ERROR(msg); } - if (!set) { - set = ti_device_->alloc_desc_set(layout); - binder->write_to_set(pair.first, *ti_device_, set); - currently_used_sets_[pair.second] = set; + for (const auto &binding : set->get_bindings()) { + char msg[512]; + snprintf(msg, 512, "Binding %d: (VkDescriptorType) %d", binding.first, + binding.second.type); + RHI_LOG_ERROR(msg); } - VkPipelineBindPoint bind_point; - if (current_pipeline_->is_graphics()) { - bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; - } else { - bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; - } + return RhiResult::invalid_usage; + } - vkCmdBindDescriptorSets(buffer_->buffer, bind_point, pipeline_layout, - /*firstSet=*/0, - /*descriptorSetCount=*/1, &set->set, - /*dynamicOffsetCount=*/0, - /*pDynamicOffsets=*/nullptr); - buffer_->refs.push_back(set); - } - - if (current_pipeline_->is_graphics()) { - auto [idx_ptr, type] = binder->get_index_buffer(); - if 
(idx_ptr.device) { - auto index_buffer = ti_device_->get_vkbuffer(idx_ptr); - vkCmdBindIndexBuffer(buffer_->buffer, index_buffer->buffer, - idx_ptr.offset, type); - buffer_->refs.push_back(index_buffer); - } + VkPipelineLayout pipeline_layout = + current_pipeline_->pipeline_layout()->layout; + VkPipelineBindPoint bind_point = current_pipeline_->is_graphics() + ? VK_PIPELINE_BIND_POINT_GRAPHICS + : VK_PIPELINE_BIND_POINT_COMPUTE; - for (auto [binding, ptr] : binder->get_vertex_buffers()) { - auto buffer = ti_device_->get_vkbuffer(ptr); - vkCmdBindVertexBuffers(buffer_->buffer, binding, 1, &buffer->buffer, - &ptr.offset); - buffer_->refs.push_back(buffer); - } + vkCmdBindDescriptorSets(buffer_->buffer, bind_point, pipeline_layout, + /*firstSet=*/set_index, + /*descriptorSetCount=*/1, &vk_set->set, + /*dynamicOffsetCount=*/0, + /*pDynamicOffsets=*/nullptr); + buffer_->refs.push_back(vk_set); + + return RhiResult::success; +} + +RhiResult VulkanCommandList::bind_raster_resources(RasterResources *_res) { + VulkanRasterResources *res = static_cast(_res); + + if (!current_pipeline_->is_graphics()) { + return RhiResult::invalid_usage; } + + if (res->index_type >= VK_INDEX_TYPE_MAX_ENUM) { + return RhiResult::not_supported; + } + + if (res->index_binding.buffer != nullptr) { + // We have a valid index buffer + vkapi::IVkBuffer index_buffer = res->index_binding.buffer; + vkCmdBindIndexBuffer(buffer_->buffer, index_buffer->buffer, + res->index_binding.offset, res->index_type); + buffer_->refs.push_back(index_buffer); + } + + for (auto &[binding, buffer] : res->vertex_buffers) { + VkDeviceSize offset_vk = buffer.offset; + vkCmdBindVertexBuffers(buffer_->buffer, binding, 1, &buffer.buffer->buffer, + &offset_vk); + buffer_->refs.push_back(buffer.buffer); + } + + return RhiResult::success; } void VulkanCommandList::buffer_barrier(DevicePtr ptr, size_t size) { @@ -952,7 +961,7 @@ void VulkanCommandList::buffer_barrier(DevicePtr ptr, size_t size) { auto buffer = ti_device_->get_vkbuffer(ptr); - VkBufferMemoryBarrier barrier; + VkBufferMemoryBarrier barrier{}; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.pNext = nullptr; barrier.buffer = buffer->buffer; @@ -986,7 +995,7 @@ void VulkanCommandList::buffer_barrier(DeviceAllocation alloc) { } void VulkanCommandList::memory_barrier() { - VkMemoryBarrier barrier; + VkMemoryBarrier barrier{}; barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; barrier.pNext = nullptr; barrier.srcAccessMask = @@ -1055,9 +1064,8 @@ void VulkanCommandList::begin_renderpass(int x0, current_renderpass_desc_.color_attachments.clear(); rp_desc.clear_depth = depth_clear; - VkRect2D render_area; - render_area.offset = {x0, y0}; - render_area.extent = {uint32_t(x1 - x0), uint32_t(y1 - y0)}; + VkRect2D render_area{/*offset*/ {x0, y0}, + /*extent*/ {uint32_t(x1 - x0), uint32_t(y1 - y0)}}; viewport_width_ = render_area.extent.width; viewport_height_ = render_area.extent.height; @@ -1106,7 +1114,7 @@ void VulkanCommandList::begin_renderpass(int x0, render_info.pDepthAttachment = nullptr; render_info.pStencilAttachment = nullptr; - VkRenderingAttachmentInfo depth_attachment_info; + VkRenderingAttachmentInfo depth_attachment_info{}; if (depth_attachment) { auto [image, view, format] = ti_device_->get_vk_image(*depth_attachment); rp_desc.depth_attachment = format; @@ -1427,10 +1435,9 @@ void VulkanCommandList::blit_image(DeviceAllocation dst_img, ImageLayout dst_img_layout, ImageLayout src_img_layout, const ImageCopyParams ¶ms) { - VkOffset3D blit_size; - blit_size.x = 
params.width; - blit_size.y = params.height; - blit_size.z = params.depth; + VkOffset3D blit_size{/*x*/ int(params.width), + /*y*/ int(params.height), + /*z*/ int(params.depth)}; VkImageBlit blit{}; blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; blit.srcSubresource.layerCount = 1; @@ -1502,7 +1509,8 @@ void VulkanDevice::init_vulkan_structs(Params ¶ms) { graphics_queue_family_index_ = params.graphics_queue_family_index; create_vma_allocator(); - new_descriptor_pool(); + RHI_ASSERT(new_descriptor_pool() == RhiResult::success && + "Failed to allocate initial descriptor pool"); } VulkanDevice::~VulkanDevice() { @@ -1700,6 +1708,14 @@ void VulkanDevice::dealloc_memory(DeviceAllocation handle) { allocations_.release(&get_alloc_internal(handle)); } +ShaderResourceSet *VulkanDevice::create_resource_set() { + return new VulkanResourceSet(this); +} + +RasterResources *VulkanDevice::create_raster_resources() { + return new VulkanRasterResources(this); +} + uint64_t VulkanDevice::get_memory_physical_pointer(DeviceAllocation handle) { return uint64_t(get_alloc_internal(handle).addr); } @@ -1899,7 +1915,7 @@ std::unique_ptr VulkanDevice::create_raster_pipeline( params.device = this; params.name = name; - for (auto src_desc : src) { + for (auto &src_desc : src) { SpirvCodeView &code = params.code.emplace_back(); code.data = (uint32_t *)src_desc.data; code.size = src_desc.size; @@ -2156,10 +2172,10 @@ vkapi::IVkRenderPass VulkanDevice::get_renderpass( std::vector attachments; std::vector color_attachments; - VkAttachmentReference depth_attachment; + VkAttachmentReference depth_attachment{}; uint32_t i = 0; - for (auto [format, clear] : desc.color_attachments) { + for (auto &[format, clear] : desc.color_attachments) { VkAttachmentDescription &description = attachments.emplace_back(); description.flags = 0; description.format = format; @@ -2231,10 +2247,10 @@ vkapi::IVkRenderPass VulkanDevice::get_renderpass( } vkapi::IVkDescriptorSetLayout VulkanDevice::get_desc_set_layout( - VulkanResourceBinder::Set &set) { + VulkanResourceSet &set) { if (desc_set_layouts_.find(set) == desc_set_layouts_.end()) { std::vector bindings; - for (auto &pair : set.bindings) { + for (const auto &pair : set.get_bindings()) { bindings.push_back(VkDescriptorSetLayoutBinding{ /*binding=*/pair.first, pair.second.type, /*descriptorCount=*/1, VK_SHADER_STAGE_ALL, @@ -2257,20 +2273,22 @@ vkapi::IVkDescriptorSetLayout VulkanDevice::get_desc_set_layout( } } -vkapi::IVkDescriptorSet VulkanDevice::alloc_desc_set( +RhiReturn VulkanDevice::alloc_desc_set( vkapi::IVkDescriptorSetLayout layout) { - // TODO: Currently we assume the calling code has called get_desc_set_layout - // before allocating a desc set. 
Either we should guard against this or - // maintain this assumption in other parts of the VulkanBackend + // This returns nullptr if can't allocate (OOM or pool is full) vkapi::IVkDescriptorSet set = vkapi::allocate_descriptor_sets(desc_pool_, layout); if (set == nullptr) { - new_descriptor_pool(); + RhiResult status = new_descriptor_pool(); + // Allocating new descriptor pool failed + if (status != RhiResult::success) { + return {status, nullptr}; + } set = vkapi::allocate_descriptor_sets(desc_pool_, layout); } - return set; + return {RhiResult::success, set}; } void VulkanDevice::create_vma_allocator() { @@ -2357,7 +2375,7 @@ void VulkanDevice::create_vma_allocator() { vmaCreateAllocator(&allocatorInfo, &allocator_export_); } -void VulkanDevice::new_descriptor_pool() { +RhiResult VulkanDevice::new_descriptor_pool() { std::vector pool_sizes{ {VK_DESCRIPTOR_TYPE_SAMPLER, 64}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 256}, @@ -2376,7 +2394,15 @@ void VulkanDevice::new_descriptor_pool() { pool_info.maxSets = 64; pool_info.poolSizeCount = pool_sizes.size(); pool_info.pPoolSizes = pool_sizes.data(); - desc_pool_ = vkapi::create_descriptor_pool(device_, &pool_info); + auto new_desc_pool = vkapi::create_descriptor_pool(device_, &pool_info); + + if (!new_desc_pool) { + return RhiResult::out_of_memory; + } + + desc_pool_ = new_desc_pool; + + return RhiResult::success; } VkPresentModeKHR choose_swap_present_mode( @@ -2540,7 +2566,7 @@ void VulkanSurface::create_swap_chain() { this->width_ = extent.width; this->height_ = extent.height; - VkSwapchainCreateInfoKHR createInfo; + VkSwapchainCreateInfoKHR createInfo{}; createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; createInfo.pNext = nullptr; createInfo.flags = 0; @@ -2602,7 +2628,7 @@ void VulkanSurface::create_swap_chain() { } void VulkanSurface::destroy_swap_chain() { - for (auto alloc : swapchain_images_) { + for (auto &alloc : swapchain_images_) { std::get<1>(device_->get_vk_image(alloc)) = nullptr; device_->destroy_image(alloc); } @@ -2644,7 +2670,7 @@ std::pair VulkanSurface::get_size() { StreamSemaphore VulkanSurface::acquire_next_image() { if (!config_.window_handle) { - image_index_ = (image_index_ + 1) % swapchain_images_.size(); + image_index_ = (image_index_ + 1) % uint32_t(swapchain_images_.size()); return nullptr; } else { vkAcquireNextImageKHR(device_->vk_device(), swapchain_, UINT64_MAX, @@ -2694,7 +2720,7 @@ DeviceAllocation VulkanSurface::get_depth_data(DeviceAllocation &depth_alloc) { auto *stream = device_->get_graphics_stream(); auto [w, h] = get_size(); - size_t size_bytes = w * h * 4; + size_t size_bytes = size_t(w * h) * sizeof(float); if (depth_buffer_ == kDeviceNullAllocation) { Device::AllocParams params{size_bytes, /*host_wrtie*/ false, @@ -2725,7 +2751,7 @@ DeviceAllocation VulkanSurface::get_image_data() { auto *stream = device_->get_graphics_stream(); DeviceAllocation img_alloc = swapchain_images_[image_index_]; auto [w, h] = get_size(); - size_t size_bytes = w * h * 4; + size_t size_bytes = size_t(w * h) * sizeof(uint8_t) * 4; /* if (screenshot_image_ == kDeviceNullAllocation) { diff --git a/taichi/rhi/vulkan/vulkan_device.h b/taichi/rhi/vulkan/vulkan_device.h index 9818c965a8321..edd110110ca6e 100644 --- a/taichi/rhi/vulkan/vulkan_device.h +++ b/taichi/rhi/vulkan/vulkan_device.h @@ -1,8 +1,11 @@ #pragma once +#include "taichi/rhi/device.h" #include "taichi/rhi/vulkan/vulkan_api.h" +#include "taichi/rhi/vulkan/vulkan_utils.h" +#include "taichi/common/ref_counted_pool.h" -#include +#include 
"vk_mem_alloc.h" #ifdef ANDROID #include @@ -13,10 +16,7 @@ #include #include #include - -#include -#include -#include +#include namespace taichi::lang { namespace vulkan { @@ -59,7 +59,7 @@ struct RenderPassDescHasher { std::size_t operator()(const VulkanRenderPassDesc &desc) const { // TODO: Come up with a better hash size_t hash = 0; - for (auto pair : desc.color_attachments) { + for (auto &pair : desc.color_attachments) { hash ^= (size_t(pair.first) + pair.second); hash = (hash << 3) || (hash >> 61); } @@ -70,10 +70,10 @@ struct RenderPassDescHasher { }; struct VulkanFramebufferDesc { - std::vector attachments; - uint32_t width; - uint32_t height; - vkapi::IVkRenderPass renderpass; + std::vector attachments{}; + uint32_t width{0}; + uint32_t height{0}; + vkapi::IVkRenderPass renderpass{nullptr}; bool operator==(const VulkanFramebufferDesc &other) const { return width == other.width && height == other.height && @@ -84,7 +84,7 @@ struct VulkanFramebufferDesc { struct FramebufferDescHasher { std::size_t operator()(const VulkanFramebufferDesc &desc) const { size_t hash = 0; - for (auto view : desc.attachments) { + for (auto &view : desc.attachments) { hash ^= size_t(view->view); hash = (hash << 3) || (hash >> 61); } @@ -95,76 +95,105 @@ struct FramebufferDescHasher { } }; -class VulkanResourceBinder : public ResourceBinder { +class VulkanResourceSet : public ShaderResourceSet { public: - struct Binding { - VkDescriptorType type; - DevicePtr ptr; - VkDeviceSize size; - union { - VkSampler sampler{VK_NULL_HANDLE}; // used only for images - int image_lod; - }; + struct Buffer { + vkapi::IVkBuffer buffer{nullptr}; + VkDeviceSize offset{0}; + VkDeviceSize size{0}; - bool operator==(const Binding &other) const { - return other.type == type && other.ptr == ptr && other.size == size && - other.sampler == sampler; + bool operator==(const Buffer &rhs) const { + return buffer == rhs.buffer && offset == rhs.offset && size == rhs.size; } - bool operator!=(const Binding &other) const { - return !(*this == other); + bool operator!=(const Buffer &rhs) const { + return !(*this == rhs); } }; - struct Set { - std::unordered_map bindings; + struct Image { + vkapi::IVkImageView view{nullptr}; - // The compare function is for the hashmap to locate a set layout - bool operator==(const Set &other) const { - if (other.bindings.size() != bindings.size()) { - return false; - } - for (auto &pair : bindings) { - auto other_binding_iter = other.bindings.find(pair.first); - if (other_binding_iter == other.bindings.end()) { - return false; - } - const Binding &other_binding = other_binding_iter->second; - if (other_binding.type != pair.second.type) { - return false; - } - } - return true; + bool operator==(const Image &rhs) const { + return view == rhs.view; + } + + bool operator!=(const Image &rhs) const { + return view != rhs.view; } + }; - bool operator!=(const Set &other) const { - return !(*this == other); + struct Texture { + vkapi::IVkImageView view{nullptr}; + vkapi::IVkSampler sampler{nullptr}; + + bool operator==(const Texture &rhs) const { + return view == rhs.view && sampler == rhs.sampler; + } + + bool operator!=(const Texture &rhs) const { + return !(*this == rhs); + } + }; + + struct Binding { + VkDescriptorType type{VK_DESCRIPTOR_TYPE_MAX_ENUM}; + std::variant res{Buffer()}; + + bool operator==(const Binding &other) const { + return other.type == type && other.res == res; + } + + bool operator!=(const Binding &other) const { + return other.type != type || other.res != res; + } + + size_t hash() const { + 
size_t hash = 0; + rhi_impl::hash_combine(hash, int(type)); + if (const Buffer *buf = std::get_if(&res)) { + rhi_impl::hash_combine(hash, (void *)buf->buffer.get()); + rhi_impl::hash_combine(hash, size_t(buf->offset)); + rhi_impl::hash_combine(hash, size_t(buf->size)); + } else if (const Image *img = std::get_if(&res)) { + rhi_impl::hash_combine(hash, (void *)img->view.get()); + } else if (const Texture *tex = std::get_if(&res)) { + rhi_impl::hash_combine(hash, (void *)tex->view.get()); + rhi_impl::hash_combine(hash, (void *)tex->sampler.get()); + } + return hash; } }; + // This hashes the Set Layout struct SetLayoutHasher { - std::size_t operator()(const Set &set) const { - // TODO: Come up with a better hash + std::size_t operator()(const VulkanResourceSet &set) const { + // NOTE: Bindings in this case is ordered, we can use non-commutative + // operations size_t hash = 0; - for (const auto &pair : set.bindings) { - hash = (hash ^ size_t(pair.second.type)) ^ size_t(pair.first); + for (const auto &pair : set.bindings_) { + rhi_impl::hash_combine(hash, pair.first); + // We only care about type in this case + rhi_impl::hash_combine(hash, pair.second.type); } return hash; } }; - struct DescSetCmp { - bool operator()(const Set &a, const Set &b) const { - if (a.bindings.size() != b.bindings.size()) { + // This compares the layout of two sets + struct SetLayoutCmp { + bool operator()(const VulkanResourceSet &lhs, + const VulkanResourceSet &rhs) const { + if (lhs.bindings_.size() != rhs.bindings_.size()) { return false; } - for (auto &pair : a.bindings) { - auto other_binding_iter = b.bindings.find(pair.first); - if (other_binding_iter == b.bindings.end()) { + for (auto &lhs_pair : lhs.bindings_) { + auto rhs_binding_iter = rhs.bindings_.find(lhs_pair.first); + if (rhs_binding_iter == rhs.bindings_.end()) { return false; } - const Binding &other_binding = other_binding_iter->second; - if (other_binding != pair.second) { + const Binding &rhs_binding = rhs_binding_iter->second; + if (rhs_binding.type != lhs_pair.second.type) { return false; } } @@ -172,83 +201,84 @@ class VulkanResourceBinder : public ResourceBinder { } }; + // This hashes the entire set (including resources) struct DescSetHasher { - std::size_t operator()(const Set &set) const { - // TODO: Come up with a better hash + std::size_t operator()(const VulkanResourceSet &set) const { size_t hash = 0; - for (const auto &pair : set.bindings) { - size_t binding_hash = 0; - uint32_t *u32_ptr = (uint32_t *)&pair.second; - static_assert( - sizeof(VulkanResourceBinder::Binding) % sizeof(uint32_t) == 0, - "sizeof(VulkanResourceBinder::Binding) is not a multiple of 4"); - size_t n = sizeof(VulkanResourceBinder::Binding) / sizeof(uint32_t); - for (size_t i = 0; i < n; i++) { - binding_hash = binding_hash ^ u32_ptr[i]; - binding_hash = (binding_hash << 7) | (binding_hash >> (64 - 7)); - } - binding_hash = binding_hash ^ pair.first; - binding_hash = - (binding_hash << pair.first) | (binding_hash >> (64 - pair.first)); - hash = hash ^ binding_hash; + for (const auto &pair : set.bindings_) { + rhi_impl::hash_combine(hash, pair.first); + hash ^= pair.second.hash() + 0x9e3779b9 + (hash << 6) + (hash >> 2); } return hash; } }; - explicit VulkanResourceBinder( - VkPipelineBindPoint bind_point = VK_PIPELINE_BIND_POINT_COMPUTE); - ~VulkanResourceBinder() override; - - void rw_buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void rw_buffer(uint32_t set, - uint32_t binding, - DeviceAllocation alloc) override; - void 
buffer(uint32_t set, - uint32_t binding, - DevicePtr ptr, - size_t size) override; - void buffer(uint32_t set, uint32_t binding, DeviceAllocation alloc) override; - void image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - ImageSamplerConfig sampler_config) override; - void rw_image(uint32_t set, - uint32_t binding, - DeviceAllocation alloc, - int lod) override; - void vertex_buffer(DevicePtr ptr, uint32_t binding = 0) override; - void index_buffer(DevicePtr ptr, size_t index_width) override; - - void write_to_set(uint32_t index, - VulkanDevice &device, - vkapi::IVkDescriptorSet set); - Set &get_set(uint32_t index) { - return sets_[index]; - } - std::unordered_map &get_sets() { - return sets_; + // This compares two sets (including resources) + struct SetCmp { + bool operator()(const VulkanResourceSet &lhs, + const VulkanResourceSet &rhs) const { + return lhs.bindings_ == rhs.bindings_; + } + }; + + explicit VulkanResourceSet(VulkanDevice *device); + VulkanResourceSet(const VulkanResourceSet &other) = default; + ~VulkanResourceSet() override; + + ShaderResourceSet &rw_buffer(uint32_t binding, + DevicePtr ptr, + size_t size) final; + ShaderResourceSet &rw_buffer(uint32_t binding, DeviceAllocation alloc) final; + ShaderResourceSet &buffer(uint32_t binding, DevicePtr ptr, size_t size) final; + ShaderResourceSet &buffer(uint32_t binding, DeviceAllocation alloc) final; + ShaderResourceSet &image(uint32_t binding, + DeviceAllocation alloc, + ImageSamplerConfig sampler_config) final; + ShaderResourceSet &rw_image(uint32_t binding, + DeviceAllocation alloc, + int lod) final; + + rhi_impl::RhiReturn finalize(); + + vkapi::IVkDescriptorSetLayout get_layout() { + return layout_; } - std::unordered_map &get_vertex_buffers() { - return vertex_buffers_; + + const std::map &get_bindings() const { + return bindings_; } - std::pair get_index_buffer() { - return std::make_pair(index_buffer_, index_type_); + + private: + std::map bindings_; + VulkanDevice *device_; + + vkapi::IVkDescriptorSetLayout layout_{nullptr}; + vkapi::IVkDescriptorSet set_{nullptr}; + + bool dirty_{true}; +}; + +class VulkanRasterResources : public RasterResources { + public: + VulkanRasterResources(VulkanDevice *device) : device_(device) { } - void lock_layout(); + struct BufferBinding { + vkapi::IVkBuffer buffer{nullptr}; + size_t offset{0}; + }; - private: - std::unordered_map sets_; - bool layout_locked_{false}; - VkPipelineBindPoint bind_point_; + std::unordered_map vertex_buffers; + BufferBinding index_binding; + VkIndexType index_type{VK_INDEX_TYPE_MAX_ENUM}; + + ~VulkanRasterResources() override = default; + + RasterResources &vertex_buffer(DevicePtr ptr, uint32_t binding = 0) final; + RasterResources &index_buffer(DevicePtr ptr, size_t index_width) final; - std::unordered_map vertex_buffers_; - DevicePtr index_buffer_{kDeviceNullPtr}; - VkIndexType index_type_; + private: + VulkanDevice *device_; }; // VulkanPipeline maps to a vkapi::IVkPipeline, or a SPIR-V module (a GLSL @@ -269,10 +299,6 @@ class VulkanPipeline : public Pipeline { const std::vector &vertex_attrs); ~VulkanPipeline() override; - ResourceBinder *resource_binder() override { - return &resource_binder_; - } - vkapi::IVkPipelineLayout pipeline_layout() const { return pipeline_layout_; } @@ -296,6 +322,11 @@ class VulkanPipeline : public Pipeline { return graphics_pipeline_template_ != nullptr; } + std::unordered_map + &get_resource_set_templates() { + return set_templates_; + } + private: void create_descriptor_set_layout(const Params ¶ms); void 
create_shader_stages(const Params ¶ms); @@ -328,6 +359,7 @@ class VulkanPipeline : public Pipeline { VkGraphicsPipelineCreateInfo pipeline_info{}; }; + VulkanDevice &ti_device_; // not owned VkDevice device_{VK_NULL_HANDLE}; // not owned std::string name_; @@ -344,7 +376,7 @@ class VulkanPipeline : public Pipeline { RenderPassDescHasher> graphics_pipeline_dynamic_; - VulkanResourceBinder resource_binder_; + std::unordered_map set_templates_; std::vector set_layouts_; std::vector shader_modules_; vkapi::IVkPipeline pipeline_{VK_NULL_HANDLE}; @@ -359,7 +391,9 @@ class VulkanCommandList : public CommandList { ~VulkanCommandList() override; void bind_pipeline(Pipeline *p) override; - void bind_resources(ResourceBinder *binder) override; + RhiResult bind_shader_resources(ShaderResourceSet *res, + int set_index = 0) final; + RhiResult bind_raster_resources(RasterResources *res) final; void buffer_barrier(DevicePtr ptr, size_t size) override; void buffer_barrier(DeviceAllocation alloc) override; void memory_barrier() override; @@ -432,12 +466,6 @@ class VulkanCommandList : public CommandList { vkapi::IVkCommandBuffer buffer_; VulkanPipeline *current_pipeline_{nullptr}; - std::unordered_map - currently_used_sets_; - // Renderpass & raster pipeline std::vector current_dynamic_targets_; VulkanRenderPassDesc current_renderpass_desc_; @@ -470,16 +498,16 @@ class VulkanSurface : public Surface { SurfaceConfig config_; - VulkanDevice *device_; - VkSurfaceKHR surface_; - VkSwapchainKHR swapchain_; - vkapi::IVkSemaphore image_available_; + VulkanDevice *device_{nullptr}; + VkSurfaceKHR surface_{VK_NULL_HANDLE}; + VkSwapchainKHR swapchain_{VK_NULL_HANDLE}; + vkapi::IVkSemaphore image_available_{nullptr}; #ifdef ANDROID - ANativeWindow *window_; + ANativeWindow *window_{nullptr}; #else - GLFWwindow *window_; + GLFWwindow *window_{nullptr}; #endif - BufferFormat image_format_; + BufferFormat image_format_{BufferFormat::unknown}; uint32_t image_index_{0}; @@ -563,13 +591,13 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { public: struct Params { PFN_vkGetInstanceProcAddr get_proc_addr{nullptr}; - VkInstance instance; - VkPhysicalDevice physical_device; - VkDevice device; - VkQueue compute_queue; - uint32_t compute_queue_family_index; - VkQueue graphics_queue; - uint32_t graphics_queue_family_index; + VkInstance instance{VK_NULL_HANDLE}; + VkPhysicalDevice physical_device{VK_NULL_HANDLE}; + VkDevice device{VK_NULL_HANDLE}; + VkQueue compute_queue{VK_NULL_HANDLE}; + uint32_t compute_queue_family_index{0}; + VkQueue graphics_queue{VK_NULL_HANDLE}; + uint32_t graphics_queue_family_index{0}; }; VulkanDevice(); @@ -589,6 +617,10 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { uint64_t get_memory_physical_pointer(DeviceAllocation handle) override; + ShaderResourceSet *create_resource_set() final; + + RasterResources *create_raster_resources() final; + RhiResult map_range(DevicePtr ptr, uint64_t size, void **mapped_ptr) final; RhiResult map(DeviceAllocation alloc, void **mapped_ptr) final; @@ -670,9 +702,9 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { vkapi::IVkFramebuffer get_framebuffer(const VulkanFramebufferDesc &desc); - vkapi::IVkDescriptorSetLayout get_desc_set_layout( - VulkanResourceBinder::Set &set); - vkapi::IVkDescriptorSet alloc_desc_set(vkapi::IVkDescriptorSetLayout layout); + vkapi::IVkDescriptorSetLayout get_desc_set_layout(VulkanResourceSet &set); + rhi_impl::RhiReturn alloc_desc_set( + vkapi::IVkDescriptorSetLayout layout); constexpr VulkanCapabilities 
&vk_caps() { return vk_caps_; @@ -685,21 +717,21 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { friend VulkanSurface; void create_vma_allocator(); - void new_descriptor_pool(); + [[nodiscard]] RhiResult new_descriptor_pool(); VulkanCapabilities vk_caps_; - VkInstance instance_; - VkDevice device_; - VkPhysicalDevice physical_device_; - VmaAllocator allocator_; + VkInstance instance_{VK_NULL_HANDLE}; + VkDevice device_{VK_NULL_HANDLE}; + VkPhysicalDevice physical_device_{VK_NULL_HANDLE}; + VmaAllocator allocator_{nullptr}; VmaAllocator allocator_export_{nullptr}; - VkQueue compute_queue_; - uint32_t compute_queue_family_index_; + VkQueue compute_queue_{VK_NULL_HANDLE}; + uint32_t compute_queue_family_index_{0}; - VkQueue graphics_queue_; - uint32_t graphics_queue_family_index_; + VkQueue graphics_queue_{VK_NULL_HANDLE}; + uint32_t graphics_queue_family_index_{0}; struct ThreadLocalStreams; std::unique_ptr compute_streams_{nullptr}; @@ -722,10 +754,10 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { // Images / Image views struct ImageAllocInternal { bool external{false}; - VmaAllocationInfo alloc_info; + VmaAllocationInfo alloc_info{}; vkapi::IVkImage image{nullptr}; vkapi::IVkImageView view{nullptr}; - std::vector view_lods; + std::vector view_lods{}; }; // Since we use the pointer to AllocationInternal as the `alloc_id`, @@ -744,9 +776,10 @@ class TI_DLL_EXPORT VulkanDevice : public GraphicsDevice { framebuffer_pools_; // Descriptors / Layouts / Pools - unordered_map + VulkanResourceSet::SetLayoutHasher, + VulkanResourceSet::SetLayoutCmp> desc_set_layouts_; vkapi::IVkDescriptorPool desc_pool_{nullptr}; diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 1d56d557883c8..eb6e9395d30d3 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -332,10 +332,10 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, } std::unordered_set extensions; - for (auto ext : get_required_extensions(params_.enable_validation_layer)) { + for (auto &ext : get_required_extensions(params_.enable_validation_layer)) { extensions.insert(std::string(ext)); } - for (auto ext : params_.additional_instance_extensions) { + for (auto &ext : params_.additional_instance_extensions) { extensions.insert(std::string(ext)); } diff --git a/taichi/runtime/gfx/runtime.cpp b/taichi/runtime/gfx/runtime.cpp index 525c8ac223d5c..401332be9c233 100644 --- a/taichi/runtime/gfx/runtime.cpp +++ b/taichi/runtime/gfx/runtime.cpp @@ -464,19 +464,23 @@ void GfxRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) { const int group_x = (attribs.advisory_total_num_threads + attribs.advisory_num_threads_per_group - 1) / attribs.advisory_num_threads_per_group; - ResourceBinder *binder = vp->resource_binder(); + std::unique_ptr bindings = + device_->create_resource_set_unique(); for (auto &bind : attribs.buffer_binds) { + // We might have to bind a invalid buffer (this is fine as long as + // shader don't do anything with it) if (bind.buffer.type == BufferType::ExtArr) { - binder->rw_buffer(0, bind.binding, any_arrays.at(bind.buffer.root_id)); - } else if (args_buffer && bind.buffer.type == BufferType::Args) { - binder->buffer(0, bind.binding, *args_buffer); - } else if (ret_buffer && bind.buffer.type == BufferType::Rets) { - binder->rw_buffer(0, bind.binding, *ret_buffer); + bindings->rw_buffer(bind.binding, any_arrays.at(bind.buffer.root_id)); + } else if (bind.buffer.type == 
BufferType::Args) { + bindings->buffer(bind.binding, + args_buffer ? *args_buffer : kDeviceNullAllocation); + } else if (bind.buffer.type == BufferType::Rets) { + bindings->rw_buffer(bind.binding, + ret_buffer ? *ret_buffer : kDeviceNullAllocation); } else { DeviceAllocation *alloc = ti_kernel->get_buffer_bind(bind.buffer); - if (alloc) { - binder->rw_buffer(0, bind.binding, *alloc); - } + bindings->rw_buffer(bind.binding, + alloc ? *alloc : kDeviceNullAllocation); } } @@ -484,10 +488,10 @@ void GfxRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) { DeviceAllocation texture = textures.at(bind.arg_id); if (bind.is_storage) { transition_image(texture, ImageLayout::shader_read_write); - binder->rw_image(0, bind.binding, texture, 0); + bindings->rw_image(bind.binding, texture, 0); } else { transition_image(texture, ImageLayout::shader_read); - binder->image(0, bind.binding, texture, {}); + bindings->image(bind.binding, texture, {}); } } @@ -506,7 +510,9 @@ void GfxRuntime::launch_kernel(KernelHandle handle, RuntimeContext *host_ctx) { } current_cmdlist_->bind_pipeline(vp); - current_cmdlist_->bind_resources(binder); + RhiResult status = current_cmdlist_->bind_shader_resources(bindings.get()); + TI_ERROR_IF(status != RhiResult::success, + "Resource binding error : RhiResult({})", status); current_cmdlist_->dispatch(group_x); current_cmdlist_->memory_barrier(); } diff --git a/taichi/ui/backends/vulkan/renderable.cpp b/taichi/ui/backends/vulkan/renderable.cpp index b59ce3cd268cf..b8ad7b72c4f27 100644 --- a/taichi/ui/backends/vulkan/renderable.cpp +++ b/taichi/ui/backends/vulkan/renderable.cpp @@ -165,9 +165,15 @@ const Pipeline &Renderable::pipeline() const { } void Renderable::create_bindings() { - ResourceBinder *binder = pipeline_->resource_binder(); - binder->vertex_buffer(vertex_buffer_.get_ptr(0), 0); - binder->index_buffer(index_buffer_.get_ptr(0), 32); + if (!resource_set_) { + resource_set_ = app_context_->device().create_resource_set_unique(); + } + if (!raster_state_) { + raster_state_ = app_context_->device().create_raster_resources_unique(); + } + + raster_state_->vertex_buffer(vertex_buffer_.get_ptr(0), 0); + raster_state_->index_buffer(index_buffer_.get_ptr(0), 32); } void Renderable::create_graphics_pipeline() { @@ -287,13 +293,16 @@ void Renderable::destroy_storage_buffers() { } void Renderable::cleanup() { + resource_set_.reset(); + raster_state_.reset(); free_buffers(); pipeline_.reset(); } void Renderable::record_this_frame_commands(CommandList *command_list) { command_list->bind_pipeline(pipeline_.get()); - command_list->bind_resources(pipeline_->resource_binder()); + command_list->bind_raster_resources(raster_state_.get()); + command_list->bind_shader_resources(resource_set_.get()); if (indexed_) { command_list->draw_indexed(config_.draw_index_count, diff --git a/taichi/ui/backends/vulkan/renderable.h b/taichi/ui/backends/vulkan/renderable.h index 6568212f75177..024a2eadf5de5 100644 --- a/taichi/ui/backends/vulkan/renderable.h +++ b/taichi/ui/backends/vulkan/renderable.h @@ -67,6 +67,8 @@ class Renderable { AppContext *app_context_; std::unique_ptr pipeline_{nullptr}; + std::unique_ptr resource_set_{nullptr}; + std::unique_ptr raster_state_{nullptr}; taichi::lang::DeviceAllocation vertex_buffer_; taichi::lang::DeviceAllocation index_buffer_; diff --git a/taichi/ui/backends/vulkan/renderables/circles.cpp b/taichi/ui/backends/vulkan/renderables/circles.cpp index 187ae58541905..ec42731a9a040 100644 --- a/taichi/ui/backends/vulkan/renderables/circles.cpp +++ 
b/taichi/ui/backends/vulkan/renderables/circles.cpp @@ -61,8 +61,7 @@ void Circles::update_ubo(glm::vec3 color, void Circles::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); + resource_set_->buffer(0, uniform_buffer_); } } // namespace vulkan diff --git a/taichi/ui/backends/vulkan/renderables/lines.cpp b/taichi/ui/backends/vulkan/renderables/lines.cpp index 8f426a2275220..2fdbb292640a7 100644 --- a/taichi/ui/backends/vulkan/renderables/lines.cpp +++ b/taichi/ui/backends/vulkan/renderables/lines.cpp @@ -59,13 +59,13 @@ void Lines::update_ubo(glm::vec3 color, bool use_per_vertex_color) { void Lines::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); + resource_set_->buffer(0, uniform_buffer_); } void Lines::record_this_frame_commands(CommandList *command_list) { command_list->bind_pipeline(pipeline_.get()); - command_list->bind_resources(pipeline_->resource_binder()); + command_list->bind_raster_resources(raster_state_.get()); + command_list->bind_shader_resources(resource_set_.get()); command_list->set_line_width(curr_width_ * app_context_->config.height); if (indexed_) { diff --git a/taichi/ui/backends/vulkan/renderables/mesh.cpp b/taichi/ui/backends/vulkan/renderables/mesh.cpp index bccb5fb14ead1..b139da0f4a46e 100644 --- a/taichi/ui/backends/vulkan/renderables/mesh.cpp +++ b/taichi/ui/backends/vulkan/renderables/mesh.cpp @@ -118,7 +118,8 @@ void Mesh::update_data(const MeshInfo &info, const Scene &scene) { void Mesh::record_this_frame_commands(taichi::lang::CommandList *command_list) { command_list->bind_pipeline(pipeline_.get()); - command_list->bind_resources(pipeline_->resource_binder()); + command_list->bind_raster_resources(raster_state_.get()); + command_list->bind_shader_resources(resource_set_.get()); if (indexed_) { command_list->draw_indexed_instance( @@ -161,10 +162,9 @@ void Mesh::init_mesh(AppContext *app_context, void Mesh::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); - binder->rw_buffer(0, 1, storage_buffer_); - binder->rw_buffer(0, 2, mesh_storage_buffer_); + resource_set_->buffer(0, uniform_buffer_); + resource_set_->rw_buffer(1, storage_buffer_); + resource_set_->rw_buffer(2, mesh_storage_buffer_); } void Mesh::create_mesh_storage_buffers() { diff --git a/taichi/ui/backends/vulkan/renderables/particles.cpp b/taichi/ui/backends/vulkan/renderables/particles.cpp index 8c8ce70699fe2..8368aef6690bd 100644 --- a/taichi/ui/backends/vulkan/renderables/particles.cpp +++ b/taichi/ui/backends/vulkan/renderables/particles.cpp @@ -80,9 +80,8 @@ void Particles::init_particles(AppContext *app_context, void Particles::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); - binder->rw_buffer(0, 1, storage_buffer_); + resource_set_->buffer(0, uniform_buffer_); + resource_set_->rw_buffer(1, storage_buffer_); } } // namespace vulkan diff --git a/taichi/ui/backends/vulkan/renderables/scene_lines.cpp b/taichi/ui/backends/vulkan/renderables/scene_lines.cpp index 4b2e12b39bc1b..fa53c981504b2 100644 --- a/taichi/ui/backends/vulkan/renderables/scene_lines.cpp +++ b/taichi/ui/backends/vulkan/renderables/scene_lines.cpp @@ -60,14 +60,14 @@ void SceneLines::update_ubo(const SceneLinesInfo &info, const Scene 
&scene) { void SceneLines::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); - binder->rw_buffer(0, 1, storage_buffer_); + resource_set_->buffer(0, uniform_buffer_); + resource_set_->rw_buffer(1, storage_buffer_); } void SceneLines::record_this_frame_commands(CommandList *command_list) { command_list->bind_pipeline(pipeline_.get()); - command_list->bind_resources(pipeline_->resource_binder()); + command_list->bind_raster_resources(raster_state_.get()); + command_list->bind_shader_resources(resource_set_.get()); command_list->set_line_width(curr_width_); if (indexed_) { diff --git a/taichi/ui/backends/vulkan/renderables/set_image.cpp b/taichi/ui/backends/vulkan/renderables/set_image.cpp index 2bbae13b38dc0..80661db26324b 100644 --- a/taichi/ui/backends/vulkan/renderables/set_image.cpp +++ b/taichi/ui/backends/vulkan/renderables/set_image.cpp @@ -321,9 +321,8 @@ void SetImage::update_index_buffer() { void SetImage::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->image(0, 0, texture_, {}); - binder->buffer(0, 1, uniform_buffer_); + resource_set_->image(0, texture_, {}); + resource_set_->buffer(1, uniform_buffer_); } void SetImage::cleanup() { diff --git a/taichi/ui/backends/vulkan/renderables/triangles.cpp b/taichi/ui/backends/vulkan/renderables/triangles.cpp index 45d9a14d13592..1b6debd557dea 100644 --- a/taichi/ui/backends/vulkan/renderables/triangles.cpp +++ b/taichi/ui/backends/vulkan/renderables/triangles.cpp @@ -55,8 +55,7 @@ void Triangles::update_ubo(glm::vec3 color, bool use_per_vertex_color) { void Triangles::create_bindings() { Renderable::create_bindings(); - ResourceBinder *binder = pipeline_->resource_binder(); - binder->buffer(0, 0, uniform_buffer_); + resource_set_->buffer(0, uniform_buffer_); } } // namespace vulkan diff --git a/tests/python/test_ggui.py b/tests/python/test_ggui.py index 64ed46a3367a0..19ef43e501543 100644 --- a/tests/python/test_ggui.py +++ b/tests/python/test_ggui.py @@ -456,9 +456,11 @@ def render(): render() if (platform.system() == 'Darwin'): # TODO:Fix the bug that mac not support wide lines - verify_image(window.get_image_buffer_as_numpy(), 'test_draw_lines.mac') + verify_image(window.get_image_buffer_as_numpy(), 'test_draw_lines.mac', + 0.2) else: - verify_image(window.get_image_buffer_as_numpy(), 'test_draw_lines') + verify_image(window.get_image_buffer_as_numpy(), 'test_draw_lines', + 0.2) window.destroy()
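
For reviewers who want a quick picture of the new call pattern, below is a minimal usage sketch of the split binding model this patch introduces. It is not part of the diff and is only a reading aid: the device, pipeline, command list, and allocations (`ubo`, `ssbo`, `vbo`, `ibo`) are assumed to already exist, the names are placeholders, and `RhiResult` checking is abbreviated.

// Sketch only: drives the new ShaderResourceSet / RasterResources API as it
// appears in this patch (buffer/rw_buffer take a binding index, raster inputs
// are grouped separately, and sets are bound per set_index on the command list).
#include "taichi/rhi/device.h"

using namespace taichi::lang;

void record_draw(GraphicsDevice *device,      // placeholder: an existing device
                 Pipeline *pipeline,          // placeholder: an existing raster pipeline
                 CommandList *cmdlist,        // placeholder: an open command list
                 DeviceAllocation ubo,
                 DeviceAllocation ssbo,
                 DeviceAllocation vbo,
                 DeviceAllocation ibo,
                 uint32_t index_count) {
  // Shader-visible resources now live in a ShaderResourceSet that is detached
  // from the pipeline, so it can be pre-filled and reused across draws.
  auto set = device->create_resource_set_unique();
  set->buffer(0, ubo);      // uniform buffer at binding 0
  set->rw_buffer(1, ssbo);  // storage buffer at binding 1

  // Rasterizer inputs (vertex/index buffers) are grouped in RasterResources.
  auto raster = device->create_raster_resources_unique();
  raster->vertex_buffer(vbo.get_ptr(0), /*binding=*/0);
  raster->index_buffer(ibo.get_ptr(0), /*index_width=*/32);

  cmdlist->bind_pipeline(pipeline);
  cmdlist->bind_raster_resources(raster.get());
  cmdlist->bind_shader_resources(set.get(), /*set_index=*/0);
  cmdlist->draw_indexed(index_count);
}

The compute path in runtime.cpp follows the same shape: fill a ShaderResourceSet, bind_pipeline, bind_shader_resources, then dispatch; only the raster-resource step is dropped.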