From b809d70980dd48ed927661caf69018b7e8a54bf1 Mon Sep 17 00:00:00 2001 From: kwbm Date: Sun, 10 Jul 2022 12:53:22 +0800 Subject: [PATCH] Reduce unnecessary copies of memory data to improve rendering performance --- .../render/passes/directional_light_pass.cpp | 30 +++++----- .../render/passes/main_camera_pass.cpp | 59 +++++++++---------- .../function/render/passes/pick_pass.cpp | 22 +++---- .../render/passes/point_light_pass.cpp | 30 +++++----- .../runtime/function/render/render_common.h | 11 ++-- .../runtime/function/render/render_scene.cpp | 21 ++++--- 6 files changed, 84 insertions(+), 89 deletions(-) diff --git a/engine/source/runtime/function/render/passes/directional_light_pass.cpp b/engine/source/runtime/function/render/passes/directional_light_pass.cpp index c99e02232..49ba0e917 100644 --- a/engine/source/runtime/function/render/passes/directional_light_pass.cpp +++ b/engine/source/runtime/function/render/passes/directional_light_pass.cpp @@ -2,6 +2,7 @@ #include "runtime/function/render/render_mesh.h" #include "runtime/function/render/rhi/vulkan/vulkan_rhi.h" #include "runtime/function/render/rhi/vulkan/vulkan_util.h" +#include "runtime/function/render/glm_wrapper.h" #include "runtime/function/render/passes/directional_light_pass.h" @@ -472,9 +473,9 @@ namespace Piccolo { struct MeshNode { - glm::mat4 model_matrix; - glm::mat4 joint_matrices[m_mesh_vertex_blending_max_joint_count]; - bool enable_vertex_blending; + const Matrix4x4* model_matrix {nullptr}; + const Matrix4x4* joint_matrices {nullptr}; + uint32_t joint_count {0}; }; std::map>> @@ -487,14 +488,11 @@ namespace Piccolo auto& mesh_nodes = mesh_instanced[node.ref_mesh]; MeshNode temp; - temp.model_matrix = node.model_matrix; - temp.enable_vertex_blending = node.enable_vertex_blending; + temp.model_matrix = node.model_matrix; if (node.enable_vertex_blending) { - for (uint32_t i = 0; i < m_mesh_vertex_blending_max_joint_count; ++i) - { - temp.joint_matrices[i] = node.joint_matrices[i]; - } + temp.joint_matrices = node.joint_matrices; + temp.joint_count = node.joint_count; } mesh_nodes.push_back(temp); @@ -629,10 +627,10 @@ namespace Piccolo for (uint32_t i = 0; i < current_instance_count; ++i) { perdrawcall_storage_buffer_object.mesh_instances[i].model_matrix = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i].model_matrix; + GLMUtil::fromMat4x4(*mesh_nodes[drawcall_max_instance_count * drawcall_index + i].model_matrix); perdrawcall_storage_buffer_object.mesh_instances[i].enable_vertex_blending = mesh_nodes[drawcall_max_instance_count * drawcall_index + i] - .enable_vertex_blending ? + .joint_matrices ? 1.0 : -1.0; } @@ -643,7 +641,7 @@ namespace Piccolo for (uint32_t i = 0; i < current_instance_count; ++i) { if (!mesh_nodes[drawcall_max_instance_count * drawcall_index + i] - .enable_vertex_blending) + .joint_matrices) { least_one_enable_vertex_blending = false; break; @@ -676,14 +674,14 @@ namespace Piccolo for (uint32_t i = 0; i < current_instance_count; ++i) { if (mesh_nodes[drawcall_max_instance_count * drawcall_index + i] - .enable_vertex_blending) + .joint_matrices) { - for (uint32_t j = 0; j < m_mesh_vertex_blending_max_joint_count; ++j) + for (uint32_t j = 0; j < mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_count; ++j) { per_drawcall_vertex_blending_storage_buffer_object .joint_matrices[m_mesh_vertex_blending_max_joint_count * i + j] = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i] - .joint_matrices[j]; + GLMUtil::fromMat4x4(mesh_nodes[drawcall_max_instance_count * drawcall_index + i] + .joint_matrices[j]); } } } diff --git a/engine/source/runtime/function/render/passes/main_camera_pass.cpp b/engine/source/runtime/function/render/passes/main_camera_pass.cpp index 25fdd96ad..e7d6b2d07 100644 --- a/engine/source/runtime/function/render/passes/main_camera_pass.cpp +++ b/engine/source/runtime/function/render/passes/main_camera_pass.cpp @@ -2,6 +2,7 @@ #include "runtime/function/render/render_helper.h" #include "runtime/function/render/render_mesh.h" #include "runtime/function/render/render_resource.h" +#include "runtime/function/render/glm_wrapper.h" #include "runtime/function/render/rhi/vulkan/vulkan_rhi.h" #include "runtime/function/render/rhi/vulkan/vulkan_util.h" @@ -2416,9 +2417,9 @@ namespace Piccolo { struct MeshNode { - glm::mat4 model_matrix; - glm::mat4 joint_matrices[m_mesh_vertex_blending_max_joint_count]; - bool enable_vertex_blending; + const Matrix4x4* model_matrix {nullptr}; + const Matrix4x4* joint_matrices {nullptr}; + uint32_t joint_count {0}; }; std::map>> main_camera_mesh_drawcall_batch; @@ -2430,14 +2431,11 @@ namespace Piccolo auto& mesh_nodes = mesh_instanced[node.ref_mesh]; MeshNode temp; - temp.model_matrix = node.model_matrix; - temp.enable_vertex_blending = node.enable_vertex_blending; + temp.model_matrix = node.model_matrix; if (node.enable_vertex_blending) { - for (uint32_t i = 0; i < m_mesh_vertex_blending_max_joint_count; ++i) - { - temp.joint_matrices[i] = node.joint_matrices[i]; - } + temp.joint_matrices = node.joint_matrices; + temp.joint_count = node.joint_count; } mesh_nodes.push_back(temp); @@ -2561,9 +2559,9 @@ namespace Piccolo for (uint32_t i = 0; i < current_instance_count; ++i) { perdrawcall_storage_buffer_object.mesh_instances[i].model_matrix = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i].model_matrix; + GLMUtil::fromMat4x4(*mesh_nodes[drawcall_max_instance_count * drawcall_index + i].model_matrix); perdrawcall_storage_buffer_object.mesh_instances[i].enable_vertex_blending = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i].enable_vertex_blending ? + mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_matrices ? 1.0 : -1.0; } @@ -2573,7 +2571,7 @@ namespace Piccolo bool least_one_enable_vertex_blending = true; for (uint32_t i = 0; i < current_instance_count; ++i) { - if (!mesh_nodes[drawcall_max_instance_count * drawcall_index + i].enable_vertex_blending) + if (!mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_matrices) { least_one_enable_vertex_blending = false; break; @@ -2604,14 +2602,14 @@ namespace Piccolo per_drawcall_vertex_blending_dynamic_offset)); for (uint32_t i = 0; i < current_instance_count; ++i) { - if (mesh_nodes[drawcall_max_instance_count * drawcall_index + i].enable_vertex_blending) + if (mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_matrices) { - for (uint32_t j = 0; j < m_mesh_vertex_blending_max_joint_count; ++j) + for (uint32_t j = 0; j < mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_count; ++j) { per_drawcall_vertex_blending_storage_buffer_object .joint_matrices[m_mesh_vertex_blending_max_joint_count * i + j] = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i] - .joint_matrices[j]; + GLMUtil::fromMat4x4(mesh_nodes[drawcall_max_instance_count * drawcall_index + i] + .joint_matrices[j]); } } } @@ -2700,9 +2698,9 @@ namespace Piccolo { struct MeshNode { - glm::mat4 model_matrix; - glm::mat4 joint_matrices[m_mesh_vertex_blending_max_joint_count]; - bool enable_vertex_blending; + const Matrix4x4* model_matrix {nullptr}; + const Matrix4x4* joint_matrices {nullptr}; + uint32_t joint_count {0}; }; std::map>> main_camera_mesh_drawcall_batch; @@ -2714,14 +2712,11 @@ namespace Piccolo auto& mesh_nodes = mesh_instanced[node.ref_mesh]; MeshNode temp; - temp.model_matrix = node.model_matrix; - temp.enable_vertex_blending = node.enable_vertex_blending; + temp.model_matrix = node.model_matrix; if (node.enable_vertex_blending) { - for (uint32_t i = 0; i < m_mesh_vertex_blending_max_joint_count; ++i) - { - temp.joint_matrices[i] = node.joint_matrices[i]; - } + temp.joint_matrices = node.joint_matrices; + temp.joint_count = node.joint_count; } mesh_nodes.push_back(temp); @@ -2845,9 +2840,9 @@ namespace Piccolo for (uint32_t i = 0; i < current_instance_count; ++i) { perdrawcall_storage_buffer_object.mesh_instances[i].model_matrix = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i].model_matrix; + GLMUtil::fromMat4x4(*mesh_nodes[drawcall_max_instance_count * drawcall_index + i].model_matrix); perdrawcall_storage_buffer_object.mesh_instances[i].enable_vertex_blending = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i].enable_vertex_blending ? + mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_matrices ? 1.0 : -1.0; } @@ -2857,7 +2852,7 @@ namespace Piccolo bool least_one_enable_vertex_blending = true; for (uint32_t i = 0; i < current_instance_count; ++i) { - if (!mesh_nodes[drawcall_max_instance_count * drawcall_index + i].enable_vertex_blending) + if (!mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_matrices) { least_one_enable_vertex_blending = false; break; @@ -2888,14 +2883,14 @@ namespace Piccolo per_drawcall_vertex_blending_dynamic_offset)); for (uint32_t i = 0; i < current_instance_count; ++i) { - if (mesh_nodes[drawcall_max_instance_count * drawcall_index + i].enable_vertex_blending) + if (mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_matrices) { - for (uint32_t j = 0; j < m_mesh_vertex_blending_max_joint_count; ++j) + for (uint32_t j = 0; j < mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_count; ++j) { per_drawcall_vertex_blending_storage_buffer_object .joint_matrices[m_mesh_vertex_blending_max_joint_count * i + j] = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i] - .joint_matrices[j]; + GLMUtil::fromMat4x4(mesh_nodes[drawcall_max_instance_count * drawcall_index + i] + .joint_matrices[j]); } } } diff --git a/engine/source/runtime/function/render/passes/pick_pass.cpp b/engine/source/runtime/function/render/passes/pick_pass.cpp index f43d9fa47..cbaad4368 100644 --- a/engine/source/runtime/function/render/passes/pick_pass.cpp +++ b/engine/source/runtime/function/render/passes/pick_pass.cpp @@ -3,6 +3,7 @@ #include "runtime/function/render/rhi/vulkan/vulkan_util.h" #include "runtime/function/render/render_helper.h" +#include "runtime/function/render/glm_wrapper.h" #include "runtime/function/render/passes/pick_pass.h" @@ -439,9 +440,10 @@ namespace Piccolo struct MeshNode { - glm::mat4 model_matrix; - uint32_t node_id; - glm::mat4 joint_matrices[m_mesh_vertex_blending_max_joint_count]; + const Matrix4x4* model_matrix {nullptr}; + const Matrix4x4* joint_matrices {nullptr}; + uint32_t joint_count {0}; + uint32_t node_id; }; std::map>> main_camera_mesh_drawcall_batch; @@ -454,13 +456,11 @@ namespace Piccolo MeshNode temp; temp.model_matrix = node.model_matrix; - temp.node_id = node.node_id; + temp.node_id = node.node_id; if (node.ref_mesh->enable_vertex_blending) { - for (uint32_t i = 0; i < m_mesh_vertex_blending_max_joint_count; ++i) - { - temp.joint_matrices[i] = node.joint_matrices[i]; - } + temp.joint_matrices = node.joint_matrices; + temp.joint_count = node.joint_count; } model_nodes.push_back(temp); @@ -651,7 +651,7 @@ namespace Piccolo for (uint32_t i = 0; i < current_instance_count; ++i) { perdrawcall_storage_buffer_object.model_matrices[i] = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i].model_matrix; + GLMUtil::fromMat4x4(*mesh_nodes[drawcall_max_instance_count * drawcall_index + i].model_matrix); perdrawcall_storage_buffer_object.node_ids[i] = mesh_nodes[drawcall_max_instance_count * drawcall_index + i].node_id; } @@ -684,11 +684,11 @@ namespace Piccolo per_drawcall_vertex_blending_dynamic_offset)); for (uint32_t i = 0; i < current_instance_count; ++i) { - for (uint32_t j = 0; j < m_mesh_vertex_blending_max_joint_count; ++j) + for (uint32_t j = 0; j < mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_count; ++j) { per_drawcall_vertex_blending_storage_buffer_object .joint_matrices[m_mesh_vertex_blending_max_joint_count * i + j] = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_matrices[j]; + GLMUtil::fromMat4x4(mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_matrices[j]); } } } diff --git a/engine/source/runtime/function/render/passes/point_light_pass.cpp b/engine/source/runtime/function/render/passes/point_light_pass.cpp index 42f2baac2..f9660f397 100644 --- a/engine/source/runtime/function/render/passes/point_light_pass.cpp +++ b/engine/source/runtime/function/render/passes/point_light_pass.cpp @@ -2,6 +2,7 @@ #include "runtime/function/render/render_mesh.h" #include "runtime/function/render/rhi/vulkan/vulkan_rhi.h" #include "runtime/function/render/rhi/vulkan/vulkan_util.h" +#include "runtime/function/render/glm_wrapper.h" #include "runtime/function/render/passes/point_light_pass.h" @@ -496,9 +497,9 @@ namespace Piccolo { struct MeshNode { - glm::mat4 model_matrix; - glm::mat4 joint_matrices[m_mesh_vertex_blending_max_joint_count]; - bool enable_vertex_blending; + const Matrix4x4* model_matrix {nullptr}; + const Matrix4x4* joint_matrices {nullptr}; + uint32_t joint_count {0}; }; std::map>> point_lights_mesh_drawcall_batch; @@ -510,14 +511,11 @@ namespace Piccolo auto& mesh_nodes = mesh_instanced[node.ref_mesh]; MeshNode temp; - temp.model_matrix = node.model_matrix; - temp.enable_vertex_blending = node.enable_vertex_blending; + temp.model_matrix = node.model_matrix; if (node.enable_vertex_blending) { - for (uint32_t i = 0; i < m_mesh_vertex_blending_max_joint_count; ++i) - { - temp.joint_matrices[i] = node.joint_matrices[i]; - } + temp.joint_matrices = node.joint_matrices; + temp.joint_count = node.joint_count; } mesh_nodes.push_back(temp); @@ -643,10 +641,10 @@ namespace Piccolo for (uint32_t i = 0; i < current_instance_count; ++i) { perdrawcall_storage_buffer_object.mesh_instances[i].model_matrix = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i].model_matrix; + GLMUtil::fromMat4x4(*mesh_nodes[drawcall_max_instance_count * drawcall_index + i].model_matrix); perdrawcall_storage_buffer_object.mesh_instances[i].enable_vertex_blending = mesh_nodes[drawcall_max_instance_count * drawcall_index + i] - .enable_vertex_blending ? + .joint_matrices ? 1.0 : -1.0; } @@ -657,7 +655,7 @@ namespace Piccolo for (uint32_t i = 0; i < current_instance_count; ++i) { if (!mesh_nodes[drawcall_max_instance_count * drawcall_index + i] - .enable_vertex_blending) + .joint_matrices) { least_one_enable_vertex_blending = false; break; @@ -690,14 +688,14 @@ namespace Piccolo for (uint32_t i = 0; i < current_instance_count; ++i) { if (mesh_nodes[drawcall_max_instance_count * drawcall_index + i] - .enable_vertex_blending) + .joint_matrices) { - for (uint32_t j = 0; j < m_mesh_vertex_blending_max_joint_count; ++j) + for (uint32_t j = 0; j < mesh_nodes[drawcall_max_instance_count * drawcall_index + i].joint_count; ++j) { per_drawcall_vertex_blending_storage_buffer_object .joint_matrices[m_mesh_vertex_blending_max_joint_count * i + j] = - mesh_nodes[drawcall_max_instance_count * drawcall_index + i] - .joint_matrices[j]; + GLMUtil::fromMat4x4(mesh_nodes[drawcall_max_instance_count * drawcall_index + i] + .joint_matrices[j]); } } } diff --git a/engine/source/runtime/function/render/render_common.h b/engine/source/runtime/function/render/render_common.h index 9c16beb77..4cb3a309a 100644 --- a/engine/source/runtime/function/render/render_common.h +++ b/engine/source/runtime/function/render/render_common.h @@ -226,12 +226,13 @@ namespace Piccolo // nodes struct RenderMeshNode { - glm::mat4 model_matrix; - glm::mat4 joint_matrices[m_mesh_vertex_blending_max_joint_count]; - VulkanMesh* ref_mesh = nullptr; - VulkanPBRMaterial* ref_material = nullptr; + const Matrix4x4* model_matrix {nullptr}; + const Matrix4x4* joint_matrices {nullptr}; + uint32_t joint_count {0}; + VulkanMesh* ref_mesh {nullptr}; + VulkanPBRMaterial* ref_material {nullptr}; uint32_t node_id; - bool enable_vertex_blending = false; + bool enable_vertex_blending {false}; }; struct RenderAxisNode diff --git a/engine/source/runtime/function/render/render_scene.cpp b/engine/source/runtime/function/render/render_scene.cpp index 0a64363c8..62467c64f 100644 --- a/engine/source/runtime/function/render/render_scene.cpp +++ b/engine/source/runtime/function/render/render_scene.cpp @@ -109,12 +109,13 @@ namespace Piccolo m_directional_light_visible_mesh_nodes.emplace_back(); RenderMeshNode& temp_node = m_directional_light_visible_mesh_nodes.back(); - temp_node.model_matrix = GLMUtil::fromMat4x4(entity.m_model_matrix); + temp_node.model_matrix = &entity.m_model_matrix; assert(entity.m_joint_matrices.size() <= m_mesh_vertex_blending_max_joint_count); - for (size_t joint_index = 0; joint_index < entity.m_joint_matrices.size(); joint_index++) + if (!entity.m_joint_matrices.empty()) { - temp_node.joint_matrices[joint_index] = GLMUtil::fromMat4x4(entity.m_joint_matrices[joint_index]); + temp_node.joint_count = static_cast(entity.m_joint_matrices.size()); + temp_node.joint_matrices = entity.m_joint_matrices.data(); } temp_node.node_id = entity.m_instance_id; @@ -163,12 +164,13 @@ namespace Piccolo m_point_lights_visible_mesh_nodes.emplace_back(); RenderMeshNode& temp_node = m_point_lights_visible_mesh_nodes.back(); - temp_node.model_matrix = GLMUtil::fromMat4x4(entity.m_model_matrix); + temp_node.model_matrix = &entity.m_model_matrix; assert(entity.m_joint_matrices.size() <= m_mesh_vertex_blending_max_joint_count); - for (size_t joint_index = 0; joint_index < entity.m_joint_matrices.size(); joint_index++) + if (!entity.m_joint_matrices.empty()) { - temp_node.joint_matrices[joint_index] = GLMUtil::fromMat4x4(entity.m_joint_matrices[joint_index]); + temp_node.joint_count = static_cast(entity.m_joint_matrices.size()); + temp_node.joint_matrices = entity.m_joint_matrices.data(); } temp_node.node_id = entity.m_instance_id; @@ -205,12 +207,13 @@ namespace Piccolo m_main_camera_visible_mesh_nodes.emplace_back(); RenderMeshNode& temp_node = m_main_camera_visible_mesh_nodes.back(); - temp_node.model_matrix = GLMUtil::fromMat4x4(entity.m_model_matrix); + temp_node.model_matrix = &entity.m_model_matrix; assert(entity.m_joint_matrices.size() <= m_mesh_vertex_blending_max_joint_count); - for (size_t joint_index = 0; joint_index < entity.m_joint_matrices.size(); joint_index++) + if (!entity.m_joint_matrices.empty()) { - temp_node.joint_matrices[joint_index] = GLMUtil::fromMat4x4(entity.m_joint_matrices[joint_index]); + temp_node.joint_count = static_cast(entity.m_joint_matrices.size()); + temp_node.joint_matrices = entity.m_joint_matrices.data(); } temp_node.node_id = entity.m_instance_id;