diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 995c8bfa59f2c..992ae4cf3fa7d 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -6,7 +6,7 @@ use crate::{ CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, MAX_CASCADES_PER_LIGHT, MAX_DIRECTIONAL_LIGHTS, }; use bevy_app::Plugin; -use bevy_asset::{load_internal_asset, AssetId, Assets, Handle}; +use bevy_asset::{load_internal_asset, AssetId, Handle}; use bevy_core_pipeline::{ core_3d::{AlphaMask3d, Opaque3d, Transparent3d}, prepass::ViewPrepassTextures, @@ -19,15 +19,11 @@ use bevy_ecs::{ query::{QueryItem, ROQueryItem}, system::{lifetimeless::*, SystemParamItem, SystemState}, }; -use bevy_math::{Affine3, Mat4, Vec2, Vec4}; +use bevy_math::{Affine3, Vec2, Vec4}; use bevy_render::{ - batching::{ - batch_and_prepare_render_phase, write_batched_instance_buffer, GetBatchData, - NoAutomaticBatching, - }, + batching::{batch_and_prepare_render_phase, write_batched_instance_buffer, GetBatchData}, globals::{GlobalsBuffer, GlobalsUniform}, mesh::{ - skinning::{SkinnedMesh, SkinnedMeshInverseBindposes}, GpuBufferInfo, InnerMeshVertexBufferLayout, Mesh, MeshVertexBufferLayout, VertexAttributeDescriptor, }, @@ -48,17 +44,13 @@ use bevy_utils::{tracing::error, HashMap, Hashed}; use crate::render::{ morph::{extract_morphs, prepare_morphs, MorphIndex, MorphUniform}, + skin::{extract_skins, prepare_skins, SkinIndex, SkinUniform}, MeshLayouts, }; #[derive(Default)] pub struct MeshRenderPlugin; -/// Maximum number of joints supported for skinned meshes. 
-pub const MAX_JOINTS: usize = 256; -const JOINT_SIZE: usize = std::mem::size_of::(); -pub(crate) const JOINT_BUFFER_SIZE: usize = MAX_JOINTS * JOINT_SIZE; - pub const MESH_VERTEX_OUTPUT: Handle = Handle::weak_from_u128(2645551199423808407); pub const MESH_VIEW_TYPES_HANDLE: Handle = Handle::weak_from_u128(8140454348013264787); pub const MESH_VIEW_BINDINGS_HANDLE: Handle = Handle::weak_from_u128(9076678235888822571); @@ -112,12 +104,12 @@ impl Plugin for MeshRenderPlugin { if let Ok(render_app) = app.get_sub_app_mut(RenderApp) { render_app - .init_resource::() .init_resource::() + .init_resource::() .init_resource::() .add_systems( ExtractSchedule, - (extract_meshes, extract_skinned_meshes, extract_morphs), + (extract_meshes, extract_skins, extract_morphs), ) .add_systems( Render, @@ -131,7 +123,7 @@ impl Plugin for MeshRenderPlugin { .in_set(RenderSet::PrepareResources), write_batched_instance_buffer:: .in_set(RenderSet::PrepareResourcesFlush), - prepare_skinned_meshes.in_set(RenderSet::PrepareResources), + prepare_skins.in_set(RenderSet::PrepareResources), prepare_morphs.in_set(RenderSet::PrepareResources), prepare_mesh_bind_group.in_set(RenderSet::PrepareBindGroups), prepare_mesh_view_bind_groups.in_set(RenderSet::PrepareBindGroups), @@ -270,91 +262,6 @@ pub fn extract_meshes( commands.insert_or_spawn_batch(not_caster_commands); } -#[derive(Component)] -pub struct SkinnedMeshJoints { - pub index: u32, -} - -impl SkinnedMeshJoints { - #[inline] - pub fn build( - skin: &SkinnedMesh, - inverse_bindposes: &Assets, - joints: &Query<&GlobalTransform>, - buffer: &mut BufferVec, - ) -> Option { - let inverse_bindposes = inverse_bindposes.get(&skin.inverse_bindposes)?; - let start = buffer.len(); - let target = start + skin.joints.len().min(MAX_JOINTS); - buffer.extend( - joints - .iter_many(&skin.joints) - .zip(inverse_bindposes.iter()) - .take(MAX_JOINTS) - .map(|(joint, bindpose)| joint.affine() * *bindpose), - ); - // iter_many will skip any failed fetches. 
This will cause it to assign the wrong bones, - // so just bail by truncating to the start. - if buffer.len() != target { - buffer.truncate(start); - return None; - } - - // Pad to 256 byte alignment - while buffer.len() % 4 != 0 { - buffer.push(Mat4::ZERO); - } - Some(Self { - index: start as u32, - }) - } - - /// Updated index to be in address space based on [`SkinnedMeshUniform`] size. - pub fn to_buffer_index(mut self) -> Self { - self.index *= std::mem::size_of::() as u32; - self - } -} - -pub fn extract_skinned_meshes( - mut commands: Commands, - mut previous_len: Local, - mut uniform: ResMut, - query: Extract>, - inverse_bindposes: Extract>>, - joint_query: Extract>, -) { - uniform.buffer.clear(); - let mut values = Vec::with_capacity(*previous_len); - let mut last_start = 0; - - for (entity, view_visibility, skin) in &query { - if !view_visibility.get() { - continue; - } - // PERF: This can be expensive, can we move this to prepare? - if let Some(skinned_joints) = - SkinnedMeshJoints::build(skin, &inverse_bindposes, &joint_query, &mut uniform.buffer) - { - last_start = last_start.max(skinned_joints.index as usize); - // NOTE: The skinned joints uniform buffer has to be bound at a dynamic offset per - // entity and so cannot currently be batched. 
- values.push(( - entity, - (skinned_joints.to_buffer_index(), NoAutomaticBatching), - )); - } - } - - // Pad out the buffer to ensure that there's enough space for bindings - while uniform.buffer.len() - last_start < MAX_JOINTS { - uniform.buffer.push(Mat4::ZERO); - } - - *previous_len = values.len(); - commands.insert_or_spawn_batch(values); -} - #[derive(Resource, Clone)] pub struct MeshPipeline { pub view_layout: BindGroupLayout, @@ -1043,7 +950,7 @@ pub fn prepare_mesh_bind_group( mesh_pipeline: Res, render_device: Res, mesh_uniforms: Res>, - skinned_mesh_uniform: Res, + skins_uniform: Res, weights_uniform: Res, ) { groups.reset(); @@ -1053,7 +960,7 @@ pub fn prepare_mesh_bind_group( }; groups.model_only = Some(layouts.model_only(&render_device, &model)); - let skin = skinned_mesh_uniform.buffer.buffer(); + let skin = skins_uniform.buffer.buffer(); if let Some(skin) = skin { groups.skinned = Some(layouts.skinned(&render_device, &model, skin)); } @@ -1072,41 +979,6 @@ pub fn prepare_mesh_bind_group( } } -// NOTE: This is using BufferVec because it is using a trick to allow a fixed-size array -// in a uniform buffer to be used like a variable-sized array by only writing the valid data -// into the buffer, knowing the number of valid items starting from the dynamic offset, and -// ignoring the rest, whether they're valid for other dynamic offsets or not. This trick may -// be supported later in encase, and then we should make use of it. 
- -#[derive(Resource)] -pub struct SkinnedMeshUniform { - pub buffer: BufferVec, -} - -impl Default for SkinnedMeshUniform { - fn default() -> Self { - Self { - buffer: BufferVec::new(BufferUsages::UNIFORM), - } - } -} - -pub fn prepare_skinned_meshes( - render_device: Res, - render_queue: Res, - mut skinned_mesh_uniform: ResMut, -) { - if skinned_mesh_uniform.buffer.is_empty() { - return; - } - - let len = skinned_mesh_uniform.buffer.len(); - skinned_mesh_uniform.buffer.reserve(len, &render_device); - skinned_mesh_uniform - .buffer - .write_buffer(&render_device, &render_queue); -} - #[derive(Component)] pub struct MeshViewBindGroup { pub value: BindGroup, @@ -1308,7 +1180,7 @@ impl RenderCommand

for SetMeshBindGroup { type ViewWorldQuery = (); type ItemWorldQuery = ( Read>, - Option>, + Option>, Option>, ); diff --git a/crates/bevy_pbr/src/render/mesh_bindings.rs b/crates/bevy_pbr/src/render/mesh_bindings.rs index e46af242fdf85..dcc01e1aa4c8b 100644 --- a/crates/bevy_pbr/src/render/mesh_bindings.rs +++ b/crates/bevy_pbr/src/render/mesh_bindings.rs @@ -1,5 +1,6 @@ //! Bind group layout related definitions for the mesh pipeline. +use bevy_math::Mat4; use bevy_render::{ mesh::morph::MAX_MORPH_WEIGHTS, render_resource::{ @@ -9,13 +10,17 @@ use bevy_render::{ renderer::RenderDevice, }; +use crate::render::skin::MAX_JOINTS; + const MORPH_WEIGHT_SIZE: usize = std::mem::size_of::(); pub const MORPH_BUFFER_SIZE: usize = MAX_MORPH_WEIGHTS * MORPH_WEIGHT_SIZE; +const JOINT_SIZE: usize = std::mem::size_of::(); +pub(crate) const JOINT_BUFFER_SIZE: usize = MAX_JOINTS * JOINT_SIZE; + /// Individual layout entries. mod layout_entry { - use super::MORPH_BUFFER_SIZE; - use crate::render::mesh::JOINT_BUFFER_SIZE; + use super::{JOINT_BUFFER_SIZE, MORPH_BUFFER_SIZE}; use crate::MeshUniform; use bevy_render::{ render_resource::{ @@ -66,8 +71,7 @@ mod layout_entry { /// Individual [`BindGroupEntry`](bevy_render::render_resource::BindGroupEntry) /// for bind groups. 
mod entry { - use super::MORPH_BUFFER_SIZE; - use crate::render::mesh::JOINT_BUFFER_SIZE; + use super::{JOINT_BUFFER_SIZE, MORPH_BUFFER_SIZE}; use bevy_render::render_resource::{ BindGroupEntry, BindingResource, Buffer, BufferBinding, BufferSize, TextureView, }; diff --git a/crates/bevy_pbr/src/render/mod.rs b/crates/bevy_pbr/src/render/mod.rs index 5448850d30723..b9d0d239c3874 100644 --- a/crates/bevy_pbr/src/render/mod.rs +++ b/crates/bevy_pbr/src/render/mod.rs @@ -3,8 +3,10 @@ mod light; pub(crate) mod mesh; mod mesh_bindings; mod morph; +mod skin; pub use fog::*; pub use light::*; pub use mesh::*; pub use mesh_bindings::MeshLayouts; +pub use skin::{extract_skins, prepare_skins, SkinIndex, SkinUniform, MAX_JOINTS}; diff --git a/crates/bevy_pbr/src/render/morph.rs b/crates/bevy_pbr/src/render/morph.rs index 5b98de2ad84d9..b39064c7f34ba 100644 --- a/crates/bevy_pbr/src/render/morph.rs +++ b/crates/bevy_pbr/src/render/morph.rs @@ -28,16 +28,16 @@ impl Default for MorphUniform { } pub fn prepare_morphs( - device: Res, - queue: Res, + render_device: Res, + render_queue: Res, mut uniform: ResMut, ) { if uniform.buffer.is_empty() { return; } - let buffer = &mut uniform.buffer; - buffer.reserve(buffer.len(), &device); - buffer.write_buffer(&device, &queue); + let len = uniform.buffer.len(); + uniform.buffer.reserve(len, &render_device); + uniform.buffer.write_buffer(&render_device, &render_queue); } const fn can_align(step: usize, target: usize) -> bool { @@ -69,6 +69,8 @@ fn add_to_alignment(buffer: &mut BufferVec) { buffer.extend(iter::repeat_with(T::default).take(ts_to_add)); } +// Notes on implementation: see comment on top of the extract_skins system in skin module. 
+// This works similarly, but for `f32` instead of `Mat4` pub fn extract_morphs( mut commands: Commands, mut previous_len: Local, diff --git a/crates/bevy_pbr/src/render/skin.rs b/crates/bevy_pbr/src/render/skin.rs new file mode 100644 index 0000000000000..871f504d3ebe2 --- /dev/null +++ b/crates/bevy_pbr/src/render/skin.rs @@ -0,0 +1,139 @@ +use bevy_asset::Assets; +use bevy_ecs::prelude::*; +use bevy_math::Mat4; +use bevy_render::{ + batching::NoAutomaticBatching, + mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes}, + render_resource::{BufferUsages, BufferVec}, + renderer::{RenderDevice, RenderQueue}, + view::ViewVisibility, + Extract, +}; +use bevy_transform::prelude::GlobalTransform; + +/// Maximum number of joints supported for skinned meshes. +pub const MAX_JOINTS: usize = 256; + +#[derive(Component)] +pub struct SkinIndex { + pub index: u32, +} +impl SkinIndex { + /// Index to be in address space based on [`SkinUniform`] size. + const fn new(start: usize) -> Self { + SkinIndex { + index: (start * std::mem::size_of::()) as u32, + } + } +} + +// Notes on implementation: see comment on top of the `extract_skins` system. +#[derive(Resource)] +pub struct SkinUniform { + pub buffer: BufferVec, +} +impl Default for SkinUniform { + fn default() -> Self { + Self { + buffer: BufferVec::new(BufferUsages::UNIFORM), + } + } +} + +pub fn prepare_skins( + render_device: Res, + render_queue: Res, + mut uniform: ResMut, +) { + if uniform.buffer.is_empty() { + return; + } + + let len = uniform.buffer.len(); + uniform.buffer.reserve(len, &render_device); + uniform.buffer.write_buffer(&render_device, &render_queue); +} + +// Notes on implementation: +// We define the uniform binding as an array<mat4x4<f32>, N> in the shader, +// where N is the maximum number of Mat4s we can fit in the uniform binding, +// which may be as little as 16kB or 64kB. But, we may not need all N. +// We may only need, for example, 10. 
+// +// If we used uniform buffers ‘normally’ then we would have to write a full +// binding of data for each dynamic offset binding, which is wasteful, makes +// the buffer much larger than it needs to be, and uses more memory bandwidth +// to transfer the data, which then costs frame time. So @superdump came up +// with this design: just bind data at the specified offset and interpret +// the data at that offset as an array regardless of what is there. +// +// So instead of writing N Mat4s when you only need 10, you write 10, and +// then pad up to the next dynamic offset alignment. Then write the next. +// And for the last dynamic offset binding, make sure there is a full binding +// of data after it so that the buffer is of size +// `last dynamic offset` + `array<mat4x4<f32>, N>`. +// +// Then when binding the first dynamic offset, the first 10 entries in the array +// are what you expect, but if you read the 11th you’re reading ‘invalid’ data +// which could be padding or could be from the next binding. +// +// In this way, we can pack ‘variable sized arrays’ into uniform buffer bindings +// which normally only support fixed size arrays. You just have to make sure +// in the shader that you only read the values that are valid for that binding. +pub fn extract_skins( + mut commands: Commands, + mut previous_len: Local, + mut uniform: ResMut, + query: Extract>, + inverse_bindposes: Extract>>, + joints: Extract>, +) { + uniform.buffer.clear(); + + let mut values = Vec::with_capacity(*previous_len); + let mut last_start = 0; + + // PERF: This can be expensive, can we move this to prepare? 
+ for (entity, view_visibility, skin) in &query { + if !view_visibility.get() { + continue; + } + let buffer = &mut uniform.buffer; + let Some(inverse_bindposes) = inverse_bindposes.get(&skin.inverse_bindposes) else { + continue; + }; + let start = buffer.len(); + + let target = start + skin.joints.len().min(MAX_JOINTS); + buffer.extend( + joints + .iter_many(&skin.joints) + .zip(inverse_bindposes.iter()) + .take(MAX_JOINTS) + .map(|(joint, bindpose)| joint.affine() * *bindpose), + ); + // iter_many will skip any failed fetches. This will cause it to assign the wrong bones, + // so just bail by truncating to the start. + if buffer.len() != target { + buffer.truncate(start); + continue; + } + last_start = last_start.max(start); + + // Pad to 256 byte alignment + while buffer.len() % 4 != 0 { + buffer.push(Mat4::ZERO); + } + // NOTE: The skinned joints uniform buffer has to be bound at a dynamic offset per + // entity and so cannot currently be batched. + values.push((entity, (SkinIndex::new(start), NoAutomaticBatching))); + } + + // Pad out the buffer to ensure that there's enough space for bindings + while uniform.buffer.len() - last_start < MAX_JOINTS { + uniform.buffer.push(Mat4::ZERO); + } + + *previous_len = values.len(); + commands.insert_or_spawn_batch(values); +}