diff --git a/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl b/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl index fbb70bf31679f..04e8f3f56af08 100644 --- a/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl +++ b/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl @@ -1,16 +1,293 @@ -#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput +@group(0) @binding(0) var mip_0: texture_depth_2d; +@group(0) @binding(1) var mip_1: texture_storage_2d; +@group(0) @binding(2) var mip_2: texture_storage_2d; +@group(0) @binding(3) var mip_3: texture_storage_2d; +@group(0) @binding(4) var mip_4: texture_storage_2d; +@group(0) @binding(5) var mip_5: texture_storage_2d; +@group(0) @binding(6) var mip_6: texture_storage_2d; +@group(0) @binding(7) var mip_7: texture_storage_2d; +@group(0) @binding(8) var mip_8: texture_storage_2d; +@group(0) @binding(9) var mip_9: texture_storage_2d; +@group(0) @binding(10) var mip_10: texture_storage_2d; +@group(0) @binding(11) var mip_11: texture_storage_2d; +@group(0) @binding(12) var mip_12: texture_storage_2d; +@group(0) @binding(13) var samplr: sampler; +var max_mip_level: u32; -@group(0) @binding(0) var input_depth: texture_2d; -@group(0) @binding(1) var samplr: sampler; +/// Generates a hierarchical depth buffer. +/// Based on FidelityFX SPD v2.1 https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/sdk/include/FidelityFX/gpu/spd/ffx_spd.h#L528 -/// Performs a 2x2 downsample on a depth texture to generate the next mip level of a hierarchical depth buffer. +var intermediate_memory: array, 16>; -@fragment -fn downsample_depth(in: FullscreenVertexOutput) -> @location(0) vec4 { - let depth_quad = textureGather(0, input_depth, samplr, in.uv); - let downsampled_depth = min( - min(depth_quad.x, depth_quad.y), - min(depth_quad.z, depth_quad.w), +@compute +@workgroup_size(256, 1, 1) +fn downsample_depth_first( + @builtin(num_workgroups) num_workgroups: vec3u, + @builtin(workgroup_id) workgroup_id: vec3u, + @builtin(local_invocation_index) local_invocation_index: u32, +) { + let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u); + let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u); + let y = sub_xy.y + 8u * (local_invocation_index >> 7u); + + downsample_mips_0_and_1(x, y, workgroup_id.xy, local_invocation_index); + + downsample_mips_2_to_5(x, y, workgroup_id.xy, local_invocation_index); +} + +@compute +@workgroup_size(256, 1, 1) +fn downsample_depth_second(@builtin(local_invocation_index) local_invocation_index: u32) { + let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u); + let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u); + let y = sub_xy.y + 8u * (local_invocation_index >> 7u); + + downsample_mips_6_and_7(x, y); + + downsample_mips_8_to_11(x, y, local_invocation_index); +} + +fn downsample_mips_0_and_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { + var v: vec4f; + + var tex = vec2(workgroup_id * 64u) + vec2(x * 2u, y * 2u); + var pix = vec2(workgroup_id * 32u) + vec2(x, y); + v[0] = reduce_load_mip_0(tex); + textureStore(mip_1, pix, vec4(v[0])); + + tex = vec2(workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u); + pix = vec2(workgroup_id * 32u) + vec2(x + 16u, y); + v[1] = reduce_load_mip_0(tex); + textureStore(mip_1, pix, vec4(v[1])); + + tex = vec2(workgroup_id * 64u) + vec2(x * 2u, y * 2u + 32u); + pix = vec2(workgroup_id * 32u) + vec2(x, y + 16u); + v[2] = reduce_load_mip_0(tex); + textureStore(mip_1, pix, vec4(v[2])); + + tex = vec2(workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u + 32u); + pix = vec2(workgroup_id * 32u) + vec2(x + 16u, y + 16u); + v[3] = reduce_load_mip_0(tex); + textureStore(mip_1, pix, vec4(v[3])); + + if max_mip_level <= 1u { return; } + + for (var i = 0u; i < 4u; i++) { + intermediate_memory[x][y] = v[i]; + workgroupBarrier(); + if local_invocation_index < 64u { + v[i] = reduce_4(vec4( + intermediate_memory[x * 2u + 0u][y * 2u + 0u], + intermediate_memory[x * 2u + 1u][y * 2u + 0u], + intermediate_memory[x * 2u + 0u][y * 2u + 1u], + intermediate_memory[x * 2u + 1u][y * 2u + 1u], + )); + pix = (workgroup_id * 16u) + vec2( + x + (i % 2u) * 8u, + y + (i / 2u) * 8u, + ); + textureStore(mip_2, pix, vec4(v[i])); + } + workgroupBarrier(); + } + + if local_invocation_index < 64u { + intermediate_memory[x + 0u][y + 0u] = v[0]; + intermediate_memory[x + 8u][y + 0u] = v[1]; + intermediate_memory[x + 0u][y + 8u] = v[2]; + intermediate_memory[x + 8u][y + 8u] = v[3]; + } +} + +fn downsample_mips_2_to_5(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { + if max_mip_level <= 2u { return; } + workgroupBarrier(); + downsample_mip_2(x, y, workgroup_id, local_invocation_index); + + if max_mip_level <= 3u { return; } + workgroupBarrier(); + downsample_mip_3(x, y, workgroup_id, local_invocation_index); + + if max_mip_level <= 4u { return; } + workgroupBarrier(); + downsample_mip_4(x, y, workgroup_id, local_invocation_index); + + if max_mip_level <= 5u { return; } + workgroupBarrier(); + downsample_mip_5(workgroup_id, local_invocation_index); +} + +fn downsample_mip_2(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { + if local_invocation_index < 64u { + let v = reduce_4(vec4( + intermediate_memory[x * 2u + 0u][y * 2u + 0u], + intermediate_memory[x * 2u + 1u][y * 2u + 0u], + intermediate_memory[x * 2u + 0u][y * 2u + 1u], + intermediate_memory[x * 2u + 1u][y * 2u + 1u], + )); + textureStore(mip_3, (workgroup_id * 8u) + vec2(x, y), vec4(v)); + intermediate_memory[x * 2u + y % 2u][y * 2u] = v; + } +} + +fn downsample_mip_3(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { + if local_invocation_index < 16u { + let v = reduce_4(vec4( + intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u], + intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u], + )); + textureStore(mip_4, (workgroup_id * 4u) + vec2(x, y), vec4(v)); + intermediate_memory[x * 4u + y][y * 4u] = v; + } +} + +fn downsample_mip_4(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { + if local_invocation_index < 4u { + let v = reduce_4(vec4( + intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u], + intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u], + )); + textureStore(mip_5, (workgroup_id * 2u) + vec2(x, y), vec4(v)); + intermediate_memory[x + y * 2u][0u] = v; + } +} + +fn downsample_mip_5(workgroup_id: vec2u, local_invocation_index: u32) { + if local_invocation_index < 1u { + let v = reduce_4(vec4( + intermediate_memory[0u][0u], + intermediate_memory[1u][0u], + intermediate_memory[2u][0u], + intermediate_memory[3u][0u], + )); + textureStore(mip_6, workgroup_id, vec4(v)); + } +} + +fn downsample_mips_6_and_7(x: u32, y: u32) { + var v: vec4f; + + var tex = vec2(x * 4u + 0u, y * 4u + 0u); + var pix = vec2(x * 2u + 0u, y * 2u + 0u); + v[0] = reduce_load_mip_6(tex); + textureStore(mip_7, pix, vec4(v[0])); + + tex = vec2(x * 4u + 2u, y * 4u + 0u); + pix = vec2(x * 2u + 1u, y * 2u + 0u); + v[1] = reduce_load_mip_6(tex); + textureStore(mip_7, pix, vec4(v[1])); + + tex = vec2(x * 4u + 0u, y * 4u + 2u); + pix = vec2(x * 2u + 0u, y * 2u + 1u); + v[2] = reduce_load_mip_6(tex); + textureStore(mip_7, pix, vec4(v[2])); + + tex = vec2(x * 4u + 2u, y * 4u + 2u); + pix = vec2(x * 2u + 1u, y * 2u + 1u); + v[3] = reduce_load_mip_6(tex); + textureStore(mip_7, pix, vec4(v[3])); + + if max_mip_level <= 7u { return; } + + let vr = reduce_4(v); + textureStore(mip_8, vec2(x, y), vec4(vr)); + intermediate_memory[x][y] = vr; +} + +fn downsample_mips_8_to_11(x: u32, y: u32, local_invocation_index: u32) { + if max_mip_level <= 8u { return; } + workgroupBarrier(); + downsample_mip_8(x, y, local_invocation_index); + + if max_mip_level <= 9u { return; } + workgroupBarrier(); + downsample_mip_9(x, y, local_invocation_index); + + if max_mip_level <= 10u { return; } + workgroupBarrier(); + downsample_mip_10(x, y, local_invocation_index); + + if max_mip_level <= 11u { return; } + workgroupBarrier(); + downsample_mip_11(local_invocation_index); +} + +fn downsample_mip_8(x: u32, y: u32, local_invocation_index: u32) { + if local_invocation_index < 64u { + let v = reduce_4(vec4( + intermediate_memory[x * 2u + 0u][y * 2u + 0u], + intermediate_memory[x * 2u + 1u][y * 2u + 0u], + intermediate_memory[x * 2u + 0u][y * 2u + 1u], + intermediate_memory[x * 2u + 1u][y * 2u + 1u], + )); + textureStore(mip_9, vec2(x, y), vec4(v)); + intermediate_memory[x * 2u + y % 2u][y * 2u] = v; + } +} + +fn downsample_mip_9(x: u32, y: u32, local_invocation_index: u32) { + if local_invocation_index < 16u { + let v = reduce_4(vec4( + intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u], + intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u], + )); + textureStore(mip_10, vec2(x, y), vec4(v)); + intermediate_memory[x * 4u + y][y * 4u] = v; + } +} + +fn downsample_mip_10(x: u32, y: u32, local_invocation_index: u32) { + if local_invocation_index < 4u { + let v = reduce_4(vec4( + intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u], + intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u], + )); + textureStore(mip_11, vec2(x, y), vec4(v)); + intermediate_memory[x + y * 2u][0u] = v; + } +} + +fn downsample_mip_11(local_invocation_index: u32) { + if local_invocation_index < 1u { + let v = reduce_4(vec4( + intermediate_memory[0u][0u], + intermediate_memory[1u][0u], + intermediate_memory[2u][0u], + intermediate_memory[3u][0u], + )); + textureStore(mip_12, vec2(0u, 0u), vec4(v)); + } +} + +fn remap_for_wave_reduction(a: u32) -> vec2u { + return vec2( + insertBits(extractBits(a, 2u, 3u), a, 0u, 1u), + insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u), ); - return vec4(downsampled_depth, 0.0, 0.0, 0.0); +} + +fn reduce_load_mip_0(tex: vec2u) -> f32 { + let uv = (vec2f(tex) + 0.5) / vec2f(textureDimensions(mip_0)); + return reduce_4(textureGather(mip_0, samplr, uv)); +} + +fn reduce_load_mip_6(tex: vec2u) -> f32 { + return reduce_4(vec4( + textureLoad(mip_6, tex + vec2(0u, 0u)).r, + textureLoad(mip_6, tex + vec2(0u, 1u)).r, + textureLoad(mip_6, tex + vec2(1u, 0u)).r, + textureLoad(mip_6, tex + vec2(1u, 1u)).r, + )); +} + +fn reduce_4(v: vec4f) -> f32 { + return min(min(v.x, v.y), min(v.z, v.w)); } diff --git a/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl b/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl index 89e64de0c197b..f228ba050875f 100644 --- a/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl +++ b/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl @@ -6,6 +6,8 @@ meshlet_cluster_meshlet_ids, } +/// Writes out instance_id and meshlet_id to the global buffers for each cluster in the scene. + @compute @workgroup_size(128, 1, 1) // 128 threads per workgroup, 1 cluster per thread fn fill_cluster_buffers( diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs index a986260003c71..a886155494d6f 100644 --- a/crates/bevy_pbr/src/meshlet/gpu_scene.rs +++ b/crates/bevy_pbr/src/meshlet/gpu_scene.rs @@ -17,6 +17,7 @@ use bevy_ecs::{ system::{Commands, Local, Query, Res, ResMut, Resource, SystemState}, world::{FromWorld, World}, }; +use bevy_math::{UVec2, Vec4Swizzles}; use bevy_render::{ render_resource::{binding_types::*, *}, renderer::{RenderDevice, RenderQueue}, @@ -28,7 +29,7 @@ use bevy_transform::components::GlobalTransform; use bevy_utils::{default, HashMap, HashSet}; use encase::internal::WriteInto; use std::{ - iter, + array, iter, mem::size_of, ops::{DerefMut, Range}, sync::{atomic::AtomicBool, Arc}, @@ -374,9 +375,8 @@ pub fn prepare_meshlet_per_frame_resources( }); let depth_pyramid_size = Extent3d { - // Round down to the nearest power of 2 to ensure depth is conservative - width: previous_power_of_2(view.viewport.z), - height: previous_power_of_2(view.viewport.w), + width: view.viewport.z.div_ceil(2), + height: view.viewport.w.div_ceil(2), depth_or_array_layers: 1, }; let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2); @@ -389,24 +389,26 @@ pub fn prepare_meshlet_per_frame_resources( sample_count: 1, dimension: TextureDimension::D2, format: TextureFormat::R32Float, - usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, + usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, view_formats: &[], }, ); - let depth_pyramid_mips = (0..depth_pyramid_mip_count) - .map(|i| { + let depth_pyramid_mips = array::from_fn(|i| { + if (i as u32) < depth_pyramid_mip_count { depth_pyramid.texture.create_view(&TextureViewDescriptor { label: Some("meshlet_depth_pyramid_texture_view"), format: Some(TextureFormat::R32Float), dimension: Some(TextureViewDimension::D2), aspect: TextureAspect::All, - base_mip_level: i, + base_mip_level: i as u32, mip_level_count: Some(1), base_array_layer: 0, array_layer_count: Some(1), }) - }) - .collect::>(); + } else { + gpu_scene.depth_pyramid_dummy_texture.clone() + } + }); let depth_pyramid_all_mips = depth_pyramid.default_view.clone(); let previous_depth_pyramid = match gpu_scene.previous_depth_pyramids.get(&view_entity) { @@ -459,11 +461,13 @@ pub fn prepare_meshlet_per_frame_resources( visibility_buffer_draw_triangle_buffer: visibility_buffer_draw_triangle_buffer.clone(), depth_pyramid_all_mips, depth_pyramid_mips, + depth_pyramid_mip_count, previous_depth_pyramid, material_depth_color: not_shadow_view .then(|| texture_cache.get(&render_device, material_depth_color)), material_depth: not_shadow_view .then(|| texture_cache.get(&render_device, material_depth)), + view_size: view.viewport.zw(), }); } } @@ -571,22 +575,26 @@ pub fn prepare_meshlet_view_bind_groups( (None, Some(shadow_view)) => &shadow_view.depth_attachment.view, _ => unreachable!(), }; - let downsample_depth = (0..view_resources.depth_pyramid_mips.len()) - .map(|i| { - render_device.create_bind_group( - "meshlet_downsample_depth_bind_group", - &gpu_scene.downsample_depth_bind_group_layout, - &BindGroupEntries::sequential(( - if i == 0 { - view_depth_texture - } else { - &view_resources.depth_pyramid_mips[i - 1] - }, - &gpu_scene.depth_pyramid_sampler, - )), - ) - }) - .collect(); + let downsample_depth = render_device.create_bind_group( + "meshlet_downsample_depth_bind_group", + &gpu_scene.downsample_depth_bind_group_layout, + &BindGroupEntries::sequential(( + view_depth_texture, + &view_resources.depth_pyramid_mips[0], + &view_resources.depth_pyramid_mips[1], + &view_resources.depth_pyramid_mips[2], + &view_resources.depth_pyramid_mips[3], + &view_resources.depth_pyramid_mips[4], + &view_resources.depth_pyramid_mips[5], + &view_resources.depth_pyramid_mips[6], + &view_resources.depth_pyramid_mips[7], + &view_resources.depth_pyramid_mips[8], + &view_resources.depth_pyramid_mips[9], + &view_resources.depth_pyramid_mips[10], + &view_resources.depth_pyramid_mips[11], + &gpu_scene.depth_pyramid_sampler, + )), + ); let entries = BindGroupEntries::sequential(( cluster_meshlet_ids.as_entire_binding(), @@ -696,6 +704,7 @@ pub struct MeshletGpuScene { copy_material_depth_bind_group_layout: BindGroupLayout, material_draw_bind_group_layout: BindGroupLayout, depth_pyramid_sampler: Sampler, + depth_pyramid_dummy_texture: TextureView, } impl FromWorld for MeshletGpuScene { @@ -781,13 +790,30 @@ impl FromWorld for MeshletGpuScene { ), downsample_depth_bind_group_layout: render_device.create_bind_group_layout( "meshlet_downsample_depth_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::FRAGMENT, + &BindGroupLayoutEntries::sequential(ShaderStages::COMPUTE, { + let write_only_r32float = || { + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly) + }; ( - texture_2d(TextureSampleType::Float { filterable: false }), + texture_depth_2d(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + texture_storage_2d( + TextureFormat::R32Float, + StorageTextureAccess::ReadWrite, + ), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), sampler(SamplerBindingType::NonFiltering), - ), - ), + ) + }), ), visibility_buffer_raster_bind_group_layout: render_device.create_bind_group_layout( "meshlet_visibility_buffer_raster_bind_group_layout", @@ -834,6 +860,31 @@ impl FromWorld for MeshletGpuScene { label: Some("meshlet_depth_pyramid_sampler"), ..default() }), + depth_pyramid_dummy_texture: render_device + .create_texture(&TextureDescriptor { + label: Some("meshlet_depth_pyramid_dummy_texture"), + size: Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::STORAGE_BINDING, + view_formats: &[], + }) + .create_view(&TextureViewDescriptor { + label: Some("meshlet_depth_pyramid_dummy_texture_view"), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + base_mip_level: 0, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: Some(1), + }), } } } @@ -976,10 +1027,12 @@ pub struct MeshletViewResources { pub visibility_buffer_draw_indirect_args_second: Buffer, visibility_buffer_draw_triangle_buffer: Buffer, depth_pyramid_all_mips: TextureView, - pub depth_pyramid_mips: Box<[TextureView]>, + depth_pyramid_mips: [TextureView; 12], + pub depth_pyramid_mip_count: u32, previous_depth_pyramid: TextureView, pub material_depth_color: Option, pub material_depth: Option, + pub view_size: UVec2, } #[derive(Component)] @@ -988,18 +1041,8 @@ pub struct MeshletViewBindGroups { pub fill_cluster_buffers: BindGroup, pub culling_first: BindGroup, pub culling_second: BindGroup, - pub downsample_depth: Box<[BindGroup]>, + pub downsample_depth: BindGroup, pub visibility_buffer_raster: BindGroup, pub copy_material_depth: Option, pub material_draw: Option, } - -fn previous_power_of_2(x: u32) -> u32 { - // If x is a power of 2, halve it - if x.count_ones() == 1 { - x / 2 - } else { - // Else calculate the largest power of 2 that is less than x - 1 << (31 - x.leading_zeros()) - } -} diff --git a/crates/bevy_pbr/src/meshlet/pipelines.rs b/crates/bevy_pbr/src/meshlet/pipelines.rs index 551efbe176f19..2feaf9518b330 100644 --- a/crates/bevy_pbr/src/meshlet/pipelines.rs +++ b/crates/bevy_pbr/src/meshlet/pipelines.rs @@ -24,7 +24,8 @@ pub struct MeshletPipelines { fill_cluster_buffers: CachedComputePipelineId, cull_first: CachedComputePipelineId, cull_second: CachedComputePipelineId, - downsample_depth: CachedRenderPipelineId, + downsample_depth_first: CachedComputePipelineId, + downsample_depth_second: CachedComputePipelineId, visibility_buffer_raster: CachedRenderPipelineId, visibility_buffer_raster_depth_only: CachedRenderPipelineId, visibility_buffer_raster_depth_only_clamp_ortho: CachedRenderPipelineId, @@ -81,25 +82,33 @@ impl FromWorld for MeshletPipelines { entry_point: "cull_meshlets".into(), }), - downsample_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { - label: Some("meshlet_downsample_depth".into()), - layout: vec![downsample_depth_layout], - push_constant_ranges: vec![], - vertex: fullscreen_shader_vertex_state(), - primitive: PrimitiveState::default(), - depth_stencil: None, - multisample: MultisampleState::default(), - fragment: Some(FragmentState { + downsample_depth_first: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_downsample_depth_first_pipeline".into()), + layout: vec![downsample_depth_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, shader_defs: vec![], - entry_point: "downsample_depth".into(), - targets: vec![Some(ColorTargetState { - format: TextureFormat::R32Float, - blend: None, - write_mask: ColorWrites::ALL, - })], - }), - }), + entry_point: "downsample_depth_first".into(), + }, + ), + + downsample_depth_second: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_downsample_depth_second_pipeline".into()), + layout: vec![downsample_depth_layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], + shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "downsample_depth_second".into(), + }, + ), visibility_buffer_raster: pipeline_cache.queue_render_pipeline( RenderPipelineDescriptor { @@ -233,7 +242,7 @@ impl FromWorld for MeshletPipelines { ), copy_material_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { - label: Some("meshlet_copy_material_depth".into()), + label: Some("meshlet_copy_material_depth_pipeline".into()), layout: vec![copy_material_depth_layout], push_constant_ranges: vec![], vertex: fullscreen_shader_vertex_state(), @@ -264,7 +273,8 @@ impl MeshletPipelines { &ComputePipeline, &ComputePipeline, &ComputePipeline, - &RenderPipeline, + &ComputePipeline, + &ComputePipeline, &RenderPipeline, &RenderPipeline, &RenderPipeline, @@ -276,7 +286,8 @@ impl MeshletPipelines { pipeline_cache.get_compute_pipeline(pipeline.fill_cluster_buffers)?, pipeline_cache.get_compute_pipeline(pipeline.cull_first)?, pipeline_cache.get_compute_pipeline(pipeline.cull_second)?, - pipeline_cache.get_render_pipeline(pipeline.downsample_depth)?, + pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_first)?, + pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_second)?, pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster)?, pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster_depth_only)?, pipeline_cache diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs index f3ffb1865ed50..2e03d7cd9e6f1 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs @@ -53,6 +53,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode { self.view_light_query.update_archetypes(world); } + // TODO: Reuse compute/render passes between logical passes where possible, as they're expensive fn run( &self, graph: &mut RenderGraphContext, @@ -76,7 +77,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode { fill_cluster_buffers_pipeline, culling_first_pipeline, culling_second_pipeline, - downsample_depth_pipeline, + downsample_depth_first_pipeline, + downsample_depth_second_pipeline, visibility_buffer_raster_pipeline, visibility_buffer_raster_depth_only_pipeline, visibility_buffer_raster_depth_only_clamp_ortho, @@ -136,7 +138,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode { render_context, meshlet_view_resources, meshlet_view_bind_groups, - downsample_depth_pipeline, + downsample_depth_first_pipeline, + downsample_depth_second_pipeline, ); cull_pass( "culling_second", @@ -169,7 +172,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode { render_context, meshlet_view_resources, meshlet_view_bind_groups, - downsample_depth_pipeline, + downsample_depth_first_pipeline, + downsample_depth_second_pipeline, ); render_context.command_encoder().pop_debug_group(); @@ -224,7 +228,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode { render_context, meshlet_view_resources, meshlet_view_bind_groups, - downsample_depth_pipeline, + downsample_depth_first_pipeline, + downsample_depth_second_pipeline, ); cull_pass( "culling_second", @@ -250,7 +255,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode { render_context, meshlet_view_resources, meshlet_view_bind_groups, - downsample_depth_pipeline, + downsample_depth_first_pipeline, + downsample_depth_second_pipeline, ); render_context.command_encoder().pop_debug_group(); } @@ -259,7 +265,6 @@ impl Node for MeshletVisibilityBufferRasterPassNode { } } -// TODO: Reuse same compute pass as cull_pass fn fill_cluster_buffers_pass( render_context: &mut RenderContext, fill_cluster_buffers_bind_group: &BindGroup, @@ -379,35 +384,30 @@ fn downsample_depth( render_context: &mut RenderContext, meshlet_view_resources: &MeshletViewResources, meshlet_view_bind_groups: &MeshletViewBindGroups, - downsample_depth_pipeline: &RenderPipeline, + downsample_depth_first_pipeline: &ComputePipeline, + downsample_depth_second_pipeline: &ComputePipeline, ) { - render_context - .command_encoder() - .push_debug_group("meshlet_downsample_depth"); - - for i in 0..meshlet_view_resources.depth_pyramid_mips.len() { - let downsample_pass = RenderPassDescriptor { - label: Some("downsample_depth"), - color_attachments: &[Some(RenderPassColorAttachment { - view: &meshlet_view_resources.depth_pyramid_mips[i], - resolve_target: None, - ops: Operations { - load: LoadOp::Clear(LinearRgba::BLACK.into()), - store: StoreOp::Store, - }, - })], - depth_stencil_attachment: None, - timestamp_writes: None, - occlusion_query_set: None, - }; + let command_encoder = render_context.command_encoder(); + let mut downsample_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some("downsample_depth"), + timestamp_writes: None, + }); + downsample_pass.set_pipeline(downsample_depth_first_pipeline); + downsample_pass.set_push_constants( + 0, + &meshlet_view_resources.depth_pyramid_mip_count.to_le_bytes(), + ); + downsample_pass.set_bind_group(0, &meshlet_view_bind_groups.downsample_depth, &[]); + downsample_pass.dispatch_workgroups( + meshlet_view_resources.view_size.x.div_ceil(64), + meshlet_view_resources.view_size.y.div_ceil(64), + 1, + ); - let mut downsample_pass = render_context.begin_tracked_render_pass(downsample_pass); - downsample_pass.set_render_pipeline(downsample_depth_pipeline); - downsample_pass.set_bind_group(0, &meshlet_view_bind_groups.downsample_depth[i], &[]); - downsample_pass.draw(0..3, 0..1); + if meshlet_view_resources.depth_pyramid_mip_count >= 7 { + downsample_pass.set_pipeline(downsample_depth_second_pipeline); + downsample_pass.dispatch_workgroups(1, 1, 1); } - - render_context.command_encoder().pop_debug_group(); } fn copy_material_depth_pass(