diff --git a/Cargo.lock b/Cargo.lock index cd9a41aa97..e07924b542 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -814,6 +814,12 @@ version = "0.20.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f43e957e744be03f5801a55472f593d43fabdebf25a4585db250f04d86b1675f" +[[package]] +name = "glam" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "518faa5064866338b013ff9b2350dc318e14cc4fcd6cb8206d7e7c9886c98815" + [[package]] name = "glow" version = "0.11.2" @@ -2358,7 +2364,7 @@ dependencies = [ "ddsfile", "env_logger", "futures-intrusive", - "glam", + "glam 0.20.5", "js-sys", "log", "naga", @@ -2421,6 +2427,7 @@ dependencies = [ "env_logger", "foreign-types 0.3.2", "fxhash", + "glam 0.21.3", "glow", "glutin", "gpu-alloc", diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index 71f95a723d..7ef9acac2a 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -328,6 +328,7 @@ impl BindingTypeMaxCountValidator { wgt::BindingType::StorageTexture { .. } => { self.storage_textures.add(binding.visibility, count); } + wgt::BindingType::AccelerationStructure => todo!(), } } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index a5c5cbe51c..68d8b8fa59 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -1504,6 +1504,7 @@ impl Device { }, ) } + Bt::AccelerationStructure => todo!(), }; // Validate the count parameter @@ -1977,6 +1978,7 @@ impl Device { buffers: &hal_buffers, samplers: &hal_samplers, textures: &hal_textures, + acceleration_structures: &[], }; let raw = unsafe { self.raw diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index b89f400537..9fcc70e8b6 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -109,6 +109,7 @@ features = ["wgsl-in"] [dev-dependencies] env_logger = "0.9" winit = "0.27.1" # for "halmark" example +glam = "0.21.3" # for ray-traced-triangle example [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] glutin = "0.28.0" # for "gles" example diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index b9df5f2171..f380318a2d 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -420,6 +420,7 @@ impl Example { buffers: &[global_buffer_binding], samplers: &[&sampler], textures: &[texture_binding], + acceleration_structures: &[], entries: &[ hal::BindGroupEntry { binding: 0, @@ -453,6 +454,7 @@ impl Example { buffers: &[local_buffer_binding], samplers: &[], textures: &[], + acceleration_structures: &[], entries: &[hal::BindGroupEntry { binding: 0, resource_index: 0, diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs new file mode 100644 index 0000000000..49fb58686b --- /dev/null +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -0,0 +1,934 @@ +extern crate wgpu_hal as hal; + +use hal::{ + Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, Surface as _, +}; +use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle}; + +use glam::{Mat4, Vec3}; +use std::{ + borrow::{Borrow, Cow}, + iter, mem, + mem::{align_of, size_of}, + ptr::{self, copy_nonoverlapping}, + time::Instant, +}; + +const COMMAND_BUFFER_PER_CONTEXT: usize = 100; +const DESIRED_FRAMES: u32 = 3; + +fn pack_24_8(low_24: u32, high_8: u8) -> u32 { + (low_24 & 0x00ff_ffff) | (u32::from(high_8) << 24) +} + +#[derive(Debug)] +#[repr(C)] +struct Instance { + transform: [f32; 12], + 
instance_custom_index_and_mask: u32, + instance_shader_binding_table_record_offset_and_flags: u32, + acceleration_structure_reference: u64, +} + +fn transpose_matrix_for_acceleration_structure_instance(matrix: Mat4) -> [f32; 12] { + let row_0 = matrix.row(0); + let row_1 = matrix.row(1); + let row_2 = matrix.row(2); + [ + row_0.x, row_0.y, row_0.z, row_0.w, row_1.x, row_1.y, row_1.z, row_1.w, row_2.x, row_2.y, + row_2.z, row_2.w, + ] +} + +struct ExecutionContext { + encoder: A::CommandEncoder, + fence: A::Fence, + fence_value: hal::FenceValue, + used_views: Vec, + used_cmd_bufs: Vec, + frames_recorded: usize, +} + +impl ExecutionContext { + unsafe fn wait_and_clear(&mut self, device: &A::Device) { + device.wait(&self.fence, self.fence_value, !0).unwrap(); + self.encoder.reset_all(self.used_cmd_bufs.drain(..)); + for view in self.used_views.drain(..) { + device.destroy_texture_view(view); + } + self.frames_recorded = 0; + } +} + +#[allow(dead_code)] +struct Example { + instance: A::Instance, + adapter: A::Adapter, + surface: A::Surface, + surface_format: wgt::TextureFormat, + device: A::Device, + queue: A::Queue, + + contexts: Vec>, + context_index: usize, + extent: [u32; 2], + start: Instant, + pipeline: A::ComputePipeline, + bind_group: A::BindGroup, + bgl: A::BindGroupLayout, + shader_module: A::ShaderModule, + texture_view: A::TextureView, + uniform_buffer: A::Buffer, + pipeline_layout: A::PipelineLayout, + vertices_buffer: A::Buffer, + indices_buffer: A::Buffer, + texture: A::Texture, + instances: [Instance; 1], + instances_buffer: A::Buffer, + blas: A::AccelerationStructure, + tlas: A::AccelerationStructure, + scratch_buffer: A::Buffer, + time: f32, +} + +impl Example { + fn init(window: &winit::window::Window) -> Result { + let instance_desc = hal::InstanceDescriptor { + name: "example", + flags: if cfg!(debug_assertions) { + hal::InstanceFlags::all() + } else { + hal::InstanceFlags::empty() + }, + }; + let instance = unsafe { A::Instance::init(&instance_desc)? 
}; + let mut surface = unsafe { + instance + .create_surface(window.raw_display_handle(), window.raw_window_handle()) + .unwrap() + }; + + let (adapter, features) = unsafe { + let mut adapters = instance.enumerate_adapters(); + if adapters.is_empty() { + return Err(hal::InstanceError); + } + let exposed = adapters.swap_remove(0); + dbg!(exposed.features); + (exposed.adapter, exposed.features) + }; + let surface_caps = + unsafe { adapter.surface_capabilities(&surface) }.ok_or(hal::InstanceError)?; + log::info!("Surface caps: {:#?}", surface_caps); + + let hal::OpenDevice { device, mut queue } = + unsafe { adapter.open(features, &wgt::Limits::default()).unwrap() }; + + let window_size: (u32, u32) = window.inner_size().into(); + let surface_config = hal::SurfaceConfiguration { + swap_chain_size: DESIRED_FRAMES + .max(*surface_caps.swap_chain_sizes.start()) + .min(*surface_caps.swap_chain_sizes.end()), + present_mode: wgt::PresentMode::Fifo, + composite_alpha_mode: hal::CompositeAlphaMode::Opaque, + format: wgt::TextureFormat::Rgba8Unorm, + extent: wgt::Extent3d { + width: window_size.0, + height: window_size.1, + depth_or_array_layers: 1, + }, + usage: hal::TextureUses::COLOR_TARGET | hal::TextureUses::COPY_DST, + }; + unsafe { + surface.configure(&device, &surface_config).unwrap(); + }; + + #[allow(dead_code)] + struct Uniforms { + view_inverse: glam::Mat4, + proj_inverse: glam::Mat4, + } + + let bgl_desc = hal::BindGroupLayoutDescriptor { + label: None, + flags: hal::BindGroupLayoutFlags::empty(), + entries: &[ + wgt::BindGroupLayoutEntry { + binding: 0, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: wgt::BufferSize::new(mem::size_of::() as _), + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 1, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::StorageTexture { + access: wgt::StorageTextureAccess::WriteOnly, + format: wgt::TextureFormat::Rgba8Unorm, + view_dimension: wgt::TextureViewDimension::D2, + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 2, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::AccelerationStructure, + count: None, + }, + ], + }; + + let bgl = unsafe { device.create_bind_group_layout(&bgl_desc).unwrap() }; + + pub fn make_spirv_raw(data: &[u8]) -> Cow<[u32]> { + const MAGIC_NUMBER: u32 = 0x0723_0203; + assert_eq!( + data.len() % size_of::(), + 0, + "data size is not a multiple of 4" + ); + + //If the data happens to be aligned, directly use the byte array, + // otherwise copy the byte array in an owned vector and use that instead. + let words = if data.as_ptr().align_offset(align_of::()) == 0 { + let (pre, words, post) = unsafe { data.align_to::() }; + debug_assert!(pre.is_empty()); + debug_assert!(post.is_empty()); + Cow::from(words) + } else { + let mut words = vec![0u32; data.len() / size_of::()]; + unsafe { + copy_nonoverlapping(data.as_ptr(), words.as_mut_ptr() as *mut u8, data.len()); + } + Cow::from(words) + }; + + assert_eq!( + words[0], MAGIC_NUMBER, + "wrong magic word {:x}. 
Make sure you are using a binary SPIRV file.", + words[0] + ); + + words + } + + let shader_module = unsafe { + device + .create_shader_module( + &hal::ShaderModuleDescriptor { + label: None, + runtime_checks: false, + }, + hal::ShaderInput::SpirV(&make_spirv_raw(include_bytes!("shader.comp.spv"))), + ) + .unwrap() + }; + + let pipeline_layout_desc = hal::PipelineLayoutDescriptor { + label: None, + flags: hal::PipelineLayoutFlags::empty(), + bind_group_layouts: &[&bgl], + push_constant_ranges: &[], + }; + let pipeline_layout = unsafe { + device + .create_pipeline_layout(&pipeline_layout_desc) + .unwrap() + }; + + let pipeline = unsafe { + device.create_compute_pipeline(&hal::ComputePipelineDescriptor { + label: Some("pipeline"), + layout: &pipeline_layout, + stage: hal::ProgrammableStage { + module: &shader_module, + entry_point: "main", + }, + }) + } + .unwrap(); + + let vertices: [f32; 9] = [1.0, 1.0, 0.0, -1.0, 1.0, 0.0, 0.0, -1.0, 0.0]; + + let vertices_size_in_bytes = vertices.len() * 4; + + let indices: [u32; 3] = [0, 1, 2]; + + let indices_size_in_bytes = indices.len() * 4; + + let transform_matrix = [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]; + + let vertices_buffer = unsafe { + let vertices_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("vertices buffer"), + size: vertices_size_in_bytes as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&vertices_buffer, 0..vertices_size_in_bytes as u64) + .unwrap(); + ptr::copy_nonoverlapping( + vertices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + vertices_size_in_bytes, + ); + device.unmap_buffer(&vertices_buffer).unwrap(); + assert!(mapping.is_coherent); + + vertices_buffer + }; + + let indices_buffer = unsafe { + let indices_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("indices buffer"), + size: indices_size_in_bytes as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&indices_buffer, 0..indices_size_in_bytes as u64) + .unwrap(); + ptr::copy_nonoverlapping( + indices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + indices_size_in_bytes, + ); + device.unmap_buffer(&indices_buffer).unwrap(); + assert!(mapping.is_coherent); + + indices_buffer + }; + + let blas_sizes = unsafe { + device.get_acceleration_structure_build_sizes( + &hal::GetAccelerationStructureBuildSizesDescriptor { + geometry_info: hal::AccelerationStructureGeometryInfo::Triangles { + vertex_format: wgt::VertexFormat::Float32x3, + max_vertex: 3, + index_format: Some(wgt::IndexFormat::Uint32), + }, + format: hal::AccelerationStructureFormat::BottomLevel, + mode: hal::AccelerationStructureBuildMode::Build, + flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, + primitive_count: 1, + }, + ) + }; + + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE + | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; + + let tlas_sizes = unsafe { + device.get_acceleration_structure_build_sizes( + &hal::GetAccelerationStructureBuildSizesDescriptor { + geometry_info: hal::AccelerationStructureGeometryInfo::Instances, + format: hal::AccelerationStructureFormat::TopLevel, + mode: 
hal::AccelerationStructureBuildMode::Build, + flags: tlas_flags, + primitive_count: 1, + }, + ) + }; + + let blas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("blas"), + size: blas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::BottomLevel, + }) + } + .unwrap(); + + let tlas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("tlas"), + size: tlas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::TopLevel, + }) + } + .unwrap(); + + let uniforms = { + let view = Mat4::look_at_rh(Vec3::new(0.0, 0.0, 2.5), Vec3::ZERO, Vec3::Y); + let proj = Mat4::perspective_rh(59.0_f32.to_radians(), 1.0, 0.001, 1000.0); + + Uniforms { + view_inverse: view.inverse(), + proj_inverse: proj.inverse(), + } + }; + + let uniforms_size = std::mem::size_of::(); + + let uniform_buffer = unsafe { + let uniform_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("uniform buffer"), + size: uniforms_size as u64, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + memory_flags: hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&uniform_buffer, 0..uniforms_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + &uniforms as *const Uniforms as *const u8, + mapping.ptr.as_ptr(), + uniforms_size, + ); + device.unmap_buffer(&uniform_buffer).unwrap(); + assert!(mapping.is_coherent); + uniform_buffer + }; + + let texture_desc = hal::TextureDescriptor { + label: None, + size: wgt::Extent3d { + width: 512, + height: 512, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgt::TextureDimension::D2, + format: wgt::TextureFormat::Rgba8Unorm, + usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + memory_flags: hal::MemoryFlags::empty(), + }; + let texture = unsafe { device.create_texture(&texture_desc).unwrap() }; + + let view_desc = hal::TextureViewDescriptor { + label: None, + format: texture_desc.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + range: wgt::ImageSubresourceRange::default(), + }; + let texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() }; + + let bind_group = { + let buffer_binding = hal::BufferBinding { + buffer: &uniform_buffer, + offset: 0, + size: None, + }; + let texture_binding = hal::TextureBinding { + view: &texture_view, + usage: hal::TextureUses::STORAGE_READ_WRITE, + }; + let group_desc = hal::BindGroupDescriptor { + label: Some("bind group"), + layout: &bgl, + buffers: &[buffer_binding], + samplers: &[], + textures: &[texture_binding], + acceleration_structures: &[&tlas], + entries: &[ + hal::BindGroupEntry { + binding: 0, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 1, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 2, + resource_index: 0, + count: 1, + }, + ], + }; + unsafe { device.create_bind_group(&group_desc).unwrap() } + }; + + let scratch_buffer = unsafe { + device + .create_buffer(&hal::BufferDescriptor { + label: Some("scratch buffer"), + size: blas_sizes + .build_scratch_size + .max(tlas_sizes.build_scratch_size), + usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, + memory_flags: hal::MemoryFlags::empty(), + }) + .unwrap() + }; + + let instances = [ + Instance { + transform: transform_matrix, + instance_custom_index_and_mask: 
pack_24_8(0, 0xff), + instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), + acceleration_structure_reference: unsafe { + device.get_acceleration_structure_device_address(&blas) + }, + }, + /*Instance { + transform: transpose_matrix_for_acceleration_structure_instance( + Mat4::from_rotation_y(1.0), + ), + instance_custom_index_and_mask: pack_24_8(0, 0xff), + instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), + acceleration_structure_reference: unsafe { + device.get_acceleration_structure_device_address(&blas) + }, + }, + Instance { + transform: transpose_matrix_for_acceleration_structure_instance( + Mat4::from_rotation_y(-1.0), + ), + instance_custom_index_and_mask: pack_24_8(0, 0xff), + instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), + acceleration_structure_reference: unsafe { + device.get_acceleration_structure_device_address(&blas) + }, + },*/ + ]; + + let instances_buffer_size = instances.len() * std::mem::size_of::(); + + let instances_buffer = unsafe { + let instances_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("instances_buffer"), + size: instances_buffer_size as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + instances_buffer_size, + ); + device.unmap_buffer(&instances_buffer).unwrap(); + assert!(mapping.is_coherent); + + instances_buffer + }; + + let cmd_encoder_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &queue, + }; + let mut cmd_encoder = unsafe { device.create_command_encoder(&cmd_encoder_desc).unwrap() }; + + unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; + + unsafe { + cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { + geometry: &hal::AccelerationStructureGeometry::Triangles { + vertex_buffer: &vertices_buffer, + vertex_format: wgt::VertexFormat::Float32x3, + max_vertex: vertices.len() as u32, + vertex_stride: 3 * 4, + indices: Some(hal::AccelerationStructureGeometryIndices { + buffer: &indices_buffer, + format: wgt::IndexFormat::Uint32, + }), + }, + format: hal::AccelerationStructureFormat::BottomLevel, + mode: hal::AccelerationStructureBuildMode::Build, + flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, + primitive_count: indices.len() as u32 / 3, + primitive_offset: 0, + destination_acceleration_structure: &blas, + scratch_buffer: &scratch_buffer, + }); + + let as_barrier = hal::BufferBarrier { + buffer: &scratch_buffer, + usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }; + cmd_encoder.transition_buffers(iter::once(as_barrier)); + + cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { + geometry: &hal::AccelerationStructureGeometry::Instances { + buffer: &instances_buffer, + }, + format: hal::AccelerationStructureFormat::TopLevel, + mode: hal::AccelerationStructureBuildMode::Build, + flags: tlas_flags, + primitive_count: instances.len() as u32, + primitive_offset: 0, + destination_acceleration_structure: &tlas, + scratch_buffer: &scratch_buffer, + }); + + let texture_barrier = hal::TextureBarrier { + texture: &texture, + range: 
wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::STORAGE_READ_WRITE, + }; + + cmd_encoder.transition_textures(iter::once(texture_barrier)); + } + + let init_fence_value = 1; + let fence = unsafe { + let mut fence = device.create_fence().unwrap(); + let init_cmd = cmd_encoder.end_encoding().unwrap(); + queue + .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .unwrap(); + device.wait(&fence, init_fence_value, !0).unwrap(); + cmd_encoder.reset_all(iter::once(init_cmd)); + fence + }; + + Ok(Self { + instance, + adapter, + surface, + surface_format: surface_config.format, + device, + queue, + pipeline, + contexts: vec![ExecutionContext { + encoder: cmd_encoder, + fence, + fence_value: init_fence_value + 1, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + }], + context_index: 0, + extent: [window_size.0, window_size.1], + start: Instant::now(), + pipeline_layout, + bind_group, + texture, + instances, + instances_buffer, + blas, + tlas, + scratch_buffer, + time: 0.0, + indices_buffer, + vertices_buffer, + uniform_buffer, + texture_view, + bgl, + shader_module, + }) + } + + fn update(&mut self, _event: winit::event::WindowEvent) {} + + fn render(&mut self) { + let ctx = &mut self.contexts[self.context_index]; + + let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture }; + + let target_barrier0 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COPY_DST, + }; + + let instances_buffer_size = self.instances.len() * std::mem::size_of::(); + + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE + | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; + + self.time += 1.0 / 60.0; + + self.instances[0] = Instance { + transform: transpose_matrix_for_acceleration_structure_instance(Mat4::from_rotation_y( + self.time, + )), + instance_custom_index_and_mask: pack_24_8(0, 0xff), + instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), + acceleration_structure_reference: unsafe { + self.device + .get_acceleration_structure_device_address(&self.blas) + }, + }; + + unsafe { + let mapping = self + .device + .map_buffer(&self.instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + self.instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + instances_buffer_size, + ); + self.device.unmap_buffer(&self.instances_buffer).unwrap(); + assert!(mapping.is_coherent); + } + + unsafe { + ctx.encoder.begin_encoding(Some("frame")).unwrap(); + + ctx.encoder + .build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { + geometry: &hal::AccelerationStructureGeometry::Instances { + buffer: &self.instances_buffer, + }, + format: hal::AccelerationStructureFormat::TopLevel, + mode: hal::AccelerationStructureBuildMode::Update, + flags: tlas_flags, + primitive_count: self.instances.len() as u32, + primitive_offset: 0, + destination_acceleration_structure: &self.tlas, + scratch_buffer: &self.scratch_buffer, + }); + + let as_barrier = hal::BufferBarrier { + buffer: &self.scratch_buffer, + usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }; + ctx.encoder.transition_buffers(iter::once(as_barrier)); + + ctx.encoder.transition_textures(iter::once(target_barrier0)); + } + + let surface_view_desc = hal::TextureViewDescriptor { + label: 
None, + format: self.surface_format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COPY_DST, + range: wgt::ImageSubresourceRange::default(), + }; + let surface_tex_view = unsafe { + self.device + .create_texture_view(surface_tex.borrow(), &surface_view_desc) + .unwrap() + }; + unsafe { + ctx.encoder + .begin_compute_pass(&hal::ComputePassDescriptor { label: None }); + ctx.encoder.set_compute_pipeline(&self.pipeline); + ctx.encoder + .set_bind_group(&self.pipeline_layout, 0, &self.bind_group, &[]); + ctx.encoder.dispatch([512 / 8, 512 / 8, 1]); + } + + ctx.frames_recorded += 1; + let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT; + + let target_barrier1 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COPY_DST..hal::TextureUses::PRESENT, + }; + let target_barrier2 = hal::TextureBarrier { + texture: &self.texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::STORAGE_READ_WRITE..hal::TextureUses::COPY_SRC, + }; + let target_barrier3 = hal::TextureBarrier { + texture: &self.texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COPY_SRC..hal::TextureUses::STORAGE_READ_WRITE, + }; + unsafe { + ctx.encoder.end_compute_pass(); + ctx.encoder.transition_textures(iter::once(target_barrier2)); + ctx.encoder.copy_texture_to_texture( + &self.texture, + hal::TextureUses::COPY_SRC, + &surface_tex.borrow(), + std::iter::once(hal::TextureCopy { + src_base: hal::TextureCopyBase { + mip_level: 0, + array_layer: 0, + origin: wgt::Origin3d::ZERO, + aspect: hal::FormatAspects::COLOR, + }, + dst_base: hal::TextureCopyBase { + mip_level: 0, + array_layer: 0, + origin: wgt::Origin3d::ZERO, + aspect: hal::FormatAspects::COLOR, + }, + size: hal::CopyExtent { + width: 512, + height: 512, + depth: 1, + }, + }), + ); + ctx.encoder.transition_textures(iter::once(target_barrier1)); + ctx.encoder.transition_textures(iter::once(target_barrier3)); + } + + unsafe { + let cmd_buf = ctx.encoder.end_encoding().unwrap(); + let fence_param = if do_fence { + Some((&mut ctx.fence, ctx.fence_value)) + } else { + None + }; + self.queue.submit(&[&cmd_buf], fence_param).unwrap(); + self.queue.present(&mut self.surface, surface_tex).unwrap(); + ctx.used_cmd_bufs.push(cmd_buf); + ctx.used_views.push(surface_tex_view); + }; + + if do_fence { + log::info!("Context switch from {}", self.context_index); + let old_fence_value = ctx.fence_value; + if self.contexts.len() == 1 { + let hal_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &self.queue, + }; + self.contexts.push(unsafe { + ExecutionContext { + encoder: self.device.create_command_encoder(&hal_desc).unwrap(), + fence: self.device.create_fence().unwrap(), + fence_value: 0, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + } + }); + } + self.context_index = (self.context_index + 1) % self.contexts.len(); + let next = &mut self.contexts[self.context_index]; + unsafe { + next.wait_and_clear(&self.device); + } + next.fence_value = old_fence_value + 1; + } + } + + fn exit(mut self) { + unsafe { + { + let ctx = &mut self.contexts[self.context_index]; + self.queue + .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .unwrap(); + } + + for mut ctx in self.contexts { + ctx.wait_and_clear(&self.device); + self.device.destroy_command_encoder(ctx.encoder); + self.device.destroy_fence(ctx.fence); + } + + self.device.destroy_bind_group(self.bind_group); + 
self.device.destroy_buffer(self.scratch_buffer); + self.device.destroy_buffer(self.instances_buffer); + self.device.destroy_buffer(self.indices_buffer); + self.device.destroy_buffer(self.vertices_buffer); + self.device.destroy_buffer(self.uniform_buffer); + self.device.destroy_acceleration_structure(self.tlas); + self.device.destroy_acceleration_structure(self.blas); + self.device.destroy_texture_view(self.texture_view); + self.device.destroy_texture(self.texture); + self.device.destroy_compute_pipeline(self.pipeline); + self.device.destroy_pipeline_layout(self.pipeline_layout); + self.device.destroy_bind_group_layout(self.bgl); + self.device.destroy_shader_module(self.shader_module); + + self.surface.unconfigure(&self.device); + self.device.exit(self.queue); + self.instance.destroy_surface(self.surface); + drop(self.adapter); + } + } +} + +#[cfg(all(feature = "metal"))] +type Api = hal::api::Metal; +#[cfg(all(feature = "vulkan", not(feature = "metal")))] +type Api = hal::api::Vulkan; +#[cfg(all(feature = "gles", not(feature = "metal"), not(feature = "vulkan")))] +type Api = hal::api::Gles; +#[cfg(all( + feature = "dx12", + not(feature = "metal"), + not(feature = "vulkan"), + not(feature = "gles") +))] +type Api = hal::api::Dx12; +#[cfg(not(any( + feature = "metal", + feature = "vulkan", + feature = "gles", + feature = "dx12" +)))] +type Api = hal::api::Empty; + +fn main() { + env_logger::init(); + + let event_loop = winit::event_loop::EventLoop::new(); + let window = winit::window::WindowBuilder::new() + .with_title("hal-bunnymark") + .with_inner_size(winit::dpi::PhysicalSize { + width: 512, + height: 512, + }) + .build(&event_loop) + .unwrap(); + + let example_result = Example::::init(&window); + let mut example = Some(example_result.expect("Selected backend is not supported")); + + event_loop.run(move |event, _, control_flow| { + let _ = &window; // force ownership by the closure + *control_flow = winit::event_loop::ControlFlow::Poll; + match event { + winit::event::Event::RedrawEventsCleared => { + window.request_redraw(); + } + winit::event::Event::WindowEvent { event, .. } => match event { + winit::event::WindowEvent::KeyboardInput { + input: + winit::event::KeyboardInput { + virtual_keycode: Some(winit::event::VirtualKeyCode::Escape), + state: winit::event::ElementState::Pressed, + .. + }, + .. 
+ } + | winit::event::WindowEvent::CloseRequested => { + *control_flow = winit::event_loop::ControlFlow::Exit; + } + _ => { + example.as_mut().unwrap().update(event); + } + }, + winit::event::Event::RedrawRequested(_) => { + let ex = example.as_mut().unwrap(); + + ex.render(); + } + winit::event::Event::LoopDestroyed => { + example.take().unwrap().exit(); + } + _ => {} + } + }); +} diff --git a/wgpu-hal/examples/ray-traced-triangle/shader.comp b/wgpu-hal/examples/ray-traced-triangle/shader.comp new file mode 100644 index 0000000000..d31f29115f --- /dev/null +++ b/wgpu-hal/examples/ray-traced-triangle/shader.comp @@ -0,0 +1,44 @@ +#version 460 +#extension GL_EXT_ray_query : enable + +layout(set = 0, binding = 0) uniform Uniforms +{ + mat4 viewInverse; + mat4 projInverse; +} cam; +layout(set = 0, binding = 1, rgba8) uniform image2D image; +layout(set = 0, binding = 2) uniform accelerationStructureEXT tlas; + +layout(local_size_x = 8, local_size_y = 8) in; + +void main() +{ + uvec2 launch_id = gl_GlobalInvocationID.xy; + uvec2 launch_size = gl_NumWorkGroups.xy * 8; + + const vec2 pixelCenter = vec2(launch_id) + vec2(0.5); + const vec2 inUV = pixelCenter/vec2(launch_size); + vec2 d = inUV * 2.0 - 1.0; + + vec4 origin = cam.viewInverse * vec4(0,0,0,1); + vec4 target = cam.projInverse * vec4(d.x, d.y, 1, 1) ; + vec4 direction = cam.viewInverse*vec4(normalize(target.xyz), 0) ; + + float tmin = 0.001; + float tmax = 10000.0; + + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, tlas, gl_RayFlagsOpaqueEXT, 0xff, origin.xyz, tmin, direction.xyz, tmax); + + rayQueryProceedEXT(rayQuery); + + vec3 out_colour = vec3(0.0, 0.0, 0.0); + + if (rayQueryGetIntersectionTypeEXT(rayQuery, true) == gl_RayQueryCommittedIntersectionTriangleEXT ) { + vec2 barycentrics = rayQueryGetIntersectionBarycentricsEXT(rayQuery, true); + + out_colour = vec3(barycentrics.x, barycentrics.y, 1.0 - barycentrics.x - barycentrics.y); + } + + imageStore(image, ivec2(launch_id), vec4(out_colour, 1.0)); +} \ No newline at end of file diff --git a/wgpu-hal/examples/ray-traced-triangle/shader.comp.spv b/wgpu-hal/examples/ray-traced-triangle/shader.comp.spv new file mode 100644 index 0000000000..345085c948 Binary files /dev/null and b/wgpu-hal/examples/ray-traced-triangle/shader.comp.spv differ diff --git a/wgpu-hal/src/dx11/command.rs b/wgpu-hal/src/dx11/command.rs index 1c73f3c325..3ec95d0c33 100644 --- a/wgpu-hal/src/dx11/command.rs +++ b/wgpu-hal/src/dx11/command.rs @@ -265,4 +265,11 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { todo!() } + + unsafe fn build_acceleration_structures( + &mut self, + desc: &crate::BuildAccelerationStructureDescriptor, + ) { + todo!() + } } diff --git a/wgpu-hal/src/dx11/device.rs b/wgpu-hal/src/dx11/device.rs index 7b095ba1df..ee73329412 100644 --- a/wgpu-hal/src/dx11/device.rs +++ b/wgpu-hal/src/dx11/device.rs @@ -200,6 +200,31 @@ impl crate::Device for super::Device { unsafe fn stop_capture(&self) { todo!() } + + unsafe fn create_acceleration_structure( + &self, + desc: &crate::AccelerationStructureDescriptor, + ) -> Result { + todo!() + } + unsafe fn get_acceleration_structure_build_sizes( + &self, + desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + ) -> crate::AccelerationStructureBuildSizes { + todo!() + } + unsafe fn get_acceleration_structure_device_address( + &self, + acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + todo!() + } + unsafe fn 
destroy_acceleration_structure( + &self, + acceleration_structure: super::AccelerationStructure, + ) { + todo!() + } } impl crate::Queue for super::Queue { diff --git a/wgpu-hal/src/dx11/mod.rs b/wgpu-hal/src/dx11/mod.rs index a77bb95919..e67feb3fad 100644 --- a/wgpu-hal/src/dx11/mod.rs +++ b/wgpu-hal/src/dx11/mod.rs @@ -36,6 +36,8 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; } pub struct Instance { @@ -106,6 +108,8 @@ pub struct BindGroup {} pub struct PipelineLayout {} #[derive(Debug)] pub struct ShaderModule {} +#[derive(Debug)] +pub struct AccelerationStructure {} pub struct RenderPipeline {} pub struct ComputePipeline {} diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index ca2f036430..4f843146f8 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1019,4 +1019,13 @@ impl crate::CommandEncoder for super::CommandEncoder { 0, ); } + + unsafe fn build_acceleration_structures( + &mut self, + _desc: &crate::BuildAccelerationStructureDescriptor, + ) { + // Implement using `BuildRaytracingAccelerationStructure`: + // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#buildraytracingaccelerationstructure + todo!() + } } diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 4114fba002..bbe14cefff 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -107,6 +107,7 @@ pub fn map_binding_type(ty: &wgt::BindingType) -> native::DescriptorRangeType { .. } | Bt::StorageTexture { .. } => native::DescriptorRangeType::UAV, + Bt::AccelerationStructure => todo!(), } } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index de81b4e1bd..106f815711 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -706,6 +706,7 @@ impl crate::Device for super::Device { num_texture_views += count } wgt::BindingType::Sampler { .. } => num_samplers += count, + wgt::BindingType::AccelerationStructure => todo!(), } } @@ -1189,6 +1190,7 @@ impl crate::Device for super::Device { cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); } } + wgt::BindingType::AccelerationStructure => todo!(), } } @@ -1567,4 +1569,38 @@ impl crate::Device for super::Device { self.render_doc .end_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) } + + unsafe fn get_acceleration_structure_build_sizes( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + ) -> crate::AccelerationStructureBuildSizes { + // Implement using `GetRaytracingAccelerationStructurePrebuildInfo`: + // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#getraytracingaccelerationstructureprebuildinfo + todo!() + } + + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + // Implement using `GetGPUVirtualAddress`: + // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12resource-getgpuvirtualaddress + todo!() + } + + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result { + // Create a D3D12 resource as per-usual. + todo!() + } + + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + // Destroy a D3D12 resource as per-usual. 
+ todo!() + } } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 61d2ad9576..195fd429fe 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -79,6 +79,8 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; } // Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries. @@ -524,6 +526,9 @@ pub struct ComputePipeline { unsafe impl Send for ComputePipeline {} unsafe impl Sync for ComputePipeline {} +#[derive(Debug)] +pub struct AccelerationStructure {} + impl SwapChain { unsafe fn release_resources(self) -> native::WeakPtr { for resource in self.resources { diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 24e7720be2..e547044f3f 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -28,6 +28,7 @@ impl crate::Api for Api { type Sampler = Resource; type QuerySet = Resource; type Fence = Resource; + type AccelerationStructure = Resource; type BindGroupLayout = Resource; type BindGroup = Resource; @@ -230,6 +231,25 @@ impl crate::Device for Context { false } unsafe fn stop_capture(&self) {} + unsafe fn create_acceleration_structure( + &self, + desc: &crate::AccelerationStructureDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn get_acceleration_structure_build_sizes( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + ) -> crate::AccelerationStructureBuildSizes { + Default::default() + } + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &Resource, + ) -> wgt::BufferAddress { + Default::default() + } + unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: Resource) {} } impl crate::CommandEncoder for Encoder { @@ -392,4 +412,10 @@ impl crate::CommandEncoder for Encoder { unsafe fn dispatch(&mut self, count: [u32; 3]) {} unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} + + unsafe fn build_acceleration_structures( + &mut self, + _desc: &crate::BuildAccelerationStructureDescriptor, + ) { + } } diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index beaf600e6e..e61e70357b 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1000,4 +1000,11 @@ impl crate::CommandEncoder for super::CommandEncoder { indirect_offset: offset, }); } + + unsafe fn build_acceleration_structures( + &mut self, + _desc: &crate::BuildAccelerationStructureDescriptor, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 04ecdffe02..1858825463 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -861,6 +861,7 @@ impl crate::Device for super::Device { ty: wgt::BufferBindingType::Storage { .. }, .. 
} => &mut num_storage_buffers, + wgt::BindingType::AccelerationStructure => unimplemented!(), }; binding_to_slot[entry.binding as usize] = *counter; @@ -941,6 +942,7 @@ impl crate::Device for super::Device { format: format_desc.internal, }) } + wgt::BindingType::AccelerationStructure => unimplemented!(), }; contents.push(binding); } @@ -1161,6 +1163,25 @@ impl crate::Device for super::Device { self.render_doc .end_frame_capture(ptr::null_mut(), ptr::null_mut()) } + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result<(), crate::DeviceError> { + unimplemented!() + } + unsafe fn get_acceleration_structure_build_sizes( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + ) -> crate::AccelerationStructureBuildSizes { + unimplemented!() + } + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &(), + ) -> wgt::BufferAddress { + unimplemented!() + } + unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: ()) {} } // SAFE: WASM doesn't have threads diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 717502f2c6..a688f230e7 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -112,6 +112,7 @@ impl crate::Api for Api { type Sampler = Sampler; type QuerySet = QuerySet; type Fence = Fence; + type AccelerationStructure = (); type BindGroupLayout = BindGroupLayout; type BindGroup = BindGroup; diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 259318bf59..321d8664ee 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -172,6 +172,8 @@ pub trait Api: Clone + Sized { type ShaderModule: fmt::Debug + Send + Sync; type RenderPipeline: Send + Sync; type ComputePipeline: Send + Sync; + + type AccelerationStructure: fmt::Debug + Send + Sync + 'static; } pub trait Instance: Sized + Send + Sync { @@ -324,6 +326,23 @@ pub trait Device: Send + Sync { unsafe fn start_capture(&self) -> bool; unsafe fn stop_capture(&self); + + unsafe fn create_acceleration_structure( + &self, + desc: &AccelerationStructureDescriptor, + ) -> Result; + unsafe fn get_acceleration_structure_build_sizes( + &self, + desc: &GetAccelerationStructureBuildSizesDescriptor, + ) -> AccelerationStructureBuildSizes; + unsafe fn get_acceleration_structure_device_address( + &self, + acceleration_structure: &A::AccelerationStructure, + ) -> wgt::BufferAddress; + unsafe fn destroy_acceleration_structure( + &self, + acceleration_structure: A::AccelerationStructure, + ); } pub trait Queue: Send + Sync { @@ -524,6 +543,11 @@ pub trait CommandEncoder: Send + Sync { unsafe fn dispatch(&mut self, count: [u32; 3]); unsafe fn dispatch_indirect(&mut self, buffer: &A::Buffer, offset: wgt::BufferAddress); + + unsafe fn build_acceleration_structures( + &mut self, + desc: &BuildAccelerationStructureDescriptor, + ); } bitflags!( @@ -660,6 +684,9 @@ bitflags::bitflags! { const STORAGE_READ_WRITE = 1 << 8; /// The indirect or count buffer in a indirect draw or dispatch. const INDIRECT = 1 << 9; + const ACCELERATION_STRUCTURE_SCRATCH = 1 << 10; + const BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 11; + const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 12; /// The combination of states that a buffer may be in _at the same time_. 
const INCLUSIVE = Self::MAP_READ.bits | Self::COPY_SRC.bits | Self::INDEX.bits | Self::VERTEX.bits | Self::UNIFORM.bits | @@ -929,6 +956,7 @@ pub struct BindGroupDescriptor<'a, A: Api> { pub samplers: &'a [&'a A::Sampler], pub textures: &'a [TextureBinding<'a, A>], pub entries: &'a [BindGroupEntry], + pub acceleration_structures: &'a [&'a A::AccelerationStructure], } #[derive(Clone, Debug)] @@ -1228,3 +1256,85 @@ fn test_default_limits() { let limits = wgt::Limits::default(); assert!(limits.max_bind_groups <= MAX_BIND_GROUPS as u32); } + +#[derive(Clone, Debug)] +pub struct AccelerationStructureDescriptor<'a> { + pub label: Label<'a>, + pub size: wgt::BufferAddress, + pub format: AccelerationStructureFormat, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum AccelerationStructureFormat { + TopLevel, + BottomLevel, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum AccelerationStructureBuildMode { + Build, + Update, +} + +#[derive(Clone, Debug, Default)] +pub struct AccelerationStructureBuildSizes { + pub acceleration_structure_size: wgt::BufferAddress, + pub update_scratch_size: wgt::BufferAddress, + pub build_scratch_size: wgt::BufferAddress, +} + +pub struct GetAccelerationStructureBuildSizesDescriptor { + pub geometry_info: AccelerationStructureGeometryInfo, + pub format: AccelerationStructureFormat, + pub mode: AccelerationStructureBuildMode, + pub flags: AccelerationStructureBuildFlags, + pub primitive_count: u32, +} + +#[derive(Clone, Copy)] +pub enum AccelerationStructureGeometryInfo { + Triangles { + vertex_format: wgt::VertexFormat, + max_vertex: u32, + index_format: Option, + }, + Instances, +} + +pub struct BuildAccelerationStructureDescriptor<'a, A: Api> { + pub geometry: &'a AccelerationStructureGeometry<'a, A>, + pub format: AccelerationStructureFormat, + pub mode: AccelerationStructureBuildMode, + pub flags: AccelerationStructureBuildFlags, + pub primitive_count: u32, + pub primitive_offset: u32, + pub destination_acceleration_structure: &'a A::AccelerationStructure, + pub scratch_buffer: &'a A::Buffer, +} + +pub enum AccelerationStructureGeometry<'a, A: Api> { + Triangles { + vertex_buffer: &'a A::Buffer, + vertex_format: wgt::VertexFormat, + max_vertex: u32, + vertex_stride: wgt::BufferAddress, + indices: Option>, + }, + Instances { + buffer: &'a A::Buffer, + }, +} + +pub struct AccelerationStructureGeometryIndices<'a, A: Api> { + pub format: wgt::IndexFormat, + pub buffer: &'a A::Buffer, +} + +bitflags!( + pub struct AccelerationStructureBuildFlags: u32 { + const PREFER_FAST_TRACE = 1 << 0; + const PREFER_FAST_BUILD = 1 << 1; + const ALLOW_UPDATE = 1 << 2; + const LOW_MEMORY = 1 << 3; + } +); diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 49337ee7ea..6048aca0f4 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -962,4 +962,11 @@ impl crate::CommandEncoder for super::CommandEncoder { let encoder = self.state.compute.as_ref().unwrap(); encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, self.state.raw_wg_size); } + + unsafe fn build_acceleration_structures( + &mut self, + _desc: &crate::BuildAccelerationStructureDescriptor, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 81b9461f87..a18cc3d1f3 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -595,6 +595,7 @@ impl crate::Device for super::Device { wgt::StorageTextureAccess::ReadWrite => true, }; } + wgt::BindingType::AccelerationStructure => 
unimplemented!(), } let br = naga::ResourceBinding { @@ -755,6 +756,7 @@ impl crate::Device for super::Device { ); counter.textures += size; } + wgt::BindingType::AccelerationStructure => unimplemented!(), } } } @@ -1123,4 +1125,32 @@ impl crate::Device for super::Device { } shared_capture_manager.stop_capture(); } + + unsafe fn get_acceleration_structure_build_sizes( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + ) -> crate::AccelerationStructureBuildSizes { + unimplemented!() + } + + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + unimplemented!() + } + + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result { + unimplemented!() + } + + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index d4ba2d14cf..7ca548f6db 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -59,6 +59,8 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; } pub struct Instance { @@ -734,3 +736,6 @@ pub struct CommandBuffer { unsafe impl Send for CommandBuffer {} unsafe impl Sync for CommandBuffer {} + +#[derive(Debug)] +pub struct AccelerationStructure; diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 0a3afb690e..025eb4cb4b 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -27,6 +27,9 @@ pub struct PhysicalDeviceFeatures { vk::PhysicalDeviceShaderFloat16Int8Features, vk::PhysicalDevice16BitStorageFeatures, )>, + acceleration_structure: Option, + buffer_device_address: Option, + ray_query: Option, } // This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. 
@@ -65,6 +68,15 @@ impl PhysicalDeviceFeatures { info = info.push_next(f16_i8_feature); info = info.push_next(_16bit_feature); } + if let Some(ref mut feature) = self.acceleration_structure { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.buffer_device_address { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.ray_query { + info = info.push_next(feature); + } info } @@ -295,6 +307,37 @@ impl PhysicalDeviceFeatures { } else { None }, + acceleration_structure: if enabled_extensions + .contains(&vk::KhrAccelerationStructureFn::name()) + { + Some( + vk::PhysicalDeviceAccelerationStructureFeaturesKHR::builder() + .acceleration_structure(true) + .build(), + ) + } else { + None + }, + buffer_device_address: if enabled_extensions + .contains(&vk::KhrBufferDeviceAddressFn::name()) + { + Some( + vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR::builder() + .buffer_device_address(true) + .build(), + ) + } else { + None + }, + ray_query: if enabled_extensions.contains(&vk::KhrRayQueryFn::name()) { + Some( + vk::PhysicalDeviceRayQueryFeaturesKHR::builder() + .ray_query(true) + .build(), + ) + } else { + None + }, } } @@ -492,6 +535,14 @@ impl PhysicalDeviceFeatures { ), ); + features.set( + F::RAY_TRACING, + caps.supports_extension(vk::KhrDeferredHostOperationsFn::name()) + && caps.supports_extension(vk::KhrAccelerationStructureFn::name()) + && caps.supports_extension(vk::KhrBufferDeviceAddressFn::name()) + && caps.supports_extension(vk::KhrRayQueryFn::name()), + ); + (features, dl_flags) } @@ -506,11 +557,12 @@ impl PhysicalDeviceFeatures { } /// Information gathered about a physical device capabilities. -#[derive(Default)] +#[derive(Default, Debug)] pub struct PhysicalDeviceCapabilities { supported_extensions: Vec, properties: vk::PhysicalDeviceProperties, descriptor_indexing: Option, + acceleration_structure: Option, } // This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. 
@@ -579,6 +631,13 @@ impl PhysicalDeviceCapabilities { extensions.push(vk::KhrDrawIndirectCountFn::name()); } + if requested_features.contains(wgt::Features::RAY_TRACING) { + extensions.push(vk::KhrDeferredHostOperationsFn::name()); + extensions.push(vk::KhrAccelerationStructureFn::name()); + extensions.push(vk::KhrBufferDeviceAddressFn::name()); + extensions.push(vk::KhrRayQueryFn::name()); + } + if requested_features.contains(wgt::Features::CONSERVATIVE_RASTERIZATION) { extensions.push(vk::ExtConservativeRasterizationFn::name()); } @@ -737,6 +796,9 @@ impl super::InstanceShared { let supports_descriptor_indexing = capabilities.supports_extension(vk::ExtDescriptorIndexingFn::name()); + let supports_acceleration_structure = + capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()); + let mut builder = vk::PhysicalDeviceProperties2::builder(); if supports_descriptor_indexing { @@ -746,6 +808,13 @@ impl super::InstanceShared { builder = builder.push_next(next); } + if supports_acceleration_structure { + let next = capabilities + .acceleration_structure + .insert(vk::PhysicalDeviceAccelerationStructurePropertiesKHR::default()); + builder = builder.push_next(next); + } + let mut properties2 = builder.build(); unsafe { get_device_properties.get_physical_device_properties2(phd, &mut properties2); @@ -831,6 +900,12 @@ impl super::InstanceShared { builder = builder.push_next(&mut next.0); builder = builder.push_next(&mut next.1); } + if capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()) { + let next = features + .acceleration_structure + .insert(vk::PhysicalDeviceAccelerationStructureFeaturesKHR::default()); + builder = builder.push_next(next); + } let mut features2 = builder.build(); unsafe { @@ -1098,6 +1173,22 @@ impl super::Adapter { } else { None }; + let ray_tracing_fns = if enabled_extensions.contains(&khr::AccelerationStructure::name()) + && enabled_extensions.contains(&khr::BufferDeviceAddress::name()) + { + Some(super::RayTracingDeviceExtensionFunctions { + acceleration_structure: khr::AccelerationStructure::new( + &self.instance.raw, + &raw_device, + ), + buffer_device_address: khr::BufferDeviceAddress::new( + &self.instance.raw, + &raw_device, + ), + }) + } else { + None + }; let naga_options = { use naga::back::spv; @@ -1190,6 +1281,7 @@ impl super::Adapter { extension_fns: super::DeviceExtensionFunctions { draw_indirect_count: indirect_count_fn, timeline_semaphore: timeline_semaphore_fn, + ray_tracing: ray_tracing_fns, }, vendor_id: self.phd_capabilities.properties.vendor_id, timestamp_period: self.phd_capabilities.properties.limits.timestamp_period, @@ -1238,7 +1330,8 @@ impl super::Adapter { size: memory_heap.size, }) .collect(), - buffer_device_address: false, + buffer_device_address: enabled_extensions + .contains(&khr::BufferDeviceAddress::name()), }; gpu_alloc::GpuAllocator::new(config, properties) }; diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index e225ca8356..02768f3c74 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -341,6 +341,115 @@ impl crate::CommandEncoder for super::CommandEncoder { ); } + unsafe fn build_acceleration_structures( + &mut self, + desc: &crate::BuildAccelerationStructureDescriptor, + ) { + let ray_tracing_functions = match self.device.extension_fns.ray_tracing { + Some(ref functions) => functions, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + let geometry = match *desc.geometry { + crate::AccelerationStructureGeometry::Instances { 
buffer } => { + let instances = vk::AccelerationStructureGeometryInstancesDataKHR::builder().data( + vk::DeviceOrHostAddressConstKHR { + device_address: ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), + ), + }, + ); + + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::INSTANCES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + instances: *instances, + }) + .flags(vk::GeometryFlagsKHR::empty()) + } + crate::AccelerationStructureGeometry::Triangles { + vertex_buffer, + vertex_format, + max_vertex, + vertex_stride, + ref indices, + } => { + let mut triangles_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_data(vk::DeviceOrHostAddressConstKHR { + device_address: ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(vertex_buffer.raw), + ), + }) + .vertex_format(conv::map_vertex_format(vertex_format)) + .vertex_stride(vertex_stride) + .max_vertex(max_vertex); + + if let Some(ref indices) = *indices { + triangles_data = triangles_data + .index_type(conv::map_index_format(indices.format)) + .index_data(vk::DeviceOrHostAddressConstKHR { + device_address: ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(indices.buffer.raw), + ), + }) + } + + let triangles_data = triangles_data.build(); + + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: triangles_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + } + }; + + let geometries = &[*geometry]; + + let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() + .primitive_count(desc.primitive_count) + .primitive_offset(desc.primitive_offset) + .build(); + + let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() + .ty(conv::map_acceleration_structure_format(desc.format)) + .mode(conv::map_acceleration_structure_build_mode(desc.mode)) + .flags(conv::map_acceleration_structure_flags(desc.flags)) + .geometries(geometries) + .dst_acceleration_structure(desc.destination_acceleration_structure.raw) + .scratch_data(vk::DeviceOrHostAddressKHR { + device_address: ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(desc.scratch_buffer.raw), + ), + }); + + if desc.mode == crate::AccelerationStructureBuildMode::Update { + geometry_info.src_acceleration_structure = desc.destination_acceleration_structure.raw; + } + + let geometry_info = geometry_info.build(); + + let range = &[range][..]; + let range = &[range][..]; + let geometry_info = &[geometry_info]; + + ray_tracing_functions + .acceleration_structure + .cmd_build_acceleration_structures(self.active, geometry_info, range); + } + // render unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index dc5b915970..7677a2ab56 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -491,6 +491,16 @@ pub fn map_buffer_usage(usage: crate::BufferUses) -> vk::BufferUsageFlags { if usage.contains(crate::BufferUses::INDIRECT) { flags |= vk::BufferUsageFlags::INDIRECT_BUFFER; } + if usage.contains(crate::BufferUses::ACCELERATION_STRUCTURE_SCRATCH) { + flags |= 
vk::BufferUsageFlags::STORAGE_BUFFER | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; + } + if usage.intersects( + crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + ) { + flags |= vk::BufferUsageFlags::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_KHR + | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; + } flags } @@ -543,6 +553,15 @@ pub fn map_buffer_usage_to_barrier( stages |= vk::PipelineStageFlags::DRAW_INDIRECT; access |= vk::AccessFlags::INDIRECT_COMMAND_READ; } + if usage.intersects( + crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT + | crate::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, + ) { + stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR + | vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR; + } (stages, access) } @@ -696,6 +715,7 @@ pub fn map_binding_type(ty: wgt::BindingType) -> vk::DescriptorType { wgt::BindingType::Sampler { .. } => vk::DescriptorType::SAMPLER, wgt::BindingType::Texture { .. } => vk::DescriptorType::SAMPLED_IMAGE, wgt::BindingType::StorageTexture { .. } => vk::DescriptorType::STORAGE_IMAGE, + wgt::BindingType::AccelerationStructure => vk::DescriptorType::ACCELERATION_STRUCTURE_KHR, } } @@ -823,3 +843,51 @@ pub fn map_pipeline_statistics( } flags } + +pub fn map_acceleration_structure_format( + format: crate::AccelerationStructureFormat, +) -> vk::AccelerationStructureTypeKHR { + match format { + crate::AccelerationStructureFormat::TopLevel => vk::AccelerationStructureTypeKHR::TOP_LEVEL, + crate::AccelerationStructureFormat::BottomLevel => { + vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL + } + } +} + +pub fn map_acceleration_structure_build_mode( + format: crate::AccelerationStructureBuildMode, +) -> vk::BuildAccelerationStructureModeKHR { + match format { + crate::AccelerationStructureBuildMode::Build => { + vk::BuildAccelerationStructureModeKHR::BUILD + } + crate::AccelerationStructureBuildMode::Update => { + vk::BuildAccelerationStructureModeKHR::UPDATE + } + } +} + +pub fn map_acceleration_structure_flags( + flags: crate::AccelerationStructureBuildFlags, +) -> vk::BuildAccelerationStructureFlagsKHR { + let mut vk_flags = vk::BuildAccelerationStructureFlagsKHR::empty(); + + if flags.contains(crate::AccelerationStructureBuildFlags::PREFER_FAST_TRACE) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_TRACE; + } + + if flags.contains(crate::AccelerationStructureBuildFlags::PREFER_FAST_BUILD) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_BUILD; + } + + if flags.contains(crate::AccelerationStructureBuildFlags::ALLOW_UPDATE) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::ALLOW_UPDATE; + } + + if flags.contains(crate::AccelerationStructureBuildFlags::LOW_MEMORY) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::LOW_MEMORY; + } + + vk_flags +} diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index b9d74e36e3..afff881b18 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -788,7 +788,14 @@ impl crate::Device for super::Device { &*self.shared, gpu_alloc::Request { size: req.size, - align_mask: req.alignment - 1, + align_mask: if desc + .usage + .contains(crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT) + { + 16 + } else { + req.alignment + } - 1, usage: alloc_usage, memory_types: req.memory_type_bits & 
-                align_mask: req.alignment - 1,
+                align_mask: if desc
+                    .usage
+                    .contains(crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT)
+                {
+                    16
+                } else {
+                    req.alignment
+                } - 1,
                 usage: alloc_usage,
                 memory_types: req.memory_type_bits & self.valid_ash_memory_types,
             },
@@ -1136,6 +1143,9 @@ impl crate::Device for super::Device {
                 wgt::BindingType::StorageTexture { .. } => {
                     desc_count.storage_image += count;
                 }
+                wgt::BindingType::AccelerationStructure => {
+                    desc_count.acceleration_structure += count;
+                }
             }
         }
 
@@ -1334,6 +1344,10 @@ impl crate::Device for super::Device {
         let mut buffer_infos = Vec::with_capacity(desc.buffers.len());
         let mut sampler_infos = Vec::with_capacity(desc.samplers.len());
         let mut image_infos = Vec::with_capacity(desc.textures.len());
+        let mut acceleration_structure_infos =
+            Vec::with_capacity(desc.acceleration_structures.len());
+        let mut raw_acceleration_structures =
+            Vec::with_capacity(desc.acceleration_structures.len());
         for entry in desc.entries {
             let (ty, size) = desc.layout.types[entry.binding as usize];
             if size == 0 {
@@ -1343,6 +1357,9 @@ impl crate::Device for super::Device {
                 .dst_set(*set.raw())
                 .dst_binding(entry.binding)
                 .descriptor_type(ty);
+
+            let mut extra_descriptor_count = 0;
+
             write = match ty {
                 vk::DescriptorType::SAMPLER => {
                     let index = sampler_infos.len();
@@ -1391,9 +1408,40 @@ impl crate::Device for super::Device {
                     ));
                     write.buffer_info(&buffer_infos[index..])
                 }
+                vk::DescriptorType::ACCELERATION_STRUCTURE_KHR => {
+                    let index = acceleration_structure_infos.len();
+                    let start = entry.resource_index;
+                    let end = start + entry.count;
+
+                    let raw_start = raw_acceleration_structures.len();
+
+                    raw_acceleration_structures.extend(
+                        desc.acceleration_structures[start as usize..end as usize]
+                            .iter()
+                            .map(|acceleration_structure| acceleration_structure.raw),
+                    );
+
+                    let acceleration_structure_info =
+                        vk::WriteDescriptorSetAccelerationStructureKHR::builder()
+                            .acceleration_structures(&raw_acceleration_structures[raw_start..]);
+
+                    // todo: Dereference the struct to get around lifetime issues. Safe as long as we never resize
+                    // `raw_acceleration_structures`.
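+                    // The acceleration structure write is attached through `push_next`, so ash's
+                    // builder never fills in `descriptor_count`; it is bumped by hand below.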
+                    let acceleration_structure_info: vk::WriteDescriptorSetAccelerationStructureKHR = *acceleration_structure_info;
+
+                    acceleration_structure_infos.push(acceleration_structure_info);
+
+                    extra_descriptor_count += 1;
+
+                    write.push_next(&mut acceleration_structure_infos[index])
+                }
                 _ => unreachable!(),
             };
-            writes.push(write.build());
+
+            let mut write = write.build();
+            write.descriptor_count += extra_descriptor_count;
+
+            writes.push(write);
         }
 
         self.shared.raw.update_descriptor_sets(&writes, &[]);
@@ -1903,6 +1951,171 @@ impl crate::Device for super::Device {
                 .end_frame_capture(raw_vk_instance_dispatch_table, ptr::null_mut())
         }
     }
+
+    unsafe fn get_acceleration_structure_build_sizes(
+        &self,
+        desc: &crate::GetAccelerationStructureBuildSizesDescriptor,
+    ) -> crate::AccelerationStructureBuildSizes {
+        let ray_tracing_functions = match self.shared.extension_fns.ray_tracing {
+            Some(ref functions) => functions,
+            None => panic!("Feature `RAY_TRACING` not enabled"),
+        };
+
+        let geometry = match desc.geometry_info {
+            crate::AccelerationStructureGeometryInfo::Instances => {
+                let instances_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder();
+
+                vk::AccelerationStructureGeometryKHR::builder()
+                    .geometry_type(vk::GeometryTypeKHR::INSTANCES)
+                    .geometry(vk::AccelerationStructureGeometryDataKHR {
+                        instances: *instances_data,
+                    })
+                    .flags(vk::GeometryFlagsKHR::empty())
+            }
+            crate::AccelerationStructureGeometryInfo::Triangles {
+                vertex_format,
+                max_vertex,
+                index_format,
+            } => {
+                let mut triangles_data =
+                    vk::AccelerationStructureGeometryTrianglesDataKHR::builder()
+                        .vertex_format(conv::map_vertex_format(vertex_format))
+                        .max_vertex(max_vertex);
+
+                if let Some(index_format) = index_format {
+                    triangles_data =
+                        triangles_data.index_type(conv::map_index_format(index_format));
+                }
+
+                vk::AccelerationStructureGeometryKHR::builder()
+                    .geometry_type(vk::GeometryTypeKHR::TRIANGLES)
+                    .geometry(vk::AccelerationStructureGeometryDataKHR {
+                        triangles: *triangles_data,
+                    })
+                    .flags(vk::GeometryFlagsKHR::empty())
+            }
+        };
+
+        let geometries = &[*geometry];
+
+        let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder()
+            .ty(conv::map_acceleration_structure_format(desc.format))
+            .mode(conv::map_acceleration_structure_build_mode(desc.mode))
+            .flags(conv::map_acceleration_structure_flags(desc.flags))
+            .geometries(geometries);
+
+        let raw = ray_tracing_functions
+            .acceleration_structure
+            .get_acceleration_structure_build_sizes(
+                vk::AccelerationStructureBuildTypeKHR::DEVICE,
+                &geometry_info,
+                &[desc.primitive_count],
+            );
+
+        crate::AccelerationStructureBuildSizes {
+            acceleration_structure_size: raw.acceleration_structure_size,
+            update_scratch_size: raw.update_scratch_size,
+            build_scratch_size: raw.build_scratch_size,
+        }
+    }
+
+    unsafe fn get_acceleration_structure_device_address(
+        &self,
+        acceleration_structure: &super::AccelerationStructure,
+    ) -> wgt::BufferAddress {
+        let ray_tracing_functions = match self.shared.extension_fns.ray_tracing {
+            Some(ref functions) => functions,
+            None => panic!("Feature `RAY_TRACING` not enabled"),
+        };
+
+        ray_tracing_functions
+            .acceleration_structure
+            .get_acceleration_structure_device_address(
+                &vk::AccelerationStructureDeviceAddressInfoKHR::builder()
+                    .acceleration_structure(acceleration_structure.raw),
+            )
+    }
+
+    unsafe fn create_acceleration_structure(
+        &self,
+        desc: &crate::AccelerationStructureDescriptor,
+    ) -> Result<super::AccelerationStructure, crate::DeviceError> {
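+        // An acceleration structure is backed by an ordinary buffer created with
+        // ACCELERATION_STRUCTURE_STORAGE_KHR usage: allocate and bind that buffer first,
+        // then create the acceleration structure object on top of it.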
+        let ray_tracing_functions = match self.shared.extension_fns.ray_tracing {
+            Some(ref functions) => functions,
+            None => panic!("Feature `RAY_TRACING` not enabled"),
+        };
+
+        let vk_buffer_info = vk::BufferCreateInfo::builder()
+            .size(desc.size)
+            .usage(vk::BufferUsageFlags::ACCELERATION_STRUCTURE_STORAGE_KHR)
+            .sharing_mode(vk::SharingMode::EXCLUSIVE);
+
+        let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?;
+        let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer);
+
+        let block = self.mem_allocator.lock().alloc(
+            &*self.shared,
+            gpu_alloc::Request {
+                size: req.size,
+                align_mask: req.alignment - 1,
+                usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS,
+                memory_types: req.memory_type_bits & self.valid_ash_memory_types,
+            },
+        )?;
+
+        self.shared
+            .raw
+            .bind_buffer_memory(raw_buffer, *block.memory(), block.offset())?;
+
+        if let Some(label) = desc.label {
+            self.shared
+                .set_object_name(vk::ObjectType::BUFFER, raw_buffer, label);
+        }
+
+        let vk_info = vk::AccelerationStructureCreateInfoKHR::builder()
+            .buffer(raw_buffer)
+            .offset(0)
+            .size(desc.size)
+            .ty(conv::map_acceleration_structure_format(desc.format));
+
+        let raw_acceleration_structure = ray_tracing_functions
+            .acceleration_structure
+            .create_acceleration_structure(&vk_info, None)?;
+
+        if let Some(label) = desc.label {
+            self.shared.set_object_name(
+                vk::ObjectType::ACCELERATION_STRUCTURE_KHR,
+                raw_acceleration_structure,
+                label,
+            );
+        }
+
+        Ok(super::AccelerationStructure {
+            raw: raw_acceleration_structure,
+            buffer: raw_buffer,
+            block: Mutex::new(block),
+        })
+    }
+
+    unsafe fn destroy_acceleration_structure(
+        &self,
+        acceleration_structure: super::AccelerationStructure,
+    ) {
+        let ray_tracing_functions = match self.shared.extension_fns.ray_tracing {
+            Some(ref functions) => functions,
+            None => panic!("Feature `RAY_TRACING` not enabled"),
+        };
+
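+        // The acceleration structure owns the buffer and memory block created in
+        // `create_acceleration_structure`, so release those here as well.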
+        ray_tracing_functions
+            .acceleration_structure
+            .destroy_acceleration_structure(acceleration_structure.raw, None);
+        self.shared
+            .raw
+            .destroy_buffer(acceleration_structure.buffer, None);
+        self.mem_allocator
+            .lock()
+            .dealloc(&*self.shared, acceleration_structure.block.into_inner());
+    }
 }
 
 impl From for crate::DeviceError {
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index d3416a50ed..c552a6179b 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -65,6 +65,7 @@ impl crate::Api for Api {
     type Sampler = Sampler;
     type QuerySet = QuerySet;
     type Fence = Fence;
+    type AccelerationStructure = AccelerationStructure;
 
     type BindGroupLayout = BindGroupLayout;
     type BindGroup = BindGroup;
@@ -147,6 +148,12 @@ enum ExtensionFn {
 struct DeviceExtensionFunctions {
     draw_indirect_count: Option<khr::DrawIndirectCount>,
     timeline_semaphore: Option<ExtensionFn<khr::TimelineSemaphore>>,
+    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
+}
+
+struct RayTracingDeviceExtensionFunctions {
+    acceleration_structure: khr::AccelerationStructure,
+    buffer_device_address: khr::BufferDeviceAddress,
 }
 
 /// Set of internal capabilities, which don't show up in the exposed
@@ -344,6 +351,13 @@ pub struct Buffer {
     block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
 }
 
+#[derive(Debug)]
+pub struct AccelerationStructure {
+    raw: vk::AccelerationStructureKHR,
+    buffer: vk::Buffer,
+    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
+}
+
 #[derive(Debug)]
 pub struct Texture {
     raw: vk::Image,
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index 8718d79794..b33702e6ec 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -638,6 +638,14 @@ bitflags::bitflags! {
         /// - DX12
         /// - Metal (Intel and AMD GPUs)
         const WRITE_TIMESTAMP_INSIDE_PASSES = 1 << 41;
+
+        /// Allows for the creation of ray-tracing acceleration structures and ray queries within shaders.
+        ///
+        /// Supported platforms:
+        /// - Vulkan
+        ///
+        /// This is a native-only feature.
+        const RAY_TRACING = 1 << 42;
     }
 }
 
@@ -4040,6 +4048,15 @@ pub enum BindingType {
         /// Dimension of the texture view that is going to be sampled.
        view_dimension: TextureViewDimension,
     },
+
+    /// A ray-tracing acceleration structure binding.
+    ///
+    /// Example GLSL syntax:
+    /// ```cpp,ignore
+    /// layout(binding = 0)
+    /// uniform accelerationStructureEXT as;
+    /// ```
+    AccelerationStructure,
 }
 
 impl BindingType {
diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs
index fcd6c89b78..15b1cd9dcd 100644
--- a/wgpu/src/backend/web.rs
+++ b/wgpu/src/backend/web.rs
@@ -1445,6 +1445,7 @@ impl crate::Context for Context {
                         storage_texture.view_dimension(map_texture_view_dimension(view_dimension));
                         mapped_entry.storage_texture(&storage_texture);
                     }
+                    wgt::BindingType::AccelerationStructure => todo!(),
                 }
 
                 mapped_entry