From 53ff5df5767343f1578f9c9bb93946f770e36b01 Mon Sep 17 00:00:00 2001 From: FL33TW00D Date: Tue, 28 Mar 2023 19:51:58 +0100 Subject: [PATCH 01/45] feat: timestamp query on metal --- Cargo.lock | 13 +++-- Cargo.toml | 4 +- wgpu-core/src/command/compute.rs | 73 +++++++++++++++++++++++++++-- wgpu-hal/Cargo.toml | 2 +- wgpu-hal/src/empty.rs | 2 +- wgpu-hal/src/gles/command.rs | 2 +- wgpu-hal/src/lib.rs | 19 +++++++- wgpu-hal/src/metal/adapter.rs | 6 ++- wgpu-hal/src/metal/command.rs | 61 +++++++++++++++++++----- wgpu-hal/src/metal/device.rs | 37 ++++++++++++++- wgpu-hal/src/metal/mod.rs | 21 ++++++++- wgpu/Cargo.toml | 2 + wgpu/examples/hello-compute/main.rs | 63 +++++++++++++++++++++++-- wgpu/src/backend/direct.rs | 18 +++++++ wgpu/src/lib.rs | 35 +++++++++++++- 15 files changed, 322 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f87a460231..86d583b26d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1495,16 +1495,15 @@ dependencies = [ [[package]] name = "metal" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de11355d1f6781482d027a3b4d4de7825dcedb197bf573e0596d00008402d060" +version = "0.25.0" dependencies = [ "bitflags 1.3.2", "block", "core-graphics-types", - "foreign-types 0.3.2", + "foreign-types 0.5.0", "log", "objc", + "paste", ] [[package]] @@ -1841,6 +1840,12 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "paste" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" + [[package]] name = "percent-encoding" version = "2.2.0" diff --git a/Cargo.toml b/Cargo.toml index ac351a52b2..1f34cf3981 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -90,7 +90,7 @@ winit = "0.27.1" # Metal dependencies block = "0.1" foreign-types = "0.3" -metal = "0.24.0" +metal = "0.25.0" objc = "0.2.5" core-graphics-types = "0.1" @@ -147,7 +147,7 @@ wgpu-types = { path = "./wgpu-types" } #naga = { path = "../naga" } #glow = { path = "../glow" } #d3d12 = { path = "../d3d12-rs" } -#metal = { path = "../metal-rs" } +metal = { path = "../metal-rs" } #web-sys = { path = "../wasm-bindgen/crates/web-sys" } #js-sys = { path = "../wasm-bindgen/crates/js-sys" } #wasm-bindgen = { path = "../wasm-bindgen" } diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index d5b514f194..069a4f8b6c 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -22,9 +22,14 @@ use crate::{ }; use hal::CommandEncoder as _; +#[cfg(any(feature = "serial-pass", feature = "replay"))] +use serde::Deserialize; +#[cfg(any(feature = "serial-pass", feature = "trace"))] +use serde::Serialize; + use thiserror::Error; -use std::{fmt, mem, str}; +use std::{borrow::Cow, fmt, mem, str}; #[doc(hidden)] #[derive(Clone, Copy, Debug)] @@ -90,6 +95,7 @@ pub enum ComputeCommand { pub struct ComputePass { base: BasePass, parent_id: id::CommandEncoderId, + timestamp_writes: Vec, // Resource binding dedupe state. #[cfg_attr(feature = "serial-pass", serde(skip))] @@ -103,6 +109,7 @@ impl ComputePass { Self { base: BasePass::new(&desc.label), parent_id, + timestamp_writes: desc.timestamp_writes.iter().cloned().collect(), current_bind_groups: BindGroupStateChange::new(), current_pipeline: StateChange::new(), @@ -131,9 +138,36 @@ impl fmt::Debug for ComputePass { } } +/// Location to write a timestamp to (beginning or end of the pass). +#[repr(C)] +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] +#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] +#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))] +pub enum ComputePassTimestampLocation { + Beginning = 0, + End = 1, +} + +/// Describes the writing of a single timestamp value. +#[repr(C)] +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] +#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] +pub struct ComputePassTimestampWrite { + /// The query set to write the timestamp to. + pub query_set: id::QuerySetId, + /// The index of the query within the query set to write the timestamp to. + pub query_index: u32, + /// The location of the timestamp + pub location: ComputePassTimestampLocation, +} + #[derive(Clone, Debug, Default)] pub struct ComputePassDescriptor<'a> { pub label: Label<'a>, + /// Defines where and when timestamp values will be written for this pass. + pub timestamp_writes: Cow<'a, [ComputePassTimestampWrite]>, } #[derive(Clone, Debug, Error, Eq, PartialEq)] @@ -321,7 +355,11 @@ impl Global { encoder_id: id::CommandEncoderId, pass: &ComputePass, ) -> Result<(), ComputePassError> { - self.command_encoder_run_compute_pass_impl::(encoder_id, pass.base.as_ref()) + self.command_encoder_run_compute_pass_impl::( + encoder_id, + pass.base.as_ref(), + &pass.timestamp_writes, + ) } #[doc(hidden)] @@ -329,6 +367,7 @@ impl Global { &self, encoder_id: id::CommandEncoderId, base: BasePassRef, + timestamp_writes: &[ComputePassTimestampWrite], ) -> Result<(), ComputePassError> { profiling::scope!("CommandEncoder::run_compute_pass"); let init_scope = PassErrorScope::Pass(encoder_id); @@ -376,6 +415,30 @@ impl Global { let mut string_offset = 0; let mut active_query = None; + let hal_timestamp_writes = timestamp_writes + .iter() + .map(|tw| { + let query_set: &resource::QuerySet = cmd_buf + .trackers + .query_sets + .add_single(&*query_set_guard, tw.query_set) + .ok_or(ComputePassErrorInner::InvalidQuerySet(tw.query_set)) + .map_pass_err(init_scope) + .unwrap(); + + hal::ComputePassTimestampWrite { + query_set: &query_set.raw, + query_index: tw.query_index, + location: match tw.location { + ComputePassTimestampLocation::Beginning => { + hal::ComputePassTimestampLocation::BEGINNING + } + ComputePassTimestampLocation::End => hal::ComputePassTimestampLocation::END, + }, + } + }) + .collect::>(); + cmd_buf.trackers.set_size( Some(&*buffer_guard), Some(&*texture_guard), @@ -388,7 +451,11 @@ impl Global { Some(&*query_set_guard), ); - let hal_desc = hal::ComputePassDescriptor { label: base.label }; + let hal_desc = hal::ComputePassDescriptor { + label: base.label, + timestamp_writes: &hal_timestamp_writes, + }; + unsafe { raw.begin_compute_pass(&hal_desc); } diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 495c241c6e..49e005ca1d 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -102,7 +102,7 @@ d3d12 = { version = "0.6.0", git = "https://github.com/gfx-rs/d3d12-rs", rev = " block = { version = "0.1", optional = true } foreign-types = { version = "0.3", optional = true } -metal = "0.24.0" +metal = "0.25.0" objc = "0.2.5" core-graphics-types = "0.1" diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 1497acad91..b72ea0aade 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -403,7 +403,7 @@ impl crate::CommandEncoder for Encoder { // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {} + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {} unsafe fn end_compute_pass(&mut self) {} unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 5e3a1c52c8..7450523304 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1033,7 +1033,7 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { if let Some(label) = desc.label { let range = self.cmd_buffer.add_marker(label); self.cmd_buffer.commands.push(C::PushDebugGroup(range)); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 1758149380..51539e096a 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -541,7 +541,7 @@ pub trait CommandEncoder: Send + Sync + fmt::Debug { // compute passes // Begins a compute pass, clears all active bindings. - unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor); + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor); unsafe fn end_compute_pass(&mut self); unsafe fn set_compute_pipeline(&mut self, pipeline: &A::ComputePipeline); @@ -1271,9 +1271,24 @@ pub struct RenderPassDescriptor<'a, A: Api> { pub multiview: Option, } +bitflags!( + pub struct ComputePassTimestampLocation: u8 { + const BEGINNING = 1 << 0; + const END = 1 << 1; + } +); + +#[derive(Clone, Debug)] +pub struct ComputePassTimestampWrite<'a, A: Api> { + pub query_set: &'a A::QuerySet, + pub query_index: u32, + pub location: ComputePassTimestampLocation, +} + #[derive(Clone, Debug)] -pub struct ComputePassDescriptor<'a> { +pub struct ComputePassDescriptor<'a, A: Api> { pub label: Label<'a>, + pub timestamp_writes: &'a [ComputePassTimestampWrite<'a, A>], } /// Stores if any API validation error has occurred in this process diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index e5c3de3417..8144d97952 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -740,6 +740,7 @@ impl super::PrivateCapabilities { } else { None }, + supports_timestamp_period: version.at_least((10, 15), (14, 0), os_is_mac), } } @@ -765,7 +766,10 @@ impl super::PrivateCapabilities { | F::TEXTURE_FORMAT_16BIT_NORM | F::SHADER_F16 | F::DEPTH32FLOAT_STENCIL8 - | F::MULTI_DRAW_INDIRECT; + | F::MULTI_DRAW_INDIRECT + | F::TIMESTAMP_QUERY; + + //TODO: if not on apple silicon, we can do timestamps within pass. features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc); features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr); diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 866e163a64..9945d3bb7a 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1,5 +1,7 @@ +use crate::ComputePassTimestampLocation; + use super::{conv, AsNative}; -use std::{borrow::Cow, mem, ops::Range}; +use std::{borrow::Cow, collections::HashMap, mem, ops::Range}; // has to match `Temp::binding_sizes` const WORD_SIZE: usize = 4; @@ -339,14 +341,27 @@ impl crate::CommandEncoder for super::CommandEncoder { _: wgt::BufferSize, // Metal doesn't support queries that are bigger than a single element are not supported ) { let encoder = self.enter_blit(); - let size = (range.end - range.start) as u64 * crate::QUERY_SIZE; - encoder.copy_from_buffer( - &set.raw_buffer, - range.start as u64 * crate::QUERY_SIZE, - &buffer.raw, - offset, - size, - ); + match set.ty { + wgt::QueryType::Occlusion => { + let size = (range.end - range.start) as u64 * crate::QUERY_SIZE; + encoder.copy_from_buffer( + &set.raw_buffer, + range.start as u64 * crate::QUERY_SIZE, + &buffer.raw, + offset, + size, + ); + } + wgt::QueryType::Timestamp => { + encoder.resolve_counters( + &set.counter_sample_buffer.as_ref().unwrap(), + metal::NSRange::new(range.start as u64, range.end as u64), + &buffer.raw, + offset, + ); + } + wgt::QueryType::PipelineStatistics(_) => todo!(), + } } // render @@ -906,15 +921,39 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { self.begin_pass(); let raw = self.raw_cmd_buf.as_ref().unwrap(); + objc::rc::autoreleasepool(|| { - let encoder = raw.new_compute_command_encoder(); + let descriptor = metal::ComputePassDescriptor::new(); + + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(0 as _) //TODO: move inside + .unwrap(); + for (i, at) in desc.timestamp_writes.iter().enumerate() { + //Problem here is that we can't attach the same counter sample buffer + //to the pass descriptor twice. + sba_descriptor + .set_sample_buffer(at.query_set.counter_sample_buffer.as_ref().unwrap()); + match at.location { + ComputePassTimestampLocation::BEGINNING => { + sba_descriptor.set_start_of_encoder_sample_index(at.query_index as _); + } + ComputePassTimestampLocation::END => { + sba_descriptor.set_end_of_encoder_sample_index(at.query_index as _); + } + _ => {} + } + } + + let encoder = raw.compute_command_encoder_with_descriptor(&descriptor); if let Some(label) = desc.label { encoder.set_label(label); } + self.state.compute = Some(encoder.to_owned()); }); } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index f8a1ad9a9f..db3b8e4ee5 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -8,6 +8,7 @@ use std::{ use super::conv; use crate::auxil::map_naga_stage; +use metal::{foreign_types::ForeignTypeRef, MTLCounterSampleBufferDescriptor}; type DeviceResult = Result; @@ -1093,11 +1094,43 @@ impl crate::Device for super::Device { } Ok(super::QuerySet { raw_buffer, + counter_sample_buffer: None, ty: desc.ty, }) } - wgt::QueryType::Timestamp | wgt::QueryType::PipelineStatistics(_) => { - Err(crate::DeviceError::OutOfMemory) + wgt::QueryType::Timestamp => { + let size = desc.count as u64 * crate::QUERY_SIZE; + let device = self.shared.device.lock(); + let destination_buffer = + device.new_buffer(size, metal::MTLResourceOptions::empty()); + + let csb_desc = metal::CounterSampleBufferDescriptor::new(); + csb_desc.set_storage_mode(metal::MTLStorageMode::Shared); + csb_desc.set_sample_count(desc.count as _); + if let Some(label) = desc.label { + csb_desc.set_label(label); + } + + let counter_sets = device.counter_sets(); + let timestamp_counter = counter_sets + .iter() + .find(|cs| cs.name() == "timestamp") + //TODO: better error type? + .ok_or(crate::DeviceError::OutOfMemory)?; + csb_desc.set_counter_set(×tamp_counter); + + let counter_sample_buffer = device + .new_counter_sample_buffer_with_descriptor(&csb_desc) + .map_err(|e| crate::DeviceError::OutOfMemory)?; + + Ok(super::QuerySet { + raw_buffer: destination_buffer, + counter_sample_buffer: Some(counter_sample_buffer), + ty: desc.ty, + }) + } + _ => { + todo!() } } }) diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index b77685bd94..d6316b9556 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -29,6 +29,7 @@ use std::{ use arrayvec::ArrayVec; use foreign_types::ForeignTypeRef as _; +use metal::foreign_types::ForeignTypeRef as _; use parking_lot::Mutex; #[derive(Clone)] @@ -232,6 +233,7 @@ struct PrivateCapabilities { supports_preserve_invariance: bool, supports_shader_primitive_index: bool, has_unified_memory: Option, + supports_timestamp_period: bool, } #[derive(Clone, Debug)] @@ -401,8 +403,21 @@ impl crate::Queue for Queue { } unsafe fn get_timestamp_period(&self) -> f32 { - // TODO: This is hard, see https://github.com/gpuweb/gpuweb/issues/1325 - 1.0 + let queue = self.raw.lock(); + let (mut cpu_timestamp0, mut gpu_timestamp0) = (0_u64, 0_u64); + let device = queue.device().to_owned(); + device.sample_timestamps(&mut cpu_timestamp0, &mut gpu_timestamp0); + if cpu_timestamp0 == 0 || gpu_timestamp0 == 0 { + return 1.0; + } + + let command_buffer = queue.new_command_buffer(); + command_buffer.commit(); + command_buffer.wait_until_scheduled(); + let (mut cpu_timestamp1, mut gpu_timestamp1) = (0_u64, 0_u64); + device.sample_timestamps(&mut cpu_timestamp1, &mut gpu_timestamp1); + + (cpu_timestamp1 - cpu_timestamp0) as f32 / (gpu_timestamp1 - gpu_timestamp0) as f32 } } @@ -696,6 +711,8 @@ unsafe impl Sync for ComputePipeline {} #[derive(Debug)] pub struct QuerySet { raw_buffer: metal::Buffer, + //Metal has a custom buffer for counters. + counter_sample_buffer: Option, ty: wgt::QueryType, } diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml index 69cdbe5a8d..f681ce0212 100644 --- a/wgpu/Cargo.toml +++ b/wgpu/Cargo.toml @@ -233,8 +233,10 @@ web-sys = { workspace = true, features = [ "GpuCompilationMessageType", "GpuComputePassDescriptor", "GpuComputePassEncoder", + "GpuComputePassTimestampWrite", "GpuComputePipeline", "GpuComputePipelineDescriptor", + "GpuComputePassTimestampLocation", "GpuCullMode", "GpuDepthStencilState", "GpuDevice", diff --git a/wgpu/examples/hello-compute/main.rs b/wgpu/examples/hello-compute/main.rs index afdf7744c9..fd999c6766 100644 --- a/wgpu/examples/hello-compute/main.rs +++ b/wgpu/examples/hello-compute/main.rs @@ -1,9 +1,12 @@ use std::{borrow::Cow, str::FromStr}; use wgpu::util::DeviceExt; +use wgpu::Buffer; // Indicates a u32 overflow in an intermediate Collatz value const OVERFLOW: u32 = 0xffffffff; +const NUM_SAMPLES: u64 = 2; + async fn run() { let numbers = if std::env::args().len() <= 1 { let default = vec![1, 2, 3, 4]; @@ -46,7 +49,7 @@ async fn execute_gpu(numbers: &[u32]) -> Option> { .request_device( &wgpu::DeviceDescriptor { label: None, - features: wgpu::Features::empty(), + features: wgpu::Features::empty() | wgpu::Features::TIMESTAMP_QUERY, limits: wgpu::Limits::downlevel_defaults(), }, None, @@ -68,6 +71,14 @@ async fn execute_gpu_inner( queue: &wgpu::Queue, numbers: &[u32], ) -> Option> { + //Create query set + let query_set = device.create_query_set(&wgpu::QuerySetDescriptor { + label: Some("Timestamp query set"), + count: NUM_SAMPLES as u32, + ty: wgpu::QueryType::Timestamp, + }); + let timestamp_period = queue.get_timestamp_period(); + // Loads the shader from WGSL let cs_module = device.create_shader_module(wgpu::ShaderModuleDescriptor { label: None, @@ -127,12 +138,27 @@ async fn execute_gpu_inner( }], }); + let beginning = wgpu::ComputePassTimestampWrite { + query_set: &query_set, + query_index: 0, + location: wgpu::ComputePassTimestampLocation::Beginning, + }; + + let end = wgpu::ComputePassTimestampWrite { + query_set: &query_set, + query_index: 1, + location: wgpu::ComputePassTimestampLocation::End, + }; + // A command encoder executes one or many pipelines. // It is to WebGPU what a command buffer is to Vulkan. let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); { - let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None }); + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: &vec![beginning, end], + }); cpass.set_pipeline(&compute_pipeline); cpass.set_bind_group(0, &bind_group, &[]); cpass.insert_debug_marker("compute collatz iterations"); @@ -142,6 +168,14 @@ async fn execute_gpu_inner( // Will copy data from storage buffer on GPU to staging buffer on CPU. encoder.copy_buffer_to_buffer(&storage_buffer, 0, &staging_buffer, 0, size); + let destination_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("destination buffer"), + size: (std::mem::size_of::() * NUM_SAMPLES as usize) as wgpu::BufferAddress, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + encoder.resolve_query_set(&query_set, 0..NUM_SAMPLES as u32, &destination_buffer, 0); + // Submits command encoder for processing queue.submit(Some(encoder.finish())); @@ -157,7 +191,7 @@ async fn execute_gpu_inner( device.poll(wgpu::Maintain::Wait); // Awaits until `buffer_future` can be read from - if let Some(Ok(())) = receiver.receive().await { + let res = if let Some(Ok(())) = receiver.receive().await { // Gets contents of buffer let data = buffer_slice.get_mapped_range(); // Since contents are got in bytes, this converts these bytes back to u32 @@ -175,8 +209,27 @@ async fn execute_gpu_inner( // Returns data from buffer Some(result) } else { - panic!("failed to run compute on gpu!") - } + None + }; + + destination_buffer + .slice(..) + .map_async(wgpu::MapMode::Read, |_| ()); + device.poll(wgpu::Maintain::Wait); + resolve_timestamps(&destination_buffer, timestamp_period); + + res +} + +fn resolve_timestamps(destination_buffer: &Buffer, timestamp_period: f32) { + let timestamp_view = destination_buffer + .slice(..(std::mem::size_of::() * 2) as wgpu::BufferAddress) + .get_mapped_range(); + + let timestamps: &[u64] = bytemuck::cast_slice(×tamp_view); + log::info!("Timestamps: {:?}", timestamps); + let elapsed_ns = (timestamps[1] - timestamps[0]) as f64 * timestamp_period as f64; + log::info!("Elapsed time: {:.2} μs", elapsed_ns / 1000.0); } fn main() { diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs index 6d181043e8..3f0a5a8cd4 100644 --- a/wgpu/src/backend/direct.rs +++ b/wgpu/src/backend/direct.rs @@ -1812,12 +1812,30 @@ impl crate::Context for Context { _encoder_data: &Self::CommandEncoderData, desc: &ComputePassDescriptor, ) -> (Self::ComputePassId, Self::ComputePassData) { + let timestamp_writes = desc + .timestamp_writes + .as_ref() + .iter() + .map(|t| wgc::command::ComputePassTimestampWrite { + query_set: t.query_set.id.into(), + query_index: t.query_index, + location: match t.location { + crate::ComputePassTimestampLocation::Beginning => { + wgc::command::ComputePassTimestampLocation::Beginning + } + crate::ComputePassTimestampLocation::End => { + wgc::command::ComputePassTimestampLocation::End + } + }, + }) + .collect::>(); ( Unused, wgc::command::ComputePass::new( *encoder, &wgc::command::ComputePassDescriptor { label: desc.label.map(Borrowed), + timestamp_writes: Borrowed(×tamp_writes), }, ), ) diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index ea26098916..9b1c8a0145 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1169,16 +1169,49 @@ pub struct RenderPipelineDescriptor<'a> { } static_assertions::assert_impl_all!(RenderPipelineDescriptor: Send, Sync); +/// Describes the location of a timestamp in a compute pass. +/// +/// For use with [`ComputePassTimestampWrite`]. +/// +/// Corresponds to [WebGPU `GPUComputePassTimestampLocation`]( +/// https://gpuweb.github.io/gpuweb/#enumdef-gpucomputepasstimestamplocation). +#[derive(Clone)] +pub enum ComputePassTimestampLocation { + /// The timestamp is at the start of the compute pass. + Beginning, + /// The timestamp is at the end of the compute pass. + End, +} + +/// Describes the timestamp writes of a compute pass. +/// +/// For use with [`ComputePassDescriptor`]. +/// +/// Corresponds to [WebGPU `GPUComputePassTimestampWrite`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrite). +#[derive(Clone)] +pub struct ComputePassTimestampWrite<'a> { + /// The query set to write to. + pub query_set: &'a QuerySet, + /// The index of the query to write to. + pub query_index: u32, + /// The location of the timestamp. + pub location: ComputePassTimestampLocation, +} +static_assertions::assert_impl_all!(ComputePassTimestampWrite: Send, Sync); + /// Describes the attachments of a compute pass. /// /// For use with [`CommandEncoder::begin_compute_pass`]. /// /// Corresponds to [WebGPU `GPUComputePassDescriptor`]( /// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepassdescriptor). -#[derive(Clone, Debug, Default)] +#[derive(Clone, Default)] pub struct ComputePassDescriptor<'a> { /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification. pub label: Label<'a>, + /// A sequence of ComputePassTimestampWrite values define where and when timestamp values will be written for this pass. + pub timestamp_writes: &'a [ComputePassTimestampWrite<'a>], } static_assertions::assert_impl_all!(ComputePassDescriptor: Send, Sync); From 15c21f440ac8bd6e23161af4e5947ae759268faf Mon Sep 17 00:00:00 2001 From: FL33TW00D Date: Sun, 2 Apr 2023 15:34:05 +0100 Subject: [PATCH 02/45] chore: clippy --- wgpu-core/src/command/compute.rs | 2 +- wgpu-hal/src/metal/command.rs | 6 +++--- wgpu-hal/src/metal/device.rs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index 069a4f8b6c..f3f42036bd 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -151,7 +151,7 @@ pub enum ComputePassTimestampLocation { /// Describes the writing of a single timestamp value. #[repr(C)] -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] #[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] pub struct ComputePassTimestampWrite { diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 9945d3bb7a..8d4907b06a 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -354,7 +354,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } wgt::QueryType::Timestamp => { encoder.resolve_counters( - &set.counter_sample_buffer.as_ref().unwrap(), + set.counter_sample_buffer.as_ref().unwrap(), metal::NSRange::new(range.start as u64, range.end as u64), &buffer.raw, offset, @@ -933,7 +933,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .sample_buffer_attachments() .object_at(0 as _) //TODO: move inside .unwrap(); - for (i, at) in desc.timestamp_writes.iter().enumerate() { + for (_i, at) in desc.timestamp_writes.iter().enumerate() { //Problem here is that we can't attach the same counter sample buffer //to the pass descriptor twice. sba_descriptor @@ -949,7 +949,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } } - let encoder = raw.compute_command_encoder_with_descriptor(&descriptor); + let encoder = raw.compute_command_encoder_with_descriptor(descriptor); if let Some(label) = desc.label { encoder.set_label(label); } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index db3b8e4ee5..c3a8c277c3 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1117,11 +1117,11 @@ impl crate::Device for super::Device { .find(|cs| cs.name() == "timestamp") //TODO: better error type? .ok_or(crate::DeviceError::OutOfMemory)?; - csb_desc.set_counter_set(×tamp_counter); + csb_desc.set_counter_set(timestamp_counter); let counter_sample_buffer = device .new_counter_sample_buffer_with_descriptor(&csb_desc) - .map_err(|e| crate::DeviceError::OutOfMemory)?; + .map_err(|_| crate::DeviceError::OutOfMemory)?; Ok(super::QuerySet { raw_buffer: destination_buffer, From 31b5870f7280b875cb7ec82e723e50bd2c203da0 Mon Sep 17 00:00:00 2001 From: FL33TW00D Date: Sun, 2 Apr 2023 15:42:07 +0100 Subject: [PATCH 03/45] chore: CHANGELOG --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d042c06be..92808b81ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -154,6 +154,13 @@ Some texture format names have changed to get back in line with the spec. By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). +#### Pass timestamp queries + +Addition of `TimestampWrites` to compute and render passes to allow profiling. +This brings us in line with the spec. + +By @FL33TW00D & @wumpf in [#3636](https://github.com/gfx-rs/wgpu/pull/3636). + #### Misc Breaking Changes - Change type of `mip_level_count` and `array_layer_count` (members of `TextureViewDescriptor` and `ImageSubresourceRange`) from `Option` to `Option`. By @teoxoy in [#3445](https://github.com/gfx-rs/wgpu/pull/3445) From f86bff8bd3bb34b750f04f08eeb9f15966ff07e1 Mon Sep 17 00:00:00 2001 From: FL33TW00D Date: Sun, 2 Apr 2023 17:39:35 +0100 Subject: [PATCH 04/45] chore: initial render mock --- wgpu-core/src/command/clear.rs | 1 + wgpu-core/src/command/render.rs | 33 +++++++++++++++++++++++++++ wgpu-hal/src/lib.rs | 15 +++++++++++++ wgpu-hal/src/metal/command.rs | 28 ++++++++++++++++++----- wgpu/src/backend/direct.rs | 19 ++++++++++++++++ wgpu/src/lib.rs | 40 ++++++++++++++++++++++++++++++--- 6 files changed, 128 insertions(+), 8 deletions(-) diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index de11258c42..1d9a4976d5 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -448,6 +448,7 @@ fn clear_texture_via_render_passes( color_attachments, depth_stencil_attachment, multiview: None, + timestamp_writes: &[], }); encoder.end_render_pass(); } diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index b38e79c4b4..a6238f474c 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -175,6 +175,31 @@ impl RenderPassDepthStencilAttachment { } } +/// Location to write a timestamp to (beginning or end of the pass). +#[repr(C)] +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] +#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] +#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))] +pub enum RenderPassTimestampLocation { + Beginning = 0, + End = 1, +} + +/// Describes the writing of a single timestamp value. +#[repr(C)] +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] +#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] +pub struct RenderPassTimestampWrite { + /// The query set to write the timestamp to. + pub query_set: id::QuerySetId, + /// The index of the query within the query set to write the timestamp to. + pub query_index: u32, + /// The location of the timestamp + pub location: RenderPassTimestampLocation, +} + /// Describes the attachments of a render pass. #[derive(Clone, Debug, Default, PartialEq)] pub struct RenderPassDescriptor<'a> { @@ -183,6 +208,8 @@ pub struct RenderPassDescriptor<'a> { pub color_attachments: Cow<'a, [Option]>, /// The depth and stencil attachment of the render pass, if any. pub depth_stencil_attachment: Option<&'a RenderPassDepthStencilAttachment>, + /// Defines where and when timestamp values will be written for this pass. + pub timestamp_writes: Cow<'a, [RenderPassTimestampWrite]>, } #[cfg_attr(feature = "serial-pass", derive(Deserialize, Serialize))] @@ -191,6 +218,7 @@ pub struct RenderPass { parent_id: id::CommandEncoderId, color_targets: ArrayVec, { hal::MAX_COLOR_ATTACHMENTS }>, depth_stencil_target: Option, + timestamp_writes: Vec, // Resource binding dedupe state. #[cfg_attr(feature = "serial-pass", serde(skip))] @@ -206,6 +234,7 @@ impl RenderPass { parent_id, color_targets: desc.color_attachments.iter().cloned().collect(), depth_stencil_target: desc.depth_stencil_attachment.cloned(), + timestamp_writes: desc.timestamp_writes.iter().cloned().collect(), current_bind_groups: BindGroupStateChange::new(), current_pipeline: StateChange::new(), @@ -1086,6 +1115,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { color_attachments: &colors, depth_stencil_attachment: depth_stencil, multiview, + timestamp_writes: &[], //TODO }; unsafe { cmd_buf.encoder.raw.begin_render_pass(&hal_desc); @@ -1173,6 +1203,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { clear_value: (0.0, 0), }), multiview: self.multiview, + timestamp_writes: &[], //TODO }; unsafe { raw.begin_render_pass(&desc); @@ -1197,6 +1228,7 @@ impl Global { pass.base.as_ref(), &pass.color_targets, pass.depth_stencil_target.as_ref(), + &pass.timestamp_writes, ) } @@ -1207,6 +1239,7 @@ impl Global { base: BasePassRef, color_attachments: &[Option], depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, + timestamp_writes: &[RenderPassTimestampWrite], ) -> Result<(), RenderPassError> { profiling::scope!("CommandEncoder::run_render_pass"); let init_scope = PassErrorScope::Pass(encoder_id); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 51539e096a..512f82e84e 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1261,6 +1261,20 @@ pub struct DepthStencilAttachment<'a, A: Api> { pub clear_value: (f32, u32), } +bitflags!( + pub struct RenderPassTimestampLocation: u8 { + const BEGINNING = 1 << 0; + const END = 1 << 1; + } +); + +#[derive(Clone, Debug)] +pub struct RenderPassTimestampWrite<'a, A: Api> { + pub query_set: &'a A::QuerySet, + pub query_index: u32, + pub location: RenderPassTimestampLocation, +} + #[derive(Clone, Debug)] pub struct RenderPassDescriptor<'a, A: Api> { pub label: Label<'a>, @@ -1269,6 +1283,7 @@ pub struct RenderPassDescriptor<'a, A: Api> { pub color_attachments: &'a [Option>], pub depth_stencil_attachment: Option>, pub multiview: Option, + pub timestamp_writes: &'a [RenderPassTimestampWrite<'a, A>], } bitflags!( diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 8d4907b06a..c2e0525c56 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1,7 +1,5 @@ -use crate::ComputePassTimestampLocation; - use super::{conv, AsNative}; -use std::{borrow::Cow, collections::HashMap, mem, ops::Range}; +use std::{borrow::Cow, mem, ops::Range}; // has to match `Temp::binding_sizes` const WORD_SIZE: usize = 4; @@ -441,6 +439,26 @@ impl crate::CommandEncoder for super::CommandEncoder { } } + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(0 as _) //TODO: move inside + .unwrap(); + for (_i, at) in desc.timestamp_writes.iter().enumerate() { + //Problem here is that we can't attach the same counter sample buffer + //to the pass descriptor twice. + sba_descriptor + .set_sample_buffer(at.query_set.counter_sample_buffer.as_ref().unwrap()); + match at.location { + crate::RenderPassTimestampLocation::BEGINNING => { + sba_descriptor.set_start_of_vertex_sample_index(at.query_index as _); + } + crate::RenderPassTimestampLocation::END => { + sba_descriptor.set_end_of_fragment_sample_index(at.query_index as _); + } + _ => {} + } + } + let raw = self.raw_cmd_buf.as_ref().unwrap(); let encoder = raw.new_render_command_encoder(descriptor); if let Some(label) = desc.label { @@ -939,10 +957,10 @@ impl crate::CommandEncoder for super::CommandEncoder { sba_descriptor .set_sample_buffer(at.query_set.counter_sample_buffer.as_ref().unwrap()); match at.location { - ComputePassTimestampLocation::BEGINNING => { + crate::ComputePassTimestampLocation::BEGINNING => { sba_descriptor.set_start_of_encoder_sample_index(at.query_index as _); } - ComputePassTimestampLocation::END => { + crate::ComputePassTimestampLocation::END => { sba_descriptor.set_end_of_encoder_sample_index(at.query_index as _); } _ => {} diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs index 3f0a5a8cd4..2731efea76 100644 --- a/wgpu/src/backend/direct.rs +++ b/wgpu/src/backend/direct.rs @@ -1899,6 +1899,24 @@ impl crate::Context for Context { } }); + let timestamp_writes = desc + .timestamp_writes + .as_ref() + .iter() + .map(|t| wgc::command::RenderPassTimestampWrite { + query_set: t.query_set.id.into(), + query_index: t.query_index, + location: match t.location { + crate::RenderPassTimestampLocation::Beginning => { + wgc::command::RenderPassTimestampLocation::Beginning + } + crate::RenderPassTimestampLocation::End => { + wgc::command::RenderPassTimestampLocation::End + } + }, + }) + .collect::>(); + ( Unused, wgc::command::RenderPass::new( @@ -1907,6 +1925,7 @@ impl crate::Context for Context { label: desc.label.map(Borrowed), color_attachments: Borrowed(&colors), depth_stencil_attachment: depth_stencil.as_ref(), + timestamp_writes: Borrowed(×tamp_writes), }, ), ) diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 9b1c8a0145..fc0970b22b 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -689,6 +689,7 @@ impl Drop for RenderBundle { /// It can be created with [`Device::create_query_set`]. /// /// Corresponds to [WebGPU `GPUQuerySet`](https://gpuweb.github.io/gpuweb/#queryset). +#[derive(Debug)] pub struct QuerySet { context: Arc, id: ObjectId, @@ -827,6 +828,37 @@ impl Default for Operations { } } +/// Describes the location of a timestamp in a render pass. +/// +/// For use with [`RenderPassTimestampWrite`]. +/// +/// Corresponds to [WebGPU `GPURenderPassTimestampLocation`]( +/// https://gpuweb.github.io/gpuweb/#enumdef-gpurenderpasstimestamplocation). +#[derive(Clone, Debug)] +pub enum RenderPassTimestampLocation { + /// The timestamp is at the start of the render pass. + Beginning, + /// The timestamp is at the end of the render pass. + End, +} + +/// Describes the timestamp writes of a render pass. +/// +/// For use with [`RenderPassDescriptor`]. +/// +/// Corresponds to [WebGPU `GPURenderPassTimestampWrite`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpasstimestampwrite). +#[derive(Clone, Debug)] +pub struct RenderPassTimestampWrite<'a> { + /// The query set to write to. + pub query_set: &'a QuerySet, + /// The index of the query to write to. + pub query_index: u32, + /// The location of the timestamp. + pub location: RenderPassTimestampLocation, +} +static_assertions::assert_impl_all!(RenderPassTimestampWrite: Send, Sync); + /// Describes a color attachment to a [`RenderPass`]. /// /// For use with [`RenderPassDescriptor`]. @@ -1085,6 +1117,8 @@ pub struct RenderPassDescriptor<'tex, 'desc> { pub color_attachments: &'desc [Option>], /// The depth and stencil attachment of the render pass, if any. pub depth_stencil_attachment: Option>, + /// A sequence of RenderPassTimestampWrite values define where and when timestamp values will be written for this pass. + pub timestamp_writes: &'desc [RenderPassTimestampWrite<'desc>], } static_assertions::assert_impl_all!(RenderPassDescriptor: Send, Sync); @@ -1175,7 +1209,7 @@ static_assertions::assert_impl_all!(RenderPipelineDescriptor: Send, Sync); /// /// Corresponds to [WebGPU `GPUComputePassTimestampLocation`]( /// https://gpuweb.github.io/gpuweb/#enumdef-gpucomputepasstimestamplocation). -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum ComputePassTimestampLocation { /// The timestamp is at the start of the compute pass. Beginning, @@ -1189,7 +1223,7 @@ pub enum ComputePassTimestampLocation { /// /// Corresponds to [WebGPU `GPUComputePassTimestampWrite`]( /// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrite). -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct ComputePassTimestampWrite<'a> { /// The query set to write to. pub query_set: &'a QuerySet, @@ -1206,7 +1240,7 @@ static_assertions::assert_impl_all!(ComputePassTimestampWrite: Send, Sync); /// /// Corresponds to [WebGPU `GPUComputePassDescriptor`]( /// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepassdescriptor). -#[derive(Clone, Default)] +#[derive(Clone, Default, Debug)] pub struct ComputePassDescriptor<'a> { /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification. pub label: Label<'a>, From cd5d1470a40d049229000919e335857f6b31b317 Mon Sep 17 00:00:00 2001 From: FL33TW00D Date: Sun, 2 Apr 2023 18:03:46 +0100 Subject: [PATCH 05/45] chore: more work --- wgpu-core/src/command/render.rs | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index a6238f474c..85d8014693 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -18,7 +18,7 @@ use crate::{ id, init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction}, pipeline::{self, PipelineFlags}, - resource::{self, Buffer, Texture, TextureView, TextureViewNotRenderableReason}, + resource::{self, Buffer, QuerySet, Texture, TextureView, TextureViewNotRenderableReason}, track::{TextureSelector, UsageConflict, UsageScope}, validation::{ check_buffer_usage, check_texture_usage, MissingBufferUsageError, MissingTextureUsageError, @@ -614,6 +614,8 @@ pub enum RenderPassErrorInner { "Multiview pass texture views with more than one array layer must have D2Array dimension" )] MultiViewDimensionMismatch, + #[error("QuerySet {0:?} is invalid")] + InvalidQuerySet(id::QuerySetId), } impl PrettyError for RenderPassErrorInner { @@ -743,10 +745,12 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { label: Option<&str>, color_attachments: &[Option], depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, + timestamp_writes: &[RenderPassTimestampWrite], cmd_buf: &mut CommandBuffer, view_guard: &'a Storage, id::TextureViewId>, buffer_guard: &'a Storage, id::BufferId>, texture_guard: &'a Storage, id::TextureId>, + query_set_guard: &'a Storage, id::QuerySetId>, ) -> Result { profiling::scope!("RenderPassInfo::start"); @@ -1108,6 +1112,27 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { multiview, }; + let mut hal_timestamp_writes = Vec::with_capacity(timestamp_writes.len()); + for (index, tw) in timestamp_writes.iter().enumerate() { + let query_set: &resource::QuerySet = cmd_buf + .trackers + .query_sets + .add_single(&*query_set_guard, tw.query_set) + .ok_or(RenderPassErrorInner::InvalidQuerySet(tw.query_set))?; + + let hal_tw = hal::RenderPassTimestampWrite { + query_set: &query_set.raw, + query_index: tw.query_index, + location: match tw.location { + RenderPassTimestampLocation::Beginning => { + hal::RenderPassTimestampLocation::BEGINNING + } + RenderPassTimestampLocation::End => hal::RenderPassTimestampLocation::END, + }, + }; + hal_timestamp_writes.push(hal_tw); + } + let hal_desc = hal::RenderPassDescriptor { label, extent, @@ -1115,7 +1140,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { color_attachments: &colors, depth_stencil_attachment: depth_stencil, multiview, - timestamp_writes: &[], //TODO + timestamp_writes: &hal_timestamp_writes, }; unsafe { cmd_buf.encoder.raw.begin_render_pass(&hal_desc); @@ -1292,10 +1317,12 @@ impl Global { base.label, color_attachments, depth_stencil_attachment, + timestamp_writes, cmd_buf, &*view_guard, &*buffer_guard, &*texture_guard, + &*query_set_guard, ) .map_pass_err(init_scope)?; From 326de792bd1ca49d650985dd61630862ccf84286 Mon Sep 17 00:00:00 2001 From: FL33TW00D Date: Sun, 2 Apr 2023 19:09:49 +0100 Subject: [PATCH 06/45] chore: working render example --- wgpu/examples/hello-triangle/main.rs | 58 +++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/wgpu/examples/hello-triangle/main.rs b/wgpu/examples/hello-triangle/main.rs index 98abf5b8d5..27029a582b 100644 --- a/wgpu/examples/hello-triangle/main.rs +++ b/wgpu/examples/hello-triangle/main.rs @@ -5,6 +5,8 @@ use winit::{ window::Window, }; +const NUM_SAMPLES: usize = 2; + async fn run(event_loop: EventLoop<()>, window: Window) { let size = window.inner_size(); @@ -26,7 +28,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) { .request_device( &wgpu::DeviceDescriptor { label: None, - features: wgpu::Features::empty(), + features: wgpu::Features::empty() | wgpu::Features::TIMESTAMP_QUERY, // Make sure we use the texture resolution limits from the adapter, so we can support images the size of the swapchain. limits: wgpu::Limits::downlevel_webgl2_defaults() .using_resolution(adapter.limits()), @@ -42,6 +44,12 @@ async fn run(event_loop: EventLoop<()>, window: Window) { source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("shader.wgsl"))), }); + let query_set = device.create_query_set(&wgpu::QuerySetDescriptor { + label: Some("Timestamp query set"), + count: NUM_SAMPLES as u32, + ty: wgpu::QueryType::Timestamp, + }); + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, bind_group_layouts: &[], @@ -82,11 +90,20 @@ async fn run(event_loop: EventLoop<()>, window: Window) { surface.configure(&device, &config); + let destination_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("destination buffer"), + size: (std::mem::size_of::() * NUM_SAMPLES as usize) as wgpu::BufferAddress, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let timestamp_period = queue.get_timestamp_period(); + event_loop.run(move |event, _, control_flow| { // Have the closure take ownership of the resources. // `event_loop.run` never returns, therefore we must do this to ensure // the resources are properly cleaned up. - let _ = (&instance, &adapter, &shader, &pipeline_layout); + let _ = (&instance, &adapter, &shader, &pipeline_layout, &query_set); *control_flow = ControlFlow::Wait; match event { @@ -122,13 +139,37 @@ async fn run(event_loop: EventLoop<()>, window: Window) { }, })], depth_stencil_attachment: None, + timestamp_writes: &[ + wgpu::RenderPassTimestampWrite { + query_set: &query_set, + query_index: 0, + location: wgpu::RenderPassTimestampLocation::Beginning, + }, + wgpu::RenderPassTimestampWrite { + query_set: &query_set, + query_index: 1, + location: wgpu::RenderPassTimestampLocation::End, + }, + ], }); rpass.set_pipeline(&render_pipeline); rpass.draw(0..3, 0..1); } + encoder.resolve_query_set( + &query_set, + 0..NUM_SAMPLES as u32, + &destination_buffer, + 0, + ); queue.submit(Some(encoder.finish())); frame.present(); + + destination_buffer + .slice(..) + .map_async(wgpu::MapMode::Read, |_| ()); + device.poll(wgpu::Maintain::Wait); + resolve_timestamps(&destination_buffer, timestamp_period); } Event::WindowEvent { event: WindowEvent::CloseRequested, @@ -139,6 +180,19 @@ async fn run(event_loop: EventLoop<()>, window: Window) { }); } +fn resolve_timestamps(destination_buffer: &wgpu::Buffer, timestamp_period: f32) { + { + let timestamp_view = destination_buffer + .slice(..(std::mem::size_of::() * 2) as wgpu::BufferAddress) + .get_mapped_range(); + + let timestamps: &[u64] = bytemuck::cast_slice(×tamp_view); + let elapsed_ns = (timestamps[1] - timestamps[0]) as f64 * timestamp_period as f64; + log::info!("Elapsed time: {:.2} μs", elapsed_ns / 1000.0); + } + destination_buffer.unmap(); +} + fn main() { let event_loop = EventLoop::new(); let window = winit::window::Window::new(&event_loop).unwrap(); From 92329a636843e2482a4b75c02302b1a2f6cf81ed Mon Sep 17 00:00:00 2001 From: FL33TW00D Date: Sun, 2 Apr 2023 19:52:07 +0100 Subject: [PATCH 07/45] chore: update tests, one failing --- wgpu-core/src/command/render.rs | 2 +- wgpu-hal/examples/halmark/main.rs | 1 + wgpu-hal/examples/raw-gles.rs | 1 + wgpu/examples/boids/main.rs | 7 +++++-- wgpu/examples/bunnymark/main.rs | 1 + wgpu/examples/capture/main.rs | 1 + wgpu/examples/conservative-raster/main.rs | 2 ++ wgpu/examples/cube/main.rs | 1 + wgpu/examples/hello-compute/tests.rs | 4 ++++ wgpu/examples/hello-triangle/main.rs | 12 ++++++------ wgpu/examples/hello-windows/main.rs | 1 + wgpu/examples/mipmap/main.rs | 2 ++ wgpu/examples/msaa-line/main.rs | 1 + wgpu/examples/shadow/main.rs | 2 ++ wgpu/examples/skybox/main.rs | 1 + wgpu/examples/stencil-triangles/main.rs | 1 + wgpu/examples/texture-arrays/main.rs | 1 + wgpu/examples/water/main.rs | 3 +++ wgpu/tests/regression/issue_3457.rs | 2 ++ wgpu/tests/shader/mod.rs | 1 + wgpu/tests/shader_primitive_index/mod.rs | 1 + wgpu/tests/shader_view_format/mod.rs | 1 + wgpu/tests/vertex_indices/mod.rs | 1 + wgpu/tests/zero_init_texture_after_discard.rs | 4 ++++ 24 files changed, 45 insertions(+), 9 deletions(-) diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 85d8014693..eb4a6e275a 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -1228,7 +1228,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { clear_value: (0.0, 0), }), multiview: self.multiview, - timestamp_writes: &[], //TODO + timestamp_writes: &[], }; unsafe { raw.begin_render_pass(&desc); diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 588de61063..680872c441 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -679,6 +679,7 @@ impl Example { })], depth_stencil_attachment: None, multiview: None, + timestamp_writes: &[], }; unsafe { ctx.encoder.begin_render_pass(&pass_desc); diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs index d9dfc492fa..ae06a59f82 100644 --- a/wgpu-hal/examples/raw-gles.rs +++ b/wgpu-hal/examples/raw-gles.rs @@ -174,6 +174,7 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height })], depth_stencil_attachment: None, multiview: None, + timestamp_writes: &[], }; unsafe { encoder.begin_encoding(None).unwrap(); diff --git a/wgpu/examples/boids/main.rs b/wgpu/examples/boids/main.rs index a906bde3c8..2dd469785b 100644 --- a/wgpu/examples/boids/main.rs +++ b/wgpu/examples/boids/main.rs @@ -286,6 +286,7 @@ impl framework::Example for Example { label: None, color_attachments: &color_attachments, depth_stencil_attachment: None, + timestamp_writes: &[], }; // get command encoder @@ -295,8 +296,10 @@ impl framework::Example for Example { command_encoder.push_debug_group("compute boid movement"); { // compute pass - let mut cpass = - command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None }); + let mut cpass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: &[], + }); cpass.set_pipeline(&self.compute_pipeline); cpass.set_bind_group(0, &self.particle_bind_groups[self.frame_num % 2], &[]); cpass.dispatch_workgroups(self.work_group_count, 1, 1); diff --git a/wgpu/examples/bunnymark/main.rs b/wgpu/examples/bunnymark/main.rs index 2154be0d01..0422f83546 100644 --- a/wgpu/examples/bunnymark/main.rs +++ b/wgpu/examples/bunnymark/main.rs @@ -339,6 +339,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); rpass.set_pipeline(&self.pipeline); rpass.set_bind_group(0, &self.global_group, &[]); diff --git a/wgpu/examples/capture/main.rs b/wgpu/examples/capture/main.rs index 252777355a..344a24cab1 100644 --- a/wgpu/examples/capture/main.rs +++ b/wgpu/examples/capture/main.rs @@ -104,6 +104,7 @@ async fn create_red_image_with_dimensions( }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); // Copy the data from the texture to the buffer diff --git a/wgpu/examples/conservative-raster/main.rs b/wgpu/examples/conservative-raster/main.rs index 69336cc993..156d717662 100644 --- a/wgpu/examples/conservative-raster/main.rs +++ b/wgpu/examples/conservative-raster/main.rs @@ -276,6 +276,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); rpass.set_pipeline(&self.pipeline_triangle_conservative); @@ -295,6 +296,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); rpass.set_pipeline(&self.pipeline_upscale); diff --git a/wgpu/examples/cube/main.rs b/wgpu/examples/cube/main.rs index 2be0fba5e9..e1b935a76f 100644 --- a/wgpu/examples/cube/main.rs +++ b/wgpu/examples/cube/main.rs @@ -379,6 +379,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); rpass.push_debug_group("Prepare data for draw."); rpass.set_pipeline(&self.pipeline); diff --git a/wgpu/examples/hello-compute/tests.rs b/wgpu/examples/hello-compute/tests.rs index 52e62d1c81..6a6040ee9d 100644 --- a/wgpu/examples/hello-compute/tests.rs +++ b/wgpu/examples/hello-compute/tests.rs @@ -15,6 +15,7 @@ fn test_compute_1() { TestParameters::default() .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) .limits(wgpu::Limits::downlevel_defaults()) + .features(wgpu::Features::TIMESTAMP_QUERY) .specific_failure(None, None, Some("V3D"), true), |ctx| { let input = &[1, 2, 3, 4]; @@ -36,6 +37,7 @@ fn test_compute_2() { TestParameters::default() .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) .limits(wgpu::Limits::downlevel_defaults()) + .features(wgpu::Features::TIMESTAMP_QUERY) .specific_failure(None, None, Some("V3D"), true), |ctx| { let input = &[5, 23, 10, 9]; @@ -57,6 +59,7 @@ fn test_compute_overflow() { TestParameters::default() .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) .limits(wgpu::Limits::downlevel_defaults()) + .features(wgpu::Features::TIMESTAMP_QUERY) .specific_failure(None, None, Some("V3D"), true), |ctx| { let input = &[77031, 837799, 8400511, 63728127]; @@ -77,6 +80,7 @@ fn test_multithreaded_compute() { TestParameters::default() .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) .limits(wgpu::Limits::downlevel_defaults()) + .features(wgpu::Features::TIMESTAMP_QUERY) .specific_failure(None, None, Some("V3D"), true) // https://github.com/gfx-rs/wgpu/issues/3250 .specific_failure(Some(wgpu::Backends::GL), None, Some("llvmpipe"), true), diff --git a/wgpu/examples/hello-triangle/main.rs b/wgpu/examples/hello-triangle/main.rs index 27029a582b..fa2a017b3c 100644 --- a/wgpu/examples/hello-triangle/main.rs +++ b/wgpu/examples/hello-triangle/main.rs @@ -182,13 +182,13 @@ async fn run(event_loop: EventLoop<()>, window: Window) { fn resolve_timestamps(destination_buffer: &wgpu::Buffer, timestamp_period: f32) { { - let timestamp_view = destination_buffer - .slice(..(std::mem::size_of::() * 2) as wgpu::BufferAddress) - .get_mapped_range(); + let timestamp_view = destination_buffer + .slice(..(std::mem::size_of::() * 2) as wgpu::BufferAddress) + .get_mapped_range(); - let timestamps: &[u64] = bytemuck::cast_slice(×tamp_view); - let elapsed_ns = (timestamps[1] - timestamps[0]) as f64 * timestamp_period as f64; - log::info!("Elapsed time: {:.2} μs", elapsed_ns / 1000.0); + let timestamps: &[u64] = bytemuck::cast_slice(×tamp_view); + let elapsed_ns = (timestamps[1] - timestamps[0]) as f64 * timestamp_period as f64; + log::info!("Elapsed time: {:.2} μs", elapsed_ns / 1000.0); } destination_buffer.unmap(); } diff --git a/wgpu/examples/hello-windows/main.rs b/wgpu/examples/hello-windows/main.rs index c6798d865f..0cf4a0d1b1 100644 --- a/wgpu/examples/hello-windows/main.rs +++ b/wgpu/examples/hello-windows/main.rs @@ -135,6 +135,7 @@ async fn run(event_loop: EventLoop<()>, viewports: Vec<(Window, wgpu::Color)>) { }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); } diff --git a/wgpu/examples/mipmap/main.rs b/wgpu/examples/mipmap/main.rs index 6175685dec..e49748ee60 100644 --- a/wgpu/examples/mipmap/main.rs +++ b/wgpu/examples/mipmap/main.rs @@ -163,6 +163,7 @@ impl Example { }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); if let Some(ref query_sets) = query_sets { rpass.write_timestamp(&query_sets.timestamp, timestamp_query_index_base); @@ -474,6 +475,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); rpass.set_pipeline(&self.draw_pipeline); rpass.set_bind_group(0, &self.bind_group, &[]); diff --git a/wgpu/examples/msaa-line/main.rs b/wgpu/examples/msaa-line/main.rs index 1de7044bc3..e1455ddb6f 100644 --- a/wgpu/examples/msaa-line/main.rs +++ b/wgpu/examples/msaa-line/main.rs @@ -303,6 +303,7 @@ impl framework::Example for Example { label: None, color_attachments: &[Some(rpass_color_attachment)], depth_stencil_attachment: None, + timestamp_writes: &[], }) .execute_bundles(iter::once(&self.bundle)); } diff --git a/wgpu/examples/shadow/main.rs b/wgpu/examples/shadow/main.rs index b5acb59530..538cecca7c 100644 --- a/wgpu/examples/shadow/main.rs +++ b/wgpu/examples/shadow/main.rs @@ -780,6 +780,7 @@ impl framework::Example for Example { }), stencil_ops: None, }), + timestamp_writes: &[], }); pass.set_pipeline(&self.shadow_pass.pipeline); pass.set_bind_group(0, &self.shadow_pass.bind_group, &[]); @@ -822,6 +823,7 @@ impl framework::Example for Example { }), stencil_ops: None, }), + timestamp_writes: &[], }); pass.set_pipeline(&self.forward_pass.pipeline); pass.set_bind_group(0, &self.forward_pass.bind_group, &[]); diff --git a/wgpu/examples/skybox/main.rs b/wgpu/examples/skybox/main.rs index d5a5652b7e..c02438dec7 100644 --- a/wgpu/examples/skybox/main.rs +++ b/wgpu/examples/skybox/main.rs @@ -442,6 +442,7 @@ impl framework::Example for Skybox { }), stencil_ops: None, }), + timestamp_writes: &[], }); rpass.set_bind_group(0, &self.bind_group, &[]); diff --git a/wgpu/examples/stencil-triangles/main.rs b/wgpu/examples/stencil-triangles/main.rs index aaed5c08a9..2ec04bfe85 100644 --- a/wgpu/examples/stencil-triangles/main.rs +++ b/wgpu/examples/stencil-triangles/main.rs @@ -214,6 +214,7 @@ impl framework::Example for Triangles { store: true, }), }), + timestamp_writes: &[], }); rpass.set_stencil_reference(1); diff --git a/wgpu/examples/texture-arrays/main.rs b/wgpu/examples/texture-arrays/main.rs index 1ddf681ab4..1edf2ea20e 100644 --- a/wgpu/examples/texture-arrays/main.rs +++ b/wgpu/examples/texture-arrays/main.rs @@ -386,6 +386,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); rpass.set_pipeline(&self.pipeline); diff --git a/wgpu/examples/water/main.rs b/wgpu/examples/water/main.rs index da7ad3aaee..92eee2e575 100644 --- a/wgpu/examples/water/main.rs +++ b/wgpu/examples/water/main.rs @@ -755,6 +755,7 @@ impl framework::Example for Example { }), stencil_ops: None, }), + timestamp_writes: &[], }); rpass.execute_bundles([&self.terrain_bundle]); @@ -780,6 +781,7 @@ impl framework::Example for Example { }), stencil_ops: None, }), + timestamp_writes: &[], }); rpass.set_pipeline(&self.terrain_pipeline); rpass.set_bind_group(0, &self.terrain_normal_bind_group, &[]); @@ -804,6 +806,7 @@ impl framework::Example for Example { depth_ops: None, stencil_ops: None, }), + timestamp_writes: &[], }); rpass.set_pipeline(&self.water_pipeline); diff --git a/wgpu/tests/regression/issue_3457.rs b/wgpu/tests/regression/issue_3457.rs index 2fb010103f..ef90017ea5 100644 --- a/wgpu/tests/regression/issue_3457.rs +++ b/wgpu/tests/regression/issue_3457.rs @@ -144,6 +144,7 @@ fn pass_reset_vertex_buffer() { }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); double_rpass.set_pipeline(&double_pipeline); @@ -177,6 +178,7 @@ fn pass_reset_vertex_buffer() { }, })], depth_stencil_attachment: None, + timestamp_writes: &[], }); single_rpass.set_pipeline(&single_pipeline); diff --git a/wgpu/tests/shader/mod.rs b/wgpu/tests/shader/mod.rs index 518b7f940b..4e98ea85cd 100644 --- a/wgpu/tests/shader/mod.rs +++ b/wgpu/tests/shader/mod.rs @@ -326,6 +326,7 @@ fn shader_input_output_test( let mut cpass = encoder.begin_compute_pass(&ComputePassDescriptor { label: Some(&format!("cpass {test_name}")), + timestamp_writes: &[], }); cpass.set_pipeline(&pipeline); cpass.set_bind_group(0, &bg, &[]); diff --git a/wgpu/tests/shader_primitive_index/mod.rs b/wgpu/tests/shader_primitive_index/mod.rs index c10a842cc8..ddb43a1f9d 100644 --- a/wgpu/tests/shader_primitive_index/mod.rs +++ b/wgpu/tests/shader_primitive_index/mod.rs @@ -183,6 +183,7 @@ fn pulling_common( })], depth_stencil_attachment: None, label: None, + timestamp_writes: &[], }); rpass.set_pipeline(&pipeline); diff --git a/wgpu/tests/shader_view_format/mod.rs b/wgpu/tests/shader_view_format/mod.rs index 045198f1bc..ef0b4d86b7 100644 --- a/wgpu/tests/shader_view_format/mod.rs +++ b/wgpu/tests/shader_view_format/mod.rs @@ -137,6 +137,7 @@ fn reinterpret( })], depth_stencil_attachment: None, label: None, + timestamp_writes: &[], }); rpass.set_pipeline(&pipeline); rpass.set_bind_group(0, &bind_group, &[]); diff --git a/wgpu/tests/vertex_indices/mod.rs b/wgpu/tests/vertex_indices/mod.rs index 055e10a40b..d50b3401cf 100644 --- a/wgpu/tests/vertex_indices/mod.rs +++ b/wgpu/tests/vertex_indices/mod.rs @@ -115,6 +115,7 @@ fn pulling_common( })], depth_stencil_attachment: None, label: None, + timestamp_writes: &[], }); rpass.set_pipeline(&pipeline); diff --git a/wgpu/tests/zero_init_texture_after_discard.rs b/wgpu/tests/zero_init_texture_after_discard.rs index eda28ff954..544be508d6 100644 --- a/wgpu/tests/zero_init_texture_after_discard.rs +++ b/wgpu/tests/zero_init_texture_after_discard.rs @@ -160,6 +160,7 @@ impl<'ctx> TestCase<'ctx> { store: true, }), }), + timestamp_writes: &[], }); ctx.queue.submit([encoder.finish()]); } else { @@ -243,6 +244,7 @@ impl<'ctx> TestCase<'ctx> { }), }, ), + timestamp_writes: &[], }); } @@ -266,6 +268,7 @@ impl<'ctx> TestCase<'ctx> { }), }, ), + timestamp_writes: &[], }); } @@ -289,6 +292,7 @@ impl<'ctx> TestCase<'ctx> { }), }, ), + timestamp_writes: &[], }); } From 1ce3e4ee45415594cda17b428a74026a64c29d77 Mon Sep 17 00:00:00 2001 From: FL33TW00D Date: Mon, 3 Apr 2023 13:58:08 +0100 Subject: [PATCH 08/45] chore: testing --- Cargo.lock | 8 ++++++++ Cargo.toml | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 86d583b26d..dfe37d04d0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3581,3 +3581,11 @@ name = "xml-rs" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" + +[[patch.unused]] +name = "naga" +version = "0.11.0" + +[[patch.unused]] +name = "naga" +version = "0.11.0" diff --git a/Cargo.toml b/Cargo.toml index 1f34cf3981..e131126fdf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,7 +135,7 @@ wgpu-core = { path = "./wgpu-core" } wgpu-types = { path = "./wgpu-types" } [patch."https://github.com/gfx-rs/naga"] -#naga = { path = "../naga" } +naga = { path = "../naga" } [patch."https://github.com/zakarumych/gpu-descriptor"] #gpu-descriptor = { path = "../gpu-descriptor/gpu-descriptor" } @@ -144,7 +144,7 @@ wgpu-types = { path = "./wgpu-types" } #gpu-alloc = { path = "../gpu-alloc/gpu-alloc" } [patch.crates-io] -#naga = { path = "../naga" } +naga = { path = "../naga" } #glow = { path = "../glow" } #d3d12 = { path = "../d3d12-rs" } metal = { path = "../metal-rs" } From c7669c1baab77bda0438485cc0124a100f28aa49 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 30 Apr 2023 21:34:58 +0200 Subject: [PATCH 09/45] remove unnecessary naga patch again --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e131126fdf..1f34cf3981 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,7 +135,7 @@ wgpu-core = { path = "./wgpu-core" } wgpu-types = { path = "./wgpu-types" } [patch."https://github.com/gfx-rs/naga"] -naga = { path = "../naga" } +#naga = { path = "../naga" } [patch."https://github.com/zakarumych/gpu-descriptor"] #gpu-descriptor = { path = "../gpu-descriptor/gpu-descriptor" } @@ -144,7 +144,7 @@ naga = { path = "../naga" } #gpu-alloc = { path = "../gpu-alloc/gpu-alloc" } [patch.crates-io] -naga = { path = "../naga" } +#naga = { path = "../naga" } #glow = { path = "../glow" } #d3d12 = { path = "../d3d12-rs" } metal = { path = "../metal-rs" } From dfd8a3b2eb689a0e040a52509d291e1cb965f5e3 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 1 May 2023 12:10:32 +0200 Subject: [PATCH 10/45] incorporate webgpu spec changes --- deno_webgpu/command_encoder.rs | 2 + wgpu-core/src/command/clear.rs | 2 +- wgpu-core/src/command/compute.rs | 74 +++++++++++----------------- wgpu-core/src/command/render.rs | 49 +++++++++--------- wgpu-hal/src/lib.rs | 56 ++++++++++++--------- wgpu-hal/src/metal/command.rs | 70 +++++++++++++------------- wgpu-hal/src/metal/device.rs | 2 +- wgpu-hal/src/vulkan/command.rs | 2 +- wgpu/Cargo.toml | 1 - wgpu/examples/hello-compute/main.rs | 41 ++++++++------- wgpu/examples/hello-triangle/main.rs | 40 ++++++++------- wgpu/src/backend/direct.rs | 54 +++++++------------- wgpu/src/lib.rs | 70 +++++++++----------------- 13 files changed, 211 insertions(+), 252 deletions(-) diff --git a/deno_webgpu/command_encoder.rs b/deno_webgpu/command_encoder.rs index 6c169677a4..8a30586b20 100644 --- a/deno_webgpu/command_encoder.rs +++ b/deno_webgpu/command_encoder.rs @@ -175,6 +175,7 @@ pub fn op_webgpu_command_encoder_begin_render_pass( label: label.map(Cow::from), color_attachments: Cow::from(color_attachments), depth_stencil_attachment: processed_depth_stencil_attachment.as_ref(), + timestamp_writes: None, }; let render_pass = wgpu_core::command::RenderPass::new(command_encoder_resource.1, &descriptor); @@ -200,6 +201,7 @@ pub fn op_webgpu_command_encoder_begin_compute_pass( let descriptor = wgpu_core::command::ComputePassDescriptor { label: label.map(Cow::from), + timestamp_writes: None, }; let compute_pass = diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index 1d9a4976d5..870f268671 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -448,7 +448,7 @@ fn clear_texture_via_render_passes( color_attachments, depth_stencil_attachment, multiview: None, - timestamp_writes: &[], + timestamp_writes: None, }); encoder.end_render_pass(); } diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index f3f42036bd..ccd74c8366 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -95,7 +95,7 @@ pub enum ComputeCommand { pub struct ComputePass { base: BasePass, parent_id: id::CommandEncoderId, - timestamp_writes: Vec, + timestamp_writes: Option, // Resource binding dedupe state. #[cfg_attr(feature = "serial-pass", serde(skip))] @@ -109,7 +109,7 @@ impl ComputePass { Self { base: BasePass::new(&desc.label), parent_id, - timestamp_writes: desc.timestamp_writes.iter().cloned().collect(), + timestamp_writes: desc.timestamp_writes.cloned(), current_bind_groups: BindGroupStateChange::new(), current_pipeline: StateChange::new(), @@ -138,36 +138,25 @@ impl fmt::Debug for ComputePass { } } -/// Location to write a timestamp to (beginning or end of the pass). -#[repr(C)] -#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] -#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] -#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] -#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))] -pub enum ComputePassTimestampLocation { - Beginning = 0, - End = 1, -} - -/// Describes the writing of a single timestamp value. +/// Describes the writing of timestamp values in a compute pass. #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] #[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] -pub struct ComputePassTimestampWrite { - /// The query set to write the timestamp to. +pub struct ComputePassTimestampWrites { + /// The query set to write the timestamps to. pub query_set: id::QuerySetId, - /// The index of the query within the query set to write the timestamp to. - pub query_index: u32, - /// The location of the timestamp - pub location: ComputePassTimestampLocation, + /// The index of the query at which the start timestamp of the pass is written if any. + pub beginning_of_pass_write_index: Option, + /// The index of the query at which the end timestamp of the pass is written if any. + pub end_of_pass_write_index: Option, } #[derive(Clone, Debug, Default)] pub struct ComputePassDescriptor<'a> { pub label: Label<'a>, /// Defines where and when timestamp values will be written for this pass. - pub timestamp_writes: Cow<'a, [ComputePassTimestampWrite]>, + pub timestamp_writes: Option<&'a ComputePassTimestampWrites>, } #[derive(Clone, Debug, Error, Eq, PartialEq)] @@ -358,7 +347,7 @@ impl Global { self.command_encoder_run_compute_pass_impl::( encoder_id, pass.base.as_ref(), - &pass.timestamp_writes, + pass.timestamp_writes.as_ref(), ) } @@ -367,7 +356,7 @@ impl Global { &self, encoder_id: id::CommandEncoderId, base: BasePassRef, - timestamp_writes: &[ComputePassTimestampWrite], + timestamp_writes: Option<&ComputePassTimestampWrites>, ) -> Result<(), ComputePassError> { profiling::scope!("CommandEncoder::run_compute_pass"); let init_scope = PassErrorScope::Pass(encoder_id); @@ -415,29 +404,22 @@ impl Global { let mut string_offset = 0; let mut active_query = None; - let hal_timestamp_writes = timestamp_writes - .iter() - .map(|tw| { - let query_set: &resource::QuerySet = cmd_buf - .trackers - .query_sets - .add_single(&*query_set_guard, tw.query_set) - .ok_or(ComputePassErrorInner::InvalidQuerySet(tw.query_set)) - .map_pass_err(init_scope) - .unwrap(); - - hal::ComputePassTimestampWrite { - query_set: &query_set.raw, - query_index: tw.query_index, - location: match tw.location { - ComputePassTimestampLocation::Beginning => { - hal::ComputePassTimestampLocation::BEGINNING - } - ComputePassTimestampLocation::End => hal::ComputePassTimestampLocation::END, - }, - } + let timestamp_writes = if let Some(tw) = timestamp_writes { + let query_set: &resource::QuerySet = cmd_buf + .trackers + .query_sets + .add_single(&*query_set_guard, tw.query_set) + .ok_or(ComputePassErrorInner::InvalidQuerySet(tw.query_set)) + .map_pass_err(init_scope)?; + + Some(hal::ComputePassTimestampWrites { + query_set: &query_set.raw, + beginning_of_pass_write_index: tw.beginning_of_pass_write_index, + end_of_pass_write_index: tw.end_of_pass_write_index, }) - .collect::>(); + } else { + None + }; cmd_buf.trackers.set_size( Some(&*buffer_guard), @@ -453,7 +435,7 @@ impl Global { let hal_desc = hal::ComputePassDescriptor { label: base.label, - timestamp_writes: &hal_timestamp_writes, + timestamp_writes, }; unsafe { diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index eb4a6e275a..394248b376 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -186,18 +186,18 @@ pub enum RenderPassTimestampLocation { End = 1, } -/// Describes the writing of a single timestamp value. +/// Describes the writing of timestamp values in a render pass. #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] #[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] -pub struct RenderPassTimestampWrite { +pub struct RenderPassTimestampWrites { /// The query set to write the timestamp to. pub query_set: id::QuerySetId, - /// The index of the query within the query set to write the timestamp to. - pub query_index: u32, - /// The location of the timestamp - pub location: RenderPassTimestampLocation, + /// The index of the query at which the start timestamp of the pass is written if any. + pub beginning_of_pass_write_index: Option, + /// The index of the query at which the end timestamp of the pass is written if any. + pub end_of_pass_write_index: Option, } /// Describes the attachments of a render pass. @@ -209,7 +209,7 @@ pub struct RenderPassDescriptor<'a> { /// The depth and stencil attachment of the render pass, if any. pub depth_stencil_attachment: Option<&'a RenderPassDepthStencilAttachment>, /// Defines where and when timestamp values will be written for this pass. - pub timestamp_writes: Cow<'a, [RenderPassTimestampWrite]>, + pub timestamp_writes: Option<&'a RenderPassTimestampWrites>, } #[cfg_attr(feature = "serial-pass", derive(Deserialize, Serialize))] @@ -218,7 +218,7 @@ pub struct RenderPass { parent_id: id::CommandEncoderId, color_targets: ArrayVec, { hal::MAX_COLOR_ATTACHMENTS }>, depth_stencil_target: Option, - timestamp_writes: Vec, + timestamp_writes: Option, // Resource binding dedupe state. #[cfg_attr(feature = "serial-pass", serde(skip))] @@ -234,7 +234,7 @@ impl RenderPass { parent_id, color_targets: desc.color_attachments.iter().cloned().collect(), depth_stencil_target: desc.depth_stencil_attachment.cloned(), - timestamp_writes: desc.timestamp_writes.iter().cloned().collect(), + timestamp_writes: desc.timestamp_writes.cloned(), current_bind_groups: BindGroupStateChange::new(), current_pipeline: StateChange::new(), @@ -745,7 +745,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { label: Option<&str>, color_attachments: &[Option], depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, - timestamp_writes: &[RenderPassTimestampWrite], + timestamp_writes: Option<&RenderPassTimestampWrites>, cmd_buf: &mut CommandBuffer, view_guard: &'a Storage, id::TextureViewId>, buffer_guard: &'a Storage, id::BufferId>, @@ -1112,26 +1112,21 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { multiview, }; - let mut hal_timestamp_writes = Vec::with_capacity(timestamp_writes.len()); - for (index, tw) in timestamp_writes.iter().enumerate() { + let timestamp_writes = if let Some(tw) = timestamp_writes { let query_set: &resource::QuerySet = cmd_buf .trackers .query_sets .add_single(&*query_set_guard, tw.query_set) .ok_or(RenderPassErrorInner::InvalidQuerySet(tw.query_set))?; - let hal_tw = hal::RenderPassTimestampWrite { + Some(hal::RenderPassTimestampWrites { query_set: &query_set.raw, - query_index: tw.query_index, - location: match tw.location { - RenderPassTimestampLocation::Beginning => { - hal::RenderPassTimestampLocation::BEGINNING - } - RenderPassTimestampLocation::End => hal::RenderPassTimestampLocation::END, - }, - }; - hal_timestamp_writes.push(hal_tw); - } + beginning_of_pass_write_index: tw.beginning_of_pass_write_index, + end_of_pass_write_index: tw.end_of_pass_write_index, + }) + } else { + None + }; let hal_desc = hal::RenderPassDescriptor { label, @@ -1140,7 +1135,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { color_attachments: &colors, depth_stencil_attachment: depth_stencil, multiview, - timestamp_writes: &hal_timestamp_writes, + timestamp_writes, }; unsafe { cmd_buf.encoder.raw.begin_render_pass(&hal_desc); @@ -1228,7 +1223,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { clear_value: (0.0, 0), }), multiview: self.multiview, - timestamp_writes: &[], + timestamp_writes: None, }; unsafe { raw.begin_render_pass(&desc); @@ -1253,7 +1248,7 @@ impl Global { pass.base.as_ref(), &pass.color_targets, pass.depth_stencil_target.as_ref(), - &pass.timestamp_writes, + pass.timestamp_writes.as_ref(), ) } @@ -1264,7 +1259,7 @@ impl Global { base: BasePassRef, color_attachments: &[Option], depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, - timestamp_writes: &[RenderPassTimestampWrite], + timestamp_writes: Option<&RenderPassTimestampWrites>, ) -> Result<(), RenderPassError> { profiling::scope!("CommandEncoder::run_render_pass"); let init_scope = PassErrorScope::Pass(encoder_id); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 512f82e84e..bb38a4fd12 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1261,18 +1261,22 @@ pub struct DepthStencilAttachment<'a, A: Api> { pub clear_value: (f32, u32), } -bitflags!( - pub struct RenderPassTimestampLocation: u8 { - const BEGINNING = 1 << 0; - const END = 1 << 1; - } -); - -#[derive(Clone, Debug)] -pub struct RenderPassTimestampWrite<'a, A: Api> { +#[derive(Debug)] +pub struct RenderPassTimestampWrites<'a, A: Api> { pub query_set: &'a A::QuerySet, - pub query_index: u32, - pub location: RenderPassTimestampLocation, + pub beginning_of_pass_write_index: Option, + pub end_of_pass_write_index: Option, +} + +// Rust gets confused about the impl requirements for `A` +impl Clone for RenderPassTimestampWrites<'_, A> { + fn clone(&self) -> Self { + Self { + query_set: self.query_set, + beginning_of_pass_write_index: self.beginning_of_pass_write_index, + end_of_pass_write_index: self.end_of_pass_write_index, + } + } } #[derive(Clone, Debug)] @@ -1283,27 +1287,31 @@ pub struct RenderPassDescriptor<'a, A: Api> { pub color_attachments: &'a [Option>], pub depth_stencil_attachment: Option>, pub multiview: Option, - pub timestamp_writes: &'a [RenderPassTimestampWrite<'a, A>], + pub timestamp_writes: Option>, } -bitflags!( - pub struct ComputePassTimestampLocation: u8 { - const BEGINNING = 1 << 0; - const END = 1 << 1; - } -); - -#[derive(Clone, Debug)] -pub struct ComputePassTimestampWrite<'a, A: Api> { +#[derive(Debug)] +pub struct ComputePassTimestampWrites<'a, A: Api> { pub query_set: &'a A::QuerySet, - pub query_index: u32, - pub location: ComputePassTimestampLocation, + pub beginning_of_pass_write_index: Option, + pub end_of_pass_write_index: Option, +} + +// Rust gets confused about the impl requirements for `A` +impl Clone for ComputePassTimestampWrites<'_, A> { + fn clone(&self) -> Self { + Self { + query_set: self.query_set, + beginning_of_pass_write_index: self.beginning_of_pass_write_index, + end_of_pass_write_index: self.end_of_pass_write_index, + } + } } #[derive(Clone, Debug)] pub struct ComputePassDescriptor<'a, A: Api> { pub label: Label<'a>, - pub timestamp_writes: &'a [ComputePassTimestampWrite<'a, A>], + pub timestamp_writes: Option>, } /// Stores if any API validation error has occurred in this process diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index c2e0525c56..d64b10a8c9 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -439,23 +439,24 @@ impl crate::CommandEncoder for super::CommandEncoder { } } - let sba_descriptor = descriptor - .sample_buffer_attachments() - .object_at(0 as _) //TODO: move inside - .unwrap(); - for (_i, at) in desc.timestamp_writes.iter().enumerate() { - //Problem here is that we can't attach the same counter sample buffer - //to the pass descriptor twice. - sba_descriptor - .set_sample_buffer(at.query_set.counter_sample_buffer.as_ref().unwrap()); - match at.location { - crate::RenderPassTimestampLocation::BEGINNING => { - sba_descriptor.set_start_of_vertex_sample_index(at.query_index as _); - } - crate::RenderPassTimestampLocation::END => { - sba_descriptor.set_end_of_fragment_sample_index(at.query_index as _); - } - _ => {} + if let Some(timestamp_writes) = &desc.timestamp_writes { + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(0 as _) + .unwrap(); + sba_descriptor.set_sample_buffer( + timestamp_writes + .query_set + .counter_sample_buffer + .as_ref() + .unwrap(), + ); + + if let Some(start_index) = timestamp_writes.beginning_of_pass_write_index { + sba_descriptor.set_start_of_vertex_sample_index(start_index as _); + } + if let Some(end_index) = timestamp_writes.end_of_pass_write_index { + sba_descriptor.set_end_of_fragment_sample_index(end_index as _); } } @@ -947,23 +948,24 @@ impl crate::CommandEncoder for super::CommandEncoder { objc::rc::autoreleasepool(|| { let descriptor = metal::ComputePassDescriptor::new(); - let sba_descriptor = descriptor - .sample_buffer_attachments() - .object_at(0 as _) //TODO: move inside - .unwrap(); - for (_i, at) in desc.timestamp_writes.iter().enumerate() { - //Problem here is that we can't attach the same counter sample buffer - //to the pass descriptor twice. - sba_descriptor - .set_sample_buffer(at.query_set.counter_sample_buffer.as_ref().unwrap()); - match at.location { - crate::ComputePassTimestampLocation::BEGINNING => { - sba_descriptor.set_start_of_encoder_sample_index(at.query_index as _); - } - crate::ComputePassTimestampLocation::END => { - sba_descriptor.set_end_of_encoder_sample_index(at.query_index as _); - } - _ => {} + if let Some(timestamp_writes) = &desc.timestamp_writes { + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(0 as _) + .unwrap(); + sba_descriptor.set_sample_buffer( + timestamp_writes + .query_set + .counter_sample_buffer + .as_ref() + .unwrap(), + ); + + if let Some(start_index) = timestamp_writes.beginning_of_pass_write_index { + sba_descriptor.set_start_of_encoder_sample_index(start_index as _); + } + if let Some(end_index) = timestamp_writes.end_of_pass_write_index { + sba_descriptor.set_end_of_encoder_sample_index(end_index as _); } } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index c3a8c277c3..c69782d2e6 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -8,7 +8,7 @@ use std::{ use super::conv; use crate::auxil::map_naga_stage; -use metal::{foreign_types::ForeignTypeRef, MTLCounterSampleBufferDescriptor}; +use metal::foreign_types::ForeignTypeRef; type DeviceResult = Result; diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index f6c871026c..ba4e088fc3 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -777,7 +777,7 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<'_, super::Api>) { self.bind_point = vk::PipelineBindPoint::COMPUTE; if let Some(label) = desc.label { unsafe { self.begin_debug_marker(label) }; diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml index f681ce0212..d74dec38b4 100644 --- a/wgpu/Cargo.toml +++ b/wgpu/Cargo.toml @@ -236,7 +236,6 @@ web-sys = { workspace = true, features = [ "GpuComputePassTimestampWrite", "GpuComputePipeline", "GpuComputePipelineDescriptor", - "GpuComputePassTimestampLocation", "GpuCullMode", "GpuDepthStencilState", "GpuDevice", diff --git a/wgpu/examples/hello-compute/main.rs b/wgpu/examples/hello-compute/main.rs index fd999c6766..9dffe8c571 100644 --- a/wgpu/examples/hello-compute/main.rs +++ b/wgpu/examples/hello-compute/main.rs @@ -138,18 +138,6 @@ async fn execute_gpu_inner( }], }); - let beginning = wgpu::ComputePassTimestampWrite { - query_set: &query_set, - query_index: 0, - location: wgpu::ComputePassTimestampLocation::Beginning, - }; - - let end = wgpu::ComputePassTimestampWrite { - query_set: &query_set, - query_index: 1, - location: wgpu::ComputePassTimestampLocation::End, - }; - // A command encoder executes one or many pipelines. // It is to WebGPU what a command buffer is to Vulkan. let mut encoder = @@ -157,7 +145,11 @@ async fn execute_gpu_inner( { let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None, - timestamp_writes: &vec![beginning, end], + timestamp_writes: Some(wgpu::ComputePassTimestampWrites { + query_set: &query_set, + beginning_of_pass_write_index: Some(0), + end_of_pass_write_index: Some(1), + }), }); cpass.set_pipeline(&compute_pipeline); cpass.set_bind_group(0, &bind_group, &[]); @@ -168,13 +160,26 @@ async fn execute_gpu_inner( // Will copy data from storage buffer on GPU to staging buffer on CPU. encoder.copy_buffer_to_buffer(&storage_buffer, 0, &staging_buffer, 0, size); - let destination_buffer = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("destination buffer"), + let query_resolve_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("query resolve buffer"), + size: (std::mem::size_of::() * NUM_SAMPLES as usize) as wgpu::BufferAddress, + usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE, + mapped_at_creation: false, + }); + let query_destination_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("query dest buffer"), size: (std::mem::size_of::() * NUM_SAMPLES as usize) as wgpu::BufferAddress, usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, mapped_at_creation: false, }); - encoder.resolve_query_set(&query_set, 0..NUM_SAMPLES as u32, &destination_buffer, 0); + encoder.resolve_query_set(&query_set, 0..NUM_SAMPLES as u32, &query_resolve_buffer, 0); + encoder.copy_buffer_to_buffer( + &query_resolve_buffer, + 0, + &query_destination_buffer, + 0, + query_resolve_buffer.size(), + ); // Submits command encoder for processing queue.submit(Some(encoder.finish())); @@ -212,11 +217,11 @@ async fn execute_gpu_inner( None }; - destination_buffer + query_destination_buffer .slice(..) .map_async(wgpu::MapMode::Read, |_| ()); device.poll(wgpu::Maintain::Wait); - resolve_timestamps(&destination_buffer, timestamp_period); + resolve_timestamps(&query_destination_buffer, timestamp_period); res } diff --git a/wgpu/examples/hello-triangle/main.rs b/wgpu/examples/hello-triangle/main.rs index fa2a017b3c..e20e8ceb18 100644 --- a/wgpu/examples/hello-triangle/main.rs +++ b/wgpu/examples/hello-triangle/main.rs @@ -90,8 +90,14 @@ async fn run(event_loop: EventLoop<()>, window: Window) { surface.configure(&device, &config); - let destination_buffer = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("destination buffer"), + let query_resolve_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("query resolve buffer"), + size: (std::mem::size_of::() * NUM_SAMPLES as usize) as wgpu::BufferAddress, + usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE, + mapped_at_creation: false, + }); + let query_destination_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("query dest buffer"), size: (std::mem::size_of::() * NUM_SAMPLES as usize) as wgpu::BufferAddress, usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, mapped_at_creation: false, @@ -139,18 +145,11 @@ async fn run(event_loop: EventLoop<()>, window: Window) { }, })], depth_stencil_attachment: None, - timestamp_writes: &[ - wgpu::RenderPassTimestampWrite { - query_set: &query_set, - query_index: 0, - location: wgpu::RenderPassTimestampLocation::Beginning, - }, - wgpu::RenderPassTimestampWrite { - query_set: &query_set, - query_index: 1, - location: wgpu::RenderPassTimestampLocation::End, - }, - ], + timestamp_writes: Some(wgpu::RenderPassTimestampWrites { + query_set: &query_set, + beginning_of_pass_write_index: Some(0), + end_of_pass_write_index: Some(1), + }), }); rpass.set_pipeline(&render_pipeline); rpass.draw(0..3, 0..1); @@ -159,17 +158,24 @@ async fn run(event_loop: EventLoop<()>, window: Window) { encoder.resolve_query_set( &query_set, 0..NUM_SAMPLES as u32, - &destination_buffer, + &query_resolve_buffer, + 0, + ); + encoder.copy_buffer_to_buffer( + &query_resolve_buffer, + 0, + &query_destination_buffer, 0, + query_resolve_buffer.size(), ); queue.submit(Some(encoder.finish())); frame.present(); - destination_buffer + query_destination_buffer .slice(..) .map_async(wgpu::MapMode::Read, |_| ()); device.poll(wgpu::Maintain::Wait); - resolve_timestamps(&destination_buffer, timestamp_period); + resolve_timestamps(&query_destination_buffer, timestamp_period); } Event::WindowEvent { event: WindowEvent::CloseRequested, diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs index 2731efea76..c9245be7a7 100644 --- a/wgpu/src/backend/direct.rs +++ b/wgpu/src/backend/direct.rs @@ -1812,30 +1812,21 @@ impl crate::Context for Context { _encoder_data: &Self::CommandEncoderData, desc: &ComputePassDescriptor, ) -> (Self::ComputePassId, Self::ComputePassData) { - let timestamp_writes = desc - .timestamp_writes - .as_ref() - .iter() - .map(|t| wgc::command::ComputePassTimestampWrite { - query_set: t.query_set.id.into(), - query_index: t.query_index, - location: match t.location { - crate::ComputePassTimestampLocation::Beginning => { - wgc::command::ComputePassTimestampLocation::Beginning - } - crate::ComputePassTimestampLocation::End => { - wgc::command::ComputePassTimestampLocation::End - } - }, - }) - .collect::>(); + let timestamp_writes = + desc.timestamp_writes + .as_ref() + .map(|tw| wgc::command::ComputePassTimestampWrites { + query_set: tw.query_set.id.into(), + beginning_of_pass_write_index: tw.beginning_of_pass_write_index, + end_of_pass_write_index: tw.end_of_pass_write_index, + }); ( Unused, wgc::command::ComputePass::new( *encoder, &wgc::command::ComputePassDescriptor { label: desc.label.map(Borrowed), - timestamp_writes: Borrowed(×tamp_writes), + timestamp_writes: timestamp_writes.as_ref(), }, ), ) @@ -1899,23 +1890,14 @@ impl crate::Context for Context { } }); - let timestamp_writes = desc - .timestamp_writes - .as_ref() - .iter() - .map(|t| wgc::command::RenderPassTimestampWrite { - query_set: t.query_set.id.into(), - query_index: t.query_index, - location: match t.location { - crate::RenderPassTimestampLocation::Beginning => { - wgc::command::RenderPassTimestampLocation::Beginning - } - crate::RenderPassTimestampLocation::End => { - wgc::command::RenderPassTimestampLocation::End - } - }, - }) - .collect::>(); + let timestamp_writes = + desc.timestamp_writes + .as_ref() + .map(|tw| wgc::command::RenderPassTimestampWrites { + query_set: tw.query_set.id.into(), + beginning_of_pass_write_index: tw.beginning_of_pass_write_index, + end_of_pass_write_index: tw.end_of_pass_write_index, + }); ( Unused, @@ -1925,7 +1907,7 @@ impl crate::Context for Context { label: desc.label.map(Borrowed), color_attachments: Borrowed(&colors), depth_stencil_attachment: depth_stencil.as_ref(), - timestamp_writes: Borrowed(×tamp_writes), + timestamp_writes: timestamp_writes.as_ref(), }, ), ) diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index fc0970b22b..a8b9eaeefc 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -828,36 +828,23 @@ impl Default for Operations { } } -/// Describes the location of a timestamp in a render pass. -/// -/// For use with [`RenderPassTimestampWrite`]. -/// -/// Corresponds to [WebGPU `GPURenderPassTimestampLocation`]( -/// https://gpuweb.github.io/gpuweb/#enumdef-gpurenderpasstimestamplocation). -#[derive(Clone, Debug)] -pub enum RenderPassTimestampLocation { - /// The timestamp is at the start of the render pass. - Beginning, - /// The timestamp is at the end of the render pass. - End, -} - /// Describes the timestamp writes of a render pass. /// /// For use with [`RenderPassDescriptor`]. +/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`. /// /// Corresponds to [WebGPU `GPURenderPassTimestampWrite`]( -/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpasstimestampwrite). +/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpasstimestampwrites). #[derive(Clone, Debug)] -pub struct RenderPassTimestampWrite<'a> { +pub struct RenderPassTimestampWrites<'a> { /// The query set to write to. pub query_set: &'a QuerySet, - /// The index of the query to write to. - pub query_index: u32, - /// The location of the timestamp. - pub location: RenderPassTimestampLocation, + /// The index of the query at which the start timestamp of the pass is written if any. + pub beginning_of_pass_write_index: Option, + /// The index of the query at which the end timestamp of the pass is written if any. + pub end_of_pass_write_index: Option, } -static_assertions::assert_impl_all!(RenderPassTimestampWrite: Send, Sync); +static_assertions::assert_impl_all!(RenderPassTimestampWrites: Send, Sync); /// Describes a color attachment to a [`RenderPass`]. /// @@ -1117,8 +1104,10 @@ pub struct RenderPassDescriptor<'tex, 'desc> { pub color_attachments: &'desc [Option>], /// The depth and stencil attachment of the render pass, if any. pub depth_stencil_attachment: Option>, - /// A sequence of RenderPassTimestampWrite values define where and when timestamp values will be written for this pass. - pub timestamp_writes: &'desc [RenderPassTimestampWrite<'desc>], + /// Defines which timestamp values will be written for this pass, and where to write them to. + /// + /// Requires `Features::TIMESTAMP_QUERY` to be enabled. + pub timestamp_writes: Option>, } static_assertions::assert_impl_all!(RenderPassDescriptor: Send, Sync); @@ -1203,36 +1192,23 @@ pub struct RenderPipelineDescriptor<'a> { } static_assertions::assert_impl_all!(RenderPipelineDescriptor: Send, Sync); -/// Describes the location of a timestamp in a compute pass. -/// -/// For use with [`ComputePassTimestampWrite`]. -/// -/// Corresponds to [WebGPU `GPUComputePassTimestampLocation`]( -/// https://gpuweb.github.io/gpuweb/#enumdef-gpucomputepasstimestamplocation). -#[derive(Clone, Debug)] -pub enum ComputePassTimestampLocation { - /// The timestamp is at the start of the compute pass. - Beginning, - /// The timestamp is at the end of the compute pass. - End, -} - /// Describes the timestamp writes of a compute pass. /// /// For use with [`ComputePassDescriptor`]. +/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`. /// /// Corresponds to [WebGPU `GPUComputePassTimestampWrite`]( -/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrite). +/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrites). #[derive(Clone, Debug)] -pub struct ComputePassTimestampWrite<'a> { +pub struct ComputePassTimestampWrites<'a> { /// The query set to write to. pub query_set: &'a QuerySet, - /// The index of the query to write to. - pub query_index: u32, - /// The location of the timestamp. - pub location: ComputePassTimestampLocation, + /// The index of the query at which the start timestamp of the pass is written if any. + pub beginning_of_pass_write_index: Option, + /// The index of the query at which the end timestamp of the pass is written if any. + pub end_of_pass_write_index: Option, } -static_assertions::assert_impl_all!(ComputePassTimestampWrite: Send, Sync); +static_assertions::assert_impl_all!(ComputePassTimestampWrites: Send, Sync); /// Describes the attachments of a compute pass. /// @@ -1244,8 +1220,10 @@ static_assertions::assert_impl_all!(ComputePassTimestampWrite: Send, Sync); pub struct ComputePassDescriptor<'a> { /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification. pub label: Label<'a>, - /// A sequence of ComputePassTimestampWrite values define where and when timestamp values will be written for this pass. - pub timestamp_writes: &'a [ComputePassTimestampWrite<'a>], + /// Defines which timestamp values will be written for this pass, and where to write them to. + /// + /// Requires `Features::TIMESTAMP_QUERY` to be enabled. + pub timestamp_writes: Option>, } static_assertions::assert_impl_all!(ComputePassDescriptor: Send, Sync); From 6eba839d1ef1591becc24d6364305b375e16c37e Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 1 May 2023 18:34:08 +0200 Subject: [PATCH 11/45] "fix" timestamp conversion on metal, comments to clarify the situation on timestamp periods generally --- wgpu-hal/src/metal/adapter.rs | 37 ++++++++++++++++++++++++++++++++--- wgpu-hal/src/metal/mod.rs | 21 +++++--------------- wgpu/src/backend/web.rs | 3 ++- wgpu/src/lib.rs | 4 ++++ 4 files changed, 45 insertions(+), 20 deletions(-) diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 8144d97952..0b07b54ee6 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -27,6 +27,33 @@ impl crate::Adapter for super::Adapter { .device .lock() .new_command_queue_with_max_command_buffer_count(MAX_COMMAND_BUFFERS); + + // Acquiring the meaning of timestamp ticks is hard with Metal! + // The only thing there is is a method correlating cpu & gpu timestamps (`device.sample_timestamps`). + // Users are supposed to call this method twice and calculate the difference, + // see "Converting GPU Timestamps into CPU Time": + // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/converting_gpu_timestamps_into_cpu_time + // Not only does this mean we get an approximate value, this is as also *very slow*! + // Chromium opted to solve this using a linear regression that they stop at some point + // https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/dawn/src/dawn/native/metal/DeviceMTL.mm;drc=76be2f9f117654f3fe4faa477b0445114fccedda;bpv=0;bpt=1;l=46 + // Generally, the assumption is that timestamp values aren't changing over time, after all all other APIs provide stable values. + // + // We should do as Chromium does for the general case, but this requires quite some state tracking + // and doesn't even provide perfectly accurate values, especially at the start of the application when + // we didn't have the chance to sample a lot of values just yet. + // + // So instead, we're doing the dangerous but easy thing and use our "knowledge" of timestamps + // conversions on different devices, after all Metal isn't supported on that many ;) + // Based on: + // * https://github.com/gfx-rs/wgpu/pull/2528 + // * https://github.com/gpuweb/gpuweb/issues/1325#issuecomment-761041326 + let timestamp_period = if self.shared.device.lock().name().starts_with("Intel") { + 83.333 + } else { + // Known for Apple Silicon (at least M1 & M2, iPad Pro 2018) and AMD GPUs. + 1.0 + }; + Ok(crate::OpenDevice { device: super::Device { shared: Arc::clone(&self.shared), @@ -34,6 +61,7 @@ impl crate::Adapter for super::Adapter { }, queue: super::Queue { raw: Arc::new(Mutex::new(queue)), + timestamp_period, }, }) } @@ -766,10 +794,13 @@ impl super::PrivateCapabilities { | F::TEXTURE_FORMAT_16BIT_NORM | F::SHADER_F16 | F::DEPTH32FLOAT_STENCIL8 - | F::MULTI_DRAW_INDIRECT - | F::TIMESTAMP_QUERY; + | F::MULTI_DRAW_INDIRECT; - //TODO: if not on apple silicon, we can do timestamps within pass. + if self.supports_timestamp_period { + features.insert(F::TIMESTAMP_QUERY); + // TODO: if not on apple silicon, we can do timestamps within pass. + //features.insert(F::TIMESTAMP_QUERY_INSIDE_PASSES); + } features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc); features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr); diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index d6316b9556..9ff63e98d3 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -282,18 +282,21 @@ pub struct Adapter { pub struct Queue { raw: Arc>, + timestamp_period: f32, } unsafe impl Send for Queue {} unsafe impl Sync for Queue {} impl Queue { - pub unsafe fn queue_from_raw(raw: metal::CommandQueue) -> Self { + pub unsafe fn queue_from_raw(raw: metal::CommandQueue, timestamp_period: f32) -> Self { Self { raw: Arc::new(Mutex::new(raw)), + timestamp_period, } } } + pub struct Device { shared: Arc, features: wgt::Features, @@ -403,21 +406,7 @@ impl crate::Queue for Queue { } unsafe fn get_timestamp_period(&self) -> f32 { - let queue = self.raw.lock(); - let (mut cpu_timestamp0, mut gpu_timestamp0) = (0_u64, 0_u64); - let device = queue.device().to_owned(); - device.sample_timestamps(&mut cpu_timestamp0, &mut gpu_timestamp0); - if cpu_timestamp0 == 0 || gpu_timestamp0 == 0 { - return 1.0; - } - - let command_buffer = queue.new_command_buffer(); - command_buffer.commit(); - command_buffer.wait_until_scheduled(); - let (mut cpu_timestamp1, mut gpu_timestamp1) = (0_u64, 0_u64); - device.sample_timestamps(&mut cpu_timestamp1, &mut gpu_timestamp1); - - (cpu_timestamp1 - cpu_timestamp0) as f32 / (gpu_timestamp1 - gpu_timestamp0) as f32 + self.timestamp_period } } diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs index 299626eef1..601420e65b 100644 --- a/wgpu/src/backend/web.rs +++ b/wgpu/src/backend/web.rs @@ -2515,7 +2515,8 @@ impl crate::context::Context for Context { _queue: &Self::QueueId, _queue_data: &Self::QueueData, ) -> f32 { - 1.0 //TODO + // Timestamp values are always in nanoseconds, see https://gpuweb.github.io/gpuweb/#timestamp + 1.0 } fn queue_on_submitted_work_done( diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index a8b9eaeefc..2b552df564 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -4095,6 +4095,10 @@ impl Queue { /// Gets the amount of nanoseconds each tick of a timestamp query represents. /// /// Returns zero if timestamp queries are unsupported. + /// + /// TODO: https://github.com/gfx-rs/wgpu/issues/3741 + /// Timestamp values are supposed to represent nanosecond values, see https://gpuweb.github.io/gpuweb/#timestamp + /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required currently. pub fn get_timestamp_period(&self) -> f32 { DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref()) } From 77bbe868d8f6806629eacff6ad0646f4645a650a Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 1 May 2023 19:29:51 +0200 Subject: [PATCH 12/45] fix metal timequery feature detection --- wgpu-hal/src/metal/adapter.rs | 20 +++++++++++++------- wgpu-hal/src/metal/mod.rs | 3 ++- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 0b07b54ee6..e9427e17f2 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -768,7 +768,13 @@ impl super::PrivateCapabilities { } else { None }, - supports_timestamp_period: version.at_least((10, 15), (14, 0), os_is_mac), + support_timestamp_query: version.at_least((11, 0), (14, 0), os_is_mac) + && device + .supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary), + support_timestamp_query_in_passes: version.at_least((11, 0), (14, 0), os_is_mac) + && device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDrawBoundary) + && device + .supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary), } } @@ -796,12 +802,12 @@ impl super::PrivateCapabilities { | F::DEPTH32FLOAT_STENCIL8 | F::MULTI_DRAW_INDIRECT; - if self.supports_timestamp_period { - features.insert(F::TIMESTAMP_QUERY); - // TODO: if not on apple silicon, we can do timestamps within pass. - //features.insert(F::TIMESTAMP_QUERY_INSIDE_PASSES); - } - + features.set(F::TIMESTAMP_QUERY, self.support_timestamp_query); + // TODO: Not yet implemented. + // features.set( + // F::TIMESTAMP_QUERY_INSIDE_PASSES, + // self.support_timestamp_query_in_passes, + // ); features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc); features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr); features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc); diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 9ff63e98d3..a59e1d01a3 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -233,7 +233,8 @@ struct PrivateCapabilities { supports_preserve_invariance: bool, supports_shader_primitive_index: bool, has_unified_memory: Option, - supports_timestamp_period: bool, + support_timestamp_query: bool, + support_timestamp_query_in_passes: bool, } #[derive(Clone, Debug)] From 8d445f1616a88c1699488cf04df5e9964c2cb337 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 1 May 2023 19:31:38 +0200 Subject: [PATCH 13/45] warning fixes --- wgpu-core/src/command/compute.rs | 2 +- wgpu-core/src/command/render.rs | 8 ++++---- wgpu-hal/src/metal/device.rs | 2 -- wgpu-hal/src/metal/mod.rs | 1 - 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index ccd74c8366..a42dda5d27 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -29,7 +29,7 @@ use serde::Serialize; use thiserror::Error; -use std::{borrow::Cow, fmt, mem, str}; +use std::{fmt, mem, str}; #[doc(hidden)] #[derive(Clone, Copy, Debug)] diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 394248b376..ccd96f3edb 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -18,7 +18,7 @@ use crate::{ id, init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction}, pipeline::{self, PipelineFlags}, - resource::{self, Buffer, QuerySet, Texture, TextureView, TextureViewNotRenderableReason}, + resource::{Buffer, QuerySet, Texture, TextureView, TextureViewNotRenderableReason}, track::{TextureSelector, UsageConflict, UsageScope}, validation::{ check_buffer_usage, check_texture_usage, MissingBufferUsageError, MissingTextureUsageError, @@ -1113,7 +1113,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { }; let timestamp_writes = if let Some(tw) = timestamp_writes { - let query_set: &resource::QuerySet = cmd_buf + let query_set = cmd_buf .trackers .query_sets .add_single(&*query_set_guard, tw.query_set) @@ -2061,7 +2061,7 @@ impl Global { .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES) .map_pass_err(scope)?; - let query_set: &resource::QuerySet = cmd_buf + let query_set = cmd_buf .trackers .query_sets .add_single(&*query_set_guard, query_set_id) @@ -2083,7 +2083,7 @@ impl Global { } => { let scope = PassErrorScope::BeginPipelineStatisticsQuery; - let query_set: &resource::QuerySet = cmd_buf + let query_set = cmd_buf .trackers .query_sets .add_single(&*query_set_guard, query_set_id) diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index c69782d2e6..e3b7ffd439 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -326,8 +326,6 @@ impl crate::Device for super::Device { &self, desc: &crate::TextureDescriptor, ) -> DeviceResult { - use foreign_types::ForeignTypeRef; - let mtl_format = self.shared.private_caps.map_format(desc.format); objc::rc::autoreleasepool(|| { diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index a59e1d01a3..1304036bd5 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -28,7 +28,6 @@ use std::{ }; use arrayvec::ArrayVec; -use foreign_types::ForeignTypeRef as _; use metal::foreign_types::ForeignTypeRef as _; use parking_lot::Mutex; From cc84ae1e436e70d54f0e3705202225a9eee381cf Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 1 May 2023 19:49:29 +0200 Subject: [PATCH 14/45] better error handling for failing to create metal timestamp counter --- wgpu-core/src/device/mod.rs | 3 +++ wgpu-core/src/instance.rs | 1 + wgpu-hal/src/lib.rs | 2 ++ wgpu-hal/src/metal/device.rs | 24 ++++++++++++++++-------- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 36d353b672..ec5b63274d 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -3394,6 +3394,8 @@ pub enum DeviceError { Lost, #[error("Not enough memory left")] OutOfMemory, + #[error("Creation of a resource failed for a reason other than running out of memory.")] + ResourceCreationFailed, } impl From for DeviceError { @@ -3401,6 +3403,7 @@ impl From for DeviceError { match error { hal::DeviceError::Lost => DeviceError::Lost, hal::DeviceError::OutOfMemory => DeviceError::OutOfMemory, + hal::DeviceError::ResourceCreationFailed => DeviceError::ResourceCreationFailed, } } } diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index c5989f4bf4..cd3ffdc189 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -349,6 +349,7 @@ impl Adapter { |err| match err { hal::DeviceError::Lost => RequestDeviceError::DeviceLost, hal::DeviceError::OutOfMemory => RequestDeviceError::OutOfMemory, + hal::DeviceError::ResourceCreationFailed => RequestDeviceError::Internal, }, )?; diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index bb38a4fd12..d01b1dcd5f 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -117,6 +117,8 @@ pub enum DeviceError { OutOfMemory, #[error("Device is lost")] Lost, + #[error("Creation of a resource failed for a reason other than running out of memory.")] + ResourceCreationFailed, } #[derive(Clone, Debug, Eq, PartialEq, Error)] diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index e3b7ffd439..3582c6345a 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1110,16 +1110,24 @@ impl crate::Device for super::Device { } let counter_sets = device.counter_sets(); - let timestamp_counter = counter_sets - .iter() - .find(|cs| cs.name() == "timestamp") - //TODO: better error type? - .ok_or(crate::DeviceError::OutOfMemory)?; + let timestamp_counter = + match counter_sets.iter().find(|cs| cs.name() == "timestamp") { + Some(counter) => counter, + None => { + log::error!("Failed to obtain timestamp counter set."); + return Err(crate::DeviceError::ResourceCreationFailed); + } + }; csb_desc.set_counter_set(timestamp_counter); - let counter_sample_buffer = device - .new_counter_sample_buffer_with_descriptor(&csb_desc) - .map_err(|_| crate::DeviceError::OutOfMemory)?; + let counter_sample_buffer = + match device.new_counter_sample_buffer_with_descriptor(&csb_desc) { + Ok(buffer) => buffer, + Err(err) => { + log::error!("Failed to create counter sample buffer: {:?}", err); + return Err(crate::DeviceError::ResourceCreationFailed); + } + }; Ok(super::QuerySet { raw_buffer: destination_buffer, From c8433d87953cba7013a7de8be5b14393906f89dd Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 6 May 2023 19:01:08 +0200 Subject: [PATCH 15/45] use public version of metal in patch --- Cargo.lock | 7 ++++--- Cargo.toml | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dfe37d04d0..cb1b8df1b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1496,6 +1496,7 @@ dependencies = [ [[package]] name = "metal" version = "0.25.0" +source = "git+https://github.com/gfx-rs/metal-rs?branch=master#05df13a4f751910ef54d7805e8e4682488b23b8d" dependencies = [ "bitflags 1.3.2", "block", @@ -3099,7 +3100,7 @@ dependencies = [ "nanorand", "noise", "obj", - "parking_lot 0.11.2", + "parking_lot 0.12.1", "png", "pollster", "profiling", @@ -3127,7 +3128,7 @@ dependencies = [ "codespan-reporting", "log", "naga", - "parking_lot 0.11.2", + "parking_lot 0.12.1", "profiling", "raw-window-handle 0.5.2", "ron", @@ -3168,7 +3169,7 @@ dependencies = [ "metal", "naga", "objc", - "parking_lot 0.11.2", + "parking_lot 0.12.1", "profiling", "range-alloc", "raw-window-handle 0.5.2", diff --git a/Cargo.toml b/Cargo.toml index 1f34cf3981..9c528ed664 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,7 +147,8 @@ wgpu-types = { path = "./wgpu-types" } #naga = { path = "../naga" } #glow = { path = "../glow" } #d3d12 = { path = "../d3d12-rs" } -metal = { path = "../metal-rs" } +#metal = { path = "../metal-rs" } #web-sys = { path = "../wasm-bindgen/crates/web-sys" } #js-sys = { path = "../wasm-bindgen/crates/js-sys" } #wasm-bindgen = { path = "../wasm-bindgen" } +metal = { git = "https://github.com/gfx-rs/metal-rs", branch = "master" } From 0c26cd94d693fcdb6dfea109ba73574e5bd2369b Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 6 May 2023 19:18:07 +0200 Subject: [PATCH 16/45] disable metal deprecation warnings (and comment why and what) --- Cargo.lock | 8 -------- wgpu-hal/src/metal/mod.rs | 5 +++++ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb1b8df1b8..2c2eaf177d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3582,11 +3582,3 @@ name = "xml-rs" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" - -[[patch.unused]] -name = "naga" -version = "0.11.0" - -[[patch.unused]] -name = "naga" -version = "0.11.0" diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 1304036bd5..3a8ebc5570 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -13,6 +13,11 @@ end of the VS buffer table. !*/ +// `MTLFeatureSet` is superseded by `MTLGpuFamily`. +// However, `MTLGpuFamily` is only supported starting MacOS 10.15, whereas our minimum target is MacOS 10.13, +// See https://github.com/gpuweb/gpuweb/issues/1069 for minimum spec. +// TODO: Eventually all deprecated features should be abstracted and use new api when available. +#[allow(deprecated)] mod adapter; mod command; mod conv; From d2f7e8b094d9c351baa7828779747d26219ede6f Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 6 May 2023 20:11:07 +0200 Subject: [PATCH 17/45] better documentation for timestamp features --- wgpu-types/src/lib.rs | 19 ++++++++++++++----- wgpu/src/lib.rs | 4 ++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index a4af880ef2..49b20ae675 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -237,9 +237,14 @@ bitflags::bitflags! { /// This is a web and native feature. const DEPTH_CLIP_CONTROL = 1 << 0; /// Enables use of Timestamp Queries. These queries tell the current gpu timestamp when - /// all work before the query is finished. Call [`CommandEncoder::write_timestamp`], - /// [`RenderPassEncoder::write_timestamp`], or [`ComputePassEncoder::write_timestamp`] to - /// write out a timestamp. + /// all work before the query is finished. + /// + /// This feature allows the use of + /// - [`CommandEncoder::write_timestamp`] + /// - [`RenderPassDescriptor::timestamp_writes`] + /// - [`ComputePassDescriptor::timestamp_writes`] + /// to write out timestamps. + /// For timestamps within passes refer to [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] /// /// They must be resolved using [`CommandEncoder::resolve_query_sets`] into a buffer, /// then the result must be multiplied by the timestamp period [`Queue::get_timestamp_period`] @@ -249,8 +254,7 @@ bitflags::bitflags! { /// Supported Platforms: /// - Vulkan /// - DX12 - /// - /// This is currently unimplemented on Metal. + /// - Metal /// /// This is a web and native feature. const TIMESTAMP_QUERY = 1 << 1; @@ -431,12 +435,17 @@ bitflags::bitflags! { /// /// Implies [`Features::TIMESTAMP_QUERY`] is supported. /// + /// Additionally allows for timestamp queries to be used inside render & compute passes using: + /// - [`RenderPassEncoder::write_timestamp`] + /// - [`ComputePassEncoder::write_timestamp`] + /// /// Supported platforms: /// - Vulkan /// - DX12 /// /// This is currently unimplemented on Metal. /// When implemented, it will be supported on Metal on AMD and Intel GPUs, but not Apple GPUs. + /// (This is a common limitation of tile-based rasterization GPUs) /// /// This is a native only feature with a [proposal](https://github.com/gpuweb/gpuweb/blob/0008bd30da2366af88180b511a5d0d0c1dffbc36/proposals/timestamp-query-inside-passes.md) for the web. const TIMESTAMP_QUERY_INSIDE_PASSES = 1 << 33; diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 2b552df564..060e871231 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1106,7 +1106,7 @@ pub struct RenderPassDescriptor<'tex, 'desc> { pub depth_stencil_attachment: Option>, /// Defines which timestamp values will be written for this pass, and where to write them to. /// - /// Requires `Features::TIMESTAMP_QUERY` to be enabled. + /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled. pub timestamp_writes: Option>, } static_assertions::assert_impl_all!(RenderPassDescriptor: Send, Sync); @@ -1222,7 +1222,7 @@ pub struct ComputePassDescriptor<'a> { pub label: Label<'a>, /// Defines which timestamp values will be written for this pass, and where to write them to. /// - /// Requires `Features::TIMESTAMP_QUERY` to be enabled. + /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled. pub timestamp_writes: Option>, } static_assertions::assert_impl_all!(ComputePassDescriptor: Send, Sync); From 9680a181f060421b86f70a5209d7739173d07629 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 7 May 2023 10:14:43 +0200 Subject: [PATCH 18/45] dedicated sample for timestamp queries --- CHANGELOG.md | 2 + wgpu/examples/README.md | 70 ++--- wgpu/examples/hello-compute/main.rs | 53 +--- wgpu/examples/hello-triangle/main.rs | 61 +--- wgpu/examples/timestamp-queries/README.md | 9 + wgpu/examples/timestamp-queries/main.rs | 310 ++++++++++++++++++++ wgpu/examples/timestamp-queries/shader.wgsl | 34 +++ 7 files changed, 393 insertions(+), 146 deletions(-) create mode 100644 wgpu/examples/timestamp-queries/README.md create mode 100644 wgpu/examples/timestamp-queries/main.rs create mode 100644 wgpu/examples/timestamp-queries/shader.wgsl diff --git a/CHANGELOG.md b/CHANGELOG.md index 92808b81ad..ec939c30ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -159,6 +159,8 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). Addition of `TimestampWrites` to compute and render passes to allow profiling. This brings us in line with the spec. +Added new example to demonstrate the various kinds of timestamps. + By @FL33TW00D & @wumpf in [#3636](https://github.com/gfx-rs/wgpu/pull/3636). #### Misc Breaking Changes diff --git a/wgpu/examples/README.md b/wgpu/examples/README.md index ef427e7351..bf2a3205b0 100644 --- a/wgpu/examples/README.md +++ b/wgpu/examples/README.md @@ -12,41 +12,41 @@ All the examples use [WGSL](https://gpuweb.github.io/gpuweb/wgsl.html) shaders u All framework-based examples render to the window and are reftested against the screenshot in the directory. ## Feature matrix -| Feature | boids | bunnymark | cube | mipmap | msaa-line | shadow | skybox | texture-arrays | water | conservative-raster | stencil-triangles | -|------------------------------| ------ | --------- | ------ | ------ | --------- | ------ | ------ | -------------- | ------ | ------------------- |-------------------| -| vertex attributes | :star: | | :star: | | :star: | :star: | :star: | :star: | :star: | | | -| instancing | :star: | | | | | | | | | | | -| lines and points | | | | | :star: | | | | | :star: | | -| dynamic buffer offsets | | :star: | | | | :star: | | | | | | -| implicit layout | | | | :star: | | | | | | | | -| sampled color textures | :star: | :star: | :star: | :star: | | | :star: | :star: | :star: | :star: | | -| storage textures | :star: | | | | | | | | | | | -| comparison samplers | | | | | | :star: | | | | | | -| subresource views | | | | :star: | | :star: | | | | | | -| cubemaps | | | | | | | :star: | | | | | -| multisampling | | | | | :star: | | | | | | | -| off-screen rendering | | | | | | :star: | | | :star: | :star: | | -| stencil testing | | | | | | | | | | | :star: | -| depth testing | | | | | | :star: | :star: | | :star: | | | -| depth biasing | | | | | | :star: | | | | | | -| read-only depth | | | | | | | | | :star: | | | -| blending | | :star: | :star: | | | | | | :star: | | | -| render bundles | | | | | :star: | | | | :star: | | | -| compute passes | :star: | | | | | | | | | | | -| error scopes | | | :star: | | | | | | | | | -| *optional extensions* | | | | | | | | :star: | | | | -| - SPIR-V shaders | | | | | | | | | | | | -| - binding array | | | | | | | | :star: | | | | -| - push constants | | | | | | | | | | | | -| - depth clamping | | | | | | :star: | | | | | | -| - compressed textures | | | | | | | :star: | | | | | -| - polygon mode | | | :star: | | | | | | | | | -| - queries | | | | :star: | | | | | | | | -| - conservative rasterization | | | | | | | | | | :star: | | -| *integrations* | | | | | | | | | | | | -| - staging belt | | | | | | | :star: | | | | | -| - typed arena | | | | | | | | | | | | -| - obj loading | | | | | | | :star: | | | | | +| Feature | boids | bunnymark | cube | mipmap | msaa-line | shadow | skybox | texture-arrays | water | conservative-raster | stencil-triangles | timestamp-queries | +|------------------------------| ------ | --------- | ------ | ------ | --------- | ------ | ------ | -------------- | ------ | ------------------- |-------------------|-------------------| +| vertex attributes | :star: | | :star: | | :star: | :star: | :star: | :star: | :star: | | | | +| instancing | :star: | | | | | | | | | | | | +| lines and points | | | | | :star: | | | | | :star: | | | +| dynamic buffer offsets | | :star: | | | | :star: | | | | | | | +| implicit layout | | | | :star: | | | | | | | | | +| sampled color textures | :star: | :star: | :star: | :star: | | | :star: | :star: | :star: | :star: | | | +| storage textures | :star: | | | | | | | | | | | | +| comparison samplers | | | | | | :star: | | | | | | | +| subresource views | | | | :star: | | :star: | | | | | | | +| cubemaps | | | | | | | :star: | | | | | | +| multisampling | | | | | :star: | | | | | | | | +| off-screen rendering | | | | | | :star: | | | :star: | :star: | | | +| stencil testing | | | | | | | | | | | :star: | | +| depth testing | | | | | | :star: | :star: | | :star: | | | | +| depth biasing | | | | | | :star: | | | | | | | +| read-only depth | | | | | | | | | :star: | | | | +| blending | | :star: | :star: | | | | | | :star: | | | | +| render bundles | | | | | :star: | | | | :star: | | | | +| compute passes | :star: | | | | | | | | | | | :star: | +| error scopes | | | :star: | | | | | | | | | | +| *optional extensions* | | | | | | | | :star: | | | | | +| - SPIR-V shaders | | | | | | | | | | | | | +| - binding array | | | | | | | | :star: | | | | | +| - push constants | | | | | | | | | | | | | +| - depth clamping | | | | | | :star: | | | | | | | +| - compressed textures | | | | | | | :star: | | | | | | +| - polygon mode | | | :star: | | | | | | | | | | +| - queries | | | | :star: | | | | | | | | :star: | +| - conservative rasterization | | | | | | | | | | :star: | | | +| *integrations* | | | | | | | | | | | | | +| - staging belt | | | | | | | :star: | | | | | | +| - typed arena | | | | | | | | | | | | | +| - obj loading | | | | | | | :star: | | | | | | ## Hacking diff --git a/wgpu/examples/hello-compute/main.rs b/wgpu/examples/hello-compute/main.rs index 9dffe8c571..2a0f58aad8 100644 --- a/wgpu/examples/hello-compute/main.rs +++ b/wgpu/examples/hello-compute/main.rs @@ -1,12 +1,9 @@ use std::{borrow::Cow, str::FromStr}; use wgpu::util::DeviceExt; -use wgpu::Buffer; // Indicates a u32 overflow in an intermediate Collatz value const OVERFLOW: u32 = 0xffffffff; -const NUM_SAMPLES: u64 = 2; - async fn run() { let numbers = if std::env::args().len() <= 1 { let default = vec![1, 2, 3, 4]; @@ -71,14 +68,6 @@ async fn execute_gpu_inner( queue: &wgpu::Queue, numbers: &[u32], ) -> Option> { - //Create query set - let query_set = device.create_query_set(&wgpu::QuerySetDescriptor { - label: Some("Timestamp query set"), - count: NUM_SAMPLES as u32, - ty: wgpu::QueryType::Timestamp, - }); - let timestamp_period = queue.get_timestamp_period(); - // Loads the shader from WGSL let cs_module = device.create_shader_module(wgpu::ShaderModuleDescriptor { label: None, @@ -145,11 +134,7 @@ async fn execute_gpu_inner( { let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None, - timestamp_writes: Some(wgpu::ComputePassTimestampWrites { - query_set: &query_set, - beginning_of_pass_write_index: Some(0), - end_of_pass_write_index: Some(1), - }), + timestamp_writes: None, }); cpass.set_pipeline(&compute_pipeline); cpass.set_bind_group(0, &bind_group, &[]); @@ -160,27 +145,6 @@ async fn execute_gpu_inner( // Will copy data from storage buffer on GPU to staging buffer on CPU. encoder.copy_buffer_to_buffer(&storage_buffer, 0, &staging_buffer, 0, size); - let query_resolve_buffer = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("query resolve buffer"), - size: (std::mem::size_of::() * NUM_SAMPLES as usize) as wgpu::BufferAddress, - usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE, - mapped_at_creation: false, - }); - let query_destination_buffer = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("query dest buffer"), - size: (std::mem::size_of::() * NUM_SAMPLES as usize) as wgpu::BufferAddress, - usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, - mapped_at_creation: false, - }); - encoder.resolve_query_set(&query_set, 0..NUM_SAMPLES as u32, &query_resolve_buffer, 0); - encoder.copy_buffer_to_buffer( - &query_resolve_buffer, - 0, - &query_destination_buffer, - 0, - query_resolve_buffer.size(), - ); - // Submits command encoder for processing queue.submit(Some(encoder.finish())); @@ -217,26 +181,11 @@ async fn execute_gpu_inner( None }; - query_destination_buffer - .slice(..) - .map_async(wgpu::MapMode::Read, |_| ()); device.poll(wgpu::Maintain::Wait); - resolve_timestamps(&query_destination_buffer, timestamp_period); res } -fn resolve_timestamps(destination_buffer: &Buffer, timestamp_period: f32) { - let timestamp_view = destination_buffer - .slice(..(std::mem::size_of::() * 2) as wgpu::BufferAddress) - .get_mapped_range(); - - let timestamps: &[u64] = bytemuck::cast_slice(×tamp_view); - log::info!("Timestamps: {:?}", timestamps); - let elapsed_ns = (timestamps[1] - timestamps[0]) as f64 * timestamp_period as f64; - log::info!("Elapsed time: {:.2} μs", elapsed_ns / 1000.0); -} - fn main() { #[cfg(not(target_arch = "wasm32"))] { diff --git a/wgpu/examples/hello-triangle/main.rs b/wgpu/examples/hello-triangle/main.rs index e20e8ceb18..1a0bcd2382 100644 --- a/wgpu/examples/hello-triangle/main.rs +++ b/wgpu/examples/hello-triangle/main.rs @@ -5,8 +5,6 @@ use winit::{ window::Window, }; -const NUM_SAMPLES: usize = 2; - async fn run(event_loop: EventLoop<()>, window: Window) { let size = window.inner_size(); @@ -44,12 +42,6 @@ async fn run(event_loop: EventLoop<()>, window: Window) { source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("shader.wgsl"))), }); - let query_set = device.create_query_set(&wgpu::QuerySetDescriptor { - label: Some("Timestamp query set"), - count: NUM_SAMPLES as u32, - ty: wgpu::QueryType::Timestamp, - }); - let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, bind_group_layouts: &[], @@ -90,26 +82,11 @@ async fn run(event_loop: EventLoop<()>, window: Window) { surface.configure(&device, &config); - let query_resolve_buffer = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("query resolve buffer"), - size: (std::mem::size_of::() * NUM_SAMPLES as usize) as wgpu::BufferAddress, - usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE, - mapped_at_creation: false, - }); - let query_destination_buffer = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("query dest buffer"), - size: (std::mem::size_of::() * NUM_SAMPLES as usize) as wgpu::BufferAddress, - usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, - mapped_at_creation: false, - }); - - let timestamp_period = queue.get_timestamp_period(); - event_loop.run(move |event, _, control_flow| { // Have the closure take ownership of the resources. // `event_loop.run` never returns, therefore we must do this to ensure // the resources are properly cleaned up. - let _ = (&instance, &adapter, &shader, &pipeline_layout, &query_set); + let _ = (&instance, &adapter, &shader, &pipeline_layout); *control_flow = ControlFlow::Wait; match event { @@ -145,37 +122,16 @@ async fn run(event_loop: EventLoop<()>, window: Window) { }, })], depth_stencil_attachment: None, - timestamp_writes: Some(wgpu::RenderPassTimestampWrites { - query_set: &query_set, - beginning_of_pass_write_index: Some(0), - end_of_pass_write_index: Some(1), - }), + timestamp_writes: None, }); rpass.set_pipeline(&render_pipeline); rpass.draw(0..3, 0..1); } - encoder.resolve_query_set( - &query_set, - 0..NUM_SAMPLES as u32, - &query_resolve_buffer, - 0, - ); - encoder.copy_buffer_to_buffer( - &query_resolve_buffer, - 0, - &query_destination_buffer, - 0, - query_resolve_buffer.size(), - ); queue.submit(Some(encoder.finish())); frame.present(); - query_destination_buffer - .slice(..) - .map_async(wgpu::MapMode::Read, |_| ()); device.poll(wgpu::Maintain::Wait); - resolve_timestamps(&query_destination_buffer, timestamp_period); } Event::WindowEvent { event: WindowEvent::CloseRequested, @@ -186,19 +142,6 @@ async fn run(event_loop: EventLoop<()>, window: Window) { }); } -fn resolve_timestamps(destination_buffer: &wgpu::Buffer, timestamp_period: f32) { - { - let timestamp_view = destination_buffer - .slice(..(std::mem::size_of::() * 2) as wgpu::BufferAddress) - .get_mapped_range(); - - let timestamps: &[u64] = bytemuck::cast_slice(×tamp_view); - let elapsed_ns = (timestamps[1] - timestamps[0]) as f64 * timestamp_period as f64; - log::info!("Elapsed time: {:.2} μs", elapsed_ns / 1000.0); - } - destination_buffer.unmap(); -} - fn main() { let event_loop = EventLoop::new(); let window = winit::window::Window::new(&event_loop).unwrap(); diff --git a/wgpu/examples/timestamp-queries/README.md b/wgpu/examples/timestamp-queries/README.md new file mode 100644 index 0000000000..e1b186031c --- /dev/null +++ b/wgpu/examples/timestamp-queries/README.md @@ -0,0 +1,9 @@ +# timestamp-queries + +This example shows various ways of querying time when supported. + +## To Run + +``` +cargo run --example timestamp-queries +``` diff --git a/wgpu/examples/timestamp-queries/main.rs b/wgpu/examples/timestamp-queries/main.rs new file mode 100644 index 0000000000..ecac2952cc --- /dev/null +++ b/wgpu/examples/timestamp-queries/main.rs @@ -0,0 +1,310 @@ +//! Sample demonstrating different kinds of timestamp queries. + +use std::borrow::Cow; + +use wgpu::util::DeviceExt; + +// Queries: +// * render start +// * render in-between (optional) +// * render end +// * compute start +// * compute in-between (optional) +// * compute end +const NUM_QUERIES: usize = 6; + +struct Queries { + set: wgpu::QuerySet, + resolve_buffer: wgpu::Buffer, + destination_buffer: wgpu::Buffer, +} + +impl Queries { + fn new(device: &wgpu::Device) -> Self { + Queries { + set: device.create_query_set(&wgpu::QuerySetDescriptor { + label: Some("Timestamp query set"), + count: NUM_QUERIES as _, + ty: wgpu::QueryType::Timestamp, + }), + resolve_buffer: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("query resolve buffer"), + size: (std::mem::size_of::() * NUM_QUERIES) as wgpu::BufferAddress, + usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE, + mapped_at_creation: false, + }), + destination_buffer: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("query dest buffer"), + size: (std::mem::size_of::() * NUM_QUERIES) as wgpu::BufferAddress, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }), + } + } + + fn resolve(&self, encoder: &mut wgpu::CommandEncoder) { + encoder.resolve_query_set(&self.set, 0..NUM_QUERIES as u32, &self.resolve_buffer, 0); + encoder.copy_buffer_to_buffer( + &self.resolve_buffer, + 0, + &self.destination_buffer, + 0, + self.resolve_buffer.size(), + ); + } + + fn wait_and_print_results(&self, device: &wgpu::Device, queue: &wgpu::Queue) { + self.destination_buffer + .slice(..) + .map_async(wgpu::MapMode::Read, |_| ()); + device.poll(wgpu::Maintain::Wait); + + { + let timestamp_view = self + .destination_buffer + .slice(..(std::mem::size_of::() * NUM_QUERIES) as wgpu::BufferAddress) + .get_mapped_range(); + + let timestamps: &[u64] = bytemuck::cast_slice(×tamp_view); + println!("Raw timestamp buffer contents: {:?}", timestamps); + + let elapsed_us = |start, end| { + let period = queue.get_timestamp_period(); + (end - start) as f64 * period as f64 / 1000.0 + }; + + println!( + "Elapsed time render pass: {:.2} μs", + elapsed_us(timestamps[0], timestamps[2]) + ); + if device + .features() + .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) + { + println!( + "Elapsed time first triangle: {:.2} μs", + elapsed_us(timestamps[1], timestamps[2]) + ); + } + + println!( + "Elapsed time compute pass: {:.2} μs", + elapsed_us(timestamps[3], timestamps[5]) + ); + if device + .features() + .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) + { + println!( + "Elapsed time first compute: {:.2} μs", + elapsed_us(timestamps[3], timestamps[3]) + ); + } + } + + self.destination_buffer.unmap(); + } +} + +async fn run() { + // Instantiates instance of WebGPU + let instance = wgpu::Instance::default(); + + // `request_adapter` instantiates the general connection to the GPU + let adapter = instance + .request_adapter(&wgpu::RequestAdapterOptions::default()) + .await + .expect("Failed to request adapter"); + + // Check timestamp features. + if adapter.features().contains(wgpu::Features::TIMESTAMP_QUERY) { + println!("Adapter supports timestamp queries"); + } else { + println!("Adapter does not support timestamp queries, aborting"); + return; + } + let mut features = wgpu::Features::empty() | wgpu::Features::TIMESTAMP_QUERY; + if adapter + .features() + .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) + { + features |= wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES; + } + + // `request_device` instantiates the feature specific connection to the GPU, defining some parameters, + // `features` being the available features. + let (device, queue) = adapter + .request_device( + &wgpu::DeviceDescriptor { + label: None, + features, + limits: wgpu::Limits::downlevel_defaults(), + }, + None, + ) + .await + .unwrap(); + + let mut encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let queries = Queries::new(&device); + + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("shader.wgsl"))), + }); + + // Render two triangles and profile it. + render_pass(&device, &shader, &mut encoder, &queries.set, 0); + + // Compute a hash function on a single thread a bunch of time and profile it. + compute_pass(&device, &shader, &mut encoder, &queries.set, 3); + + queries.resolve(&mut encoder); + queue.submit(Some(encoder.finish())); + queries.wait_and_print_results(&device, &queue); +} + +fn compute_pass( + device: &wgpu::Device, + shader: &wgpu::ShaderModule, + encoder: &mut wgpu::CommandEncoder, + query_set: &wgpu::QuerySet, + query_offset: u32, +) { + let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("Storage Buffer"), + contents: bytemuck::cast_slice(&[42]), + usage: wgpu::BufferUsages::STORAGE, + }); + let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: None, + module: &shader, + entry_point: "main_cs", + }); + let bind_group_layout = compute_pipeline.get_bind_group_layout(0); + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: storage_buffer.as_entire_binding(), + }], + }); + + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: Some(wgpu::ComputePassTimestampWrites { + query_set, + beginning_of_pass_write_index: Some(query_offset + 0), + end_of_pass_write_index: Some(query_offset + 2), + }), + }); + cpass.set_pipeline(&compute_pipeline); + cpass.set_bind_group(0, &bind_group, &[]); + cpass.dispatch_workgroups(1, 1, 1); + if device + .features() + .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) + { + cpass.write_timestamp(query_set, query_offset + 1); + } + cpass.dispatch_workgroups(1, 1, 1); +} + +fn render_pass( + device: &wgpu::Device, + shader: &wgpu::ShaderModule, + encoder: &mut wgpu::CommandEncoder, + query_set: &wgpu::QuerySet, + query_offset: u32, +) { + let format = wgpu::TextureFormat::Rgba8Unorm; + + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[], + push_constant_ranges: &[], + }); + + let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + vertex: wgpu::VertexState { + module: &shader, + entry_point: "vs_main", + buffers: &[], + }, + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: "fs_main", + targets: &[Some(format.into())], + }), + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + multiview: None, + }); + + let render_target = device.create_texture(&wgpu::TextureDescriptor { + label: Some("rendertarget"), + size: wgpu::Extent3d { + width: 512, + height: 512, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + view_formats: &[format], + }); + let render_target_view = render_target.create_view(&wgpu::TextureViewDescriptor::default()); + + let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: None, + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &render_target_view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::GREEN), + store: true, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: Some(wgpu::RenderPassTimestampWrites { + query_set, + beginning_of_pass_write_index: Some(query_offset + 0), + end_of_pass_write_index: Some(query_offset + 2), + }), + }); + + rpass.set_pipeline(&render_pipeline); + + rpass.draw(0..3, 0..1); + if device + .features() + .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) + { + rpass.write_timestamp(query_set, query_offset + 1); + } + + rpass.draw(0..3, 0..1); +} + +fn main() { + #[cfg(not(target_arch = "wasm32"))] + { + env_logger::init(); + pollster::block_on(run()); + } + #[cfg(target_arch = "wasm32")] + { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + console_log::init().expect("could not initialize logger"); + wasm_bindgen_futures::spawn_local(run()); + } +} diff --git a/wgpu/examples/timestamp-queries/shader.wgsl b/wgpu/examples/timestamp-queries/shader.wgsl new file mode 100644 index 0000000000..c0689b0236 --- /dev/null +++ b/wgpu/examples/timestamp-queries/shader.wgsl @@ -0,0 +1,34 @@ +@vertex +fn vs_main(@builtin(vertex_index) in_vertex_index: u32) -> @builtin(position) vec4 { + let x = f32(i32(in_vertex_index) - 1); + let y = f32(i32(in_vertex_index & 1u) * 2 - 1); + return vec4(x, y, 0.0, 1.0); +} + +@fragment +fn fs_main() -> @location(0) vec4 { + return vec4(1.0, 0.0, 0.0, 1.0); +} + + +@group(0) +@binding(0) +var buffer: array; // Used as both input and output for convenience. + +fn pcg_hash(input: u32) -> u32 { + let state = input * 747796405u + 2891336453u; + let word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u; + return (word >> 22u) ^ word; +} + +@compute +@workgroup_size(1) +fn main_cs(@builtin(global_invocation_id) global_id: vec3) { + var value = buffer[0]; + + for (var i = 0u; i < 128u; i += 1u) { + value = pcg_hash(value); + } + + buffer[0] = value; +} From 76b55bd6f98d674a60add0264b67f0deaac569e3 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 7 May 2023 10:57:46 +0200 Subject: [PATCH 19/45] add command encoder timestamps on example, notes on (missing) implementation for metal --- wgpu-hal/src/metal/command.rs | 11 ++++++++++- wgpu-types/src/lib.rs | 2 +- wgpu/examples/timestamp-queries/main.rs | 26 ++++++++++++++++++------- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index d64b10a8c9..23b478b2fa 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -321,7 +321,16 @@ impl crate::CommandEncoder for super::CommandEncoder { _ => {} } } - unsafe fn write_timestamp(&mut self, _set: &super::QuerySet, _index: u32) {} + unsafe fn write_timestamp(&mut self, _set: &super::QuerySet, _index: u32) { + // TODO: If MTLCounterSamplingPoint::AtDrawBoundary/AtBlitBoundary/AtDispatchBoundary is supported, + // we don't need to insert a new encoder, but can instead use respective current one. + //let encoder = self.enter_any().unwrap_or_else(|| self.enter_blit()); + + // TODO: Otherwise, we need to create a new blit command encoder with a descriptor that inserts the timestamps. + // Note that as of writing creating a new encoder is not exposed by the metal crate. + // https://developer.apple.com/documentation/metal/mtlcommandbuffer/3564431-makeblitcommandencoder + } + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range) { let encoder = self.enter_blit(); let raw_range = metal::NSRange { diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 49b20ae675..b24e22807b 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -254,7 +254,7 @@ bitflags::bitflags! { /// Supported Platforms: /// - Vulkan /// - DX12 - /// - Metal + /// - Metal - TODO: Not yet supported on command encoder. /// /// This is a web and native feature. const TIMESTAMP_QUERY = 1 << 1; diff --git a/wgpu/examples/timestamp-queries/main.rs b/wgpu/examples/timestamp-queries/main.rs index ecac2952cc..685cd2d72f 100644 --- a/wgpu/examples/timestamp-queries/main.rs +++ b/wgpu/examples/timestamp-queries/main.rs @@ -2,16 +2,19 @@ use std::borrow::Cow; +use wgc::command::RenderBundleEncoderDescriptor; use wgpu::util::DeviceExt; // Queries: +// * encoder timestamp start +// * encoder timestamp end // * render start // * render in-between (optional) // * render end // * compute start // * compute in-between (optional) // * compute end -const NUM_QUERIES: usize = 6; +const NUM_QUERIES: usize = 8; struct Queries { set: wgpu::QuerySet, @@ -73,9 +76,14 @@ impl Queries { (end - start) as f64 * period as f64 / 1000.0 }; + println!( + "Elapsed time render + compute: {:.2} μs", + elapsed_us(timestamps[0], timestamps[1]) + ); + println!( "Elapsed time render pass: {:.2} μs", - elapsed_us(timestamps[0], timestamps[2]) + elapsed_us(timestamps[2], timestamps[4]) ); if device .features() @@ -83,13 +91,13 @@ impl Queries { { println!( "Elapsed time first triangle: {:.2} μs", - elapsed_us(timestamps[1], timestamps[2]) + elapsed_us(timestamps[3], timestamps[4]) ); } println!( "Elapsed time compute pass: {:.2} μs", - elapsed_us(timestamps[3], timestamps[5]) + elapsed_us(timestamps[5], timestamps[7]) ); if device .features() @@ -97,7 +105,7 @@ impl Queries { { println!( "Elapsed time first compute: {:.2} μs", - elapsed_us(timestamps[3], timestamps[3]) + elapsed_us(timestamps[5], timestamps[6]) ); } } @@ -155,11 +163,15 @@ async fn run() { source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("shader.wgsl"))), }); + encoder.write_timestamp(&queries.set, 0); + // Render two triangles and profile it. - render_pass(&device, &shader, &mut encoder, &queries.set, 0); + render_pass(&device, &shader, &mut encoder, &queries.set, 2); // Compute a hash function on a single thread a bunch of time and profile it. - compute_pass(&device, &shader, &mut encoder, &queries.set, 3); + compute_pass(&device, &shader, &mut encoder, &queries.set, 5); + + encoder.write_timestamp(&queries.set, 1); queries.resolve(&mut encoder); queue.submit(Some(encoder.finish())); From 7f48055ef6ffbfec2bb93a50e720e74d8712d9fe Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 7 May 2023 16:24:13 +0200 Subject: [PATCH 20/45] finish rolling back changes to hello-triangle & hello-compute --- wgpu/examples/hello-compute/main.rs | 10 +++------- wgpu/examples/hello-triangle/main.rs | 2 -- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/wgpu/examples/hello-compute/main.rs b/wgpu/examples/hello-compute/main.rs index 2a0f58aad8..ca34ad3f69 100644 --- a/wgpu/examples/hello-compute/main.rs +++ b/wgpu/examples/hello-compute/main.rs @@ -160,7 +160,7 @@ async fn execute_gpu_inner( device.poll(wgpu::Maintain::Wait); // Awaits until `buffer_future` can be read from - let res = if let Some(Ok(())) = receiver.receive().await { + if let Some(Ok(())) = receiver.receive().await { // Gets contents of buffer let data = buffer_slice.get_mapped_range(); // Since contents are got in bytes, this converts these bytes back to u32 @@ -178,12 +178,8 @@ async fn execute_gpu_inner( // Returns data from buffer Some(result) } else { - None - }; - - device.poll(wgpu::Maintain::Wait); - - res + panic!("failed to run compute on gpu!") + } } fn main() { diff --git a/wgpu/examples/hello-triangle/main.rs b/wgpu/examples/hello-triangle/main.rs index 1a0bcd2382..b87f5cba62 100644 --- a/wgpu/examples/hello-triangle/main.rs +++ b/wgpu/examples/hello-triangle/main.rs @@ -130,8 +130,6 @@ async fn run(event_loop: EventLoop<()>, window: Window) { queue.submit(Some(encoder.finish())); frame.present(); - - device.poll(wgpu::Maintain::Wait); } Event::WindowEvent { event: WindowEvent::CloseRequested, From e3f71b70f950712e8c6719a0760e2e26370288b0 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 7 May 2023 18:30:32 +0200 Subject: [PATCH 21/45] reset timerqueries for being/end pass queries, implement begin/end pass queries for vulkan --- wgpu-core/src/command/compute.rs | 22 +++++++++++++- wgpu-core/src/command/mod.rs | 2 ++ wgpu-core/src/command/query.rs | 6 ++-- wgpu-core/src/command/render.rs | 27 ++++++++++++----- wgpu-hal/src/dx11/command.rs | 5 +++- wgpu-hal/src/dx12/command.rs | 5 +++- wgpu-hal/src/vulkan/command.rs | 50 ++++++++++++++++++++++++++++++-- wgpu-hal/src/vulkan/device.rs | 1 + wgpu-hal/src/vulkan/mod.rs | 4 +++ 9 files changed, 106 insertions(+), 16 deletions(-) diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index a42dda5d27..d40f6cd55e 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -7,7 +7,8 @@ use crate::{ end_pipeline_statistics_query, memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, BasePass, BasePassRef, BindGroupStateChange, CommandBuffer, CommandEncoderError, - CommandEncoderStatus, MapPassErr, PassErrorScope, QueryUseError, StateChange, + CommandEncoderStatus, MapPassErr, PassErrorScope, QueryUseError, + StateChange, }, device::{MissingDownlevelFlags, MissingFeatures}, error::{ErrorFormatter, PrettyError}, @@ -412,6 +413,25 @@ impl Global { .ok_or(ComputePassErrorInner::InvalidQuerySet(tw.query_set)) .map_pass_err(init_scope)?; + // Unlike in render passes we can't delay resetting the query sets since + // there is no auxillary pass. + let range = if let (Some(index_a), Some(index_b)) = + (tw.beginning_of_pass_write_index, tw.end_of_pass_write_index) + { + Some(index_a.min(index_b)..index_a.max(index_b) + 1) + } else { + tw.beginning_of_pass_write_index + .or(tw.end_of_pass_write_index) + .map(|i| i..i + 1) + }; + // Range should always be Some, both values being None should lead to a validation err.r + // But no point in erroring over that nuance here! + if let Some(range) = range { + unsafe { + raw.reset_queries(&query_set.raw, range); + } + } + Some(hal::ComputePassTimestampWrites { query_set: &query_set.raw, beginning_of_pass_write_index: tw.beginning_of_pass_write_index, diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 8c5e5f2d40..99d21ca010 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -99,6 +99,7 @@ pub struct CommandBuffer { pub(crate) trackers: Tracker, buffer_memory_init_actions: Vec, texture_memory_actions: CommandBufferTextureMemoryActions, + pub(crate) pending_query_resets: QueryResetMap, limits: wgt::Limits, support_clear_texture: bool, #[cfg(feature = "trace")] @@ -127,6 +128,7 @@ impl CommandBuffer { trackers: Tracker::new(), buffer_memory_init_actions: Default::default(), texture_memory_actions: Default::default(), + pending_query_resets: QueryResetMap::new(), limits, support_clear_texture: features.contains(wgt::Features::CLEAR_TEXTURE), #[cfg(feature = "trace")] diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs index 6181f83fa8..17dab277e2 100644 --- a/wgpu-core/src/command/query.rs +++ b/wgpu-core/src/command/query.rs @@ -15,7 +15,7 @@ use thiserror::Error; use wgt::BufferAddress; #[derive(Debug)] -pub(super) struct QueryResetMap { +pub(crate) struct QueryResetMap { map: FastHashMap, Epoch)>, _phantom: PhantomData, } @@ -43,12 +43,12 @@ impl QueryResetMap { } pub fn reset_queries( - self, + &mut self, raw_encoder: &mut A::CommandEncoder, query_set_storage: &Storage, id::QuerySetId>, backend: wgt::Backend, ) -> Result<(), id::QuerySetId> { - for (query_set_id, (state, epoch)) in self.map.into_iter() { + for (query_set_id, (state, epoch)) in self.map.drain() { let id = Id::zip(query_set_id, epoch, backend); let query_set = query_set_storage.get(id).map_err(|_| id)?; diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index ccd96f3edb..a2b192b110 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -6,8 +6,8 @@ use crate::{ end_pipeline_statistics_query, memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, BasePass, BasePassRef, BindGroupStateChange, CommandBuffer, CommandEncoderError, - CommandEncoderStatus, DrawError, ExecutionError, MapPassErr, PassErrorScope, QueryResetMap, - QueryUseError, RenderCommand, RenderCommandError, StateChange, + CommandEncoderStatus, DrawError, ExecutionError, MapPassErr, PassErrorScope, QueryUseError, + RenderCommand, RenderCommandError, StateChange, }, device::{ AttachmentData, Device, MissingDownlevelFlags, MissingFeatures, @@ -1119,6 +1119,17 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { .add_single(&*query_set_guard, tw.query_set) .ok_or(RenderPassErrorInner::InvalidQuerySet(tw.query_set))?; + if let Some(index) = tw.beginning_of_pass_write_index { + cmd_buf + .pending_query_resets + .use_query_set(tw.query_set, query_set, index); + } + if let Some(index) = tw.end_of_pass_write_index { + cmd_buf + .pending_query_resets + .use_query_set(tw.query_set, query_set, index); + } + Some(hal::RenderPassTimestampWrites { query_set: &query_set.raw, beginning_of_pass_write_index: tw.beginning_of_pass_write_index, @@ -1268,7 +1279,7 @@ impl Global { let mut token = Token::root(); let (device_guard, mut token) = hub.devices.read(&mut token); - let (scope, query_reset_state, pending_discard_init_fixups) = { + let (scope, pending_discard_init_fixups) = { let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); // Spell out the type, to placate rust-analyzer. @@ -1349,7 +1360,6 @@ impl Global { let mut dynamic_offset_count = 0; let mut string_offset = 0; let mut active_query = None; - let mut query_reset_state = QueryResetMap::new(); for command in base.commands { match *command { @@ -2073,7 +2083,7 @@ impl Global { raw, query_set_id, query_index, - Some(&mut query_reset_state), + Some(&mut cmd_buf.pending_query_resets), ) .map_pass_err(scope)?; } @@ -2095,7 +2105,7 @@ impl Global { raw, query_set_id, query_index, - Some(&mut query_reset_state), + Some(&mut cmd_buf.pending_query_resets), &mut active_query, ) .map_pass_err(scope)?; @@ -2192,7 +2202,7 @@ impl Global { info.finish(raw, &*texture_guard).map_pass_err(init_scope)?; cmd_buf.encoder.close(); - (trackers, query_reset_state, pending_discard_init_fixups) + (trackers, pending_discard_init_fixups) }; let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); @@ -2212,7 +2222,8 @@ impl Global { &device_guard[cmd_buf.device_id.value], ); - query_reset_state + cmd_buf + .pending_query_resets .reset_queries( transit, &query_set_guard, diff --git a/wgpu-hal/src/dx11/command.rs b/wgpu-hal/src/dx11/command.rs index 1c73f3c325..17cd5a22d2 100644 --- a/wgpu-hal/src/dx11/command.rs +++ b/wgpu-hal/src/dx11/command.rs @@ -246,7 +246,10 @@ impl crate::CommandEncoder for super::CommandEncoder { todo!() } - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + unsafe fn begin_compute_pass<'a>( + &mut self, + desc: &crate::ComputePassDescriptor<'a, super::Api>, + ) { todo!() } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 4786a61bf9..43d27f6626 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1103,7 +1103,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + unsafe fn begin_compute_pass<'a>( + &mut self, + desc: &crate::ComputePassDescriptor<'a, super::Api>, + ) { unsafe { self.begin_pass(super::PassKind::Compute, desc.label) }; } unsafe fn end_compute_pass(&mut self) { diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index ba4e088fc3..76fdfb6905 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -45,6 +45,21 @@ impl super::DeviceShared { } } +impl super::CommandEncoder { + fn write_pass_end_timestamp_if_requested(&mut self) { + if let Some((query_set, index)) = self.end_of_pass_timer_query.take() { + unsafe { + self.device.raw.cmd_write_timestamp( + self.active, + vk::PipelineStageFlags::BOTTOM_OF_PIPE, + query_set, + index, + ); + } + } + } +} + impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { if self.free.is_empty() { @@ -485,6 +500,18 @@ impl crate::CommandEncoder for super::CommandEncoder { self.rpass_debug_marker_active = true; } + // Start timestamp if any (before all other commands but after debug marker) + if let Some(timestamp_writes) = &desc.timestamp_writes { + if let Some(index) = timestamp_writes.beginning_of_pass_write_index { + unsafe { + self.write_timestamp(timestamp_writes.query_set, index); + } + } + self.end_of_pass_timer_query = timestamp_writes + .end_of_pass_write_index + .map(|index| (timestamp_writes.query_set.raw, index)); + } + unsafe { self.device .raw @@ -504,10 +531,16 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn end_render_pass(&mut self) { unsafe { self.device.raw.cmd_end_render_pass(self.active); - if self.rpass_debug_marker_active { + } + + // After all other commands but before debug marker, so this is still seen as part of this pass. + self.write_pass_end_timestamp_if_requested(); + + if self.rpass_debug_marker_active { + unsafe { self.end_debug_marker(); - self.rpass_debug_marker_active = false; } + self.rpass_debug_marker_active = false; } } @@ -783,8 +816,21 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe { self.begin_debug_marker(label) }; self.rpass_debug_marker_active = true; } + + if let Some(timestamp_writes) = &desc.timestamp_writes { + if let Some(index) = timestamp_writes.beginning_of_pass_write_index { + unsafe { + self.write_timestamp(timestamp_writes.query_set, index); + } + } + self.end_of_pass_timer_query = timestamp_writes + .end_of_pass_write_index + .map(|index| (timestamp_writes.query_set.raw, index)); + } } unsafe fn end_compute_pass(&mut self) { + self.write_pass_end_timestamp_if_requested(); + if self.rpass_debug_marker_active { unsafe { self.end_debug_marker() }; self.rpass_debug_marker_active = false diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 09b887772c..650a85c1af 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1166,6 +1166,7 @@ impl crate::Device for super::Device { free: Vec::new(), discarded: Vec::new(), rpass_debug_marker_active: false, + end_of_pass_timer_query: None, }) } unsafe fn destroy_command_encoder(&self, cmd_encoder: super::CommandEncoder) { diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 27200dc4e0..09fc5a6ba4 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -377,6 +377,10 @@ pub struct CommandEncoder { /// If this is true, the active renderpass enabled a debug span, /// and needs to be disabled on renderpass close. rpass_debug_marker_active: bool, + + /// If set, the end of the next render/compute pass will write a timestamp at + /// the given pool & location. + end_of_pass_timer_query: Option<(vk::QueryPool, u32)>, } impl fmt::Debug for CommandEncoder { From 55627de610bbb6a209fc646b6d1ac56f7dd56944 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 7 May 2023 18:38:50 +0200 Subject: [PATCH 22/45] implement begin/end pass timer queries for DX12 --- wgpu-hal/src/dx12/command.rs | 39 ++++++++++++++++++++++++++++++++++++ wgpu-hal/src/dx12/device.rs | 1 + wgpu-hal/src/dx12/mod.rs | 4 ++++ 3 files changed, 44 insertions(+) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 43d27f6626..35c7f357ee 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -225,6 +225,18 @@ impl super::CommandEncoder { self.pass.layout = layout.clone(); self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1; } + + fn write_pass_end_timestamp_if_requested(&mut self) { + if let Some((query_set_raw, index)) = self.end_of_pass_timer_query.take() { + unsafe { + self.list.unwrap().EndQuery( + query_set_raw.as_mut_ptr(), + d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + index, + ); + } + } + } } impl crate::CommandEncoder for super::CommandEncoder { @@ -655,6 +667,19 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { unsafe { self.begin_pass(super::PassKind::Render, desc.label) }; + + // Start timestamp if any (before all other commands but after debug marker) + if let Some(timestamp_writes) = &desc.timestamp_writes { + if let Some(index) = timestamp_writes.beginning_of_pass_write_index { + unsafe { + self.write_timestamp(timestamp_writes.query_set, index); + } + } + self.end_of_pass_timer_query = timestamp_writes + .end_of_pass_write_index + .map(|index| (timestamp_writes.query_set.raw, index)); + } + let mut color_views = [d3d12::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_ATTACHMENTS]; for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { if let Some(cat) = cat.as_ref() { @@ -824,6 +849,8 @@ impl crate::CommandEncoder for super::CommandEncoder { } } + self.write_pass_end_timestamp_if_requested(); + unsafe { self.end_pass() }; } @@ -1108,8 +1135,20 @@ impl crate::CommandEncoder for super::CommandEncoder { desc: &crate::ComputePassDescriptor<'a, super::Api>, ) { unsafe { self.begin_pass(super::PassKind::Compute, desc.label) }; + + if let Some(timestamp_writes) = &desc.timestamp_writes { + if let Some(index) = timestamp_writes.beginning_of_pass_write_index { + unsafe { + self.write_timestamp(timestamp_writes.query_set, index); + } + } + self.end_of_pass_timer_query = timestamp_writes + .end_of_pass_write_index + .map(|index| (timestamp_writes.query_set.raw, index)); + } } unsafe fn end_compute_pass(&mut self) { + self.write_pass_end_timestamp_if_requested(); unsafe { self.end_pass() }; } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 7e14818572..54a4265d7d 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -638,6 +638,7 @@ impl crate::Device for super::Device { free_lists: Vec::new(), pass: super::PassState::new(), temp: super::Temp::default(), + end_of_pass_timer_query: None, }) } unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) { diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 6cdf3ffe64..ad11ff57dc 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -373,6 +373,10 @@ pub struct CommandEncoder { free_lists: Vec, pass: PassState, temp: Temp, + + /// If set, the end of the next render/compute pass will write a timestamp at + /// the given pool & location. + end_of_pass_timer_query: Option<(d3d12::QueryHeap, u32)>, } unsafe impl Send for CommandEncoder {} From f9df58886998d2f1ddccfbda4756ff36079818fb Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 7 May 2023 18:40:47 +0200 Subject: [PATCH 23/45] minor cleanup in new timerstamp-query sample --- wgpu/examples/timestamp-queries/main.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/wgpu/examples/timestamp-queries/main.rs b/wgpu/examples/timestamp-queries/main.rs index 685cd2d72f..5ddcc5981f 100644 --- a/wgpu/examples/timestamp-queries/main.rs +++ b/wgpu/examples/timestamp-queries/main.rs @@ -2,7 +2,6 @@ use std::borrow::Cow; -use wgc::command::RenderBundleEncoderDescriptor; use wgpu::util::DeviceExt; // Queries: @@ -71,9 +70,9 @@ impl Queries { let timestamps: &[u64] = bytemuck::cast_slice(×tamp_view); println!("Raw timestamp buffer contents: {:?}", timestamps); - let elapsed_us = |start, end| { + let elapsed_us = |start, end: u64| { let period = queue.get_timestamp_period(); - (end - start) as f64 * period as f64 / 1000.0 + end.wrapping_sub(start) as f64 * period as f64 / 1000.0 }; println!( @@ -122,13 +121,13 @@ async fn run() { let adapter = instance .request_adapter(&wgpu::RequestAdapterOptions::default()) .await - .expect("Failed to request adapter"); + .expect("Failed to request adapter."); // Check timestamp features. if adapter.features().contains(wgpu::Features::TIMESTAMP_QUERY) { - println!("Adapter supports timestamp queries"); + println!("Adapter supports timestamp queries."); } else { - println!("Adapter does not support timestamp queries, aborting"); + println!("Adapter does not support timestamp queries, aborting."); return; } let mut features = wgpu::Features::empty() | wgpu::Features::TIMESTAMP_QUERY; @@ -136,7 +135,10 @@ async fn run() { .features() .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) { + println!("Adapter supports timestamp queries within passes."); features |= wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES; + } else { + println!("Adapter does not support timestamp queries within passes."); } // `request_device` instantiates the feature specific connection to the GPU, defining some parameters, From 70414a34dcf80e9fb77c2c38ef21bcb121d58895 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 7 May 2023 19:01:42 +0200 Subject: [PATCH 24/45] compile & warning fixes (in particular for trace feature) --- player/src/lib.rs | 15 ++++++++++++--- wgpu-core/src/command/compute.rs | 9 ++++++--- wgpu-core/src/command/render.rs | 4 +++- wgpu-core/src/device/trace.rs | 2 ++ wgpu-hal/examples/halmark/main.rs | 2 +- wgpu-hal/examples/raw-gles.rs | 2 +- wgpu-hal/src/metal/command.rs | 4 ++-- wgpu-hal/src/vulkan/command.rs | 4 ++-- wgpu/examples/boids/main.rs | 4 ++-- wgpu/examples/bunnymark/main.rs | 2 +- wgpu/examples/capture/main.rs | 2 +- wgpu/examples/conservative-raster/main.rs | 4 ++-- wgpu/examples/cube/main.rs | 2 +- wgpu/examples/hello-windows/main.rs | 2 +- wgpu/examples/mipmap/main.rs | 4 ++-- wgpu/examples/msaa-line/main.rs | 2 +- wgpu/examples/shadow/main.rs | 4 ++-- wgpu/examples/skybox/main.rs | 2 +- wgpu/examples/stencil-triangles/main.rs | 2 +- wgpu/examples/texture-arrays/main.rs | 2 +- wgpu/examples/water/main.rs | 6 +++--- wgpu/src/lib.rs | 4 ++-- wgpu/tests/regression/issue_3457.rs | 4 ++-- wgpu/tests/shader/mod.rs | 2 +- wgpu/tests/shader_primitive_index/mod.rs | 2 +- wgpu/tests/shader_view_format/mod.rs | 2 +- wgpu/tests/vertex_indices/mod.rs | 2 +- wgpu/tests/zero_init_texture_after_discard.rs | 8 ++++---- 28 files changed, 60 insertions(+), 44 deletions(-) diff --git a/player/src/lib.rs b/player/src/lib.rs index 0ef6080b77..f09b99d66b 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -119,20 +119,29 @@ impl GlobalPlay for wgc::hub::Global { trace::Command::InsertDebugMarker(marker) => self .command_encoder_insert_debug_marker::(encoder, &marker) .unwrap(), - trace::Command::RunComputePass { base } => { - self.command_encoder_run_compute_pass_impl::(encoder, base.as_ref()) - .unwrap(); + trace::Command::RunComputePass { + base, + timestamp_writes, + } => { + self.command_encoder_run_compute_pass_impl::( + encoder, + base.as_ref(), + timestamp_writes.as_ref(), + ) + .unwrap(); } trace::Command::RunRenderPass { base, target_colors, target_depth_stencil, + timestamp_writes, } => { self.command_encoder_run_render_pass_impl::( encoder, base.as_ref(), &target_colors, target_depth_stencil.as_ref(), + timestamp_writes.as_ref(), ) .unwrap(); } diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index d40f6cd55e..507beb78dd 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -7,8 +7,7 @@ use crate::{ end_pipeline_statistics_query, memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, BasePass, BasePassRef, BindGroupStateChange, CommandBuffer, CommandEncoderError, - CommandEncoderStatus, MapPassErr, PassErrorScope, QueryUseError, - StateChange, + CommandEncoderStatus, MapPassErr, PassErrorScope, QueryUseError, StateChange, }, device::{MissingDownlevelFlags, MissingFeatures}, error::{ErrorFormatter, PrettyError}, @@ -123,7 +122,10 @@ impl ComputePass { #[cfg(feature = "trace")] pub fn into_command(self) -> crate::device::trace::Command { - crate::device::trace::Command::RunComputePass { base: self.base } + crate::device::trace::Command::RunComputePass { + base: self.base, + timestamp_writes: self.timestamp_writes, + } } } @@ -383,6 +385,7 @@ impl Global { if let Some(ref mut list) = cmd_buf.commands { list.push(crate::device::trace::Command::RunComputePass { base: BasePass::from_ref(base), + timestamp_writes: timestamp_writes.cloned(), }); } diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index a2b192b110..d7eb587c26 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -251,6 +251,7 @@ impl RenderPass { base: self.base, target_colors: self.color_targets.into_iter().collect(), target_depth_stencil: self.depth_stencil_target, + timestamp_writes: self.timestamp_writes, } } @@ -1116,7 +1117,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { let query_set = cmd_buf .trackers .query_sets - .add_single(&*query_set_guard, tw.query_set) + .add_single(query_set_guard, tw.query_set) .ok_or(RenderPassErrorInner::InvalidQuerySet(tw.query_set))?; if let Some(index) = tw.beginning_of_pass_write_index { @@ -1298,6 +1299,7 @@ impl Global { base: BasePass::from_ref(base), target_colors: color_attachments.to_vec(), target_depth_stencil: depth_stencil_attachment.cloned(), + timestamp_writes: timestamp_writes.cloned(), }); } diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 57f82c181e..ee19613d5a 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -176,11 +176,13 @@ pub enum Command { InsertDebugMarker(String), RunComputePass { base: crate::command::BasePass, + timestamp_writes: Option, }, RunRenderPass { base: crate::command::BasePass, target_colors: Vec>, target_depth_stencil: Option, + timestamp_writes: Option, }, } diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 680872c441..56309d586a 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -679,7 +679,7 @@ impl Example { })], depth_stencil_attachment: None, multiview: None, - timestamp_writes: &[], + timestamp_writes: None, }; unsafe { ctx.encoder.begin_render_pass(&pass_desc); diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs index ae06a59f82..16bff34148 100644 --- a/wgpu-hal/examples/raw-gles.rs +++ b/wgpu-hal/examples/raw-gles.rs @@ -174,7 +174,7 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height })], depth_stencil_attachment: None, multiview: None, - timestamp_writes: &[], + timestamp_writes: None, }; unsafe { encoder.begin_encoding(None).unwrap(); diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 23b478b2fa..5a17c3089f 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -448,7 +448,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } } - if let Some(timestamp_writes) = &desc.timestamp_writes { + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { let sba_descriptor = descriptor .sample_buffer_attachments() .object_at(0 as _) @@ -957,7 +957,7 @@ impl crate::CommandEncoder for super::CommandEncoder { objc::rc::autoreleasepool(|| { let descriptor = metal::ComputePassDescriptor::new(); - if let Some(timestamp_writes) = &desc.timestamp_writes { + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { let sba_descriptor = descriptor .sample_buffer_attachments() .object_at(0 as _) diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 76fdfb6905..d697a8d771 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -501,7 +501,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } // Start timestamp if any (before all other commands but after debug marker) - if let Some(timestamp_writes) = &desc.timestamp_writes { + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { if let Some(index) = timestamp_writes.beginning_of_pass_write_index { unsafe { self.write_timestamp(timestamp_writes.query_set, index); @@ -817,7 +817,7 @@ impl crate::CommandEncoder for super::CommandEncoder { self.rpass_debug_marker_active = true; } - if let Some(timestamp_writes) = &desc.timestamp_writes { + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { if let Some(index) = timestamp_writes.beginning_of_pass_write_index { unsafe { self.write_timestamp(timestamp_writes.query_set, index); diff --git a/wgpu/examples/boids/main.rs b/wgpu/examples/boids/main.rs index 2dd469785b..d9039533d1 100644 --- a/wgpu/examples/boids/main.rs +++ b/wgpu/examples/boids/main.rs @@ -286,7 +286,7 @@ impl framework::Example for Example { label: None, color_attachments: &color_attachments, depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }; // get command encoder @@ -298,7 +298,7 @@ impl framework::Example for Example { // compute pass let mut cpass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None, - timestamp_writes: &[], + timestamp_writes: None, }); cpass.set_pipeline(&self.compute_pipeline); cpass.set_bind_group(0, &self.particle_bind_groups[self.frame_num % 2], &[]); diff --git a/wgpu/examples/bunnymark/main.rs b/wgpu/examples/bunnymark/main.rs index 0422f83546..476abf612b 100644 --- a/wgpu/examples/bunnymark/main.rs +++ b/wgpu/examples/bunnymark/main.rs @@ -339,7 +339,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_pipeline(&self.pipeline); rpass.set_bind_group(0, &self.global_group, &[]); diff --git a/wgpu/examples/capture/main.rs b/wgpu/examples/capture/main.rs index 344a24cab1..60fc9ec2f5 100644 --- a/wgpu/examples/capture/main.rs +++ b/wgpu/examples/capture/main.rs @@ -104,7 +104,7 @@ async fn create_red_image_with_dimensions( }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); // Copy the data from the texture to the buffer diff --git a/wgpu/examples/conservative-raster/main.rs b/wgpu/examples/conservative-raster/main.rs index 156d717662..406e48a1c2 100644 --- a/wgpu/examples/conservative-raster/main.rs +++ b/wgpu/examples/conservative-raster/main.rs @@ -276,7 +276,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_pipeline(&self.pipeline_triangle_conservative); @@ -296,7 +296,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_pipeline(&self.pipeline_upscale); diff --git a/wgpu/examples/cube/main.rs b/wgpu/examples/cube/main.rs index e1b935a76f..59d6fb3cab 100644 --- a/wgpu/examples/cube/main.rs +++ b/wgpu/examples/cube/main.rs @@ -379,7 +379,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); rpass.push_debug_group("Prepare data for draw."); rpass.set_pipeline(&self.pipeline); diff --git a/wgpu/examples/hello-windows/main.rs b/wgpu/examples/hello-windows/main.rs index 0cf4a0d1b1..274175720b 100644 --- a/wgpu/examples/hello-windows/main.rs +++ b/wgpu/examples/hello-windows/main.rs @@ -135,7 +135,7 @@ async fn run(event_loop: EventLoop<()>, viewports: Vec<(Window, wgpu::Color)>) { }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); } diff --git a/wgpu/examples/mipmap/main.rs b/wgpu/examples/mipmap/main.rs index e49748ee60..969ee8856c 100644 --- a/wgpu/examples/mipmap/main.rs +++ b/wgpu/examples/mipmap/main.rs @@ -163,7 +163,7 @@ impl Example { }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); if let Some(ref query_sets) = query_sets { rpass.write_timestamp(&query_sets.timestamp, timestamp_query_index_base); @@ -475,7 +475,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_pipeline(&self.draw_pipeline); rpass.set_bind_group(0, &self.bind_group, &[]); diff --git a/wgpu/examples/msaa-line/main.rs b/wgpu/examples/msaa-line/main.rs index e1455ddb6f..8828094d16 100644 --- a/wgpu/examples/msaa-line/main.rs +++ b/wgpu/examples/msaa-line/main.rs @@ -303,7 +303,7 @@ impl framework::Example for Example { label: None, color_attachments: &[Some(rpass_color_attachment)], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }) .execute_bundles(iter::once(&self.bundle)); } diff --git a/wgpu/examples/shadow/main.rs b/wgpu/examples/shadow/main.rs index 538cecca7c..5303343c96 100644 --- a/wgpu/examples/shadow/main.rs +++ b/wgpu/examples/shadow/main.rs @@ -780,7 +780,7 @@ impl framework::Example for Example { }), stencil_ops: None, }), - timestamp_writes: &[], + timestamp_writes: None, }); pass.set_pipeline(&self.shadow_pass.pipeline); pass.set_bind_group(0, &self.shadow_pass.bind_group, &[]); @@ -823,7 +823,7 @@ impl framework::Example for Example { }), stencil_ops: None, }), - timestamp_writes: &[], + timestamp_writes: None, }); pass.set_pipeline(&self.forward_pass.pipeline); pass.set_bind_group(0, &self.forward_pass.bind_group, &[]); diff --git a/wgpu/examples/skybox/main.rs b/wgpu/examples/skybox/main.rs index c02438dec7..571751710f 100644 --- a/wgpu/examples/skybox/main.rs +++ b/wgpu/examples/skybox/main.rs @@ -442,7 +442,7 @@ impl framework::Example for Skybox { }), stencil_ops: None, }), - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_bind_group(0, &self.bind_group, &[]); diff --git a/wgpu/examples/stencil-triangles/main.rs b/wgpu/examples/stencil-triangles/main.rs index 2ec04bfe85..7a869c8d6b 100644 --- a/wgpu/examples/stencil-triangles/main.rs +++ b/wgpu/examples/stencil-triangles/main.rs @@ -214,7 +214,7 @@ impl framework::Example for Triangles { store: true, }), }), - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_stencil_reference(1); diff --git a/wgpu/examples/texture-arrays/main.rs b/wgpu/examples/texture-arrays/main.rs index 1edf2ea20e..bd8b0599c1 100644 --- a/wgpu/examples/texture-arrays/main.rs +++ b/wgpu/examples/texture-arrays/main.rs @@ -386,7 +386,7 @@ impl framework::Example for Example { }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_pipeline(&self.pipeline); diff --git a/wgpu/examples/water/main.rs b/wgpu/examples/water/main.rs index 92eee2e575..a5e8fa5c00 100644 --- a/wgpu/examples/water/main.rs +++ b/wgpu/examples/water/main.rs @@ -755,7 +755,7 @@ impl framework::Example for Example { }), stencil_ops: None, }), - timestamp_writes: &[], + timestamp_writes: None, }); rpass.execute_bundles([&self.terrain_bundle]); @@ -781,7 +781,7 @@ impl framework::Example for Example { }), stencil_ops: None, }), - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_pipeline(&self.terrain_pipeline); rpass.set_bind_group(0, &self.terrain_normal_bind_group, &[]); @@ -806,7 +806,7 @@ impl framework::Example for Example { depth_ops: None, stencil_ops: None, }), - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_pipeline(&self.water_pipeline); diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 060e871231..c5c6bd19af 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -4096,8 +4096,8 @@ impl Queue { /// /// Returns zero if timestamp queries are unsupported. /// - /// TODO: https://github.com/gfx-rs/wgpu/issues/3741 - /// Timestamp values are supposed to represent nanosecond values, see https://gpuweb.github.io/gpuweb/#timestamp + /// TODO: `` + /// Timestamp values are supposed to represent nanosecond values, see `` /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required currently. pub fn get_timestamp_period(&self) -> f32 { DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref()) diff --git a/wgpu/tests/regression/issue_3457.rs b/wgpu/tests/regression/issue_3457.rs index ef90017ea5..7ce95c74cb 100644 --- a/wgpu/tests/regression/issue_3457.rs +++ b/wgpu/tests/regression/issue_3457.rs @@ -144,7 +144,7 @@ fn pass_reset_vertex_buffer() { }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); double_rpass.set_pipeline(&double_pipeline); @@ -178,7 +178,7 @@ fn pass_reset_vertex_buffer() { }, })], depth_stencil_attachment: None, - timestamp_writes: &[], + timestamp_writes: None, }); single_rpass.set_pipeline(&single_pipeline); diff --git a/wgpu/tests/shader/mod.rs b/wgpu/tests/shader/mod.rs index 4e98ea85cd..735bb86731 100644 --- a/wgpu/tests/shader/mod.rs +++ b/wgpu/tests/shader/mod.rs @@ -326,7 +326,7 @@ fn shader_input_output_test( let mut cpass = encoder.begin_compute_pass(&ComputePassDescriptor { label: Some(&format!("cpass {test_name}")), - timestamp_writes: &[], + timestamp_writes: None, }); cpass.set_pipeline(&pipeline); cpass.set_bind_group(0, &bg, &[]); diff --git a/wgpu/tests/shader_primitive_index/mod.rs b/wgpu/tests/shader_primitive_index/mod.rs index ddb43a1f9d..603ff1f8b6 100644 --- a/wgpu/tests/shader_primitive_index/mod.rs +++ b/wgpu/tests/shader_primitive_index/mod.rs @@ -183,7 +183,7 @@ fn pulling_common( })], depth_stencil_attachment: None, label: None, - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_pipeline(&pipeline); diff --git a/wgpu/tests/shader_view_format/mod.rs b/wgpu/tests/shader_view_format/mod.rs index ef0b4d86b7..af0f6f930d 100644 --- a/wgpu/tests/shader_view_format/mod.rs +++ b/wgpu/tests/shader_view_format/mod.rs @@ -137,7 +137,7 @@ fn reinterpret( })], depth_stencil_attachment: None, label: None, - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_pipeline(&pipeline); rpass.set_bind_group(0, &bind_group, &[]); diff --git a/wgpu/tests/vertex_indices/mod.rs b/wgpu/tests/vertex_indices/mod.rs index d50b3401cf..bb537f27ea 100644 --- a/wgpu/tests/vertex_indices/mod.rs +++ b/wgpu/tests/vertex_indices/mod.rs @@ -115,7 +115,7 @@ fn pulling_common( })], depth_stencil_attachment: None, label: None, - timestamp_writes: &[], + timestamp_writes: None, }); rpass.set_pipeline(&pipeline); diff --git a/wgpu/tests/zero_init_texture_after_discard.rs b/wgpu/tests/zero_init_texture_after_discard.rs index 544be508d6..d9d39448d1 100644 --- a/wgpu/tests/zero_init_texture_after_discard.rs +++ b/wgpu/tests/zero_init_texture_after_discard.rs @@ -160,7 +160,7 @@ impl<'ctx> TestCase<'ctx> { store: true, }), }), - timestamp_writes: &[], + timestamp_writes: None, }); ctx.queue.submit([encoder.finish()]); } else { @@ -244,7 +244,7 @@ impl<'ctx> TestCase<'ctx> { }), }, ), - timestamp_writes: &[], + timestamp_writes: None, }); } @@ -268,7 +268,7 @@ impl<'ctx> TestCase<'ctx> { }), }, ), - timestamp_writes: &[], + timestamp_writes: None, }); } @@ -292,7 +292,7 @@ impl<'ctx> TestCase<'ctx> { }), }, ), - timestamp_writes: &[], + timestamp_writes: None, }); } From 641b0db3840dc7985624bc8038cae51032eeeaf8 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 14 May 2023 18:22:01 +0200 Subject: [PATCH 25/45] fix remaining warnings & errors --- deno_webgpu/error.rs | 4 +++- wgpu-hal/src/dx12/command.rs | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/deno_webgpu/error.rs b/deno_webgpu/error.rs index 17ff7c1ef4..a68592adfc 100644 --- a/deno_webgpu/error.rs +++ b/deno_webgpu/error.rs @@ -104,7 +104,9 @@ impl From for WebGpuError { match err { DeviceError::Lost => WebGpuError::Lost, DeviceError::OutOfMemory => WebGpuError::OutOfMemory, - DeviceError::Invalid => WebGpuError::Validation(fmt_err(&err)), + DeviceError::ResourceCreationFailed | DeviceError::Invalid => { + WebGpuError::Validation(fmt_err(&err)) + } } } } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 35c7f357ee..d4ebb8ef38 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -669,7 +669,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe { self.begin_pass(super::PassKind::Render, desc.label) }; // Start timestamp if any (before all other commands but after debug marker) - if let Some(timestamp_writes) = &desc.timestamp_writes { + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { if let Some(index) = timestamp_writes.beginning_of_pass_write_index { unsafe { self.write_timestamp(timestamp_writes.query_set, index); @@ -1136,7 +1136,7 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { unsafe { self.begin_pass(super::PassKind::Compute, desc.label) }; - if let Some(timestamp_writes) = &desc.timestamp_writes { + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { if let Some(index) = timestamp_writes.beginning_of_pass_write_index { unsafe { self.write_timestamp(timestamp_writes.query_set, index); From 83d24022238621bf795a35de35775cea0444a3d8 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 14 May 2023 18:26:36 +0200 Subject: [PATCH 26/45] remove timestamp feature requirement again from hello-compute/triangle --- wgpu/examples/hello-compute/main.rs | 2 +- wgpu/examples/hello-triangle/main.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wgpu/examples/hello-compute/main.rs b/wgpu/examples/hello-compute/main.rs index ca34ad3f69..327a9de69a 100644 --- a/wgpu/examples/hello-compute/main.rs +++ b/wgpu/examples/hello-compute/main.rs @@ -46,7 +46,7 @@ async fn execute_gpu(numbers: &[u32]) -> Option> { .request_device( &wgpu::DeviceDescriptor { label: None, - features: wgpu::Features::empty() | wgpu::Features::TIMESTAMP_QUERY, + features: wgpu::Features::empty(), limits: wgpu::Limits::downlevel_defaults(), }, None, diff --git a/wgpu/examples/hello-triangle/main.rs b/wgpu/examples/hello-triangle/main.rs index b87f5cba62..563dc0efe3 100644 --- a/wgpu/examples/hello-triangle/main.rs +++ b/wgpu/examples/hello-triangle/main.rs @@ -26,7 +26,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) { .request_device( &wgpu::DeviceDescriptor { label: None, - features: wgpu::Features::empty() | wgpu::Features::TIMESTAMP_QUERY, + features: wgpu::Features::empty(), // Make sure we use the texture resolution limits from the adapter, so we can support images the size of the swapchain. limits: wgpu::Limits::downlevel_webgl2_defaults() .using_resolution(adapter.limits()), From 788b2197d66f59c61057125a9b74f797141a8bac Mon Sep 17 00:00:00 2001 From: FL33TW00D Date: Sun, 21 May 2023 09:26:02 +0100 Subject: [PATCH 27/45] chore: remove patch --- Cargo.lock | 3 ++- Cargo.toml | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7d81b90830..db27d2609c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1496,7 +1496,8 @@ dependencies = [ [[package]] name = "metal" version = "0.25.0" -source = "git+https://github.com/gfx-rs/metal-rs?branch=master#05df13a4f751910ef54d7805e8e4682488b23b8d" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "550b24b0cd4cf923f36bae78eca457b3a10d8a6a14a9c84cb2687b527e6a84af" dependencies = [ "bitflags 1.3.2", "block", diff --git a/Cargo.toml b/Cargo.toml index 9c528ed664..0cf7e242b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -151,4 +151,3 @@ wgpu-types = { path = "./wgpu-types" } #web-sys = { path = "../wasm-bindgen/crates/web-sys" } #js-sys = { path = "../wasm-bindgen/crates/js-sys" } #wasm-bindgen = { path = "../wasm-bindgen" } -metal = { git = "https://github.com/gfx-rs/metal-rs", branch = "master" } From 0c0c78705dbb01d115f2c1bd6269f3b0b129f8de Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 29 May 2023 09:08:00 +0200 Subject: [PATCH 28/45] remove direct dependency to foreign-types --- Cargo.lock | 1 - Cargo.toml | 1 - wgpu-hal/Cargo.toml | 3 +-- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index db27d2609c..186d1014cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3155,7 +3155,6 @@ dependencies = [ "core-graphics-types", "d3d12", "env_logger", - "foreign-types 0.3.2", "glow", "glutin", "gpu-alloc", diff --git a/Cargo.toml b/Cargo.toml index 0cf7e242b8..3c619bf0a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,7 +89,6 @@ winit = "0.27.1" # Metal dependencies block = "0.1" -foreign-types = "0.3" metal = "0.25.0" objc = "0.2.5" core-graphics-types = "0.1" diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 49e005ca1d..6205a23a2e 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -34,7 +34,7 @@ targets = [ [features] default = [] -metal = ["naga/msl-out", "block", "foreign-types"] +metal = ["naga/msl-out", "block"] vulkan = ["naga/spv-out", "ash", "gpu-alloc", "gpu-descriptor", "libloading", "smallvec"] gles = ["naga/glsl-out", "glow", "khronos-egl", "libloading"] dx11 = ["naga/hlsl-out", "d3d12", "libloading", "winapi/d3d11", "winapi/std", "winapi/d3d11_1", "winapi/d3d11_2", "winapi/d3d11sdklayers", "winapi/dxgi1_6"] @@ -100,7 +100,6 @@ d3d12 = { version = "0.6.0", git = "https://github.com/gfx-rs/d3d12-rs", rev = " [target.'cfg(any(target_os="macos", target_os="ios"))'.dependencies] # backend: Metal block = { version = "0.1", optional = true } -foreign-types = { version = "0.3", optional = true } metal = "0.25.0" objc = "0.2.5" From a11184474877af067f82ff5ab45eca27800ebded Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Mon, 5 Jun 2023 13:03:54 -0400 Subject: [PATCH 29/45] Format --- wgpu-hal/src/metal/command.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 54413e7965..11d84db122 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -955,7 +955,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { self.begin_pass(); - + debug_assert!(self.state.blit.is_none()); debug_assert!(self.state.compute.is_none()); debug_assert!(self.state.render.is_none()); From bb5ad0e1cff4d35b8dcb7123ca3e6dd968732a86 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 23 Jul 2023 10:23:33 +0200 Subject: [PATCH 30/45] dx12: reuse write_timestamp method more often --- wgpu-hal/src/dx12/command.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 98729df64e..8ea14886a2 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -231,13 +231,7 @@ impl super::CommandEncoder { fn write_pass_end_timestamp_if_requested(&mut self) { if let Some((query_set_raw, index)) = self.end_of_pass_timer_query.take() { - unsafe { - self.list.unwrap().EndQuery( - query_set_raw.as_mut_ptr(), - d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, - index, - ); - } + self.write_timestamp(query_set_raw.query_set, index); } } } From 43dcabdd66ecb3b6f4b71119d5aead71b36e9973 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 23 Jul 2023 10:31:11 +0200 Subject: [PATCH 31/45] longer expose for timestamp query example --- examples/timestamp-queries/src/main.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index 5ddcc5981f..6e6eaabd62 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -1,4 +1,21 @@ -//! Sample demonstrating different kinds of timestamp queries. +//! Sample demonstrating different kinds of gpu timestamp queries. +//! +//! Timestamp queries are typically used to profile how long certain operations take on the GPU. +//! wgpu has several ways of performing gpu timestamp queries: +//! * `wgpu::Encoder::write_timestamp` writes a between any commands recorded on an encoder. +//! (enabled with wgpu::Features::TIMESTAMP_QUERY) +//! * passing `wgpu::RenderPassTimestampWrites`/`wgpu::ComputePassTimestampWrites` during render/compute pass creation. +//! This writes timestamps for the beginning and end of a given pass. +//! (enabled with wgpu::Features::TIMESTAMP_QUERY) +//! * `wgpu::RenderPass/ComputePass::write_timestamp` writes a timestamp within commands of a render pass. +//! Note that some GPU architectures do not support this. +//! (native only, enabled with wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) +//! +//! Any timestamp is written to a `wgpu::QuerySet` which needs to be resolved to a buffer with `wgpu::BufferUsages::QUERY_RESOLVE`. +//! Since this usage is incompatible with `wgpu::BufferUsages::MAP_READ` we need to copy the resolved timestamps to a separate buffer afterwards. +//! +//! The period, i.e. the unit of time, of the timestamps in wgpu is undetermined and needs to be queried with `wgpu::Queue::get_timestamp_period` +//! in order to get comparable results. use std::borrow::Cow; From 079754c8c9a31ddbb6c5d321ce4355cbb3bf6be5 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 23 Jul 2023 10:36:59 +0200 Subject: [PATCH 32/45] comment fixes/improvements --- wgpu-core/src/command/compute.rs | 4 ++-- wgpu-core/src/command/render.rs | 4 ++-- wgpu/src/lib.rs | 13 ++++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index daa73aa468..e6f2943e35 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -153,9 +153,9 @@ impl fmt::Debug for ComputePass { pub struct ComputePassTimestampWrites { /// The query set to write the timestamps to. pub query_set: id::QuerySetId, - /// The index of the query at which the start timestamp of the pass is written if any. + /// The index of the query set at which a start timestamp of this pass is written, if any. pub beginning_of_pass_write_index: Option, - /// The index of the query at which the end timestamp of the pass is written if any. + /// The index of the query set at which an end timestamp of this pass is written, if any. pub end_of_pass_write_index: Option, } diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 1c8fcad563..bc2105661c 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -198,9 +198,9 @@ pub enum RenderPassTimestampLocation { pub struct RenderPassTimestampWrites { /// The query set to write the timestamp to. pub query_set: id::QuerySetId, - /// The index of the query at which the start timestamp of the pass is written if any. + /// The index of the query set at which a start timestamp of this pass is written, if any. pub beginning_of_pass_write_index: Option, - /// The index of the query at which the end timestamp of the pass is written if any. + /// The index of the query set at which an end timestamp of this pass is written, if any. pub end_of_pass_write_index: Option, } diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 169cc2f5eb..86c9b4ea51 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1041,9 +1041,9 @@ impl Default for Operations { pub struct RenderPassTimestampWrites<'a> { /// The query set to write to. pub query_set: &'a QuerySet, - /// The index of the query at which the start timestamp of the pass is written if any. + /// The index of the query set at which a start timestamp of this pass is written, if any. pub beginning_of_pass_write_index: Option, - /// The index of the query at which the end timestamp of the pass is written if any. + /// The index of the query set at which an end timestamp of this pass is written, if any. pub end_of_pass_write_index: Option, } static_assertions::assert_impl_all!(RenderPassTimestampWrites: Send, Sync); @@ -1482,9 +1482,9 @@ static_assertions::assert_impl_all!(RenderPipelineDescriptor: Send, Sync); pub struct ComputePassTimestampWrites<'a> { /// The query set to write to. pub query_set: &'a QuerySet, - /// The index of the query at which the start timestamp of the pass is written if any. + /// The index of the query set at which a start timestamp of this pass is written, if any. pub beginning_of_pass_write_index: Option, - /// The index of the query at which the end timestamp of the pass is written if any. + /// The index of the query set at which an end timestamp of this pass is written, if any. pub end_of_pass_write_index: Option, } static_assertions::assert_impl_all!(ComputePassTimestampWrites: Send, Sync); @@ -4527,9 +4527,8 @@ impl Queue { /// /// Returns zero if timestamp queries are unsupported. /// - /// TODO: `` - /// Timestamp values are supposed to represent nanosecond values, see `` - /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required currently. + /// Timestamp values are represented in nanosecond values on WebGPU, see `` + /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required. pub fn get_timestamp_period(&self) -> f32 { DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref()) } From 1b1db9bbbd9310c10909c93c58cd7bc1833b6441 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 23 Jul 2023 11:21:03 +0200 Subject: [PATCH 33/45] added timestamp query tests, fix test compilation --- Cargo.lock | 2 + examples/timestamp-queries/Cargo.toml | 4 + examples/timestamp-queries/src/main.rs | 240 +++++++++++++++------- tests/tests/scissor_tests/mod.rs | 1 + tests/tests/shader_primitive_index/mod.rs | 1 + wgpu-hal/src/metal/command.rs | 2 + 6 files changed, 178 insertions(+), 72 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2625ecc055..c08ba02e88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3303,7 +3303,9 @@ dependencies = [ "log", "pollster", "wasm-bindgen-futures", + "wasm-bindgen-test", "wgpu", + "wgpu-test", "winit", ] diff --git a/examples/timestamp-queries/Cargo.toml b/examples/timestamp-queries/Cargo.toml index 534ef48dd8..f2d7de3f1e 100644 --- a/examples/timestamp-queries/Cargo.toml +++ b/examples/timestamp-queries/Cargo.toml @@ -24,3 +24,7 @@ console_log.workspace = true log.workspace = true wasm-bindgen-futures.workspace = true +[dev-dependencies] +wasm-bindgen-test.workspace = true +wgpu-test.workspace = true + diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index 6e6eaabd62..34404f99c2 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -17,52 +17,115 @@ //! The period, i.e. the unit of time, of the timestamps in wgpu is undetermined and needs to be queried with `wgpu::Queue::get_timestamp_period` //! in order to get comparable results. -use std::borrow::Cow; - use wgpu::util::DeviceExt; -// Queries: -// * encoder timestamp start -// * encoder timestamp end -// * render start -// * render in-between (optional) -// * render end -// * compute start -// * compute in-between (optional) -// * compute end -const NUM_QUERIES: usize = 8; - struct Queries { set: wgpu::QuerySet, resolve_buffer: wgpu::Buffer, destination_buffer: wgpu::Buffer, + num_queries: u64, +} + +struct QueryResults { + encoder_timestamps: [u64; 2], + render_start_end_timestamps: [u64; 2], + render_inside_timestamp: Option, + compute_start_end_timestamps: [u64; 2], + compute_inside_timestamp: Option, +} + +impl QueryResults { + // Queries: + // * encoder timestamp start + // * encoder timestamp end + // * render start + // * render in-between (optional) + // * render end + // * compute start + // * compute in-between (optional) + // * compute end + const NUM_QUERIES: u64 = 8; + + fn from_raw_results(timestamps: Vec, timestamps_inside_passes: bool) -> Self { + assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize); + + QueryResults { + encoder_timestamps: [timestamps[0], timestamps[1]], + render_start_end_timestamps: [timestamps[2], timestamps[4]], + render_inside_timestamp: timestamps_inside_passes.then_some(timestamps[3]), + compute_start_end_timestamps: [timestamps[5], timestamps[7]], + compute_inside_timestamp: timestamps_inside_passes.then_some(timestamps[6]), + } + } + + fn print(&self, queue: &wgpu::Queue) { + let period = queue.get_timestamp_period(); + let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0; + + println!( + "Elapsed time render + compute: {:.2} μs", + elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]) + ); + println!( + "Elapsed time render pass: {:.2} μs", + elapsed_us( + self.render_start_end_timestamps[0], + self.render_start_end_timestamps[1] + ) + ); + if let Some(timestamp) = self.render_inside_timestamp { + println!( + "Elapsed time first triangle: {:.2} μs", + elapsed_us(self.render_start_end_timestamps[0], timestamp) + ); + } + println!( + "Elapsed time compute pass: {:.2} μs", + elapsed_us( + self.compute_start_end_timestamps[0], + self.compute_start_end_timestamps[1] + ) + ); + if let Some(timestamp) = self.compute_inside_timestamp { + println!( + "Elapsed time after first dispatch: {:.2} μs", + elapsed_us(self.compute_start_end_timestamps[0], timestamp) + ); + } + } } impl Queries { - fn new(device: &wgpu::Device) -> Self { + fn new(device: &wgpu::Device, num_queries: u64) -> Self { Queries { set: device.create_query_set(&wgpu::QuerySetDescriptor { label: Some("Timestamp query set"), - count: NUM_QUERIES as _, + count: num_queries as _, ty: wgpu::QueryType::Timestamp, }), resolve_buffer: device.create_buffer(&wgpu::BufferDescriptor { label: Some("query resolve buffer"), - size: (std::mem::size_of::() * NUM_QUERIES) as wgpu::BufferAddress, + size: std::mem::size_of::() as u64 * num_queries, usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE, mapped_at_creation: false, }), destination_buffer: device.create_buffer(&wgpu::BufferDescriptor { label: Some("query dest buffer"), - size: (std::mem::size_of::() * NUM_QUERIES) as wgpu::BufferAddress, + size: std::mem::size_of::() as u64 * num_queries, usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, mapped_at_creation: false, }), + num_queries, } } fn resolve(&self, encoder: &mut wgpu::CommandEncoder) { - encoder.resolve_query_set(&self.set, 0..NUM_QUERIES as u32, &self.resolve_buffer, 0); + encoder.resolve_query_set( + &self.set, + 0..self.num_queries as u32, + &self.resolve_buffer, + 0, + ); encoder.copy_buffer_to_buffer( &self.resolve_buffer, 0, @@ -72,61 +135,23 @@ impl Queries { ); } - fn wait_and_print_results(&self, device: &wgpu::Device, queue: &wgpu::Queue) { + fn wait_for_results(&self, device: &wgpu::Device) -> Vec { self.destination_buffer .slice(..) .map_async(wgpu::MapMode::Read, |_| ()); device.poll(wgpu::Maintain::Wait); - { + let timestamps = { let timestamp_view = self .destination_buffer - .slice(..(std::mem::size_of::() * NUM_QUERIES) as wgpu::BufferAddress) + .slice(..(std::mem::size_of::() as wgpu::BufferAddress * self.num_queries)) .get_mapped_range(); - - let timestamps: &[u64] = bytemuck::cast_slice(×tamp_view); - println!("Raw timestamp buffer contents: {:?}", timestamps); - - let elapsed_us = |start, end: u64| { - let period = queue.get_timestamp_period(); - end.wrapping_sub(start) as f64 * period as f64 / 1000.0 - }; - - println!( - "Elapsed time render + compute: {:.2} μs", - elapsed_us(timestamps[0], timestamps[1]) - ); - - println!( - "Elapsed time render pass: {:.2} μs", - elapsed_us(timestamps[2], timestamps[4]) - ); - if device - .features() - .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) - { - println!( - "Elapsed time first triangle: {:.2} μs", - elapsed_us(timestamps[3], timestamps[4]) - ); - } - - println!( - "Elapsed time compute pass: {:.2} μs", - elapsed_us(timestamps[5], timestamps[7]) - ); - if device - .features() - .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) - { - println!( - "Elapsed time first compute: {:.2} μs", - elapsed_us(timestamps[5], timestamps[6]) - ); - } - } + bytemuck::cast_slice(×tamp_view).to_vec() + }; self.destination_buffer.unmap(); + + timestamps } } @@ -148,10 +173,10 @@ async fn run() { return; } let mut features = wgpu::Features::empty() | wgpu::Features::TIMESTAMP_QUERY; - if adapter + let timestamps_inside_passes = adapter .features() - .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) - { + .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES); + if timestamps_inside_passes { println!("Adapter supports timestamp queries within passes."); features |= wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES; } else { @@ -172,29 +197,39 @@ async fn run() { .await .unwrap(); + let queries = submit_render_and_compute_pass_with_queries(&device, &queue); + let raw_results = queries.wait_for_results(&device); + println!("Raw timestamp buffer contents: {:?}", raw_results); + QueryResults::from_raw_results(raw_results, timestamps_inside_passes).print(&queue); +} + +fn submit_render_and_compute_pass_with_queries( + device: &wgpu::Device, + queue: &wgpu::Queue, +) -> Queries { let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); - let queries = Queries::new(&device); - + let queries = Queries::new(device, QueryResults::NUM_QUERIES); let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { label: None, - source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("shader.wgsl"))), + source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!("shader.wgsl"))), }); encoder.write_timestamp(&queries.set, 0); // Render two triangles and profile it. - render_pass(&device, &shader, &mut encoder, &queries.set, 2); + render_pass(device, &shader, &mut encoder, &queries.set, 2); // Compute a hash function on a single thread a bunch of time and profile it. - compute_pass(&device, &shader, &mut encoder, &queries.set, 5); + compute_pass(device, &shader, &mut encoder, &queries.set, 5); encoder.write_timestamp(&queries.set, 1); queries.resolve(&mut encoder); queue.submit(Some(encoder.finish())); - queries.wait_and_print_results(&device, &queue); + + queries } fn compute_pass( @@ -339,3 +374,64 @@ fn main() { wasm_bindgen_futures::spawn_local(run()); } } + +#[cfg(test)] +mod tests { + use crate::{submit_render_and_compute_pass_with_queries, QueryResults}; + + #[test] + #[wasm_bindgen_test::wasm_bindgen_test] + fn test_timestamps_encoder() { + wgpu_test::initialize_test( + wgpu_test::TestParameters::default() + .limits(wgpu::Limits::downlevel_defaults()) + .features(wgpu::Features::TIMESTAMP_QUERY), + |ctx| { + test_timestamps(ctx, false); + }, + ); + } + + #[test] + #[wasm_bindgen_test::wasm_bindgen_test] + fn test_timestamps_passes() { + wgpu_test::initialize_test( + wgpu_test::TestParameters::default() + .limits(wgpu::Limits::downlevel_defaults()) + .features( + wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES, + ), + |ctx| { + test_timestamps(ctx, true); + }, + ); + } + + fn test_timestamps(ctx: wgpu_test::TestingContext, timestamps_inside_passes: bool) { + let queries = submit_render_and_compute_pass_with_queries(&ctx.device, &ctx.queue); + let raw_results = queries.wait_for_results(&ctx.device); + + let QueryResults { + encoder_timestamps, + render_start_end_timestamps, + render_inside_timestamp, + compute_start_end_timestamps, + compute_inside_timestamp, + } = QueryResults::from_raw_results(raw_results, timestamps_inside_passes); + // TODO: Metal encoder timestamps aren't implemented yet. + if ctx.adapter.get_info().backend != wgpu::Backend::Metal { + assert!(encoder_timestamps[0] > 0); + assert!(encoder_timestamps[1] > 0); + } + assert!(render_start_end_timestamps[0] > 0); + assert!(render_start_end_timestamps[1] > 0); + assert!(render_inside_timestamp + .map(|t| t > 0) + .unwrap_or(!timestamps_inside_passes)); + assert!(compute_start_end_timestamps[0] > 0); + assert!(compute_start_end_timestamps[1] > 0); + assert!(compute_inside_timestamp + .map(|t| t > 0) + .unwrap_or(!timestamps_inside_passes)); + } +} diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs index 6855b410bd..08862fefe6 100644 --- a/tests/tests/scissor_tests/mod.rs +++ b/tests/tests/scissor_tests/mod.rs @@ -79,6 +79,7 @@ fn scissor_test_impl(ctx: &TestingContext, scissor_rect: Rect, expected_data: [u }, })], depth_stencil_attachment: None, + timestamp_writes: None, }); render_pass.set_pipeline(&pipeline); render_pass.set_scissor_rect( diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs index 68daae873e..48918590af 100644 --- a/tests/tests/shader_primitive_index/mod.rs +++ b/tests/tests/shader_primitive_index/mod.rs @@ -186,6 +186,7 @@ fn pulling_common( })], depth_stencil_attachment: None, label: None, + timestamp_writes: None, }); rpass.set_pipeline(&pipeline); diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 11d84db122..d457008dc1 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -329,6 +329,8 @@ impl crate::CommandEncoder for super::CommandEncoder { // TODO: Otherwise, we need to create a new blit command encoder with a descriptor that inserts the timestamps. // Note that as of writing creating a new encoder is not exposed by the metal crate. // https://developer.apple.com/documentation/metal/mtlcommandbuffer/3564431-makeblitcommandencoder + + // TODO: Enable respective test in `examples/timestamp-queries/src/tests.rs`. } unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range) { From d15dabfafc53c1bb312c03aa500100586098e4a0 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 23 Jul 2023 11:24:16 +0200 Subject: [PATCH 34/45] warning fixes --- examples/timestamp-queries/src/main.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index 34404f99c2..3cfe90ce2d 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -234,7 +234,7 @@ fn submit_render_and_compute_pass_with_queries( fn compute_pass( device: &wgpu::Device, - shader: &wgpu::ShaderModule, + module: &wgpu::ShaderModule, encoder: &mut wgpu::CommandEncoder, query_set: &wgpu::QuerySet, query_offset: u32, @@ -247,7 +247,7 @@ fn compute_pass( let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { label: None, layout: None, - module: &shader, + module, entry_point: "main_cs", }); let bind_group_layout = compute_pipeline.get_bind_group_layout(0); @@ -264,7 +264,7 @@ fn compute_pass( label: None, timestamp_writes: Some(wgpu::ComputePassTimestampWrites { query_set, - beginning_of_pass_write_index: Some(query_offset + 0), + beginning_of_pass_write_index: Some(query_offset), end_of_pass_write_index: Some(query_offset + 2), }), }); @@ -282,7 +282,7 @@ fn compute_pass( fn render_pass( device: &wgpu::Device, - shader: &wgpu::ShaderModule, + module: &wgpu::ShaderModule, encoder: &mut wgpu::CommandEncoder, query_set: &wgpu::QuerySet, query_offset: u32, @@ -299,12 +299,12 @@ fn render_pass( label: None, layout: Some(&pipeline_layout), vertex: wgpu::VertexState { - module: &shader, + module, entry_point: "vs_main", buffers: &[], }, fragment: Some(wgpu::FragmentState { - module: &shader, + module, entry_point: "fs_main", targets: &[Some(format.into())], }), @@ -343,7 +343,7 @@ fn render_pass( depth_stencil_attachment: None, timestamp_writes: Some(wgpu::RenderPassTimestampWrites { query_set, - beginning_of_pass_write_index: Some(query_offset + 0), + beginning_of_pass_write_index: Some(query_offset), end_of_pass_write_index: Some(query_offset + 2), }), }); From 9b5423effa848f232ca70ab1274abf5d04b90ec2 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 23 Jul 2023 11:47:52 +0200 Subject: [PATCH 35/45] fix dx12 compilation --- wgpu-hal/src/dx12/command.rs | 11 +++++++---- wgpu-hal/src/dx12/mod.rs | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 8ea14886a2..4da4485faf 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -230,8 +230,11 @@ impl super::CommandEncoder { } fn write_pass_end_timestamp_if_requested(&mut self) { - if let Some((query_set_raw, index)) = self.end_of_pass_timer_query.take() { - self.write_timestamp(query_set_raw.query_set, index); + if let Some((query_set, index)) = self.end_of_pass_timer_query.take() { + use crate::CommandEncoder as _; + unsafe { + self.write_timestamp(&query_set, index); + } } } } @@ -672,7 +675,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } self.end_of_pass_timer_query = timestamp_writes .end_of_pass_write_index - .map(|index| (timestamp_writes.query_set.raw, index)); + .map(|index| (timestamp_writes.query_set.clone(), index)); } let mut color_views = [d3d12::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_ATTACHMENTS]; @@ -1144,7 +1147,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } self.end_of_pass_timer_query = timestamp_writes .end_of_pass_write_index - .map(|index| (timestamp_writes.query_set.raw, index)); + .map(|index| (timestamp_writes.query_set.clone(), index)); } } unsafe fn end_compute_pass(&mut self) { diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 564bc349c6..faf693c7a2 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -350,7 +350,7 @@ pub struct CommandEncoder { /// If set, the end of the next render/compute pass will write a timestamp at /// the given pool & location. - end_of_pass_timer_query: Option<(d3d12::QueryHeap, u32)>, + end_of_pass_timer_query: Option<(QuerySet, u32)>, } unsafe impl Send for CommandEncoder {} @@ -457,7 +457,7 @@ pub struct Sampler { unsafe impl Send for Sampler {} unsafe impl Sync for Sampler {} -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct QuerySet { raw: d3d12::QueryHeap, raw_ty: d3d12_ty::D3D12_QUERY_TYPE, From 280e50ce63a4c68c2349342dac0c8948109c4d25 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 23 Jul 2023 18:21:10 +0200 Subject: [PATCH 36/45] d3d12::QuerySet no longer implements Clone --- wgpu-hal/src/dx12/command.rs | 14 ++++++++++---- wgpu-hal/src/dx12/mod.rs | 4 ++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 4da4485faf..719e63a36f 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -230,10 +230,16 @@ impl super::CommandEncoder { } fn write_pass_end_timestamp_if_requested(&mut self) { - if let Some((query_set, index)) = self.end_of_pass_timer_query.take() { + if let Some((query_set_raw, index)) = self.end_of_pass_timer_query.take() { use crate::CommandEncoder as _; unsafe { - self.write_timestamp(&query_set, index); + self.write_timestamp( + &crate::dx12::QuerySet { + raw: query_set_raw, + raw_ty: d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + }, + index, + ); } } } @@ -675,7 +681,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } self.end_of_pass_timer_query = timestamp_writes .end_of_pass_write_index - .map(|index| (timestamp_writes.query_set.clone(), index)); + .map(|index| (timestamp_writes.query_set.raw.clone(), index)); } let mut color_views = [d3d12::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_ATTACHMENTS]; @@ -1147,7 +1153,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } self.end_of_pass_timer_query = timestamp_writes .end_of_pass_write_index - .map(|index| (timestamp_writes.query_set.clone(), index)); + .map(|index| (timestamp_writes.query_set.raw.clone(), index)); } } unsafe fn end_compute_pass(&mut self) { diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index faf693c7a2..564bc349c6 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -350,7 +350,7 @@ pub struct CommandEncoder { /// If set, the end of the next render/compute pass will write a timestamp at /// the given pool & location. - end_of_pass_timer_query: Option<(QuerySet, u32)>, + end_of_pass_timer_query: Option<(d3d12::QueryHeap, u32)>, } unsafe impl Send for CommandEncoder {} @@ -457,7 +457,7 @@ pub struct Sampler { unsafe impl Send for Sampler {} unsafe impl Sync for Sampler {} -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct QuerySet { raw: d3d12::QueryHeap, raw_ty: d3d12_ty::D3D12_QUERY_TYPE, From 6a8dc8b4213e5cd6067cb0cc5ee40fad759f26fe Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 23 Jul 2023 18:22:17 +0200 Subject: [PATCH 37/45] improve timestamp test assertions --- examples/timestamp-queries/src/main.rs | 33 ++++++++++++++++---------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index 3cfe90ce2d..6094d9733a 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -418,20 +418,29 @@ mod tests { compute_start_end_timestamps, compute_inside_timestamp, } = QueryResults::from_raw_results(raw_results, timestamps_inside_passes); + + // Timestamps may wrap around, so can't really only reason about deltas! + let render_delta = + render_start_end_timestamps[1].wrapping_sub(render_start_end_timestamps[0]); + assert!(render_delta > 0); + let compute_delta = + compute_start_end_timestamps[1].wrapping_sub(compute_start_end_timestamps[0]); + assert!(compute_delta > 0); + // TODO: Metal encoder timestamps aren't implemented yet. if ctx.adapter.get_info().backend != wgpu::Backend::Metal { - assert!(encoder_timestamps[0] > 0); - assert!(encoder_timestamps[1] > 0); + let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]); + assert!(encoder_delta > 0); + assert!(encoder_delta >= render_delta + compute_delta); + } + + if let Some(render_inside_timestamp) = render_inside_timestamp { + assert!(render_inside_timestamp >= render_start_end_timestamps[0]); + assert!(render_inside_timestamp <= render_start_end_timestamps[1]); + } + if let Some(compute_inside_timestamp) = compute_inside_timestamp { + assert!(compute_inside_timestamp >= compute_start_end_timestamps[0]); + assert!(compute_inside_timestamp <= compute_start_end_timestamps[1]); } - assert!(render_start_end_timestamps[0] > 0); - assert!(render_start_end_timestamps[1] > 0); - assert!(render_inside_timestamp - .map(|t| t > 0) - .unwrap_or(!timestamps_inside_passes)); - assert!(compute_start_end_timestamps[0] > 0); - assert!(compute_start_end_timestamps[1] > 0); - assert!(compute_inside_timestamp - .map(|t| t > 0) - .unwrap_or(!timestamps_inside_passes)); } } From 4aaf26400a05322f7c18ad579e7b0a3b536b9628 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 23 Jul 2023 18:54:28 +0200 Subject: [PATCH 38/45] fix send/sync assertions on wasm builds --- wgpu/src/lib.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 86c9b4ea51..b7d4b2b57a 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -864,6 +864,13 @@ pub struct QuerySet { not(target_feature = "atomics") ) ))] +#[cfg(any( + not(target_arch = "wasm32"), + all( + feature = "fragile-send-sync-non-atomic-wasm", + not(target_feature = "atomics") + ) +))] static_assertions::assert_impl_all!(QuerySet: Send, Sync); impl Drop for QuerySet { @@ -1046,6 +1053,13 @@ pub struct RenderPassTimestampWrites<'a> { /// The index of the query set at which an end timestamp of this pass is written, if any. pub end_of_pass_write_index: Option, } +#[cfg(any( + not(target_arch = "wasm32"), + all( + feature = "fragile-send-sync-non-atomic-wasm", + not(target_feature = "atomics") + ) +))] static_assertions::assert_impl_all!(RenderPassTimestampWrites: Send, Sync); /// Describes a color attachment to a [`RenderPass`]. @@ -1487,6 +1501,13 @@ pub struct ComputePassTimestampWrites<'a> { /// The index of the query set at which an end timestamp of this pass is written, if any. pub end_of_pass_write_index: Option, } +#[cfg(any( + not(target_arch = "wasm32"), + all( + feature = "fragile-send-sync-non-atomic-wasm", + not(target_feature = "atomics") + ) +))] static_assertions::assert_impl_all!(ComputePassTimestampWrites: Send, Sync); /// Describes the attachments of a compute pass. @@ -1504,6 +1525,13 @@ pub struct ComputePassDescriptor<'a> { /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled. pub timestamp_writes: Option>, } +#[cfg(any( + not(target_arch = "wasm32"), + all( + feature = "fragile-send-sync-non-atomic-wasm", + not(target_feature = "atomics") + ) +))] static_assertions::assert_impl_all!(ComputePassDescriptor: Send, Sync); /// Describes a compute pipeline. From 27bcd2539fd0247da57e94fae80e1235bb8441ee Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 23 Jul 2023 19:10:32 +0200 Subject: [PATCH 39/45] disable timestamp test for molkenvk --- examples/timestamp-queries/src/main.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index 6094d9733a..d9e388132b 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -385,7 +385,9 @@ mod tests { wgpu_test::initialize_test( wgpu_test::TestParameters::default() .limits(wgpu::Limits::downlevel_defaults()) - .features(wgpu::Features::TIMESTAMP_QUERY), + .features(wgpu::Features::TIMESTAMP_QUERY) + // Reports zero timestamp periods on MoltenVK. + .molten_vk_failure(), |ctx| { test_timestamps(ctx, false); }, @@ -400,7 +402,9 @@ mod tests { .limits(wgpu::Limits::downlevel_defaults()) .features( wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES, - ), + ) + // Reports zero timestamp periods on MoltenVK. + .molten_vk_failure(), |ctx| { test_timestamps(ctx, true); }, From 1c7c175def3219f9028c405bd6a0924ccecfe474 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 30 Jul 2023 16:26:25 +0200 Subject: [PATCH 40/45] Workaround https://github.com/gfx-rs/wgpu/issues/3993 --- Cargo.lock | 2 +- examples/timestamp-queries/src/main.rs | 81 +++++++++++++++++++------- wgpu-core/src/command/query.rs | 1 + 3 files changed, 61 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1cd2ea0234..de7474eae1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3498,7 +3498,7 @@ dependencies = [ "wasm-bindgen-test", "wgpu", "wgpu-test", - "winit", + "winit 0.28.6", ] [[package]] diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index d9e388132b..f73076febf 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -24,6 +24,7 @@ struct Queries { resolve_buffer: wgpu::Buffer, destination_buffer: wgpu::Buffer, num_queries: u64, + next_unused_query: u32, } struct QueryResults { @@ -49,12 +50,27 @@ impl QueryResults { fn from_raw_results(timestamps: Vec, timestamps_inside_passes: bool) -> Self { assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize); + let mut next_slot = 0; + let mut get_next_slot = || { + let slot = timestamps[next_slot]; + next_slot += 1; + slot + }; + + let mut encoder_timestamps = [0, 0]; + encoder_timestamps[0] = get_next_slot(); + let render_start_end_timestamps = [get_next_slot(), get_next_slot()]; + let render_inside_timestamp = timestamps_inside_passes.then_some(get_next_slot()); + let compute_start_end_timestamps = [get_next_slot(), get_next_slot()]; + let compute_inside_timestamp = timestamps_inside_passes.then_some(get_next_slot()); + encoder_timestamps[1] = get_next_slot(); + QueryResults { - encoder_timestamps: [timestamps[0], timestamps[1]], - render_start_end_timestamps: [timestamps[2], timestamps[4]], - render_inside_timestamp: timestamps_inside_passes.then_some(timestamps[3]), - compute_start_end_timestamps: [timestamps[5], timestamps[7]], - compute_inside_timestamp: timestamps_inside_passes.then_some(timestamps[6]), + encoder_timestamps, + render_start_end_timestamps, + render_inside_timestamp, + compute_start_end_timestamps, + compute_inside_timestamp, } } @@ -116,6 +132,7 @@ impl Queries { mapped_at_creation: false, }), num_queries, + next_unused_query: 0, } } @@ -157,7 +174,11 @@ impl Queries { async fn run() { // Instantiates instance of WebGPU - let instance = wgpu::Instance::default(); + let backends = wgpu::util::backend_bits_from_env().unwrap_or_else(wgpu::Backends::all); + let instance = wgpu::Instance::new(wgpu::InstanceDescriptor { + backends, + dx12_shader_compiler: wgpu::Dx12Compiler::default(), + }); // `request_adapter` instantiates the general connection to the GPU let adapter = instance @@ -210,21 +231,35 @@ fn submit_render_and_compute_pass_with_queries( let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); - let queries = Queries::new(device, QueryResults::NUM_QUERIES); + let mut queries = Queries::new(device, QueryResults::NUM_QUERIES); let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { label: None, source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!("shader.wgsl"))), }); - encoder.write_timestamp(&queries.set, 0); + encoder.write_timestamp(&queries.set, queries.next_unused_query); + queries.next_unused_query += 1; // Render two triangles and profile it. - render_pass(device, &shader, &mut encoder, &queries.set, 2); + render_pass( + device, + &shader, + &mut encoder, + &queries.set, + &mut queries.next_unused_query, + ); // Compute a hash function on a single thread a bunch of time and profile it. - compute_pass(device, &shader, &mut encoder, &queries.set, 5); + compute_pass( + device, + &shader, + &mut encoder, + &queries.set, + &mut queries.next_unused_query, + ); - encoder.write_timestamp(&queries.set, 1); + encoder.write_timestamp(&queries.set, queries.next_unused_query); + queries.next_unused_query += 1; queries.resolve(&mut encoder); queue.submit(Some(encoder.finish())); @@ -237,7 +272,7 @@ fn compute_pass( module: &wgpu::ShaderModule, encoder: &mut wgpu::CommandEncoder, query_set: &wgpu::QuerySet, - query_offset: u32, + next_unused_query: &mut u32, ) { let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { label: Some("Storage Buffer"), @@ -264,10 +299,11 @@ fn compute_pass( label: None, timestamp_writes: Some(wgpu::ComputePassTimestampWrites { query_set, - beginning_of_pass_write_index: Some(query_offset), - end_of_pass_write_index: Some(query_offset + 2), + beginning_of_pass_write_index: Some(*next_unused_query), + end_of_pass_write_index: Some(*next_unused_query + 1), }), }); + *next_unused_query += 2; cpass.set_pipeline(&compute_pipeline); cpass.set_bind_group(0, &bind_group, &[]); cpass.dispatch_workgroups(1, 1, 1); @@ -275,7 +311,8 @@ fn compute_pass( .features() .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) { - cpass.write_timestamp(query_set, query_offset + 1); + cpass.write_timestamp(query_set, *next_unused_query); + *next_unused_query += 1; } cpass.dispatch_workgroups(1, 1, 1); } @@ -285,7 +322,7 @@ fn render_pass( module: &wgpu::ShaderModule, encoder: &mut wgpu::CommandEncoder, query_set: &wgpu::QuerySet, - query_offset: u32, + next_unused_query: &mut u32, ) { let format = wgpu::TextureFormat::Rgba8Unorm; @@ -343,10 +380,11 @@ fn render_pass( depth_stencil_attachment: None, timestamp_writes: Some(wgpu::RenderPassTimestampWrites { query_set, - beginning_of_pass_write_index: Some(query_offset), - end_of_pass_write_index: Some(query_offset + 2), + beginning_of_pass_write_index: Some(*next_unused_query), + end_of_pass_write_index: Some(*next_unused_query + 1), }), }); + *next_unused_query += 2; rpass.set_pipeline(&render_pipeline); @@ -355,7 +393,8 @@ fn render_pass( .features() .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES) { - rpass.write_timestamp(query_set, query_offset + 1); + rpass.write_timestamp(query_set, *next_unused_query); + *next_unused_query += 1; } rpass.draw(0..3, 0..1); @@ -414,7 +453,6 @@ mod tests { fn test_timestamps(ctx: wgpu_test::TestingContext, timestamps_inside_passes: bool) { let queries = submit_render_and_compute_pass_with_queries(&ctx.device, &ctx.queue); let raw_results = queries.wait_for_results(&ctx.device); - let QueryResults { encoder_timestamps, render_start_end_timestamps, @@ -424,12 +462,11 @@ mod tests { } = QueryResults::from_raw_results(raw_results, timestamps_inside_passes); // Timestamps may wrap around, so can't really only reason about deltas! + // Making things worse, deltas are allowed to be zero. let render_delta = render_start_end_timestamps[1].wrapping_sub(render_start_end_timestamps[0]); - assert!(render_delta > 0); let compute_delta = compute_start_end_timestamps[1].wrapping_sub(compute_start_end_timestamps[0]); - assert!(compute_delta > 0); // TODO: Metal encoder timestamps aren't implemented yet. if ctx.adapter.get_info().backend != wgpu::Backend::Metal { diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs index 9c6da51834..679fa3632e 100644 --- a/wgpu-core/src/command/query.rs +++ b/wgpu-core/src/command/query.rs @@ -411,6 +411,7 @@ impl Global { .into()); } + // TODO(https://github.com/gfx-rs/wgpu/issues/3993): Need to track initialization state. cmd_buf .buffer_memory_init_actions .extend(dst_buffer.initialization_status.create_action( From f942813a1db16119f270e50321c300afb3f092ac Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 30 Jul 2023 16:37:13 +0200 Subject: [PATCH 41/45] enable test_timestamps_passes test for molten --- examples/timestamp-queries/src/main.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index f73076febf..d5dc33983b 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -442,11 +442,10 @@ mod tests { .features( wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES, ) - // Reports zero timestamp periods on MoltenVK. - .molten_vk_failure(), - |ctx| { - test_timestamps(ctx, true); - }, + | ctx + | { + test_timestamps(ctx, true); + }, ); } From dec97f81ad850d49a302eee8f9ed6d22849f9473 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 30 Jul 2023 16:40:51 +0200 Subject: [PATCH 42/45] fix querying invalid queries in example --- examples/timestamp-queries/src/main.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index d5dc33983b..6939c0f6cb 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -139,7 +139,8 @@ impl Queries { fn resolve(&self, encoder: &mut wgpu::CommandEncoder) { encoder.resolve_query_set( &self.set, - 0..self.num_queries as u32, + // TODO(https://github.com/gfx-rs/wgpu/issues/3993): Musn't be larger than the number valid queries in the set. + 0..self.next_unused_query as u32, &self.resolve_buffer, 0, ); @@ -441,11 +442,10 @@ mod tests { .limits(wgpu::Limits::downlevel_defaults()) .features( wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES, - ) - | ctx - | { - test_timestamps(ctx, true); - }, + ), + |ctx| { + test_timestamps(ctx, true); + }, ); } From 00f0095b1068c36a4798a95216aff02741531773 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 30 Jul 2023 16:43:08 +0200 Subject: [PATCH 43/45] enable test_timestamps_encoder on molten --- examples/timestamp-queries/src/main.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index 6939c0f6cb..9b36989c5c 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -425,9 +425,7 @@ mod tests { wgpu_test::initialize_test( wgpu_test::TestParameters::default() .limits(wgpu::Limits::downlevel_defaults()) - .features(wgpu::Features::TIMESTAMP_QUERY) - // Reports zero timestamp periods on MoltenVK. - .molten_vk_failure(), + .features(wgpu::Features::TIMESTAMP_QUERY), |ctx| { test_timestamps(ctx, false); }, From f23322eff17a631cc3efeba9e6306ec7db64fff8 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Wed, 2 Aug 2023 16:31:47 -0400 Subject: [PATCH 44/45] Apply suggestions from code review --- examples/timestamp-queries/src/main.rs | 4 ++-- wgpu-core/src/command/compute.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index 9b36989c5c..73cd2e3e69 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -174,7 +174,7 @@ impl Queries { } async fn run() { - // Instantiates instance of WebGPU + // Instantiates instance of wgpu let backends = wgpu::util::backend_bits_from_env().unwrap_or_else(wgpu::Backends::all); let instance = wgpu::Instance::new(wgpu::InstanceDescriptor { backends, @@ -194,7 +194,7 @@ async fn run() { println!("Adapter does not support timestamp queries, aborting."); return; } - let mut features = wgpu::Features::empty() | wgpu::Features::TIMESTAMP_QUERY; + let mut features = wgpu::Features::TIMESTAMP_QUERY; let timestamps_inside_passes = adapter .features() .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES); diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index e6f2943e35..567ef4efa7 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -436,7 +436,7 @@ impl Global { .or(tw.end_of_pass_write_index) .map(|i| i..i + 1) }; - // Range should always be Some, both values being None should lead to a validation err.r + // Range should always be Some, both values being None should lead to a validation error. // But no point in erroring over that nuance here! if let Some(range) = range { unsafe { From 2fc4cf269563c3f18e8811bbe7642c173f3e0481 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Wed, 2 Aug 2023 16:34:45 -0400 Subject: [PATCH 45/45] Simplify feature logic --- examples/timestamp-queries/src/main.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index 73cd2e3e69..4b576c712a 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -188,19 +188,17 @@ async fn run() { .expect("Failed to request adapter."); // Check timestamp features. - if adapter.features().contains(wgpu::Features::TIMESTAMP_QUERY) { + let features = adapter.features() + & (wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES); + if features.contains(wgpu::Features::TIMESTAMP_QUERY) { println!("Adapter supports timestamp queries."); } else { println!("Adapter does not support timestamp queries, aborting."); return; } - let mut features = wgpu::Features::TIMESTAMP_QUERY; - let timestamps_inside_passes = adapter - .features() - .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES); + let timestamps_inside_passes = features.contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES); if timestamps_inside_passes { println!("Adapter supports timestamp queries within passes."); - features |= wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES; } else { println!("Adapter does not support timestamp queries within passes."); }