diff --git a/CHANGELOG.md b/CHANGELOG.md index e80adea94e..730e03a06c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,19 @@ # Change Log -## wgpu-0.12.1 +## wgpu-core-0.12.1, wgpu-hal-0.12.1 (2021-12-29) - zero initialization uses now render target clears when possible (faster and doesn't enforce COPY_DST internally if not necessary) - fix use of MSAA targets in WebGL - fix not providing `COPY_DST` flag for textures causing assertions in some cases - fix surface textures not getting zero initialized - clear_texture supports now depth/stencil targets - error message on creating depth/stencil volume texture + - Vulkan: + - fix validation error on debug message types + - DX12: + - fix check for integrated GPUs + - fix stencil subresource transitions + - Metal: + - implement push constants ## wgpu-0.12 (2021-12-18) - API: diff --git a/Cargo.lock b/Cargo.lock index e02857f707..47f17ea587 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,9 +43,9 @@ dependencies = [ [[package]] name = "ash" -version = "0.33.3+1.2.191" +version = "0.34.0+1.2.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc4f1d82f164f838ae413296d1131aa6fa79b917d25bebaa7033d25620c09219" +checksum = "b0f780da53d0063880d45554306489f09dd8d1bda47688b4a57bc579119356df" dependencies = [ "libloading", ] @@ -1656,7 +1656,7 @@ dependencies = [ [[package]] name = "wgpu-core" -version = "0.12.0" +version = "0.12.1" dependencies = [ "arrayvec", "bitflags", @@ -1679,7 +1679,7 @@ dependencies = [ [[package]] name = "wgpu-hal" -version = "0.12.0" +version = "0.12.1" dependencies = [ "arrayvec", "ash", diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index 8d68c4c8c6..a88f2a8ae4 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-core" -version = "0.12.0" +version = "0.12.1" authors = ["wgpu developers"] edition = "2018" description = "WebGPU core logic on wgpu-hal" diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 43e850ba6f..7ca9722b3c 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-hal" -version = "0.12.0" +version = "0.12.1" authors = ["wgpu developers"] edition = "2018" description = "WebGPU hardware abstraction layer" @@ -37,7 +37,7 @@ block = { version = "0.1", optional = true } foreign-types = { version = "0.3", optional = true } # backend: Vulkan -ash = { version = "0.33", optional = true } +ash = { version = "0.34", optional = true, default-features = false, features = ["debug", "loaded"] } gpu-alloc = { version = "0.5", optional = true } gpu-descriptor = { version = "0.2", optional = true } inplace_it = { version ="0.3.3", optional = true } diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 37046dffcc..69f63b9350 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -98,7 +98,7 @@ impl super::Adapter { device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { workarounds.avoid_cpu_descriptor_overwrites = true; wgt::DeviceType::Cpu - } else if features_architecture.CacheCoherentUMA != 0 { + } else if features_architecture.UMA != 0 { wgt::DeviceType::IntegratedGpu } else { wgt::DeviceType::DiscreteGpu diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 91a6bd9b67..48f72377b0 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -319,15 +319,30 @@ impl crate::CommandEncoder for super::CommandEncoder { // Only one barrier if it affects the whole image. self.temp.barriers.push(raw); } else { - // Generate barrier for each layer/level combination. + // Selected texture aspect is relevant if the texture format has both depth _and_ stencil aspects. + let planes = if crate::FormatAspects::from(barrier.texture.format) + .contains(crate::FormatAspects::DEPTH | crate::FormatAspects::STENCIL) + { + match barrier.range.aspect { + wgt::TextureAspect::All => 0..2, + wgt::TextureAspect::StencilOnly => 1..2, + wgt::TextureAspect::DepthOnly => 0..1, + } + } else { + 0..1 + }; + for rel_mip_level in 0..mip_level_count { for rel_array_layer in 0..array_layer_count { - raw.u.Transition_mut().Subresource = barrier.texture.calc_subresource( - barrier.range.base_mip_level + rel_mip_level, - barrier.range.base_array_layer + rel_array_layer, - 0, - ); - self.temp.barriers.push(raw); + for plane in planes.clone() { + raw.u.Transition_mut().Subresource = + barrier.texture.calc_subresource( + barrier.range.base_mip_level + rel_mip_level, + barrier.range.base_array_layer + rel_array_layer, + plane, + ); + self.temp.barriers.push(raw); + } } } } @@ -607,10 +622,15 @@ impl crate::CommandEncoder for super::CommandEncoder { } if let Some(ref ds) = desc.depth_stencil_attachment { let mut flags = native::ClearFlags::empty(); - if !ds.depth_ops.contains(crate::AttachmentOps::LOAD) { + let aspects = ds.target.view.format_aspects; + if !ds.depth_ops.contains(crate::AttachmentOps::LOAD) + && aspects.contains(crate::FormatAspects::DEPTH) + { flags |= native::ClearFlags::DEPTH; } - if !ds.stencil_ops.contains(crate::AttachmentOps::LOAD) { + if !ds.stencil_ops.contains(crate::AttachmentOps::LOAD) + && aspects.contains(crate::FormatAspects::STENCIL) + { flags |= native::ClearFlags::STENCIL; } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 25423ed5f5..660601d41a 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,3 +1,5 @@ +use crate::FormatAspects; + use super::{conv, descriptor, view, HResult as _}; use parking_lot::Mutex; use std::{ffi, mem, num::NonZeroU32, ptr, slice, sync::Arc}; @@ -495,6 +497,7 @@ impl crate::Device for super::Device { Ok(super::TextureView { raw_format: view_desc.format, + format_aspects: FormatAspects::from(desc.format), target_base: ( texture.resource, texture.calc_subresource(desc.range.base_mip_level, desc.range.base_array_layer, 0), @@ -558,7 +561,7 @@ impl crate::Device for super::Device { .usage .intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) { - let raw_desc = view_desc.to_dsv(crate::FormatAspects::empty()); + let raw_desc = view_desc.to_dsv(FormatAspects::empty()); let handle = self.dsv_pool.lock().alloc_handle(); self.raw.CreateDepthStencilView( texture.resource.as_mut_ptr(), diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 1867d02368..b4dd295059 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -422,6 +422,7 @@ impl Texture { #[derive(Debug)] pub struct TextureView { raw_format: native::Format, + format_aspects: crate::FormatAspects, // May explicitly ignore stencil aspect of raw_format! target_base: (native::Resource, u32), handle_srv: Option, handle_uav: Option, diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index bb7b5d3832..a794ec12d0 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -918,6 +918,7 @@ impl super::PrivateCapabilities { | F::MAPPABLE_PRIMARY_BUFFERS | F::VERTEX_WRITABLE_STORAGE | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | F::PUSH_CONSTANTS | F::POLYGON_MODE_LINE | F::CLEAR_COMMANDS | F::TEXTURE_FORMAT_16BIT_NORM; diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 465d91e197..e7a0642b89 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -16,6 +16,7 @@ impl Default for super::CommandState { stage_infos: Default::default(), storage_buffer_length_map: Default::default(), work_group_memory_sizes: Vec::new(), + push_constants: Vec::new(), } } } @@ -61,6 +62,7 @@ impl super::CommandState { self.stage_infos.fs.clear(); self.stage_infos.cs.clear(); self.work_group_memory_sizes.clear(); + self.push_constants.clear(); } fn make_sizes_buffer_update<'a>( @@ -587,12 +589,41 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_push_constants( &mut self, - _layout: &super::PipelineLayout, - _stages: wgt::ShaderStages, - _offset: u32, - _data: &[u32], + layout: &super::PipelineLayout, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], ) { - //TODO + let state_pc = &mut self.state.push_constants; + if state_pc.len() < layout.total_push_constants as usize { + state_pc.resize(layout.total_push_constants as usize, 0); + } + assert_eq!(offset as usize % WORD_SIZE, 0); + + let offset = offset as usize / WORD_SIZE; + state_pc[offset..offset + data.len()].copy_from_slice(data); + + if stages.contains(wgt::ShaderStages::COMPUTE) { + self.state.compute.as_ref().unwrap().set_bytes( + layout.push_constants_infos.cs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + if stages.contains(wgt::ShaderStages::VERTEX) { + self.state.render.as_ref().unwrap().set_vertex_bytes( + layout.push_constants_infos.vs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + if stages.contains(wgt::ShaderStages::FRAGMENT) { + self.state.render.as_ref().unwrap().set_fragment_bytes( + layout.push_constants_infos.fs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } } unsafe fn insert_debug_marker(&mut self, label: &str) { diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index d4018dcfd1..51b96a6657 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -471,6 +471,7 @@ impl crate::Device for super::Device { let mut bind_group_infos = arrayvec::ArrayVec::new(); // First, place the push constants + let mut total_push_constants = 0; for info in stage_data.iter_mut() { for pcr in desc.push_constant_ranges { if pcr.stages.contains(map_naga_stage(info.stage)) { @@ -492,6 +493,8 @@ impl crate::Device for super::Device { info.pc_buffer = Some(info.counters.buffers); info.counters.buffers += 1; } + + total_push_constants = total_push_constants.max(info.pc_limit); } // Second, place the described resources @@ -641,6 +644,7 @@ impl crate::Device for super::Device { image: naga::proc::BoundsCheckPolicy::ReadZeroSkipWrite, }, }, + total_push_constants, }) } unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 9dc686d3c8..2ea58c2635 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -524,6 +524,7 @@ pub struct PipelineLayout { bind_group_infos: ArrayVec, push_constants_infos: MultiStageData>, total_counters: MultiStageResourceCounters, + total_push_constants: u32, } trait AsNative { @@ -709,6 +710,7 @@ struct CommandState { stage_infos: MultiStageData, storage_buffer_length_map: fxhash::FxHashMap, work_group_memory_sizes: Vec, + push_constants: Vec, } pub struct CommandEncoder { diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index be4e94046a..8c98f62df6 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -1116,8 +1116,8 @@ impl super::Adapter { let timeline_semaphore_fn = if enabled_extensions.contains(&khr::TimelineSemaphore::name()) { Some(super::ExtensionFn::Extension(khr::TimelineSemaphore::new( - &self.instance.entry, &self.instance.raw, + &raw_device, ))) } else if self.phd_capabilities.properties.api_version >= vk::API_VERSION_1_2 { Some(super::ExtensionFn::Promoted) diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index deca4839df..dbcb1123ee 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -594,9 +594,11 @@ impl crate::CommandEncoder for super::CommandEncoder { .cmd_set_scissor(self.active, 0, &vk_scissors); } unsafe fn set_stencil_reference(&mut self, value: u32) { - self.device - .raw - .cmd_set_stencil_reference(self.active, vk::StencilFaceFlags::all(), value); + self.device.raw.cmd_set_stencil_reference( + self.active, + vk::StencilFaceFlags::FRONT_AND_BACK, + value, + ); } unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { self.device.raw.cmd_set_blend_constants(self.active, color); diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 1b21385c7c..fd1b49853c 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1716,7 +1716,7 @@ impl crate::Device for super::Device { .values(&values); let result = match self.shared.extension_fns.timeline_semaphore { Some(super::ExtensionFn::Extension(ref ext)) => { - ext.wait_semaphores(self.shared.raw.handle(), &vk_info, timeout_us) + ext.wait_semaphores(&vk_info, timeout_us) } Some(super::ExtensionFn::Promoted) => { self.shared.raw.wait_semaphores(&vk_info, timeout_us) diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index bf1297bdac..71e0fc7e38 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -208,7 +208,11 @@ impl super::Instance { let vk_info = vk::DebugUtilsMessengerCreateInfoEXT::builder() .flags(vk::DebugUtilsMessengerCreateFlagsEXT::empty()) .message_severity(severity) - .message_type(vk::DebugUtilsMessageTypeFlagsEXT::all()) + .message_type( + vk::DebugUtilsMessageTypeFlagsEXT::GENERAL + | vk::DebugUtilsMessageTypeFlagsEXT::VALIDATION + | vk::DebugUtilsMessageTypeFlagsEXT::PERFORMANCE, + ) .pfn_user_callback(Some(debug_utils_messenger_callback)); let messenger = extension .create_debug_utils_messenger(&vk_info, None) @@ -438,7 +442,7 @@ impl Drop for super::InstanceShared { impl crate::Instance for super::Instance { unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { - let entry = match ash::Entry::new() { + let entry = match ash::Entry::load() { Ok(entry) => entry, Err(err) => { log::info!("Missing Vulkan entry points: {:?}", err); diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 457911a3cd..234d737a19 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -514,9 +514,7 @@ impl Fence { match *self { Self::TimelineSemaphore(raw) => unsafe { Ok(match *extension.unwrap() { - ExtensionFn::Extension(ref ext) => { - ext.get_semaphore_counter_value(device.handle(), raw)? - } + ExtensionFn::Extension(ref ext) => ext.get_semaphore_counter_value(raw)?, ExtensionFn::Promoted => device.get_semaphore_counter_value(raw)?, }) }, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index f7df88fd11..5acacbca68 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -628,9 +628,9 @@ pub struct Limits { pub max_sampled_textures_per_shader_stage: u32, /// Amount of samplers visible in a single shader stage. Defaults to 16. Higher is "better". pub max_samplers_per_shader_stage: u32, - /// Amount of storage buffers visible in a single shader stage. Defaults to 4. Higher is "better". + /// Amount of storage buffers visible in a single shader stage. Defaults to 8. Higher is "better". pub max_storage_buffers_per_shader_stage: u32, - /// Amount of storage textures visible in a single shader stage. Defaults to 4. Higher is "better". + /// Amount of storage textures visible in a single shader stage. Defaults to 8. Higher is "better". pub max_storage_textures_per_shader_stage: u32, /// Amount of uniform buffers visible in a single shader stage. Defaults to 12. Higher is "better". pub max_uniform_buffers_per_shader_stage: u32, @@ -667,11 +667,13 @@ pub struct Limits { /// Defaults to 256. Lower is "better". pub min_storage_buffer_offset_alignment: u32, /// Maximum allowed number of components (scalars) of input or output locations for - /// inter-stage communication (vertex outputs to fragment inputs). + /// inter-stage communication (vertex outputs to fragment inputs). Defaults to 60. pub max_inter_stage_shader_components: u32, - /// Maximum number of bytes used for workgroup memory in a compute entry point. + /// Maximum number of bytes used for workgroup memory in a compute entry point. Defaults to + /// 16352. pub max_compute_workgroup_storage_size: u32, /// Maximum value of the product of the `workgroup_size` dimensions for a compute entry-point. + /// Defaults to 256. pub max_compute_invocations_per_workgroup: u32, /// The maximum value of the workgroup_size X dimension for a compute stage `ShaderModule` entry-point. /// Defaults to 256. @@ -680,7 +682,7 @@ pub struct Limits { /// Defaults to 256. pub max_compute_workgroup_size_y: u32, /// The maximum value of the workgroup_size Z dimension for a compute stage `ShaderModule` entry-point. - /// Defaults to 256. + /// Defaults to 64. pub max_compute_workgroup_size_z: u32, /// The maximum value for each dimension of a `ComputePass::dispatch(x, y, z)` operation. /// Defaults to 65535.