diff --git a/naga/src/lib.rs b/naga/src/lib.rs index 85fd7a4508..4184639d19 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -1934,12 +1934,15 @@ pub enum Statement { /// If [`SHADER_INT64_ATOMIC_MIN_MAX`] or [`SHADER_INT64_ATOMIC_ALL_OPS`] are /// enabled, this may also be [`I64`] or [`U64`]. /// + /// If [`SHADER_FLT32_ATOMIC`] is enabled, this may be [`F32`]. + /// /// [`Pointer`]: TypeInner::Pointer /// [`Atomic`]: TypeInner::Atomic /// [`I32`]: Scalar::I32 /// [`U32`]: Scalar::U32 /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + /// [`SHADER_FLT32_ATOMIC`]: crate::valid::Capabilities::SHADER_FLT32_ATOMIC /// [`I64`]: Scalar::I64 /// [`U64`]: Scalar::U64 pointer: Handle, @@ -1957,9 +1960,16 @@ pub enum Statement { /// - If neither of those capabilities are present, then 64-bit scalar /// atomics are not allowed. /// + /// If [`pointer`] refers to a 32-bit floating-point atomic value, then: + /// + /// - The [`SHADER_FLT32_ATOMIC`] capability allows allows + /// [`AtomicFunction::Add`], [`AtomicFunction::Subtract`] and + /// [`AtomicFunction::Exchange`] here. + /// /// [`pointer`]: Statement::Atomic::pointer /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + /// [`SHADER_FLT32_ATOMIC`]: crate::valid::Capabilities::SHADER_FLT32_ATOMIC fun: AtomicFunction, /// Value to use in the function. @@ -1986,6 +1996,7 @@ pub enum Statement { /// [`Exchange { compare: None }`]: AtomicFunction::Exchange /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + /// [`SHADER_FLT32_ATOMIC`]: crate::valid::Capabilities::SHADER_FLT32_ATOMIC result: Option>, }, /// Load uniformly from a uniform pointer in the workgroup address space. diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs index 23e6204ccb..2b859b6c9c 100644 --- a/naga/src/valid/function.rs +++ b/naga/src/valid/function.rs @@ -41,8 +41,10 @@ pub enum CallError { pub enum AtomicError { #[error("Pointer {0:?} to atomic is invalid.")] InvalidPointer(Handle), - #[error("Address space {0:?} does not support 64bit atomics.")] + #[error("Address space {0:?} is not supported.")] InvalidAddressSpace(crate::AddressSpace), + #[error("Function {0:?} is not supported.")] + InvalidFunction(crate::AtomicFunction), #[error("Operand {0:?} has invalid type.")] InvalidOperand(Handle), #[error("Result expression {0:?} is not an `AtomicResult` expression")] @@ -446,6 +448,47 @@ impl super::Validator { } } + // Check for the special restrictions on 32-bit floating-point atomic operations. + // + // We don't need to consider other widths here: this function has already checked + // that `pointer`'s type is an `Atomic`, and `validate_type` has already checked + // that that `Atomic` type has a permitted scalar width. + if let crate::ScalarKind::Float = pointer_scalar.kind { + // `Capabilities::SHADER_FLT32_ATOMIC` enables 32-bit floating-point + // atomic operations including `Add`, `Subtract`, and `Exchange` + // in storage address space. + if !matches!( + *fun, + crate::AtomicFunction::Add + | crate::AtomicFunction::Subtract + | crate::AtomicFunction::Exchange { compare: _ } + ) { + log::error!("Float32 atomic operation {:?} is not supported", fun); + return Err(AtomicError::InvalidFunction(*fun) + .with_span_handle(value, context.expressions) + .into_other()); + } + if !self + .capabilities + .contains(super::Capabilities::SHADER_FLT32_ATOMIC) + { + log::error!("Float32 atomic operations are not supported"); + return Err(AtomicError::MissingCapability( + super::Capabilities::SHADER_FLT32_ATOMIC, + ) + .with_span_handle(value, context.expressions) + .into_other()); + } + if !matches!(pointer_space, crate::AddressSpace::Storage { .. }) { + log::error!( + "Float32 atomic operations are only supported in storage address space" + ); + return Err(AtomicError::InvalidAddressSpace(pointer_space) + .with_span_handle(value, context.expressions) + .into_other()); + } + } + // The result expression must be appropriate to the operation. match result { Some(result) => { diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index c314ec2ac8..ae5eb3edbe 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -143,6 +143,15 @@ bitflags::bitflags! { const SHADER_INT64_ATOMIC_MIN_MAX = 0x80000; /// Support for all atomic operations on 64-bit integers. const SHADER_INT64_ATOMIC_ALL_OPS = 0x100000; + /// Support for [`AtomicFunction::Add`], [`AtomicFunction::Sub`] + /// and [`AtomicFunction::Exchange`] on 32-bit floating-point numbers + /// in the [`Storage`] address space. + /// + /// [`AtomicFunction::Add`]: crate::AtomicFunction::Add + /// [`AtomicFunction::Sub`]: crate::AtomicFunction::Sub + /// [`AtomicFunction::Exchange`]: crate::AtomicFunction::Exchange + /// [`Storage`]: crate::AddressSpace::Storage + const SHADER_FLT32_ATOMIC = 0x200000; } } @@ -601,6 +610,8 @@ impl Validator { .into_boxed_slice(), }; + // TODO: Error + for (handle, ty) in module.types.iter() { let ty_info = self .validate_type(handle, module.to_ctx()) diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs index 32d5d58f1c..3cff36f386 100644 --- a/naga/src/valid/type.rs +++ b/naga/src/valid/type.rs @@ -362,7 +362,6 @@ impl super::Validator { Ti::Atomic(crate::Scalar { kind, width }) => { match kind { crate::ScalarKind::Bool - | crate::ScalarKind::Float | crate::ScalarKind::AbstractInt | crate::ScalarKind::AbstractFloat => { return Err(TypeError::InvalidAtomicWidth(kind, width)) @@ -381,6 +380,20 @@ impl super::Validator { return Err(TypeError::InvalidAtomicWidth(kind, width)); } } + crate::ScalarKind::Float => { + if width == 4 { + if !self + .capabilities + .intersects(Capabilities::SHADER_FLT32_ATOMIC) + { + return Err(TypeError::MissingCapability( + Capabilities::SHADER_FLT32_ATOMIC, + )); + } + } else { + return Err(TypeError::InvalidAtomicWidth(kind, width)); + } + } }; TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::HOST_SHAREABLE, diff --git a/naga/tests/in/atomicOps-flt32.param.ron b/naga/tests/in/atomicOps-flt32.param.ron new file mode 100644 index 0000000000..e8cdbb4f04 --- /dev/null +++ b/naga/tests/in/atomicOps-flt32.param.ron @@ -0,0 +1,11 @@ +( + god_mode: true, + msl: ( + lang_version: (3, 0), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: true, + zero_initialize_workgroup_memory: false, + ), +) diff --git a/naga/tests/in/atomicOps-flt32.wgsl b/naga/tests/in/atomicOps-flt32.wgsl new file mode 100644 index 0000000000..05d16c70ed --- /dev/null +++ b/naga/tests/in/atomicOps-flt32.wgsl @@ -0,0 +1,54 @@ +struct Struct { + atomic_scalar: atomic, + atomic_arr: array, 2>, +} + +@group(0) @binding(0) +var storage_atomic_scalar: atomic; +@group(0) @binding(1) +var storage_atomic_arr: array, 2>; +@group(0) @binding(2) +var storage_struct: Struct; + +@compute +@workgroup_size(2) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + atomicStore(&storage_atomic_scalar, 1.0); + atomicStore(&storage_atomic_arr[1], 1.0); + atomicStore(&storage_struct.atomic_scalar, 1.0); + atomicStore(&storage_struct.atomic_arr[1], 1.0); + + workgroupBarrier(); + + let l0 = atomicLoad(&storage_atomic_scalar); + let l1 = atomicLoad(&storage_atomic_arr[1]); + let l2 = atomicLoad(&storage_struct.atomic_scalar); + let l3 = atomicLoad(&storage_struct.atomic_arr[1]); + + workgroupBarrier(); + + atomicAdd(&storage_atomic_scalar, 1.0); + atomicAdd(&storage_atomic_arr[1], 1.0); + atomicAdd(&storage_struct.atomic_scalar, 1.0); + atomicAdd(&storage_struct.atomic_arr[1], 1.0); + + workgroupBarrier(); + + atomicSub(&storage_atomic_scalar, 1.0); + atomicSub(&storage_atomic_arr[1], 1.0); + atomicSub(&storage_struct.atomic_scalar, 1.0); + atomicSub(&storage_struct.atomic_arr[1], 1.0); + + workgroupBarrier(); + + atomicExchange(&storage_atomic_scalar, 1.0); + atomicExchange(&storage_atomic_arr[1], 1.0); + atomicExchange(&storage_struct.atomic_scalar, 1.0); + atomicExchange(&storage_struct.atomic_arr[1], 1.0); + + // // TODO: https://github.com/gpuweb/gpuweb/issues/2021 + // atomicCompareExchangeWeak(&storage_atomic_scalar, 1.0); + // atomicCompareExchangeWeak(&storage_atomic_arr[1], 1.0); + // atomicCompareExchangeWeak(&storage_struct.atomic_scalar, 1.0); + // atomicCompareExchangeWeak(&storage_struct.atomic_arr[1], 1.0); +} diff --git a/naga/tests/out/msl/atomicOps-flt32.msl b/naga/tests/out/msl/atomicOps-flt32.msl new file mode 100644 index 0000000000..87661917dc --- /dev/null +++ b/naga/tests/out/msl/atomicOps-flt32.msl @@ -0,0 +1,48 @@ +// language: metal3.0 +#include +#include + +using metal::uint; + +struct type_1 { + metal::atomic_float inner[2]; +}; +struct Struct { + metal::atomic_float atomic_scalar; + type_1 atomic_arr; +}; + +struct cs_mainInput { +}; +kernel void cs_main( + metal::uint3 id [[thread_position_in_threadgroup]] +, device metal::atomic_float& storage_atomic_scalar [[user(fake0)]] +, device type_1& storage_atomic_arr [[user(fake0)]] +, device Struct& storage_struct [[user(fake0)]] +) { + metal::atomic_store_explicit(&storage_atomic_scalar, 1.0, metal::memory_order_relaxed); + metal::atomic_store_explicit(&storage_atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); + metal::atomic_store_explicit(&storage_struct.atomic_scalar, 1.0, metal::memory_order_relaxed); + metal::atomic_store_explicit(&storage_struct.atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + float l0_ = metal::atomic_load_explicit(&storage_atomic_scalar, metal::memory_order_relaxed); + float l1_ = metal::atomic_load_explicit(&storage_atomic_arr.inner[1], metal::memory_order_relaxed); + float l2_ = metal::atomic_load_explicit(&storage_struct.atomic_scalar, metal::memory_order_relaxed); + float l3_ = metal::atomic_load_explicit(&storage_struct.atomic_arr.inner[1], metal::memory_order_relaxed); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + float _e27 = metal::atomic_fetch_add_explicit(&storage_atomic_scalar, 1.0, metal::memory_order_relaxed); + float _e31 = metal::atomic_fetch_add_explicit(&storage_atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); + float _e35 = metal::atomic_fetch_add_explicit(&storage_struct.atomic_scalar, 1.0, metal::memory_order_relaxed); + float _e40 = metal::atomic_fetch_add_explicit(&storage_struct.atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + float _e43 = metal::atomic_fetch_sub_explicit(&storage_atomic_scalar, 1.0, metal::memory_order_relaxed); + float _e47 = metal::atomic_fetch_sub_explicit(&storage_atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); + float _e51 = metal::atomic_fetch_sub_explicit(&storage_struct.atomic_scalar, 1.0, metal::memory_order_relaxed); + float _e56 = metal::atomic_fetch_sub_explicit(&storage_struct.atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + float _e59 = metal::atomic_exchange_explicit(&storage_atomic_scalar, 1.0, metal::memory_order_relaxed); + float _e63 = metal::atomic_exchange_explicit(&storage_atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); + float _e67 = metal::atomic_exchange_explicit(&storage_struct.atomic_scalar, 1.0, metal::memory_order_relaxed); + float _e72 = metal::atomic_exchange_explicit(&storage_struct.atomic_arr.inner[1], 1.0, metal::memory_order_relaxed); + return; +} diff --git a/naga/tests/out/wgsl/atomicOps-flt32.wgsl b/naga/tests/out/wgsl/atomicOps-flt32.wgsl new file mode 100644 index 0000000000..b29f5f4f6d --- /dev/null +++ b/naga/tests/out/wgsl/atomicOps-flt32.wgsl @@ -0,0 +1,40 @@ +struct Struct { + atomic_scalar: atomic, + atomic_arr: array, 2>, +} + +@group(0) @binding(0) +var storage_atomic_scalar: atomic; +@group(0) @binding(1) +var storage_atomic_arr: array, 2>; +@group(0) @binding(2) +var storage_struct: Struct; + +@compute @workgroup_size(2, 1, 1) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + atomicStore((&storage_atomic_scalar), 1f); + atomicStore((&storage_atomic_arr[1]), 1f); + atomicStore((&storage_struct.atomic_scalar), 1f); + atomicStore((&storage_struct.atomic_arr[1]), 1f); + workgroupBarrier(); + let l0_ = atomicLoad((&storage_atomic_scalar)); + let l1_ = atomicLoad((&storage_atomic_arr[1])); + let l2_ = atomicLoad((&storage_struct.atomic_scalar)); + let l3_ = atomicLoad((&storage_struct.atomic_arr[1])); + workgroupBarrier(); + let _e27 = atomicAdd((&storage_atomic_scalar), 1f); + let _e31 = atomicAdd((&storage_atomic_arr[1]), 1f); + let _e35 = atomicAdd((&storage_struct.atomic_scalar), 1f); + let _e40 = atomicAdd((&storage_struct.atomic_arr[1]), 1f); + workgroupBarrier(); + let _e43 = atomicSub((&storage_atomic_scalar), 1f); + let _e47 = atomicSub((&storage_atomic_arr[1]), 1f); + let _e51 = atomicSub((&storage_struct.atomic_scalar), 1f); + let _e56 = atomicSub((&storage_struct.atomic_arr[1]), 1f); + workgroupBarrier(); + let _e59 = atomicExchange((&storage_atomic_scalar), 1f); + let _e63 = atomicExchange((&storage_atomic_arr[1]), 1f); + let _e67 = atomicExchange((&storage_struct.atomic_scalar), 1f); + let _e72 = atomicExchange((&storage_struct.atomic_arr[1]), 1f); + return; +} diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index 0e285e7b07..3ca141eaf3 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -782,6 +782,7 @@ fn convert_wgsl() { "atomicOps-int64-min-max", Targets::SPIRV | Targets::METAL | Targets::HLSL | Targets::WGSL, ), + ("atomicOps-flt32", Targets::METAL | Targets::WGSL), ( "atomicCompareExchange-int64", Targets::SPIRV | Targets::WGSL, diff --git a/tests/tests/shader/numeric_builtins.rs b/tests/tests/shader/numeric_builtins.rs index f6cb0bb39f..01daae88f2 100644 --- a/tests/tests/shader/numeric_builtins.rs +++ b/tests/tests/shader/numeric_builtins.rs @@ -135,6 +135,46 @@ fn create_int64_atomic_all_ops_test() -> Vec { tests } +fn create_flt32_atomic_test() -> Vec { + let mut tests = Vec::new(); + + let test = ShaderTest::new( + "atomicAdd".into(), + "value: f32".into(), + "atomicStore(&output, 0.0); atomicAdd(&output, 1.0); atomicAdd(&output, 1.0);".into(), + &[0.0_f32], + &[2.0_f32], + ) + .output_type("atomic".into()); + + tests.push(test); + + let test = ShaderTest::new( + "atomicSub".into(), + "value: f32".into(), + "atomicStore(&output, 0.0); atomicSub(&output, -1.0); atomicSub(&output, 0.5);".into(), + &[0.0_f32], + &[0.5_f32], + ) + .output_type("atomic".into()); + + tests.push(test); + + tests +} + +#[gpu_test] +static FLT32_ATOMIC: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(wgt::Features::SHADER_FLT32_ATOMIC) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test(ctx, InputStorageType::Storage, create_flt32_atomic_test()) + }); + #[gpu_test] static INT64_ATOMIC_ALL_OPS: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs index 620027994f..57455bdbc0 100644 --- a/wgpu-core/src/command/bind.rs +++ b/wgpu-core/src/command/bind.rs @@ -174,10 +174,16 @@ mod compat { } } + #[derive(Clone, Debug, Error)] + #[error("Unknown reason")] + struct Unknown(); + Err(Error::Incompatible { expected_bgl: expected_bgl.error_ident(), assigned_bgl: assigned_bgl.error_ident(), - inner: MultiError::new(errors.drain(..)).unwrap(), + inner: MultiError::new(errors.drain(..)).unwrap_or_else(|| { + MultiError::new(core::iter::once(Unknown())).unwrap() + }), }) } } else { diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index b9c3467754..bf50e2e4ae 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -531,6 +531,10 @@ pub fn create_validator( Caps::SHADER_INT64_ATOMIC_ALL_OPS, features.contains(wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS), ); + caps.set( + Caps::SHADER_FLT32_ATOMIC, + features.contains(wgt::Features::SHADER_FLT32_ATOMIC), + ); caps.set( Caps::MULTISAMPLED_SHADING, downlevel.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING), diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index ccbe64d527..ea7960fa57 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -2,6 +2,20 @@ //! It is designed for integration into browsers, as well as wrapping //! into other language-specific user-friendly libraries. //! +#![cfg_attr( + not(any(not(doc), wgpu_core_doc)), + doc = r#"\ +## Documentation hidden + +As a workaround for [an issue in rustdoc](https://github.com/rust-lang/rust/issues/114891) +that [affects `wgpu-core` documentation builds \ +severely](https://github.com/gfx-rs/wgpu/issues/4905), +the documentation for `wgpu-core` is empty unless built with +`RUSTFLAGS="--cfg wgpu_core_doc"`, which may take a very long time. +"# +)] +#![cfg(any(not(doc), wgpu_core_doc))] +//! //! ## Feature flags #![doc = document_features::document_features!()] //! diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index e7db97a1f9..2f1df75d8b 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -828,6 +828,12 @@ impl super::PrivateCapabilities { && ((device.supports_family(MTLGPUFamily::Apple8) && device.supports_family(MTLGPUFamily::Mac2)) || device.supports_family(MTLGPUFamily::Apple9)), + // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=6 + float_atomics: family_check + && (device.supports_family(MTLGPUFamily::Apple7) + || device.supports_family(MTLGPUFamily::Apple8) + || device.supports_family(MTLGPUFamily::Apple9) + || device.supports_family(MTLGPUFamily::Mac2)), } } @@ -908,6 +914,10 @@ impl super::PrivateCapabilities { F::SHADER_INT64_ATOMIC_MIN_MAX, self.int64_atomics && self.msl_version >= MTLLanguageVersion::V2_4, ); + features.set( + F::SHADER_FLT32_ATOMIC, + self.float_atomics && self.msl_version >= MTLLanguageVersion::V3_0, + ); features.set( F::ADDRESS_MODE_CLAMP_TO_BORDER, diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 62d409a8ff..0dd28cb1b7 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -297,6 +297,7 @@ struct PrivateCapabilities { supports_simd_scoped_operations: bool, int64: bool, int64_atomics: bool, + float_atomics: bool, } #[derive(Clone, Debug)] diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index e44f91665c..3b9256c64a 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -953,6 +953,13 @@ bitflags::bitflags! { /// /// This is a native only feature. const SHADER_INT64_ATOMIC_ALL_OPS = 1 << 61; + /// Allows shaders to use all f32 atomic operations. + /// + /// Supported platforms: + /// - Metal (with MSL 3.0+) + /// + /// This is a native only feature. + const SHADER_FLT32_ATOMIC = 1 << 62; /// Allows using the [VK_GOOGLE_display_timing] Vulkan extension. /// /// This is used for frame pacing to reduce latency, and is generally only available on Android. @@ -968,7 +975,7 @@ bitflags::bitflags! { /// /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html /// [`Surface::as_hal()`]: https://docs.rs/wgpu/latest/wgpu/struct.Surface.html#method.as_hal - const VULKAN_GOOGLE_DISPLAY_TIMING = 1 << 62; + const VULKAN_GOOGLE_DISPLAY_TIMING = 1 << 63; } }