From 0a7dde34ef3b3f8ad81021db764d6bd53f796443 Mon Sep 17 00:00:00 2001 From: atlas dostal Date: Wed, 11 Dec 2024 17:48:47 -0500 Subject: [PATCH] 64 bit image atomics --- CHANGELOG.md | 1 + naga/src/back/glsl/features.rs | 1 + naga/src/back/glsl/mod.rs | 1 + naga/src/back/hlsl/conv.rs | 1 + naga/src/back/msl/writer.rs | 6 ++- naga/src/back/spv/image.rs | 4 ++ naga/src/back/spv/instructions.rs | 1 + naga/src/back/spv/writer.rs | 11 +++-- naga/src/back/wgsl/writer.rs | 1 + naga/src/front/glsl/parser/types.rs | 1 + naga/src/front/spv/convert.rs | 1 + naga/src/front/wgsl/parse/conv.rs | 1 + naga/src/front/wgsl/parse/mod.rs | 4 ++ naga/src/front/wgsl/to_wgsl.rs | 1 + naga/src/lib.rs | 1 + naga/src/proc/mod.rs | 7 ++- naga/src/valid/function.rs | 28 +++++++++++ naga/src/valid/mod.rs | 2 + naga/tests/in/atomicTexture-int64.param.ron | 24 +++++++++ naga/tests/in/atomicTexture-int64.wgsl | 12 +++++ naga/tests/out/hlsl/atomicTexture-int64.hlsl | 17 +++++++ naga/tests/out/hlsl/atomicTexture-int64.ron | 12 +++++ naga/tests/out/msl/atomicTexture-int64.msl | 18 +++++++ naga/tests/out/spv/atomicTexture-int64.spvasm | 49 +++++++++++++++++++ naga/tests/out/wgsl/atomicTexture-int64.wgsl | 10 ++++ naga/tests/snapshots.rs | 4 ++ .../tests/image_atomics/image_64_atomics.wgsl | 13 +++++ tests/tests/image_atomics/mod.rs | 38 ++++++++++++-- wgpu-core/src/device/mod.rs | 4 ++ wgpu-core/src/validation.rs | 3 ++ wgpu-hal/src/auxil/dxgi/conv.rs | 1 + wgpu-hal/src/dx12/adapter.rs | 7 +++ wgpu-hal/src/gles/adapter.rs | 2 + wgpu-hal/src/gles/conv.rs | 1 + wgpu-hal/src/metal/adapter.rs | 18 +++++++ wgpu-hal/src/vulkan/adapter.rs | 45 ++++++++++++++++- wgpu-hal/src/vulkan/conv.rs | 1 + wgpu-info/src/texture.rs | 3 +- wgpu-types/src/lib.rs | 47 +++++++++++++++--- 39 files changed, 382 insertions(+), 20 deletions(-) create mode 100644 naga/tests/in/atomicTexture-int64.param.ron create mode 100644 naga/tests/in/atomicTexture-int64.wgsl create mode 100644 naga/tests/out/hlsl/atomicTexture-int64.hlsl create mode 100644 naga/tests/out/hlsl/atomicTexture-int64.ron create mode 100644 naga/tests/out/msl/atomicTexture-int64.msl create mode 100644 naga/tests/out/spv/atomicTexture-int64.spvasm create mode 100644 naga/tests/out/wgsl/atomicTexture-int64.wgsl create mode 100644 tests/tests/image_atomics/image_64_atomics.wgsl diff --git a/CHANGELOG.md b/CHANGELOG.md index e034c70399..6cc7e8fc1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -165,6 +165,7 @@ By @wumpf in [#6849](https://github.com/gfx-rs/wgpu/pull/6849). - Add build support for Apple Vision Pro. By @guusw in [#6611](https://github.com/gfx-rs/wgpu/pull/6611). - Add `wgsl_language_features` for obtaining available WGSL language feature by @sagudev in [#6814](https://github.com/gfx-rs/wgpu/pull/6814) - Image atomic support in shaders. By @atlv24 in [#6706](https://github.com/gfx-rs/wgpu/pull/6706) +- 64 bit image atomic support in shaders. By @atlv24 in [#5537](https://github.com/gfx-rs/wgpu/pull/5537) - Add `no_std` support to `wgpu-types`. By @bushrat011899 in [#6892](https://github.com/gfx-rs/wgpu/pull/6892). ##### Vulkan diff --git a/naga/src/back/glsl/features.rs b/naga/src/back/glsl/features.rs index 6735eef786..bef54bd4f8 100644 --- a/naga/src/back/glsl/features.rs +++ b/naga/src/back/glsl/features.rs @@ -408,6 +408,7 @@ impl Writer<'_, W> { | StorageFormat::Rgb10a2Uint | StorageFormat::Rgb10a2Unorm | StorageFormat::Rg11b10Ufloat + | StorageFormat::R64Uint | StorageFormat::Rg32Uint | StorageFormat::Rg32Sint | StorageFormat::Rg32Float => { diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index df20f074c8..b058ae5ee8 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -4944,6 +4944,7 @@ fn glsl_storage_format(format: crate::StorageFormat) -> Result<&'static str, Err Sf::Rgb10a2Uint => "rgb10_a2ui", Sf::Rgb10a2Unorm => "rgb10_a2", Sf::Rg11b10Ufloat => "r11f_g11f_b10f", + Sf::R64Uint => "r64ui", Sf::Rg32Uint => "rg32ui", Sf::Rg32Sint => "rg32i", Sf::Rg32Float => "rg32f", diff --git a/naga/src/back/hlsl/conv.rs b/naga/src/back/hlsl/conv.rs index 83c7667eab..9573fce2a8 100644 --- a/naga/src/back/hlsl/conv.rs +++ b/naga/src/back/hlsl/conv.rs @@ -125,6 +125,7 @@ impl crate::StorageFormat { Self::R8Snorm | Self::R16Snorm => "snorm float", Self::R8Uint | Self::R16Uint | Self::R32Uint => "uint", Self::R8Sint | Self::R16Sint | Self::R32Sint => "int", + Self::R64Uint => "uint64_t", Self::Rg16Float | Self::Rg32Float => "float2", Self::Rg8Unorm | Self::Rg16Unorm => "unorm float2", diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index a1b2f764c3..4589d39892 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -1212,7 +1212,11 @@ impl Writer { ) -> BackendResult { write!(self.out, "{level}")?; self.put_expression(image, &context.expression, false)?; - let op = fun.to_msl(); + let op = if context.expression.resolve_type(value).scalar_width() == Some(8) { + fun.to_msl_64_bit()? + } else { + fun.to_msl() + }; write!(self.out, ".atomic_{}(", op)?; // coordinates in IR are int, but Metal expects uint self.put_cast_to_uint_scalar_or_vector(address.coordinate, &context.expression)?; diff --git a/naga/src/back/spv/image.rs b/naga/src/back/spv/image.rs index 84ec3018e1..fe4001060e 100644 --- a/naga/src/back/spv/image.rs +++ b/naga/src/back/spv/image.rs @@ -1253,6 +1253,10 @@ impl BlockContext<'_> { class: spirv::StorageClass::Image, })); let signed = scalar.kind == crate::ScalarKind::Sint; + if scalar.width == 8 { + self.writer + .require_any("64 bit image atomics", &[spirv::Capability::Int64Atomics])?; + } let pointer_id = self.gen_id(); let coordinates = self.write_image_coordinates(coordinate, array_index, block)?; let sample_id = self.writer.get_constant_scalar(crate::Literal::U32(0)); diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs index 32b8113c69..38aed8c351 100644 --- a/naga/src/back/spv/instructions.rs +++ b/naga/src/back/spv/instructions.rs @@ -1206,6 +1206,7 @@ impl From for spirv::ImageFormat { Sf::Rgb10a2Uint => Self::Rgb10a2ui, Sf::Rgb10a2Unorm => Self::Rgb10A2, Sf::Rg11b10Ufloat => Self::R11fG11fB10f, + Sf::R64Uint => Self::R64ui, Sf::Rg32Uint => Self::Rg32ui, Sf::Rg32Sint => Self::Rg32i, Sf::Rg32Float => Self::Rg32f, diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 56e0029509..9d15c2f014 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -1089,10 +1089,13 @@ impl Writer { "storage image format", &[spirv::Capability::StorageImageExtendedFormats], ), - If::R64ui | If::R64i => self.require_any( - "64-bit integer storage image format", - &[spirv::Capability::Int64ImageEXT], - ), + If::R64ui | If::R64i => { + self.use_extension("SPV_EXT_shader_image_int64"); + self.require_any( + "64-bit integer storage image format", + &[spirv::Capability::Int64ImageEXT], + ) + } If::Unknown | If::Rgba32f | If::Rgba16f diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index 07cffc2e73..a7cd8f95c9 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -2079,6 +2079,7 @@ const fn storage_format_str(format: crate::StorageFormat) -> &'static str { Sf::Rgb10a2Uint => "rgb10a2uint", Sf::Rgb10a2Unorm => "rgb10a2unorm", Sf::Rg11b10Ufloat => "rg11b10float", + Sf::R64Uint => "r64uint", Sf::Rg32Uint => "rg32uint", Sf::Rg32Sint => "rg32sint", Sf::Rg32Float => "rg32float", diff --git a/naga/src/front/glsl/parser/types.rs b/naga/src/front/glsl/parser/types.rs index e787b0c95a..501d53805c 100644 --- a/naga/src/front/glsl/parser/types.rs +++ b/naga/src/front/glsl/parser/types.rs @@ -430,6 +430,7 @@ fn map_image_format(word: &str) -> Option { "rgba32ui" => Sf::Rgba32Uint, "rgba16ui" => Sf::Rgba16Uint, "rgba8ui" => Sf::Rgba8Uint, + "r64ui" => Sf::R64Uint, "rg32ui" => Sf::Rg32Uint, "rg16ui" => Sf::Rg16Uint, "rg8ui" => Sf::Rg8Uint, diff --git a/naga/src/front/spv/convert.rs b/naga/src/front/spv/convert.rs index 33ed4793cf..6baf74225c 100644 --- a/naga/src/front/spv/convert.rs +++ b/naga/src/front/spv/convert.rs @@ -105,6 +105,7 @@ pub(super) fn map_image_format(word: spirv::Word) -> Result Ok(crate::StorageFormat::Rgb10a2Uint), Some(spirv::ImageFormat::Rgb10A2) => Ok(crate::StorageFormat::Rgb10a2Unorm), Some(spirv::ImageFormat::R11fG11fB10f) => Ok(crate::StorageFormat::Rg11b10Ufloat), + Some(spirv::ImageFormat::R64ui) => Ok(crate::StorageFormat::R64Uint), Some(spirv::ImageFormat::Rg32ui) => Ok(crate::StorageFormat::Rg32Uint), Some(spirv::ImageFormat::Rg32i) => Ok(crate::StorageFormat::Rg32Sint), Some(spirv::ImageFormat::Rg32f) => Ok(crate::StorageFormat::Rg32Float), diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs index 0c9341eb62..00c19d877d 100644 --- a/naga/src/front/wgsl/parse/conv.rs +++ b/naga/src/front/wgsl/parse/conv.rs @@ -95,6 +95,7 @@ pub fn map_storage_format(word: &str, span: Span) -> Result Sf::Rgb10a2Uint, "rgb10a2unorm" => Sf::Rgb10a2Unorm, "rg11b10float" => Sf::Rg11b10Ufloat, + "r64uint" => Sf::R64Uint, "rg32uint" => Sf::Rg32Uint, "rg32sint" => Sf::Rg32Sint, "rg32float" => Sf::Rg32Float, diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs index 219bba551f..7d8a390c0e 100644 --- a/naga/src/front/wgsl/parse/mod.rs +++ b/naga/src/front/wgsl/parse/mod.rs @@ -1633,6 +1633,10 @@ impl Parser { kind: Float | Sint | Uint, width: 4, } => Ok(()), + Scalar { + kind: Uint, + width: 8, + } => Ok(()), _ => Err(Error::BadTextureSampleType { span, scalar }), } } diff --git a/naga/src/front/wgsl/to_wgsl.rs b/naga/src/front/wgsl/to_wgsl.rs index 4d401b0708..7d4c17f5f0 100644 --- a/naga/src/front/wgsl/to_wgsl.rs +++ b/naga/src/front/wgsl/to_wgsl.rs @@ -178,6 +178,7 @@ impl crate::StorageFormat { Sf::Rgb10a2Uint => "rgb10a2uint", Sf::Rgb10a2Unorm => "rgb10a2unorm", Sf::Rg11b10Ufloat => "rg11b10float", + Sf::R64Uint => "r64uint", Sf::Rg32Uint => "rg32uint", Sf::Rg32Sint => "rg32sint", Sf::Rg32Float => "rg32float", diff --git a/naga/src/lib.rs b/naga/src/lib.rs index ffda755740..ddf78f1b68 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -642,6 +642,7 @@ pub enum StorageFormat { Rg11b10Ufloat, // 64-bit formats + R64Uint, Rg32Uint, Rg32Sint, Rg32Float, diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index 76698fd102..fafac8cb30 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -49,6 +49,7 @@ impl From for super::Scalar { Sf::Rgb10a2Uint => Sk::Uint, Sf::Rgb10a2Unorm => Sk::Float, Sf::Rg11b10Ufloat => Sk::Float, + Sf::R64Uint => Sk::Uint, Sf::Rg32Uint => Sk::Uint, Sf::Rg32Sint => Sk::Sint, Sf::Rg32Float => Sk::Float, @@ -65,7 +66,11 @@ impl From for super::Scalar { Sf::Rgba16Unorm => Sk::Float, Sf::Rgba16Snorm => Sk::Float, }; - super::Scalar { kind, width: 4 } + let width = match format { + Sf::R64Uint => 8, + _ => 4, + }; + super::Scalar { kind, width } } } diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs index eaae2301fc..a910be992c 100644 --- a/naga/src/valid/function.rs +++ b/naga/src/valid/function.rs @@ -1282,6 +1282,34 @@ impl super::Validator { .with_span_handle(image, context.expressions)); } match format { + crate::StorageFormat::R64Uint => { + if !self.capabilities.intersects( + super::Capabilities::TEXTURE_INT64_ATOMIC, + ) { + return Err(FunctionError::MissingCapability( + super::Capabilities::TEXTURE_INT64_ATOMIC, + ) + .with_span_static( + span, + "missing capability for this operation", + )); + } + match fun { + crate::AtomicFunction::Min + | crate::AtomicFunction::Max => {} + _ => { + return Err( + FunctionError::InvalidImageAtomicFunction( + fun, + ) + .with_span_handle( + image, + context.expressions, + ), + ); + } + } + } crate::StorageFormat::R32Sint | crate::StorageFormat::R32Uint => { if !self diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index 953e6cded6..906d449362 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -154,6 +154,8 @@ bitflags::bitflags! { const SHADER_FLOAT32_ATOMIC = 1 << 21; /// Support for atomic operations on images. const TEXTURE_ATOMIC = 1 << 22; + /// Support for atomic operations on 64-bit images. + const TEXTURE_INT64_ATOMIC = 1 << 23; } } diff --git a/naga/tests/in/atomicTexture-int64.param.ron b/naga/tests/in/atomicTexture-int64.param.ron new file mode 100644 index 0000000000..ffc7fb4cb7 --- /dev/null +++ b/naga/tests/in/atomicTexture-int64.param.ron @@ -0,0 +1,24 @@ +( + god_mode: true, + spv: ( + version: (1, 0), + capabilities: [ Int64, Int64ImageEXT, Int64Atomics ], + ), + hlsl: ( + shader_model: V6_6, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + restrict_indexing: true + ), + msl: ( + lang_version: (3, 1), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: true, + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/atomicTexture-int64.wgsl b/naga/tests/in/atomicTexture-int64.wgsl new file mode 100644 index 0000000000..d8bf298ba9 --- /dev/null +++ b/naga/tests/in/atomicTexture-int64.wgsl @@ -0,0 +1,12 @@ +@group(0) @binding(0) +var image: texture_storage_2d; + +@compute +@workgroup_size(2) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + textureAtomicMax(image, vec2(0, 0), 1lu); + + workgroupBarrier(); + + textureAtomicMin(image, vec2(0, 0), 1lu); +} diff --git a/naga/tests/out/hlsl/atomicTexture-int64.hlsl b/naga/tests/out/hlsl/atomicTexture-int64.hlsl new file mode 100644 index 0000000000..056489c790 --- /dev/null +++ b/naga/tests/out/hlsl/atomicTexture-int64.hlsl @@ -0,0 +1,17 @@ +struct NagaConstants { + int first_vertex; + int first_instance; + uint other; +}; +ConstantBuffer _NagaConstants: register(b0, space1); + +RWTexture2D image : register(u0); + +[numthreads(2, 1, 1)] +void cs_main(uint3 id : SV_GroupThreadID) +{ + InterlockedMax(image[int2(0, 0)],1uL); + GroupMemoryBarrierWithGroupSync(); + InterlockedMin(image[int2(0, 0)],1uL); + return; +} diff --git a/naga/tests/out/hlsl/atomicTexture-int64.ron b/naga/tests/out/hlsl/atomicTexture-int64.ron new file mode 100644 index 0000000000..67a9035512 --- /dev/null +++ b/naga/tests/out/hlsl/atomicTexture-int64.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"cs_main", + target_profile:"cs_6_6", + ), + ], +) diff --git a/naga/tests/out/msl/atomicTexture-int64.msl b/naga/tests/out/msl/atomicTexture-int64.msl new file mode 100644 index 0000000000..c00d8b7654 --- /dev/null +++ b/naga/tests/out/msl/atomicTexture-int64.msl @@ -0,0 +1,18 @@ +// language: metal3.1 +#include +#include + +using metal::uint; + + +struct cs_mainInput { +}; +kernel void cs_main( + metal::uint3 id [[thread_position_in_threadgroup]] +, metal::texture2d image [[user(fake0)]] +) { + image.atomic_max(metal::uint2(metal::int2(0, 0)), 1uL); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + image.atomic_min(metal::uint2(metal::int2(0, 0)), 1uL); + return; +} diff --git a/naga/tests/out/spv/atomicTexture-int64.spvasm b/naga/tests/out/spv/atomicTexture-int64.spvasm new file mode 100644 index 0000000000..0238f44d7e --- /dev/null +++ b/naga/tests/out/spv/atomicTexture-int64.spvasm @@ -0,0 +1,49 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 31 +OpCapability Shader +OpCapability Int64ImageEXT +OpCapability Int64 +OpCapability Int64Atomics +OpExtension "SPV_EXT_shader_image_int64" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %15 "cs_main" %12 +OpExecutionMode %15 LocalSize 2 1 1 +OpDecorate %9 DescriptorSet 0 +OpDecorate %9 Binding 0 +OpDecorate %12 BuiltIn LocalInvocationId +%2 = OpTypeVoid +%4 = OpTypeInt 64 0 +%3 = OpTypeImage %4 2D 0 0 0 2 R64ui +%6 = OpTypeInt 32 0 +%5 = OpTypeVector %6 3 +%8 = OpTypeInt 32 1 +%7 = OpTypeVector %8 2 +%10 = OpTypePointer UniformConstant %3 +%9 = OpVariable %10 UniformConstant +%13 = OpTypePointer Input %5 +%12 = OpVariable %13 Input +%16 = OpTypeFunction %2 +%18 = OpConstant %8 0 +%19 = OpConstantComposite %7 %18 %18 +%20 = OpConstant %4 1 +%22 = OpTypePointer Image %4 +%24 = OpConstant %6 0 +%26 = OpConstant %8 4 +%27 = OpConstant %6 2 +%28 = OpConstant %6 264 +%15 = OpFunction %2 None %16 +%11 = OpLabel +%14 = OpLoad %5 %12 +%17 = OpLoad %3 %9 +OpBranch %21 +%21 = OpLabel +%23 = OpImageTexelPointer %22 %9 %19 %24 +%25 = OpAtomicUMax %4 %23 %26 %24 %20 +OpControlBarrier %27 %27 %28 +%29 = OpImageTexelPointer %22 %9 %19 %24 +%30 = OpAtomicUMin %4 %29 %26 %24 %20 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/atomicTexture-int64.wgsl b/naga/tests/out/wgsl/atomicTexture-int64.wgsl new file mode 100644 index 0000000000..52bbe76771 --- /dev/null +++ b/naga/tests/out/wgsl/atomicTexture-int64.wgsl @@ -0,0 +1,10 @@ +@group(0) @binding(0) +var image: texture_storage_2d; + +@compute @workgroup_size(2, 1, 1) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + textureAtomicMax(image, vec2(0i, 0i), 1lu); + workgroupBarrier(); + textureAtomicMin(image, vec2(0i, 0i), 1lu); + return; +} diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index db5fcdf19e..e15948c7b1 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -796,6 +796,10 @@ fn convert_wgsl() { "atomicOps-float32", Targets::SPIRV | Targets::METAL | Targets::WGSL, ), + ( + "atomicTexture-int64", + Targets::SPIRV | Targets::METAL | Targets::HLSL | Targets::WGSL, + ), ( "atomicCompareExchange-int64", Targets::SPIRV | Targets::WGSL, diff --git a/tests/tests/image_atomics/image_64_atomics.wgsl b/tests/tests/image_atomics/image_64_atomics.wgsl new file mode 100644 index 0000000000..c9a967ca59 --- /dev/null +++ b/tests/tests/image_atomics/image_64_atomics.wgsl @@ -0,0 +1,13 @@ +@group(0) @binding(0) +var image: texture_storage_2d; + +@compute +@workgroup_size(4, 4, 1) +fn cs_main(@builtin(local_invocation_id) id: vec3, @builtin(workgroup_id) group_id: vec3) { + let pixel = id + group_id * 4; + textureAtomicMax(image, pixel.xy, u64(pixel.x)); + + storageBarrier(); + + textureAtomicMin(image, pixel.xy, u64(pixel.y)); +} \ No newline at end of file diff --git a/tests/tests/image_atomics/mod.rs b/tests/tests/image_atomics/mod.rs index f9a32c4056..0063602f4d 100644 --- a/tests/tests/image_atomics/mod.rs +++ b/tests/tests/image_atomics/mod.rs @@ -5,18 +5,46 @@ use wgpu_test::{ fail, gpu_test, image::ReadbackBuffers, GpuTestConfiguration, TestParameters, TestingContext, }; +#[gpu_test] +static IMAGE_64_ATOMICS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .limits(wgpu::Limits { + max_storage_textures_per_shader_stage: 1, + max_compute_invocations_per_workgroup: 64, + max_compute_workgroup_size_x: 4, + max_compute_workgroup_size_y: 4, + max_compute_workgroup_size_z: 4, + max_compute_workgroups_per_dimension: wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, + ..wgpu::Limits::downlevel_webgl2_defaults() + }) + .features( + wgpu::Features::TEXTURE_ATOMIC + | wgpu::Features::TEXTURE_INT64_ATOMIC + | wgpu::Features::SHADER_INT64, + ), + ) + .run_async(|ctx| async move { + test_format( + ctx, + wgpu::TextureFormat::R64Uint, + wgpu::include_wgsl!("image_64_atomics.wgsl"), + ) + .await; + }); + #[gpu_test] static IMAGE_32_ATOMICS: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() - .limits(wgt::Limits { + .limits(wgpu::Limits { max_storage_textures_per_shader_stage: 1, max_compute_invocations_per_workgroup: 64, max_compute_workgroup_size_x: 4, max_compute_workgroup_size_y: 4, max_compute_workgroup_size_z: 4, - max_compute_workgroups_per_dimension: wgt::COPY_BYTES_PER_ROW_ALIGNMENT, - ..wgt::Limits::downlevel_webgl2_defaults() + max_compute_workgroups_per_dimension: wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, + ..wgpu::Limits::downlevel_webgl2_defaults() }) .features(wgpu::Features::TEXTURE_ATOMIC), ) @@ -36,8 +64,8 @@ async fn test_format( ) { let pixel_bytes = format.target_pixel_byte_cost().unwrap(); let size = wgpu::Extent3d { - width: wgt::COPY_BYTES_PER_ROW_ALIGNMENT, - height: wgt::COPY_BYTES_PER_ROW_ALIGNMENT, + width: wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, + height: wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, depth_or_array_layers: 1, }; let bind_group_layout_entry = wgpu::BindGroupLayoutEntry { diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 87364ab5cb..e9600f72d6 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -429,6 +429,10 @@ pub fn create_validator( Caps::TEXTURE_ATOMIC, features.contains(wgt::Features::TEXTURE_ATOMIC), ); + caps.set( + Caps::TEXTURE_INT64_ATOMIC, + features.contains(wgt::Features::TEXTURE_INT64_ATOMIC), + ); caps.set( Caps::SHADER_FLOAT32_ATOMIC, features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC), diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index a930e26cc6..8a1384ad49 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -312,6 +312,7 @@ fn map_storage_format_to_naga(format: wgt::TextureFormat) -> Option Sf::Rgb10a2Unorm, Tf::Rg11b10Ufloat => Sf::Rg11b10Ufloat, + Tf::R64Uint => Sf::R64Uint, Tf::Rg32Uint => Sf::Rg32Uint, Tf::Rg32Sint => Sf::Rg32Sint, Tf::Rg32Float => Sf::Rg32Float, @@ -368,6 +369,7 @@ fn map_storage_format_from_naga(format: naga::StorageFormat) -> wgt::TextureForm Sf::Rgb10a2Unorm => Tf::Rgb10a2Unorm, Sf::Rg11b10Ufloat => Tf::Rg11b10Ufloat, + Sf::R64Uint => Tf::R64Uint, Sf::Rg32Uint => Tf::Rg32Uint, Sf::Rg32Sint => Tf::Rg32Sint, Sf::Rg32Float => Tf::Rg32Float, @@ -712,6 +714,7 @@ impl NumericType { Tf::Rg8Unorm | Tf::Rg8Snorm | Tf::Rg16Float | Tf::Rg32Float => { (NumericDimension::Vector(Vs::Bi), Scalar::F32) } + Tf::R64Uint => (NumericDimension::Scalar, Scalar::U64), Tf::Rg8Uint | Tf::Rg16Uint | Tf::Rg32Uint => { (NumericDimension::Vector(Vs::Bi), Scalar::U32) } diff --git a/wgpu-hal/src/auxil/dxgi/conv.rs b/wgpu-hal/src/auxil/dxgi/conv.rs index 0f94575df8..a88853de11 100644 --- a/wgpu-hal/src/auxil/dxgi/conv.rs +++ b/wgpu-hal/src/auxil/dxgi/conv.rs @@ -48,6 +48,7 @@ pub fn map_texture_format_failable( Tf::Rgb10a2Uint => DXGI_FORMAT_R10G10B10A2_UINT, Tf::Rgb10a2Unorm => DXGI_FORMAT_R10G10B10A2_UNORM, Tf::Rg11b10Ufloat => DXGI_FORMAT_R11G11B10_FLOAT, + Tf::R64Uint => DXGI_FORMAT_R32G32_UINT, // R64 emulated by R32G32 Tf::Rg32Uint => DXGI_FORMAT_R32G32_UINT, Tf::Rg32Sint => DXGI_FORMAT_R32G32_SINT, Tf::Rg32Float => DXGI_FORMAT_R32G32_FLOAT, diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 7859f06e5d..fd02917ff2 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -389,6 +389,13 @@ impl super::Adapter { && features1.Int64ShaderOps.as_bool(), ); + features.set( + wgt::Features::TEXTURE_INT64_ATOMIC, + shader_model >= naga::back::hlsl::ShaderModel::V6_6 + && hr.is_ok() + && features1.Int64ShaderOps.as_bool(), + ); + features.set( wgt::Features::SUBGROUP, shader_model >= naga::back::hlsl::ShaderModel::V6_0 diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 2c3b60e9c3..67ff20ff19 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -1083,6 +1083,7 @@ impl crate::Adapter for super::Adapter { let texture_float_linear = feature_fn(wgt::Features::FLOAT32_FILTERABLE, filterable); let image_atomic = feature_fn(wgt::Features::TEXTURE_ATOMIC, Tfc::STORAGE_ATOMIC); + let image_64_atomic = feature_fn(wgt::Features::TEXTURE_INT64_ATOMIC, Tfc::STORAGE_ATOMIC); match format { Tf::R8Unorm => filterable_renderable, @@ -1115,6 +1116,7 @@ impl crate::Adapter for super::Adapter { Tf::Rgb10a2Uint => renderable, Tf::Rgb10a2Unorm => filterable_renderable, Tf::Rg11b10Ufloat => filterable | float_renderable, + Tf::R64Uint => image_64_atomic, Tf::Rg32Uint => renderable, Tf::Rg32Sint => renderable, Tf::Rg32Float => unfilterable | float_renderable | texture_float_linear, diff --git a/wgpu-hal/src/gles/conv.rs b/wgpu-hal/src/gles/conv.rs index 2307769411..7348f2f19e 100644 --- a/wgpu-hal/src/gles/conv.rs +++ b/wgpu-hal/src/gles/conv.rs @@ -50,6 +50,7 @@ impl super::AdapterShared { glow::RGB, glow::UNSIGNED_INT_10F_11F_11F_REV, ), + Tf::R64Uint => (glow::RG32UI, glow::RED_INTEGER, glow::UNSIGNED_INT), Tf::Rg32Uint => (glow::RG32UI, glow::RG_INTEGER, glow::UNSIGNED_INT), Tf::Rg32Sint => (glow::RG32I, glow::RG_INTEGER, glow::INT), Tf::Rg32Float => (glow::RG32F, glow::RG, glow::FLOAT), diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index c9e09497dc..ecff2b7a6a 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -115,6 +115,12 @@ impl crate::Adapter for super::Adapter { Tfc::empty() }; + let image_64_atomic_if = if pc.int64_atomics { + Tfc::STORAGE_ATOMIC + } else { + Tfc::empty() + }; + // Metal defined pixel format capabilities let all_caps = Tfc::SAMPLED_LINEAR | Tfc::STORAGE_WRITE_ONLY @@ -200,6 +206,12 @@ impl crate::Adapter for super::Adapter { flags.set(Tfc::STORAGE_WRITE_ONLY, pc.format_rg11b10_all); flags } + Tf::R64Uint => { + Tfc::COLOR_ATTACHMENT + | Tfc::STORAGE_WRITE_ONLY + | image_64_atomic_if + | read_write_tier1_if + } Tf::Rg32Uint | Tf::Rg32Sint => { Tfc::COLOR_ATTACHMENT | Tfc::STORAGE_WRITE_ONLY | msaa_count } @@ -927,6 +939,10 @@ impl super::PrivateCapabilities { F::SHADER_INT64_ATOMIC_MIN_MAX, self.int64_atomics && self.msl_version >= MTLLanguageVersion::V2_4, ); + features.set( + F::TEXTURE_INT64_ATOMIC, + self.int64_atomics && self.msl_version >= MTLLanguageVersion::V3_1, + ); features.set( F::TEXTURE_ATOMIC, self.msl_version >= MTLLanguageVersion::V3_1, @@ -1070,6 +1086,8 @@ impl super::PrivateCapabilities { Tf::Rgb10a2Uint => RGB10A2Uint, Tf::Rgb10a2Unorm => RGB10A2Unorm, Tf::Rg11b10Ufloat => RG11B10Float, + // Ruint64 textures are emulated on metal + Tf::R64Uint => RG32Uint, Tf::Rg32Uint => RG32Uint, Tf::Rg32Sint => RG32Sint, Tf::Rg32Float => RG32Float, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index ccff457fae..fd190fc34a 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -109,6 +109,9 @@ pub struct PhysicalDeviceFeatures { /// Features provided by `VK_KHR_shader_atomic_int64`, promoted to Vulkan 1.2. shader_atomic_int64: Option>, + /// Features provided by `VK_EXT_shader_image_atomic_int64` + shader_image_atomic_int64: Option>, + /// Features provided by `VK_EXT_shader_atomic_float`. shader_atomic_float: Option>, @@ -160,6 +163,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.shader_atomic_int64 { info = info.push_next(feature); } + if let Some(ref mut feature) = self.shader_image_atomic_int64 { + info = info.push_next(feature); + } if let Some(ref mut feature) = self.shader_atomic_float { info = info.push_next(feature); } @@ -444,6 +450,17 @@ impl PhysicalDeviceFeatures { } else { None }, + shader_image_atomic_int64: if enabled_extensions + .contains(&ext::shader_image_atomic_int64::NAME) + { + let needed = requested_features.intersects(wgt::Features::TEXTURE_INT64_ATOMIC); + Some( + vk::PhysicalDeviceShaderImageAtomicInt64FeaturesEXT::default() + .shader_image_int64_atomics(needed), + ) + } else { + None + }, shader_atomic_float: if enabled_extensions.contains(&ext::shader_atomic_float::NAME) { let needed = requested_features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC); Some( @@ -599,6 +616,16 @@ impl PhysicalDeviceFeatures { ); } + if let Some(ref shader_image_atomic_int64) = self.shader_image_atomic_int64 { + features.set( + F::TEXTURE_INT64_ATOMIC, + shader_image_atomic_int64 + .shader_image_int64_atomics(true) + .shader_image_int64_atomics + != 0, + ); + } + if let Some(ref shader_atomic_float) = self.shader_atomic_float { features.set( F::SHADER_FLOAT32_ATOMIC, @@ -1019,6 +1046,11 @@ impl PhysicalDeviceProperties { extensions.push(khr::shader_atomic_int64::NAME); } + // Require `VK_EXT_shader_image_atomic_int64` if the associated feature was requested + if requested_features.intersects(wgt::Features::TEXTURE_INT64_ATOMIC) { + extensions.push(ext::shader_image_atomic_int64::NAME); + } + // Require `VK_EXT_shader_atomic_float` if the associated feature was requested if requested_features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC) { extensions.push(ext::shader_atomic_float::NAME); @@ -1319,6 +1351,12 @@ impl super::InstanceShared { features2 = features2.push_next(next); } + if capabilities.supports_extension(ext::shader_image_atomic_int64::NAME) { + let next = features + .shader_image_atomic_int64 + .insert(vk::PhysicalDeviceShaderImageAtomicInt64FeaturesEXT::default()); + features2 = features2.push_next(next); + } if capabilities.supports_extension(ext::shader_atomic_float::NAME) { let next = features .shader_atomic_float @@ -1815,11 +1853,16 @@ impl super::Adapter { if features.intersects( wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS - | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX + | wgt::Features::TEXTURE_INT64_ATOMIC, ) { capabilities.push(spv::Capability::Int64Atomics); } + if features.intersects(wgt::Features::TEXTURE_INT64_ATOMIC) { + capabilities.push(spv::Capability::Int64ImageEXT); + } + if features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC) { capabilities.push(spv::Capability::AtomicFloat32AddEXT); } diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index 257b5b626f..e72d28d72a 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -37,6 +37,7 @@ impl super::PrivateCapabilities { Tf::Rgb10a2Uint => F::A2B10G10R10_UINT_PACK32, Tf::Rgb10a2Unorm => F::A2B10G10R10_UNORM_PACK32, Tf::Rg11b10Ufloat => F::B10G11R11_UFLOAT_PACK32, + Tf::R64Uint => F::R64_UINT, Tf::Rg32Uint => F::R32G32_UINT, Tf::Rg32Sint => F::R32G32_SINT, Tf::Rg32Float => F::R32G32_SFLOAT, diff --git a/wgpu-info/src/texture.rs b/wgpu-info/src/texture.rs index 2487bf350f..64325f0e5b 100644 --- a/wgpu-info/src/texture.rs +++ b/wgpu-info/src/texture.rs @@ -1,6 +1,6 @@ // Lets keep these on one line #[rustfmt::skip] -pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 116] = [ +pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 117] = [ wgpu::TextureFormat::R8Unorm, wgpu::TextureFormat::R8Snorm, wgpu::TextureFormat::R8Uint, @@ -33,6 +33,7 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 116] = [ wgpu::TextureFormat::Rgb10a2Uint, wgpu::TextureFormat::Rgb10a2Unorm, wgpu::TextureFormat::Rg11b10Ufloat, + wgpu::TextureFormat::R64Uint, wgpu::TextureFormat::Rg32Uint, wgpu::TextureFormat::Rg32Sint, wgpu::TextureFormat::Rg32Float, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index d92de24937..52b9210d9a 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -406,7 +406,7 @@ bitflags::bitflags! { /// This is a web and native feature. const FLOAT32_FILTERABLE = 1 << 11; - // Bits 12-19 available for webgpu features. Should you chose to use some of them for + // Bits 12-18 available for webgpu features. Should you chose to use some of them for // for native features, don't forget to update `all_webgpu_mask` and `all_native_mask` // accordingly. @@ -416,6 +416,16 @@ bitflags::bitflags! { // Native Features: // + /// Enables R64Uint image atomic min and max. + /// + /// Supported platforms: + /// - Vulkan (with VK_EXT_shader_image_atomic_int64) + /// - DX12 (with SM 6.6+) + /// - Metal (with MSL 3.1+) + /// + /// This is a native only feature. + const TEXTURE_INT64_ATOMIC = 1 << 18; + /// Allows shaders to use f32 atomic load, store, add, sub, and exchange. /// /// Supported platforms: @@ -995,7 +1005,7 @@ impl Features { /// Mask of all features which are part of the upstream WebGPU standard. #[must_use] pub const fn all_webgpu_mask() -> Self { - Self::from_bits_truncate(0x7FFFF) + Self::from_bits_truncate(0x3FFFF) } /// Mask of all features that are only available when targeting native (not web). @@ -2627,6 +2637,10 @@ pub enum TextureFormat { Rg11b10Ufloat, // Normal 64 bit formats + /// Red channel only. 64 bit integer per channel. Unsigned in shader. + /// + /// [`Features::TEXTURE_INT64_ATOMIC`] must be enabled to use this texture format. + R64Uint, /// Red and green channels. 32 bit integer per channel. Unsigned in shader. Rg32Uint, /// Red and green channels. 32 bit integer per channel. Signed in shader. @@ -2913,6 +2927,7 @@ impl<'de> Deserialize<'de> for TextureFormat { "rgb10a2uint" => TextureFormat::Rgb10a2Uint, "rgb10a2unorm" => TextureFormat::Rgb10a2Unorm, "rg11b10ufloat" => TextureFormat::Rg11b10Ufloat, + "r64uint" => TextureFormat::R64Uint, "rg32uint" => TextureFormat::Rg32Uint, "rg32sint" => TextureFormat::Rg32Sint, "rg32float" => TextureFormat::Rg32Float, @@ -3041,6 +3056,7 @@ impl Serialize for TextureFormat { TextureFormat::Rgb10a2Uint => "rgb10a2uint", TextureFormat::Rgb10a2Unorm => "rgb10a2unorm", TextureFormat::Rg11b10Ufloat => "rg11b10ufloat", + TextureFormat::R64Uint => "r64uint", TextureFormat::Rg32Uint => "rg32uint", TextureFormat::Rg32Sint => "rg32sint", TextureFormat::Rg32Float => "rg32float", @@ -3283,6 +3299,7 @@ impl TextureFormat { | Self::Rgb10a2Uint | Self::Rgb10a2Unorm | Self::Rg11b10Ufloat + | Self::R64Uint | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float @@ -3406,6 +3423,8 @@ impl TextureFormat { | Self::Depth24PlusStencil8 | Self::Depth32Float => Features::empty(), + Self::R64Uint => Features::TEXTURE_INT64_ATOMIC, + Self::Depth32FloatStencil8 => Features::DEPTH32FLOAT_STENCIL8, Self::NV12 => Features::TEXTURE_FORMAT_NV12, @@ -3471,11 +3490,12 @@ impl TextureFormat { let storage = basic | TextureUsages::STORAGE_BINDING; let binding = TextureUsages::TEXTURE_BINDING; let all_flags = attachment | storage | binding; - let atomic = if device_features.contains(Features::TEXTURE_ATOMIC) { - all_flags | TextureUsages::STORAGE_ATOMIC + let atomic_64 = if device_features.contains(Features::TEXTURE_ATOMIC) { + storage | binding | TextureUsages::STORAGE_ATOMIC } else { - all_flags + storage | binding }; + let atomic = attachment | atomic_64; let rg11b10f = if device_features.contains(Features::RG11B10UFLOAT_RENDERABLE) { attachment } else { @@ -3522,6 +3542,7 @@ impl TextureFormat { Self::Rgb10a2Uint => ( msaa, attachment), Self::Rgb10a2Unorm => (msaa_resolve, attachment), Self::Rg11b10Ufloat => ( msaa, rg11b10f), + Self::R64Uint => ( s_ro_wo, atomic_64), Self::Rg32Uint => ( s_ro_wo, all_flags), Self::Rg32Sint => ( s_ro_wo, all_flags), Self::Rg32Float => ( s_ro_wo, all_flags), @@ -3647,6 +3668,7 @@ impl TextureFormat { | Self::Rg16Uint | Self::Rgba16Uint | Self::R32Uint + | Self::R64Uint | Self::Rg32Uint | Self::Rgba32Uint | Self::Rgb10a2Uint => Some(uint), @@ -3777,7 +3799,7 @@ impl TextureFormat { | Self::Rgba16Uint | Self::Rgba16Sint | Self::Rgba16Float => Some(8), - Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float => Some(8), + Self::R64Uint | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float => Some(8), Self::Rgba32Uint | Self::Rgba32Sint | Self::Rgba32Float => Some(16), @@ -3871,6 +3893,7 @@ impl TextureFormat { | Self::Rgba16Unorm | Self::Rgba16Snorm | Self::Rgba16Float + | Self::R64Uint | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float @@ -3952,6 +3975,7 @@ impl TextureFormat { Self::R32Uint | Self::R32Sint | Self::R32Float + | Self::R64Uint | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float @@ -4020,7 +4044,8 @@ impl TextureFormat { | Self::R16Float | Self::R32Uint | Self::R32Sint - | Self::R32Float => 1, + | Self::R32Float + | Self::R64Uint => 1, Self::Rg8Unorm | Self::Rg8Snorm @@ -4274,6 +4299,10 @@ fn texture_format_serialize() { serde_json::to_string(&TextureFormat::Rg11b10Ufloat).unwrap(), "\"rg11b10ufloat\"".to_string() ); + assert_eq!( + serde_json::to_string(&TextureFormat::R64Uint).unwrap(), + "\"r64uint\"".to_string() + ); assert_eq!( serde_json::to_string(&TextureFormat::Rg32Uint).unwrap(), "\"rg32uint\"".to_string() @@ -4570,6 +4599,10 @@ fn texture_format_deserialize() { serde_json::from_str::("\"rg11b10ufloat\"").unwrap(), TextureFormat::Rg11b10Ufloat ); + assert_eq!( + serde_json::from_str::("\"r64uint\"").unwrap(), + TextureFormat::R64Uint + ); assert_eq!( serde_json::from_str::("\"rg32uint\"").unwrap(), TextureFormat::Rg32Uint