diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 088c8177ae..2f7a3cbe23 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,8 +3,8 @@ name: CI on: push: branches-ignore: [ - # CI is running on the merge queue, so we don't need to run on trunk at all. - "trunk", + # We don't need to run on dependabot PRs. + "dependabot/**", # This is the branch the merge queue creates. "gh-readonly-queue/**" ] diff --git a/.github/workflows/shaders.yml b/.github/workflows/shaders.yml index 53cd58f65b..c85bcae290 100644 --- a/.github/workflows/shaders.yml +++ b/.github/workflows/shaders.yml @@ -3,8 +3,8 @@ name: Shaders on: push: branches-ignore: [ - # CI is running on the merge queue, so we don't need to run on trunk at all. - "trunk", + # We don't need to run on dependabot PRs. + "dependabot/**", # This is the branch the merge queue creates. "gh-readonly-queue/**" ] diff --git a/CHANGELOG.md b/CHANGELOG.md index ae8754d7be..c17a2c1ff6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -164,6 +164,8 @@ By @wumpf in [#6849](https://github.com/gfx-rs/wgpu/pull/6849). - `DeviceType` and `AdapterInfo` now impl `Hash` by @cwfitzgerald in [#6868](https://github.com/gfx-rs/wgpu/pull/6868) - Add build support for Apple Vision Pro. By @guusw in [#6611](https://github.com/gfx-rs/wgpu/pull/6611). - Add `wgsl_language_features` for obtaining available WGSL language feature by @sagudev in [#6814](https://github.com/gfx-rs/wgpu/pull/6814) +- Image atomic support in shaders. By @atlv24 in [#6706](https://github.com/gfx-rs/wgpu/pull/6706) +- 64 bit image atomic support in shaders. By @atlv24 in [#5537](https://github.com/gfx-rs/wgpu/pull/5537) - Add `no_std` support to `wgpu-types`. By @bushrat011899 in [#6892](https://github.com/gfx-rs/wgpu/pull/6892). ##### Vulkan @@ -176,6 +178,10 @@ By @wumpf in [#6849](https://github.com/gfx-rs/wgpu/pull/6849). - Add build support for Apple Vision Pro. By @guusw in [#6611](https://github.com/gfx-rs/wgpu/pull/6611). - Add `raw_handle` method to access raw Metal textures in [#6894](https://github.com/gfx-rs/wgpu/pull/6894). +#### D3D12 + +- Support DXR (DirectX Ray-tracing) in wgpu-hal. By @Vecvec in [#6777](https://github.com/gfx-rs/wgpu/pull/6777) + #### Changes ##### Naga @@ -185,6 +191,7 @@ By @wumpf in [#6849](https://github.com/gfx-rs/wgpu/pull/6849). - Add a note to help with a common syntax error case for global diagnostic filter directives. By @e-hat in [#6718](https://github.com/gfx-rs/wgpu/pull/6718) - Change arithmetic operations between two i32 variables to wrap on overflow to match WGSL spec. By @matthew-wong1 in [#6835](https://github.com/gfx-rs/wgpu/pull/6835). - Add directives to suggestions in error message for parsing global items. By @e-hat in [#6723](https://github.com/gfx-rs/wgpu/pull/6723). +- Automatic conversion for `override` initializers. By @sagudev in [6920](https://github.com/gfx-rs/wgpu/pull/6920) ##### General @@ -193,6 +200,7 @@ By @wumpf in [#6849](https://github.com/gfx-rs/wgpu/pull/6849). - Add actual sample type to `CreateBindGroupError::InvalidTextureSampleType` error message. By @ErichDonGubler in [#6530](https://github.com/gfx-rs/wgpu/pull/6530). - Improve binding error to give a clearer message when there is a mismatch between resource binding as it is in the shader and as it is in the binding layout. By @eliemichel in [#6553](https://github.com/gfx-rs/wgpu/pull/6553). - `Surface::configure` and `Surface::get_current_texture` are no longer fatal. By @alokedesai in [#6253](https://github.com/gfx-rs/wgpu/pull/6253) +- Rename `BlasTriangleGeometry::index_buffer_offset` to `BlasTriangleGeometry::first_index`. By @Vecvec in [#6873](https://github.com/gfx-rs/wgpu/pull/6873/files) ##### D3D12 @@ -234,6 +242,12 @@ By @wumpf in [#6849](https://github.com/gfx-rs/wgpu/pull/6849). - Fix crash when a texture argument is missing. By @aedm in [#6486](https://github.com/gfx-rs/wgpu/pull/6486) - Emit an error in constant evaluation, rather than crash, in certain cases where `vecN` constructors have less than N arguments. By @ErichDonGubler in [#6508](https://github.com/gfx-rs/wgpu/pull/6508). - Fix an error in template list matching `>=` in `a=c`. By @KentSlaney in [#6898](https://github.com/gfx-rs/wgpu/pull/6898). +- Correctly validate handles in override-sized array types. By @jimblandy in [#6882](https://github.com/gfx-rs/wgpu/pull/6882). +- Clean up validation of `Statement::ImageStore`. By @jimblandy in [#6729](https://github.com/gfx-rs/wgpu-pull/6729). +- In compaction, avoid cloning the type arena. By @jimblandy in [#6790](https://github.com/gfx-rs/wgpu-pull/6790) +- In validation, forbid cycles between global expressions and types. By @jimblandy in [#6800](https://github.com/gfx-rs/wgpu-pull/6800) +- Allow abstract scalars in modf and frexp results. By @jimblandy in [#6821](https://github.com/gfx-rs/wgpu-pull/6821) +- In the WGSL front end, apply automatic conversions to values being assigned. By @jimblandy in [#6822](https://github.com/gfx-rs/wgpu-pull/6822) #### Vulkan diff --git a/Cargo.lock b/Cargo.lock index 6f344382a2..3dadf48a9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -68,7 +68,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee91c0c2905bae44f84bfa4e044536541df26b7703fd0888deeb9060fcc44289" dependencies = [ "android-properties", - "bitflags 2.6.0", + "bitflags 2.7.0", "cc", "cesu8", "jni", @@ -307,7 +307,7 @@ version = "0.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -359,9 +359,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "1be3f42a67d6d345ecd59f675f3f012d6974981560836e938c22b424b85ce1be" dependencies = [ "arbitrary", "serde", @@ -448,7 +448,7 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fba7adb4dd5aa98e5553510223000e7148f621165ec5f9acd7113f6ca4995298" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "log", "polling", "rustix", @@ -476,9 +476,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.7" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" +checksum = "c8293772165d9345bdaaa39b45b2109591e63fe5e6fbc23c6ff930a048aa310b" dependencies = [ "jobserver", "libc", @@ -567,9 +567,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.24" +version = "4.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9560b07a799281c7e0958b9296854d6fafd4c5f31444a7e5bb1ad6dde5ccf1bd" +checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783" dependencies = [ "clap_builder", "clap_derive", @@ -577,9 +577,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.24" +version = "4.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "874e0dd3eb68bf99058751ac9712f622e61e6f393a94f7128fa26e3f02f5c7cd" +checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121" dependencies = [ "anstream", "anstyle", @@ -668,9 +668,9 @@ dependencies = [ [[package]] name = "const_panic" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53857514f72ee4a2b583de67401e3ff63a5472ca4acf289d09a9ea7636dfec17" +checksum = "2459fc9262a1aa204eb4b5764ad4f189caec88aea9634389c0a25f8be7f6265e" [[package]] name = "cooked-waker" @@ -995,7 +995,7 @@ dependencies = [ "deno_core", "raw-window-handle 0.6.2", "serde", - "thiserror 2.0.10", + "thiserror 2.0.11", "tokio", "wgpu-core", "wgpu-types", @@ -1151,7 +1151,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1190,6 +1190,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flate2" version = "1.0.35" @@ -1326,9 +1332,9 @@ checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-lite" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef40d21ae2c515b51041df9ed313ed21e572df340ea58a922a0aefe7e8891a1" +checksum = "f5edaec856126859abb19ed65f39e90fea3a9574b9707f13539acf4abf7eb532" dependencies = [ "fastrand", "futures-core", @@ -1464,7 +1470,7 @@ version = "0.31.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18fcd4ae4e86d991ad1300b8f57166e5be0c95ef1f63f3f5b827f8a164548746" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "cfg_aliases 0.1.1", "cgl", "core-foundation", @@ -1514,9 +1520,9 @@ dependencies = [ [[package]] name = "glutin_wgl_sys" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e1951bbd9434a81aa496fe59ccc2235af3820d27b85f9314e279609211e2c" +checksum = "2c4ee00b289aba7a9e5306d57c2d05499b2e5dc427f84ac708bd2c090212cf3e" dependencies = [ "gl_generator", ] @@ -1527,7 +1533,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "gpu-alloc-types", ] @@ -1537,7 +1543,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", ] [[package]] @@ -1558,7 +1564,7 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf29e94d6d243368b7a56caa16bc213e4f9f8ed38c4d9557069527b5d5281ca" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "gpu-descriptor-types", "hashbrown", ] @@ -1569,7 +1575,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdf242682df893b86f33a73828fb09ca4b2d3bb6cc95249707fc684d27484b91" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", ] [[package]] @@ -1966,7 +1972,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -1975,7 +1981,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "libc", "redox_syscall 0.5.8", ] @@ -2101,7 +2107,7 @@ name = "metal" version = "0.30.0" source = "git+https://github.com/gfx-rs/metal-rs.git?rev=ef768ff9d7#ef768ff9d742ae6a0f4e83ddc8031264e7d460c4" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "block", "core-graphics-types", "foreign-types", @@ -2163,7 +2169,7 @@ dependencies = [ "arbitrary", "arrayvec", "bit-set 0.8.0", - "bitflags 2.6.0", + "bitflags 2.7.0", "cfg_aliases 0.2.1", "codespan-reporting", "diff", @@ -2173,7 +2179,7 @@ dependencies = [ "indexmap", "itertools 0.13.0", "log", - "petgraph", + "petgraph 0.7.1", "pp-rs", "ron", "rspirv", @@ -2182,7 +2188,7 @@ dependencies = [ "spirv 0.3.0+sdk-1.3.268.0", "strum 0.26.3", "termcolor", - "thiserror 2.0.10", + "thiserror 2.0.11", "unicode-xid", ] @@ -2239,7 +2245,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2076a31b7010b17a38c01907c45b945e8f11495ee4dd588309718901b1f7a5b7" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "jni-sys", "log", "ndk-sys", @@ -2490,7 +2496,7 @@ dependencies = [ "backtrace", "cfg-if", "libc", - "petgraph", + "petgraph 0.6.5", "redox_syscall 0.5.8", "smallvec", "thread-id", @@ -2515,7 +2521,17 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ - "fixedbitset", + "fixedbitset 0.4.2", + "indexmap", +] + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset 0.5.7", "indexmap", ] @@ -2656,9 +2672,9 @@ checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa" [[package]] name = "prettyplease" -version = "0.2.27" +version = "0.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483f8c21f64f3ea09fe0f30f5d48c3e8eefe5dac9129f0075f76593b4c1da705" +checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" dependencies = [ "proc-macro2", "syn", @@ -2698,9 +2714,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] @@ -2812,7 +2828,7 @@ version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", ] [[package]] @@ -2872,7 +2888,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" dependencies = [ "base64", - "bitflags 2.6.0", + "bitflags 2.7.0", "serde", "serde_derive", ] @@ -2919,11 +2935,11 @@ version = "0.38.43" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -3111,7 +3127,7 @@ version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "922fd3eeab3bd820d76537ce8f582b1cf951eceb5475c28500c7457d9d17f53a" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "calloop", "calloop-wayland-source", "cursor-icon", @@ -3191,7 +3207,7 @@ version = "0.3.0+sdk-1.3.268.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eda41003dc44290527a59b13432d4a0379379fa074b70174882adfbdfd917844" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "serde", ] @@ -3271,9 +3287,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.95" +version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", @@ -3323,11 +3339,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.10" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3ac7f54ca534db81081ef1c1e7f6ea8a3ef428d2fc069097c079443d24124d3" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" dependencies = [ - "thiserror-impl 2.0.10", + "thiserror-impl 2.0.11", ] [[package]] @@ -3343,9 +3359,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.10" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e9465d30713b56a37ede7185763c3492a91be2f5fa68d958c44e41ab9248beb" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ "proc-macro2", "quote", @@ -3547,7 +3563,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69fff37da548239c3bf9e64a12193d261e8b22b660991c6fd2df057c168f435f" dependencies = [ "cc", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -3686,9 +3702,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "b913a3b5fe84142e269d63cc62b64319ccaf89b748fc31fe025177f767a756c4" dependencies = [ "getrandom", "serde", @@ -3701,7 +3717,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a511192602f7b435b0a241c1947aa743eb7717f20a9195f4b5e8ed1952e01db1" dependencies = [ "bindgen", - "bitflags 2.6.0", + "bitflags 2.7.0", "fslock", "gzip-header", "home", @@ -3866,7 +3882,7 @@ version = "0.31.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b66249d3fc69f76fd74c82cc319300faa554e9d865dab1f7cd66cc20db10b280" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "rustix", "wayland-backend", "wayland-scanner", @@ -3878,7 +3894,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "625c5029dbd43d25e6aa9615e88b829a5cad13b2819c4ae129fdbb7c31ab4c7e" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "cursor-icon", "wayland-backend", ] @@ -3900,7 +3916,7 @@ version = "0.31.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f81f365b8b4a97f422ac0e8737c438024b5951734506b0e1d775c73030561f4" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "wayland-backend", "wayland-client", "wayland-scanner", @@ -3912,7 +3928,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23803551115ff9ea9bce586860c5c5a971e360825a0309264102a9495a5ff479" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "wayland-backend", "wayland-client", "wayland-protocols", @@ -3925,7 +3941,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad1f61b76b6c2d8742e10f9ba5c3737f6530b4c243132c2a2ccc8aa96fe25cd6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "wayland-backend", "wayland-client", "wayland-protocols", @@ -3980,7 +3996,7 @@ name = "wgpu" version = "23.0.1" dependencies = [ "arrayvec", - "bitflags 2.6.0", + "bitflags 2.7.0", "cfg_aliases 0.2.1", "document-features", "js-sys", @@ -4022,7 +4038,7 @@ version = "23.0.1" dependencies = [ "arrayvec", "bit-vec 0.8.0", - "bitflags 2.6.0", + "bitflags 2.7.0", "bytemuck", "cfg_aliases 0.2.1", "document-features", @@ -4037,7 +4053,7 @@ dependencies = [ "rustc-hash", "serde", "smallvec", - "thiserror 2.0.10", + "thiserror 2.0.11", "wgpu-hal", "wgpu-types", ] @@ -4081,7 +4097,7 @@ dependencies = [ "arrayvec", "ash", "bit-set 0.8.0", - "bitflags 2.6.0", + "bitflags 2.7.0", "block", "bytemuck", "cfg-if", @@ -4092,7 +4108,7 @@ dependencies = [ "glow", "glutin", "glutin-winit", - "glutin_wgl_sys 0.6.0", + "glutin_wgl_sys 0.6.1", "gpu-alloc", "gpu-allocator", "gpu-descriptor", @@ -4116,7 +4132,7 @@ dependencies = [ "renderdoc-sys", "rustc-hash", "smallvec", - "thiserror 2.0.10", + "thiserror 2.0.11", "wasm-bindgen", "web-sys", "wgpu-types", @@ -4130,7 +4146,7 @@ name = "wgpu-info" version = "23.0.1" dependencies = [ "anyhow", - "bitflags 2.6.0", + "bitflags 2.7.0", "env_logger", "pico-args", "serde", @@ -4153,7 +4169,7 @@ version = "23.0.1" dependencies = [ "anyhow", "arrayvec", - "bitflags 2.6.0", + "bitflags 2.7.0", "bytemuck", "cfg-if", "console_log", @@ -4188,7 +4204,7 @@ dependencies = [ name = "wgpu-types" version = "23.0.0" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "js-sys", "serde", "serde_json", @@ -4241,7 +4257,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] @@ -4537,7 +4553,7 @@ dependencies = [ "ahash", "android-activity", "atomic-waker", - "bitflags 2.6.0", + "bitflags 2.7.0", "bytemuck", "calloop", "cfg_aliases 0.1.1", @@ -4579,9 +4595,9 @@ dependencies = [ [[package]] name = "winnow" -version = "0.6.22" +version = "0.6.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39281189af81c07ec09db316b302a3e67bf9bd7cbf6c820b50e35fee9c2fa980" +checksum = "c8d71a593cc5c42ad7876e2c1fda56f314f3754c084128833e64f1345ff8a03a" dependencies = [ "memchr", ] @@ -4657,7 +4673,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d039de8032a9a8856a6be89cea3e5d12fdd82306ab7c94d74e6deab2460651c5" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "dlib", "log", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index ce47485df9..e4f0d474be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,8 +79,8 @@ argh = "0.1.13" arrayvec = "0.7" bincode = "1" bit-vec = "0.8" -bitflags = "2.6" -bytemuck = { version = "1.21" } +bitflags = "2.7" +bytemuck = { version = "1.21", features = ["derive", "min_const_generics"] } cfg_aliases = "0.2.1" cfg-if = "1" criterion = "0.5" @@ -134,7 +134,13 @@ strum = { version = "0.26.0", features = ["derive"] } trybuild = "1" tracy-client = "0.17" thiserror = "2" -wgpu = { version = "23.0.1", path = "./wgpu", default-features = false } +wgpu = { version = "23.0.1", path = "./wgpu", default-features = false, features = [ + "serde", + "wgsl", + "dx12", + "metal", + "static-dxc", +] } wgpu-core = { version = "23.0.1", path = "./wgpu-core" } wgpu-macros = { version = "23.0.0", path = "./wgpu-macros" } wgpu-test = { version = "23.0.0", path = "./tests" } diff --git a/benches/Cargo.toml b/benches/Cargo.toml index d3e4966502..d00cecf62d 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -47,4 +47,4 @@ pollster.workspace = true profiling.workspace = true rayon.workspace = true tracy-client = { workspace = true, optional = true } -wgpu = { workspace = true, features = ["wgsl", "metal", "dx12"] } +wgpu.workspace = true diff --git a/benches/benches/bind_groups.rs b/benches/benches/bind_groups.rs index 762f8126bc..0f707db04e 100644 --- a/benches/benches/bind_groups.rs +++ b/benches/benches/bind_groups.rs @@ -62,52 +62,52 @@ impl BindGroupState { fn run_bench(ctx: &mut Criterion) { let state = LazyLock::new(BindGroupState::new); - if !state - .device_state - .device - .features() - .contains(wgpu::Features::TEXTURE_BINDING_ARRAY) - { - return; - } - let mut group = ctx.benchmark_group("Bind Group Creation"); for count in [5, 50, 500, 5_000, 50_000] { - if count - > state - .device_state - .device - .limits() - .max_sampled_textures_per_shader_stage - { - continue; - } - - let bind_group_layout = - state - .device_state - .device - .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { - label: None, - entries: &[wgpu::BindGroupLayoutEntry { - binding: 0, - visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Texture { - sample_type: wgpu::TextureSampleType::Float { filterable: true }, - view_dimension: wgpu::TextureViewDimension::D2, - multisampled: false, - }, - count: Some(NonZeroU32::new(count).unwrap()), - }], - }); - group.throughput(Throughput::Elements(count as u64)); group.bench_with_input( format!("{} Element Bind Group", count), &count, |b, &count| { b.iter_custom(|iters| { + if !state + .device_state + .device + .features() + .contains(wgpu::Features::TEXTURE_BINDING_ARRAY) + { + return Duration::ZERO; + } + + if count + > state + .device_state + .device + .limits() + .max_sampled_textures_per_shader_stage + { + return Duration::ZERO; + } + + let bind_group_layout = state.device_state.device.create_bind_group_layout( + &wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { + filterable: true, + }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: Some(NonZeroU32::new(count).unwrap()), + }], + }, + ); + let texture_view_refs: Vec<_> = state.texture_views.iter().take(count as usize).collect(); diff --git a/benches/benches/computepass.rs b/benches/benches/computepass.rs index 1ac14c092a..300e461f30 100644 --- a/benches/benches/computepass.rs +++ b/benches/benches/computepass.rs @@ -550,7 +550,7 @@ fn run_bench(ctx: &mut Criterion) { // Need bindless to run this benchmark if state.bindless_bind_group.is_none() { - return Duration::from_secs_f32(1.0); + return Duration::from_secs(1); } let mut duration = Duration::ZERO; diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs index 5471fb94c4..156a41d0d1 100644 --- a/benches/benches/renderpass.rs +++ b/benches/benches/renderpass.rs @@ -457,7 +457,7 @@ fn run_bench(ctx: &mut Criterion) { // This benchmark hangs on Apple Paravirtualized GPUs. No idea why. if state.device_state.adapter_info.name.contains("Paravirtual") { - return Duration::from_secs_f32(1.0); + return Duration::from_secs(1); } let mut duration = Duration::ZERO; diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 02fb524a1c..1bef728f3d 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -29,7 +29,7 @@ webgl = ["wgpu/webgl"] webgpu = ["wgpu/webgpu"] [dependencies] -bytemuck = { workspace = true, features = ["derive"] } +bytemuck.workspace = true cfg-if.workspace = true encase = { workspace = true, features = ["glam"] } flume.workspace = true @@ -43,11 +43,7 @@ obj.workspace = true png.workspace = true pollster.workspace = true web-time.workspace = true -wgpu = { workspace = true, default-features = false, features = [ - "wgsl", - "dx12", - "metal", -] } +wgpu.workspace = true winit.workspace = true [dev-dependencies] diff --git a/examples/src/ray_cube_compute/mod.rs b/examples/src/ray_cube_compute/mod.rs index 801b4796ed..743ca17650 100644 --- a/examples/src/ray_cube_compute/mod.rs +++ b/examples/src/ray_cube_compute/mod.rs @@ -141,7 +141,6 @@ struct Example { impl crate::framework::Example for Example { fn required_features() -> wgpu::Features { wgpu::Features::TEXTURE_BINDING_ARRAY - | wgpu::Features::STORAGE_RESOURCE_BINDING_ARRAY | wgpu::Features::VERTEX_WRITABLE_STORAGE | wgpu::Features::EXPERIMENTAL_RAY_QUERY | wgpu::Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE @@ -378,7 +377,7 @@ impl crate::framework::Example for Example { first_vertex: 0, vertex_stride: mem::size_of::() as u64, index_buffer: Some(&index_buf), - index_buffer_offset: Some(0), + first_index: Some(0), transform_buffer: None, transform_buffer_offset: None, }, diff --git a/examples/src/ray_cube_fragment/mod.rs b/examples/src/ray_cube_fragment/mod.rs index b9dfba9a16..9ebf36fb32 100644 --- a/examples/src/ray_cube_fragment/mod.rs +++ b/examples/src/ray_cube_fragment/mod.rs @@ -248,7 +248,7 @@ impl crate::framework::Example for Example { first_vertex: 0, vertex_stride: mem::size_of::() as u64, index_buffer: Some(&index_buf), - index_buffer_offset: Some(0), + first_index: Some(0), transform_buffer: None, transform_buffer_offset: None, }, diff --git a/examples/src/ray_scene/mod.rs b/examples/src/ray_scene/mod.rs index 886d0c0183..1d681b064e 100644 --- a/examples/src/ray_scene/mod.rs +++ b/examples/src/ray_scene/mod.rs @@ -264,7 +264,7 @@ fn upload_scene_components( first_vertex: vertex_range.start as u32, vertex_stride: mem::size_of::() as u64, index_buffer: Some(&indices), - index_buffer_offset: Some(scene.geometries[i].0.start as u64 * 4), + first_index: Some(scene.geometries[i].0.start as u32), transform_buffer: None, transform_buffer_offset: None, }) diff --git a/examples/src/ray_shadows/mod.rs b/examples/src/ray_shadows/mod.rs index adf25cd454..d605662283 100644 --- a/examples/src/ray_shadows/mod.rs +++ b/examples/src/ray_shadows/mod.rs @@ -258,7 +258,7 @@ impl crate::framework::Example for Example { first_vertex: 0, vertex_stride: mem::size_of::() as u64, index_buffer: Some(&index_buf), - index_buffer_offset: Some(0), + first_index: Some(0), transform_buffer: None, transform_buffer_offset: None, }, diff --git a/examples/src/ray_traced_triangle/mod.rs b/examples/src/ray_traced_triangle/mod.rs index d508e6113e..900aafea81 100644 --- a/examples/src/ray_traced_triangle/mod.rs +++ b/examples/src/ray_traced_triangle/mod.rs @@ -216,9 +216,9 @@ impl crate::framework::Example for Example { vertex_buffer: &vertex_buffer, first_vertex: 0, vertex_stride: mem::size_of::<[f32; 3]>() as wgpu::BufferAddress, - // in this case since one triangle gets no compression from an index buffer `index_buffer` and `index_buffer_offset` could be `None`. + // in this case since one triangle gets no compression from an index buffer `index_buffer` and `first_index` could be `None`. index_buffer: Some(&index_buffer), - index_buffer_offset: Some(0), + first_index: Some(0), transform_buffer: None, transform_buffer_offset: None, }]), diff --git a/naga/Cargo.toml b/naga/Cargo.toml index 3f754a51c5..e743a8d4e6 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -82,8 +82,7 @@ serde = { version = "1.0.217", features = [ "default", "derive", ], optional = true } -# Hold on updating to 0.7 until https://github.com/petgraph/petgraph/pull/714 is on crates.io -petgraph = { version = "0.6", optional = true } +petgraph = { version = "0.7", optional = true } pp-rs = { version = "0.2.1", optional = true } hexf-parse = { version = "0.2.1", optional = true } unicode-xid = { version = "0.2.6", optional = true } diff --git a/naga/src/back/dot/mod.rs b/naga/src/back/dot/mod.rs index 2780879657..e44e8d8eae 100644 --- a/naga/src/back/dot/mod.rs +++ b/naga/src/back/dot/mod.rs @@ -254,6 +254,21 @@ impl StatementGraph { } "Atomic" } + S::ImageAtomic { + image, + coordinate, + array_index, + fun: _, + value, + } => { + self.dependencies.push((id, image, "image")); + self.dependencies.push((id, coordinate, "coordinate")); + if let Some(expr) = array_index { + self.dependencies.push((id, expr, "array_index")); + } + self.dependencies.push((id, value, "value")); + "ImageAtomic" + } S::WorkGroupUniformLoad { pointer, result } => { self.emits.push((id, result)); self.dependencies.push((id, pointer, "pointer")); diff --git a/naga/src/back/glsl/features.rs b/naga/src/back/glsl/features.rs index 0065db2f58..bef54bd4f8 100644 --- a/naga/src/back/glsl/features.rs +++ b/naga/src/back/glsl/features.rs @@ -52,6 +52,8 @@ bitflags::bitflags! { const TEXTURE_SHADOW_LOD = 1 << 23; /// Subgroup operations const SUBGROUP_OPERATIONS = 1 << 24; + /// Image atomics + const TEXTURE_ATOMICS = 1 << 25; } } @@ -120,6 +122,7 @@ impl FeaturesManager { check_feature!(DYNAMIC_ARRAY_SIZE, 430, 310); check_feature!(DUAL_SOURCE_BLENDING, 330, 300 /* with extension */); check_feature!(SUBGROUP_OPERATIONS, 430, 310); + check_feature!(TEXTURE_ATOMICS, 420, 310); match version { Version::Embedded { is_webgl: true, .. } => check_feature!(MULTI_VIEW, 140, 300), _ => check_feature!(MULTI_VIEW, 140, 310), @@ -278,6 +281,11 @@ impl FeaturesManager { )?; } + if self.0.contains(Features::TEXTURE_ATOMICS) { + // https://www.khronos.org/registry/OpenGL/extensions/OES/OES_shader_image_atomic.txt + writeln!(out, "#extension GL_OES_shader_image_atomic : require")?; + } + Ok(()) } } @@ -400,6 +408,7 @@ impl Writer<'_, W> { | StorageFormat::Rgb10a2Uint | StorageFormat::Rgb10a2Unorm | StorageFormat::Rg11b10Ufloat + | StorageFormat::R64Uint | StorageFormat::Rg32Uint | StorageFormat::Rg32Sint | StorageFormat::Rg32Float => { @@ -546,6 +555,22 @@ impl Writer<'_, W> { } } + for blocks in module + .functions + .iter() + .map(|(_, f)| &f.body) + .chain(std::iter::once(&entry_point.function.body)) + { + for (stmt, _) in blocks.span_iter() { + match *stmt { + crate::Statement::ImageAtomic { .. } => { + features.request(Features::TEXTURE_ATOMICS) + } + _ => {} + } + } + } + self.features.check_availability(self.options.version) } diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index 83aeeebdd3..b058ae5ee8 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -2475,6 +2475,17 @@ impl<'a, W: Write> Writer<'a, W> { self.write_expr(value, ctx)?; writeln!(self.out, ");")?; } + // Stores a value into an image. + Statement::ImageAtomic { + image, + coordinate, + array_index, + fun, + value, + } => { + write!(self.out, "{level}")?; + self.write_image_atomic(ctx, image, coordinate, array_index, fun, value)? + } Statement::RayQuery { .. } => unreachable!(), Statement::SubgroupBallot { result, predicate } => { write!(self.out, "{level}")?; @@ -4137,6 +4148,56 @@ impl<'a, W: Write> Writer<'a, W> { Ok(()) } + /// Helper method to write the `ImageAtomic` statement + fn write_image_atomic( + &mut self, + ctx: &back::FunctionCtx, + image: Handle, + coordinate: Handle, + array_index: Option>, + fun: crate::AtomicFunction, + value: Handle, + ) -> Result<(), Error> { + use crate::ImageDimension as IDim; + + // NOTE: openGL requires that `imageAtomic`s have no effects when the texel is invalid + // so we don't need to generate bounds checks (OpenGL 4.2 Core ยง3.9.20) + + // This will only panic if the module is invalid + let dim = match *ctx.resolve_type(image, &self.module.types) { + TypeInner::Image { dim, .. } => dim, + _ => unreachable!(), + }; + + // Begin our call to `imageAtomic` + let fun_str = fun.to_glsl(); + write!(self.out, "imageAtomic{fun_str}(")?; + self.write_expr(image, ctx)?; + // Separate the image argument from the coordinates + write!(self.out, ", ")?; + + // openGL es doesn't have 1D images so we need workaround it + let tex_1d_hack = dim == IDim::D1 && self.options.version.is_es(); + // Write the coordinate vector + self.write_texture_coord( + ctx, + // Get the size of the coordinate vector + self.get_coordinate_vector_size(dim, false), + coordinate, + array_index, + tex_1d_hack, + )?; + + // Separate the coordinate from the value to write and write the expression + // of the value to write. + write!(self.out, ", ")?; + self.write_expr(value, ctx)?; + // End the call to `imageAtomic` and the statement. + writeln!(self.out, ");")?; + + Ok(()) + } + /// Helper method for writing an `ImageLoad` expression. #[allow(clippy::too_many_arguments)] fn write_image_load( @@ -4533,6 +4594,9 @@ impl<'a, W: Write> Writer<'a, W> { /// they can only be used to query information about the resource which isn't what /// we want here so when storage access is both `LOAD` and `STORE` add no modifiers fn write_storage_access(&mut self, storage_access: crate::StorageAccess) -> BackendResult { + if storage_access.contains(crate::StorageAccess::ATOMIC) { + return Ok(()); + } if !storage_access.contains(crate::StorageAccess::STORE) { write!(self.out, "readonly ")?; } @@ -4880,6 +4944,7 @@ fn glsl_storage_format(format: crate::StorageFormat) -> Result<&'static str, Err Sf::Rgb10a2Uint => "rgb10_a2ui", Sf::Rgb10a2Unorm => "rgb10_a2", Sf::Rg11b10Ufloat => "r11f_g11f_b10f", + Sf::R64Uint => "r64ui", Sf::Rg32Uint => "rg32ui", Sf::Rg32Sint => "rg32i", Sf::Rg32Float => "rg32f", diff --git a/naga/src/back/hlsl/conv.rs b/naga/src/back/hlsl/conv.rs index 83c7667eab..9573fce2a8 100644 --- a/naga/src/back/hlsl/conv.rs +++ b/naga/src/back/hlsl/conv.rs @@ -125,6 +125,7 @@ impl crate::StorageFormat { Self::R8Snorm | Self::R16Snorm => "snorm float", Self::R8Uint | Self::R16Uint | Self::R32Uint => "uint", Self::R8Sint | Self::R16Sint | Self::R32Sint => "int", + Self::R64Uint => "uint64_t", Self::Rg16Float | Self::Rg32Float => "float2", Self::Rg8Unorm | Self::Rg16Unorm => "unorm float2", diff --git a/naga/src/back/hlsl/help.rs b/naga/src/back/hlsl/help.rs index 347addd67e..f63c9d2cfd 100644 --- a/naga/src/back/hlsl/help.rs +++ b/naga/src/back/hlsl/help.rs @@ -841,6 +841,9 @@ impl super::Writer<'_, W> { &crate::PredeclaredType::AtomicCompareExchangeWeakResult { .. } => {} } } + if module.special_types.ray_desc.is_some() { + self.write_ray_desc_from_ray_desc_constructor_function(module)?; + } Ok(()) } @@ -852,16 +855,30 @@ impl super::Writer<'_, W> { expressions: &crate::Arena, ) -> BackendResult { for (handle, _) in expressions.iter() { - if let crate::Expression::Compose { ty, .. } = expressions[handle] { - match module.types[ty].inner { - crate::TypeInner::Struct { .. } | crate::TypeInner::Array { .. } => { - let constructor = WrappedConstructor { ty }; - if self.wrapped.constructors.insert(constructor) { - self.write_wrapped_constructor_function(module, constructor)?; + match expressions[handle] { + crate::Expression::Compose { ty, .. } => { + match module.types[ty].inner { + crate::TypeInner::Struct { .. } | crate::TypeInner::Array { .. } => { + let constructor = WrappedConstructor { ty }; + if self.wrapped.constructors.insert(constructor) { + self.write_wrapped_constructor_function(module, constructor)?; + } + } + _ => {} + }; + } + crate::Expression::RayQueryGetIntersection { committed, .. } => { + if committed { + if !self.written_committed_intersection { + self.write_committed_intersection_function(module)?; + self.written_committed_intersection = true; } + } else if !self.written_candidate_intersection { + self.write_candidate_intersection_function(module)?; + self.written_candidate_intersection = true; } - _ => {} - }; + } + _ => {} } } Ok(()) diff --git a/naga/src/back/hlsl/keywords.rs b/naga/src/back/hlsl/keywords.rs index 2cb715c42c..c15e17636c 100644 --- a/naga/src/back/hlsl/keywords.rs +++ b/naga/src/back/hlsl/keywords.rs @@ -814,6 +814,7 @@ pub const RESERVED: &[&str] = &[ "TextureBuffer", "ConstantBuffer", "RayQuery", + "RayDesc", // Naga utilities super::writer::MODF_FUNCTION, super::writer::FREXP_FUNCTION, diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index fe7d4f6d67..dcce866bac 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -101,6 +101,7 @@ accessing individual columns by dynamic index. mod conv; mod help; mod keywords; +mod ray; mod storage; mod writer; @@ -331,6 +332,8 @@ pub struct Writer<'a, W> { /// Set of expressions that have associated temporary variables named_expressions: crate::NamedExpressions, wrapped: Wrapped, + written_committed_intersection: bool, + written_candidate_intersection: bool, continue_ctx: back::continue_forward::ContinueCtx, /// A reference to some part of a global variable, lowered to a series of diff --git a/naga/src/back/hlsl/ray.rs b/naga/src/back/hlsl/ray.rs new file mode 100644 index 0000000000..ab57f06a6c --- /dev/null +++ b/naga/src/back/hlsl/ray.rs @@ -0,0 +1,163 @@ +use crate::back::hlsl::BackendResult; +use crate::{RayQueryIntersection, TypeInner}; +use std::fmt::Write; + +impl super::Writer<'_, W> { + // constructs hlsl RayDesc from wgsl RayDesc + pub(super) fn write_ray_desc_from_ray_desc_constructor_function( + &mut self, + module: &crate::Module, + ) -> BackendResult { + write!(self.out, "RayDesc RayDescFromRayDesc_(")?; + self.write_type(module, module.special_types.ray_desc.unwrap())?; + writeln!(self.out, " arg0) {{")?; + writeln!(self.out, " RayDesc ret = (RayDesc)0;")?; + writeln!(self.out, " ret.Origin = arg0.origin;")?; + writeln!(self.out, " ret.TMin = arg0.tmin;")?; + writeln!(self.out, " ret.Direction = arg0.dir;")?; + writeln!(self.out, " ret.TMax = arg0.tmax;")?; + writeln!(self.out, " return ret;")?; + writeln!(self.out, "}}")?; + writeln!(self.out)?; + Ok(()) + } + pub(super) fn write_committed_intersection_function( + &mut self, + module: &crate::Module, + ) -> BackendResult { + self.write_type(module, module.special_types.ray_intersection.unwrap())?; + write!(self.out, " GetCommittedIntersection(")?; + self.write_value_type(module, &TypeInner::RayQuery)?; + writeln!(self.out, " rq) {{")?; + write!(self.out, " ")?; + self.write_type(module, module.special_types.ray_intersection.unwrap())?; + write!(self.out, " ret = (")?; + self.write_type(module, module.special_types.ray_intersection.unwrap())?; + writeln!(self.out, ")0;")?; + writeln!(self.out, " ret.kind = rq.CommittedStatus();")?; + writeln!( + self.out, + " if( rq.CommittedStatus() == COMMITTED_NOTHING) {{}} else {{" + )?; + writeln!(self.out, " ret.t = rq.CommittedRayT();")?; + writeln!( + self.out, + " ret.instance_custom_index = rq.CommittedInstanceID();" + )?; + writeln!( + self.out, + " ret.instance_id = rq.CommittedInstanceIndex();" + )?; + writeln!( + self.out, + " ret.sbt_record_offset = rq.CommittedInstanceContributionToHitGroupIndex();" + )?; + writeln!( + self.out, + " ret.geometry_index = rq.CommittedGeometryIndex();" + )?; + writeln!( + self.out, + " ret.primitive_index = rq.CommittedPrimitiveIndex();" + )?; + writeln!( + self.out, + " if( rq.CommittedStatus() == COMMITTED_TRIANGLE_HIT ) {{" + )?; + writeln!( + self.out, + " ret.barycentrics = rq.CommittedTriangleBarycentrics();" + )?; + writeln!( + self.out, + " ret.front_face = rq.CommittedTriangleFrontFace();" + )?; + writeln!(self.out, " }}")?; + writeln!( + self.out, + " ret.object_to_world = rq.CommittedObjectToWorld4x3();" + )?; + writeln!( + self.out, + " ret.world_to_object = rq.CommittedWorldToObject4x3();" + )?; + writeln!(self.out, " }}")?; + writeln!(self.out, " return ret;")?; + writeln!(self.out, "}}")?; + writeln!(self.out)?; + Ok(()) + } + pub(super) fn write_candidate_intersection_function( + &mut self, + module: &crate::Module, + ) -> BackendResult { + self.write_type(module, module.special_types.ray_intersection.unwrap())?; + write!(self.out, " GetCandidateIntersection(")?; + self.write_value_type(module, &TypeInner::RayQuery)?; + writeln!(self.out, " rq) {{")?; + write!(self.out, " ")?; + self.write_type(module, module.special_types.ray_intersection.unwrap())?; + write!(self.out, " ret = (")?; + self.write_type(module, module.special_types.ray_intersection.unwrap())?; + writeln!(self.out, ")0;")?; + writeln!(self.out, " CANDIDATE_TYPE kind = rq.CandidateType();")?; + writeln!( + self.out, + " if (kind == CANDIDATE_NON_OPAQUE_TRIANGLE) {{" + )?; + writeln!( + self.out, + " ret.kind = {};", + RayQueryIntersection::Triangle as u32 + )?; + writeln!(self.out, " ret.t = rq.CandidateTriangleRayT();")?; + writeln!( + self.out, + " ret.barycentrics = rq.CandidateTriangleBarycentrics();" + )?; + writeln!( + self.out, + " ret.front_face = rq.CandidateTriangleFrontFace();" + )?; + writeln!(self.out, " }} else {{")?; + writeln!( + self.out, + " ret.kind = {};", + RayQueryIntersection::Aabb as u32 + )?; + writeln!(self.out, " }}")?; + + writeln!( + self.out, + " ret.instance_custom_index = rq.CandidateInstanceID();" + )?; + writeln!( + self.out, + " ret.instance_id = rq.CandidateInstanceIndex();" + )?; + writeln!( + self.out, + " ret.sbt_record_offset = rq.CandidateInstanceContributionToHitGroupIndex();" + )?; + writeln!( + self.out, + " ret.geometry_index = rq.CandidateGeometryIndex();" + )?; + writeln!( + self.out, + " ret.primitive_index = rq.CandidatePrimitiveIndex();" + )?; + writeln!( + self.out, + " ret.object_to_world = rq.CandidateObjectToWorld4x3();" + )?; + writeln!( + self.out, + " ret.world_to_object = rq.CandidateWorldToObject4x3();" + )?; + writeln!(self.out, " return ret;")?; + writeln!(self.out, "}}")?; + writeln!(self.out)?; + Ok(()) + } +} diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index 459798c771..b5df135766 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -9,7 +9,7 @@ use super::{ use crate::{ back::{self, Baked}, proc::{self, index, ExpressionKindTracker, NameKey}, - valid, Handle, Module, Scalar, ScalarKind, ShaderStage, TypeInner, + valid, Handle, Module, RayQueryFunction, Scalar, ScalarKind, ShaderStage, TypeInner, }; use std::{fmt, mem}; @@ -104,6 +104,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { entry_point_io: Vec::new(), named_expressions: crate::NamedExpressions::default(), wrapped: super::Wrapped::default(), + written_committed_intersection: false, + written_candidate_intersection: false, continue_ctx: back::continue_forward::ContinueCtx::default(), temp_access_chain: Vec::new(), need_bake_expressions: Default::default(), @@ -123,6 +125,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.entry_point_io.clear(); self.named_expressions.clear(); self.wrapped.clear(); + self.written_committed_intersection = false; + self.written_candidate_intersection = false; self.continue_ctx.clear(); self.need_bake_expressions.clear(); } @@ -1218,6 +1222,13 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { TypeInner::Array { base, size, .. } | TypeInner::BindingArray { base, size } => { self.write_array_size(module, base, size)?; } + TypeInner::AccelerationStructure => { + write!(self.out, "RaytracingAccelerationStructure")?; + } + TypeInner::RayQuery => { + // these are constant flags, there are dynamic flags also but constant flags are not supported by naga + write!(self.out, "RayQuery")?; + } _ => return Err(Error::Unimplemented(format!("write_value_type {inner:?}"))), } @@ -1375,15 +1386,20 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_array_size(module, base, size)?; } - write!(self.out, " = ")?; - // Write the local initializer if needed - if let Some(init) = local.init { - self.write_expr(module, init, func_ctx)?; - } else { - // Zero initialize local variables - self.write_default_init(module, local.ty)?; + match module.types[local.ty].inner { + // from https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#tracerayinline-example-1 it seems that ray queries shouldn't be zeroed + TypeInner::RayQuery => {} + _ => { + write!(self.out, " = ")?; + // Write the local initializer if needed + if let Some(init) = local.init { + self.write_expr(module, init, func_ctx)?; + } else { + // Zero initialize local variables + self.write_default_init(module, local.ty)?; + } + } } - // Finish the local with `;` and add a newline (only for readability) writeln!(self.out, ";")? } @@ -2210,6 +2226,32 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { writeln!(self.out, ");")?; } + Statement::ImageAtomic { + image, + coordinate, + array_index, + fun, + value, + } => { + write!(self.out, "{level}")?; + + let fun_str = fun.to_hlsl_suffix(); + write!(self.out, "Interlocked{fun_str}(")?; + self.write_expr(module, image, func_ctx)?; + write!(self.out, "[")?; + self.write_texture_coordinates( + "int", + coordinate, + array_index, + None, + module, + func_ctx, + )?; + write!(self.out, "],")?; + + self.write_expr(module, value, func_ctx)?; + writeln!(self.out, ");")?; + } Statement::WorkGroupUniformLoad { pointer, result } => { self.write_barrier(crate::Barrier::WORK_GROUP, level)?; write!(self.out, "{level}")?; @@ -2224,7 +2266,37 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } => { self.write_switch(module, func_ctx, level, selector, cases)?; } - Statement::RayQuery { .. } => unreachable!(), + Statement::RayQuery { query, ref fun } => match *fun { + RayQueryFunction::Initialize { + acceleration_structure, + descriptor, + } => { + write!(self.out, "{level}")?; + self.write_expr(module, query, func_ctx)?; + write!(self.out, ".TraceRayInline(")?; + self.write_expr(module, acceleration_structure, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, descriptor, func_ctx)?; + write!(self.out, ".flags, ")?; + self.write_expr(module, descriptor, func_ctx)?; + write!(self.out, ".cull_mask, ")?; + write!(self.out, "RayDescFromRayDesc_(")?; + self.write_expr(module, descriptor, func_ctx)?; + writeln!(self.out, "));")?; + } + RayQueryFunction::Proceed { result } => { + write!(self.out, "{level}")?; + let name = Baked(result).to_string(); + write!(self.out, "const bool {name} = ")?; + self.named_expressions.insert(result, name); + self.write_expr(module, query, func_ctx)?; + writeln!(self.out, ".Proceed();")?; + } + RayQueryFunction::Terminate => { + self.write_expr(module, query, func_ctx)?; + writeln!(self.out, ".Abort();")?; + } + }, Statement::SubgroupBallot { result, predicate } => { write!(self.out, "{level}")?; let name = Baked(result).to_string(); @@ -3582,8 +3654,17 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_expr(module, reject, func_ctx)?; write!(self.out, ")")? } - // Not supported yet - Expression::RayQueryGetIntersection { .. } => unreachable!(), + Expression::RayQueryGetIntersection { query, committed } => { + if committed { + write!(self.out, "GetCommittedIntersection(")?; + self.write_expr(module, query, func_ctx)?; + write!(self.out, ")")?; + } else { + write!(self.out, "GetCandidateIntersection(")?; + self.write_expr(module, query, func_ctx)?; + write!(self.out, ")")?; + } + } // Nothing to do here, since call expression already cached Expression::CallResult(_) | Expression::AtomicResult { .. } diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 2386f1825b..4589d39892 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -344,7 +344,7 @@ impl TypedGlobalVariable<'_> { let (space, access, reference) = match var.space.to_msl_name() { Some(space) if self.reference => { let access = if var.space.needs_access_qualifier() - && !self.usage.contains(valid::GlobalUse::WRITE) + && !self.usage.intersects(valid::GlobalUse::WRITE) { "const" } else { @@ -1201,6 +1201,32 @@ impl Writer { Ok(()) } + fn put_image_atomic( + &mut self, + level: back::Level, + image: Handle, + address: &TexelAddress, + fun: crate::AtomicFunction, + value: Handle, + context: &StatementContext, + ) -> BackendResult { + write!(self.out, "{level}")?; + self.put_expression(image, &context.expression, false)?; + let op = if context.expression.resolve_type(value).scalar_width() == Some(8) { + fun.to_msl_64_bit()? + } else { + fun.to_msl() + }; + write!(self.out, ".atomic_{}(", op)?; + // coordinates in IR are int, but Metal expects uint + self.put_cast_to_uint_scalar_or_vector(address.coordinate, &context.expression)?; + write!(self.out, ", ")?; + self.put_expression(value, &context.expression, true)?; + writeln!(self.out, ");")?; + + Ok(()) + } + fn put_image_store( &mut self, level: back::Level, @@ -3248,6 +3274,21 @@ impl Writer { // Done writeln!(self.out, ";")?; } + crate::Statement::ImageAtomic { + image, + coordinate, + array_index, + fun, + value, + } => { + let address = TexelAddress { + coordinate, + array_index, + sample: None, + level: None, + }; + self.put_image_atomic(level, image, &address, fun, value, context)? + } crate::Statement::WorkGroupUniformLoad { pointer, result } => { self.write_barrier(crate::Barrier::WORK_GROUP, level)?; diff --git a/naga/src/back/pipeline_constants.rs b/naga/src/back/pipeline_constants.rs index 7d72354250..420b5acf4f 100644 --- a/naga/src/back/pipeline_constants.rs +++ b/naga/src/back/pipeline_constants.rs @@ -736,6 +736,20 @@ fn adjust_stmt(new_pos: &HandleVec>, stmt: &mut S | crate::AtomicFunction::Exchange { compare: None } => {} } } + Statement::ImageAtomic { + ref mut image, + ref mut coordinate, + ref mut array_index, + fun: _, + ref mut value, + } => { + adjust(image); + adjust(coordinate); + if let Some(ref mut array_index) = *array_index { + adjust(array_index); + } + adjust(value); + } Statement::WorkGroupUniformLoad { ref mut pointer, ref mut result, diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index b595d7f2f1..152c348689 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -2921,6 +2921,22 @@ impl BlockContext<'_> { block.body.push(instruction); } + Statement::ImageAtomic { + image, + coordinate, + array_index, + fun, + value, + } => { + self.write_image_atomic( + image, + coordinate, + array_index, + fun, + value, + &mut block, + )?; + } Statement::WorkGroupUniformLoad { pointer, result } => { self.writer .write_barrier(crate::Barrier::WORK_GROUP, &mut block); diff --git a/naga/src/back/spv/image.rs b/naga/src/back/spv/image.rs index 7dedf37502..fe4001060e 100644 --- a/naga/src/back/spv/image.rs +++ b/naga/src/back/spv/image.rs @@ -1225,4 +1225,81 @@ impl BlockContext<'_> { Ok(()) } + + pub(super) fn write_image_atomic( + &mut self, + image: Handle, + coordinate: Handle, + array_index: Option>, + fun: crate::AtomicFunction, + value: Handle, + block: &mut Block, + ) -> Result<(), Error> { + let image_id = match self.ir_function.originating_global(image) { + Some(handle) => self.writer.global_variables[handle].var_id, + _ => return Err(Error::Validation("Unexpected image type")), + }; + let crate::TypeInner::Image { class, .. } = + *self.fun_info[image].ty.inner_with(&self.ir_module.types) + else { + return Err(Error::Validation("Invalid image type")); + }; + let crate::ImageClass::Storage { format, .. } = class else { + return Err(Error::Validation("Invalid image class")); + }; + let scalar = format.into(); + let pointer_type_id = self.get_type_id(LookupType::Local(LocalType::LocalPointer { + base: NumericType::Scalar(scalar), + class: spirv::StorageClass::Image, + })); + let signed = scalar.kind == crate::ScalarKind::Sint; + if scalar.width == 8 { + self.writer + .require_any("64 bit image atomics", &[spirv::Capability::Int64Atomics])?; + } + let pointer_id = self.gen_id(); + let coordinates = self.write_image_coordinates(coordinate, array_index, block)?; + let sample_id = self.writer.get_constant_scalar(crate::Literal::U32(0)); + block.body.push(Instruction::image_texel_pointer( + pointer_type_id, + pointer_id, + image_id, + coordinates.value_id, + sample_id, + )); + + let op = match fun { + crate::AtomicFunction::Add => spirv::Op::AtomicIAdd, + crate::AtomicFunction::Subtract => spirv::Op::AtomicISub, + crate::AtomicFunction::And => spirv::Op::AtomicAnd, + crate::AtomicFunction::ExclusiveOr => spirv::Op::AtomicXor, + crate::AtomicFunction::InclusiveOr => spirv::Op::AtomicOr, + crate::AtomicFunction::Min if signed => spirv::Op::AtomicSMin, + crate::AtomicFunction::Min => spirv::Op::AtomicUMin, + crate::AtomicFunction::Max if signed => spirv::Op::AtomicSMax, + crate::AtomicFunction::Max => spirv::Op::AtomicUMax, + crate::AtomicFunction::Exchange { .. } => { + return Err(Error::Validation("Exchange atomics are not supported yet")) + } + }; + let result_type_id = self.get_expression_type_id(&self.fun_info[value].ty); + let id = self.gen_id(); + let space = crate::AddressSpace::Handle; + let (semantics, scope) = space.to_spirv_semantics_and_scope(); + let scope_constant_id = self.get_scope_constant(scope as u32); + let semantics_id = self.get_index_constant(semantics.bits()); + let value_id = self.cached[value]; + + block.body.push(Instruction::image_atomic( + op, + result_type_id, + id, + pointer_id, + scope_constant_id, + semantics_id, + value_id, + )); + + Ok(()) + } } diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs index 9bd58508a1..38aed8c351 100644 --- a/naga/src/back/spv/instructions.rs +++ b/naga/src/back/spv/instructions.rs @@ -702,6 +702,41 @@ impl super::Instruction { instruction } + pub(super) fn image_texel_pointer( + result_type_id: Word, + id: Word, + image: Word, + coordinates: Word, + sample: Word, + ) -> Self { + let mut instruction = Self::new(Op::ImageTexelPointer); + instruction.set_type(result_type_id); + instruction.set_result(id); + instruction.add_operand(image); + instruction.add_operand(coordinates); + instruction.add_operand(sample); + instruction + } + + pub(super) fn image_atomic( + op: Op, + result_type_id: Word, + id: Word, + pointer: Word, + scope_id: Word, + semantics_id: Word, + value: Word, + ) -> Self { + let mut instruction = Self::new(op); + instruction.set_type(result_type_id); + instruction.set_result(id); + instruction.add_operand(pointer); + instruction.add_operand(scope_id); + instruction.add_operand(semantics_id); + instruction.add_operand(value); + instruction + } + pub(super) fn image_query(op: Op, result_type_id: Word, id: Word, image: Word) -> Self { let mut instruction = Self::new(op); instruction.set_type(result_type_id); @@ -1171,6 +1206,7 @@ impl From for spirv::ImageFormat { Sf::Rgb10a2Uint => Self::Rgb10a2ui, Sf::Rgb10a2Unorm => Self::Rgb10A2, Sf::Rg11b10Ufloat => Self::R11fG11fB10f, + Sf::R64Uint => Self::R64ui, Sf::Rg32Uint => Self::Rg32ui, Sf::Rg32Sint => Self::Rg32i, Sf::Rg32Float => Self::Rg32f, diff --git a/naga/src/back/spv/subgroup.rs b/naga/src/back/spv/subgroup.rs index 02285c93fc..28d05e531c 100644 --- a/naga/src/back/spv/subgroup.rs +++ b/naga/src/back/spv/subgroup.rs @@ -135,13 +135,15 @@ impl BlockContext<'_> { &[spirv::Capability::GroupNonUniformBallot], )?; match *mode { - crate::GatherMode::BroadcastFirst | crate::GatherMode::Broadcast(_) => { + crate::GatherMode::BroadcastFirst => { self.writer.require_any( "GroupNonUniformBallot", &[spirv::Capability::GroupNonUniformBallot], )?; } - crate::GatherMode::Shuffle(_) | crate::GatherMode::ShuffleXor(_) => { + crate::GatherMode::Shuffle(_) + | crate::GatherMode::ShuffleXor(_) + | crate::GatherMode::Broadcast(_) => { self.writer.require_any( "GroupNonUniformShuffle", &[spirv::Capability::GroupNonUniformShuffle], diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 56e0029509..9d15c2f014 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -1089,10 +1089,13 @@ impl Writer { "storage image format", &[spirv::Capability::StorageImageExtendedFormats], ), - If::R64ui | If::R64i => self.require_any( - "64-bit integer storage image format", - &[spirv::Capability::Int64ImageEXT], - ), + If::R64ui | If::R64i => { + self.use_extension("SPV_EXT_shader_image_int64"); + self.require_any( + "64-bit integer storage image format", + &[spirv::Capability::Int64ImageEXT], + ) + } If::Unknown | If::Rgba32f | If::Rgba16f diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index ed581c59e2..a7cd8f95c9 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -481,7 +481,10 @@ impl Writer { "storage_", "", storage_format_str(format), - if access.contains(crate::StorageAccess::LOAD | crate::StorageAccess::STORE) + if access.contains(crate::StorageAccess::ATOMIC) { + ",atomic" + } else if access + .contains(crate::StorageAccess::LOAD | crate::StorageAccess::STORE) { ",read_write" } else if access.contains(crate::StorageAccess::LOAD) { @@ -790,6 +793,27 @@ impl Writer { self.write_expr(module, value, func_ctx)?; writeln!(self.out, ");")? } + Statement::ImageAtomic { + image, + coordinate, + array_index, + ref fun, + value, + } => { + write!(self.out, "{level}")?; + let fun_str = fun.to_wgsl(); + write!(self.out, "textureAtomic{fun_str}(")?; + self.write_expr(module, image, func_ctx)?; + write!(self.out, ", ")?; + self.write_expr(module, coordinate, func_ctx)?; + if let Some(array_index_expr) = array_index { + write!(self.out, ", ")?; + self.write_expr(module, array_index_expr, func_ctx)?; + } + write!(self.out, ", ")?; + self.write_expr(module, value, func_ctx)?; + writeln!(self.out, ");")?; + } Statement::WorkGroupUniformLoad { pointer, result } => { write!(self.out, "{level}")?; // TODO: Obey named expressions here. @@ -2055,6 +2079,7 @@ const fn storage_format_str(format: crate::StorageFormat) -> &'static str { Sf::Rgb10a2Uint => "rgb10a2uint", Sf::Rgb10a2Unorm => "rgb10a2unorm", Sf::Rg11b10Ufloat => "rg11b10float", + Sf::R64Uint => "r64uint", Sf::Rg32Uint => "rg32uint", Sf::Rg32Sint => "rg32sint", Sf::Rg32Float => "rg32float", @@ -2107,7 +2132,9 @@ const fn address_space_str( As::Private => "private", As::Uniform => "uniform", As::Storage { access } => { - if access.contains(crate::StorageAccess::STORE) { + if access.contains(crate::StorageAccess::ATOMIC) { + return (Some("storage"), Some("atomic")); + } else if access.contains(crate::StorageAccess::STORE) { return (Some("storage"), Some("read_write")); } else { "storage" diff --git a/naga/src/compact/mod.rs b/naga/src/compact/mod.rs index 6b41a2c9e2..9414a9804c 100644 --- a/naga/src/compact/mod.rs +++ b/naga/src/compact/mod.rs @@ -73,14 +73,6 @@ pub fn compact(module: &mut crate::Module) { } } - for e in module.entry_points.iter() { - if let Some(sizes) = e.workgroup_size_overrides { - for size in sizes.iter().filter_map(|x| *x) { - module_tracer.global_expressions_used.insert(size); - } - } - } - // We assume that all functions are used. // // Observe which types, constant expressions, constants, and @@ -106,6 +98,13 @@ pub fn compact(module: &mut crate::Module) { .iter() .map(|e| { log::trace!("tracing entry point {:?}", e.function.name); + + if let Some(sizes) = e.workgroup_size_overrides { + for size in sizes.iter().filter_map(|x| *x) { + module_tracer.global_expressions_used.insert(size); + } + } + let mut used = module_tracer.as_function(&e.function); used.trace(); FunctionMap::from(used) diff --git a/naga/src/compact/statements.rs b/naga/src/compact/statements.rs index 759dcc2eda..596f9d4067 100644 --- a/naga/src/compact/statements.rs +++ b/naga/src/compact/statements.rs @@ -79,6 +79,20 @@ impl FunctionTracer<'_> { self.expressions_used.insert(result); } } + St::ImageAtomic { + image, + coordinate, + array_index, + fun: _, + value, + } => { + self.expressions_used.insert(image); + self.expressions_used.insert(coordinate); + if let Some(array_index) = array_index { + self.expressions_used.insert(array_index); + } + self.expressions_used.insert(value); + } St::WorkGroupUniformLoad { pointer, result } => { self.expressions_used.insert(pointer); self.expressions_used.insert(result); @@ -261,6 +275,20 @@ impl FunctionMap { adjust(result); } } + St::ImageAtomic { + ref mut image, + ref mut coordinate, + ref mut array_index, + fun: _, + ref mut value, + } => { + adjust(image); + adjust(coordinate); + if let Some(ref mut array_index) = *array_index { + adjust(array_index); + } + adjust(value); + } St::WorkGroupUniformLoad { ref mut pointer, ref mut result, diff --git a/naga/src/front/glsl/parser/types.rs b/naga/src/front/glsl/parser/types.rs index b85b3e9d6a..501d53805c 100644 --- a/naga/src/front/glsl/parser/types.rs +++ b/naga/src/front/glsl/parser/types.rs @@ -228,7 +228,7 @@ impl ParsingContext<'_> { } TokenValue::Buffer => { StorageQualifier::AddressSpace(AddressSpace::Storage { - access: crate::StorageAccess::all(), + access: crate::StorageAccess::LOAD | crate::StorageAccess::STORE, }) } _ => unreachable!(), @@ -274,10 +274,12 @@ impl ParsingContext<'_> { qualifiers.precision = Some((p, token.meta)); } TokenValue::MemoryQualifier(access) => { + let load_store = crate::StorageAccess::LOAD | crate::StorageAccess::STORE; let storage_access = qualifiers .storage_access - .get_or_insert((crate::StorageAccess::all(), Span::default())); - if !storage_access.0.contains(!access) { + .get_or_insert((load_store, Span::default())); + + if !storage_access.0.contains(!access & load_store) { frontend.errors.push(Error { kind: ErrorKind::SemanticError( "The same memory qualifier can only be used once".into(), @@ -428,6 +430,7 @@ fn map_image_format(word: &str) -> Option { "rgba32ui" => Sf::Rgba32Uint, "rgba16ui" => Sf::Rgba16Uint, "rgba8ui" => Sf::Rgba8Uint, + "r64ui" => Sf::R64Uint, "rg32ui" => Sf::Rg32Uint, "rg16ui" => Sf::Rg16Uint, "rg8ui" => Sf::Rg8Uint, diff --git a/naga/src/front/glsl/types.rs b/naga/src/front/glsl/types.rs index f6836169c0..ad5e188fd9 100644 --- a/naga/src/front/glsl/types.rs +++ b/naga/src/front/glsl/types.rs @@ -154,7 +154,7 @@ pub fn parse_type(type_name: &str) -> Option { let class = ImageClass::Storage { format: crate::StorageFormat::R8Uint, - access: crate::StorageAccess::all(), + access: crate::StorageAccess::LOAD | crate::StorageAccess::STORE, }; // TODO: glsl support multisampled storage images, naga doesn't diff --git a/naga/src/front/spv/convert.rs b/naga/src/front/spv/convert.rs index 68b870fb01..6baf74225c 100644 --- a/naga/src/front/spv/convert.rs +++ b/naga/src/front/spv/convert.rs @@ -105,6 +105,7 @@ pub(super) fn map_image_format(word: spirv::Word) -> Result Ok(crate::StorageFormat::Rgb10a2Uint), Some(spirv::ImageFormat::Rgb10A2) => Ok(crate::StorageFormat::Rgb10a2Unorm), Some(spirv::ImageFormat::R11fG11fB10f) => Ok(crate::StorageFormat::Rg11b10Ufloat), + Some(spirv::ImageFormat::R64ui) => Ok(crate::StorageFormat::R64Uint), Some(spirv::ImageFormat::Rg32ui) => Ok(crate::StorageFormat::Rg32Uint), Some(spirv::ImageFormat::Rg32i) => Ok(crate::StorageFormat::Rg32Sint), Some(spirv::ImageFormat::Rg32f) => Ok(crate::StorageFormat::Rg32Float), @@ -174,7 +175,7 @@ pub(super) fn map_storage_class(word: spirv::Word) -> Result Ec::Global(crate::AddressSpace::Handle), Some(Sc::StorageBuffer) => Ec::Global(crate::AddressSpace::Storage { //Note: this is restricted by decorations later - access: crate::StorageAccess::all(), + access: crate::StorageAccess::LOAD | crate::StorageAccess::STORE, }), // we expect the `Storage` case to be filtered out before calling this function. Some(Sc::Uniform) => Ec::Global(crate::AddressSpace::Uniform), diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs index c8288f5dde..b8087fc8b0 100644 --- a/naga/src/front/spv/mod.rs +++ b/naga/src/front/spv/mod.rs @@ -177,7 +177,7 @@ bitflags::bitflags! { impl DecorationFlags { fn to_storage_access(self) -> crate::StorageAccess { - let mut access = crate::StorageAccess::all(); + let mut access = crate::StorageAccess::LOAD | crate::StorageAccess::STORE; if self.contains(DecorationFlags::NON_READABLE) { access &= !crate::StorageAccess::LOAD; } @@ -4490,6 +4490,7 @@ impl> Frontend { | S::Store { .. } | S::ImageStore { .. } | S::Atomic { .. } + | S::ImageAtomic { .. } | S::RayQuery { .. } | S::SubgroupBallot { .. } | S::SubgroupCollectiveOperation { .. } diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs index 8b1e4f354f..44ac7885cc 100644 --- a/naga/src/front/wgsl/error.rs +++ b/naga/src/front/wgsl/error.rs @@ -359,22 +359,22 @@ impl<'a> Error<'a> { Error::Unexpected(unexpected_span, expected) => { let expected_str = match expected { ExpectedToken::Token(token) => match token { - Token::Separator(c) => format!("'{c}'"), - Token::Paren(c) => format!("'{c}'"), + Token::Separator(c) => format!("`{c}`"), + Token::Paren(c) => format!("`{c}`"), Token::Attribute => "@".to_string(), Token::Number(_) => "number".to_string(), Token::Word(s) => s.to_string(), - Token::Operation(c) => format!("operation ('{c}')"), - Token::LogicalOperation(c) => format!("logical operation ('{c}')"), - Token::ShiftOperation(c) => format!("bitshift ('{c}{c}')"), + Token::Operation(c) => format!("operation (`{c}`)"), + Token::LogicalOperation(c) => format!("logical operation (`{c}`)"), + Token::ShiftOperation(c) => format!("bitshift (`{c}{c}`)"), Token::AssignmentOperation(c) if c == '<' || c == '>' => { - format!("bitshift ('{c}{c}=')") + format!("bitshift (`{c}{c}=`)") } - Token::AssignmentOperation(c) => format!("operation ('{c}=')"), + Token::AssignmentOperation(c) => format!("operation (`{c}=`)"), Token::IncrementOperation => "increment operation".to_string(), Token::DecrementOperation => "decrement operation".to_string(), Token::Arrow => "->".to_string(), - Token::Unknown(c) => format!("unknown ('{c}')"), + Token::Unknown(c) => format!("unknown (`{c}`)"), Token::Trivia => "trivia".to_string(), Token::End => "end".to_string(), }, @@ -382,15 +382,15 @@ impl<'a> Error<'a> { ExpectedToken::PrimaryExpression => "expression".to_string(), ExpectedToken::Assignment => "assignment or increment/decrement".to_string(), ExpectedToken::SwitchItem => concat!( - "switch item ('case' or 'default') or a closing curly bracket ", - "to signify the end of the switch statement ('}')" + "switch item (`case` or `default`) or a closing curly bracket ", + "to signify the end of the switch statement (`}`)" ) .to_string(), ExpectedToken::WorkgroupSizeSeparator => { - "workgroup size separator (',') or a closing parenthesis".to_string() + "workgroup size separator (`,`) or a closing parenthesis".to_string() } ExpectedToken::GlobalItem => concat!( - "global item ('struct', 'const', 'var', 'alias', 'fn', 'diagnostic', 'enable', 'requires', ';') ", + "global item (`struct`, `const`, `var`, `alias`, `fn`, `diagnostic`, `enable`, `requires`, `;`) ", "or the end of the file" ) .to_string(), @@ -398,18 +398,18 @@ impl<'a> Error<'a> { ExpectedToken::Variable => "variable access".to_string(), ExpectedToken::Function => "function name".to_string(), ExpectedToken::AfterIdentListArg => { - "next argument, trailing comma, or end of list (',' or ';')".to_string() + "next argument, trailing comma, or end of list (`,` or `;`)".to_string() } ExpectedToken::AfterIdentListComma => { - "next argument or end of list (';')".to_string() + "next argument or end of list (`;`)".to_string() } ExpectedToken::DiagnosticAttribute => { - "the 'diagnostic' attribute identifier".to_string() + "the `diagnostic` attribute identifier".to_string() } }; ParseError { message: format!( - "expected {}, found '{}'", + "expected {}, found {:?}", expected_str, &source[unexpected_span], ), labels: vec![(unexpected_span, format!("expected {expected_str}").into())], @@ -445,12 +445,12 @@ impl<'a> Error<'a> { notes: vec![], }, Error::UnknownIdent(ident_span, ident) => ParseError { - message: format!("no definition in scope for identifier: '{ident}'"), + message: format!("no definition in scope for identifier: `{ident}`"), labels: vec![(ident_span, "unknown identifier".into())], notes: vec![], }, Error::UnknownScalarType(bad_span) => ParseError { - message: format!("unknown scalar type: '{}'", &source[bad_span]), + message: format!("unknown scalar type: `{}`", &source[bad_span]), labels: vec![(bad_span, "unknown scalar type".into())], notes: vec!["Valid scalar types are f32, f64, i32, u32, bool".into()], }, @@ -473,7 +473,7 @@ impl<'a> Error<'a> { }, Error::BadTexture(bad_span) => ParseError { message: format!( - "expected an image, but found '{}' which is not an image", + "expected an image, but found `{}` which is not an image", &source[bad_span] ), labels: vec![(bad_span, "not an image".into())], @@ -498,7 +498,7 @@ impl<'a> Error<'a> { }, Error::InvalidForInitializer(bad_span) => ParseError { message: format!( - "for(;;) initializer is not an assignment or a function call: '{}'", + "for(;;) initializer is not an assignment or a function call: `{}`", &source[bad_span] ), labels: vec![(bad_span, "not an assignment or function call".into())], @@ -511,7 +511,7 @@ impl<'a> Error<'a> { }, Error::InvalidGatherComponent(bad_span) => ParseError { message: format!( - "textureGather component '{}' doesn't exist, must be 0, 1, 2, or 3", + "textureGather component `{}` doesn't exist, must be 0, 1, 2, or 3", &source[bad_span] ), labels: vec![(bad_span, "invalid component".into())], @@ -523,58 +523,58 @@ impl<'a> Error<'a> { notes: vec![], }, Error::InvalidIdentifierUnderscore(bad_span) => ParseError { - message: "Identifier can't be '_'".to_string(), + message: "Identifier can't be `_`".to_string(), labels: vec![(bad_span, "invalid identifier".into())], notes: vec![ - "Use phony assignment instead ('_ =' notice the absence of 'let' or 'var')" + "Use phony assignment instead (`_ =` notice the absence of `let` or `var`)" .to_string(), ], }, Error::ReservedIdentifierPrefix(bad_span) => ParseError { message: format!( - "Identifier starts with a reserved prefix: '{}'", + "Identifier starts with a reserved prefix: `{}`", &source[bad_span] ), labels: vec![(bad_span, "invalid identifier".into())], notes: vec![], }, Error::UnknownAddressSpace(bad_span) => ParseError { - message: format!("unknown address space: '{}'", &source[bad_span]), + message: format!("unknown address space: `{}`", &source[bad_span]), labels: vec![(bad_span, "unknown address space".into())], notes: vec![], }, Error::RepeatedAttribute(bad_span) => ParseError { - message: format!("repeated attribute: '{}'", &source[bad_span]), + message: format!("repeated attribute: `{}`", &source[bad_span]), labels: vec![(bad_span, "repeated attribute".into())], notes: vec![], }, Error::UnknownAttribute(bad_span) => ParseError { - message: format!("unknown attribute: '{}'", &source[bad_span]), + message: format!("unknown attribute: `{}`", &source[bad_span]), labels: vec![(bad_span, "unknown attribute".into())], notes: vec![], }, Error::UnknownBuiltin(bad_span) => ParseError { - message: format!("unknown builtin: '{}'", &source[bad_span]), + message: format!("unknown builtin: `{}`", &source[bad_span]), labels: vec![(bad_span, "unknown builtin".into())], notes: vec![], }, Error::UnknownAccess(bad_span) => ParseError { - message: format!("unknown access: '{}'", &source[bad_span]), + message: format!("unknown access: `{}`", &source[bad_span]), labels: vec![(bad_span, "unknown access".into())], notes: vec![], }, Error::UnknownStorageFormat(bad_span) => ParseError { - message: format!("unknown storage format: '{}'", &source[bad_span]), + message: format!("unknown storage format: `{}`", &source[bad_span]), labels: vec![(bad_span, "unknown storage format".into())], notes: vec![], }, Error::UnknownConservativeDepth(bad_span) => ParseError { - message: format!("unknown conservative depth: '{}'", &source[bad_span]), + message: format!("unknown conservative depth: `{}`", &source[bad_span]), labels: vec![(bad_span, "unknown conservative depth".into())], notes: vec![], }, Error::UnknownType(bad_span) => ParseError { - message: format!("unknown type: '{}'", &source[bad_span]), + message: format!("unknown type: `{}`", &source[bad_span]), labels: vec![(bad_span, "unknown type".into())], notes: vec![], }, @@ -702,7 +702,7 @@ impl<'a> Error<'a> { InvalidAssignmentType::ImmutableBinding(binding_span) => ( Some((binding_span, "this is an immutable binding".into())), vec![format!( - "consider declaring '{}' with `var` instead of `let`", + "consider declaring `{}` with `var` instead of `let`", &source[binding_span] )], ), @@ -782,11 +782,11 @@ impl<'a> Error<'a> { .into(), )], notes: vec![if uint { - format!("suffix the integer with a `u`: '{}u'", &source[span]) + format!("suffix the integer with a `u`: `{}u`", &source[span]) } else { let span = span.to_range().unwrap(); format!( - "remove the `u` suffix: '{}'", + "remove the `u` suffix: `{}`", &source[span.start..span.end - 1] ) }], @@ -833,10 +833,10 @@ impl<'a> Error<'a> { Error::ExpectedConstExprConcreteIntegerScalar(span) => ParseError { message: concat!( "must be a const-expression that ", - "resolves to a concrete integer scalar (u32 or i32)" + "resolves to a concrete integer scalar (`u32` or `i32`)" ) .to_string(), - labels: vec![(span, "must resolve to u32 or i32".into())], + labels: vec![(span, "must resolve to `u32` or `i32`".into())], notes: vec![], }, Error::ExpectedNonNegative(span) => ParseError { @@ -858,7 +858,7 @@ impl<'a> Error<'a> { message: "workgroup size is missing on compute shader entry point".to_string(), labels: vec![( span, - "must be paired with a @workgroup_size attribute".into(), + "must be paired with a `@workgroup_size` attribute".into(), )], notes: vec![], }, @@ -947,13 +947,13 @@ impl<'a> Error<'a> { notes: vec![], }, Error::NotBool(span) => ParseError { - message: "must be a const-expression that resolves to a bool".to_string(), - labels: vec![(span, "must resolve to bool".into())], + message: "must be a const-expression that resolves to a `bool`".to_string(), + labels: vec![(span, "must resolve to `bool`".into())], notes: vec![], }, Error::ConstAssertFailed(span) => ParseError { - message: "const_assert failure".to_string(), - labels: vec![(span, "evaluates to false".into())], + message: "`const_assert` failure".to_string(), + labels: vec![(span, "evaluates to `false`".into())], notes: vec![], }, Error::DirectiveAfterFirstGlobalDecl { directive_span } => ParseError { diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index dcfa38116b..eb6f919930 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -1042,38 +1042,8 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { v.ty.map(|ast| self.resolve_ast_type(ast, &mut ctx)) .transpose()?; - let mut ectx = ctx.as_override(); - - let ty; - let initializer; - match (v.init, explicit_ty) { - (Some(init), Some(explicit_ty)) => { - let init = self.expression_for_abstract(init, &mut ectx)?; - let ty_res = crate::proc::TypeResolution::Handle(explicit_ty); - let init = ectx - .try_automatic_conversions(init, &ty_res, v.name.span) - .map_err(|error| match error { - Error::AutoConversion(e) => Error::InitializationTypeMismatch { - name: v.name.span, - expected: e.dest_type, - got: e.source_type, - }, - other => other, - })?; - ty = explicit_ty; - initializer = Some(init); - } - (Some(init), None) => { - let concretized = self.expression(init, &mut ectx)?; - ty = ectx.register_type(concretized)?; - initializer = Some(concretized); - } - (None, Some(explicit_ty)) => { - ty = explicit_ty; - initializer = None; - } - (None, None) => return Err(Error::DeclMissingTypeAndInit(v.name.span)), - } + let (ty, initializer) = + self.type_and_init(v.name, v.init, explicit_ty, &mut ctx.as_override())?; let binding = if let Some(ref binding) = v.binding { Some(crate::ResourceBinding { @@ -1136,18 +1106,14 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { .insert(c.name.name, LoweredGlobalDecl::Const(handle)); } ast::GlobalDeclKind::Override(ref o) => { - let init = o - .init - .map(|init| self.expression(init, &mut ctx.as_override())) - .transpose()?; - let inferred_type = init - .map(|init| ctx.as_const().register_type(init)) - .transpose()?; - let explicit_ty = - o.ty.map(|ty| self.resolve_ast_type(ty, &mut ctx)) + o.ty.map(|ast| self.resolve_ast_type(ast, &mut ctx)) .transpose()?; + let mut ectx = ctx.as_override(); + + let (ty, init) = self.type_and_init(o.name, o.init, explicit_ty, &mut ectx)?; + let id = o.id.map(|id| self.const_u32(id, &mut ctx.as_const())) .transpose()?; @@ -1161,26 +1127,6 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { None }; - let ty = match (explicit_ty, inferred_type) { - (Some(explicit_ty), Some(inferred_type)) => { - if explicit_ty == inferred_type { - explicit_ty - } else { - let gctx = ctx.module.to_ctx(); - return Err(Error::InitializationTypeMismatch { - name: o.name.span, - expected: explicit_ty.to_wgsl(&gctx).into(), - got: inferred_type.to_wgsl(&gctx).into(), - }); - } - } - (Some(explicit_ty), None) => explicit_ty, - (None, Some(inferred_type)) => inferred_type, - (None, None) => { - return Err(Error::DeclMissingTypeAndInit(o.name.span)); - } - }; - let handle = ctx.module.overrides.append( crate::Override { name: Some(o.name.name.to_string()), @@ -1233,6 +1179,47 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { Ok(module) } + /// Obtain (inferred) type and initializer after automatic conversion + fn type_and_init( + &mut self, + name: ast::Ident<'source>, + init: Option>>, + explicit_ty: Option>, + ectx: &mut ExpressionContext<'source, '_, '_>, + ) -> Result<(Handle, Option>), Error<'source>> { + let ty; + let initializer; + match (init, explicit_ty) { + (Some(init), Some(explicit_ty)) => { + let init = self.expression_for_abstract(init, ectx)?; + let ty_res = crate::proc::TypeResolution::Handle(explicit_ty); + let init = ectx + .try_automatic_conversions(init, &ty_res, name.span) + .map_err(|error| match error { + Error::AutoConversion(e) => Error::InitializationTypeMismatch { + name: name.span, + expected: e.dest_type, + got: e.source_type, + }, + other => other, + })?; + ty = explicit_ty; + initializer = Some(init); + } + (Some(init), None) => { + let concretized = self.expression(init, ectx)?; + ty = ectx.register_type(concretized)?; + initializer = Some(concretized); + } + (None, Some(explicit_ty)) => { + ty = explicit_ty; + initializer = None; + } + (None, None) => return Err(Error::DeclMissingTypeAndInit(name.span)), + } + Ok((ty, initializer)) + } + fn function( &mut self, f: &ast::Function<'source>, @@ -2425,6 +2412,50 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { ); return Ok(Some(result)); } + "textureAtomicMin" | "textureAtomicMax" | "textureAtomicAdd" + | "textureAtomicAnd" | "textureAtomicOr" | "textureAtomicXor" => { + let mut args = ctx.prepare_args(arguments, 3, span); + + let image = args.next()?; + let image_span = ctx.ast_expressions.get_span(image); + let image = self.expression(image, ctx)?; + + let coordinate = self.expression(args.next()?, ctx)?; + + let (_, arrayed) = ctx.image_data(image, image_span)?; + let array_index = arrayed + .then(|| { + args.min_args += 1; + self.expression(args.next()?, ctx) + }) + .transpose()?; + + let value = self.expression(args.next()?, ctx)?; + + args.finish()?; + + let rctx = ctx.runtime_expression_ctx(span)?; + rctx.block + .extend(rctx.emitter.finish(&rctx.function.expressions)); + rctx.emitter.start(&rctx.function.expressions); + let stmt = crate::Statement::ImageAtomic { + image, + coordinate, + array_index, + fun: match function.name { + "textureAtomicMin" => crate::AtomicFunction::Min, + "textureAtomicMax" => crate::AtomicFunction::Max, + "textureAtomicAdd" => crate::AtomicFunction::Add, + "textureAtomicAnd" => crate::AtomicFunction::And, + "textureAtomicOr" => crate::AtomicFunction::InclusiveOr, + "textureAtomicXor" => crate::AtomicFunction::ExclusiveOr, + _ => unreachable!(), + }, + value, + }; + rctx.block.push(stmt, span); + return Ok(None); + } "storageBarrier" => { ctx.prepare_args(arguments, 0, span).finish()?; diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs index 0c9341eb62..00c19d877d 100644 --- a/naga/src/front/wgsl/parse/conv.rs +++ b/naga/src/front/wgsl/parse/conv.rs @@ -95,6 +95,7 @@ pub fn map_storage_format(word: &str, span: Span) -> Result Sf::Rgb10a2Uint, "rgb10a2unorm" => Sf::Rgb10a2Unorm, "rg11b10float" => Sf::Rg11b10Ufloat, + "r64uint" => Sf::R64Uint, "rg32uint" => Sf::Rg32Uint, "rg32sint" => Sf::Rg32Sint, "rg32float" => Sf::Rg32Float, diff --git a/naga/src/front/wgsl/parse/lexer.rs b/naga/src/front/wgsl/parse/lexer.rs index 51b151b53b..0b0b6edebc 100644 --- a/naga/src/front/wgsl/parse/lexer.rs +++ b/naga/src/front/wgsl/parse/lexer.rs @@ -443,6 +443,9 @@ impl<'a> Lexer<'a> { "read" => Ok(crate::StorageAccess::LOAD), "write" => Ok(crate::StorageAccess::STORE), "read_write" => Ok(crate::StorageAccess::LOAD | crate::StorageAccess::STORE), + "atomic" => Ok(crate::StorageAccess::ATOMIC + | crate::StorageAccess::LOAD + | crate::StorageAccess::STORE), _ => Err(Error::UnknownAccess(span)), } } diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs index cd37051430..739a1b15c6 100644 --- a/naga/src/front/wgsl/parse/mod.rs +++ b/naga/src/front/wgsl/parse/mod.rs @@ -1630,6 +1630,10 @@ impl Parser { kind: Float | Sint | Uint, width: 4, } => Ok(()), + Scalar { + kind: Uint, + width: 8, + } => Ok(()), _ => Err(Error::BadTextureSampleType { span, scalar }), } } diff --git a/naga/src/front/wgsl/to_wgsl.rs b/naga/src/front/wgsl/to_wgsl.rs index 4d401b0708..7d4c17f5f0 100644 --- a/naga/src/front/wgsl/to_wgsl.rs +++ b/naga/src/front/wgsl/to_wgsl.rs @@ -178,6 +178,7 @@ impl crate::StorageFormat { Sf::Rgb10a2Uint => "rgb10a2uint", Sf::Rgb10a2Unorm => "rgb10a2unorm", Sf::Rg11b10Ufloat => "rg11b10float", + Sf::R64Uint => "r64uint", Sf::Rg32Uint => "rg32uint", Sf::Rg32Sint => "rg32sint", Sf::Rg32Float => "rg32float", diff --git a/naga/src/lib.rs b/naga/src/lib.rs index d9873bfedd..ddf78f1b68 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -597,6 +597,8 @@ bitflags::bitflags! { const LOAD = 0x1; /// Storage can be used as a target for store ops. const STORE = 0x2; + /// Storage can be used as a target for atomic ops. + const ATOMIC = 0x4; } } @@ -640,6 +642,7 @@ pub enum StorageFormat { Rg11b10Ufloat, // 64-bit formats + R64Uint, Rg32Uint, Rg32Sint, Rg32Float, @@ -2017,6 +2020,49 @@ pub enum Statement { /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS result: Option>, }, + /// Performs an atomic operation on a texel value of an image. + /// + /// Doing atomics on images with mipmaps is not supported, so there is no + /// `level` operand. + ImageAtomic { + /// The image to perform an atomic operation on. This must have type + /// [`Image`]. (This will necessarily be a [`GlobalVariable`] or + /// [`FunctionArgument`] expression, since no other expressions are + /// allowed to have that type.) + /// + /// [`Image`]: TypeInner::Image + /// [`GlobalVariable`]: Expression::GlobalVariable + /// [`FunctionArgument`]: Expression::FunctionArgument + image: Handle, + + /// The coordinate of the texel we wish to load. This must be a scalar + /// for [`D1`] images, a [`Bi`] vector for [`D2`] images, and a [`Tri`] + /// vector for [`D3`] images. (Array indices, sample indices, and + /// explicit level-of-detail values are supplied separately.) Its + /// component type must be [`Sint`]. + /// + /// [`D1`]: ImageDimension::D1 + /// [`D2`]: ImageDimension::D2 + /// [`D3`]: ImageDimension::D3 + /// [`Bi`]: VectorSize::Bi + /// [`Tri`]: VectorSize::Tri + /// [`Sint`]: ScalarKind::Sint + coordinate: Handle, + + /// The index into an arrayed image. If the [`arrayed`] flag in + /// `image`'s type is `true`, then this must be `Some(expr)`, where + /// `expr` is a [`Sint`] scalar. Otherwise, it must be `None`. + /// + /// [`arrayed`]: TypeInner::Image::arrayed + /// [`Sint`]: ScalarKind::Sint + array_index: Option>, + + /// The kind of atomic operation to perform on the texel. + fun: AtomicFunction, + + /// The value with which to perform the atomic operation. + value: Handle, + }, /// Load uniformly from a uniform pointer in the workgroup address space. /// /// Corresponds to the [`workgroupUniformLoad`](https://www.w3.org/TR/WGSL/#workgroupUniformLoad-builtin) diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index 76698fd102..fafac8cb30 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -49,6 +49,7 @@ impl From for super::Scalar { Sf::Rgb10a2Uint => Sk::Uint, Sf::Rgb10a2Unorm => Sk::Float, Sf::Rg11b10Ufloat => Sk::Float, + Sf::R64Uint => Sk::Uint, Sf::Rg32Uint => Sk::Uint, Sf::Rg32Sint => Sk::Sint, Sf::Rg32Float => Sk::Float, @@ -65,7 +66,11 @@ impl From for super::Scalar { Sf::Rgba16Unorm => Sk::Float, Sf::Rgba16Snorm => Sk::Float, }; - super::Scalar { kind, width: 4 } + let width = match format { + Sf::R64Uint => 8, + _ => 4, + }; + super::Scalar { kind, width } } } diff --git a/naga/src/proc/terminator.rs b/naga/src/proc/terminator.rs index 5edf55cb73..19c37294ec 100644 --- a/naga/src/proc/terminator.rs +++ b/naga/src/proc/terminator.rs @@ -36,6 +36,7 @@ pub fn ensure_block_returns(block: &mut crate::Block) { | S::Call { .. } | S::RayQuery { .. } | S::Atomic { .. } + | S::ImageAtomic { .. } | S::WorkGroupUniformLoad { .. } | S::SubgroupBallot { .. } | S::SubgroupCollectiveOperation { .. } diff --git a/naga/src/valid/analyzer.rs b/naga/src/valid/analyzer.rs index 4b207d0274..8417bf77be 100644 --- a/naga/src/valid/analyzer.rs +++ b/naga/src/valid/analyzer.rs @@ -131,6 +131,8 @@ bitflags::bitflags! { const WRITE = 0x2; /// The information about the data is queried. const QUERY = 0x4; + /// Atomic operations will be performed on the variable. + const ATOMIC = 0x8; } } @@ -1061,6 +1063,21 @@ impl FunctionInfo { } FunctionUniformity::new() } + S::ImageAtomic { + image, + coordinate, + array_index, + fun: _, + value, + } => { + let _ = self.add_ref_impl(image, GlobalUse::ATOMIC); + let _ = self.add_ref(coordinate); + if let Some(expr) = array_index { + let _ = self.add_ref(expr); + } + let _ = self.add_ref(value); + FunctionUniformity::new() + } S::RayQuery { query, ref fun } => { let _ = self.add_ref(query); if let crate::RayQueryFunction::Initialize { diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs index 5f3c0a819c..9ef3a9edfb 100644 --- a/naga/src/valid/expression.rs +++ b/naga/src/valid/expression.rs @@ -79,6 +79,10 @@ pub enum ExpressionError { ExpectedSamplerType(Handle), #[error("Unable to operate on image class {0:?}")] InvalidImageClass(crate::ImageClass), + #[error("Image atomics are not supported for storage format {0:?}")] + InvalidImageFormat(crate::StorageFormat), + #[error("Image atomics require atomic storage access, {0:?} is insufficient")] + InvalidImageStorageAccess(crate::StorageAccess), #[error("Derivatives can only be taken from scalar and vector floats")] InvalidDerivative, #[error("Image array index parameter is misplaced")] @@ -258,7 +262,7 @@ impl super::Validator { | Ti::Array { .. } | Ti::Pointer { .. } | Ti::ValuePointer { size: Some(_), .. } - | Ti::BindingArray { .. } => false, + | Ti::BindingArray { .. } => {} ref other => { log::error!("Indexing of {:?}", other); return Err(ExpressionError::InvalidBaseType(base)); diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs index b7b96a6c7d..a910be992c 100644 --- a/naga/src/valid/function.rs +++ b/naga/src/valid/function.rs @@ -148,6 +148,12 @@ pub enum FunctionError { }, #[error("Image store parameters are invalid")] InvalidImageStore(#[source] ExpressionError), + #[error("Image atomic parameters are invalid")] + InvalidImageAtomic(#[source] ExpressionError), + #[error("Image atomic function is invalid")] + InvalidImageAtomicFunction(crate::AtomicFunction), + #[error("Image atomic value is invalid")] + InvalidImageAtomicValue(Handle), #[error("Call to {function:?} is invalid")] InvalidCall { function: Handle, @@ -1187,6 +1193,189 @@ impl super::Validator { } => { self.validate_atomic(pointer, fun, value, result, span, context)?; } + S::ImageAtomic { + image, + coordinate, + array_index, + fun, + value, + } => { + let var = match *context.get_expression(image) { + crate::Expression::GlobalVariable(var_handle) => { + &context.global_vars[var_handle] + } + // We're looking at a binding index situation, so punch through the index and look at the global behind it. + crate::Expression::Access { base, .. } + | crate::Expression::AccessIndex { base, .. } => { + match *context.get_expression(base) { + crate::Expression::GlobalVariable(var_handle) => { + &context.global_vars[var_handle] + } + _ => { + return Err(FunctionError::InvalidImageAtomic( + ExpressionError::ExpectedGlobalVariable, + ) + .with_span_handle(image, context.expressions)) + } + } + } + _ => { + return Err(FunctionError::InvalidImageAtomic( + ExpressionError::ExpectedGlobalVariable, + ) + .with_span_handle(image, context.expressions)) + } + }; + + // Punch through a binding array to get the underlying type + let global_ty = match context.types[var.ty].inner { + Ti::BindingArray { base, .. } => &context.types[base].inner, + ref inner => inner, + }; + + let value_ty = match *global_ty { + Ti::Image { + class, + arrayed, + dim, + } => { + match context + .resolve_type(coordinate, &self.valid_expression_set)? + .image_storage_coordinates() + { + Some(coord_dim) if coord_dim == dim => {} + _ => { + return Err(FunctionError::InvalidImageAtomic( + ExpressionError::InvalidImageCoordinateType( + dim, coordinate, + ), + ) + .with_span_handle(coordinate, context.expressions)); + } + }; + if arrayed != array_index.is_some() { + return Err(FunctionError::InvalidImageAtomic( + ExpressionError::InvalidImageArrayIndex, + ) + .with_span_handle(coordinate, context.expressions)); + } + if let Some(expr) = array_index { + match *context.resolve_type(expr, &self.valid_expression_set)? { + Ti::Scalar(crate::Scalar { + kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, + width: _, + }) => {} + _ => { + return Err(FunctionError::InvalidImageAtomic( + ExpressionError::InvalidImageArrayIndexType(expr), + ) + .with_span_handle(expr, context.expressions)); + } + } + } + match class { + crate::ImageClass::Storage { format, access } => { + if !access.contains(crate::StorageAccess::ATOMIC) { + return Err(FunctionError::InvalidImageAtomic( + ExpressionError::InvalidImageStorageAccess(access), + ) + .with_span_handle(image, context.expressions)); + } + match format { + crate::StorageFormat::R64Uint => { + if !self.capabilities.intersects( + super::Capabilities::TEXTURE_INT64_ATOMIC, + ) { + return Err(FunctionError::MissingCapability( + super::Capabilities::TEXTURE_INT64_ATOMIC, + ) + .with_span_static( + span, + "missing capability for this operation", + )); + } + match fun { + crate::AtomicFunction::Min + | crate::AtomicFunction::Max => {} + _ => { + return Err( + FunctionError::InvalidImageAtomicFunction( + fun, + ) + .with_span_handle( + image, + context.expressions, + ), + ); + } + } + } + crate::StorageFormat::R32Sint + | crate::StorageFormat::R32Uint => { + if !self + .capabilities + .intersects(super::Capabilities::TEXTURE_ATOMIC) + { + return Err(FunctionError::MissingCapability( + super::Capabilities::TEXTURE_ATOMIC, + ) + .with_span_static( + span, + "missing capability for this operation", + )); + } + match fun { + crate::AtomicFunction::Add + | crate::AtomicFunction::And + | crate::AtomicFunction::ExclusiveOr + | crate::AtomicFunction::InclusiveOr + | crate::AtomicFunction::Min + | crate::AtomicFunction::Max => {} + _ => { + return Err( + FunctionError::InvalidImageAtomicFunction( + fun, + ) + .with_span_handle( + image, + context.expressions, + ), + ); + } + } + } + _ => { + return Err(FunctionError::InvalidImageAtomic( + ExpressionError::InvalidImageFormat(format), + ) + .with_span_handle(image, context.expressions)); + } + } + crate::TypeInner::Scalar(format.into()) + } + _ => { + return Err(FunctionError::InvalidImageAtomic( + ExpressionError::InvalidImageClass(class), + ) + .with_span_handle(image, context.expressions)); + } + } + } + _ => { + return Err(FunctionError::InvalidImageAtomic( + ExpressionError::ExpectedImageType(var.ty), + ) + .with_span() + .with_handle(var.ty, context.types) + .with_handle(image, context.expressions)) + } + }; + + if *context.resolve_type(value, &self.valid_expression_set)? != value_ty { + return Err(FunctionError::InvalidImageAtomicValue(value) + .with_span_handle(value, context.expressions)); + } + } S::WorkGroupUniformLoad { pointer, result } => { stages &= super::ShaderStages::COMPUTE; let pointer_inner = diff --git a/naga/src/valid/handles.rs b/naga/src/valid/handles.rs index 44ff80a333..260d442c79 100644 --- a/naga/src/valid/handles.rs +++ b/naga/src/valid/handles.rs @@ -664,6 +664,19 @@ impl super::Validator { } Ok(()) } + crate::Statement::ImageAtomic { + image, + coordinate, + array_index, + fun: _, + value, + } => { + validate_expr(image)?; + validate_expr(coordinate)?; + validate_expr_opt(array_index)?; + validate_expr(value)?; + Ok(()) + } crate::Statement::WorkGroupUniformLoad { pointer, result } => { validate_expr(pointer)?; validate_expr(result)?; diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs index 335826d12c..08bdda0329 100644 --- a/naga/src/valid/interface.rs +++ b/naga/src/valid/interface.rs @@ -129,6 +129,9 @@ fn storage_usage(access: crate::StorageAccess) -> GlobalUse { if access.contains(crate::StorageAccess::STORE) { storage_usage |= GlobalUse::WRITE; } + if access.contains(crate::StorageAccess::ATOMIC) { + storage_usage |= GlobalUse::ATOMIC; + } storage_usage } @@ -758,7 +761,9 @@ impl super::Validator { } => storage_usage(access), _ => GlobalUse::READ | GlobalUse::QUERY, }, - crate::AddressSpace::Private | crate::AddressSpace::WorkGroup => GlobalUse::all(), + crate::AddressSpace::Private | crate::AddressSpace::WorkGroup => { + GlobalUse::READ | GlobalUse::WRITE | GlobalUse::QUERY + } crate::AddressSpace::PushConstant => GlobalUse::READ, }; if !allowed_usage.contains(usage) { diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index 828c784a7a..906d449362 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -152,6 +152,10 @@ bitflags::bitflags! { /// [`AtomicFunction::Exchange { compare: None }`]: crate::AtomicFunction::Exchange /// [`Storage`]: crate::AddressSpace::Storage const SHADER_FLOAT32_ATOMIC = 1 << 21; + /// Support for atomic operations on images. + const TEXTURE_ATOMIC = 1 << 22; + /// Support for atomic operations on 64-bit images. + const TEXTURE_INT64_ATOMIC = 1 << 23; } } diff --git a/naga/tests/in/atomicTexture-int64.param.ron b/naga/tests/in/atomicTexture-int64.param.ron new file mode 100644 index 0000000000..ffc7fb4cb7 --- /dev/null +++ b/naga/tests/in/atomicTexture-int64.param.ron @@ -0,0 +1,24 @@ +( + god_mode: true, + spv: ( + version: (1, 0), + capabilities: [ Int64, Int64ImageEXT, Int64Atomics ], + ), + hlsl: ( + shader_model: V6_6, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + restrict_indexing: true + ), + msl: ( + lang_version: (3, 1), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: true, + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/atomicTexture-int64.wgsl b/naga/tests/in/atomicTexture-int64.wgsl new file mode 100644 index 0000000000..d8bf298ba9 --- /dev/null +++ b/naga/tests/in/atomicTexture-int64.wgsl @@ -0,0 +1,12 @@ +@group(0) @binding(0) +var image: texture_storage_2d; + +@compute +@workgroup_size(2) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + textureAtomicMax(image, vec2(0, 0), 1lu); + + workgroupBarrier(); + + textureAtomicMin(image, vec2(0, 0), 1lu); +} diff --git a/naga/tests/in/atomicTexture.param.ron b/naga/tests/in/atomicTexture.param.ron new file mode 100644 index 0000000000..34f638160d --- /dev/null +++ b/naga/tests/in/atomicTexture.param.ron @@ -0,0 +1,30 @@ +( + god_mode: true, + spv: ( + version: (1, 0), + capabilities: [], + ), + hlsl: ( + shader_model: V5_1, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + restrict_indexing: true + ), + msl: ( + lang_version: (3, 1), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: true, + zero_initialize_workgroup_memory: true, + ), + glsl: ( + version: Desktop(420), + writer_flags: (""), + binding_map: {}, + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/atomicTexture.wgsl b/naga/tests/in/atomicTexture.wgsl new file mode 100644 index 0000000000..40859ff04c --- /dev/null +++ b/naga/tests/in/atomicTexture.wgsl @@ -0,0 +1,22 @@ +@group(0) @binding(0) +var image_u: texture_storage_2d; +@group(0) @binding(1) +var image_s: texture_storage_2d; + +@compute +@workgroup_size(2) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + textureAtomicMax(image_u, vec2(0, 0), 1u); + textureAtomicMin(image_u, vec2(0, 0), 1u); + textureAtomicAdd(image_u, vec2(0, 0), 1u); + textureAtomicAnd(image_u, vec2(0, 0), 1u); + textureAtomicOr(image_u, vec2(0, 0), 1u); + textureAtomicXor(image_u, vec2(0, 0), 1u); + + textureAtomicMax(image_s, vec2(0, 0), 1i); + textureAtomicMin(image_s, vec2(0, 0), 1i); + textureAtomicAdd(image_s, vec2(0, 0), 1i); + textureAtomicAnd(image_s, vec2(0, 0), 1i); + textureAtomicOr(image_s, vec2(0, 0), 1i); + textureAtomicXor(image_s, vec2(0, 0), 1i); +} diff --git a/naga/tests/in/overrides.wgsl b/naga/tests/in/overrides.wgsl index a746ce1c76..7603e881f7 100644 --- a/naga/tests/in/overrides.wgsl +++ b/naga/tests/in/overrides.wgsl @@ -13,6 +13,8 @@ override inferred_f32 = 2.718; +override auto_conversion: u32 = 0; + var gain_x_10: f32 = gain * 10.; var store_override: f32; diff --git a/naga/tests/in/ray-query.param.ron b/naga/tests/in/ray-query.param.ron index c400db8c64..481d311fa4 100644 --- a/naga/tests/in/ray-query.param.ron +++ b/naga/tests/in/ray-query.param.ron @@ -11,4 +11,11 @@ per_entry_point_map: {}, inline_samplers: [], ), + hlsl: ( + shader_model: V6_5, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: None, + zero_initialize_workgroup_memory: true, + ) ) diff --git a/naga/tests/out/analysis/overrides.info.ron b/naga/tests/out/analysis/overrides.info.ron index 835525e52d..fcc8a9cf1b 100644 --- a/naga/tests/out/analysis/overrides.info.ron +++ b/naga/tests/out/analysis/overrides.info.ron @@ -2,6 +2,7 @@ type_flags: [ ("DATA | SIZED | COPY | CREATION_RESOLVED | ARGUMENT | CONSTRUCTIBLE"), ("DATA | SIZED | COPY | IO_SHAREABLE | HOST_SHAREABLE | CREATION_RESOLVED | ARGUMENT | CONSTRUCTIBLE"), + ("DATA | SIZED | COPY | IO_SHAREABLE | HOST_SHAREABLE | CREATION_RESOLVED | ARGUMENT | CONSTRUCTIBLE"), ], functions: [], entry_points: [ @@ -189,6 +190,10 @@ kind: Float, width: 4, ))), + Value(Scalar(( + kind: Uint, + width: 4, + ))), Handle(1), Value(Scalar(( kind: Float, diff --git a/naga/tests/out/glsl/atomicTexture.cs_main.Compute.glsl b/naga/tests/out/glsl/atomicTexture.cs_main.Compute.glsl new file mode 100644 index 0000000000..4defe425f9 --- /dev/null +++ b/naga/tests/out/glsl/atomicTexture.cs_main.Compute.glsl @@ -0,0 +1,27 @@ +#version 420 core +#extension GL_ARB_compute_shader : require +#extension GL_OES_shader_image_atomic : require +layout(local_size_x = 2, local_size_y = 1, local_size_z = 1) in; + +layout(r32ui) uniform uimage2D _group_0_binding_0_cs; + +layout(r32i) uniform iimage2D _group_0_binding_1_cs; + + +void main() { + uvec3 id = gl_LocalInvocationID; + imageAtomicMax(_group_0_binding_0_cs, ivec2(0, 0), 1u); + imageAtomicMin(_group_0_binding_0_cs, ivec2(0, 0), 1u); + imageAtomicAdd(_group_0_binding_0_cs, ivec2(0, 0), 1u); + imageAtomicAnd(_group_0_binding_0_cs, ivec2(0, 0), 1u); + imageAtomicOr(_group_0_binding_0_cs, ivec2(0, 0), 1u); + imageAtomicXor(_group_0_binding_0_cs, ivec2(0, 0), 1u); + imageAtomicMax(_group_0_binding_1_cs, ivec2(0, 0), 1); + imageAtomicMin(_group_0_binding_1_cs, ivec2(0, 0), 1); + imageAtomicAdd(_group_0_binding_1_cs, ivec2(0, 0), 1); + imageAtomicAnd(_group_0_binding_1_cs, ivec2(0, 0), 1); + imageAtomicOr(_group_0_binding_1_cs, ivec2(0, 0), 1); + imageAtomicXor(_group_0_binding_1_cs, ivec2(0, 0), 1); + return; +} + diff --git a/naga/tests/out/glsl/overrides.main.Compute.glsl b/naga/tests/out/glsl/overrides.main.Compute.glsl index d1170df962..684e7cca58 100644 --- a/naga/tests/out/glsl/overrides.main.Compute.glsl +++ b/naga/tests/out/glsl/overrides.main.Compute.glsl @@ -12,6 +12,7 @@ const float width = 0.0; const float depth = 2.3; const float height = 4.6; const float inferred_f32_ = 2.718; +const uint auto_conversion = 0u; float gain_x_10_ = 11.0; diff --git a/naga/tests/out/hlsl/atomicTexture-int64.hlsl b/naga/tests/out/hlsl/atomicTexture-int64.hlsl new file mode 100644 index 0000000000..056489c790 --- /dev/null +++ b/naga/tests/out/hlsl/atomicTexture-int64.hlsl @@ -0,0 +1,17 @@ +struct NagaConstants { + int first_vertex; + int first_instance; + uint other; +}; +ConstantBuffer _NagaConstants: register(b0, space1); + +RWTexture2D image : register(u0); + +[numthreads(2, 1, 1)] +void cs_main(uint3 id : SV_GroupThreadID) +{ + InterlockedMax(image[int2(0, 0)],1uL); + GroupMemoryBarrierWithGroupSync(); + InterlockedMin(image[int2(0, 0)],1uL); + return; +} diff --git a/naga/tests/out/hlsl/atomicTexture-int64.ron b/naga/tests/out/hlsl/atomicTexture-int64.ron new file mode 100644 index 0000000000..67a9035512 --- /dev/null +++ b/naga/tests/out/hlsl/atomicTexture-int64.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"cs_main", + target_profile:"cs_6_6", + ), + ], +) diff --git a/naga/tests/out/hlsl/atomicTexture.hlsl b/naga/tests/out/hlsl/atomicTexture.hlsl new file mode 100644 index 0000000000..241cdab678 --- /dev/null +++ b/naga/tests/out/hlsl/atomicTexture.hlsl @@ -0,0 +1,27 @@ +struct NagaConstants { + int first_vertex; + int first_instance; + uint other; +}; +ConstantBuffer _NagaConstants: register(b0, space1); + +RWTexture2D image_u : register(u0); +RWTexture2D image_s : register(u1); + +[numthreads(2, 1, 1)] +void cs_main(uint3 id : SV_GroupThreadID) +{ + InterlockedMax(image_u[int2(0, 0)],1u); + InterlockedMin(image_u[int2(0, 0)],1u); + InterlockedAdd(image_u[int2(0, 0)],1u); + InterlockedAnd(image_u[int2(0, 0)],1u); + InterlockedOr(image_u[int2(0, 0)],1u); + InterlockedXor(image_u[int2(0, 0)],1u); + InterlockedMax(image_s[int2(0, 0)],1); + InterlockedMin(image_s[int2(0, 0)],1); + InterlockedAdd(image_s[int2(0, 0)],1); + InterlockedAnd(image_s[int2(0, 0)],1); + InterlockedOr(image_s[int2(0, 0)],1); + InterlockedXor(image_s[int2(0, 0)],1); + return; +} diff --git a/naga/tests/out/hlsl/atomicTexture.ron b/naga/tests/out/hlsl/atomicTexture.ron new file mode 100644 index 0000000000..5c261e59b2 --- /dev/null +++ b/naga/tests/out/hlsl/atomicTexture.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"cs_main", + target_profile:"cs_5_1", + ), + ], +) diff --git a/naga/tests/out/hlsl/overrides.hlsl b/naga/tests/out/hlsl/overrides.hlsl index aae0b491bf..5cb0e75687 100644 --- a/naga/tests/out/hlsl/overrides.hlsl +++ b/naga/tests/out/hlsl/overrides.hlsl @@ -5,6 +5,7 @@ static const float width = 0.0; static const float depth = 2.3; static const float height = 4.6; static const float inferred_f32_ = 2.718; +static const uint auto_conversion = 0u; static float gain_x_10_ = 11.0; static float store_override = (float)0; diff --git a/naga/tests/out/hlsl/ray-query.hlsl b/naga/tests/out/hlsl/ray-query.hlsl new file mode 100644 index 0000000000..9a0a2da1ce --- /dev/null +++ b/naga/tests/out/hlsl/ray-query.hlsl @@ -0,0 +1,152 @@ +struct RayIntersection { + uint kind; + float t; + uint instance_custom_index; + uint instance_id; + uint sbt_record_offset; + uint geometry_index; + uint primitive_index; + float2 barycentrics; + bool front_face; + int _pad9_0; + int _pad9_1; + row_major float4x3 object_to_world; + int _pad10_0; + row_major float4x3 world_to_object; + int _end_pad_0; +}; + +struct RayDesc_ { + uint flags; + uint cull_mask; + float tmin; + float tmax; + float3 origin; + int _pad5_0; + float3 dir; + int _end_pad_0; +}; + +struct Output { + uint visible; + int _pad1_0; + int _pad1_1; + int _pad1_2; + float3 normal; + int _end_pad_0; +}; + +RayDesc RayDescFromRayDesc_(RayDesc_ arg0) { + RayDesc ret = (RayDesc)0; + ret.Origin = arg0.origin; + ret.TMin = arg0.tmin; + ret.Direction = arg0.dir; + ret.TMax = arg0.tmax; + return ret; +} + +RaytracingAccelerationStructure acc_struct : register(t0); +RWByteAddressBuffer output : register(u1); + +RayDesc_ ConstructRayDesc_(uint arg0, uint arg1, float arg2, float arg3, float3 arg4, float3 arg5) { + RayDesc_ ret = (RayDesc_)0; + ret.flags = arg0; + ret.cull_mask = arg1; + ret.tmin = arg2; + ret.tmax = arg3; + ret.origin = arg4; + ret.dir = arg5; + return ret; +} + +RayIntersection GetCommittedIntersection(RayQuery rq) { + RayIntersection ret = (RayIntersection)0; + ret.kind = rq.CommittedStatus(); + if( rq.CommittedStatus() == COMMITTED_NOTHING) {} else { + ret.t = rq.CommittedRayT(); + ret.instance_custom_index = rq.CommittedInstanceID(); + ret.instance_id = rq.CommittedInstanceIndex(); + ret.sbt_record_offset = rq.CommittedInstanceContributionToHitGroupIndex(); + ret.geometry_index = rq.CommittedGeometryIndex(); + ret.primitive_index = rq.CommittedPrimitiveIndex(); + if( rq.CommittedStatus() == COMMITTED_TRIANGLE_HIT ) { + ret.barycentrics = rq.CommittedTriangleBarycentrics(); + ret.front_face = rq.CommittedTriangleFrontFace(); + } + ret.object_to_world = rq.CommittedObjectToWorld4x3(); + ret.world_to_object = rq.CommittedWorldToObject4x3(); + } + return ret; +} + +RayIntersection query_loop(float3 pos, float3 dir, RaytracingAccelerationStructure acs) +{ + RayQuery rq_1; + + rq_1.TraceRayInline(acs, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir).flags, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir).cull_mask, RayDescFromRayDesc_(ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir))); + while(true) { + const bool _e9 = rq_1.Proceed(); + if (_e9) { + } else { + break; + } + { + } + } + const RayIntersection rayintersection = GetCommittedIntersection(rq_1); + return rayintersection; +} + +float3 get_torus_normal(float3 world_point, RayIntersection intersection) +{ + float3 local_point = mul(float4(world_point, 1.0), intersection.world_to_object); + float2 point_on_guiding_line = (normalize(local_point.xy) * 2.4); + float3 world_point_on_guiding_line = mul(float4(point_on_guiding_line, 0.0, 1.0), intersection.object_to_world); + return normalize((world_point - world_point_on_guiding_line)); +} + +[numthreads(1, 1, 1)] +void main() +{ + float3 pos_1 = (0.0).xxx; + float3 dir_1 = float3(0.0, 1.0, 0.0); + const RayIntersection _e7 = query_loop(pos_1, dir_1, acc_struct); + output.Store(0, asuint(uint((_e7.kind == 0u)))); + const float3 _e18 = get_torus_normal((dir_1 * _e7.t), _e7); + output.Store3(16, asuint(_e18)); + return; +} + +RayIntersection GetCandidateIntersection(RayQuery rq) { + RayIntersection ret = (RayIntersection)0; + CANDIDATE_TYPE kind = rq.CandidateType(); + if (kind == CANDIDATE_NON_OPAQUE_TRIANGLE) { + ret.kind = 1; + ret.t = rq.CandidateTriangleRayT(); + ret.barycentrics = rq.CandidateTriangleBarycentrics(); + ret.front_face = rq.CandidateTriangleFrontFace(); + } else { + ret.kind = 3; + } + ret.instance_custom_index = rq.CandidateInstanceID(); + ret.instance_id = rq.CandidateInstanceIndex(); + ret.sbt_record_offset = rq.CandidateInstanceContributionToHitGroupIndex(); + ret.geometry_index = rq.CandidateGeometryIndex(); + ret.primitive_index = rq.CandidatePrimitiveIndex(); + ret.object_to_world = rq.CandidateObjectToWorld4x3(); + ret.world_to_object = rq.CandidateWorldToObject4x3(); + return ret; +} + +[numthreads(1, 1, 1)] +void main_candidate() +{ + RayQuery rq; + + float3 pos_2 = (0.0).xxx; + float3 dir_2 = float3(0.0, 1.0, 0.0); + rq.TraceRayInline(acc_struct, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos_2, dir_2).flags, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos_2, dir_2).cull_mask, RayDescFromRayDesc_(ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos_2, dir_2))); + RayIntersection intersection_1 = GetCandidateIntersection(rq); + output.Store(0, asuint(uint((intersection_1.kind == 3u)))); + return; +} diff --git a/naga/tests/out/hlsl/ray-query.ron b/naga/tests/out/hlsl/ray-query.ron new file mode 100644 index 0000000000..a31e1db125 --- /dev/null +++ b/naga/tests/out/hlsl/ray-query.ron @@ -0,0 +1,16 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_5", + ), + ( + entry_point:"main_candidate", + target_profile:"cs_6_5", + ), + ], +) diff --git a/naga/tests/out/ir/overrides.compact.ron b/naga/tests/out/ir/overrides.compact.ron index d99beb19c6..00c57fa434 100644 --- a/naga/tests/out/ir/overrides.compact.ron +++ b/naga/tests/out/ir/overrides.compact.ron @@ -14,6 +14,13 @@ width: 4, )), ), + ( + name: None, + inner: Scalar(( + kind: Uint, + width: 4, + )), + ), ], special_types: ( ray_desc: None, @@ -64,6 +71,12 @@ ty: 1, init: Some(6), ), + ( + name: Some("auto_conversion"), + id: None, + ty: 2, + init: Some(7), + ), ], global_variables: [ ( @@ -71,7 +84,7 @@ space: Private, binding: None, ty: 1, - init: Some(9), + init: Some(10), ), ( name: Some("store_override"), @@ -93,12 +106,13 @@ right: 3, ), Literal(F32(2.718)), + Literal(U32(0)), Override(2), Literal(F32(10.0)), Binary( op: Multiply, - left: 7, - right: 8, + left: 8, + right: 9, ), ], functions: [], diff --git a/naga/tests/out/ir/overrides.ron b/naga/tests/out/ir/overrides.ron index d99beb19c6..00c57fa434 100644 --- a/naga/tests/out/ir/overrides.ron +++ b/naga/tests/out/ir/overrides.ron @@ -14,6 +14,13 @@ width: 4, )), ), + ( + name: None, + inner: Scalar(( + kind: Uint, + width: 4, + )), + ), ], special_types: ( ray_desc: None, @@ -64,6 +71,12 @@ ty: 1, init: Some(6), ), + ( + name: Some("auto_conversion"), + id: None, + ty: 2, + init: Some(7), + ), ], global_variables: [ ( @@ -71,7 +84,7 @@ space: Private, binding: None, ty: 1, - init: Some(9), + init: Some(10), ), ( name: Some("store_override"), @@ -93,12 +106,13 @@ right: 3, ), Literal(F32(2.718)), + Literal(U32(0)), Override(2), Literal(F32(10.0)), Binary( op: Multiply, - left: 7, - right: 8, + left: 8, + right: 9, ), ], functions: [], diff --git a/naga/tests/out/msl/atomicTexture-int64.msl b/naga/tests/out/msl/atomicTexture-int64.msl new file mode 100644 index 0000000000..c00d8b7654 --- /dev/null +++ b/naga/tests/out/msl/atomicTexture-int64.msl @@ -0,0 +1,18 @@ +// language: metal3.1 +#include +#include + +using metal::uint; + + +struct cs_mainInput { +}; +kernel void cs_main( + metal::uint3 id [[thread_position_in_threadgroup]] +, metal::texture2d image [[user(fake0)]] +) { + image.atomic_max(metal::uint2(metal::int2(0, 0)), 1uL); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + image.atomic_min(metal::uint2(metal::int2(0, 0)), 1uL); + return; +} diff --git a/naga/tests/out/msl/atomicTexture.msl b/naga/tests/out/msl/atomicTexture.msl new file mode 100644 index 0000000000..2cb17a081e --- /dev/null +++ b/naga/tests/out/msl/atomicTexture.msl @@ -0,0 +1,28 @@ +// language: metal3.1 +#include +#include + +using metal::uint; + + +struct cs_mainInput { +}; +kernel void cs_main( + metal::uint3 id [[thread_position_in_threadgroup]] +, metal::texture2d image_u [[user(fake0)]] +, metal::texture2d image_s [[user(fake0)]] +) { + image_u.atomic_fetch_max(metal::uint2(metal::int2(0, 0)), 1u); + image_u.atomic_fetch_min(metal::uint2(metal::int2(0, 0)), 1u); + image_u.atomic_fetch_add(metal::uint2(metal::int2(0, 0)), 1u); + image_u.atomic_fetch_and(metal::uint2(metal::int2(0, 0)), 1u); + image_u.atomic_fetch_or(metal::uint2(metal::int2(0, 0)), 1u); + image_u.atomic_fetch_xor(metal::uint2(metal::int2(0, 0)), 1u); + image_s.atomic_fetch_max(metal::uint2(metal::int2(0, 0)), 1); + image_s.atomic_fetch_min(metal::uint2(metal::int2(0, 0)), 1); + image_s.atomic_fetch_add(metal::uint2(metal::int2(0, 0)), 1); + image_s.atomic_fetch_and(metal::uint2(metal::int2(0, 0)), 1); + image_s.atomic_fetch_or(metal::uint2(metal::int2(0, 0)), 1); + image_s.atomic_fetch_xor(metal::uint2(metal::int2(0, 0)), 1); + return; +} diff --git a/naga/tests/out/msl/overrides.msl b/naga/tests/out/msl/overrides.msl index d3638dd4cd..02770d613d 100644 --- a/naga/tests/out/msl/overrides.msl +++ b/naga/tests/out/msl/overrides.msl @@ -11,6 +11,7 @@ constant float width = 0.0; constant float depth = 2.3; constant float height = 4.6; constant float inferred_f32_ = 2.718; +constant uint auto_conversion = 0u; kernel void main_( ) { diff --git a/naga/tests/out/spv/atomicTexture-int64.spvasm b/naga/tests/out/spv/atomicTexture-int64.spvasm new file mode 100644 index 0000000000..0238f44d7e --- /dev/null +++ b/naga/tests/out/spv/atomicTexture-int64.spvasm @@ -0,0 +1,49 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 31 +OpCapability Shader +OpCapability Int64ImageEXT +OpCapability Int64 +OpCapability Int64Atomics +OpExtension "SPV_EXT_shader_image_int64" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %15 "cs_main" %12 +OpExecutionMode %15 LocalSize 2 1 1 +OpDecorate %9 DescriptorSet 0 +OpDecorate %9 Binding 0 +OpDecorate %12 BuiltIn LocalInvocationId +%2 = OpTypeVoid +%4 = OpTypeInt 64 0 +%3 = OpTypeImage %4 2D 0 0 0 2 R64ui +%6 = OpTypeInt 32 0 +%5 = OpTypeVector %6 3 +%8 = OpTypeInt 32 1 +%7 = OpTypeVector %8 2 +%10 = OpTypePointer UniformConstant %3 +%9 = OpVariable %10 UniformConstant +%13 = OpTypePointer Input %5 +%12 = OpVariable %13 Input +%16 = OpTypeFunction %2 +%18 = OpConstant %8 0 +%19 = OpConstantComposite %7 %18 %18 +%20 = OpConstant %4 1 +%22 = OpTypePointer Image %4 +%24 = OpConstant %6 0 +%26 = OpConstant %8 4 +%27 = OpConstant %6 2 +%28 = OpConstant %6 264 +%15 = OpFunction %2 None %16 +%11 = OpLabel +%14 = OpLoad %5 %12 +%17 = OpLoad %3 %9 +OpBranch %21 +%21 = OpLabel +%23 = OpImageTexelPointer %22 %9 %19 %24 +%25 = OpAtomicUMax %4 %23 %26 %24 %20 +OpControlBarrier %27 %27 %28 +%29 = OpImageTexelPointer %22 %9 %19 %24 +%30 = OpAtomicUMin %4 %29 %26 %24 %20 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/atomicTexture.spvasm b/naga/tests/out/spv/atomicTexture.spvasm new file mode 100644 index 0000000000..42eaa3d33f --- /dev/null +++ b/naga/tests/out/spv/atomicTexture.spvasm @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 54 +OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %17 "cs_main" %14 +OpExecutionMode %17 LocalSize 2 1 1 +OpDecorate %9 DescriptorSet 0 +OpDecorate %9 Binding 0 +OpDecorate %11 DescriptorSet 0 +OpDecorate %11 Binding 1 +OpDecorate %14 BuiltIn LocalInvocationId +%2 = OpTypeVoid +%4 = OpTypeInt 32 0 +%3 = OpTypeImage %4 2D 0 0 0 2 R32ui +%6 = OpTypeInt 32 1 +%5 = OpTypeImage %6 2D 0 0 0 2 R32i +%7 = OpTypeVector %4 3 +%8 = OpTypeVector %6 2 +%10 = OpTypePointer UniformConstant %3 +%9 = OpVariable %10 UniformConstant +%12 = OpTypePointer UniformConstant %5 +%11 = OpVariable %12 UniformConstant +%15 = OpTypePointer Input %7 +%14 = OpVariable %15 Input +%18 = OpTypeFunction %2 +%21 = OpConstant %6 0 +%22 = OpConstantComposite %8 %21 %21 +%23 = OpConstant %4 1 +%24 = OpConstant %6 1 +%26 = OpTypePointer Image %4 +%28 = OpConstant %4 0 +%30 = OpConstant %6 4 +%41 = OpTypePointer Image %6 +%17 = OpFunction %2 None %18 +%13 = OpLabel +%16 = OpLoad %7 %14 +%19 = OpLoad %3 %9 +%20 = OpLoad %5 %11 +OpBranch %25 +%25 = OpLabel +%27 = OpImageTexelPointer %26 %9 %22 %28 +%29 = OpAtomicUMax %4 %27 %30 %28 %23 +%31 = OpImageTexelPointer %26 %9 %22 %28 +%32 = OpAtomicUMin %4 %31 %30 %28 %23 +%33 = OpImageTexelPointer %26 %9 %22 %28 +%34 = OpAtomicIAdd %4 %33 %30 %28 %23 +%35 = OpImageTexelPointer %26 %9 %22 %28 +%36 = OpAtomicAnd %4 %35 %30 %28 %23 +%37 = OpImageTexelPointer %26 %9 %22 %28 +%38 = OpAtomicOr %4 %37 %30 %28 %23 +%39 = OpImageTexelPointer %26 %9 %22 %28 +%40 = OpAtomicXor %4 %39 %30 %28 %23 +%42 = OpImageTexelPointer %41 %11 %22 %28 +%43 = OpAtomicSMax %6 %42 %30 %28 %24 +%44 = OpImageTexelPointer %41 %11 %22 %28 +%45 = OpAtomicSMin %6 %44 %30 %28 %24 +%46 = OpImageTexelPointer %41 %11 %22 %28 +%47 = OpAtomicIAdd %6 %46 %30 %28 %24 +%48 = OpImageTexelPointer %41 %11 %22 %28 +%49 = OpAtomicAnd %6 %48 %30 %28 %24 +%50 = OpImageTexelPointer %41 %11 %22 %28 +%51 = OpAtomicOr %6 %50 %30 %28 %24 +%52 = OpImageTexelPointer %41 %11 %22 %28 +%53 = OpAtomicXor %6 %52 %30 %28 %24 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/overrides.main.spvasm b/naga/tests/out/spv/overrides.main.spvasm index 5c748a01b2..a685272479 100644 --- a/naga/tests/out/spv/overrides.main.spvasm +++ b/naga/tests/out/spv/overrides.main.spvasm @@ -1,46 +1,48 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 33 +; Bound: 35 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %20 "main" -OpExecutionMode %20 LocalSize 1 1 1 +OpEntryPoint GLCompute %22 "main" +OpExecutionMode %22 LocalSize 1 1 1 %2 = OpTypeVoid %3 = OpTypeBool %4 = OpTypeFloat 32 -%5 = OpConstantTrue %3 -%6 = OpConstant %4 2.3 -%7 = OpConstant %4 0.0 -%8 = OpConstantFalse %3 -%9 = OpConstant %4 1.1 -%10 = OpConstant %4 2.0 -%11 = OpConstant %4 4.6 -%12 = OpConstant %4 2.718 -%13 = OpConstant %4 10.0 -%14 = OpConstant %4 11.0 -%16 = OpTypePointer Private %4 -%15 = OpVariable %16 Private %14 -%18 = OpConstantNull %4 -%17 = OpVariable %16 Private %18 -%21 = OpTypeFunction %2 -%22 = OpConstant %4 23.0 -%24 = OpTypePointer Function %4 -%26 = OpTypePointer Function %3 -%27 = OpConstantNull %3 -%29 = OpConstantNull %4 -%20 = OpFunction %2 None %21 -%19 = OpLabel -%23 = OpVariable %24 Function %22 -%25 = OpVariable %26 Function %27 -%28 = OpVariable %24 Function %29 -OpBranch %30 -%30 = OpLabel -OpStore %25 %5 -%31 = OpLoad %4 %15 -%32 = OpFMul %4 %31 %13 -OpStore %28 %32 -OpStore %17 %9 +%5 = OpTypeInt 32 0 +%6 = OpConstantTrue %3 +%7 = OpConstant %4 2.3 +%8 = OpConstant %4 0.0 +%9 = OpConstantFalse %3 +%10 = OpConstant %4 1.1 +%11 = OpConstant %4 2.0 +%12 = OpConstant %4 4.6 +%13 = OpConstant %4 2.718 +%14 = OpConstant %5 0 +%15 = OpConstant %4 10.0 +%16 = OpConstant %4 11.0 +%18 = OpTypePointer Private %4 +%17 = OpVariable %18 Private %16 +%20 = OpConstantNull %4 +%19 = OpVariable %18 Private %20 +%23 = OpTypeFunction %2 +%24 = OpConstant %4 23.0 +%26 = OpTypePointer Function %4 +%28 = OpTypePointer Function %3 +%29 = OpConstantNull %3 +%31 = OpConstantNull %4 +%22 = OpFunction %2 None %23 +%21 = OpLabel +%25 = OpVariable %26 Function %24 +%27 = OpVariable %28 Function %29 +%30 = OpVariable %26 Function %31 +OpBranch %32 +%32 = OpLabel +OpStore %27 %6 +%33 = OpLoad %4 %17 +%34 = OpFMul %4 %33 %15 +OpStore %30 %34 +OpStore %19 %10 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/atomicTexture-int64.wgsl b/naga/tests/out/wgsl/atomicTexture-int64.wgsl new file mode 100644 index 0000000000..52bbe76771 --- /dev/null +++ b/naga/tests/out/wgsl/atomicTexture-int64.wgsl @@ -0,0 +1,10 @@ +@group(0) @binding(0) +var image: texture_storage_2d; + +@compute @workgroup_size(2, 1, 1) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + textureAtomicMax(image, vec2(0i, 0i), 1lu); + workgroupBarrier(); + textureAtomicMin(image, vec2(0i, 0i), 1lu); + return; +} diff --git a/naga/tests/out/wgsl/atomicTexture.wgsl b/naga/tests/out/wgsl/atomicTexture.wgsl new file mode 100644 index 0000000000..f5bd7db002 --- /dev/null +++ b/naga/tests/out/wgsl/atomicTexture.wgsl @@ -0,0 +1,21 @@ +@group(0) @binding(0) +var image_u: texture_storage_2d; +@group(0) @binding(1) +var image_s: texture_storage_2d; + +@compute @workgroup_size(2, 1, 1) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + textureAtomicMax(image_u, vec2(0i, 0i), 1u); + textureAtomicMin(image_u, vec2(0i, 0i), 1u); + textureAtomicAdd(image_u, vec2(0i, 0i), 1u); + textureAtomicAnd(image_u, vec2(0i, 0i), 1u); + textureAtomicOr(image_u, vec2(0i, 0i), 1u); + textureAtomicXor(image_u, vec2(0i, 0i), 1u); + textureAtomicMax(image_s, vec2(0i, 0i), 1i); + textureAtomicMin(image_s, vec2(0i, 0i), 1i); + textureAtomicAdd(image_s, vec2(0i, 0i), 1i); + textureAtomicAnd(image_s, vec2(0i, 0i), 1i); + textureAtomicOr(image_s, vec2(0i, 0i), 1i); + textureAtomicXor(image_s, vec2(0i, 0i), 1i); + return; +} diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index b6ab1046a7..691878959d 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -788,10 +788,18 @@ fn convert_wgsl() { "atomicOps-int64-min-max", Targets::SPIRV | Targets::METAL | Targets::HLSL | Targets::WGSL, ), + ( + "atomicTexture", + Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, + ), ( "atomicOps-float32", Targets::SPIRV | Targets::METAL | Targets::WGSL, ), + ( + "atomicTexture-int64", + Targets::SPIRV | Targets::METAL | Targets::HLSL | Targets::WGSL, + ), ( "atomicCompareExchange-int64", Targets::SPIRV | Targets::WGSL, @@ -871,7 +879,7 @@ fn convert_wgsl() { ("sprite", Targets::SPIRV), ("force_point_size_vertex_shader_webgl", Targets::GLSL), ("invariant", Targets::GLSL), - ("ray-query", Targets::SPIRV | Targets::METAL), + ("ray-query", Targets::SPIRV | Targets::METAL | Targets::HLSL), ("hlsl-keyword", Targets::HLSL), ( "constructors", diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs index abba829d5f..fc4d7211f8 100644 --- a/naga/tests/wgsl_errors.rs +++ b/naga/tests/wgsl_errors.rs @@ -38,7 +38,7 @@ fn very_negative_integers() { fn reserved_identifier_prefix() { check( "var __bad;", - r###"error: Identifier starts with a reserved prefix: '__bad' + r###"error: Identifier starts with a reserved prefix: `__bad` โ”Œโ”€ wgsl:1:5 โ”‚ 1 โ”‚ var __bad; @@ -52,7 +52,7 @@ fn reserved_identifier_prefix() { fn function_without_identifier() { check( "fn () {}", - r###"error: expected identifier, found '(' + r###"error: expected identifier, found "(" โ”Œโ”€ wgsl:1:4 โ”‚ 1 โ”‚ fn () {} @@ -66,7 +66,7 @@ fn function_without_identifier() { fn invalid_integer() { check( "fn foo([location(1.)] x: i32) {}", - r###"error: expected identifier, found '[' + r###"error: expected identifier, found "[" โ”Œโ”€ wgsl:1:8 โ”‚ 1 โ”‚ fn foo([location(1.)] x: i32) {} @@ -80,7 +80,7 @@ fn invalid_integer() { fn invalid_float() { check( "const scale: f32 = 1.1.;", - r###"error: expected identifier, found ';' + r###"error: expected identifier, found ";" โ”Œโ”€ wgsl:1:24 โ”‚ 1 โ”‚ const scale: f32 = 1.1.; @@ -112,7 +112,7 @@ fn unknown_identifier() { return x * schmoo; } "###, - r###"error: no definition in scope for identifier: 'schmoo' + r###"error: no definition in scope for identifier: `schmoo` โ”Œโ”€ wgsl:3:30 โ”‚ 3 โ”‚ return x * schmoo; @@ -134,7 +134,7 @@ fn bad_texture() { return textureSample(a, sampler1, vec2(0.0)); } "#, - r#"error: expected an image, but found 'a' which is not an image + r#"error: expected an image, but found `a` which is not an image โ”Œโ”€ wgsl:7:38 โ”‚ 7 โ”‚ return textureSample(a, sampler1, vec2(0.0)); @@ -266,7 +266,7 @@ fn bad_for_initializer() { for ({};;) {} } "#, - r#"error: for(;;) initializer is not an assignment or a function call: '{}' + r#"error: for(;;) initializer is not an assignment or a function call: `{}` โ”Œโ”€ wgsl:3:22 โ”‚ 3 โ”‚ for ({};;) {} @@ -282,7 +282,7 @@ fn unknown_storage_class() { r#" @group(0) @binding(0) var texture: texture_2d; "#, - r#"error: unknown address space: 'bad' + r#"error: unknown address space: `bad` โ”Œโ”€ wgsl:2:39 โ”‚ 2 โ”‚ @group(0) @binding(0) var texture: texture_2d; @@ -299,7 +299,7 @@ fn unknown_attribute() { @a fn x() {} "#, - r#"error: unknown attribute: 'a' + r#"error: unknown attribute: `a` โ”Œโ”€ wgsl:2:14 โ”‚ 2 โ”‚ @a @@ -315,7 +315,7 @@ fn unknown_built_in() { r#" fn x(@builtin(unknown_built_in) y: u32) {} "#, - r#"error: unknown builtin: 'unknown_built_in' + r#"error: unknown builtin: `unknown_built_in` โ”Œโ”€ wgsl:2:27 โ”‚ 2 โ”‚ fn x(@builtin(unknown_built_in) y: u32) {} @@ -331,7 +331,7 @@ fn unknown_access() { r#" var x: array; "#, - r#"error: unknown access: 'unknown_access' + r#"error: unknown access: `unknown_access` โ”Œโ”€ wgsl:2:25 โ”‚ 2 โ”‚ var x: array; @@ -349,7 +349,7 @@ fn unknown_ident() { let a = b; } "#, - r#"error: no definition in scope for identifier: 'b' + r#"error: no definition in scope for identifier: `b` โ”Œโ”€ wgsl:3:25 โ”‚ 3 โ”‚ let a = b; @@ -365,7 +365,7 @@ fn unknown_scalar_type() { r#" const a = vec2(); "#, - r#"error: unknown scalar type: 'vec2f' + r#"error: unknown scalar type: `vec2f` โ”Œโ”€ wgsl:2:28 โ”‚ 2 โ”‚ const a = vec2(); @@ -383,7 +383,7 @@ fn unknown_type() { r#" const a: Vec = 10; "#, - r#"error: unknown type: 'Vec' + r#"error: unknown type: `Vec` โ”Œโ”€ wgsl:2:22 โ”‚ 2 โ”‚ const a: Vec = 10; @@ -399,7 +399,7 @@ fn unknown_storage_format() { r#" const storage1: texture_storage_1d; "#, - r#"error: unknown storage format: 'rgba' + r#"error: unknown storage format: `rgba` โ”Œโ”€ wgsl:2:48 โ”‚ 2 โ”‚ const storage1: texture_storage_1d; @@ -415,7 +415,7 @@ fn unknown_conservative_depth() { r#" @early_depth_test(abc) fn main() {} "#, - r#"error: unknown conservative depth: 'abc' + r#"error: unknown conservative depth: `abc` โ”Œโ”€ wgsl:2:31 โ”‚ 2 โ”‚ @early_depth_test(abc) fn main() {} @@ -503,7 +503,7 @@ fn unknown_local_function() { for (a();;) {} } "#, - r#"error: no definition in scope for identifier: 'a' + r#"error: no definition in scope for identifier: `a` โ”Œโ”€ wgsl:3:22 โ”‚ 3 โ”‚ for (a();;) {} @@ -1010,11 +1010,11 @@ fn invalid_arrays() { check( "alias Bad = array;", - r###"error: must be a const-expression that resolves to a concrete integer scalar (u32 or i32) + r###"error: must be a const-expression that resolves to a concrete integer scalar (`u32` or `i32`) โ”Œโ”€ wgsl:1:24 โ”‚ 1 โ”‚ alias Bad = array; - โ”‚ ^^^^ must resolve to u32 or i32 + โ”‚ ^^^^ must resolve to `u32` or `i32` "###, ); @@ -1024,11 +1024,11 @@ fn invalid_arrays() { const length: f32 = 2.718; alias Bad = array; "#, - r###"error: must be a const-expression that resolves to a concrete integer scalar (u32 or i32) + r###"error: must be a const-expression that resolves to a concrete integer scalar (`u32` or `i32`) โ”Œโ”€ wgsl:3:36 โ”‚ 3 โ”‚ alias Bad = array; - โ”‚ ^^^^^^ must resolve to u32 or i32 + โ”‚ ^^^^^^ must resolve to `u32` or `i32` "###, ); @@ -1797,7 +1797,7 @@ fn binary_statement() { 3 + 5; } ", - r###"error: expected assignment or increment/decrement, found ';' + r###"error: expected assignment or increment/decrement, found ";" โ”Œโ”€ wgsl:3:18 โ”‚ 3 โ”‚ 3 + 5; @@ -1842,7 +1842,7 @@ fn assign_to_let() { 4 โ”‚ a = 20; โ”‚ ^ cannot assign to this expression โ”‚ - = note: consider declaring 'a' with `var` instead of `let` + = note: consider declaring `a` with `var` instead of `let` "###, ); @@ -1862,7 +1862,7 @@ fn assign_to_let() { 4 โ”‚ a[0] = 1; โ”‚ ^^^^ cannot assign to this expression โ”‚ - = note: consider declaring 'a' with `var` instead of `let` + = note: consider declaring `a` with `var` instead of `let` "###, ); @@ -1884,7 +1884,7 @@ fn assign_to_let() { 6 โ”‚ a.a = 20; โ”‚ ^^^ cannot assign to this expression โ”‚ - = note: consider declaring 'a' with `var` instead of `let` + = note: consider declaring `a` with `var` instead of `let` "###, ); @@ -1954,7 +1954,7 @@ fn switch_signed_unsigned_mismatch() { 4 โ”‚ case 1: {} โ”‚ ^ expected unsigned integer โ”‚ - = note: suffix the integer with a `u`: '1u' + = note: suffix the integer with a `u`: `1u` "###, ); @@ -1973,7 +1973,7 @@ fn switch_signed_unsigned_mismatch() { 4 โ”‚ case 1u: {} โ”‚ ^^ expected signed integer โ”‚ - = note: remove the `u` suffix: '1' + = note: remove the `u` suffix: `1` "###, ); @@ -2407,11 +2407,11 @@ fn const_assert_must_be_bool() { " const_assert(5); // 5 is not bool ", - r###"error: must be a const-expression that resolves to a bool + r###"error: must be a const-expression that resolves to a `bool` โ”Œโ”€ wgsl:2:26 โ”‚ 2 โ”‚ const_assert(5); // 5 is not bool - โ”‚ ^ must resolve to bool + โ”‚ ^ must resolve to `bool` "###, ); @@ -2423,12 +2423,26 @@ fn const_assert_failed() { " const_assert(false); ", - r###"error: const_assert failure + r###"error: `const_assert` failure โ”Œโ”€ wgsl:2:26 โ”‚ 2 โ”‚ const_assert(false); - โ”‚ ^^^^^ evaluates to false + โ”‚ ^^^^^ evaluates to `false` "###, ); } + +#[test] +fn reject_utf8_bom() { + check( + "\u{FEFF}fn main() {}", + r#"error: expected global item (`struct`, `const`, `var`, `alias`, `fn`, `diagnostic`, `enable`, `requires`, `;`) or the end of the file, found "\u{feff}" + โ”Œโ”€ wgsl:1:1 + โ”‚ +1 โ”‚ ๏ปฟfn main() {} + โ”‚ expected global item (`struct`, `const`, `var`, `alias`, `fn`, `diagnostic`, `enable`, `requires`, `;`) or the end of the file + +"#, + ); +} diff --git a/player/src/lib.rs b/player/src/lib.rs index f042ed6dd8..28ae23ad7c 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -133,7 +133,7 @@ impl GlobalPlay for wgc::global::Global { transform_buffer: tg.transform_buffer, first_vertex: tg.first_vertex, vertex_stride: tg.vertex_stride, - index_buffer_offset: tg.index_buffer_offset, + first_index: tg.first_index, transform_buffer_offset: tg.transform_buffer_offset, } }); @@ -171,7 +171,7 @@ impl GlobalPlay for wgc::global::Global { transform_buffer: tg.transform_buffer, first_vertex: tg.first_vertex, vertex_stride: tg.vertex_stride, - index_buffer_offset: tg.index_buffer_offset, + first_index: tg.first_index, transform_buffer_offset: tg.transform_buffer_offset, } }); @@ -451,18 +451,12 @@ impl GlobalPlay for wgc::global::Global { Action::CreateBlas { id, desc, sizes } => { self.device_create_blas(device, &desc, sizes, Some(id)); } - Action::FreeBlas(id) => { - self.blas_destroy(id).unwrap(); - } Action::DestroyBlas(id) => { self.blas_drop(id); } Action::CreateTlas { id, desc } => { self.device_create_tlas(device, &desc, Some(id)); } - Action::FreeTlas(id) => { - self.tlas_destroy(id).unwrap(); - } Action::DestroyTlas(id) => { self.tlas_drop(id); } diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 410de588f9..214aec9f04 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -45,7 +45,7 @@ serde_json.workspace = true serde.workspace = true strum = { workspace = true, features = ["derive"] } trybuild.workspace = true -wgpu = { workspace = true, features = ["wgsl", "static-dxc"] } +wgpu.workspace = true wgpu-macros.workspace = true wgt = { workspace = true, features = ["serde"] } diff --git a/tests/tests/image_atomics/image_32_atomics.wgsl b/tests/tests/image_atomics/image_32_atomics.wgsl new file mode 100644 index 0000000000..32ae1bb4ee --- /dev/null +++ b/tests/tests/image_atomics/image_32_atomics.wgsl @@ -0,0 +1,13 @@ +@group(0) @binding(0) +var image: texture_storage_2d; + +@compute +@workgroup_size(4, 4, 1) +fn cs_main(@builtin(local_invocation_id) id: vec3, @builtin(workgroup_id) group_id: vec3) { + let pixel = id + group_id * 4; + textureAtomicMax(image, pixel.xy, u32(pixel.x)); + + storageBarrier(); + + textureAtomicMin(image, pixel.xy, u32(pixel.y)); +} \ No newline at end of file diff --git a/tests/tests/image_atomics/image_64_atomics.wgsl b/tests/tests/image_atomics/image_64_atomics.wgsl new file mode 100644 index 0000000000..c9a967ca59 --- /dev/null +++ b/tests/tests/image_atomics/image_64_atomics.wgsl @@ -0,0 +1,13 @@ +@group(0) @binding(0) +var image: texture_storage_2d; + +@compute +@workgroup_size(4, 4, 1) +fn cs_main(@builtin(local_invocation_id) id: vec3, @builtin(workgroup_id) group_id: vec3) { + let pixel = id + group_id * 4; + textureAtomicMax(image, pixel.xy, u64(pixel.x)); + + storageBarrier(); + + textureAtomicMin(image, pixel.xy, u64(pixel.y)); +} \ No newline at end of file diff --git a/tests/tests/image_atomics/mod.rs b/tests/tests/image_atomics/mod.rs new file mode 100644 index 0000000000..0063602f4d --- /dev/null +++ b/tests/tests/image_atomics/mod.rs @@ -0,0 +1,217 @@ +//! Tests for image atomics. + +use wgpu::ShaderModuleDescriptor; +use wgpu_test::{ + fail, gpu_test, image::ReadbackBuffers, GpuTestConfiguration, TestParameters, TestingContext, +}; + +#[gpu_test] +static IMAGE_64_ATOMICS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .limits(wgpu::Limits { + max_storage_textures_per_shader_stage: 1, + max_compute_invocations_per_workgroup: 64, + max_compute_workgroup_size_x: 4, + max_compute_workgroup_size_y: 4, + max_compute_workgroup_size_z: 4, + max_compute_workgroups_per_dimension: wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, + ..wgpu::Limits::downlevel_webgl2_defaults() + }) + .features( + wgpu::Features::TEXTURE_ATOMIC + | wgpu::Features::TEXTURE_INT64_ATOMIC + | wgpu::Features::SHADER_INT64, + ), + ) + .run_async(|ctx| async move { + test_format( + ctx, + wgpu::TextureFormat::R64Uint, + wgpu::include_wgsl!("image_64_atomics.wgsl"), + ) + .await; + }); + +#[gpu_test] +static IMAGE_32_ATOMICS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .limits(wgpu::Limits { + max_storage_textures_per_shader_stage: 1, + max_compute_invocations_per_workgroup: 64, + max_compute_workgroup_size_x: 4, + max_compute_workgroup_size_y: 4, + max_compute_workgroup_size_z: 4, + max_compute_workgroups_per_dimension: wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, + ..wgpu::Limits::downlevel_webgl2_defaults() + }) + .features(wgpu::Features::TEXTURE_ATOMIC), + ) + .run_async(|ctx| async move { + test_format( + ctx, + wgpu::TextureFormat::R32Uint, + wgpu::include_wgsl!("image_32_atomics.wgsl"), + ) + .await; + }); + +async fn test_format( + ctx: TestingContext, + format: wgpu::TextureFormat, + desc: ShaderModuleDescriptor<'_>, +) { + let pixel_bytes = format.target_pixel_byte_cost().unwrap(); + let size = wgpu::Extent3d { + width: wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, + height: wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, + depth_or_array_layers: 1, + }; + let bind_group_layout_entry = wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::Atomic, + format, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: None, + }; + + let bind_group_layout = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[bind_group_layout_entry], + }); + + let pipeline_layout = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + let shader = ctx.device.create_shader_module(desc); + let pipeline = ctx + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("image atomics pipeline"), + layout: Some(&pipeline_layout), + module: &shader, + entry_point: Some("cs_main"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }); + + let tex = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: None, + dimension: wgpu::TextureDimension::D2, + size, + format, + usage: wgpu::TextureUsages::STORAGE_BINDING + | wgpu::TextureUsages::STORAGE_ATOMIC + | wgpu::TextureUsages::COPY_SRC, + mip_level_count: 1, + sample_count: 1, + view_formats: &[], + }); + let view = tex.create_view(&wgpu::TextureViewDescriptor { + format: Some(format), + aspect: wgpu::TextureAspect::All, + ..wgpu::TextureViewDescriptor::default() + }); + let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &pipeline.get_bind_group_layout(0), + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(&view), + }], + }); + + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + let mut rpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + rpass.set_pipeline(&pipeline); + rpass.set_bind_group(0, Some(&bind_group), &[]); + rpass.dispatch_workgroups(size.width, size.height, 1); + drop(rpass); + + let readback_buffers = ReadbackBuffers::new(&ctx.device, &tex); + readback_buffers.copy_from(&ctx.device, &mut encoder, &tex); + + ctx.queue.submit([encoder.finish()]); + + let padding = [0].repeat(pixel_bytes as usize - size_of::()); + let data: Vec = (0..size.width as usize * size.height as usize) + .flat_map(|i| { + let x = i as u32 % size.width; + let y = i as u32 / size.width; + [bytemuck::bytes_of(&u32::min(x, y)), &padding].concat() + }) + .collect(); + + readback_buffers.assert_buffer_contents(&ctx, &data).await; +} + +#[gpu_test] +static IMAGE_ATOMICS_NOT_ENABLED: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_sync(|ctx| { + let size = wgpu::Extent3d { + width: 256, + height: 256, + depth_or_array_layers: 1, + }; + + fail( + &ctx.device, + || { + let _ = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: None, + dimension: wgpu::TextureDimension::D2, + size, + format: wgpu::TextureFormat::R32Uint, + usage: wgpu::TextureUsages::STORAGE_ATOMIC, + mip_level_count: 1, + sample_count: 1, + view_formats: &[], + }); + }, + Some("Texture usages TextureUsages(STORAGE_ATOMIC) are not allowed on a texture of type R32Uint"), + ); + }); + +#[gpu_test] +static IMAGE_ATOMICS_NOT_SUPPORTED: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().features(wgpu::Features::TEXTURE_ATOMIC)) + .run_sync(|ctx| { + let size = wgpu::Extent3d { + width: 256, + height: 256, + depth_or_array_layers: 1, + }; + + fail( + &ctx.device, + || { + let _ = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: None, + dimension: wgpu::TextureDimension::D2, + size, + format: wgpu::TextureFormat::R8Uint, + usage: wgpu::TextureUsages::STORAGE_ATOMIC, + mip_level_count: 1, + sample_count: 1, + view_formats: &[], + }); + }, + Some("Texture usages TextureUsages(STORAGE_ATOMIC) are not allowed on a texture of type R8Uint"), + ); + }); diff --git a/tests/tests/ray_tracing/as_build.rs b/tests/tests/ray_tracing/as_build.rs index a75b280724..5255694011 100644 --- a/tests/tests/ray_tracing/as_build.rs +++ b/tests/tests/ray_tracing/as_build.rs @@ -75,7 +75,7 @@ impl AsBuildContext { first_vertex: 0, vertex_stride: mem::size_of::<[f32; 3]>() as BufferAddress, index_buffer: None, - index_buffer_offset: None, + first_index: None, transform_buffer: None, transform_buffer_offset: None, }]), @@ -406,7 +406,7 @@ fn build_with_transform(ctx: TestingContext) { first_vertex: 0, vertex_stride: mem::size_of::<[f32; 3]>() as BufferAddress, index_buffer: None, - index_buffer_offset: None, + first_index: None, transform_buffer: Some(&transform), transform_buffer_offset: Some(0), }]), diff --git a/tests/tests/ray_tracing/as_use_after_free.rs b/tests/tests/ray_tracing/as_use_after_free.rs index ae6c49da28..fcbc75b3a5 100644 --- a/tests/tests/ray_tracing/as_use_after_free.rs +++ b/tests/tests/ray_tracing/as_use_after_free.rs @@ -78,7 +78,7 @@ fn acceleration_structure_use_after_free(ctx: TestingContext) { first_vertex: 0, vertex_stride: mem::size_of::<[f32; 3]>() as BufferAddress, index_buffer: None, - index_buffer_offset: None, + first_index: None, transform_buffer: None, transform_buffer_offset: None, }]), diff --git a/tests/tests/ray_tracing/scene/mod.rs b/tests/tests/ray_tracing/scene/mod.rs index 299323af58..bd3a08da05 100644 --- a/tests/tests/ray_tracing/scene/mod.rs +++ b/tests/tests/ray_tracing/scene/mod.rs @@ -85,7 +85,7 @@ fn acceleration_structure_build(ctx: &TestingContext, use_index_buffer: bool) { first_vertex: 0, vertex_stride: mem::size_of::() as u64, index_buffer: use_index_buffer.then_some(&index_buffer), - index_buffer_offset: use_index_buffer.then_some(0), + first_index: use_index_buffer.then_some(0), transform_buffer: None, transform_buffer_offset: None, }]), diff --git a/tests/tests/root.rs b/tests/tests/root.rs index ebc7ebdc79..9e71ff60f9 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -27,6 +27,7 @@ mod dispatch_workgroups_indirect; mod encoder; mod external_texture; mod float32_filterable; +mod image_atomics; mod instance; mod life_cycle; mod mem_leaks; diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index c7867ab210..5f1ebce991 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -37,8 +37,10 @@ use thiserror::Error; pub enum BindGroupLayoutEntryError { #[error("Cube dimension is not expected for texture storage")] StorageTextureCube, - #[error("Read-write and read-only storage textures are not allowed by webgpu, they require the native only feature TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES")] + #[error("Read-write and read-only storage textures are not allowed by baseline webgpu, they require the native only feature TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES")] StorageTextureReadWrite, + #[error("Atomic storage textures are not allowed by baseline webgpu, they require the native only feature TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES")] + StorageTextureAtomic, #[error("Arrays of bindings unsupported for this type of binding")] ArrayUnsupported, #[error("Multisampled binding with sample type `TextureSampleType::Float` must have filterable set to false.")] @@ -185,6 +187,8 @@ pub enum CreateBindGroupError { DepthStencilAspect, #[error("The adapter does not support read access for storage textures of format {0:?}")] StorageReadNotSupported(wgt::TextureFormat), + #[error("The adapter does not support atomics for storage textures of format {0:?}")] + StorageAtomicNotSupported(wgt::TextureFormat), #[error("The adapter does not support write access for storage textures of format {0:?}")] StorageWriteNotSupported(wgt::TextureFormat), #[error("The adapter does not support read-write access for storage textures of format {0:?}")] diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index c7f433c3a0..6aa614ac5f 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -1174,10 +1174,7 @@ impl IndexState { /// /// Panic if no index buffer has been set. fn limit(&self) -> u64 { - let bytes_per_index = match self.format { - wgt::IndexFormat::Uint16 => 2, - wgt::IndexFormat::Uint32 => 4, - }; + let bytes_per_index = self.format.byte_size() as u64; (self.range.end - self.range.start) / bytes_per_index } diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 88cf874d3a..f4ff30a392 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -28,6 +28,7 @@ pub use timestamp_writes::PassTimestampWrites; use self::memory_init::CommandBufferTextureMemoryActions; +use crate::device::queue::TempResource; use crate::device::{Device, DeviceError, MissingFeatures}; use crate::lock::{rank, Mutex}; use crate::snatch::SnatchGuard; @@ -432,6 +433,7 @@ impl Drop for CommandEncoder { pub(crate) struct BakedCommands { pub(crate) encoder: CommandEncoder, pub(crate) trackers: Tracker, + pub(crate) temp_resources: Vec, buffer_memory_init_actions: Vec, texture_memory_actions: CommandBufferTextureMemoryActions, } @@ -460,6 +462,7 @@ pub struct CommandBufferMutable { blas_actions: Vec, tlas_actions: Vec, + temp_resources: Vec, #[cfg(feature = "trace")] pub(crate) commands: Option>, @@ -479,6 +482,7 @@ impl CommandBufferMutable { BakedCommands { encoder: self.encoder, trackers: self.trackers, + temp_resources: self.temp_resources, buffer_memory_init_actions: self.buffer_memory_init_actions, texture_memory_actions: self.texture_memory_actions, } @@ -545,6 +549,7 @@ impl CommandBuffer { pending_query_resets: QueryResetMap::new(), blas_actions: Default::default(), tlas_actions: Default::default(), + temp_resources: Default::default(), #[cfg(feature = "trace")] commands: if device.trace.lock().is_some() { Some(Vec::new()) diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs index 9395c20fc1..22970d542b 100644 --- a/wgpu-core/src/command/ray_tracing.rs +++ b/wgpu-core/src/command/ray_tracing.rs @@ -1,5 +1,5 @@ use crate::{ - device::{queue::TempResource, Device}, + device::queue::TempResource, global::Global, hub::Hub, id::CommandEncoderId, @@ -81,12 +81,7 @@ impl Global { let device = &cmd_buf.device; - if !device - .features - .contains(Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE) - { - return Err(BuildAccelerationStructureError::MissingFeature); - } + device.require_features(Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE)?; let build_command_index = NonZeroU64::new( device @@ -109,7 +104,7 @@ impl Global { transform_buffer: tg.transform_buffer, first_vertex: tg.first_vertex, vertex_stride: tg.vertex_stride, - index_buffer_offset: tg.index_buffer_offset, + first_index: tg.first_index, transform_buffer_offset: tg.transform_buffer_offset, }) .collect(), @@ -149,7 +144,7 @@ impl Global { transform_buffer: tg.transform_buffer, first_vertex: tg.first_vertex, vertex_stride: tg.vertex_stride, - index_buffer_offset: tg.index_buffer_offset, + first_index: tg.first_index, transform_buffer_offset: tg.transform_buffer_offset, }); BlasGeometries::TriangleGeometries(Box::new(iter)) @@ -179,7 +174,6 @@ impl Global { build_command_index, &mut buf_storage, hub, - device, )?; let snatch_guard = device.snatchable_lock.read(); @@ -199,18 +193,13 @@ impl Global { let mut tlas_buf_storage = Vec::new(); for entry in tlas_iter { - let instance_buffer = match hub.buffers.get(entry.instance_buffer_id).get() { - Ok(buffer) => buffer, - Err(_) => { - return Err(BuildAccelerationStructureError::InvalidBufferId); - } - }; + let instance_buffer = hub.buffers.get(entry.instance_buffer_id).get()?; let data = cmd_buf_data.trackers.buffers.set_single( &instance_buffer, BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, ); tlas_buf_storage.push(TlasBufferStore { - buffer: instance_buffer.clone(), + buffer: instance_buffer, transition: data, entry: entry.clone(), }); @@ -221,14 +210,9 @@ impl Global { let instance_buffer = { let (instance_buffer, instance_pending) = (&mut tlas_buf.buffer, &mut tlas_buf.transition); - let instance_raw = instance_buffer.raw.get(&snatch_guard).ok_or( - BuildAccelerationStructureError::InvalidBuffer(instance_buffer.error_ident()), - )?; - if !instance_buffer.usage.contains(BufferUsages::TLAS_INPUT) { - return Err(BuildAccelerationStructureError::MissingTlasInputUsageFlag( - instance_buffer.error_ident(), - )); - } + let instance_raw = instance_buffer.try_raw(&snatch_guard)?; + instance_buffer.check_usage(BufferUsages::TLAS_INPUT)?; + if let Some(barrier) = instance_pending .take() .map(|pending| pending.into_hal(instance_buffer, &snatch_guard)) @@ -238,15 +222,8 @@ impl Global { instance_raw }; - let tlas = hub - .tlas_s - .get(entry.tlas_id) - .get() - .map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?; - cmd_buf_data.trackers.tlas_s.set_single(tlas.clone()); - if let Some(queue) = device.get_queue() { - queue.pending_writes.lock().insert_tlas(&tlas); - } + let tlas = hub.tlas_s.get(entry.tlas_id).get()?; + cmd_buf_data.trackers.tlas_s.insert_single(tlas.clone()); cmd_buf_data.tlas_actions.push(TlasAction { tlas: tlas.clone(), @@ -266,7 +243,7 @@ impl Global { tlas, entries: hal::AccelerationStructureEntries::Instances( hal::AccelerationStructureInstances { - buffer: Some(instance_buffer.as_ref()), + buffer: Some(instance_buffer), offset: 0, count: entry.instance_count, }, @@ -311,9 +288,7 @@ impl Global { mode: hal::AccelerationStructureBuildMode::Build, flags: tlas.flags, source_acceleration_structure: None, - destination_acceleration_structure: tlas.raw(&snatch_guard).ok_or( - BuildAccelerationStructureError::InvalidTlas(tlas.error_ident()), - )?, + destination_acceleration_structure: tlas.try_raw(&snatch_guard)?, scratch_buffer: scratch_buffer.raw(), scratch_buffer_offset: *scratch_buffer_offset, }) @@ -354,12 +329,9 @@ impl Global { } } - if let Some(queue) = device.get_queue() { - queue - .pending_writes - .lock() - .consume_temp(TempResource::ScratchBuffer(scratch_buffer)); - } + cmd_buf_data + .temp_resources + .push(TempResource::ScratchBuffer(scratch_buffer)); cmd_buf_data_guard.mark_successful(); Ok(()) @@ -381,12 +353,7 @@ impl Global { let device = &cmd_buf.device; - if !device - .features - .contains(Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE) - { - return Err(BuildAccelerationStructureError::MissingFeature); - } + device.require_features(Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE)?; let build_command_index = NonZeroU64::new( device @@ -408,7 +375,7 @@ impl Global { transform_buffer: tg.transform_buffer, first_vertex: tg.first_vertex, vertex_stride: tg.vertex_stride, - index_buffer_offset: tg.index_buffer_offset, + first_index: tg.first_index, transform_buffer_offset: tg.transform_buffer_offset, }) .collect(), @@ -461,7 +428,7 @@ impl Global { transform_buffer: tg.transform_buffer, first_vertex: tg.first_vertex, vertex_stride: tg.vertex_stride, - index_buffer_offset: tg.index_buffer_offset, + first_index: tg.first_index, transform_buffer_offset: tg.transform_buffer_offset, }); BlasGeometries::TriangleGeometries(Box::new(iter)) @@ -504,7 +471,6 @@ impl Global { build_command_index, &mut buf_storage, hub, - device, )?; let snatch_guard = device.snatchable_lock.read(); @@ -521,17 +487,11 @@ impl Global { let mut tlas_lock_store = Vec::<(Option, Arc)>::new(); for package in tlas_iter { - let tlas = hub - .tlas_s - .get(package.tlas_id) - .get() - .map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?; - if let Some(queue) = device.get_queue() { - queue.pending_writes.lock().insert_tlas(&tlas); - } - cmd_buf_data.trackers.tlas_s.set_single(tlas.clone()); + let tlas = hub.tlas_s.get(package.tlas_id).get()?; + + cmd_buf_data.trackers.tlas_s.insert_single(tlas.clone()); - tlas_lock_store.push((Some(package), tlas.clone())) + tlas_lock_store.push((Some(package), tlas)) } let mut scratch_buffer_tlas_size = 0; @@ -558,14 +518,9 @@ impl Global { tlas.error_ident(), )); } - let blas = hub - .blas_s - .get(instance.blas_id) - .get() - .map_err(|_| BuildAccelerationStructureError::InvalidBlasIdForInstance)? - .clone(); + let blas = hub.blas_s.get(instance.blas_id).get()?; - cmd_buf_data.trackers.blas_s.set_single(blas.clone()); + cmd_buf_data.trackers.blas_s.insert_single(blas.clone()); instance_buffer_staging_source.extend(device.raw().tlas_instance_to_bytes( hal::TlasInstance { @@ -581,7 +536,7 @@ impl Global { dependencies.push(blas.clone()); cmd_buf_data.blas_actions.push(BlasAction { - blas: blas.clone(), + blas, kind: crate::ray_tracing::BlasActionKind::Use, }); } @@ -659,13 +614,7 @@ impl Global { mode: hal::AccelerationStructureBuildMode::Build, flags: tlas.flags, source_acceleration_structure: None, - destination_acceleration_structure: tlas - .raw - .get(&snatch_guard) - .ok_or(BuildAccelerationStructureError::InvalidTlas( - tlas.error_ident(), - ))? - .as_ref(), + destination_acceleration_structure: tlas.try_raw(&snatch_guard)?, scratch_buffer: scratch_buffer.raw(), scratch_buffer_offset: *scratch_buffer_offset, }) @@ -773,21 +722,15 @@ impl Global { } if let Some(staging_buffer) = staging_buffer { - if let Some(queue) = device.get_queue() { - queue - .pending_writes - .lock() - .consume_temp(TempResource::StagingBuffer(staging_buffer)); - } + cmd_buf_data + .temp_resources + .push(TempResource::StagingBuffer(staging_buffer)); } } - if let Some(queue) = device.get_queue() { - queue - .pending_writes - .lock() - .consume_temp(TempResource::ScratchBuffer(scratch_buffer)); - } + cmd_buf_data + .temp_resources + .push(TempResource::ScratchBuffer(scratch_buffer)); cmd_buf_data_guard.mark_successful(); Ok(()) @@ -857,9 +800,7 @@ impl CommandBufferMutable { action.tlas.error_ident(), )); } - if blas.raw.get(snatch_guard).is_none() { - return Err(ValidateTlasActionsError::InvalidBlas(blas.error_ident())); - } + blas.try_raw(snatch_guard)?; } } } @@ -875,19 +816,11 @@ fn iter_blas<'a>( build_command_index: NonZeroU64, buf_storage: &mut Vec>, hub: &Hub, - device: &Device, ) -> Result<(), BuildAccelerationStructureError> { let mut temp_buffer = Vec::new(); for entry in blas_iter { - let blas = hub - .blas_s - .get(entry.blas_id) - .get() - .map_err(|_| BuildAccelerationStructureError::InvalidBlasId)?; - cmd_buf_data.trackers.blas_s.set_single(blas.clone()); - if let Some(queue) = device.get_queue() { - queue.pending_writes.lock().insert_blas(&blas); - } + let blas = hub.blas_s.get(entry.blas_id).get()?; + cmd_buf_data.trackers.blas_s.insert_single(blas.clone()); cmd_buf_data.blas_actions.push(BlasAction { blas: blas.clone(), @@ -966,20 +899,14 @@ fn iter_blas<'a>( blas.error_ident(), )); } - let vertex_buffer = match hub.buffers.get(mesh.vertex_buffer).get() { - Ok(buffer) => buffer, - Err(_) => return Err(BuildAccelerationStructureError::InvalidBufferId), - }; + let vertex_buffer = hub.buffers.get(mesh.vertex_buffer).get()?; let vertex_pending = cmd_buf_data.trackers.buffers.set_single( &vertex_buffer, BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, ); let index_data = if let Some(index_id) = mesh.index_buffer { - let index_buffer = match hub.buffers.get(index_id).get() { - Ok(buffer) => buffer, - Err(_) => return Err(BuildAccelerationStructureError::InvalidBufferId), - }; - if mesh.index_buffer_offset.is_none() + let index_buffer = hub.buffers.get(index_id).get()?; + if mesh.first_index.is_none() || mesh.size.index_count.is_none() || mesh.size.index_count.is_none() { @@ -991,15 +918,12 @@ fn iter_blas<'a>( &index_buffer, BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, ); - Some((index_buffer.clone(), data)) + Some((index_buffer, data)) } else { None }; let transform_data = if let Some(transform_id) = mesh.transform_buffer { - let transform_buffer = match hub.buffers.get(transform_id).get() { - Ok(buffer) => buffer, - Err(_) => return Err(BuildAccelerationStructureError::InvalidBufferId), - }; + let transform_buffer = hub.buffers.get(transform_id).get()?; if mesh.transform_buffer_offset.is_none() { return Err(BuildAccelerationStructureError::MissingAssociatedData( transform_buffer.error_ident(), @@ -1014,7 +938,7 @@ fn iter_blas<'a>( None }; temp_buffer.push(TriangleBufferStore { - vertex_buffer: vertex_buffer.clone(), + vertex_buffer, vertex_transition: vertex_pending, index_buffer_transition: index_data, transform_buffer_transition: transform_data, @@ -1024,7 +948,7 @@ fn iter_blas<'a>( } if let Some(last) = temp_buffer.last_mut() { - last.ending_blas = Some(blas.clone()); + last.ending_blas = Some(blas); buf_storage.append(&mut temp_buffer); } } @@ -1050,14 +974,9 @@ fn iter_buffers<'a, 'b>( let mesh = &buf.geometry; let vertex_buffer = { let vertex_buffer = buf.vertex_buffer.as_ref(); - let vertex_raw = vertex_buffer.raw.get(snatch_guard).ok_or( - BuildAccelerationStructureError::InvalidBuffer(vertex_buffer.error_ident()), - )?; - if !vertex_buffer.usage.contains(BufferUsages::BLAS_INPUT) { - return Err(BuildAccelerationStructureError::MissingBlasInputUsageFlag( - vertex_buffer.error_ident(), - )); - } + let vertex_raw = vertex_buffer.try_raw(snatch_guard)?; + vertex_buffer.check_usage(BufferUsages::BLAS_INPUT)?; + if let Some(barrier) = buf .vertex_transition .take() @@ -1077,10 +996,7 @@ fn iter_buffers<'a, 'b>( let vertex_buffer_offset = mesh.first_vertex as u64 * mesh.vertex_stride; cmd_buf_data.buffer_memory_init_actions.extend( vertex_buffer.initialization_status.read().create_action( - &hub.buffers - .get(mesh.vertex_buffer) - .get() - .map_err(|_| BuildAccelerationStructureError::InvalidBufferId)?, + &hub.buffers.get(mesh.vertex_buffer).get()?, vertex_buffer_offset ..(vertex_buffer_offset + mesh.size.vertex_count as u64 * mesh.vertex_stride), @@ -1092,29 +1008,17 @@ fn iter_buffers<'a, 'b>( let index_buffer = if let Some((ref mut index_buffer, ref mut index_pending)) = buf.index_buffer_transition { - let index_raw = index_buffer.raw.get(snatch_guard).ok_or( - BuildAccelerationStructureError::InvalidBuffer(index_buffer.error_ident()), - )?; - if !index_buffer.usage.contains(BufferUsages::BLAS_INPUT) { - return Err(BuildAccelerationStructureError::MissingBlasInputUsageFlag( - index_buffer.error_ident(), - )); - } + let index_raw = index_buffer.try_raw(snatch_guard)?; + index_buffer.check_usage(BufferUsages::BLAS_INPUT)?; + if let Some(barrier) = index_pending .take() .map(|pending| pending.into_hal(index_buffer, snatch_guard)) { input_barriers.push(barrier); } - let index_stride = match mesh.size.index_format.unwrap() { - wgt::IndexFormat::Uint16 => 2, - wgt::IndexFormat::Uint32 => 4, - }; - if mesh.index_buffer_offset.unwrap() % index_stride != 0 { - return Err(BuildAccelerationStructureError::UnalignedIndexBufferOffset( - index_buffer.error_ident(), - )); - } + let index_stride = mesh.size.index_format.unwrap().byte_size() as u64; + let offset = mesh.first_index.unwrap() as u64 * index_stride; let index_buffer_size = mesh.size.index_count.unwrap() as u64 * index_stride; if mesh.size.index_count.unwrap() % 3 != 0 { @@ -1123,23 +1027,18 @@ fn iter_buffers<'a, 'b>( mesh.size.index_count.unwrap(), )); } - if index_buffer.size - < mesh.size.index_count.unwrap() as u64 * index_stride - + mesh.index_buffer_offset.unwrap() - { + if index_buffer.size < mesh.size.index_count.unwrap() as u64 * index_stride + offset { return Err(BuildAccelerationStructureError::InsufficientBufferSize( index_buffer.error_ident(), index_buffer.size, - mesh.size.index_count.unwrap() as u64 * index_stride - + mesh.index_buffer_offset.unwrap(), + mesh.size.index_count.unwrap() as u64 * index_stride + offset, )); } cmd_buf_data.buffer_memory_init_actions.extend( index_buffer.initialization_status.read().create_action( index_buffer, - mesh.index_buffer_offset.unwrap() - ..(mesh.index_buffer_offset.unwrap() + index_buffer_size), + offset..(offset + index_buffer_size), MemoryInitKind::NeedsInitializedMemory, ), ); @@ -1155,14 +1054,9 @@ fn iter_buffers<'a, 'b>( transform_buffer.error_ident(), )); } - let transform_raw = transform_buffer.raw.get(snatch_guard).ok_or( - BuildAccelerationStructureError::InvalidBuffer(transform_buffer.error_ident()), - )?; - if !transform_buffer.usage.contains(BufferUsages::BLAS_INPUT) { - return Err(BuildAccelerationStructureError::MissingBlasInputUsageFlag( - transform_buffer.error_ident(), - )); - } + let transform_raw = transform_buffer.try_raw(snatch_guard)?; + transform_buffer.check_usage(BufferUsages::BLAS_INPUT)?; + if let Some(barrier) = transform_pending .take() .map(|pending| pending.into_hal(transform_buffer, snatch_guard)) @@ -1199,22 +1093,23 @@ fn iter_buffers<'a, 'b>( }; let triangles = hal::AccelerationStructureTriangles { - vertex_buffer: Some(vertex_buffer.as_ref()), + vertex_buffer: Some(vertex_buffer), vertex_format: mesh.size.vertex_format, first_vertex: mesh.first_vertex, vertex_count: mesh.size.vertex_count, vertex_stride: mesh.vertex_stride, - indices: index_buffer.map(|index_buffer| hal::AccelerationStructureTriangleIndices::< - dyn hal::DynBuffer, - > { - format: mesh.size.index_format.unwrap(), - buffer: Some(index_buffer.as_ref()), - offset: mesh.index_buffer_offset.unwrap() as u32, - count: mesh.size.index_count.unwrap(), + indices: index_buffer.map(|index_buffer| { + let index_stride = mesh.size.index_format.unwrap().byte_size() as u32; + hal::AccelerationStructureTriangleIndices:: { + format: mesh.size.index_format.unwrap(), + buffer: Some(index_buffer), + offset: mesh.first_index.unwrap() * index_stride, + count: mesh.size.index_count.unwrap(), + } }), transform: transform_buffer.map(|transform_buffer| { hal::AccelerationStructureTriangleTransform { - buffer: transform_buffer.as_ref(), + buffer: transform_buffer, offset: mesh.transform_buffer_offset.unwrap() as u32, } }), @@ -1264,13 +1159,7 @@ fn map_blas<'a>( mode: hal::AccelerationStructureBuildMode::Build, flags: blas.flags, source_acceleration_structure: None, - destination_acceleration_structure: blas - .raw - .get(snatch_guard) - .ok_or(BuildAccelerationStructureError::InvalidBlas( - blas.error_ident(), - ))? - .as_ref(), + destination_acceleration_structure: blas.try_raw(snatch_guard)?, scratch_buffer, scratch_buffer_offset: *scratch_buffer_offset, }) diff --git a/wgpu-core/src/conv.rs b/wgpu-core/src/conv.rs index a4f967c4c5..27eaff6039 100644 --- a/wgpu-core/src/conv.rs +++ b/wgpu-core/src/conv.rs @@ -145,6 +145,10 @@ pub fn map_texture_usage( hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::DEPTH_STENCIL_WRITE, usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) && !is_color, ); + u.set( + hal::TextureUses::STORAGE_ATOMIC, + usage.contains(wgt::TextureUsages::STORAGE_ATOMIC), + ); u } @@ -200,6 +204,10 @@ pub fn map_texture_usage_from_hal(uses: hal::TextureUses) -> wgt::TextureUsages wgt::TextureUsages::RENDER_ATTACHMENT, uses.contains(hal::TextureUses::COLOR_TARGET), ); + u.set( + wgt::TextureUsages::STORAGE_ATOMIC, + uses.contains(hal::TextureUses::STORAGE_ATOMIC), + ); u } diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 83fe377d81..4d91d1d98f 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -9,7 +9,6 @@ use crate::{ }; use smallvec::SmallVec; -use crate::resource::{Blas, Tlas}; use std::sync::Arc; use thiserror::Error; @@ -29,9 +28,6 @@ struct ActiveSubmission { /// submission has completed. index: SubmissionIndex, - /// Temporary resources to be freed once this queue submission has completed. - temp_resources: Vec, - /// Buffers to be mapped once this submission has completed. mapped: Vec>, @@ -104,44 +100,6 @@ impl ActiveSubmission { false } - - pub fn contains_blas(&self, blas: &Blas) -> bool { - for encoder in &self.encoders { - // The ownership location of blas's depends on where the command encoder - // came from. If it is the staging command encoder on the queue, it is - // in the pending buffer list. If it came from a user command encoder, - // it is in the tracker. - - if encoder.trackers.blas_s.contains(blas) { - return true; - } - - if encoder.pending_blas_s.contains_key(&blas.tracker_index()) { - return true; - } - } - - false - } - - pub fn contains_tlas(&self, tlas: &Tlas) -> bool { - for encoder in &self.encoders { - // The ownership location of tlas's depends on where the command encoder - // came from. If it is the staging command encoder on the queue, it is - // in the pending buffer list. If it came from a user command encoder, - // it is in the tracker. - - if encoder.trackers.tlas_s.contains(tlas) { - return true; - } - - if encoder.pending_tlas_s.contains_key(&tlas.tracker_index()) { - return true; - } - } - - false - } } #[derive(Clone, Debug, Error)] @@ -211,15 +169,9 @@ impl LifetimeTracker { } /// Start tracking resources associated with a new queue submission. - pub fn track_submission( - &mut self, - index: SubmissionIndex, - temp_resources: impl Iterator, - encoders: Vec, - ) { + pub fn track_submission(&mut self, index: SubmissionIndex, encoders: Vec) { self.active.push(ActiveSubmission { index, - temp_resources: temp_resources.collect(), mapped: Vec::new(), encoders, work_done_closures: SmallVec::new(), @@ -257,34 +209,6 @@ impl LifetimeTracker { }) } - /// Returns the submission index of the most recent submission that uses the - /// given blas. - pub fn get_blas_latest_submission_index(&self, blas: &Blas) -> Option { - // We iterate in reverse order, so that we can bail out early as soon - // as we find a hit. - self.active.iter().rev().find_map(|submission| { - if submission.contains_blas(blas) { - Some(submission.index) - } else { - None - } - }) - } - - /// Returns the submission index of the most recent submission that uses the - /// given tlas. - pub fn get_tlas_latest_submission_index(&self, tlas: &Tlas) -> Option { - // We iterate in reverse order, so that we can bail out early as soon - // as we find a hit. - self.active.iter().rev().find_map(|submission| { - if submission.contains_tlas(tlas) { - Some(submission.index) - } else { - None - } - }) - } - /// Returns the submission index of the most recent submission that uses the /// given texture. pub fn get_texture_latest_submission_index( @@ -340,7 +264,6 @@ impl LifetimeTracker { profiling::scope!("drop command buffer trackers"); drop(encoder); } - drop(a.temp_resources); work_done_closures.extend(a.work_done_closures); } work_done_closures @@ -355,7 +278,12 @@ impl LifetimeTracker { .active .iter_mut() .find(|a| a.index == last_submit_index) - .map(|a| &mut a.temp_resources); + .map(|a| { + // Because this resource's `last_submit_index` matches `a.index`, + // we know that we must have done something with the resource, + // so `a.encoders` should not be empty. + &mut a.encoders.last_mut().unwrap().temp_resources + }); if let Some(resources) = resources { resources.push(temp_resource); } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 14ff1ec9b3..e9600f72d6 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -425,6 +425,14 @@ pub fn create_validator( Caps::SHADER_INT64_ATOMIC_ALL_OPS, features.contains(wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS), ); + caps.set( + Caps::TEXTURE_ATOMIC, + features.contains(wgt::Features::TEXTURE_ATOMIC), + ); + caps.set( + Caps::TEXTURE_INT64_ATOMIC, + features.contains(wgt::Features::TEXTURE_INT64_ATOMIC), + ); caps.set( Caps::SHADER_FLOAT32_ATOMIC, features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC), diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index cd6731ae04..763edf2121 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -27,7 +27,6 @@ use crate::{ use smallvec::SmallVec; -use crate::resource::{Blas, DestroyedAccelerationStructure, Tlas}; use crate::scratch::ScratchBuffer; use std::{ iter, @@ -257,7 +256,6 @@ pub enum TempResource { ScratchBuffer(ScratchBuffer), DestroyedBuffer(DestroyedBuffer), DestroyedTexture(DestroyedTexture), - DestroyedAccelerationStructure(DestroyedAccelerationStructure), } /// A series of raw [`CommandBuffer`]s that have been submitted to a @@ -268,15 +266,12 @@ pub enum TempResource { pub(crate) struct EncoderInFlight { inner: crate::command::CommandEncoder, pub(crate) trackers: Tracker, + pub(crate) temp_resources: Vec, /// These are the buffers that have been tracked by `PendingWrites`. pub(crate) pending_buffers: FastHashMap>, /// These are the textures that have been tracked by `PendingWrites`. pub(crate) pending_textures: FastHashMap>, - /// These are the BLASes that have been tracked by `PendingWrites`. - pub(crate) pending_blas_s: FastHashMap>, - /// These are the TLASes that have been tracked by `PendingWrites`. - pub(crate) pending_tlas_s: FastHashMap>, } /// A private command encoder for writes made directly on the device @@ -314,8 +309,6 @@ pub(crate) struct PendingWrites { temp_resources: Vec, dst_buffers: FastHashMap>, dst_textures: FastHashMap>, - dst_blas_s: FastHashMap>, - dst_tlas_s: FastHashMap>, } impl PendingWrites { @@ -326,8 +319,6 @@ impl PendingWrites { temp_resources: Vec::new(), dst_buffers: FastHashMap::default(), dst_textures: FastHashMap::default(), - dst_blas_s: FastHashMap::default(), - dst_tlas_s: FastHashMap::default(), } } @@ -349,22 +340,6 @@ impl PendingWrites { self.dst_textures.contains_key(&texture.tracker_index()) } - pub fn insert_blas(&mut self, blas: &Arc) { - self.dst_blas_s.insert(blas.tracker_index(), blas.clone()); - } - - pub fn insert_tlas(&mut self, tlas: &Arc) { - self.dst_tlas_s.insert(tlas.tracker_index(), tlas.clone()); - } - - pub fn contains_blas(&mut self, blas: &Arc) -> bool { - self.dst_blas_s.contains_key(&blas.tracker_index()) - } - - pub fn contains_tlas(&mut self, tlas: &Arc) -> bool { - self.dst_tlas_s.contains_key(&tlas.tracker_index()) - } - pub fn consume_temp(&mut self, resource: TempResource) { self.temp_resources.push(resource); } @@ -383,8 +358,6 @@ impl PendingWrites { if self.is_recording { let pending_buffers = mem::take(&mut self.dst_buffers); let pending_textures = mem::take(&mut self.dst_textures); - let pending_blas_s = mem::take(&mut self.dst_blas_s); - let pending_tlas_s = mem::take(&mut self.dst_tlas_s); let cmd_buf = unsafe { self.command_encoder.end_encoding() } .map_err(|e| device.handle_hal_error(e))?; @@ -403,10 +376,9 @@ impl PendingWrites { hal_label: None, }, trackers: Tracker::new(), + temp_resources: mem::take(&mut self.temp_resources), pending_buffers, pending_textures, - pending_blas_s, - pending_tlas_s, }; Ok(Some(encoder)) } else { @@ -1223,10 +1195,9 @@ impl Queue { active_executions.push(EncoderInFlight { inner: baked.encoder, trackers: baked.trackers, + temp_resources: baked.temp_resources, pending_buffers: FastHashMap::default(), pending_textures: FastHashMap::default(), - pending_blas_s: FastHashMap::default(), - pending_tlas_s: FastHashMap::default(), }); } @@ -1323,11 +1294,8 @@ impl Queue { profiling::scope!("cleanup"); // this will register the new submission to the life time tracker - self.lock_life().track_submission( - submit_index, - pending_writes.temp_resources.drain(..), - active_executions, - ); + self.lock_life() + .track_submission(submit_index, active_executions); drop(pending_writes); // This will schedule destruction of all resources that are no longer needed diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs index 12afc7e6a8..0917831afa 100644 --- a/wgpu-core/src/device/ray_tracing.rs +++ b/wgpu-core/src/device/ray_tracing.rs @@ -1,12 +1,12 @@ use std::mem::ManuallyDrop; use std::sync::Arc; +use crate::api_log; #[cfg(feature = "trace")] use crate::device::trace; -use crate::lock::{rank, Mutex}; +use crate::lock::rank; use crate::resource::{Fallible, TrackingData}; use crate::snatch::Snatchable; -use crate::weak_vec::WeakVec; use crate::{ device::{Device, DeviceError}, global::Global, @@ -24,6 +24,9 @@ impl Device { blas_desc: &resource::BlasDescriptor, sizes: wgt::BlasGeometrySizeDescriptors, ) -> Result, CreateBlasError> { + self.check_is_valid()?; + self.require_features(Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE)?; + let size_info = match &sizes { wgt::BlasGeometrySizeDescriptors::Triangles { descriptors } => { let mut entries = @@ -109,6 +112,9 @@ impl Device { self: &Arc, desc: &resource::TlasDescriptor, ) -> Result, CreateTlasError> { + self.check_is_valid()?; + self.require_features(Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE)?; + let size_info = unsafe { self.raw().get_acceleration_structure_build_sizes( &hal::GetAccelerationStructureBuildSizesDescriptor { @@ -159,7 +165,6 @@ impl Device { label: desc.label.to_string(), max_instance_count: desc.max_instances, tracking_data: TrackingData::new(self.tracker_indices.tlas_s.clone()), - bind_groups: Mutex::new(rank::TLAS_BIND_GROUPS, WeakVec::new()), })) } } @@ -174,23 +179,10 @@ impl Global { ) -> (BlasId, Option, Option) { profiling::scope!("Device::create_blas"); - let hub = &self.hub; - let fid = hub.blas_s.prepare(id_in); + let fid = self.hub.blas_s.prepare(id_in); - let device_guard = hub.devices.read(); let error = 'error: { - let device = device_guard.get(device_id); - match device.check_is_valid() { - Ok(_) => {} - Err(err) => break 'error CreateBlasError::Device(err), - }; - - if !device - .features - .contains(Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE) - { - break 'error CreateBlasError::MissingFeature; - } + let device = self.hub.devices.get(device_id); #[cfg(feature = "trace")] if let Some(trace) = device.trace.lock().as_mut() { @@ -207,8 +199,8 @@ impl Global { }; let handle = blas.handle; - let id = fid.assign(Fallible::Valid(blas.clone())); - log::info!("Created blas {:?} with {:?}", id, desc); + let id = fid.assign(Fallible::Valid(blas)); + api_log!("Device::create_blas -> {id:?}"); return (id, Some(handle), None); }; @@ -225,23 +217,10 @@ impl Global { ) -> (TlasId, Option) { profiling::scope!("Device::create_tlas"); - let hub = &self.hub; - let fid = hub.tlas_s.prepare(id_in); + let fid = self.hub.tlas_s.prepare(id_in); - let device_guard = hub.devices.read(); let error = 'error: { - let device = device_guard.get(device_id); - match device.check_is_valid() { - Ok(_) => {} - Err(e) => break 'error CreateTlasError::Device(e), - } - - if !device - .features - .contains(Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE) - { - break 'error CreateTlasError::MissingFeature; - } + let device = self.hub.devices.get(device_id); #[cfg(feature = "trace")] if let Some(trace) = device.trace.lock().as_mut() { @@ -257,7 +236,7 @@ impl Global { }; let id = fid.assign(Fallible::Valid(tlas)); - log::info!("Created tlas {:?} with {:?}", id, desc); + api_log!("Device::create_tlas -> {id:?}"); return (id, None); }; @@ -266,88 +245,29 @@ impl Global { (id, Some(error)) } - pub fn blas_destroy(&self, blas_id: BlasId) -> Result<(), resource::DestroyError> { - profiling::scope!("Blas::destroy"); - log::info!("Blas::destroy {blas_id:?}"); - - let hub = &self.hub; - - let blas = hub.blas_s.get(blas_id).get()?; - let _device = &blas.device; - - #[cfg(feature = "trace")] - if let Some(trace) = _device.trace.lock().as_mut() { - trace.add(trace::Action::FreeBlas(blas_id)); - } - - blas.destroy() - } - pub fn blas_drop(&self, blas_id: BlasId) { profiling::scope!("Blas::drop"); - log::debug!("blas {:?} is dropped", blas_id); + api_log!("Blas::drop {blas_id:?}"); - let hub = &self.hub; - - let _blas = match hub.blas_s.remove(blas_id).get() { - Ok(blas) => blas, - Err(_) => { - return; - } - }; + let _blas = self.hub.blas_s.remove(blas_id); #[cfg(feature = "trace")] - { - let mut lock = _blas.device.trace.lock(); - - if let Some(t) = lock.as_mut() { + if let Ok(blas) = _blas.get() { + if let Some(t) = blas.device.trace.lock().as_mut() { t.add(trace::Action::DestroyBlas(blas_id)); } } } - pub fn tlas_destroy(&self, tlas_id: TlasId) -> Result<(), resource::DestroyError> { - profiling::scope!("Tlas::destroy"); - - let hub = &self.hub; - - log::info!("Tlas {:?} is destroyed", tlas_id); - let tlas_guard = hub.tlas_s.write(); - let tlas = tlas_guard - .get(tlas_id) - .get() - .map_err(resource::DestroyError::InvalidResource)? - .clone(); - drop(tlas_guard); - - let _device = &mut tlas.device.clone(); - - #[cfg(feature = "trace")] - if let Some(trace) = _device.trace.lock().as_mut() { - trace.add(trace::Action::FreeTlas(tlas_id)); - } - - tlas.destroy() - } - pub fn tlas_drop(&self, tlas_id: TlasId) { profiling::scope!("Tlas::drop"); - log::debug!("tlas {:?} is dropped", tlas_id); + api_log!("Tlas::drop {tlas_id:?}"); - let hub = &self.hub; - - let _tlas = match hub.tlas_s.remove(tlas_id).get() { - Ok(tlas) => tlas, - Err(_) => { - return; - } - }; + let _tlas = self.hub.tlas_s.remove(tlas_id); #[cfg(feature = "trace")] - { - let mut lock = _tlas.device.trace.lock(); - - if let Some(t) = lock.as_mut() { + if let Ok(tlas) = _tlas.get() { + if let Some(t) = tlas.device.trace.lock().as_mut() { t.add(trace::Action::DestroyTlas(tlas_id)); } } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 79d70f424e..681367735f 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -38,7 +38,7 @@ use wgt::{ math::align_to, DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension, }; -use crate::resource::{AccelerationStructure, DestroyedResourceError, Tlas}; +use crate::resource::{AccelerationStructure, Tlas}; use std::{ borrow::Cow, mem::{self, ManuallyDrop}, @@ -1790,6 +1790,14 @@ impl Device { _ => (), } match access { + wgt::StorageTextureAccess::Atomic + if !self.features.contains(wgt::Features::TEXTURE_ATOMIC) => + { + return Err(binding_model::CreateBindGroupLayoutError::Entry { + binding: entry.binding, + error: BindGroupLayoutEntryError::StorageTextureAtomic, + }); + } wgt::StorageTextureAccess::ReadOnly | wgt::StorageTextureAccess::ReadWrite if !self.features.contains( @@ -1820,6 +1828,10 @@ impl Device { wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES; WritableStorage::Yes } + wgt::StorageTextureAccess::Atomic => { + required_features |= wgt::Features::TEXTURE_ATOMIC; + WritableStorage::Yes + } }, ) } @@ -2174,9 +2186,7 @@ impl Device { } } - Ok(tlas - .raw(snatch_guard) - .ok_or(DestroyedResourceError(tlas.error_ident()))?) + Ok(tlas.try_raw(snatch_guard)?) } // This function expects the provided bind group layout to be resolved @@ -2550,6 +2560,17 @@ impl Device { hal::TextureUses::STORAGE_READ_WRITE } + wgt::StorageTextureAccess::Atomic => { + if !view + .format_features + .flags + .contains(wgt::TextureFormatFeatureFlags::STORAGE_ATOMIC) + { + return Err(Error::StorageAtomicNotSupported(view.desc.format)); + } + + hal::TextureUses::STORAGE_ATOMIC + } }; view.check_usage(wgt::TextureUsages::STORAGE_BINDING)?; Ok(internal_use) diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 2274d9e945..16902ea865 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -132,13 +132,11 @@ pub enum Action<'a> { desc: crate::resource::BlasDescriptor<'a>, sizes: wgt::BlasGeometrySizeDescriptors, }, - FreeBlas(id::BlasId), DestroyBlas(id::BlasId), CreateTlas { id: id::TlasId, desc: crate::resource::TlasDescriptor<'a>, }, - FreeTlas(id::TlasId), DestroyTlas(id::TlasId), } diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs index fbf366982d..2fdfde9116 100644 --- a/wgpu-core/src/id.rs +++ b/wgpu-core/src/id.rs @@ -263,7 +263,6 @@ ids! { pub type QuerySetId QuerySet; pub type BlasId Blas; pub type TlasId Tlas; - pub type TlasInstanceId TlasInstance; } // The CommandBuffer type serves both as encoder and diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index 34fdabb637..75340ddb78 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -538,13 +538,20 @@ impl Adapter { allowed_usages.set( wgt::TextureUsages::STORAGE_BINDING, caps.intersects( - Tfc::STORAGE_WRITE_ONLY | Tfc::STORAGE_READ_ONLY | Tfc::STORAGE_READ_WRITE, + Tfc::STORAGE_WRITE_ONLY + | Tfc::STORAGE_READ_ONLY + | Tfc::STORAGE_READ_WRITE + | Tfc::STORAGE_ATOMIC, ), ); allowed_usages.set( wgt::TextureUsages::RENDER_ATTACHMENT, caps.intersects(Tfc::COLOR_ATTACHMENT | Tfc::DEPTH_STENCIL_ATTACHMENT), ); + allowed_usages.set( + wgt::TextureUsages::STORAGE_ATOMIC, + caps.contains(Tfc::STORAGE_ATOMIC), + ); let mut flags = wgt::TextureFormatFeatureFlags::empty(); flags.set( @@ -560,6 +567,11 @@ impl Adapter { caps.contains(Tfc::STORAGE_READ_WRITE), ); + flags.set( + wgt::TextureFormatFeatureFlags::STORAGE_ATOMIC, + caps.contains(Tfc::STORAGE_ATOMIC), + ); + flags.set( wgt::TextureFormatFeatureFlags::FILTERABLE, caps.contains(Tfc::SAMPLED_LINEAR), diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs index 51c6c54318..652165ebda 100644 --- a/wgpu-core/src/lock/rank.rs +++ b/wgpu-core/src/lock/rank.rs @@ -148,7 +148,6 @@ define_lock_ranks! { rank BLAS_BUILT_INDEX "Blas::built_index" followed by { } rank TLAS_BUILT_INDEX "Tlas::built_index" followed by { } rank TLAS_DEPENDENCIES "Tlas::dependencies" followed by { } - rank TLAS_BIND_GROUPS "Tlas::bind_groups" followed by { } #[cfg(test)] rank PAWN "pawn" followed by { ROOK, BISHOP } diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs index 9f4a11946d..fe6a1f7f6a 100644 --- a/wgpu-core/src/ray_tracing.rs +++ b/wgpu-core/src/ray_tracing.rs @@ -9,9 +9,9 @@ use crate::{ command::CommandEncoderError, - device::DeviceError, + device::{DeviceError, MissingFeatures}, id::{BlasId, BufferId, TlasId}, - resource::CreateBufferError, + resource::{DestroyedResourceError, InvalidResourceError, MissingBufferUsageError}, }; use std::num::NonZeroU64; use std::sync::Arc; @@ -25,15 +25,13 @@ pub enum CreateBlasError { #[error(transparent)] Device(#[from] DeviceError), #[error(transparent)] - CreateBufferError(#[from] CreateBufferError), + MissingFeatures(#[from] MissingFeatures), #[error( "Only one of 'index_count' and 'index_format' was provided (either provide both or none)" )] MissingIndexData, #[error("Provided format was not within allowed formats. Provided format: {0:?}. Allowed formats: {1:?}")] InvalidVertexFormat(VertexFormat, Vec), - #[error("Features::RAY_TRACING_ACCELERATION_STRUCTURE is not enabled")] - MissingFeature, } #[derive(Clone, Debug, Error)] @@ -41,9 +39,7 @@ pub enum CreateTlasError { #[error(transparent)] Device(#[from] DeviceError), #[error(transparent)] - CreateBufferError(#[from] CreateBufferError), - #[error("Features::RAY_TRACING_ACCELERATION_STRUCTURE is not enabled")] - MissingFeature, + MissingFeatures(#[from] MissingFeatures), } /// Error encountered while attempting to do a copy on a command encoder. @@ -55,14 +51,17 @@ pub enum BuildAccelerationStructureError { #[error(transparent)] Device(#[from] DeviceError), - #[error("BufferId is invalid or destroyed")] - InvalidBufferId, + #[error(transparent)] + InvalidResource(#[from] InvalidResourceError), + + #[error(transparent)] + DestroyedResource(#[from] DestroyedResourceError), - #[error("Buffer {0:?} is invalid or destroyed")] - InvalidBuffer(ResourceErrorIdent), + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), - #[error("Buffer {0:?} is missing `BLAS_INPUT` usage flag")] - MissingBlasInputUsageFlag(ResourceErrorIdent), + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), #[error( "Buffer {0:?} size is insufficient for provided size information (size: {1}, required: {2}" @@ -111,12 +110,6 @@ pub enum BuildAccelerationStructureError { #[error("Blas {0:?} build sizes require index buffer but none was provided")] MissingIndexBuffer(ResourceErrorIdent), - #[error("BlasId is invalid")] - InvalidBlasId, - - #[error("Blas {0:?} is destroyed")] - InvalidBlas(ResourceErrorIdent), - #[error( "Tlas {0:?} an associated instances contains an invalid custom index (more than 24bits)" )] @@ -126,21 +119,6 @@ pub enum BuildAccelerationStructureError { "Tlas {0:?} has {1} active instances but only {2} are allowed as specified by the descriptor at creation" )] TlasInstanceCountExceeded(ResourceErrorIdent, u32, u32), - - #[error("BlasId is invalid or destroyed (for instance)")] - InvalidBlasIdForInstance, - - #[error("TlasId is invalid or destroyed")] - InvalidTlasId, - - #[error("Tlas {0:?} is invalid or destroyed")] - InvalidTlas(ResourceErrorIdent), - - #[error("Features::RAY_TRACING_ACCELERATION_STRUCTURE is not enabled")] - MissingFeature, - - #[error("Buffer {0:?} is missing `TLAS_INPUT` usage flag")] - MissingTlasInputUsageFlag(ResourceErrorIdent), } #[derive(Clone, Debug, Error)] @@ -151,15 +129,15 @@ pub enum ValidateBlasActionsError { #[derive(Clone, Debug, Error)] pub enum ValidateTlasActionsError { + #[error(transparent)] + DestroyedResource(#[from] DestroyedResourceError), + #[error("Tlas {0:?} is used before it is built")] UsedUnbuilt(ResourceErrorIdent), #[error("Blas {0:?} is used before it is built (in Tlas {1:?})")] UsedUnbuiltBlas(ResourceErrorIdent, ResourceErrorIdent), - #[error("BlasId is destroyed (in Tlas {0:?})")] - InvalidBlas(ResourceErrorIdent), - #[error("Blas {0:?} is newer than the containing Tlas {1:?}")] BlasNewerThenTlas(ResourceErrorIdent, ResourceErrorIdent), } @@ -172,7 +150,7 @@ pub struct BlasTriangleGeometry<'a> { pub transform_buffer: Option, pub first_vertex: u32, pub vertex_stride: BufferAddress, - pub index_buffer_offset: Option, + pub first_index: Option, pub transform_buffer_offset: Option, } @@ -243,7 +221,7 @@ pub struct TraceBlasTriangleGeometry { pub transform_buffer: Option, pub first_vertex: u32, pub vertex_stride: BufferAddress, - pub index_buffer_offset: Option, + pub first_index: Option, pub transform_buffer_offset: Option, } diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 0b13ad3bd0..9c2252e665 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -1889,7 +1889,10 @@ pub type BlasDescriptor<'a> = wgt::CreateBlasDescriptor>; pub type TlasDescriptor<'a> = wgt::CreateTlasDescriptor>; pub(crate) trait AccelerationStructure: Trackable { - fn raw<'a>(&'a self, guard: &'a SnatchGuard) -> Option<&'a dyn hal::DynAccelerationStructure>; + fn try_raw<'a>( + &'a self, + guard: &'a SnatchGuard, + ) -> Result<&'a dyn hal::DynAccelerationStructure, DestroyedResourceError>; } #[derive(Debug)] @@ -1920,49 +1923,14 @@ impl Drop for Blas { } impl AccelerationStructure for Blas { - fn raw<'a>(&'a self, guard: &'a SnatchGuard) -> Option<&'a dyn hal::DynAccelerationStructure> { - Some(self.raw.get(guard)?.as_ref()) - } -} - -impl Blas { - pub(crate) fn destroy(self: &Arc) -> Result<(), DestroyError> { - let device = &self.device; - - let temp = { - let mut snatch_guard = device.snatchable_lock.write(); - - let raw = match self.raw.snatch(&mut snatch_guard) { - Some(raw) => raw, - None => { - return Err(DestroyError::AlreadyDestroyed); - } - }; - - drop(snatch_guard); - - queue::TempResource::DestroyedAccelerationStructure(DestroyedAccelerationStructure { - raw: ManuallyDrop::new(raw), - device: Arc::clone(&self.device), - label: self.label().to_owned(), - bind_groups: WeakVec::new(), - }) - }; - - if let Some(queue) = device.get_queue() { - let mut pending_writes = queue.pending_writes.lock(); - if pending_writes.contains_blas(self) { - pending_writes.consume_temp(temp); - } else { - let mut life_lock = queue.lock_life(); - let last_submit_index = life_lock.get_blas_latest_submission_index(self); - if let Some(last_submit_index) = last_submit_index { - life_lock.schedule_resource_destruction(temp, last_submit_index); - } - } - } - - Ok(()) + fn try_raw<'a>( + &'a self, + guard: &'a SnatchGuard, + ) -> Result<&'a dyn hal::DynAccelerationStructure, DestroyedResourceError> { + self.raw + .get(guard) + .map(|raw| raw.as_ref()) + .ok_or_else(|| DestroyedResourceError(self.error_ident())) } } @@ -1986,7 +1954,6 @@ pub struct Tlas { /// The `label` from the descriptor used to create the resource. pub(crate) label: String, pub(crate) tracking_data: TrackingData, - pub(crate) bind_groups: Mutex>, } impl Drop for Tlas { @@ -2003,8 +1970,14 @@ impl Drop for Tlas { } impl AccelerationStructure for Tlas { - fn raw<'a>(&'a self, guard: &'a SnatchGuard) -> Option<&'a dyn hal::DynAccelerationStructure> { - Some(self.raw.get(guard)?.as_ref()) + fn try_raw<'a>( + &'a self, + guard: &'a SnatchGuard, + ) -> Result<&'a dyn hal::DynAccelerationStructure, DestroyedResourceError> { + self.raw + .get(guard) + .map(|raw| raw.as_ref()) + .ok_or_else(|| DestroyedResourceError(self.error_ident())) } } @@ -2013,76 +1986,3 @@ crate::impl_labeled!(Tlas); crate::impl_parent_device!(Tlas); crate::impl_storage_item!(Tlas); crate::impl_trackable!(Tlas); - -impl Tlas { - pub(crate) fn destroy(self: &Arc) -> Result<(), DestroyError> { - let device = &self.device; - - let temp = { - let mut snatch_guard = device.snatchable_lock.write(); - - let raw = match self.raw.snatch(&mut snatch_guard) { - Some(raw) => raw, - None => { - return Err(DestroyError::AlreadyDestroyed); - } - }; - - drop(snatch_guard); - - queue::TempResource::DestroyedAccelerationStructure(DestroyedAccelerationStructure { - raw: ManuallyDrop::new(raw), - device: Arc::clone(&self.device), - label: self.label().to_owned(), - bind_groups: mem::take(&mut self.bind_groups.lock()), - }) - }; - - if let Some(queue) = device.get_queue() { - let mut pending_writes = queue.pending_writes.lock(); - if pending_writes.contains_tlas(self) { - pending_writes.consume_temp(temp); - } else { - let mut life_lock = queue.lock_life(); - let last_submit_index = life_lock.get_tlas_latest_submission_index(self); - if let Some(last_submit_index) = last_submit_index { - life_lock.schedule_resource_destruction(temp, last_submit_index); - } - } - } - - Ok(()) - } -} - -#[derive(Debug)] -pub struct DestroyedAccelerationStructure { - raw: ManuallyDrop>, - device: Arc, - label: String, - // only filled if the acceleration structure is a TLAS - bind_groups: WeakVec, -} - -impl DestroyedAccelerationStructure { - pub fn label(&self) -> &dyn Debug { - &self.label - } -} - -impl Drop for DestroyedAccelerationStructure { - fn drop(&mut self) { - let mut deferred = self.device.deferred_destroy.lock(); - deferred.push(DeferredDestroy::BindGroups(mem::take( - &mut self.bind_groups, - ))); - drop(deferred); - - resource_log!("Destroy raw Buffer (destroyed) {:?}", self.label()); - // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. - let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; - unsafe { - hal::DynDevice::destroy_acceleration_structure(self.device.raw(), raw); - } - } -} diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs index 261bb0458e..a0b91be5e6 100644 --- a/wgpu-core/src/track/mod.rs +++ b/wgpu-core/src/track/mod.rs @@ -98,7 +98,6 @@ Device <- CommandBuffer = insert(device.start, device.end, buffer.start, buffer. mod buffer; mod metadata; mod range; -mod ray_tracing; mod stateless; mod texture; @@ -113,7 +112,6 @@ use crate::{ use std::{fmt, ops, sync::Arc}; use thiserror::Error; -use crate::track::ray_tracing::AccelerationStructureTracker; pub(crate) use buffer::{ BufferBindGroupState, BufferTracker, BufferUsageScope, DeviceBufferTracker, }; @@ -602,8 +600,8 @@ impl DeviceTracker { pub(crate) struct Tracker { pub buffers: BufferTracker, pub textures: TextureTracker, - pub blas_s: AccelerationStructureTracker, - pub tlas_s: AccelerationStructureTracker, + pub blas_s: StatelessTracker, + pub tlas_s: StatelessTracker, pub views: StatelessTracker, pub bind_groups: StatelessTracker, pub compute_pipelines: StatelessTracker, @@ -617,8 +615,8 @@ impl Tracker { Self { buffers: BufferTracker::new(), textures: TextureTracker::new(), - blas_s: AccelerationStructureTracker::new(), - tlas_s: AccelerationStructureTracker::new(), + blas_s: StatelessTracker::new(), + tlas_s: StatelessTracker::new(), views: StatelessTracker::new(), bind_groups: StatelessTracker::new(), compute_pipelines: StatelessTracker::new(), diff --git a/wgpu-core/src/track/ray_tracing.rs b/wgpu-core/src/track/ray_tracing.rs deleted file mode 100644 index c344526dfb..0000000000 --- a/wgpu-core/src/track/ray_tracing.rs +++ /dev/null @@ -1,81 +0,0 @@ -use crate::resource::AccelerationStructure; -use crate::track::metadata::ResourceMetadata; -use crate::track::ResourceUses; -use hal::AccelerationStructureUses; -use std::sync::Arc; -use wgt::strict_assert; - -pub(crate) struct AccelerationStructureTracker { - start: Vec, - end: Vec, - - metadata: ResourceMetadata>, -} - -impl AccelerationStructureTracker { - pub fn new() -> Self { - Self { - start: Vec::new(), - end: Vec::new(), - - metadata: ResourceMetadata::new(), - } - } - - fn tracker_assert_in_bounds(&self, index: usize) { - strict_assert!(index < self.start.len()); - strict_assert!(index < self.end.len()); - self.metadata.tracker_assert_in_bounds(index); - } - - /// Sets the size of all the vectors inside the tracker. - /// - /// Must be called with the highest possible Buffer ID before - /// all unsafe functions are called. - pub fn set_size(&mut self, size: usize) { - self.start.resize(size, AccelerationStructureUses::empty()); - self.end.resize(size, AccelerationStructureUses::empty()); - - self.metadata.set_size(size); - } - - /// Extend the vectors to let the given index be valid. - fn allow_index(&mut self, index: usize) { - if index >= self.start.len() { - self.set_size(index + 1); - } - } - - /// Returns true if the given buffer is tracked. - pub fn contains(&self, acceleration_structure: &T) -> bool { - self.metadata - .contains(acceleration_structure.tracker_index().as_usize()) - } - - /// Inserts a single resource into the resource tracker. - pub fn set_single(&mut self, resource: Arc) { - let index: usize = resource.tracker_index().as_usize(); - - self.allow_index(index); - - self.tracker_assert_in_bounds(index); - } -} - -impl ResourceUses for AccelerationStructureUses { - const EXCLUSIVE: Self = Self::empty(); - - type Selector = (); - - fn bits(self) -> u16 { - Self::bits(&self) as u16 - } - - fn all_ordered(self) -> bool { - true - } - - fn any_exclusive(self) -> bool { - self.intersects(Self::EXCLUSIVE) - } -} diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index be6abe9725..8a1384ad49 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -312,6 +312,7 @@ fn map_storage_format_to_naga(format: wgt::TextureFormat) -> Option Sf::Rgb10a2Unorm, Tf::Rg11b10Ufloat => Sf::Rg11b10Ufloat, + Tf::R64Uint => Sf::R64Uint, Tf::Rg32Uint => Sf::Rg32Uint, Tf::Rg32Sint => Sf::Rg32Sint, Tf::Rg32Float => Sf::Rg32Float, @@ -368,6 +369,7 @@ fn map_storage_format_from_naga(format: naga::StorageFormat) -> wgt::TextureForm Sf::Rgb10a2Unorm => Tf::Rgb10a2Unorm, Sf::Rg11b10Ufloat => Tf::Rg11b10Ufloat, + Sf::R64Uint => Tf::R64Uint, Sf::Rg32Uint => Tf::Rg32Uint, Sf::Rg32Sint => Tf::Rg32Sint, Sf::Rg32Float => Tf::Rg32Float, @@ -519,7 +521,14 @@ impl Resource { let naga_access = match access { wgt::StorageTextureAccess::ReadOnly => naga::StorageAccess::LOAD, wgt::StorageTextureAccess::WriteOnly => naga::StorageAccess::STORE, - wgt::StorageTextureAccess::ReadWrite => naga::StorageAccess::all(), + wgt::StorageTextureAccess::ReadWrite => { + naga::StorageAccess::LOAD | naga::StorageAccess::STORE + } + wgt::StorageTextureAccess::Atomic => { + naga::StorageAccess::ATOMIC + | naga::StorageAccess::LOAD + | naga::StorageAccess::STORE + } }; naga::ImageClass::Storage { format: naga_format, @@ -610,11 +619,15 @@ impl Resource { }, naga::ImageClass::Storage { format, access } => BindingType::StorageTexture { access: { - const LOAD_STORE: naga::StorageAccess = naga::StorageAccess::all(); + const LOAD_STORE: naga::StorageAccess = + naga::StorageAccess::LOAD.union(naga::StorageAccess::STORE); match access { naga::StorageAccess::LOAD => wgt::StorageTextureAccess::ReadOnly, naga::StorageAccess::STORE => wgt::StorageTextureAccess::WriteOnly, LOAD_STORE => wgt::StorageTextureAccess::ReadWrite, + _ if access.contains(naga::StorageAccess::ATOMIC) => { + wgt::StorageTextureAccess::Atomic + } _ => unreachable!(), } }, @@ -701,6 +714,7 @@ impl NumericType { Tf::Rg8Unorm | Tf::Rg8Snorm | Tf::Rg16Float | Tf::Rg32Float => { (NumericDimension::Vector(Vs::Bi), Scalar::F32) } + Tf::R64Uint => (NumericDimension::Scalar, Scalar::U64), Tf::Rg8Uint | Tf::Rg16Uint | Tf::Rg32Uint => { (NumericDimension::Vector(Vs::Bi), Scalar::U32) } diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 9987380c34..3e048e9396 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -284,7 +284,7 @@ impl Example { dbg!(&surface_caps.formats); let surface_format = if surface_caps .formats - .contains(&wgt::TextureFormat::Rgba8Snorm) + .contains(&wgt::TextureFormat::Rgba8Unorm) { wgt::TextureFormat::Rgba8Unorm } else { @@ -473,7 +473,8 @@ impl Example { vertex_buffer: Some(&vertices_buffer), first_vertex: 0, vertex_format: wgt::VertexFormat::Float32x3, - vertex_count: vertices.len() as u32, + // each vertex is 3 floats, and floats are stored raw in the array + vertex_count: vertices.len() as u32 / 3, vertex_stride: 3 * 4, indices: indices_buffer.as_ref().map(|(buf, len)| { hal::AccelerationStructureTriangleIndices { diff --git a/wgpu-hal/src/auxil/dxgi/conv.rs b/wgpu-hal/src/auxil/dxgi/conv.rs index 0f94575df8..a88853de11 100644 --- a/wgpu-hal/src/auxil/dxgi/conv.rs +++ b/wgpu-hal/src/auxil/dxgi/conv.rs @@ -48,6 +48,7 @@ pub fn map_texture_format_failable( Tf::Rgb10a2Uint => DXGI_FORMAT_R10G10B10A2_UINT, Tf::Rgb10a2Unorm => DXGI_FORMAT_R10G10B10A2_UNORM, Tf::Rg11b10Ufloat => DXGI_FORMAT_R11G11B10_FLOAT, + Tf::R64Uint => DXGI_FORMAT_R32G32_UINT, // R64 emulated by R32G32 Tf::Rg32Uint => DXGI_FORMAT_R32G32_UINT, Tf::Rg32Sint => DXGI_FORMAT_R32G32_SINT, Tf::Rg32Float => DXGI_FORMAT_R32G32_FLOAT, diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 0ef706d8c8..e71e2fce3a 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -244,7 +244,6 @@ impl super::Adapter { _ => unreachable!(), } }; - let private_caps = super::PrivateCapabilities { instance_flags, heterogeneous_resource_heaps: options.ResourceHeapTier @@ -319,7 +318,8 @@ impl super::Adapter { | wgt::Features::RG11B10UFLOAT_RENDERABLE | wgt::Features::DUAL_SOURCE_BLENDING | wgt::Features::TEXTURE_FORMAT_NV12 - | wgt::Features::FLOAT32_FILTERABLE; + | wgt::Features::FLOAT32_FILTERABLE + | wgt::Features::TEXTURE_ATOMIC; //TODO: in order to expose this, we need to run a compute shader // that extract the necessary statistics out of the D3D12 result. @@ -388,12 +388,40 @@ impl super::Adapter { && features1.Int64ShaderOps.as_bool(), ); + features.set( + wgt::Features::TEXTURE_INT64_ATOMIC, + shader_model >= naga::back::hlsl::ShaderModel::V6_6 + && hr.is_ok() + && features1.Int64ShaderOps.as_bool(), + ); + features.set( wgt::Features::SUBGROUP, shader_model >= naga::back::hlsl::ShaderModel::V6_0 && hr.is_ok() && features1.WaveOps.as_bool(), ); + let mut features5 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS5::default(); + let has_features5 = unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS5, + <*mut _>::cast(&mut features5), + size_of_val(&features5) as u32, + ) + } + .is_ok(); + + // Since all features for raytracing pipeline (geometry index) and ray queries both come + // from here, there is no point in adding an extra call here given that there will be no + // feature using EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE if all these are not met. + // Once ray tracing pipelines are supported they also will go here + features.set( + wgt::Features::EXPERIMENTAL_RAY_QUERY + | wgt::Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE, + features5.RaytracingTier == Direct3D12::D3D12_RAYTRACING_TIER_1_1 + && shader_model >= naga::back::hlsl::ShaderModel::V6_5 + && has_features5, + ); let atomic_int64_on_typed_resource_supported = { let mut features9 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS9::default(); @@ -528,8 +556,9 @@ impl super::Adapter { // Direct3D correctly bounds-checks all array accesses: // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#18.6.8.2%20Device%20Memory%20Reads uniform_bounds_check_alignment: wgt::BufferSize::new(1).unwrap(), - raw_tlas_instance_size: 0, - ray_tracing_scratch_buffer_alignment: 0, + raw_tlas_instance_size: size_of::(), + ray_tracing_scratch_buffer_alignment: + Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT, }, downlevel, }, @@ -682,6 +711,12 @@ impl crate::Adapter for super::Adapter { .Support2 .contains(Direct3D12::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD), ); + caps.set( + Tfc::STORAGE_ATOMIC, + data_srv_uav + .Support2 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX), + ); caps.set( Tfc::STORAGE_WRITE_ONLY, data_srv_uav diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 9296a20393..99cee37373 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1,12 +1,15 @@ -use std::{mem, ops::Range}; - -use windows::Win32::{Foundation, Graphics::Direct3D12}; - use super::conv; use crate::{ auxil::{self, dxgi::result::HResult as _}, dx12::borrow_interface_temporarily, + AccelerationStructureEntries, +}; +use std::{mem, ops::Range}; +use windows::Win32::{ + Foundation, + Graphics::{Direct3D12, Dxgi}, }; +use windows_core::Interface; fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX { Direct3D12::D3D12_BOX { @@ -777,8 +780,8 @@ impl crate::CommandEncoder for super::CommandEncoder { // ) // TODO: Replace with the above in the next breaking windows-rs release, // when https://github.com/microsoft/win32metadata/pull/1971 is in. - (windows_core::Interface::vtable(list).ClearDepthStencilView)( - windows_core::Interface::as_raw(list), + (Interface::vtable(list).ClearDepthStencilView)( + Interface::as_raw(list), ds_view, flags, ds.clear_value.0, @@ -1259,7 +1262,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn build_acceleration_structures<'a, T>( &mut self, _descriptor_count: u32, - _descriptors: T, + descriptors: T, ) where super::Api: 'a, T: IntoIterator< @@ -1272,13 +1275,189 @@ impl crate::CommandEncoder for super::CommandEncoder { { // Implement using `BuildRaytracingAccelerationStructure`: // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#buildraytracingaccelerationstructure - todo!() + let list = self + .list + .as_ref() + .unwrap() + .cast::() + .unwrap(); + for descriptor in descriptors { + // TODO: This is the same as getting build sizes apart from requiring buffers, should this be de-duped? + let mut geometry_desc; + let ty; + let inputs0; + let num_desc; + match descriptor.entries { + AccelerationStructureEntries::Instances(instances) => { + let desc_address = unsafe { + instances + .buffer + .expect("needs buffer to build") + .resource + .GetGPUVirtualAddress() + } + instances.offset as u64; + ty = Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL; + inputs0 = Direct3D12::D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_0 { + InstanceDescs: desc_address, + }; + num_desc = instances.count; + } + AccelerationStructureEntries::Triangles(triangles) => { + geometry_desc = Vec::with_capacity(triangles.len()); + for triangle in triangles { + let transform_address = + triangle.transform.as_ref().map_or(0, |transform| unsafe { + transform.buffer.resource.GetGPUVirtualAddress() + + transform.offset as u64 + }); + let index_format = triangle + .indices + .as_ref() + .map_or(Dxgi::Common::DXGI_FORMAT_UNKNOWN, |indices| { + auxil::dxgi::conv::map_index_format(indices.format) + }); + let vertex_format = + auxil::dxgi::conv::map_vertex_format(triangle.vertex_format); + let index_count = + triangle.indices.as_ref().map_or(0, |indices| indices.count); + let index_address = triangle.indices.as_ref().map_or(0, |indices| unsafe { + indices + .buffer + .expect("needs buffer to build") + .resource + .GetGPUVirtualAddress() + + indices.offset as u64 + }); + let vertex_address = unsafe { + triangle + .vertex_buffer + .expect("needs buffer to build") + .resource + .GetGPUVirtualAddress() + + (triangle.first_vertex as u64 * triangle.vertex_stride) + }; + + let triangle_desc = Direct3D12::D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC { + Transform3x4: transform_address, + IndexFormat: index_format, + VertexFormat: vertex_format, + IndexCount: index_count, + VertexCount: triangle.vertex_count, + IndexBuffer: index_address, + VertexBuffer: Direct3D12::D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE { + StartAddress: vertex_address, + StrideInBytes: triangle.vertex_stride, + }, + }; + + geometry_desc.push(Direct3D12::D3D12_RAYTRACING_GEOMETRY_DESC { + Type: Direct3D12::D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES, + Flags: conv::map_acceleration_structure_geometry_flags(triangle.flags), + Anonymous: Direct3D12::D3D12_RAYTRACING_GEOMETRY_DESC_0 { + Triangles: triangle_desc, + }, + }) + } + ty = Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; + inputs0 = Direct3D12::D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_0 { + pGeometryDescs: geometry_desc.as_ptr(), + }; + num_desc = geometry_desc.len() as u32; + } + AccelerationStructureEntries::AABBs(aabbs) => { + geometry_desc = Vec::with_capacity(aabbs.len()); + for aabb in aabbs { + let aabb_address = unsafe { + aabb.buffer + .expect("needs buffer to build") + .resource + .GetGPUVirtualAddress() + + (aabb.offset as u64 * aabb.stride) + }; + + let aabb_desc = Direct3D12::D3D12_RAYTRACING_GEOMETRY_AABBS_DESC { + AABBCount: aabb.count as u64, + AABBs: Direct3D12::D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE { + StartAddress: aabb_address, + StrideInBytes: aabb.stride, + }, + }; + + geometry_desc.push(Direct3D12::D3D12_RAYTRACING_GEOMETRY_DESC { + Type: Direct3D12::D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS, + Flags: conv::map_acceleration_structure_geometry_flags(aabb.flags), + Anonymous: Direct3D12::D3D12_RAYTRACING_GEOMETRY_DESC_0 { + AABBs: aabb_desc, + }, + }) + } + ty = Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; + inputs0 = Direct3D12::D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_0 { + pGeometryDescs: geometry_desc.as_ptr(), + }; + num_desc = geometry_desc.len() as u32; + } + }; + let acceleration_structure_inputs = + Direct3D12::D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS { + Type: ty, + Flags: conv::map_acceleration_structure_build_flags( + descriptor.flags, + Some(descriptor.mode), + ), + NumDescs: num_desc, + DescsLayout: Direct3D12::D3D12_ELEMENTS_LAYOUT_ARRAY, + Anonymous: inputs0, + }; + + let dst_acceleration_structure_address = unsafe { + descriptor + .destination_acceleration_structure + .resource + .GetGPUVirtualAddress() + }; + let src_acceleration_structure_address = descriptor + .source_acceleration_structure + .as_ref() + .map_or(0, |source| unsafe { + source.resource.GetGPUVirtualAddress() + }); + let scratch_address = unsafe { + descriptor.scratch_buffer.resource.GetGPUVirtualAddress() + + descriptor.scratch_buffer_offset + }; + + let desc = Direct3D12::D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC { + DestAccelerationStructureData: dst_acceleration_structure_address, + Inputs: acceleration_structure_inputs, + SourceAccelerationStructureData: src_acceleration_structure_address, + ScratchAccelerationStructureData: scratch_address, + }; + unsafe { list.BuildRaytracingAccelerationStructure(&desc, None) }; + } } unsafe fn place_acceleration_structure_barrier( &mut self, _barriers: crate::AccelerationStructureBarrier, ) { - todo!() + // TODO: This is not very optimal, we should be using [enhanced barriers](https://microsoft.github.io/DirectX-Specs/d3d/D3D12EnhancedBarriers.html) if possible + let list = self + .list + .as_ref() + .unwrap() + .cast::() + .unwrap(); + unsafe { + list.ResourceBarrier(&[Direct3D12::D3D12_RESOURCE_BARRIER { + Type: Direct3D12::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: Direct3D12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + Anonymous: Direct3D12::D3D12_RESOURCE_BARRIER_0 { + UAV: mem::ManuallyDrop::new(Direct3D12::D3D12_RESOURCE_UAV_BARRIER { + pResource: Default::default(), + }), + }, + }]) + } } } diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 3457d6446e..5117378942 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -112,7 +112,7 @@ pub fn map_binding_type(ty: &wgt::BindingType) -> Direct3D12::D3D12_DESCRIPTOR_R .. } | Bt::StorageTexture { .. } => Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_UAV, - Bt::AccelerationStructure => todo!(), + Bt::AccelerationStructure => Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV, } } @@ -350,3 +350,51 @@ pub fn map_depth_stencil(ds: &wgt::DepthStencilState) -> Direct3D12::D3D12_DEPTH BackFace: map_stencil_face(&ds.stencil.back), } } + +pub(crate) fn map_acceleration_structure_build_flags( + flags: wgt::AccelerationStructureFlags, + mode: Option, +) -> Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS { + let mut d3d_flags = Default::default(); + if flags.contains(wgt::AccelerationStructureFlags::ALLOW_COMPACTION) { + d3d_flags |= + Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_COMPACTION; + } + + if flags.contains(wgt::AccelerationStructureFlags::ALLOW_UPDATE) { + d3d_flags |= Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_UPDATE; + } + + if flags.contains(wgt::AccelerationStructureFlags::LOW_MEMORY) { + d3d_flags |= Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_MINIMIZE_MEMORY; + } + + if flags.contains(wgt::AccelerationStructureFlags::PREFER_FAST_BUILD) { + d3d_flags |= + Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD; + } + + if flags.contains(wgt::AccelerationStructureFlags::PREFER_FAST_TRACE) { + d3d_flags |= + Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE; + } + + if let Some(crate::AccelerationStructureBuildMode::Update) = mode { + d3d_flags |= Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE + } + + d3d_flags +} + +pub(crate) fn map_acceleration_structure_geometry_flags( + flags: wgt::AccelerationStructureGeometryFlags, +) -> Direct3D12::D3D12_RAYTRACING_GEOMETRY_FLAGS { + let mut d3d_flags = Default::default(); + if flags.contains(wgt::AccelerationStructureGeometryFlags::OPAQUE) { + d3d_flags |= Direct3D12::D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE; + } + if flags.contains(wgt::AccelerationStructureGeometryFlags::NO_DUPLICATE_ANY_HIT_INVOCATION) { + d3d_flags |= Direct3D12::D3D12_RAYTRACING_GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION; + } + d3d_flags +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 20dc20164f..b9a825845a 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -2,7 +2,7 @@ use std::{ ffi, mem::{self, size_of, size_of_val}, num::NonZeroU32, - ptr, + ptr, slice, sync::Arc, time::{Duration, Instant}, }; @@ -21,7 +21,7 @@ use super::{conv, descriptor, D3D12Lib}; use crate::{ auxil::{self, dxgi::result::HResult}, dx12::{borrow_optional_interface_temporarily, shader_compilation, Event}, - TlasInstance, + AccelerationStructureEntries, TlasInstance, }; // this has to match Naga's HLSL backend, and also needs to be null-terminated @@ -763,7 +763,12 @@ impl crate::Device for super::Device { &self, desc: &crate::BindGroupLayoutDescriptor, ) -> Result { - let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); + let ( + mut num_buffer_views, + mut num_samplers, + mut num_texture_views, + mut num_acceleration_structures, + ) = (0, 0, 0, 0); for entry in desc.entries.iter() { let count = entry.count.map_or(1, NonZeroU32::get); match entry.ty { @@ -776,13 +781,13 @@ impl crate::Device for super::Device { num_texture_views += count } wgt::BindingType::Sampler { .. } => num_samplers += count, - wgt::BindingType::AccelerationStructure => todo!(), + wgt::BindingType::AccelerationStructure => num_acceleration_structures += count, } } self.counters.bind_group_layouts.add(1); - let num_views = num_buffer_views + num_texture_views; + let num_views = num_buffer_views + num_texture_views + num_acceleration_structures; Ok(super::BindGroupLayout { entries: desc.entries.to_vec(), cpu_heap_views: if num_views != 0 { @@ -1389,7 +1394,33 @@ impl crate::Device for super::Device { cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); } } - wgt::BindingType::AccelerationStructure => todo!(), + wgt::BindingType::AccelerationStructure => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.acceleration_structures[start..end] { + let inner = cpu_views.as_mut().unwrap(); + let cpu_index = inner.stage.len() as u32; + let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); + let raw_desc = Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN, + Shader4ComponentMapping: + Direct3D12::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + ViewDimension: + Direct3D12::D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE, + Anonymous: Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC_0 { + RaytracingAccelerationStructure: + Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_SRV { + Location: unsafe { data.resource.GetGPUVirtualAddress() }, + }, + }, + }; + unsafe { + self.raw + .CreateShaderResourceView(None, Some(&raw_desc), handle) + }; + inner.stage.push(handle); + } + } } } @@ -1888,36 +1919,167 @@ impl crate::Device for super::Device { unsafe fn get_acceleration_structure_build_sizes<'a>( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>, + desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>, ) -> crate::AccelerationStructureBuildSizes { - // Implement using `GetRaytracingAccelerationStructurePrebuildInfo`: - // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#getraytracingaccelerationstructureprebuildinfo - todo!() + let mut geometry_desc; + let device5 = self.raw.cast::().unwrap(); + let ty; + let inputs0; + let num_desc; + match desc.entries { + AccelerationStructureEntries::Instances(instances) => { + ty = Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL; + inputs0 = Direct3D12::D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_0 { + InstanceDescs: 0, + }; + num_desc = instances.count; + } + AccelerationStructureEntries::Triangles(triangles) => { + geometry_desc = Vec::with_capacity(triangles.len()); + for triangle in triangles { + let index_format = triangle + .indices + .as_ref() + .map_or(Dxgi::Common::DXGI_FORMAT_UNKNOWN, |indices| { + auxil::dxgi::conv::map_index_format(indices.format) + }); + let index_count = triangle.indices.as_ref().map_or(0, |indices| indices.count); + + let triangle_desc = Direct3D12::D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC { + Transform3x4: 0, + IndexFormat: index_format, + VertexFormat: auxil::dxgi::conv::map_vertex_format(triangle.vertex_format), + IndexCount: index_count, + VertexCount: triangle.vertex_count, + IndexBuffer: 0, + VertexBuffer: Direct3D12::D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE { + StartAddress: 0, + StrideInBytes: triangle.vertex_stride, + }, + }; + + geometry_desc.push(Direct3D12::D3D12_RAYTRACING_GEOMETRY_DESC { + Type: Direct3D12::D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES, + Flags: conv::map_acceleration_structure_geometry_flags(triangle.flags), + Anonymous: Direct3D12::D3D12_RAYTRACING_GEOMETRY_DESC_0 { + Triangles: triangle_desc, + }, + }) + } + ty = Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; + inputs0 = Direct3D12::D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_0 { + pGeometryDescs: geometry_desc.as_ptr(), + }; + num_desc = geometry_desc.len() as u32; + } + AccelerationStructureEntries::AABBs(aabbs) => { + geometry_desc = Vec::with_capacity(aabbs.len()); + for aabb in aabbs { + let aabb_desc = Direct3D12::D3D12_RAYTRACING_GEOMETRY_AABBS_DESC { + AABBCount: aabb.count as u64, + AABBs: Direct3D12::D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE { + StartAddress: 0, + StrideInBytes: aabb.stride, + }, + }; + geometry_desc.push(Direct3D12::D3D12_RAYTRACING_GEOMETRY_DESC { + Type: Direct3D12::D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS, + Flags: conv::map_acceleration_structure_geometry_flags(aabb.flags), + Anonymous: Direct3D12::D3D12_RAYTRACING_GEOMETRY_DESC_0 { + AABBs: aabb_desc, + }, + }) + } + ty = Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; + inputs0 = Direct3D12::D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_0 { + pGeometryDescs: geometry_desc.as_ptr(), + }; + num_desc = geometry_desc.len() as u32; + } + }; + let acceleration_structure_inputs = + Direct3D12::D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS { + Type: ty, + Flags: conv::map_acceleration_structure_build_flags(desc.flags, None), + NumDescs: num_desc, + DescsLayout: Direct3D12::D3D12_ELEMENTS_LAYOUT_ARRAY, + Anonymous: inputs0, + }; + let mut info = Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO::default(); + unsafe { + device5.GetRaytracingAccelerationStructurePrebuildInfo( + &acceleration_structure_inputs, + &mut info, + ) + }; + crate::AccelerationStructureBuildSizes { + acceleration_structure_size: info.ResultDataMaxSizeInBytes, + update_scratch_size: info.UpdateScratchDataSizeInBytes, + build_scratch_size: info.ScratchDataSizeInBytes, + } } unsafe fn get_acceleration_structure_device_address( &self, - _acceleration_structure: &super::AccelerationStructure, + acceleration_structure: &super::AccelerationStructure, ) -> wgt::BufferAddress { - // Implement using `GetGPUVirtualAddress`: - // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12resource-getgpuvirtualaddress - todo!() + unsafe { acceleration_structure.resource.GetGPUVirtualAddress() } } unsafe fn create_acceleration_structure( &self, - _desc: &crate::AccelerationStructureDescriptor, + desc: &crate::AccelerationStructureDescriptor, ) -> Result { // Create a D3D12 resource as per-usual. - todo!() + let size = desc.size; + + let raw_desc = Direct3D12::D3D12_RESOURCE_DESC { + Dimension: Direct3D12::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: size, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN, + SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: Direct3D12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + // TODO: when moving to enhanced barriers use Direct3D12::D3D12_RESOURCE_FLAG_RAYTRACING_ACCELERATION_STRUCTURE + Flags: Direct3D12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, + }; + + let (resource, allocation) = + super::suballocation::create_acceleration_structure_resource(self, desc, raw_desc)?; + + if let Some(label) = desc.label { + unsafe { resource.SetName(&windows::core::HSTRING::from(label)) } + .into_device_result("SetName")?; + } + + // for some reason there is no counter for acceleration structures + + Ok(super::AccelerationStructure { + resource, + allocation, + }) } unsafe fn destroy_acceleration_structure( &self, - _acceleration_structure: super::AccelerationStructure, + mut acceleration_structure: super::AccelerationStructure, ) { - // Destroy a D3D12 resource as per-usual. - todo!() + if let Some(alloc) = acceleration_structure.allocation.take() { + // Resource should be dropped before suballocation is freed + drop(acceleration_structure); + + super::suballocation::free_acceleration_structure_allocation( + self, + alloc, + &self.mem_allocator, + ); + } } fn get_internal_counters(&self) -> wgt::HalCounters { @@ -1954,7 +2116,21 @@ impl crate::Device for super::Device { }) } - fn tlas_instance_to_bytes(&self, _instance: TlasInstance) -> Vec { - todo!() + fn tlas_instance_to_bytes(&self, instance: TlasInstance) -> Vec { + const MAX_U24: u32 = (1u32 << 24u32) - 1u32; + let temp = Direct3D12::D3D12_RAYTRACING_INSTANCE_DESC { + Transform: instance.transform, + _bitfield1: (instance.custom_index & MAX_U24) | (u32::from(instance.mask) << 24), + _bitfield2: 0, + AccelerationStructure: instance.blas_address, + }; + let temp: *const _ = &temp; + unsafe { + slice::from_raw_parts( + temp.cast::(), + size_of::(), + ) + .to_vec() + } } } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index d58d79300a..809d53c74d 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -1031,7 +1031,10 @@ pub struct PipelineCache; impl crate::DynPipelineCache for PipelineCache {} #[derive(Debug)] -pub struct AccelerationStructure {} +pub struct AccelerationStructure { + resource: Direct3D12::ID3D12Resource, + allocation: Option, +} impl crate::DynAccelerationStructure for AccelerationStructure {} diff --git a/wgpu-hal/src/dx12/suballocation.rs b/wgpu-hal/src/dx12/suballocation.rs index bdb3e85129..2b0cbf8a47 100644 --- a/wgpu-hal/src/dx12/suballocation.rs +++ b/wgpu-hal/src/dx12/suballocation.rs @@ -151,6 +151,54 @@ pub(crate) fn create_texture_resource( Ok((resource, Some(AllocationWrapper { allocation }))) } +pub(crate) fn create_acceleration_structure_resource( + device: &crate::dx12::Device, + desc: &crate::AccelerationStructureDescriptor, + raw_desc: Direct3D12::D3D12_RESOURCE_DESC, +) -> Result<(Direct3D12::ID3D12Resource, Option), crate::DeviceError> { + // Workaround for Intel Xe drivers + if !device.private_caps.suballocation_supported { + return create_committed_acceleration_structure_resource(device, desc, raw_desc) + .map(|resource| (resource, None)); + } + + let location = MemoryLocation::GpuOnly; + + let name = desc.label.unwrap_or("Unlabeled acceleration structure"); + + let mut allocator = device.mem_allocator.lock(); + + let allocation_desc = AllocationCreateDesc::from_d3d12_resource_desc( + allocator.allocator.device(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + let mut resource = None; + + unsafe { + device.raw.CreatePlacedResource( + allocation.heap(), + allocation.offset(), + &raw_desc, + Direct3D12::D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, + None, + &mut resource, + ) + } + .into_device_result("Placed acceleration structure creation")?; + + let resource = resource.ok_or(crate::DeviceError::Unexpected)?; + + device + .counters + .acceleration_structure_memory + .add(allocation.size() as isize); + + Ok((resource, Some(AllocationWrapper { allocation }))) +} + pub(crate) fn free_buffer_allocation( device: &crate::dx12::Device, allocation: AllocationWrapper, @@ -183,6 +231,22 @@ pub(crate) fn free_texture_allocation( }; } +pub(crate) fn free_acceleration_structure_allocation( + device: &crate::dx12::Device, + allocation: AllocationWrapper, + allocator: &Mutex, +) { + device + .counters + .acceleration_structure_memory + .sub(allocation.allocation.size() as isize); + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 acceleration structure, {e}"), + }; +} + impl From for crate::DeviceError { fn from(result: gpu_allocator::AllocationError) -> Self { match result { @@ -304,3 +368,40 @@ pub(crate) fn create_committed_texture_resource( resource.ok_or(crate::DeviceError::Unexpected) } + +pub(crate) fn create_committed_acceleration_structure_resource( + device: &crate::dx12::Device, + _desc: &crate::AccelerationStructureDescriptor, + raw_desc: Direct3D12::D3D12_RESOURCE_DESC, +) -> Result { + let heap_properties = Direct3D12::D3D12_HEAP_PROPERTIES { + Type: Direct3D12::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: Direct3D12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified => Direct3D12::D3D12_MEMORY_POOL_L1, + _ => Direct3D12::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let mut resource = None; + + unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + Direct3D12::D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + Direct3D12::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + Direct3D12::D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, + None, + &mut resource, + ) + } + .into_device_result("Committed acceleration structure creation")?; + + resource.ok_or(crate::DeviceError::Unexpected) +} diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index d901324205..67ff20ff19 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -1082,6 +1082,9 @@ impl crate::Adapter for super::Adapter { let texture_float_linear = feature_fn(wgt::Features::FLOAT32_FILTERABLE, filterable); + let image_atomic = feature_fn(wgt::Features::TEXTURE_ATOMIC, Tfc::STORAGE_ATOMIC); + let image_64_atomic = feature_fn(wgt::Features::TEXTURE_INT64_ATOMIC, Tfc::STORAGE_ATOMIC); + match format { Tf::R8Unorm => filterable_renderable, Tf::R8Snorm => filterable, @@ -1096,8 +1099,8 @@ impl crate::Adapter for super::Adapter { Tf::Rg8Snorm => filterable, Tf::Rg8Uint => renderable, Tf::Rg8Sint => renderable, - Tf::R32Uint => renderable | storage, - Tf::R32Sint => renderable | storage, + Tf::R32Uint => renderable | storage | image_atomic, + Tf::R32Sint => renderable | storage | image_atomic, Tf::R32Float => unfilterable | storage | float_renderable | texture_float_linear, Tf::Rg16Uint => renderable, Tf::Rg16Sint => renderable, @@ -1113,6 +1116,7 @@ impl crate::Adapter for super::Adapter { Tf::Rgb10a2Uint => renderable, Tf::Rgb10a2Unorm => filterable_renderable, Tf::Rg11b10Ufloat => filterable | float_renderable, + Tf::R64Uint => image_64_atomic, Tf::Rg32Uint => renderable, Tf::Rg32Sint => renderable, Tf::Rg32Float => unfilterable | float_renderable | texture_float_linear, diff --git a/wgpu-hal/src/gles/conv.rs b/wgpu-hal/src/gles/conv.rs index 029e03bf5e..7348f2f19e 100644 --- a/wgpu-hal/src/gles/conv.rs +++ b/wgpu-hal/src/gles/conv.rs @@ -50,6 +50,7 @@ impl super::AdapterShared { glow::RGB, glow::UNSIGNED_INT_10F_11F_11F_REV, ), + Tf::R64Uint => (glow::RG32UI, glow::RED_INTEGER, glow::UNSIGNED_INT), Tf::Rg32Uint => (glow::RG32UI, glow::RG_INTEGER, glow::UNSIGNED_INT), Tf::Rg32Sint => (glow::RG32I, glow::RG_INTEGER, glow::INT), Tf::Rg32Float => (glow::RG32F, glow::RG, glow::FLOAT), @@ -414,6 +415,7 @@ pub(super) fn map_storage_access(access: wgt::StorageTextureAccess) -> u32 { wgt::StorageTextureAccess::ReadOnly => glow::READ_ONLY, wgt::StorageTextureAccess::WriteOnly => glow::WRITE_ONLY, wgt::StorageTextureAccess::ReadWrite => glow::READ_WRITE, + wgt::StorageTextureAccess::Atomic => glow::READ_WRITE, } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 12234d6364..4cc0ef80bd 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1728,20 +1728,22 @@ bitflags::bitflags! { const STORAGE_WRITE_ONLY = 1 << 9; /// Read-write storage texture usage. const STORAGE_READ_WRITE = 1 << 10; + /// Image atomic enabled storage + const STORAGE_ATOMIC = 1 << 11; /// The combination of states that a texture may be in _at the same time_. const INCLUSIVE = Self::COPY_SRC.bits() | Self::RESOURCE.bits() | Self::DEPTH_STENCIL_READ.bits(); /// The combination of states that a texture must exclusively be in. - const EXCLUSIVE = Self::COPY_DST.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ_ONLY.bits() | Self::STORAGE_WRITE_ONLY.bits() | Self::STORAGE_READ_WRITE.bits() | Self::PRESENT.bits(); + const EXCLUSIVE = Self::COPY_DST.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ_ONLY.bits() | Self::STORAGE_WRITE_ONLY.bits() | Self::STORAGE_READ_WRITE.bits() | Self::STORAGE_ATOMIC.bits() | Self::PRESENT.bits(); /// The combination of all usages that the are guaranteed to be be ordered by the hardware. /// If a usage is ordered, then if the texture state doesn't change between draw calls, there /// are no barriers needed for synchronization. const ORDERED = Self::INCLUSIVE.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ_ONLY.bits(); /// Flag used by the wgpu-core texture tracker to say a texture is in different states for every sub-resource - const COMPLEX = 1 << 11; + const COMPLEX = 1 << 12; /// Flag used by the wgpu-core texture tracker to say that the tracker does not know the state of the sub-resource. /// This is different from UNINITIALIZED as that says the tracker does know, but the texture has not been initialized. - const UNKNOWN = 1 << 12; + const UNKNOWN = 1 << 13; } } diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index c2a9541bee..ecff2b7a6a 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -109,6 +109,18 @@ impl crate::Adapter for super::Adapter { ], ); + let image_atomic_if = if pc.msl_version >= MTLLanguageVersion::V3_1 { + Tfc::STORAGE_ATOMIC + } else { + Tfc::empty() + }; + + let image_64_atomic_if = if pc.int64_atomics { + Tfc::STORAGE_ATOMIC + } else { + Tfc::empty() + }; + // Metal defined pixel format capabilities let all_caps = Tfc::SAMPLED_LINEAR | Tfc::STORAGE_WRITE_ONLY @@ -154,7 +166,11 @@ impl crate::Adapter for super::Adapter { Tfc::STORAGE_WRITE_ONLY | Tfc::COLOR_ATTACHMENT | msaa_count } Tf::R32Uint | Tf::R32Sint => { - read_write_tier1_if | Tfc::STORAGE_WRITE_ONLY | Tfc::COLOR_ATTACHMENT | msaa_count + read_write_tier1_if + | Tfc::STORAGE_WRITE_ONLY + | Tfc::COLOR_ATTACHMENT + | msaa_count + | image_atomic_if } Tf::R32Float => { let flags = if pc.format_r32float_all { @@ -190,6 +206,12 @@ impl crate::Adapter for super::Adapter { flags.set(Tfc::STORAGE_WRITE_ONLY, pc.format_rg11b10_all); flags } + Tf::R64Uint => { + Tfc::COLOR_ATTACHMENT + | Tfc::STORAGE_WRITE_ONLY + | image_64_atomic_if + | read_write_tier1_if + } Tf::Rg32Uint | Tf::Rg32Sint => { Tfc::COLOR_ATTACHMENT | Tfc::STORAGE_WRITE_ONLY | msaa_count } @@ -917,6 +939,14 @@ impl super::PrivateCapabilities { F::SHADER_INT64_ATOMIC_MIN_MAX, self.int64_atomics && self.msl_version >= MTLLanguageVersion::V2_4, ); + features.set( + F::TEXTURE_INT64_ATOMIC, + self.int64_atomics && self.msl_version >= MTLLanguageVersion::V3_1, + ); + features.set( + F::TEXTURE_ATOMIC, + self.msl_version >= MTLLanguageVersion::V3_1, + ); features.set( F::SHADER_FLOAT32_ATOMIC, self.float_atomics && self.msl_version >= MTLLanguageVersion::V3_0, @@ -1056,6 +1086,8 @@ impl super::PrivateCapabilities { Tf::Rgb10a2Uint => RGB10A2Uint, Tf::Rgb10a2Unorm => RGB10A2Unorm, Tf::Rg11b10Ufloat => RG11B10Float, + // Ruint64 textures are emulated on metal + Tf::R64Uint => RG32Uint, Tf::Rg32Uint => RG32Uint, Tf::Rg32Sint => RG32Sint, Tf::Rg32Float => RG32Float, diff --git a/wgpu-hal/src/metal/conv.rs b/wgpu-hal/src/metal/conv.rs index ef71f168ca..fecd3ffa09 100644 --- a/wgpu-hal/src/metal/conv.rs +++ b/wgpu-hal/src/metal/conv.rs @@ -27,6 +27,11 @@ pub fn map_texture_usage( format.is_combined_depth_stencil_format(), ); + mtl_usage.set( + metal::MTLTextureUsage::ShaderAtomic, + usage.intersects(Tu::STORAGE_ATOMIC), + ); + mtl_usage } @@ -351,7 +356,7 @@ pub fn map_resource_usage(ty: &wgt::BindingType) -> metal::MTLResourceUsage { wgt::BindingType::StorageTexture { access, .. } => match access { wgt::StorageTextureAccess::WriteOnly => metal::MTLResourceUsage::Write, wgt::StorageTextureAccess::ReadOnly => metal::MTLResourceUsage::Read, - wgt::StorageTextureAccess::ReadWrite => { + wgt::StorageTextureAccess::Atomic | wgt::StorageTextureAccess::ReadWrite => { metal::MTLResourceUsage::Read | metal::MTLResourceUsage::Write } }, diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index b64fa7c935..b2e514b4a3 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -733,6 +733,7 @@ impl crate::Device for super::Device { wgt::StorageTextureAccess::ReadOnly => false, wgt::StorageTextureAccess::WriteOnly => true, wgt::StorageTextureAccess::ReadWrite => true, + wgt::StorageTextureAccess::Atomic => true, }; } wgt::BindingType::AccelerationStructure => unimplemented!(), diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index cde18162ea..fd190fc34a 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -109,6 +109,9 @@ pub struct PhysicalDeviceFeatures { /// Features provided by `VK_KHR_shader_atomic_int64`, promoted to Vulkan 1.2. shader_atomic_int64: Option>, + /// Features provided by `VK_EXT_shader_image_atomic_int64` + shader_image_atomic_int64: Option>, + /// Features provided by `VK_EXT_shader_atomic_float`. shader_atomic_float: Option>, @@ -160,6 +163,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.shader_atomic_int64 { info = info.push_next(feature); } + if let Some(ref mut feature) = self.shader_image_atomic_int64 { + info = info.push_next(feature); + } if let Some(ref mut feature) = self.shader_atomic_float { info = info.push_next(feature); } @@ -444,6 +450,17 @@ impl PhysicalDeviceFeatures { } else { None }, + shader_image_atomic_int64: if enabled_extensions + .contains(&ext::shader_image_atomic_int64::NAME) + { + let needed = requested_features.intersects(wgt::Features::TEXTURE_INT64_ATOMIC); + Some( + vk::PhysicalDeviceShaderImageAtomicInt64FeaturesEXT::default() + .shader_image_int64_atomics(needed), + ) + } else { + None + }, shader_atomic_float: if enabled_extensions.contains(&ext::shader_atomic_float::NAME) { let needed = requested_features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC); Some( @@ -494,7 +511,8 @@ impl PhysicalDeviceFeatures { | F::TIMESTAMP_QUERY_INSIDE_PASSES | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES | F::CLEAR_TEXTURE - | F::PIPELINE_CACHE; + | F::PIPELINE_CACHE + | F::TEXTURE_ATOMIC; let mut dl_flags = Df::COMPUTE_SHADERS | Df::BASE_VERTEX @@ -598,6 +616,16 @@ impl PhysicalDeviceFeatures { ); } + if let Some(ref shader_image_atomic_int64) = self.shader_image_atomic_int64 { + features.set( + F::TEXTURE_INT64_ATOMIC, + shader_image_atomic_int64 + .shader_image_int64_atomics(true) + .shader_image_int64_atomics + != 0, + ); + } + if let Some(ref shader_atomic_float) = self.shader_atomic_float { features.set( F::SHADER_FLOAT32_ATOMIC, @@ -1018,6 +1046,11 @@ impl PhysicalDeviceProperties { extensions.push(khr::shader_atomic_int64::NAME); } + // Require `VK_EXT_shader_image_atomic_int64` if the associated feature was requested + if requested_features.intersects(wgt::Features::TEXTURE_INT64_ATOMIC) { + extensions.push(ext::shader_image_atomic_int64::NAME); + } + // Require `VK_EXT_shader_atomic_float` if the associated feature was requested if requested_features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC) { extensions.push(ext::shader_atomic_float::NAME); @@ -1318,6 +1351,12 @@ impl super::InstanceShared { features2 = features2.push_next(next); } + if capabilities.supports_extension(ext::shader_image_atomic_int64::NAME) { + let next = features + .shader_image_atomic_int64 + .insert(vk::PhysicalDeviceShaderImageAtomicInt64FeaturesEXT::default()); + features2 = features2.push_next(next); + } if capabilities.supports_extension(ext::shader_atomic_float::NAME) { let next = features .shader_atomic_float @@ -1814,11 +1853,16 @@ impl super::Adapter { if features.intersects( wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS - | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX + | wgt::Features::TEXTURE_INT64_ATOMIC, ) { capabilities.push(spv::Capability::Int64Atomics); } + if features.intersects(wgt::Features::TEXTURE_INT64_ATOMIC) { + capabilities.push(spv::Capability::Int64ImageEXT); + } + if features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC) { capabilities.push(spv::Capability::AtomicFloat32AddEXT); } @@ -2135,7 +2179,10 @@ impl crate::Adapter for super::Adapter { // features.contains(vk::FormatFeatureFlags::SAMPLED_IMAGE_FILTER_MINMAX), // ); flags.set( - Tfc::STORAGE_READ_WRITE | Tfc::STORAGE_WRITE_ONLY | Tfc::STORAGE_READ_ONLY, + Tfc::STORAGE_READ_WRITE + | Tfc::STORAGE_WRITE_ONLY + | Tfc::STORAGE_READ_ONLY + | Tfc::STORAGE_ATOMIC, features.contains(vk::FormatFeatureFlags::STORAGE_IMAGE), ); flags.set( @@ -2162,6 +2209,10 @@ impl crate::Adapter for super::Adapter { Tfc::COPY_DST, features.intersects(vk::FormatFeatureFlags::TRANSFER_DST), ); + flags.set( + Tfc::STORAGE_ATOMIC, + features.intersects(vk::FormatFeatureFlags::STORAGE_IMAGE_ATOMIC), + ); // Vulkan is very permissive about MSAA flags.set(Tfc::MULTISAMPLE_RESOLVE, !format.is_compressed()); diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index b5ae72b4db..e72d28d72a 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -37,6 +37,7 @@ impl super::PrivateCapabilities { Tf::Rgb10a2Uint => F::A2B10G10R10_UINT_PACK32, Tf::Rgb10a2Unorm => F::A2B10G10R10_UNORM_PACK32, Tf::Rg11b10Ufloat => F::B10G11R11_UFLOAT_PACK32, + Tf::R64Uint => F::R64_UINT, Tf::Rg32Uint => F::R32G32_UINT, Tf::Rg32Sint => F::R32G32_SINT, Tf::Rg32Float => F::R32G32_SFLOAT, @@ -266,7 +267,8 @@ pub fn map_texture_usage(usage: crate::TextureUses) -> vk::ImageUsageFlags { if usage.intersects( crate::TextureUses::STORAGE_READ_ONLY | crate::TextureUses::STORAGE_WRITE_ONLY - | crate::TextureUses::STORAGE_READ_WRITE, + | crate::TextureUses::STORAGE_READ_WRITE + | crate::TextureUses::STORAGE_ATOMIC, ) { flags |= vk::ImageUsageFlags::STORAGE; } @@ -309,15 +311,19 @@ pub fn map_texture_usage_to_barrier( access |= vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ | vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE; } - if usage - .intersects(crate::TextureUses::STORAGE_READ_ONLY | crate::TextureUses::STORAGE_READ_WRITE) - { + if usage.intersects( + crate::TextureUses::STORAGE_READ_ONLY + | crate::TextureUses::STORAGE_READ_WRITE + | crate::TextureUses::STORAGE_ATOMIC, + ) { stages |= shader_stages; access |= vk::AccessFlags::SHADER_READ; } - if usage - .intersects(crate::TextureUses::STORAGE_WRITE_ONLY | crate::TextureUses::STORAGE_READ_WRITE) - { + if usage.intersects( + crate::TextureUses::STORAGE_WRITE_ONLY + | crate::TextureUses::STORAGE_READ_WRITE + | crate::TextureUses::STORAGE_ATOMIC, + ) { stages |= shader_stages; access |= vk::AccessFlags::SHADER_WRITE; } @@ -352,7 +358,8 @@ pub fn map_vk_image_usage(usage: vk::ImageUsageFlags) -> crate::TextureUses { if usage.contains(vk::ImageUsageFlags::STORAGE) { bits |= crate::TextureUses::STORAGE_READ_ONLY | crate::TextureUses::STORAGE_WRITE_ONLY - | crate::TextureUses::STORAGE_READ_WRITE; + | crate::TextureUses::STORAGE_READ_WRITE + | crate::TextureUses::STORAGE_ATOMIC; } bits } diff --git a/wgpu-info/Cargo.toml b/wgpu-info/Cargo.toml index 39428e7be2..8d05139ad0 100644 --- a/wgpu-info/Cargo.toml +++ b/wgpu-info/Cargo.toml @@ -16,4 +16,4 @@ env_logger.workspace = true pico-args.workspace = true serde = { workspace = true, features = ["default"] } serde_json.workspace = true -wgpu = { workspace = true, features = ["serde", "dx12", "metal", "static-dxc"] } +wgpu.workspace = true diff --git a/wgpu-info/src/texture.rs b/wgpu-info/src/texture.rs index 2487bf350f..64325f0e5b 100644 --- a/wgpu-info/src/texture.rs +++ b/wgpu-info/src/texture.rs @@ -1,6 +1,6 @@ // Lets keep these on one line #[rustfmt::skip] -pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 116] = [ +pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 117] = [ wgpu::TextureFormat::R8Unorm, wgpu::TextureFormat::R8Snorm, wgpu::TextureFormat::R8Uint, @@ -33,6 +33,7 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 116] = [ wgpu::TextureFormat::Rgb10a2Uint, wgpu::TextureFormat::Rgb10a2Unorm, wgpu::TextureFormat::Rg11b10Ufloat, + wgpu::TextureFormat::R64Uint, wgpu::TextureFormat::Rg32Uint, wgpu::TextureFormat::Rg32Sint, wgpu::TextureFormat::Rg32Float, diff --git a/wgpu-types/src/counters.rs b/wgpu-types/src/counters.rs index 6137a6a2b4..ff38b33c66 100644 --- a/wgpu-types/src/counters.rs +++ b/wgpu-types/src/counters.rs @@ -126,6 +126,8 @@ pub struct HalCounters { pub buffer_memory: InternalCounter, /// Amount of allocated gpu memory attributed to textures, in bytes. pub texture_memory: InternalCounter, + /// Amount of allocated gpu memory attributed to acceleration structures, in bytes. + pub acceleration_structure_memory: InternalCounter, /// Number of gpu memory allocations. pub memory_allocations: InternalCounter, } diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index f6426a5221..31dc152d6c 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -406,7 +406,7 @@ bitflags::bitflags! { /// This is a web and native feature. const FLOAT32_FILTERABLE = 1 << 11; - // Bits 12-19 available for webgpu features. Should you chose to use some of them for + // Bits 12-18 available for webgpu features. Should you chose to use some of them for // for native features, don't forget to update `all_webgpu_mask` and `all_native_mask` // accordingly. @@ -416,6 +416,16 @@ bitflags::bitflags! { // Native Features: // + /// Enables R64Uint image atomic min and max. + /// + /// Supported platforms: + /// - Vulkan (with VK_EXT_shader_image_atomic_int64) + /// - DX12 (with SM 6.6+) + /// - Metal (with MSL 3.1+) + /// + /// This is a native only feature. + const TEXTURE_INT64_ATOMIC = 1 << 18; + /// Allows shaders to use f32 atomic load, store, add, sub, and exchange. /// /// Supported platforms: @@ -811,6 +821,15 @@ bitflags::bitflags! { /// /// This is a native only feature. const VERTEX_ATTRIBUTE_64BIT = 1 << 45; + /// Enables image atomic fetch add, and, xor, or, min, and max for R32Uint and R32Sint textures. + /// + /// Supported platforms: + /// - Vulkan + /// - DX12 + /// - Metal (with MSL 3.1+) + /// + /// This is a native only feature. + const TEXTURE_ATOMIC = 1 << 46; /// Allows for creation of textures of format [`TextureFormat::NV12`] /// /// Supported platforms: @@ -986,7 +1005,7 @@ impl Features { /// Mask of all features which are part of the upstream WebGPU standard. #[must_use] pub const fn all_webgpu_mask() -> Self { - Self::from_bits_truncate(0x7FFFF) + Self::from_bits_truncate(0x3FFFF) } /// Mask of all features that are only available when targeting native (not web). @@ -2424,8 +2443,11 @@ bitflags::bitflags! { /// When used as a STORAGE texture, then a texture with this format can be bound with /// [`StorageTextureAccess::ReadWrite`]. const STORAGE_READ_WRITE = 1 << 8; + /// When used as a STORAGE texture, then a texture with this format can be bound with + /// [`StorageTextureAccess::Atomic`]. + const STORAGE_ATOMIC = 1 << 9; /// If not present, the texture can't be blended into the render target. - const BLENDABLE = 1 << 9; + const BLENDABLE = 1 << 10; } } @@ -2615,6 +2637,10 @@ pub enum TextureFormat { Rg11b10Ufloat, // Normal 64 bit formats + /// Red channel only. 64 bit integer per channel. Unsigned in shader. + /// + /// [`Features::TEXTURE_INT64_ATOMIC`] must be enabled to use this texture format. + R64Uint, /// Red and green channels. 32 bit integer per channel. Unsigned in shader. Rg32Uint, /// Red and green channels. 32 bit integer per channel. Signed in shader. @@ -2901,6 +2927,7 @@ impl<'de> Deserialize<'de> for TextureFormat { "rgb10a2uint" => TextureFormat::Rgb10a2Uint, "rgb10a2unorm" => TextureFormat::Rgb10a2Unorm, "rg11b10ufloat" => TextureFormat::Rg11b10Ufloat, + "r64uint" => TextureFormat::R64Uint, "rg32uint" => TextureFormat::Rg32Uint, "rg32sint" => TextureFormat::Rg32Sint, "rg32float" => TextureFormat::Rg32Float, @@ -3029,6 +3056,7 @@ impl Serialize for TextureFormat { TextureFormat::Rgb10a2Uint => "rgb10a2uint", TextureFormat::Rgb10a2Unorm => "rgb10a2unorm", TextureFormat::Rg11b10Ufloat => "rg11b10ufloat", + TextureFormat::R64Uint => "r64uint", TextureFormat::Rg32Uint => "rg32uint", TextureFormat::Rg32Sint => "rg32sint", TextureFormat::Rg32Float => "rg32float", @@ -3271,6 +3299,7 @@ impl TextureFormat { | Self::Rgb10a2Uint | Self::Rgb10a2Unorm | Self::Rg11b10Ufloat + | Self::R64Uint | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float @@ -3394,6 +3423,8 @@ impl TextureFormat { | Self::Depth24PlusStencil8 | Self::Depth32Float => Features::empty(), + Self::R64Uint => Features::TEXTURE_INT64_ATOMIC, + Self::Depth32FloatStencil8 => Features::DEPTH32FLOAT_STENCIL8, Self::NV12 => Features::TEXTURE_FORMAT_NV12, @@ -3458,7 +3489,13 @@ impl TextureFormat { let attachment = basic | TextureUsages::RENDER_ATTACHMENT; let storage = basic | TextureUsages::STORAGE_BINDING; let binding = TextureUsages::TEXTURE_BINDING; - let all_flags = TextureUsages::all(); + let all_flags = attachment | storage | binding; + let atomic_64 = if device_features.contains(Features::TEXTURE_ATOMIC) { + storage | binding | TextureUsages::STORAGE_ATOMIC + } else { + storage | binding + }; + let atomic = attachment | atomic_64; let rg11b10f = if device_features.contains(Features::RG11B10UFLOAT_RENDERABLE) { attachment } else { @@ -3489,8 +3526,8 @@ impl TextureFormat { Self::Rg8Snorm => ( none, basic), Self::Rg8Uint => ( msaa, attachment), Self::Rg8Sint => ( msaa, attachment), - Self::R32Uint => ( s_all, all_flags), - Self::R32Sint => ( s_all, all_flags), + Self::R32Uint => ( s_all, atomic), + Self::R32Sint => ( s_all, atomic), Self::R32Float => (msaa | s_all, all_flags), Self::Rg16Uint => ( msaa, attachment), Self::Rg16Sint => ( msaa, attachment), @@ -3505,6 +3542,7 @@ impl TextureFormat { Self::Rgb10a2Uint => ( msaa, attachment), Self::Rgb10a2Unorm => (msaa_resolve, attachment), Self::Rg11b10Ufloat => ( msaa, rg11b10f), + Self::R64Uint => ( s_ro_wo, atomic_64), Self::Rg32Uint => ( s_ro_wo, all_flags), Self::Rg32Sint => ( s_ro_wo, all_flags), Self::Rg32Float => ( s_ro_wo, all_flags), @@ -3573,6 +3611,10 @@ impl TextureFormat { flags.set(TextureFormatFeatureFlags::FILTERABLE, is_filterable); flags.set(TextureFormatFeatureFlags::BLENDABLE, is_blendable); + flags.set( + TextureFormatFeatureFlags::STORAGE_ATOMIC, + allowed_usages.contains(TextureUsages::STORAGE_ATOMIC), + ); TextureFormatFeatures { allowed_usages, @@ -3626,6 +3668,7 @@ impl TextureFormat { | Self::Rg16Uint | Self::Rgba16Uint | Self::R32Uint + | Self::R64Uint | Self::Rg32Uint | Self::Rgba32Uint | Self::Rgb10a2Uint => Some(uint), @@ -3756,7 +3799,7 @@ impl TextureFormat { | Self::Rgba16Uint | Self::Rgba16Sint | Self::Rgba16Float => Some(8), - Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float => Some(8), + Self::R64Uint | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float => Some(8), Self::Rgba32Uint | Self::Rgba32Sint | Self::Rgba32Float => Some(16), @@ -3850,6 +3893,7 @@ impl TextureFormat { | Self::Rgba16Unorm | Self::Rgba16Snorm | Self::Rgba16Float + | Self::R64Uint | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float @@ -3931,6 +3975,7 @@ impl TextureFormat { Self::R32Uint | Self::R32Sint | Self::R32Float + | Self::R64Uint | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float @@ -3999,7 +4044,8 @@ impl TextureFormat { | Self::R16Float | Self::R32Uint | Self::R32Sint - | Self::R32Float => 1, + | Self::R32Float + | Self::R64Uint => 1, Self::Rg8Unorm | Self::Rg8Snorm @@ -4253,6 +4299,10 @@ fn texture_format_serialize() { serde_json::to_string(&TextureFormat::Rg11b10Ufloat).unwrap(), "\"rg11b10ufloat\"".to_string() ); + assert_eq!( + serde_json::to_string(&TextureFormat::R64Uint).unwrap(), + "\"r64uint\"".to_string() + ); assert_eq!( serde_json::to_string(&TextureFormat::Rg32Uint).unwrap(), "\"rg32uint\"".to_string() @@ -4549,6 +4599,10 @@ fn texture_format_deserialize() { serde_json::from_str::("\"rg11b10ufloat\"").unwrap(), TextureFormat::Rg11b10Ufloat ); + assert_eq!( + serde_json::from_str::("\"r64uint\"").unwrap(), + TextureFormat::R64Uint + ); assert_eq!( serde_json::from_str::("\"rg32uint\"").unwrap(), TextureFormat::Rg32Uint @@ -5083,6 +5137,16 @@ pub enum IndexFormat { Uint32 = 1, } +impl IndexFormat { + /// Returns the size in bytes of the index format + pub fn byte_size(&self) -> usize { + match self { + IndexFormat::Uint16 => 2, + IndexFormat::Uint32 => 4, + } + } +} + /// Operation to perform on the stencil value. /// /// Corresponds to [WebGPU `GPUStencilOperation`]( @@ -5686,6 +5750,11 @@ bitflags::bitflags! { #[cfg_attr(feature = "serde", serde(transparent))] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct TextureUsages: u32 { + // + // ---- Start numbering at 1 << 0 ---- + // + // WebGPU features: + // /// Allows a texture to be the source in a [`CommandEncoder::copy_texture_to_buffer`] or /// [`CommandEncoder::copy_texture_to_texture`] operation. const COPY_SRC = 1 << 0; @@ -5698,6 +5767,14 @@ bitflags::bitflags! { const STORAGE_BINDING = 1 << 3; /// Allows a texture to be an output attachment of a render pass. const RENDER_ATTACHMENT = 1 << 4; + + // + // ---- Restart Numbering for Native Features --- + // + // Native Features: + // + /// Allows a texture to be used with image atomics. Requires [`Features::TEXTURE_ATOMIC`]. + const STORAGE_ATOMIC = 1 << 16; } } @@ -6882,6 +6959,18 @@ pub enum StorageTextureAccess { /// layout(set=0, binding=0, r32f) uniform image2D myStorageImage; /// ``` ReadWrite, + /// The texture can be both read and written in the shader via atomics and must be annotated + /// with `read_write` in WGSL. + /// + /// [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] must be enabled to use this access + /// mode. This is a nonstandard, native-only extension. + /// + /// Example WGSL syntax: + /// ```rust,ignore + /// @group(0) @binding(0) + /// var my_storage_image: texture_storage_2d; + /// ``` + Atomic, } /// Specific type of a sampler binding. diff --git a/wgpu/src/api/blas.rs b/wgpu/src/api/blas.rs index 57f2f869d4..8f681f17d5 100644 --- a/wgpu/src/api/blas.rs +++ b/wgpu/src/api/blas.rs @@ -33,8 +33,7 @@ static_assertions::assert_impl_all!(CreateBlasDescriptor<'_>: Send, Sync); /// /// Each one contains: /// - A reference to a BLAS, this ***must*** be interacted with using [TlasInstance::new] or [TlasInstance::set_blas], a -/// TlasInstance that references a BLAS keeps that BLAS from being dropped, but if the BLAS is explicitly destroyed (e.g. -/// using [Blas::destroy]) the TlasInstance becomes invalid +/// TlasInstance that references a BLAS keeps that BLAS from being dropped /// - A user accessible transformation matrix /// - A user accessible mask /// - A user accessible custom index @@ -102,8 +101,8 @@ pub struct BlasTriangleGeometry<'a> { pub vertex_stride: wgt::BufferAddress, /// Index buffer (optional). pub index_buffer: Option<&'a Buffer>, - /// Index buffer offset in bytes (optional, required if index buffer is present). - pub index_buffer_offset: Option, + /// Number of indexes to skip in the index buffer (optional, required if index buffer is present). + pub first_index: Option, /// Transform buffer containing 3x4 (rows x columns, row major) affine transform matrices `[f32; 12]` (optional). pub transform_buffer: Option<&'a Buffer>, /// Transform buffer offset in bytes (optional, required if transform buffer is present). @@ -148,10 +147,6 @@ impl Blas { pub fn handle(&self) -> Option { self.handle } - /// Destroy the associated native resources as soon as possible. - pub fn destroy(&self) { - self.inner.destroy(); - } } /// Context version of [BlasTriangleGeometry]. diff --git a/wgpu/src/api/tlas.rs b/wgpu/src/api/tlas.rs index b260951152..c7a80ef635 100644 --- a/wgpu/src/api/tlas.rs +++ b/wgpu/src/api/tlas.rs @@ -31,13 +31,6 @@ static_assertions::assert_impl_all!(Tlas: WasmNotSendSync); crate::cmp::impl_eq_ord_hash_proxy!(Tlas => .shared.inner); -impl Tlas { - /// Destroy the associated native resources as soon as possible. - pub fn destroy(&self) { - self.shared.inner.destroy(); - } -} - /// Entry for a top level acceleration structure build. /// Used with raw instance buffers for an unvalidated builds. /// See [TlasPackage] for the safe version. diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 1f6af6d9d5..9c11a129c2 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -1931,6 +1931,10 @@ impl dispatch::DeviceInterface for WebDevice { wgt::StorageTextureAccess::ReadWrite => { webgpu_sys::GpuStorageTextureAccess::ReadWrite } + wgt::StorageTextureAccess::Atomic => { + // Validated out by `BindGroupLayoutEntryError::StorageTextureAtomic` + unreachable!() + } }; let storage_texture = webgpu_sys::GpuStorageTextureBindingLayout::new( map_texture_format(format), @@ -2740,22 +2744,14 @@ impl Drop for WebTexture { } } -impl dispatch::BlasInterface for WebBlas { - fn destroy(&self) { - unimplemented!("Raytracing not implemented for web"); - } -} +impl dispatch::BlasInterface for WebBlas {} impl Drop for WebBlas { fn drop(&mut self) { // no-op } } -impl dispatch::TlasInterface for WebTlas { - fn destroy(&self) { - unimplemented!("Raytracing not implemented for web"); - } -} +impl dispatch::TlasInterface for WebTlas {} impl Drop for WebTlas { fn drop(&mut self) { // no-op diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 1f1e248dea..ad750860bc 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1465,7 +1465,7 @@ impl dispatch::DeviceInterface for CoreDevice { global.device_create_tlas(self.id, &desc.map_label(|l| l.map(Borrowed)), None); if let Some(cause) = error { self.context - .handle_error(&self.error_sink, cause, desc.label, "Device::create_blas"); + .handle_error(&self.error_sink, cause, desc.label, "Device::create_tlas"); } CoreTlas { context: self.context.clone(), @@ -2004,12 +2004,7 @@ impl Drop for CoreTexture { } } -impl dispatch::BlasInterface for CoreBlas { - fn destroy(&self) { - // Per spec, no error to report. Even calling destroy multiple times is valid. - let _ = self.context.0.blas_destroy(self.id); - } -} +impl dispatch::BlasInterface for CoreBlas {} impl Drop for CoreBlas { fn drop(&mut self) { @@ -2017,12 +2012,7 @@ impl Drop for CoreBlas { } } -impl dispatch::TlasInterface for CoreTlas { - fn destroy(&self) { - // Per spec, no error to report. Even calling destroy multiple times is valid. - let _ = self.context.0.tlas_destroy(self.id); - } -} +impl dispatch::TlasInterface for CoreTlas {} impl Drop for CoreTlas { fn drop(&mut self) { @@ -2457,7 +2447,7 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder { transform_buffer_offset: tg.transform_buffer_offset, first_vertex: tg.first_vertex, vertex_stride: tg.vertex_stride, - index_buffer_offset: tg.index_buffer_offset, + first_index: tg.first_index, } }); wgc::ray_tracing::BlasGeometries::TriangleGeometries(Box::new(iter)) @@ -2507,7 +2497,7 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder { transform_buffer_offset: tg.transform_buffer_offset, first_vertex: tg.first_vertex, vertex_stride: tg.vertex_stride, - index_buffer_offset: tg.index_buffer_offset, + first_index: tg.first_index, } }); wgc::ray_tracing::BlasGeometries::TriangleGeometries(Box::new(iter)) diff --git a/wgpu/src/dispatch.rs b/wgpu/src/dispatch.rs index bdf57b24c8..a58decf65f 100644 --- a/wgpu/src/dispatch.rs +++ b/wgpu/src/dispatch.rs @@ -269,12 +269,8 @@ pub trait TextureInterface: CommonTraits { fn destroy(&self); } -pub trait BlasInterface: CommonTraits { - fn destroy(&self); -} -pub trait TlasInterface: CommonTraits { - fn destroy(&self); -} +pub trait BlasInterface: CommonTraits {} +pub trait TlasInterface: CommonTraits {} pub trait QuerySetInterface: CommonTraits {} pub trait PipelineLayoutInterface: CommonTraits {} pub trait RenderPipelineInterface: CommonTraits {