diff --git a/.gitignore b/.gitignore index 49ea053..e7cf6f9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ build .DS_Store imgui.ini .vscode +.vs +out \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index be3204e..17562cc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,12 +4,6 @@ [submodule "third_party/CLI11"] path = third_party/CLI11 url = https://github.com/CLIUtils/CLI11 -[submodule "third_party/tinyexr"] - path = third_party/tinyexr - url = https://github.com/syoyo/tinyexr -[submodule "third_party/miniz"] - path = third_party/miniz - url = https://github.com/richgel999/miniz.git [submodule "assets/CapsaicinTestMedia"] path = assets/CapsaicinTestMedia - url = ../CapsaicinTestMedia.git + url = https://github.com/GPUOpen-LibrariesAndSDKs/CapsaicinTestMedia.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 2879d73..413c931 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,37 +1,36 @@ -cmake_minimum_required(VERSION 3.10.0) +cmake_minimum_required(VERSION 3.24.0) project(Capsaicin - VERSION 1.0.0 + VERSION 1.1.0 DESCRIPTION "AMD experimental real-time rendering framework designed for graphics research and development" ) -set(GFX_BUILD_EXAMPLES OFF CACHE BOOL "Build gfx examples") -set(TINYGLTF_BUILD_LOADER_EXAMPLE OFF CACHE BOOL "Build loader_example") - set_property(GLOBAL PROPERTY USE_FOLDERS ON) # Set preprocessor definitions -add_definitions(/MP - -D_HAS_ITERATOR_DEBUGGING=0 +add_definitions( + /MP ) -# Gather dependencies -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third_party/gfx) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third_party/CLI11) -set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third_party/miniz) +# Disable unused parameters from 3rd party directories +set(GFX_BUILD_EXAMPLES OFF CACHE BOOL "") +set(BUILD_TESTING OFF CACHE BOOL "") -# Set linker flags -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SUBSYSTEM:WINDOWS") +# Enable gfx options 
+set(GFX_ENABLE_SCENE ON CACHE BOOL "") +set(GFX_ENABLE_GUI ON CACHE BOOL "") -set_property(GLOBAL PROPERTY USE_FOLDERS ON) +# Gather dependencies +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third_party/gfx EXCLUDE_FROM_ALL) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third_party/CLI11 EXCLUDE_FROM_ALL) +set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE) # Organize third party projects set_target_properties(uninstall PROPERTIES FOLDER "third_party") -set_target_properties(miniz PROPERTIES FOLDER "third_party") set_target_properties(gfx PROPERTIES FOLDER "third_party") set_target_properties(CLI11 PROPERTIES FOLDER "third_party") set_target_properties(tinyobjloader PROPERTIES FOLDER "third_party/gfx_deps") +set_target_properties(tinyexr PROPERTIES FOLDER "third_party/gfx_deps") set_target_properties(ktx PROPERTIES FOLDER "third_party/gfx_deps") set_target_properties(astcenc-avx2-static PROPERTIES FOLDER "third_party/gfx_deps/ktx_deps") set_target_properties(ktx_read PROPERTIES FOLDER "third_party/gfx_deps/ktx_deps") @@ -65,18 +64,11 @@ ELSE() SET(CAPSAICIN_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY} CACHE STRING "Path for archive output files") ENDIF() +set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/install") + # Build Capsaicin add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src) # Set up startup project set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT scene_viewer) - -# Install assets and shaders directories -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/assets DESTINATION . 
FILES_MATCHING PATTERN "*.*") -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/core/shaders DESTINATION src/core FILES_MATCHING PATTERN "*.*") - -# Configure CPack -set(CPACK_GENERATOR "ZIP") -set(CPACK_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}") -include(CPack) diff --git a/CMakePresets.json b/CMakePresets.json new file mode 100644 index 0000000..93f110c --- /dev/null +++ b/CMakePresets.json @@ -0,0 +1,77 @@ +{ + "version": 3, + "configurePresets": [ + { + "name": "msvc-base", + "description": "Target Windows with the Visual Studio development environment.", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/out/build/${presetName}", + "installDir": "${sourceDir}/out/install/${presetName}", + "cacheVariables": { + "CMAKE_C_COMPILER": "cl.exe", + "CMAKE_CXX_COMPILER": "cl.exe" + }, + "toolset": { + "value": "host=x64", + "strategy": "external" + }, + "architecture": { + "value": "x64", + "strategy": "external" + }, + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Windows" + } + }, + { + "name": "x64-debug", + "displayName": "x64-Debug", + "description": "Target Windows (64-bit) with the Visual Studio development environment. (Debug)", + "inherits": "msvc-base", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug" + } + }, + { + "name": "x64-release-with-debug-info", + "displayName": "x64-RelWithDebInfo", + "description": "Target Windows (64-bit) with the Visual Studio development environment. (Release with Debug Info)", + "inherits": "msvc-base", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "RelWithDebInfo" + } + }, + { + "name": "x64-release", + "displayName": "x64-Release", + "description": "Target Windows (64-bit) with the Visual Studio development environment. 
(Release)", + "inherits": "msvc-base", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release" + } + } + ], + "buildPresets": [ + { + "name": "debug-build-windows", + "displayName": "x64-Debug", + "configurePreset": "x64-debug", + "description": "Debug Windows build" + }, + { + "name": "release-with-debug-info-build-windows", + "displayName": "x64-RelWithDebInfo", + "configurePreset": "x64-release-with-debug-info", + "description": "Release with Debug Info Windows build" + }, + { + "name": "release-build-windows", + "displayName": "x64-Release", + "configurePreset": "x64-release", + "description": "Release Windows build" + } + ] +} diff --git a/LICENSE.txt b/LICENSE.txt index 0c7438a..e18da31 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index fcb3f40..a061a84 100644 --- a/README.md +++ b/README.md @@ -15,17 +15,17 @@ Features: ![Capsaicin](docs/images/scene_viewer.png) -## GI-1.0 +## GI-1.1 -We used Capsaicin to implement our GI-1.0 technique for estimating diffuse indirect illumination in real-time. +We used Capsaicin to implement our GI-1.1 technique for estimating diffuse and specular indirect illumination in real-time. The technique uses two levels of radiance caching to allow for reduced sampling rate in order to improve performance while making the most of every ray through better sampling. -Please refer to our [publication](https://gpuopen.com/download/publications/GPUOpen2022_GI1_0.pdf) for more technical details. 
+Please refer to our [GI-1.0 technical report](https://gpuopen.com/download/publications/GPUOpen2022_GI1_0.pdf) and [GI-1.1 paper](https://gpuopen.com/download/publications/SA2023_RealTimeReflection.pdf) for more technical details. #### Note on light support -GI-1.0 is primarily an indirect lighting solution and as such is expected to be combined with an existing direct lighting technique for integration into a rendering pipeline. +GI-1.1 is primarily an indirect lighting solution and as such is expected to be combined with an existing direct lighting technique for integration into a rendering pipeline. All common light types are supported when evaluating the indirect lighting component (e.g., point lights, spot lights, etc.) using our grid-based light sampler and (optional) reservoir-based resampling. @@ -60,15 +60,43 @@ Capsaicin uses the [CMake](https://cmake.org/) build system. See the [Getting St ## Citation -If Capsaicin is used any any published work, ensure to cite it using: +If Capsaicin is used in any published work, please ensure to cite it using: ```bibtex @Misc{Capsaicin23, - author = {Guillaume Boissé, Matthew Oliver, Sylvain Meunier, Héloïse Dupont de Dinechin and Kenta Eto}, + author = {Boissé, Guillaume and Oliver, Matthew and Meunier, Sylvain and Dupont de Dinechin, Héloïse and Eto, Kenta}, title = {The {AMD Capsaicin Framework}}, year = {2023}, - month = {5}, + month = {8}, url = {https://github.com/GPUOpen-LibrariesAndSDKs/Capsaicin}, - note = {\url{https://github.com/GPUOpen-LibrariesAndSDKs/Capsaicin}} +} +``` + +If our techniques are referenced in any published work, please ensure to cite them using: + +```bibtex +@inproceedings{10.1145/3610543.3626167, +author = {Eto, Kenta and Meunier, Sylvain and Harada, Takahiro and Boiss\'{e}, Guillaume}, +title = {Real-Time Rendering of Glossy Reflections Using Ray Tracing and Two-Level Radiance Caching}, +year = {2023}, +isbn = {9798400703140}, +publisher = {Association for Computing Machinery}, 
+address = {New York, NY, USA}, +url = {https://doi.org/10.1145/3610543.3626167}, +doi = {10.1145/3610543.3626167}, +abstract = {Estimation of glossy reflections remains a challenging topic for real-time renderers. Ray tracing is a robust solution for evaluating the specular lobe of a given BRDF; however, it is computationally expensive and introduces noise that requires filtering. Other solutions, such as light probe systems, offer to approximate the signal with little to no noise and better performance but tend to introduce additional bias in the form of overly blurred visuals. This paper introduces a novel approach to rendering reflections in real time that combines the radiance probes of an existing diffuse global illumination framework with denoised ray-traced reflections calculated at a low sampling rate. We will show how combining these two sources allows producing an efficient and high-quality estimation of glossy reflections that is suitable for real-time applications such as games.}, +booktitle = {SIGGRAPH Asia 2023 Technical Communications}, +articleno = {4}, +numpages = {4}, +keywords = {real-time, ray tracing, rendering}, +location = {, Sydney, NSW, Australia, }, +series = {SA '23} +} + +@misc{gi10, + author = {Guillaume Boissé and Sylvain Meunier and Heloise de Dinechin and Pieterjan Bartels and Alexander Veselov and Kenta Eto and Takahiro Harada}, + title = {GI-1.0: A Fast Scalable Two-Level Radiance Caching Scheme for Real-Time Global Illumination}, + year = {2023}, + url = {https://gpuopen.com/download/publications/GPUOpen2022_GI1_0.pdf} } ``` diff --git a/assets/CapsaicinTestMedia b/assets/CapsaicinTestMedia index f7d2000..d9b2c39 160000 --- a/assets/CapsaicinTestMedia +++ b/assets/CapsaicinTestMedia @@ -1 +1 @@ -Subproject commit f7d20009dce2f28f1a42510729468314f066f11a +Subproject commit d9b2c3940de9216a292f6808babbd27d15189891 diff --git a/docs/development/component.md b/docs/development/component.md index 97e6fa6..00656fc 100644 --- 
a/docs/development/component.md +++ b/docs/development/component.md @@ -10,13 +10,19 @@ Each new *Component* should be added in its own sub-folder, so for example a new `src/core/components/my_component/my_component.cpp`\ Note: It is not required that the source file has the same name as the sub-folder it is contained within. -All new *Components* must inherit from the abstract base class `Component` using its inbuilt factory registration helper `Component::RegistrarName`. Doing so registers the new *Component* with the component factory. To ensure this registration works correctly the new *Component* must implement an empty default constructor (cannot use `=default`) as well as a static constant string containing a unique name for the *Component*. +All new *Components* must inherit from the abstract base class `Component`. To make the new *Component* discoverable by the rest of the system, it should also be registered with the component factory by additionally inheriting from `ComponentFactory::Registrar`. Doing so registers the new *Component* with the component factory. To ensure this registration works correctly the new *Component* must implement an empty default constructor (cannot use `=default`) as well as a static constant string containing a unique name for the *Component*. The member functions that need overriding are: +- `Constructor()`:\ + A default constructor that initialises the `Component` base class with a unique name for the current *Component*. +- `~Destructor()`:\ + Each *Component* must provide a destructor that properly frees all internally created resources. - `bool init(CapsaicinInternal const &capsaicin)`:\ - This function is called automatically by the framework after the *Renderer Technique* and any requested *Render Options*, *Components*, *AOVs* (see below), or other requested items have been created and initialised.
It is the responsibility of the *Render Technique* to perform all required initialisation operations within this function, such as creating any used CPU|GPU resources that are required to persist over the lifetime of the *Render Technique*. The return value for this function can be used to signal to the framework if resource allocation or other initialisation operations have failed and the *Render Technique* would not be able to operate as a result. Returning `false` indicates an error state while `true` signifies correct initialisation. + This function is called automatically by the framework after the *Component* and any requested *Render Options*, *Components*, *AOVs* (see below), or other requested items have been created and initialised. It is the responsibility of the *Component* to perform all required initialisation operations within this function, such as creating any used CPU|GPU resources that are required to persist over the lifetime of the *Component*. The return value for this function can be used to signal to the framework if resource allocation or other initialisation operations have failed and the *Component* would not be able to operate as a result. Returning `false` indicates an error state while `true` signifies correct initialisation. - `void run(CapsaicinInternal &capsaicin)`:\ This function is called automatically every frame and is responsible for performing all the required main operations of the component. Current render settings and other internal framework state can be retrieved from the passed in `capsaicin` object. This object can be used to retrieve internal *Render Options* or other settings. Unlike *Render Techniques* not all of the rendering operations must be performed within this function. *Components* can also provide additional member functions that can be explicitly called by *Render Techniques* to perform additional work or parameter passing. 
This function is always run by the engine before running the per-frame functions of any *Render Techniques*. +- `void terminate()`:\ + This function is automatically called when a *Component* is being destroyed or when a reset has occurred. It is the responsibility of the *Component* to perform all required destruction operations within this function, such as releasing all used CPU|GPU resources. It is not always guaranteed that this function will be called when destroying a *Component* so the *Component*'s destructor should also call this function to destroy any created resources. The member functions that can be optionally overridden if needed are: - `RenderOptionList getRenderOptions()`:\ @@ -25,6 +31,8 @@ The member functions that can be optionally overridden if needed are: This function is called on *Component* creation and is responsible for returning a list of all additionally required *Components* required by the current *Component*. If no *Components* are required overriding this function is not necessary or the returned list can be empty. The internal framework uses this list to create *Components* in addition to the ones requested by *Render Techniques*. Each *Component* can gain access to other *Components* by using `Capsaicin.getComponent("Name")` or `Capsaicin.getComponent()`. - `BufferList getBuffers() const`:\ This function is called on *Component* creation and is responsible for returning a list of all required shared memory buffer objects. If no buffers are required overriding this function is not necessary or the returned list can be empty. Each requested buffer is identified by a unique name string as well as additional information such as the buffers requested size etc. The internal framework uses this list to create buffers for all *Components* in addition to the ones requested by *Renderer Techniques*. Each *Component* can then gain access to each created buffer using `Capsaicin.getBuffer("Name")`.
+- `void renderGUI(CapsaicinInternal &capsaicin) const`:\ + This function can be used to draw *Component* specific UI elements to aid in visualisation and/or debugging. This function will be called by the parent *Capsaicin* `renderGUI` call which will execute all *Components* `renderGUI` functions followed by all *Render Techniques* `renderGUI` functions in the order that they were added to the system. Any UI elements output by this function will be displayed in the 'Render Settings' UI section. Any *Component* that outputs a large number of UI parameters should wrap them in a collapsible tree node so as not to pollute the UI. An example blank implementation of `my_component` would look like: ``` @@ -32,18 +40,20 @@ An example blank implementation of `my_component` would look like: namespace Capsaicin { -class MyComponent : public Component::RegistrarName +class MyComponent : public Component + , public ComponentFactory::Registrar { public: /***** Must define unique name to represent new type *****/ static constexpr std::string_view Name = "My Component"; - /***** Must have empty constructor *****/ - MyComponent() noexcept {} + /***** Must have constructor to initialise base 'Component' *****/ + MyComponent() noexcept : Component(Name) {} ~MyComponent() { /***** Must clean-up any created member variables/data *****/ + terminate(); } RenderOptionList getRenderOptions() noexcept override @@ -51,39 +61,41 @@ public: RenderOptionList newOptions; /***** Push any desired options to the returned list here (else just 'return {}') *****/ /***** Example (using provided helper RENDER_OPTION_MAKE): *****/ - /***** newOptions.emplace(RENDER_OPTION_MAKE(my_technique_enable, options)); *****/ + /***** newOptions.emplace(RENDER_OPTION_MAKE(my_component_enable, options)); *****/ return newOptions; } struct RenderOptions { - /***** Any member variable options can be added here. *****/ + /***** Any member variable options can be added here.
*****/ + /***** This struct can be entirely omitted if not being used. *****/ /***** This represent the internal format of options where as 'RenderOptionList' has these stored as strings and variants *****/ + /***** Example: bool my_component_enable; *****/ }; - static RenderOptions convertOptions(RenderSettings const &settings) noexcept + static RenderOptions convertOptions(RenderOptionList const &options) noexcept { /***** Optional function only required if actually providing RenderOptions *****/ RenderOptions newOptions; /***** Used to convert options between external string/variant and internal data type 'RenderOptions' *****/ /***** Example: (using provided helper RENDER_OPTION_GET): *****/ - /***** RENDER_OPTION_GET(my_technique_enable, newOptions, settings.options_); *****/ + /***** RENDER_OPTION_GET(my_component_enable, newOptions, options); *****/ return newOptions; } - ComponentList RenderTechnique::getComponents() const noexcept + ComponentList getComponents() const noexcept override { ComponentList components; - /***** Push any desired Components to the returned list here (else just 'return {}') *****/ - /***** Example: if corresponding header is already included (using provided helper COMPONENT_MAKE): *****/ - /***** components.emplace_back(COMPONENT_MAKE(TypeOfComponent)); *****/ + /***** Push any desired Components to the returned list here (else just 'return {}' or dont override) *****/ + /***** Example: if corresponding header is already included (using provided helper COMPONENT_MAKE): *****/ + /***** components.emplace_back(COMPONENT_MAKE(TypeOfComponent)); *****/ return components; } BufferList getBuffers() const noexcept override { BufferList buffers; - /***** Push any desired Buffers to the returned list here (else just 'return {}') *****/ + /***** Push any desired Buffers to the returned list here (else just 'return {}' or dont override) *****/ return buffers; } @@ -95,18 +107,30 @@ public: void run(CapsaicinInternal &capsaicin) noexcept 
override { - auto &renderSettings = capsaicin.getRenderSettings(); - RenderOptions newOptions = convertOptions(renderSettings); - /***** Perform any required rendering operations here *****/ - options = newOptions; + /***** If any options are provided they should be checked for changes here *****/ + /***** Example: *****/ + /***** RenderOptions newOptions = convertOptions(capsaicin.getOptions()); *****/ + /***** Check for changes and handle accordingly *****/ + /***** options = newOptions; *****/ + /***** Perform any required rendering operations here *****/ } + void terminate() noexcept override + { + /***** Cleanup any created CPU or GPU resources *****/ + } + + void renderGUI(CapsaicinInternal &capsaicin) const noexcept override + { + /***** Add any UI drawing commands here *****/ + } + /***** Additional member functions can also be provided *****/ protected: /***** Internal member data can be added here *****/ - /***** Example: *****/ - /***** RenderOptions options; *****/ + /***** Example: *****/ + /***** RenderOptions options; *****/ }; } // namespace Capsaicin ``` \ No newline at end of file diff --git a/docs/development/getting_started.md b/docs/development/getting_started.md index bfe1b4d..ef89266 100644 --- a/docs/development/getting_started.md +++ b/docs/development/getting_started.md @@ -21,6 +21,8 @@ There are several ways to get started compiling the source code: - Use CMake to generate a Visual Studio project - `cmake CMakeLists.txt -B ./build -G "Visual Studio 17 2022"` (note other Visual Studio versions can be used) - Open the newly created project in Visual Studio and build as normal +- Open directly in VS Code or Visual Studio + - Both VS Code and newer versions of Visual Studio enable loading CMake projects directly (see corresponding documentation for your IDE) - Build on command line - `cmake -S ./ -B ./build -A x64` - `cmake --build ./build --config RelWithDebInfo` @@ -31,7 +33,7 @@ Code is separated by functionality with the expectation that 
files (i.e. headers All source code is written using C++ and uses the `.cpp` file extension. Header files use the `.h` extension. This extension is also used for any files shared between host and device code. All device code uses the `.hlsl` extension. -- `assets` : Contains bundled test scenes +- `assets` : Contains bundled test scenes and associated data - `docs` : Contains the documentation - `dump` : The default location for saved screenshots - `shader_pdb` : The default location for saved shader debugging information @@ -40,11 +42,11 @@ All source code is written using C++ and uses the `.cpp` file extension. Header - `include` : Contains the single `capsaicin.h` header file used to interface with the framework - `src` - `capsaicin` : Contains the main internal framework code + - `components` : The location of all available components (each within its own sub-folder) - `lights` : Common HLSL headers for lighting evaluation/sampling - `materials` : Common HLSL headers for material evaluation/sampling - `math` : Common mathematical helper functions - - `random` : Random number generators - - `render_technique` : The location of all available render techniques (each within its own sub-folder) + - `render_techniques` : The location of all available render techniques (each within its own sub-folder) - `renderers` : All available renderers (each within its own sub-folder) - `utilities` : Reusable host side utility helpers (sort, reduce etc.) 
- `scene_viewer` : The default application @@ -74,3 +76,7 @@ Available controls: `Mouse Wheel (Horizontal)` - Change Field of View `Space` - Pause/Resume animations +`Left` - Step animation back 1 frame +`Right` - Step animation forward 1 frame +`Up` - Increase playback speed +`Down` - Decrease playback speed \ No newline at end of file diff --git a/docs/development/render_technique.md b/docs/development/render_technique.md index 3ad0e2a..2767a16 100644 --- a/docs/development/render_technique.md +++ b/docs/development/render_technique.md @@ -13,14 +13,16 @@ Note: It is not required that the source file has the same name as the sub-folde All new *Renderer Techniques* must inherit from the base class `RenderTechnique` and override all required member functions. The member functions that need overriding are: +- `Constructor()`:\ + A default constructor that initialises the `RenderTechnique` base class with a unique name for the current *Renderer Technique*. +- `~Destructor()`:\ + Each *Renderer Technique* must provide a destructor that properly frees all internally created resources. - `bool init(CapsaicinInternal const &capsaicin)`:\ This function is called automatically by the framework after the *Renderer Technique* and any requested *Render Options*, *Components*, *AOVs* (see below), or other requested items have been created and initialised. It is the responsibility of the *Render Technique* to perform all required initialisation operations within this function, such as creating any used CPU|GPU resources that are required to persist over the lifetime of the *Render Technique*. The return value for this function can be used to signal to the framework if resource allocation or other initialisation operations have failed and the *Render Technique* would not be able to operate as a result. Returning `false` indicates an error state while `true` signifies correct initialisation. 
- `void render(CapsaicinInternal &capsaicin)`:\ This function is called every frame and is responsible for performing all the required operations of the *Renderer Technique*. Current render settings, debug views and other internal framework state can be retrieved from the passed in `capsaicin` object. This object can be used to retrieve internal *AOV*s using `capsaicin.getAOVBuffer("Name")` as well as current *Debug Views* and *Render Options*. It is the responsibility of the *Render Technique* to perform all required per-frame operations within this function. -- `Constructor()`:\ - A default constructor that initialises the `RenderTechnique` base class with a unique name for the current technique. -- `~Destructor()`:\ - Each technique must provide a destructor that properly frees all internally created resources. +- `void terminate()`:\ + This function is automatically called when a *Renderer Technique* is being destroyed or when a reset has occurred. It is the responsibility of the *Renderer Technique* to perform all required destruction operations within this function, such as releasing all used CPU|GPU resources. It is not always guaranteed that this function will be called when destroying a *Renderer Technique* so the *Renderer Technique*'s destructor should also call this function to destroy any created resources. The member functions that can be optionally overridden if needed are: - `RenderOptionList getRenderOptions()`:\ @@ -33,6 +35,8 @@ The member functions that can be optionally overridden if needed are: This function is called on technique creation and is responsible for returning a list of all required *AOV* buffers (shared render buffers e.g. GBuffers) required by the current technique. If no *AOV*s are required overriding this function is not necessary or the returned list can be empty. The `AOVList` type is used to describe each *AOV* using a unique string name as well as the types of operations that will be performed on the *AOV* (e.g. Read, Write, Accumulate).
It also holds additional optional values that can be used to define the *AOVs* format, set it to be automatically cleared/backed-up each frame and other options. The internal framework uses this list to create *AOV*s for all *Renderer Techniques*. Each technique can then gain access to each created *AOV* using `Capsaicin.getAOVBuffer("Name")`. It should be noted that some *AOVs* are automatically created by the framework such as the output buffer "Color", other inbuilt buffers such as depth "Depth" and debug output buffers "Debug" are also automatically created but only if a *Renderer Technique* requests to use them. - `DebugViewList getDebugViews() const`:\ This function is called on technique creation and returns a list of any *Debug Views* provided by the technique. If none are provided overriding this function is not necessary or the returned list can be empty. By default the internal framework will provide default *Debug Views* for any known *AOV*s using default rendering shaders based on the format of the *AOV* (e.g. depth etc.). These *Debug Views* will have the same name as the *AOV* its displaying. If a *Render Technique* wishes to add its own additional *Debug View*(s) it can do so by returning a list of provided views using a unique string name to identify them. In cases where the internal frameworks default *Debug View* of an *AOV* is undesirable it is also possible for a technique to override it by providing its own *Debug View* and giving it the same name string as the *AOV*. For any created custom *Debug View* it is the responsibility of the *Render Technique* to check the render settings (using `RenderSettings.::debug_view_`) each frame and output the debug view to the "Debug" *AOV* when requested (see `render(...)` above). +- `void renderGUI(CapsaicinInternal &capsaicin) const`:\ + This function can be used to draw *Renderer Technique* specific UI elements to aid in visualisation and/or debugging. 
This function will be called by the parent *Capsaicin* `renderGUI` call which will execute all *Components* `renderGUI` functions followed by all *Render Techniques* `renderGUI` functions in the order that they were added to the system. Any UI elements output by this function will be displayed in the 'Render Settings' UI section. Any *Renderer Technique* that outputs a large number of UI parameters should wrap them in a collapsible tree node so as not to pollute the UI. An example blank implementation of `my_technique` would look like: ``` @@ -49,6 +53,7 @@ public: ~MyTechnique() { /***** Must clean-up any created member variables/data *****/ + terminate(); } RenderOptionList getRenderOptions() noexcept override @@ -56,51 +61,55 @@ public: RenderOptionList newOptions; /***** Push any desired options to the returned list here (else just 'return {}') *****/ /***** Example (using provided helper RENDER_OPTION_MAKE): *****/ - /***** newOptions.emplace(RENDER_OPTION_MAKE(my_technique_enable, options)); *****/ + /***** newOptions.emplace(RENDER_OPTION_MAKE(my_technique_enable, options)); *****/ return newOptions; } struct RenderOptions { - /***** Any member variable options can be added here. *****/ + /***** Any member variable options can be added here. *****/ + /***** This struct can be entirely omitted if not being used.
*****/ /***** This represent the internal format of options where as 'RenderOptionList' has these stored as strings and variants *****/ + /***** Example: bool my_technique_enable; *****/ }; - static RenderOptions convertOptions(RenderSettings const &settings) noexcept + static RenderOptions convertOptions(RenderOptionList const &options) noexcept { /***** Optional function only required if actually providing RenderOptions *****/ RenderOptions newOptions; /***** Used to convert options between external string/variant and internal data type 'RenderOptions' *****/ /***** Example: (using provided helper RENDER_OPTION_GET): *****/ - /***** RENDER_OPTION_GET(my_technique_enable, newOptions, settings.options_); *****/ + /***** RENDER_OPTION_GET(my_technique_enable, newOptions, options); *****/ return newOptions; } - ComponentList RenderTechnique::getComponents() const noexcept + ComponentList getComponents() const noexcept override { ComponentList components; - /***** Push any desired Components to the returned list here (else just 'return {}') *****/ + /***** Push any desired Components to the returned list here (else just 'return {}' or dont override) *****/ + /***** Example: if corresponding header is already included (using provided helper COMPONENT_MAKE): *****/ + /***** components.emplace_back(COMPONENT_MAKE(TypeOfComponent)); *****/ return components; } BufferList getBuffers() const noexcept override { BufferList buffers; - /***** Push any desired Buffers to the returned list here (else just 'return {}') *****/ + /***** Push any desired Buffers to the returned list here (else just 'return {}' or dont override) *****/ return buffers; } AOVList getAOVs() const noexcept override { AOVList aovs; - /***** Push any desired AOVs to the returned list here (else just 'return {}') *****/ + /***** Push any desired AOVs to the returned list here (else just 'return {}' or dont override) *****/ return aovs; } DebugViewList getDebugViews() const noexcept override { DebugViewList 
views; - /***** Push any desired Debug Views to the returned list here (else just 'return {}') *****/ + /***** Push any desired Debug Views to the returned list here (else just 'return {}' or dont override) *****/ return views; } @@ -112,17 +121,29 @@ public: void render(CapsaicinInternal &capsaicin) noexcept override { - auto &renderSettings = capsaicin.getRenderSettings(); - RenderOptions newOptions = convertOptions(renderSettings); - /***** Perform all required rendering operations here *****/ - /***** Debug Views can be checked with 'renderSettings.debug_view_' *****/ - options = newOptions; + /***** If any options are provided they should be checked for changes here *****/ + /***** Example: *****/ + /***** RenderOptions newOptions = convertOptions(capsaicin.getOptions()); *****/ + /***** Check for changes and handle accordingly *****/ + /***** options = newOptions; *****/ + /***** Perform any required rendering operations here *****/ + /***** Debug Views can be checked with 'capsaicin.getCurrentDebugView()' *****/ } + + void terminate() noexcept override + { + /***** Cleanup any created CPU or GPU resources *****/ + } + + void renderGUI(CapsaicinInternal &capsaicin) const noexcept override + { + /***** Add any UI drawing commands here *****/ + } protected: /***** Internal member data can be added here *****/ - /***** Example: *****/ - /***** RenderOptions options; *****/ + /***** Example: *****/ + /***** RenderOptions options; *****/ }; } // namespace Capsaicin ``` \ No newline at end of file diff --git a/docs/development/renderer.md b/docs/development/renderer.md index e083a73..80bcd3f 100644 --- a/docs/development/renderer.md +++ b/docs/development/renderer.md @@ -9,15 +9,15 @@ Each new *Renderer* should be added in its own sub-folder, so for example a new `src/core/renderers/my_renderer/my_renderer.cpp`\ Note: It is not required that the source file has the same name as the sub-folder it is contained within. 
-All new *Renderers* must inherit from the abstract base class `Renderer` using its inbuilt factory registration helper `Renderer::Registrar`. Doing so registers the new *Renderer* with the renderer factory. To ensure this registration works correctly the new *Renderer* must implement an empty default constructor (cannot use `=default`) as well as a static constant string containing a unique name for the *Renderer*. +All new *Renderers* must inherit from the abstract base class `Renderer`. To make the new *Renderer* discoverable by the rest of the system, it should also be added to the renderer factory by additionally inheriting from `RendererFactory::Registrar`. Doing so registers the new *Renderer* with the renderer factory. To ensure this registration works correctly, the new *Renderer* must implement an empty default constructor (cannot use `=default`) as well as a static constant string containing a unique name for the *Renderer*. The new *Renderer* should then override all base class member functions as required. The member functions that need overriding are: +- `Constructor()`:\ + A blank constructor. - `std::vector> setupRenderTechniques(...)`:\ This function is responsible for returning a list of all required *Render Techniques* in the order that they are required to operate during rendering. The return from this function transfers ownership of the *Render Techniques* to the internal framework which will then manage their lifetime after that. -- `Constructor()`:\ - A default blank constructor. 
An example blank implementation of `my_renderer` would look like: ``` @@ -26,7 +26,9 @@ An example blank implementation of `my_renderer` would look like: namespace Capsaicin { -class MyRenderer : public Renderer::Registrar +class MyRenderer + : public Renderer + , public RendererFactory::Registrar { public: /***** Must define unique name to represent new type *****/ @@ -36,7 +38,7 @@ public: MyRenderer() noexcept {} std::vector> setupRenderTechniques( - RenderSettings const &render_settings) noexcept override + RenderOptionList const &renderOptions) noexcept override { std::vector> render_techniques; /***** Emplace any desired render techniques to the returned list here *****/ diff --git a/docs/index.md b/docs/index.md index 134ad20..ae15801 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,4 +8,4 @@ - [Creating a Renderer](./development/renderer.md) - [Creating a Renderer Technique](./development/render_technique.md) - [Creating a Component](./development/component.md) - - [Shader Debugging](./development/shader_debugging.md) + - [Shader Debugging](./development/shader_debugging.md) \ No newline at end of file diff --git a/dump/.gitignore b/dump/.gitignore new file mode 100644 index 0000000..72e8ffc --- /dev/null +++ b/dump/.gitignore @@ -0,0 +1 @@ +* diff --git a/launch.vs.json b/launch.vs.json new file mode 100644 index 0000000..b584fc1 --- /dev/null +++ b/launch.vs.json @@ -0,0 +1,13 @@ +{ + "version": "0.2.1", + "defaults": {}, + "configurations": [ + { + "type": "default", + "project": "CMakeLists.txt", + "projectTarget": "scene_viewer.exe (bin\\scene_viewer.exe)", + "name": "scene_viewer.exe (bin\\scene_viewer.exe)", + "currentDir": "${workspaceRoot}" + } + ] +} \ No newline at end of file diff --git a/shader_pdb/.gitignore b/shader_pdb/.gitignore new file mode 100644 index 0000000..72e8ffc --- /dev/null +++ b/shader_pdb/.gitignore @@ -0,0 +1 @@ +* diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 1540b5f..ff30b02 100644 --- 
a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,11 +1,9 @@ file(GLOB_RECURSE HEADER_FILES CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/*.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h) - -file(GLOB_RECURSE INLINE_FILES - CONFIGURE_DEPENDS - ${CMAKE_CURRENT_SOURCE_DIR}/src/*.inl) + ${CMAKE_CURRENT_SOURCE_DIR}/src/*.inl + ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h +) file(GLOB_RECURSE SHADER_FILES CONFIGURE_DEPENDS @@ -13,47 +11,54 @@ file(GLOB_RECURSE SHADER_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/*.frag ${CMAKE_CURRENT_SOURCE_DIR}/src/*.geom ${CMAKE_CURRENT_SOURCE_DIR}/src/*.comp - ${CMAKE_CURRENT_SOURCE_DIR}/src/*.hlsl) + ${CMAKE_CURRENT_SOURCE_DIR}/src/*.hlsl + ${CMAKE_CURRENT_SOURCE_DIR}/src/*.rt +) file(GLOB_RECURSE SOURCE_FILES CONFIGURE_DEPENDS - ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) + ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp +) set_source_files_properties(${SHADER_FILES} PROPERTIES VS_TOOL_OVERRIDE - "None") + "None" +) -add_library(core SHARED ${SOURCE_FILES}) +add_library(capsaicin SHARED ${SOURCE_FILES}) set(CMAKE_CXX_VISIBILITY_PRESET hidden) set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) include(GenerateExportHeader) -generate_export_header(core BASE_NAME capsaicin) +generate_export_header(capsaicin BASE_NAME capsaicin) configure_file(include/version.h.in version.h) -target_sources(core PRIVATE ${HEADER_FILES} ${INLINE_FILES} ${SHADER_FILES} - ${PROJECT_BINARY_DIR}/src/core/capsaicin_export.h) +target_sources(capsaicin + PRIVATE ${HEADER_FILES} ${INLINE_FILES} ${SHADER_FILES} ${SHADER_FILES} + ${PROJECT_BINARY_DIR}/src/core/capsaicin_export.h +) -target_include_directories(core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include - ${PROJECT_BINARY_DIR}/src/core) +target_include_directories(capsaicin + PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include + ${PROJECT_BINARY_DIR}/src/core +) -target_include_directories(core PRIVATE +target_include_directories(capsaicin PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/src/capsaicin 
${CMAKE_CURRENT_SOURCE_DIR}/src/render_techniques ${CMAKE_CURRENT_SOURCE_DIR}/src/renderers ${CMAKE_CURRENT_SOURCE_DIR}/src/utilities - ${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/ffx-bx ${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/ffx-parallelsort - ${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/miniz - ${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/samplerCPP - ${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/tinyexr) + ${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/gfx/third_party/stb + ${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/gfx/third_party/tinyexr +) -target_compile_features(core PUBLIC cxx_std_20) -target_compile_options(core PRIVATE - /W3 /WX +target_compile_features(capsaicin PUBLIC cxx_std_20) +target_compile_options(capsaicin PRIVATE + /W4 /WX /external:anglebrackets /external:W0 /analyze:external- -D_CRT_SECURE_NO_WARNINGS -D_HAS_EXCEPTIONS=0 -D_SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING @@ -61,16 +66,16 @@ target_compile_options(core PRIVATE -DGLM_FORCE_CTOR_INIT -DGLM_FORCE_XYZW_ONLY -DGLM_FORCE_DEPTH_ZERO_TO_ONE + -DNOMINMAX ) +target_link_options(capsaicin PRIVATE "/SUBSYSTEM:WINDOWS") + function(assign_source_group arg1) foreach(_source IN ITEMS ${ARGN}) get_filename_component(PARENT_DIR "${_source}" DIRECTORY) string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/include" "" GROUP "${PARENT_DIR}") string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/src" "" GROUP "${GROUP}") - string(REPLACE "../gi10/src" "" GROUP "${GROUP}") - string(REPLACE "../gi10/include" "" GROUP "${GROUP}") - string(REPLACE "../gi10/shaders" "" GROUP "${GROUP}") string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}" "" GROUP "${GROUP}") string(REPLACE "/" "\\" GROUP "${GROUP}") source_group("${arg1}\\${GROUP}" FILES "${_source}") @@ -78,35 +83,54 @@ function(assign_source_group arg1) endfunction(assign_source_group) assign_source_group("Header Files" ${HEADER_FILES}) -assign_source_group("Inline Headers" ${INLINE_FILES}) assign_source_group("Source Files" ${SOURCE_FILES}) 
assign_source_group("Shader Files" ${SHADER_FILES}) -assign_source_group("Header Files\\gi10" ${GI10_HEADER_FILES}) -assign_source_group("Source Files\\gi10" ${GI10_SOURCE_FILES}) -assign_source_group("Shader Files\\gi10" ${GI10_SHADER_FILES}) - add_library(D3D12Core SHARED IMPORTED) set_target_properties(D3D12Core PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/third_party/agility_sdk/D3D12Core.dll - IMPORTED_IMPLIB d3d12.lib + IMPORTED_IMPLIB ${CMAKE_SOURCE_DIR}/third_party/agility_sdk/dummy.lib ) add_library(d3d12SDKLayers SHARED IMPORTED) set_target_properties(d3d12SDKLayers PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/third_party/agility_sdk/d3d12SDKLayers.dll - IMPORTED_IMPLIB d3d12.lib + IMPORTED_IMPLIB ${CMAKE_SOURCE_DIR}/third_party/agility_sdk/dummy.lib ) -target_link_libraries(core PUBLIC gfx glm - PRIVATE tinyobjloader miniz D3D12Core d3d12SDKLayers) +target_link_libraries(capsaicin PUBLIC gfx glm + PRIVATE d3d12 D3D12Core d3d12SDKLayers tinyexr) -set_target_properties(core PROPERTIES +set_target_properties(capsaicin PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CAPSAICIN_RUNTIME_OUTPUT_DIRECTORY} LIBRARY_OUTPUT_DIRECTORY ${CAPSAICIN_LIBRARY_OUTPUT_DIRECTORY} ARCHIVE_OUTPUT_DIRECTORY ${CAPSAICIN_ARCHIVE_OUTPUT_DIRECTORY} ) -add_custom_command(TARGET core POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy $ $ - COMMAND_EXPAND_LISTS -) \ No newline at end of file +add_custom_command(TARGET capsaicin POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy $ $ + COMMAND_EXPAND_LISTS +) + +set_target_properties(capsaicin PROPERTIES PUBLIC_HEADER "include/capsaicin.h;${CMAKE_BINARY_DIR}/src/core/version.h;${CMAKE_BINARY_DIR}/src/core/capsaicin_export.h") + +# Install the library and headers +include(GNUInstallDirs) +install(TARGETS capsaicin + EXPORT capsaicin-targets + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/capsaicin + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION 
${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR} +) + +# Install shader files into binary directory +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR} + FILES_MATCHING + PATTERN "*.vert" + PATTERN "*.frag" + PATTERN "*.geom" + PATTERN "*.comp" + PATTERN "*.hlsl" + PATTERN "*.rt" +) diff --git a/src/core/include/capsaicin.h b/src/core/include/capsaicin.h index d3fc562..0ab3e17 100644 --- a/src/core/include/capsaicin.h +++ b/src/core/include/capsaicin.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,8 +23,8 @@ THE SOFTWARE. #include "capsaicin_export.h" -#define NOMINMAX -#include "gfx_scene.h" +#include +#include #include #include @@ -32,133 +32,82 @@ THE SOFTWARE. namespace Capsaicin { -enum PlayMode -{ - kPlayMode_None = 0, - kPlayMode_FrameByFrame, - kPlayMode_Count -}; +/** + * Initializes Capsaicin. Must be called before any other functions. + * @param gfx The gfx context to use inside Capsaicin. + * @param imgui_context (Optional) The ImGui context. + */ +CAPSAICIN_EXPORT void Initialize(GfxContext gfx, ImGuiContext *imgui_context = nullptr) noexcept; -CAPSAICIN_EXPORT extern char const *g_play_modes[]; +/** + * Gets the list of supported renderers. + * @returns The renderers list. 
+ */ +CAPSAICIN_EXPORT std::vector GetRenderers() noexcept; -struct RenderSettings -{ - PlayMode play_mode_ = kPlayMode_None; - uint32_t play_to_frame_index_ = 1; - bool play_from_start_ = false; - - float delta_time_ = 0.0f; // Use system time by default - float frame_by_frame_delta_time_ = 1.0f / 30.0f; - - GfxConstRef environment_map_; - - std::string_view renderer_; /**< The requested renderer to use (get available from @GetRenderers()) */ - std::string_view debug_view_; /**< The debug view to use (get available from GetDebugViews() - - "None" or empty for default behaviour) */ - - using option = std::variant; - std::map - options_; /**< Options for controlling the operation of each render technique */ - - /** - * Checks if an options exists with the specified type. - * @tparam T Generic type parameter of the requested option. - * @param name The name of the option to get. - * @returns True if options is found and has correct type, False otherwise. - */ - template - bool hasOption(std::string_view const &name) const noexcept - { - if (auto i = options_.find(name); i != options_.end()) - { - return std::holds_alternative(i->second); - } - return false; - } +/** + * Gets the currently set renderer. + * @returns The current renderer name. + */ +CAPSAICIN_EXPORT std::string_view GetCurrentRenderer() noexcept; - /** - * Gets an option from internal options list. - * @tparam T Generic type parameter of the requested option. - * @param name The name of the option to get. - * @returns The options value (nullptr if option does not exists or typename does not match). - */ - template - T const &getOption(std::string_view const &name) const noexcept - { - if (auto i = options_.find(name); i != options_.end()) - { - if (std::holds_alternative(i->second)) - { - return *std::get_if(&(i->second)); - } - } - GFX_PRINTLN("Error: Unknown settings options requested: %s", name.data()); - static T unknown; - return unknown; - } +/** + * Sets the current renderer. 
+ * @param name The name of the renderer to set (must be one of the options from GetRenderers()). + * @returns True if successful, False otherwise. + */ +CAPSAICIN_EXPORT bool SetRenderer(std::string_view const &name) noexcept; - /** - * Gets a reference to an option from internal options list. - * @tparam T Generic type parameter of the requested option. - * @param name The name of the option to get. - * @returns The options value (nullptr if option does not exists or typename does not match). - */ - template - T &getOption(std::string_view const &name) noexcept - { - if (auto i = options_.find(name); i != options_.end()) - { - if (std::holds_alternative(i->second)) - { - return *std::get_if(&(i->second)); - } - } - GFX_PRINTLN("Error: Unknown settings options requested: %s", name.data()); - static T unknown; - return unknown; - } +/** + * Gets the currently set scenes. + * @returns The list of current scene names. + */ +CAPSAICIN_EXPORT std::vector GetCurrentScenes() noexcept; - /** - * Sets an options value in the internal options list. - * If the option does not exists it is created. - * @tparam T Generic type parameter of the requested option. - * @param name The name of the option to set. - * @param value The new value of the option. - */ - template - void setOption(std::string_view const &name, const T value) noexcept - { - if (auto i = options_.find(name); i != options_.end()) - { - if (std::holds_alternative(i->second)) - { - *std::get_if(&(i->second)) = value; - } - } - else - { - options_.emplace(name, value); - } - } -}; +/** + * Sets the current scenes. + * @param names The names of the scene files. + * @returns True if successful, False otherwise. + */ +CAPSAICIN_EXPORT bool SetScenes(std::vector const &names) noexcept; /** - * Initializes Capsaicin. Must be called before any other functions. - * @param gfx The gfx context to use inside Capsaicin. + * Gets the list of cameras available in the current scene. + * @returns The cameras list. 
*/ -CAPSAICIN_EXPORT void Initialize(GfxContext gfx); +CAPSAICIN_EXPORT std::vector GetSceneCameras() noexcept; /** - * Gets the list of supported renderers that can be set inside RenderSettings. - * @returns The renderers list. + * Gets the name of the currently set scene camera. + * @returns The current camera name. */ -CAPSAICIN_EXPORT std::vector GetRenderers() noexcept; +CAPSAICIN_EXPORT std::string_view GetSceneCurrentCamera() noexcept; /** - * Gets the list of currently available AOVs. - * @returns The AOV list. + * Gets the current scenes camera. + * @returns The requested camera object. */ -CAPSAICIN_EXPORT std::vector GetAOVs() noexcept; +CAPSAICIN_EXPORT GfxRef GetSceneCamera() noexcept; + +/** + * Sets the current scenes camera. + * @param name The name of the camera to set (must be one of the options from GetSceneCameras()). + * @returns True if successful, False otherwise. + */ +CAPSAICIN_EXPORT bool SetSceneCamera(std::string_view const &name) noexcept; + +/** + * Gets the currently set environment map. + * @returns The current environment map name. + */ +CAPSAICIN_EXPORT std::string GetCurrentEnvironmentMap() noexcept; + +/** + * Sets the current scene environment map. + * @param name The name of the image file (blank to disable environment map). + * @returns True if successful, False otherwise. + */ +CAPSAICIN_EXPORT bool SetEnvironmentMap(std::string const &name) noexcept; /** * Gets the list of currently available debug views. @@ -166,62 +115,281 @@ CAPSAICIN_EXPORT std::vector GetAOVs() noexcept; */ CAPSAICIN_EXPORT std::vector GetDebugViews() noexcept; +/** + * Gets the currently set debug view. + * @returns The current debug view name. + */ +CAPSAICIN_EXPORT std::string_view GetCurrentDebugView() noexcept; + +/** + * Sets the current debug view. + * @param name The name of the debug view to set (must be one of the options from GetDebugViews()). + * @returns True if successful, False otherwise. 
+ */ +CAPSAICIN_EXPORT bool SetDebugView(std::string_view const &name) noexcept; + +/** + * Gets the list of currently available AOVs. + * @returns The AOV list. + */ +CAPSAICIN_EXPORT std::vector GetAOVs() noexcept; + /** * Render the current frame. - * @param scene The scene to render. - * @param [in,out] render_settings The render settings to use during rendering. */ -CAPSAICIN_EXPORT void Render(GfxScene scene, RenderSettings &render_settings); -CAPSAICIN_EXPORT uint32_t GetFrameIndex(); -CAPSAICIN_EXPORT double GetSequenceTime(); -CAPSAICIN_EXPORT void SetSequenceTime(double time); -CAPSAICIN_EXPORT bool GetAnimate(); -CAPSAICIN_EXPORT void SetAnimate(bool animation); +CAPSAICIN_EXPORT void Render() noexcept; + +/** + * Render UI elements related to current internal state + * Must be called between ImGui::Begin() and ImGui::End(). + * @param readOnly (Optional) True to only display read only data, False to display controls accepting user + * input. + */ +CAPSAICIN_EXPORT void RenderGUI(bool readOnly = false) noexcept; + +/** + * Get the current frame index (starts at zero) + * @return The index of the current frame to/being rendered. + */ +CAPSAICIN_EXPORT uint32_t GetFrameIndex() noexcept; + +/** + * Get the elapsed time since the last render call. + * @return The elapsed frame time (seconds) + */ +CAPSAICIN_EXPORT double GetFrameTime() noexcept; + +/** + * Get the average frame time. + * @return The elapsed frame time (seconds) + */ +CAPSAICIN_EXPORT double GetAverageFrameTime() noexcept; + +/** + * Check if the current scene has any usable animations. + * @return True if animations are present, False otherwise. + */ +CAPSAICIN_EXPORT bool HasAnimation() noexcept; + +/** + * Set the current playback play/paused state + * @param paused True to pause animation, False to play. + */ +CAPSAICIN_EXPORT void SetPaused(bool paused) noexcept; + +/** + * Get the current animation play/paused state. + * @return True if playback is paused, False otherwise. 
+ */ +CAPSAICIN_EXPORT bool GetPaused() noexcept; + +/** + * Set the current playback mode. + * @param playMode The new playback mode (False to playback in real-time mode, True uses fixed frame + * rate). + */ +CAPSAICIN_EXPORT void SetFixedFrameRate(bool playMode) noexcept; + +/** + * Set the current fixed rate frame time. + * @param fixed_frame_time A duration in seconds. + */ +CAPSAICIN_EXPORT void SetFixedFrameTime(double fixed_frame_time) noexcept; + +/** + * Get current playback mode. + * @return True if using fixed frame rate, False is using real-time. + */ +CAPSAICIN_EXPORT bool GetFixedFrameRate() noexcept; + +/** + * Restart playback to start of animation. + */ +CAPSAICIN_EXPORT void RestartPlayback() noexcept; + +/** + * Increase current playback speed by double. + */ +CAPSAICIN_EXPORT void IncreasePlaybackSpeed() noexcept; + +/** + * Decrease current playback speed by half. + */ +CAPSAICIN_EXPORT void DecreasePlaybackSpeed() noexcept; + +/** + * Get the current playback speed. + * @return The current playback speed. + */ +CAPSAICIN_EXPORT double GetPlaybackSpeed() noexcept; + +/** + * Reset the playback speed to default. + */ +CAPSAICIN_EXPORT void ResetPlaybackSpeed() noexcept; + +/** + * Step playback forward by specified number of frames. + * @param frames The number of frames to step forward. + */ +CAPSAICIN_EXPORT void StepPlaybackForward(uint32_t frames) noexcept; + +/** + * Step playback backward by specified number of frames. + * @param frames The number of frames to step backward. + */ +CAPSAICIN_EXPORT void StepPlaybackBackward(uint32_t frames) noexcept; + +/** + * Set the playback to forward/rewind. + * @param rewind Set to True to rewind, False to playback forward. + */ +CAPSAICIN_EXPORT void SetPlayRewind(bool rewind) noexcept; + +/** + * Get the current playback forward/rewind state + * @return True if in rewind, False if forward. + */ +CAPSAICIN_EXPORT bool GetPlayRewind() noexcept; + +/** + * Set the current render state. 
Pausing prevents any new frames from being rendered. + * @param paused True to pause rendering. + */ +CAPSAICIN_EXPORT void SetRenderPaused(bool paused) noexcept; + +/** + * Get the current render paused state. + * @return True if rendering is paused, False otherwise. + */ +CAPSAICIN_EXPORT bool GetRenderPaused() noexcept; + +/** + * Step jitter frame index by a specified number of frames. + * @param frames The number of frames to step. + */ +CAPSAICIN_EXPORT void StepJitterFrameIndex(uint32_t frames); /** * Gets count of enabled delta lights (point,spot,direction) in current scene. * @returns The delta light count. */ -CAPSAICIN_EXPORT uint32_t GetDeltaLightCount(); +CAPSAICIN_EXPORT uint32_t GetDeltaLightCount() noexcept; /** * Gets count of enabled area lights in current scene. * @returns The area light count. */ -CAPSAICIN_EXPORT uint32_t GetAreaLightCount(); +CAPSAICIN_EXPORT uint32_t GetAreaLightCount() noexcept; /** * Gets count of enabled environment lights in current scene. * @returns The environment light count. */ -CAPSAICIN_EXPORT uint32_t GetEnvironmentLightCount(); +CAPSAICIN_EXPORT uint32_t GetEnvironmentLightCount() noexcept; + +/** + * Gets count of number of triangles present in current scene. + * @returns The triangle count. + */ +CAPSAICIN_EXPORT uint32_t GetTriangleCount() noexcept; -struct TimeStamp +/** + * Gets size of the acceleration structure (in bytes). + * @returns The acceleration structure size. + */ +CAPSAICIN_EXPORT uint64_t GetBvhDataSize() noexcept; + +/** + * Gets the internal configuration options. + * @returns The list of available options. + */ +CAPSAICIN_EXPORT +std::map> &GetOptions() noexcept; + +/** + * Checks if an options exists with the specified type. + * @tparam T Generic type parameter of the requested option. + * @param name The name of the option to get. + * @returns True if options is found and has correct type, False otherwise. 
+ */ +template +bool hasOption(std::string_view const &name) noexcept { - std::string_view name_; /**< The name of the timestamp */ - float time_; /**< The time in seconds */ -}; + auto &options = GetOptions(); + if (auto i = options.find(name); i != options.end()) + { + return std::holds_alternative(i->second); + } + return false; +} -struct NodeTimestamps +/** + * Gets a reference to an option from internal options list. + * @tparam T Generic type parameter of the requested option. + * @param name The name of the option to get. + * @returns A reference to the option's value (a reference to a static default-constructed fallback if the option does not exist or its type does not match). + */ +template +T &getOption(std::string_view const &name) noexcept { - std::string_view name_; /**< The name of current timestamp node */ - std::vector children_; /**< The list of timestamps for all child timestamps (The first entry is - the timestamp for the whole node) */ -}; + auto &options = GetOptions(); + if (auto i = options.find(name); i != options.end()) + { + if (std::holds_alternative(i->second)) + { + return *std::get_if(&(i->second)); + } + } + GFX_PRINTLN("Error: Unknown settings options requested: %s", name.data()); + static T unknown; + return unknown; +} /** - * Gets the profiling information for each timed section from the current frame. - * @returns The total frame time as well as timestamps for each sub-section (see NodeTimestamps for details). + * Sets an option's value in the internal options list. + * If the option does not exist it is created. + * @tparam T Generic type parameter of the requested option. + * @param name The name of the option to set. + * @param value The new value of the option. 
*/ -CAPSAICIN_EXPORT std::pair> GetProfiling() noexcept; +template +void setOption(std::string_view const &name, const T value) noexcept +{ + auto &options = GetOptions(); + if (auto i = options.find(name); i != options.end()) + { + if (std::holds_alternative(i->second)) + { + *std::get_if(&(i->second)) = value; + } + } + else + { + options.emplace(name, value); + } +} /** Terminates this object. Should be called after all other operations. */ -CAPSAICIN_EXPORT void Terminate(); +CAPSAICIN_EXPORT void Terminate() noexcept; + +/** + * Reload all shader code currently in use + */ +CAPSAICIN_EXPORT void ReloadShaders() noexcept; /** * Saves an AOV buffer to disk. * @param file_path Full pathname to the file to save as. * @param aov The buffer to save (get available from @GetAOVs()). */ -CAPSAICIN_EXPORT void DumpAOVBuffer(char const *file_path, std::string_view const &aov); +CAPSAICIN_EXPORT void DumpAOVBuffer(char const *file_path, std::string_view const &aov) noexcept; + +/** + * Saves current camera attributes to disk. + * @param file_path Full pathname to the file to save as. + * @param jittered Jittered camera or not. + */ +CAPSAICIN_EXPORT void DumpCamera(char const *file_path, bool jittered) noexcept; + } // namespace Capsaicin diff --git a/src/core/include/version.h.in b/src/core/include/version.h.in index cb19f90..d26d0d0 100644 --- a/src/core/include/version.h.in +++ b/src/core/include/version.h.in @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/capsaicin/capsaicin.cpp b/src/core/src/capsaicin/capsaicin.cpp index 8c6d35c..eec2caf 100644 --- a/src/core/src/capsaicin/capsaicin.cpp +++ b/src/core/src/capsaicin/capsaicin.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,7 +22,6 @@ THE SOFTWARE. #include "capsaicin.h" #include "capsaicin_internal.h" -#include "components/light_sampler/light_sampler.h" #include "thread_pool.h" namespace @@ -32,17 +31,12 @@ Capsaicin::CapsaicinInternal *g_renderer = nullptr; namespace Capsaicin { -char const *g_play_modes[] = {"None", "Frame-by-frame"}; - -static_assert( - ARRAYSIZE(g_play_modes) == Capsaicin::kPlayMode_Count, "An invalid number of play modes was supplied"); - -void Initialize(GfxContext gfx) +void Initialize(GfxContext gfx, ImGuiContext *imgui_context) noexcept { if (g_renderer != nullptr) Terminate(); ThreadPool::Create(std::thread::hardware_concurrency()); g_renderer = new CapsaicinInternal(); - g_renderer->initialize(gfx); + g_renderer->initialize(gfx, imgui_context); } std::vector GetRenderers() noexcept @@ -50,99 +44,272 @@ std::vector GetRenderers() noexcept return CapsaicinInternal::GetRenderers(); } -std::vector GetAOVs() noexcept +std::string_view GetCurrentRenderer() noexcept { - if (g_renderer != nullptr) return g_renderer->getAOVs(); + if (g_renderer != nullptr) return g_renderer->getCurrentRenderer(); + return ""; +} + +bool SetRenderer(std::string_view const &name) noexcept +{ + if (g_renderer != nullptr) return 
g_renderer->setRenderer(name); + return false; +} + +std::vector GetCurrentScenes() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getCurrentScenes(); + return {}; +} + +bool SetScenes(std::vector const &names) noexcept +{ + if (g_renderer != nullptr) return g_renderer->setScenes(names); + return false; +} + +std::vector GetSceneCameras() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getSceneCameras(); return {}; } +std::string_view GetSceneCurrentCamera() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getSceneCurrentCamera(); + return ""; +} + +GfxRef GetSceneCamera() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getSceneCamera(); + return {}; +} + +bool SetSceneCamera(std::string_view const &name) noexcept +{ + if (g_renderer != nullptr) return g_renderer->setSceneCamera(name); + return false; +} + +std::string GetCurrentEnvironmentMap() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getCurrentEnvironmentMap(); + return ""; +} + +bool SetEnvironmentMap(std::string const &name) noexcept +{ + if (g_renderer != nullptr) return g_renderer->setEnvironmentMap(name); + return false; +} + std::vector GetDebugViews() noexcept { if (g_renderer != nullptr) return g_renderer->getDebugViews(); return {}; } -void Render(GfxScene scene, RenderSettings &render_settings) +std::string_view GetCurrentDebugView() noexcept { - if (g_renderer != nullptr) g_renderer->render(scene, render_settings); + if (g_renderer != nullptr) return g_renderer->getCurrentDebugView(); + return ""; } -uint32_t GetFrameIndex() +bool SetDebugView(std::string_view const &name) noexcept +{ + if (g_renderer != nullptr) return g_renderer->setDebugView(name); + return false; +} + +std::vector GetAOVs() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getAOVs(); + return {}; +} + +void Render() noexcept +{ + if (g_renderer != nullptr) g_renderer->render(); +} + +void RenderGUI(bool readOnly) noexcept +{ + if (g_renderer != nullptr) 
g_renderer->renderGUI(readOnly); +} + +uint32_t GetFrameIndex() noexcept { if (g_renderer != nullptr) return g_renderer->getFrameIndex(); return 0; } -double GetSequenceTime() +double GetFrameTime() noexcept { - if (g_renderer != nullptr) return g_renderer->getTime(); + if (g_renderer != nullptr) return g_renderer->getFrameTime(); + return 0.0; +} - return 0; +double GetAverageFrameTime() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getAverageFrameTime(); + return 0.0; +} + +bool HasAnimation() noexcept +{ + if (g_renderer != nullptr) return g_renderer->hasAnimation(); + return false; +} + +void SetPaused(bool paused) noexcept +{ + if (g_renderer != nullptr) g_renderer->setPaused(paused); } -void SetSequenceTime(double time) +bool GetPaused() noexcept { - if (g_renderer != nullptr) g_renderer->setTime(time); + if (g_renderer != nullptr) return g_renderer->getPaused(); + return true; } -bool GetAnimate() +void SetFixedFrameRate(bool playMode) noexcept { - if (g_renderer != nullptr) return g_renderer->getAnimate(); + if (g_renderer != nullptr) g_renderer->setFixedFrameRate(playMode); +} + +void SetFixedFrameTime(double fixed_frame_time) noexcept +{ + if (g_renderer != nullptr) g_renderer->setFixedFrameTime(fixed_frame_time); +} +bool GetFixedFrameRate() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getFixedFrameRate(); return false; } -void SetAnimate(bool animation) +void RestartPlayback() noexcept +{ + if (g_renderer != nullptr) g_renderer->restartPlayback(); +} + +void IncreasePlaybackSpeed() noexcept +{ + if (g_renderer != nullptr) g_renderer->increasePlaybackSpeed(); +} + +void DecreasePlaybackSpeed() noexcept +{ + if (g_renderer != nullptr) g_renderer->decreasePlaybackSpeed(); +} + +double GetPlaybackSpeed() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getPlaybackSpeed(); + return 1.0; +} + +void ResetPlaybackSpeed() noexcept +{ + if (g_renderer != nullptr) g_renderer->resetPlaybackSpeed(); +} + +void 
StepPlaybackForward(uint32_t frames) noexcept +{ + if (g_renderer != nullptr) g_renderer->stepPlaybackForward(frames); +} + +void StepPlaybackBackward(uint32_t frames) noexcept +{ + if (g_renderer != nullptr) g_renderer->stepPlaybackBackward(frames); +} + +void SetPlayRewind(bool rewind) noexcept { - if (g_renderer != nullptr) g_renderer->setAnimate(animation); + if (g_renderer != nullptr) g_renderer->setPlayRewind(rewind); } -uint32_t GetDeltaLightCount() +bool GetPlayRewind() noexcept { - if (g_renderer != nullptr) - { - if (g_renderer->hasComponent("LightSampler")) - { - return g_renderer->getComponent()->getDeltaLightCount(); - } - } + if (g_renderer != nullptr) return g_renderer->getPlayRewind(); + return false; +} + +void SetRenderPaused(bool paused) noexcept +{ + if (g_renderer != nullptr) g_renderer->setRenderPaused(paused); +} + +bool GetRenderPaused() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getRenderPaused(); + return true; +} + +void StepJitterFrameIndex(uint32_t frames) +{ + if (g_renderer != nullptr) g_renderer->stepJitterFrameIndex(frames); +} + +uint32_t GetDeltaLightCount() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getDeltaLightCount(); return 0; } -uint32_t GetAreaLightCount() +uint32_t GetAreaLightCount() noexcept { - if (g_renderer != nullptr) - { - if (g_renderer->hasComponent("LightSampler")) - { - return g_renderer->getComponent()->getAreaLightCount(); - } - } + if (g_renderer != nullptr) return g_renderer->getAreaLightCount(); return 0; } -uint32_t GetEnvironmentLightCount() +uint32_t GetEnvironmentLightCount() noexcept { if (g_renderer != nullptr) return g_renderer->getEnvironmentLightCount(); return 0; } -std::pair> GetProfiling() noexcept +uint32_t GetTriangleCount() noexcept { - if (g_renderer != nullptr) return g_renderer->getProfiling(); - return {}; + if (g_renderer != nullptr) return g_renderer->getTriangleCount(); + return 0; } -void Terminate() +uint64_t GetBvhDataSize() noexcept +{ + if 
(g_renderer != nullptr) return g_renderer->getBvhDataSize(); + return 0; +} + +RenderOptionList &GetOptions() noexcept +{ + if (g_renderer != nullptr) return g_renderer->getOptions(); + static RenderOptionList nullList; + return nullList; +} + +void Terminate() noexcept { delete g_renderer; g_renderer = nullptr; ThreadPool::Destroy(); } -void DumpAOVBuffer(char const *file_path, std::string_view const &aov) +CAPSAICIN_EXPORT void ReloadShaders() noexcept +{ + if (g_renderer != nullptr) g_renderer->reloadShaders(); +} + +void DumpAOVBuffer(char const *file_path, std::string_view const &aov) noexcept { if (g_renderer != nullptr) g_renderer->dumpAOVBuffer(file_path, aov); } + +void DumpCamera(char const *file_path, bool jittered) noexcept +{ + if (g_renderer != nullptr) g_renderer->dumpCamera(file_path, jittered); +} + } // namespace Capsaicin diff --git a/src/core/src/capsaicin/capsaicin_internal.cpp b/src/core/src/capsaicin/capsaicin_internal.cpp index a5ab881..1c28568 100644 --- a/src/core/src/capsaicin/capsaicin_internal.cpp +++ b/src/core/src/capsaicin/capsaicin_internal.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,31 +22,18 @@ THE SOFTWARE. 
#include "capsaicin_internal.h" #include "common_functions.inl" -#include "gfx_imgui.h" -#include "glm/gtc/matrix_transform.hpp" +#include "components/light_builder/light_builder.h" #include "hash_reduce.h" #include "render_technique.h" #include "thread_pool.h" #define _USE_MATH_DEFINES #include +#include +#include +#include #include -namespace -{ -static double process_start = 0.0; // time in secs since start of process - -inline double GetTime(bool reset_start) // in secs -{ - using namespace std::chrono; - high_resolution_clock::time_point const timestamp = high_resolution_clock::now(); - if (process_start == 0.0 || reset_start) - process_start = duration_cast(timestamp.time_since_epoch()).count() / 1000000.0; - double const epoch_time = duration_cast(timestamp.time_since_epoch()).count() / 1000000.0; - return epoch_time - process_start; // elapsed time since process start -} -} // unnamed namespace - namespace Capsaicin { CapsaicinInternal::CapsaicinInternal() {} @@ -76,53 +63,153 @@ uint32_t CapsaicinInternal::getHeight() const return gfxGetBackBufferHeight(gfx_); } -uint32_t CapsaicinInternal::getFrameIndex() const +char const *CapsaicinInternal::getShaderPath() const +{ + return shader_path_.c_str(); +} + +uint32_t CapsaicinInternal::getFrameIndex() const noexcept { return frame_index_; } -char const *CapsaicinInternal::getShaderPath() const +double CapsaicinInternal::getFrameTime() const noexcept { - return shader_path_.c_str(); + return frame_time_; +} + +double CapsaicinInternal::getAverageFrameTime() const noexcept +{ + return frameGraph.getAverageValue(); +} + +bool CapsaicinInternal::hasAnimation() const noexcept +{ + return gfxSceneGetAnimationCount(scene_) > 0; +} + +void CapsaicinInternal::setPaused(bool paused) noexcept +{ + play_paused_ = paused; +} + +bool CapsaicinInternal::getPaused() const noexcept +{ + return play_paused_; +} + +void CapsaicinInternal::setFixedFrameRate(bool playMode) noexcept +{ + play_fixed_framerate_ = playMode; +} + +void 
CapsaicinInternal::setFixedFrameTime(double fixed_frame_time) noexcept +{ + play_fixed_frame_time_ = fixed_frame_time; +} + +bool CapsaicinInternal::getFixedFrameRate() const noexcept +{ + return play_fixed_framerate_; +} + +void CapsaicinInternal::restartPlayback() noexcept +{ + play_time_ = 0.0; + // Also reset frame index so that rendering resumes from start as well + frame_index_ = 0; +} + +void CapsaicinInternal::increasePlaybackSpeed() noexcept +{ + play_speed_ *= 2.0; +} + +void CapsaicinInternal::decreasePlaybackSpeed() noexcept +{ + play_speed_ *= 0.5; +} + +double CapsaicinInternal::getPlaybackSpeed() const noexcept +{ + return play_speed_; } -double CapsaicinInternal::getTime() const +void CapsaicinInternal::resetPlaybackSpeed() noexcept { - return time_; + play_speed_ = 1.0; } -void CapsaicinInternal::setTime(double time) +void CapsaicinInternal::stepPlaybackForward(uint32_t frames) noexcept { - time_ = GetTime(true) + time; - process_start -= time; + play_time_ += (double)frames * play_fixed_frame_time_; } -bool CapsaicinInternal::getAnimate() const +void CapsaicinInternal::stepPlaybackBackward(uint32_t frames) noexcept { - return animate_; + play_time_ -= (double)frames * play_fixed_frame_time_; } -void CapsaicinInternal::setAnimate(bool animate) +void CapsaicinInternal::setPlayRewind(bool rewind) noexcept { - animate_ = animate; + play_rewind_ = rewind; } -bool CapsaicinInternal::getMeshesUpdated() const +bool CapsaicinInternal::getPlayRewind() const noexcept +{ + return play_rewind_; +} + +void CapsaicinInternal::setRenderPaused(bool paused) noexcept +{ + render_paused_ = paused; +} + +bool CapsaicinInternal::getRenderPaused() const noexcept +{ + return render_paused_; +} + +void CapsaicinInternal::stepJitterFrameIndex(uint32_t frames) noexcept +{ + uint32_t remaining_frames = std::numeric_limits::max() - jitter_frame_index_; + + if (frames < remaining_frames) + { + jitter_frame_index_ += frames; + } + else + { + jitter_frame_index_ = frames - 
remaining_frames; + } +} + +bool CapsaicinInternal::getMeshesUpdated() const noexcept { return mesh_updated_; } -bool CapsaicinInternal::getTransformsUpdated() const +bool CapsaicinInternal::getTransformsUpdated() const noexcept { return transform_updated_; } -bool CapsaicinInternal::getEnvironmentMapUpdated() const +bool CapsaicinInternal::getSceneUpdated() const noexcept +{ + return scene_updated_; +} + +bool CapsaicinInternal::getCameraUpdated() const noexcept +{ + return camera_updated_; +} + +bool CapsaicinInternal::getEnvironmentMapUpdated() const noexcept { return environment_map_updated_; } -std::vector CapsaicinInternal::getAOVs() noexcept +std::vector CapsaicinInternal::getAOVs() const noexcept { std::vector aovs; for (auto const &i : aov_buffers_) @@ -148,7 +235,7 @@ GfxTexture CapsaicinInternal::getAOVBuffer(std::string_view const &aov) const no return {}; } -std::vector CapsaicinInternal::getDebugViews() noexcept +std::vector CapsaicinInternal::getDebugViews() const noexcept { std::vector views; for (auto const &i : debug_views_) @@ -202,6 +289,402 @@ std::shared_ptr const &CapsaicinInternal::getComponent( return nullReturn; } +std::vector CapsaicinInternal::GetRenderers() noexcept +{ + return RendererFactory::getNames(); +} + +std::string_view CapsaicinInternal::getCurrentRenderer() const noexcept +{ + return renderer_name_; +} + +bool CapsaicinInternal::setRenderer(std::string_view const &name) noexcept +{ + auto const renderers = RendererFactory::getNames(); + auto renderer = + std::find_if(renderers.cbegin(), renderers.cend(), [name](auto val) { return name == val; }); + if (renderer == renderers.cend()) + { + GFX_PRINTLN("Error: Requested invalid renderer: %s", name.data()); + return false; + } + if (renderer_ != nullptr) + { + renderer_ = nullptr; + renderer_name_ = ""; + } + frameGraph.reset(); + setupRenderTechniques(name); + return true; +} + +std::vector const &CapsaicinInternal::getCurrentScenes() const noexcept +{ + return scene_files_; +} 
+ +bool CapsaicinInternal::setScenes(std::vector const &names) noexcept +{ + if (scene_files_ == names) + { + // Already loaded + return true; + } + + // Clear any pre-existing scene data + bool initRequired = !!scene_; + if (initRequired) + { + // Reset internal state + gfxFinish(gfx_); // flush & sync + // Remove environment map as its tied to scene + setEnvironmentMap(""); + setDebugView("None"); + gfxDestroyScene(scene_); + scene_ = {}; + scene_files_ = {}; + resetPlaybackState(); + resetRenderState(); + // Also need to reset the component/techniques + for (auto const &i : components_) + { + i.second->setGfxContext(gfx_); + i.second->terminate(); + } + for (auto const &i : render_techniques_) + { + i->setGfxContext(gfx_); + i->terminate(); + } + } + scene_updated_ = true; + // Create new blank scene + scene_ = gfxCreateScene(); + if (!scene_) + { + return false; + } + + // Create default user camera + auto userCamera = gfxSceneCreateCamera(scene_); + userCamera->type = kGfxCameraType_Perspective; + userCamera->eye = {0.0f, 0.0f, -1.0f}; + userCamera->center = {0.0f, 0.0f, 0.0f}; + userCamera->up = {0.0f, 1.0f, 0.0f}; + userCamera->aspect = getWidth() / (float)getHeight(); + userCamera->fovY = DegreesToRadians(90.0f); + userCamera->nearZ = 0.1f; + userCamera->farZ = 1e4f; + GfxMetadata userCameraMeta; + userCameraMeta.object_name = "User"; + gfxSceneSetCameraMetadata(scene_, gfxSceneGetCameraHandle(scene_, 0), userCameraMeta); + + // Load in scene based on current requested scene index + for (auto const &name : names) + { + if (gfxSceneImport(scene_, name.c_str()) != kGfxResult_NoError) + { + GFX_PRINT_ERROR(kGfxResult_InternalError, + "Scene '%s' can't be loaded, clear the scene and abort.", name.c_str()); + gfxSceneClear(scene_); + return false; + } + } + + scene_files_ = names; + + // Set up camera based on internal scene data + uint32_t cameraIndex = 0; + const uint32_t cameraCount = gfxSceneGetCameraCount(scene_); + if (cameraCount > 1) + { + cameraIndex = 
1; // Use first scene camera + // Try and find 'Main' camera + for (uint32_t i = 1; i < cameraCount; ++i) + { + auto cameraHandle = gfxSceneGetCameraHandle(scene_, i); + auto metaData = gfxSceneGetCameraMetadata(scene_, cameraHandle); + std::string cameraName = metaData.getObjectName(); + if (cameraName.find("Camera") == 0 && cameraName.length() > 6) + { + cameraName = cameraName.substr(6); + metaData.object_name = cameraName; + gfxSceneSetCameraMetadata(scene_, cameraHandle, metaData); + } + if (cameraName.find("Main") != std::string_view::npos) + { + cameraIndex = i; + } + } + // Set user camera equal to first camera + auto defaultCamera = gfxSceneGetCameraHandle(scene_, cameraIndex); + userCamera->eye = defaultCamera->eye; + userCamera->center = defaultCamera->center; + userCamera->up = defaultCamera->up; + } + auto camera = gfxSceneGetCameraHandle(scene_, cameraIndex); + camera->aspect = + static_cast(gfxGetBackBufferWidth(gfx_)) / static_cast(gfxGetBackBufferHeight(gfx_)); + if (!gfxSceneSetActiveCamera(scene_, camera) == kGfxResult_NoError) + { + return false; + } + + // Calculate some scene stats + triangle_count_ = 0; + for (uint32_t i = 0; i < gfxSceneGetObjectCount(scene_); ++i) + { + if (gfxSceneGetObjects(scene_)[i].mesh) + { + GfxMesh const &mesh = *gfxSceneGetObjects(scene_)[i].mesh; + triangle_count_ += (uint32_t)(mesh.indices.size() / 3); + } + } + + // Re-initialise the components/techniques. Also handle delayed loading of renderer when a scene + // previously hadnt been set. 
+ if (initRequired || !renderer_name_.empty()) + { + // Initialise all components + for (auto const &i : components_) + { + i.second->setGfxContext(gfx_); + if (!i.second->init(*this)) + { + GFX_PRINTLN("Error: Failed to initialise component: %s", i.first.data()); + } + } + + // Initialise all render techniques + for (auto const &i : render_techniques_) + { + i->setGfxContext(gfx_); + if (!i->init(*this)) + { + GFX_PRINTLN("Error: Failed to initialise render technique: %s", i->getName().data()); + } + } + + // Reset flags as everything just got forced reset anyway + mesh_updated_ = false; + transform_updated_ = false; + environment_map_updated_ = false; + scene_updated_ = false; + camera_updated_ = false; + } + + return true; +} + +std::vector CapsaicinInternal::getSceneCameras() const noexcept +{ + std::vector ret; + for (uint32_t i = 0; i < gfxSceneGetCameraCount(scene_); ++i) + { + auto cameraHandle = gfxSceneGetCameraHandle(scene_, i); + ret.emplace_back(gfxSceneGetCameraMetadata(scene_, cameraHandle).getObjectName()); + } + return ret; +} + +std::string_view CapsaicinInternal::getSceneCurrentCamera() const noexcept +{ + auto const ret = gfxSceneGetCameraMetadata(scene_, gfxSceneGetActiveCamera(scene_)).getObjectName(); + return ret; +} + +GfxRef CapsaicinInternal::getSceneCamera() const noexcept +{ + return gfxSceneGetActiveCamera(scene_); +} + +bool CapsaicinInternal::setSceneCamera(std::string_view const &name) noexcept +{ + // Convert camera name to an index + auto const cameras = getSceneCameras(); + auto const cameraIndex = std::find(cameras.begin(), cameras.end(), name); + if (cameraIndex == cameras.end()) + { + GFX_PRINTLN("Error: Invalid camera requested: %s", name.data()); + return false; + } + auto camera = gfxSceneGetCameraHandle(scene_, static_cast(cameraIndex - cameras.begin())); + camera->aspect = + static_cast(gfxGetBackBufferWidth(gfx_)) / static_cast(gfxGetBackBufferHeight(gfx_)); + if (!gfxSceneSetActiveCamera(scene_, camera) == 
kGfxResult_NoError) + { + return false; + } + camera_updated_ = true; + resetRenderState(); + return true; +} + +std::string CapsaicinInternal::getCurrentEnvironmentMap() const noexcept +{ + return environment_map_file_; +} + +bool CapsaicinInternal::setEnvironmentMap(std::string const &name) noexcept +{ + if (environment_map_file_ == name) + { + // Already loaded + return true; + } + + // Remove the old environment map + if (!!environment_buffer_) + { + gfxDestroyTexture(gfx_, environment_buffer_); + environment_buffer_ = {}; + } + environment_map_updated_ = true; + + resetRenderState(); + + if (name.empty()) + { + // If empty file requested then just use blank environment map + environment_map_file_ = ""; + return true; + } + // Load in the new environment map + if (gfxSceneImport(scene_, name.c_str()) != kGfxResult_NoError) + { + return false; + } + + // Update render settings + auto environmentMap = gfxSceneFindObjectByAssetFile(scene_, name.c_str()); + + if (!environmentMap) + { + GFX_PRINTLN("Failed to find valid environment map source file: %s", name.data()); + return false; + } + environment_map_file_ = name; + + // Create environment map texture + uint32_t const environment_buffer_size = 1024; + uint32_t const environment_buffer_mips = gfxCalculateMipCount(environment_buffer_size); + + environment_buffer_ = gfxCreateTextureCube( + gfx_, environment_buffer_size, DXGI_FORMAT_R16G16B16A16_FLOAT, environment_buffer_mips); + environment_buffer_.setName("Capsaicin_EnvironmentBuffer"); + + uint32_t const environment_map_width = environmentMap->width; + uint32_t const environment_map_height = environmentMap->height; + uint32_t const environment_map_mip_count = + gfxCalculateMipCount(environment_map_width, environment_map_height); + uint32_t const environment_map_channel_count = environmentMap->channel_count; + uint32_t const environment_map_bytes_per_channel = environmentMap->bytes_per_channel; + + GfxTexture environment_map = gfxCreateTexture2D(gfx_, 
environment_map_width, environment_map_height, + environmentMap->format, environment_map_mip_count); + { + GfxBuffer upload_buffer = gfxCreateBuffer(gfx_, + (size_t)environment_map_width * environment_map_height * environment_map_channel_count + * environment_map_bytes_per_channel, + environmentMap->data.data(), kGfxCpuAccess_Write); + gfxCommandCopyBufferToTexture(gfx_, environment_map, upload_buffer); + gfxCommandGenerateMips(gfx_, environment_map); + gfxDestroyBuffer(gfx_, upload_buffer); + } + + glm::dvec3 const forward_vectors[] = {glm::dvec3(-1.0, 0.0, 0.0), glm::dvec3(1.0, 0.0, 0.0), + glm::dvec3(0.0, 1.0, 0.0), glm::dvec3(0.0, -1.0, 0.0), glm::dvec3(0.0, 0.0, -1.0), + glm::dvec3(0.0, 0.0, 1.0)}; + + glm::dvec3 const up_vectors[] = {glm::dvec3(0.0, -1.0, 0.0), glm::dvec3(0.0, -1.0, 0.0), + glm::dvec3(0.0, 0.0, -1.0), glm::dvec3(0.0, 0.0, 1.0), glm::dvec3(0.0, -1.0, 0.0), + glm::dvec3(0.0, -1.0, 0.0)}; + + for (uint32_t cubemap_face = 0; cubemap_face < 6; ++cubemap_face) + { + GfxDrawState draw_sky_state = {}; + gfxDrawStateSetColorTarget(draw_sky_state, 0, environment_buffer_, 0, cubemap_face); + + GfxKernel draw_sky_kernel = + gfxCreateGraphicsKernel(gfx_, convolve_ibl_program_, draw_sky_state, "DrawSky"); + + uint32_t const buffer_dimensions[] = { + environment_buffer_.getWidth(), environment_buffer_.getHeight()}; + + glm::dmat4 const view = + glm::lookAt(glm::dvec3(0.0), forward_vectors[cubemap_face], up_vectors[cubemap_face]); + glm::dmat4 const proj = glm::perspective(M_PI / 2.0, 1.0, 0.1, 1e4); + glm::mat4 const view_proj_inv = glm::mat4(glm::inverse(proj * view)); + + gfxProgramSetParameter(gfx_, convolve_ibl_program_, "g_BufferDimensions", buffer_dimensions); + gfxProgramSetParameter(gfx_, convolve_ibl_program_, "g_ViewProjectionInverse", view_proj_inv); + + gfxProgramSetParameter(gfx_, convolve_ibl_program_, "g_EnvironmentMap", environment_map); + + gfxProgramSetParameter(gfx_, convolve_ibl_program_, "g_LinearSampler", linear_sampler_); + + 
gfxCommandBindKernel(gfx_, draw_sky_kernel); + gfxCommandDraw(gfx_, 3); + + gfxDestroyKernel(gfx_, draw_sky_kernel); + } + + GfxKernel blur_sky_kernel = gfxCreateComputeKernel(gfx_, convolve_ibl_program_, "BlurSky"); + + for (uint32_t mip_level = 1; mip_level < environment_buffer_mips; ++mip_level) + { + gfxProgramSetParameter( + gfx_, convolve_ibl_program_, "g_InEnvironmentBuffer", environment_buffer_, mip_level - 1); + gfxProgramSetParameter( + gfx_, convolve_ibl_program_, "g_OutEnvironmentBuffer", environment_buffer_, mip_level); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, blur_sky_kernel); + uint32_t const num_groups_x = + (GFX_MAX(environment_buffer_size >> mip_level, 1u) + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = + (GFX_MAX(environment_buffer_size >> mip_level, 1u) + num_threads[1] - 1) / num_threads[1]; + uint32_t const num_groups_z = 6; // blur all faces + + gfxCommandBindKernel(gfx_, blur_sky_kernel); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, num_groups_z); + } + + gfxDestroyKernel(gfx_, blur_sky_kernel); + auto handle = gfxSceneGetImageHandle(scene_, environmentMap.getIndex()); + gfxSceneDestroyImage(scene_, handle); + gfxDestroyTexture(gfx_, environment_map); + return true; +} + +std::string_view CapsaicinInternal::getCurrentDebugView() const noexcept +{ + return debug_view_; +} + +bool CapsaicinInternal::setDebugView(std::string_view const &name) noexcept +{ + auto debugView = std::find_if( + debug_views_.cbegin(), debug_views_.cend(), [this](auto val) { return debug_view_ == val.first; }); + if (debugView == debug_views_.cend()) + { + GFX_PRINTLN("Error: Requested invalid debug view: %s", name.data()); + return false; + } + debug_view_ = name; + return true; +} + +RenderOptionList const &CapsaicinInternal::getOptions() const noexcept +{ + return options_; +} + +RenderOptionList &CapsaicinInternal::getOptions() noexcept +{ + return options_; +} + glm::vec4 CapsaicinInternal::getInvDeviceZ() 
const { return glm::vec4(0.0f); // this is only here for compatibility with UE5 @@ -219,7 +702,9 @@ GfxTexture CapsaicinInternal::getEnvironmentBuffer() const GfxCamera const &CapsaicinInternal::getCamera() const { - return camera_; + // Get hold of the active camera (can be animated) + GfxConstRef camera_ref = gfxSceneGetActiveCamera(scene_); + return *camera_ref; } CameraMatrices const &CapsaicinInternal::getCameraMatrices(bool jittered) const @@ -232,19 +717,48 @@ GfxBuffer CapsaicinInternal::getCameraMatricesBuffer(bool jittered) const return camera_matrices_buffer_[jittered]; } -RenderSettings const &CapsaicinInternal::getRenderSettings() const +uint32_t CapsaicinInternal::getDeltaLightCount() const noexcept { - return render_settings_; + if (hasComponent("LightBuilder")) + { + return getComponent()->getDeltaLightCount(); + } + return 0; } -RenderSettings &CapsaicinInternal::getRenderSettings() +uint32_t CapsaicinInternal::getAreaLightCount() const noexcept { - return render_settings_; + if (hasComponent("LightBuilder")) + { + return getComponent()->getAreaLightCount(); + } + return 0; } -uint32_t CapsaicinInternal::getEnvironmentLightCount() const +uint32_t CapsaicinInternal::getEnvironmentLightCount() const noexcept { - return !!environment_map_ ? 1 : 0; + return !!environment_buffer_ ? 
1 : 0; +} + +uint32_t CapsaicinInternal::getTriangleCount() const noexcept +{ + return triangle_count_; +} + +uint64_t CapsaicinInternal::getBvhDataSize() const noexcept +{ + uint64_t bvh_data_size = gfxAccelerationStructureGetDataSize(gfx_, acceleration_structure_); + uint32_t const rt_primitive_count = + gfxAccelerationStructureGetRaytracingPrimitiveCount(gfx_, acceleration_structure_); + + for (uint32_t i = 0; i < rt_primitive_count; ++i) + { + GfxRaytracingPrimitive const &rt_primitive = + gfxAccelerationStructureGetRaytracingPrimitives(gfx_, acceleration_structure_)[i]; + bvh_data_size += gfxRaytracingPrimitiveGetDataSize(gfx_, rt_primitive); + } + + return bvh_data_size; } GfxBuffer CapsaicinInternal::getInstanceBuffer() const @@ -287,7 +801,7 @@ GfxBuffer CapsaicinInternal::getTransformBuffer() const return transform_buffer_; } -glm::mat4 const *CapsaicinInternal::getTransformData() const +glm::mat4x3 const *CapsaicinInternal::getTransformData() const { return transform_data_.data(); } @@ -297,7 +811,7 @@ GfxBuffer CapsaicinInternal::getPrevTransformBuffer() const return prev_transform_buffer_; } -glm::mat4 const *CapsaicinInternal::getPrevTransformData() const +glm::mat4x3 const *CapsaicinInternal::getPrevTransformData() const { return prev_transform_data_.data(); } @@ -327,6 +841,11 @@ GfxSamplerState CapsaicinInternal::getLinearSampler() const return linear_sampler_; } +GfxSamplerState CapsaicinInternal::getLinearWrapSampler() const +{ + return linear_wrap_sampler_; +} + GfxSamplerState CapsaicinInternal::getNearestSampler() const { return nearest_sampler_; @@ -440,7 +959,7 @@ GfxBuffer CapsaicinInternal::allocateConstantBuffer(uint64_t size) return constant_buffer; } -void CapsaicinInternal::initialize(GfxContext gfx) +void CapsaicinInternal::initialize(GfxContext gfx, ImGuiContext *imgui_context) { if (!gfx) { @@ -454,11 +973,35 @@ void CapsaicinInternal::initialize(GfxContext gfx) { linear_sampler_ = gfxCreateSamplerState(gfx, 
D3D12_FILTER_MIN_MAG_MIP_LINEAR); + linear_wrap_sampler_ = gfxCreateSamplerState(gfx, D3D12_FILTER_MIN_MAG_MIP_LINEAR, + D3D12_TEXTURE_ADDRESS_MODE_WRAP, D3D12_TEXTURE_ADDRESS_MODE_WRAP); nearest_sampler_ = gfxCreateSamplerState(gfx, D3D12_FILTER_MIN_MAG_MIP_POINT); anisotropic_sampler_ = gfxCreateSamplerState( gfx, D3D12_FILTER_ANISOTROPIC, D3D12_TEXTURE_ADDRESS_MODE_WRAP, D3D12_TEXTURE_ADDRESS_MODE_WRAP); } shader_path_ = "src/core/src/"; + // Check if shader source can be found + std::error_code ec; + bool found = false; + for (uint32_t i = 0; i < 4; ++i) + { + if (std::filesystem::exists(shader_path_ + "gpu_shared.h", ec)) + { + found = true; + break; + } + shader_path_.insert(0, "../"); + } + if (!found) + { + GFX_PRINTLN("Could not find directory containing shader source files"); + return; + } + + sbt_stride_in_entries_[kGfxShaderGroupType_Raygen] = 1; + sbt_stride_in_entries_[kGfxShaderGroupType_Miss] = 2; + sbt_stride_in_entries_[kGfxShaderGroupType_Hit] = 2; + sbt_stride_in_entries_[kGfxShaderGroupType_Callable] = 1; char const *screen_triangle_vs = "struct VS_OUTPUT { float4 pos : SV_POSITION; float2 texcoord : TEXCOORD; };" @@ -496,694 +1039,560 @@ void CapsaicinInternal::initialize(GfxContext gfx) buffer_width_ = gfxGetBackBufferWidth(gfx); buffer_height_ = gfxGetBackBufferHeight(gfx); + ImGui::SetCurrentContext(imgui_context); + gfx_ = gfx; } -void CapsaicinInternal::renderNextFrame(GfxScene scene) +void CapsaicinInternal::render() { - scene_ = scene; - constant_buffer_pool_cursor_ = 0; - was_resized_ = - (buffer_width_ != gfxGetBackBufferWidth(gfx_) || buffer_height_ != gfxGetBackBufferHeight(gfx_)); - buffer_width_ = gfxGetBackBufferWidth(gfx_); - buffer_height_ = gfxGetBackBufferHeight(gfx_); + // Update current frame time + auto const previousTime = current_time_; + auto wallTime = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now().time_since_epoch()); + current_time_ = static_cast(wallTime.count()) / 1000000.0; + frame_time_ = 
current_time_ - previousTime; - if (render_settings_.play_mode_ == kPlayMode_FrameByFrame) - { - if (render_settings_.play_from_start_) time_ = 0.f; - time_ += render_settings_.frame_by_frame_delta_time_; - } - else if (render_settings_.delta_time_ > 0.f) - { - if (render_settings_.play_from_start_) time_ = 0.f; - time_ += render_settings_.delta_time_; - } - else - { - time_ = GetTime(render_settings_.play_from_start_); - } + // Check if manual frame increment/decrement has been applied + bool manual_play = play_time_ != play_time_old_; - // Set up our environment map - environment_map_updated_ = false; - if (render_settings_.environment_map_ != environment_map_) + if (!render_paused_ || manual_play || frame_index_ == 0) { - environment_map_updated_ = - !!render_settings_.environment_map_ != !!environment_map_; // Only change if map added/removed - environment_map_ = render_settings_.environment_map_; + // Reset update flags + mesh_updated_ = false; + transform_updated_ = false; - gfxDestroyTexture(gfx_, environment_buffer_); + frameGraph.addValue(static_cast(frame_time_)); - environment_buffer_ = {}; + constant_buffer_pool_cursor_ = 0; + was_resized_ = + (buffer_width_ != gfxGetBackBufferWidth(gfx_) || buffer_height_ != gfxGetBackBufferHeight(gfx_)); + buffer_width_ = gfxGetBackBufferWidth(gfx_); + buffer_height_ = gfxGetBackBufferHeight(gfx_); - if (environment_map_ != GfxConstRef()) + // Run the animations + bool animation = false; + if (!play_paused_ || manual_play) { - uint32_t const environment_buffer_size = 512; - uint32_t const environment_buffer_mips = gfxCalculateMipCount(environment_buffer_size); - - environment_buffer_ = gfxCreateTextureCube( - gfx_, environment_buffer_size, DXGI_FORMAT_R16G16B16A16_FLOAT, environment_buffer_mips); - environment_buffer_.setName("Capsaicin_EnvironmentBuffer"); - - uint32_t const environment_map_width = environment_map_->width; - uint32_t const environment_map_height = environment_map_->height; - uint32_t const 
environment_map_mip_count = - gfxCalculateMipCount(environment_map_width, environment_map_height); - uint32_t const environment_map_channel_count = environment_map_->channel_count; - uint32_t const environment_map_bytes_per_channel = environment_map_->bytes_per_channel; - - GfxTexture environment_map = gfxCreateTexture2D(gfx_, environment_map_width, - environment_map_height, environment_map_->format, environment_map_mip_count); + if (!play_paused_) { - GfxBuffer upload_buffer = gfxCreateBuffer(gfx_, - (size_t)environment_map_width * environment_map_height * environment_map_channel_count - * environment_map_bytes_per_channel, - environment_map_->data.data(), kGfxCpuAccess_Write); - gfxCommandCopyBufferToTexture(gfx_, environment_map, upload_buffer); - gfxCommandGenerateMips(gfx_, environment_map); - gfxDestroyBuffer(gfx_, upload_buffer); + if (play_fixed_framerate_) + { + play_time_ += play_fixed_frame_time_ * play_speed_ * (!play_rewind_ ? 1.0 : -1.0); + } + else + { + play_time_ += frame_time_ * play_speed_ * (!play_rewind_ ? 
1.0 : -1.0); + } } - - glm::dvec3 const forward_vectors[] = {glm::dvec3(-1.0, 0.0, 0.0), glm::dvec3(1.0, 0.0, 0.0), - glm::dvec3(0.0, 1.0, 0.0), glm::dvec3(0.0, -1.0, 0.0), glm::dvec3(0.0, 0.0, -1.0), - glm::dvec3(0.0, 0.0, 1.0)}; - - glm::dvec3 const up_vectors[] = {glm::dvec3(0.0, -1.0, 0.0), glm::dvec3(0.0, -1.0, 0.0), - glm::dvec3(0.0, 0.0, -1.0), glm::dvec3(0.0, 0.0, 1.0), glm::dvec3(0.0, -1.0, 0.0), - glm::dvec3(0.0, -1.0, 0.0)}; - - for (uint32_t cubemap_face = 0; cubemap_face < 6; ++cubemap_face) + play_time_old_ = play_time_; + uint32_t const animation_count = gfxSceneGetAnimationCount(scene_); + animation = animation_count > 0; + for (uint32_t animation_index = 0; animation_index < animation_count; ++animation_index) { - GfxDrawState draw_sky_state = {}; - gfxDrawStateSetColorTarget(draw_sky_state, 0, environment_buffer_, 0, cubemap_face); - - GfxKernel draw_sky_kernel = - gfxCreateGraphicsKernel(gfx_, convolve_ibl_program_, draw_sky_state, "DrawSky"); - - uint32_t const buffer_dimensions[] = { - environment_buffer_.getWidth(), environment_buffer_.getHeight()}; - - glm::dmat4 const view = - glm::lookAt(glm::dvec3(0.0), forward_vectors[cubemap_face], up_vectors[cubemap_face]); - glm::dmat4 const proj = glm::perspective(M_PI / 2.0, 1.0, 0.1, 1e4); - glm::mat4 const view_proj_inv = glm::mat4(glm::inverse(proj * view)); - - gfxProgramSetParameter(gfx_, convolve_ibl_program_, "g_BufferDimensions", buffer_dimensions); - gfxProgramSetParameter(gfx_, convolve_ibl_program_, "g_ViewProjectionInverse", view_proj_inv); - - gfxProgramSetParameter(gfx_, convolve_ibl_program_, "g_EnvironmentMap", environment_map); - - gfxProgramSetParameter(gfx_, convolve_ibl_program_, "g_LinearSampler", linear_sampler_); - - gfxCommandBindKernel(gfx_, draw_sky_kernel); - gfxCommandDraw(gfx_, 3); - - gfxDestroyKernel(gfx_, draw_sky_kernel); + GfxConstRef animation_ref = gfxSceneGetAnimationHandle(scene_, animation_index); + float const animation_length = 
gfxSceneGetAnimationLength(scene_, animation_ref); + float time_in_seconds = (float)fmod(play_time_, (double)animation_length); + // Handle negative playback times + time_in_seconds = + (time_in_seconds >= 0.0f) ? time_in_seconds : animation_length + time_in_seconds; + gfxSceneApplyAnimation(scene_, animation_ref, time_in_seconds); } + } - GfxKernel blur_sky_kernel = gfxCreateComputeKernel(gfx_, convolve_ibl_program_, "BlurSky"); + // Calculate the camera matrices for this frame + { + uint32_t const jitter_index = jitter_frame_index_ != ~0 ? jitter_frame_index_ : frame_index_; - for (uint32_t mip_level = 1; mip_level < environment_buffer_mips; ++mip_level) + auto const &camera = getCamera(); + for (uint32_t i = 0; i < 2; ++i) { - gfxProgramSetParameter( - gfx_, convolve_ibl_program_, "g_InEnvironmentBuffer", environment_buffer_, mip_level - 1); - gfxProgramSetParameter( - gfx_, convolve_ibl_program_, "g_OutEnvironmentBuffer", environment_buffer_, mip_level); - - uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, blur_sky_kernel); - uint32_t const num_groups_x = - (GFX_MAX(environment_buffer_size >> mip_level, 1u) + num_threads[0] - 1) / num_threads[0]; - uint32_t const num_groups_y = - (GFX_MAX(environment_buffer_size >> mip_level, 1u) + num_threads[1] - 1) / num_threads[1]; - uint32_t const num_groups_z = 6; // blur all faces - - gfxCommandBindKernel(gfx_, blur_sky_kernel); - gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, num_groups_z); + camera_jitter_x_ = + (i > 0 ? (2.0f * CalculateHaltonNumber(jitter_index + 1, 2) - 1.0f) / getWidth() : 0.0f); + camera_jitter_y_ = + (i > 0 ? 
(2.0f * CalculateHaltonNumber(jitter_index + 1, 3) - 1.0f) / getHeight() : 0.0f); + camera_matrices_[i].projection[2][0] = camera_jitter_x_; + camera_matrices_[i].projection[2][1] = camera_jitter_y_; + camera_matrices_[i].view_projection = + glm::dmat4(camera_matrices_[i].projection) * glm::dmat4(camera_matrices_[i].view); + camera_matrices_[i].view_prev = camera_matrices_[i].view; + camera_matrices_[i].projection_prev = camera_matrices_[i].projection; + camera_matrices_[i].view_projection_prev = camera_matrices_[i].view_projection; + camera_matrices_[i].inv_view_projection_prev = camera_matrices_[i].inv_view_projection; + glm::dmat4 const view = + glm::lookAt(glm::dvec3(camera.eye), glm::dvec3(camera.center), glm::dvec3(camera.up)); + camera_matrices_[i].view = glm::mat4(view); + glm::dmat4 projection = glm::perspective( + (double)camera.fovY, (double)camera.aspect, (double)camera.nearZ, (double)camera.farZ); + projection[2][0] = camera_jitter_x_; + projection[2][1] = camera_jitter_y_; + camera_matrices_[i].projection = glm::mat4(projection); + glm::dmat4 const view_projection = projection * view; + camera_matrices_[i].view_projection = glm::mat4(view_projection); + glm::dmat4 const inv_view_projection = glm::inverse(view_projection); + camera_matrices_[i].inv_view_projection = glm::mat4(inv_view_projection); + camera_matrices_[i].inv_projection = glm::mat4(glm::inverse(projection)); + camera_matrices_[i].inv_view = glm::mat4(glm::inverse(view)); + camera_matrices_[i].reprojection = + glm::mat4(glm::dmat4(camera_matrices_[i].view_projection_prev) * inv_view_projection); + + // Update camera matrices + { + gfxDestroyBuffer(gfx_, camera_matrices_buffer_[i]); + camera_matrices_buffer_[i] = allocateConstantBuffer(1); + memcpy(gfxBufferGetData(gfx_, camera_matrices_buffer_[i]), &camera_matrices_[i], + sizeof(camera_matrices_[i])); + } } - - gfxDestroyKernel(gfx_, blur_sky_kernel); - - gfxDestroyTexture(gfx_, environment_map); } - } - // Run the animations - bool 
animation = false; - if (animate_) - { - uint32_t const animation_count = gfxSceneGetAnimationCount(scene); - animation = animation_count > 0; - for (uint32_t animation_index = 0; animation_index < animation_count; ++animation_index) + // Update the scene history { - GfxConstRef animation_ref = gfxSceneGetAnimationHandle(scene, animation_index); - float const animation_length = gfxSceneGetAnimationLength(scene, animation_ref); - float const time_in_seconds = (float)fmod(time_, (double)animation_length); - gfxSceneApplyAnimation(scene, animation_ref, time_in_seconds); - } - } - - // Get hold of the active camera (can be animated) - GfxConstRef camera_ref = gfxSceneGetActiveCamera(scene); - - if (camera_ref) - camera_ = *camera_ref; - else - { - static bool warned; - if (!warned) GFX_PRINTLN("No active camera was found for rendering; initializing a dummy camera..."); - warned = true; - - camera_.type = kGfxCameraType_Perspective; - camera_.eye = glm::vec3(0.0f, 1.0f, 3.0f); - camera_.center = glm::vec3(0.0f, 1.0f, 0.0f); - camera_.up = glm::vec3(0.0f, 1.0f, 0.0f); - camera_.aspect = getWidth() / (float)getHeight(); - camera_.fovY = DegreesToRadians(90.0f); - camera_.nearZ = 0.1f; - camera_.farZ = 1e4f; - } + for (size_t i = 0; i < prev_transform_data_.size(); ++i) + { + prev_transform_data_[i] = transform_data_[i]; + } - { - // Calculate the camera matrices for this frame - for (uint32_t i = 0; i < 2; ++i) - { - const uint32_t jitter_index = frame_index_; - float const jitter_x = - (i > 0 ? (2.0f * CalculateHaltonNumber(jitter_index + 1, 2) - 1.0f) / getWidth() : 0.0f); - float const jitter_y = - (i > 0 ? 
(2.0f * CalculateHaltonNumber(jitter_index + 1, 3) - 1.0f) / getHeight() : 0.0f); - camera_matrices_[i].projection[2][0] = jitter_x; - camera_matrices_[i].projection[2][1] = jitter_y; - camera_matrices_[i].view_projection = - glm::dmat4(camera_matrices_[i].projection) * glm::dmat4(camera_matrices_[i].view); - camera_matrices_[i].view_prev = camera_matrices_[i].view; - camera_matrices_[i].projection_prev = camera_matrices_[i].projection; - camera_matrices_[i].view_projection_prev = camera_matrices_[i].view_projection; - camera_matrices_[i].view = glm::lookAt(camera_.eye, camera_.center, camera_.up); - camera_matrices_[i].projection = - glm::perspective(camera_.fovY, camera_.aspect, camera_.nearZ, camera_.farZ); - camera_matrices_[i].projection[2][0] = jitter_x; - camera_matrices_[i].projection[2][1] = jitter_y; - camera_matrices_[i].view_projection = - glm::dmat4(camera_matrices_[i].projection) * glm::dmat4(camera_matrices_[i].view); - camera_matrices_[i].inv_view_projection = glm::inverse(camera_matrices_[i].view_projection); - camera_matrices_[i].inv_projection = glm::inverse(camera_matrices_[i].projection); - camera_matrices_[i].inv_view = glm::inverse(camera_matrices_[i].view); - - // Update camera matrices + if (!prev_transform_data_.empty()) { - gfxDestroyBuffer(gfx_, camera_matrices_buffer_[i]); - camera_matrices_buffer_[i] = allocateConstantBuffer(1); - memcpy(gfxBufferGetData(gfx_, camera_matrices_buffer_[i]), &camera_matrices_[i], - sizeof(camera_matrices_[i])); + GfxCommandEvent const command_event(gfx_, "UpdatePreviousTranforms"); + GfxBuffer prev_transform_buffer = + allocateConstantBuffer((uint32_t)prev_transform_data_.size()); + memcpy(gfxBufferGetData(gfx_, prev_transform_buffer), prev_transform_data_.data(), + prev_transform_data_.size() * sizeof(glm::mat4x3)); + gfxCommandCopyBuffer(gfx_, prev_transform_buffer_, prev_transform_buffer); + gfxDestroyBuffer(gfx_, prev_transform_buffer); } } - } - { - // Update the scene history - for (size_t i = 0; i 
< prev_transform_data_.size(); ++i) + // Update the AOV history { - prev_transform_data_[i] = transform_data_[i]; - } + GfxCommandEvent const command_event(gfx_, "UpdatePreviousGBuffers"); - if (!prev_transform_data_.empty()) - { - GfxCommandEvent const command_event(gfx_, "UpdatePreviousTranforms"); - GfxBuffer prev_transform_buffer = - allocateConstantBuffer((uint32_t)prev_transform_data_.size()); - memcpy(gfxBufferGetData(gfx_, prev_transform_buffer), prev_transform_data_.data(), - prev_transform_data_.size() * sizeof(glm::mat4)); - gfxCommandCopyBuffer(gfx_, prev_transform_buffer_, prev_transform_buffer); - gfxDestroyBuffer(gfx_, prev_transform_buffer); + for (auto &i : aov_backup_buffers_) + { + gfxCommandCopyTexture(gfx_, i.second, i.first); + } } - } - // Update the AOV history - { - GfxCommandEvent const command_event(gfx_, "UpdatePreviousGBuffers"); - - for (auto &i : aov_backup_buffers_) + // Clear our AOVs { - gfxCommandCopyTexture(gfx_, i.second, i.first); - } - } - - // Clear our AOVs - { - const GfxCommandEvent command_event(gfx_, "ClearGBuffers"); + const GfxCommandEvent command_event(gfx_, "ClearGBuffers"); - if (!was_resized_) - { - for (auto &i : aov_clear_buffers_) + if (!was_resized_) { - gfxCommandClearTexture(gfx_, i); - } + for (auto &i : aov_clear_buffers_) + { + gfxCommandClearTexture(gfx_, i); + } - if (!render_settings_.debug_view_.empty() && render_settings_.debug_view_ != "None") - { - gfxCommandClearTexture(gfx_, getAOVBuffer("Debug")); + if (!debug_view_.empty() && debug_view_ != "None") + { + gfxCommandClearTexture(gfx_, getAOVBuffer("Debug")); + } } - } - else - { - for (auto &i : aov_buffers_) + else { - gfxCommandClearTexture(gfx_, i.second); + for (auto &i : aov_buffers_) + { + gfxCommandClearTexture(gfx_, i.second); + } } } - } - - // Check whether we need to re-build our acceleration structure - size_t mesh_hash = mesh_hash_; - if (frame_index_ == 0 || animation) - mesh_hash = HashReduce(gfxSceneGetObjects(scene), 
gfxSceneGetObjectCount(scene)); - mesh_updated_ = false; - if (mesh_hash != mesh_hash_) - { - GfxCommandEvent const command_event(gfx_, "BuildScene"); - mesh_updated_ = true; + // Check whether we need to re-build our acceleration structure + size_t mesh_hash = mesh_hash_; + if (frame_index_ == 0 || animation) + mesh_hash = + HashReduce(gfxSceneGetObjects(scene_), gfxSceneGetObjectCount(scene_)); - mesh_data_.clear(); - index_data_.clear(); - vertex_data_.clear(); - instance_data_.clear(); - material_data_.clear(); - transform_data_.clear(); + if (mesh_hash != mesh_hash_) + { + GfxCommandEvent const command_event(gfx_, "BuildScene"); + mesh_updated_ = true; + + mesh_data_.clear(); + index_data_.clear(); + vertex_data_.clear(); + instance_data_.clear(); + material_data_.clear(); + transform_data_.clear(); + + gfxDestroyBuffer(gfx_, mesh_buffer_); + gfxDestroyBuffer(gfx_, index_buffer_); + gfxDestroyBuffer(gfx_, vertex_buffer_); + gfxDestroyBuffer(gfx_, instance_buffer_); + gfxDestroyBuffer(gfx_, material_buffer_); + gfxDestroyBuffer(gfx_, transform_buffer_); + + for (GfxTexture const &texture : texture_atlas_) + { + gfxDestroyTexture(gfx_, texture); + } - gfxDestroyBuffer(gfx_, mesh_buffer_); - gfxDestroyBuffer(gfx_, index_buffer_); - gfxDestroyBuffer(gfx_, vertex_buffer_); - gfxDestroyBuffer(gfx_, instance_buffer_); - gfxDestroyBuffer(gfx_, material_buffer_); - gfxDestroyBuffer(gfx_, transform_buffer_); + texture_atlas_.clear(); + raytracing_primitives_.clear(); - for (GfxTexture const &texture : texture_atlas_) - { - gfxDestroyTexture(gfx_, texture); - } + gfxDestroyAccelerationStructure(gfx_, acceleration_structure_); - texture_atlas_.clear(); - raytracing_primitives_.clear(); + GfxMaterial const *materials = gfxSceneGetObjects(scene_); + uint32_t const material_count = gfxSceneGetObjectCount(scene_); - gfxDestroyAccelerationStructure(gfx_, acceleration_structure_); + for (uint32_t i = 0; i < material_count; ++i) + { + Material material = {}; + + material.albedo 
= float4( + float3(materials[i].albedo), glm::uintBitsToFloat((uint32_t)materials[i].albedo_map)); + material.emissivity = float4( + materials[i].emissivity, glm::uintBitsToFloat((uint32_t)materials[i].emissivity_map)); + material.metallicity_roughness = float4(materials[i].metallicity, + glm::uintBitsToFloat((uint32_t)materials[i].metallicity_map), materials[i].roughness, + glm::uintBitsToFloat((uint32_t)materials[i].roughness_map)); + material.normal_alpha_side = + float4(glm::uintBitsToFloat((uint32_t)materials[i].normal_map), materials[i].albedo.w, + glm::uintBitsToFloat( + (uint32_t)((materials[i].flags & kGfxMaterialFlag_DoubleSided) != 0)), + 0.0f); + + uint32_t const material_index = gfxSceneGetObjectHandle(scene_, i); + + if (material_index >= material_data_.size()) + { + material_data_.resize((size_t)material_index + 1); + } - GfxMaterial const *materials = gfxSceneGetObjects(scene); - uint32_t const material_count = gfxSceneGetObjectCount(scene); + material_data_[material_index] = material; + } - for (uint32_t i = 0; i < material_count; ++i) - { - Material material = {}; + material_buffer_ = + gfxCreateBuffer(gfx_, (uint32_t)material_data_.size(), material_data_.data()); + material_buffer_.setName("Capsaicin_MaterialBuffer"); - float alpha = materials[i].albedo.w; + uint32_t const image_count = gfxSceneGetObjectCount(scene_); - if (alpha != 1.0f && (uint32_t)materials[i].albedo_map == -1) + for (uint32_t i = 0; i < image_count; ++i) { - // Material has fixed alpha not found in a texture so one must be created to fit in with the - // current material description - GfxRef image_ref = gfxSceneCreateImage(scene); - image_ref->width = 1; - image_ref->height = 1; - image_ref->channel_count = 4; - image_ref->bytes_per_channel = sizeof(float); - image_ref->format = DXGI_FORMAT_R32G32B32A32_FLOAT; - image_ref->data.resize(4 * sizeof(float)); - image_ref->flags = kGfxImageFlag_HasAlphaChannel; - uint8_t *data = image_ref->data.data(); - float4 albedoAlpha = 
materials[i].albedo; - albedoAlpha.w = alpha; - memcpy(data, &albedoAlpha, 4 * sizeof(float)); - GfxMaterial *material = - gfxSceneGetObject(scene, gfxSceneGetObjectHandle(scene, i)); - material->albedo = float4(1.0f, 1.0f, 1.0f, 1.0f); - material->albedo_map = image_ref; - } - material.albedo = - float4(float3(materials[i].albedo), glm::uintBitsToFloat((uint32_t)materials[i].albedo_map)); - material.emissivity = - float4(materials[i].emissivity, glm::uintBitsToFloat((uint32_t)materials[i].emissivity_map)); - material.metallicity_roughness = - float4(materials[i].metallicity, glm::uintBitsToFloat((uint32_t)materials[i].metallicity_map), - materials[i].roughness, glm::uintBitsToFloat((uint32_t)materials[i].roughness_map)); - material.normal_ao = float4(glm::uintBitsToFloat((uint32_t)materials[i].normal_map), - glm::uintBitsToFloat((uint32_t)materials[i].ao_map), 0.0f, 0.0f); - - uint32_t const material_index = gfxSceneGetObjectHandle(scene, i); - - if (material_index >= material_data_.size()) - { - material_data_.resize((size_t)material_index + 1); - } + GfxConstRef image_ref = gfxSceneGetObjectHandle(scene_, i); - material_data_[material_index] = material; - } + uint32_t const image_index = (uint32_t)image_ref; - material_buffer_ = - gfxCreateBuffer(gfx_, (uint32_t)material_data_.size(), material_data_.data()); - material_buffer_.setName("Capsaicin_MaterialBuffer"); - - GfxImage const *images = gfxSceneGetObjects(scene); - uint32_t const image_count = gfxSceneGetObjectCount(scene); + if (image_index >= texture_atlas_.size()) + { + texture_atlas_.resize((size_t)image_index + 1); + } - for (uint32_t i = 0; i < image_count; ++i) - { - GfxConstRef image_ref = gfxSceneGetObjectHandle(scene, i); + GfxTexture &texture = texture_atlas_[image_index]; - if (image_ref == environment_map_) continue; + DXGI_FORMAT format = image_ref->format; + uint32_t image_width = image_ref->width; + uint32_t image_height = image_ref->height; + uint32_t const image_mips = 
gfxCalculateMipCount(image_width, image_height); + uint32_t const image_channels = image_ref->channel_count; - uint32_t const image_index = (uint32_t)image_ref; + texture = gfxCreateTexture2D(gfx_, image_width, image_height, format, image_mips); + texture.setName(gfxSceneGetObjectMetadata(scene_, image_ref).getObjectName()); - if (image_index >= texture_atlas_.size()) - { - texture_atlas_.resize((size_t)image_index + 1); + if (!image_ref->width || !image_ref->height) + { + gfxCommandClearTexture(gfx_, texture); + } + else + { + uint8_t const *image_data = image_ref->data.data(); + + const uint64_t uncompressed_size = + (uint64_t)image_width * image_height * image_channels * image_ref->bytes_per_channel; + uint64_t texture_size = + !gfxImageIsFormatCompressed(*image_ref) ? uncompressed_size : image_ref->data.size(); + bool const mips = image_ref->flags & kGfxImageFlag_HasMipLevels; + if (mips && !gfxImageIsFormatCompressed(*image_ref)) + { + texture_size += texture_size / 3; + } + texture_size = GFX_MIN(texture_size, image_ref->data.size()); + GfxBuffer texture_data = + gfxCreateBuffer(gfx_, texture_size, image_data, kGfxCpuAccess_Write); + + gfxCommandCopyBufferToTexture(gfx_, texture, texture_data); + if (!mips && !gfxImageIsFormatCompressed(*image_ref)) + gfxCommandGenerateMips(gfx_, texture); + gfxDestroyBuffer(gfx_, texture_data); + } } + GfxMesh const *meshes = gfxSceneGetObjects(scene_); + uint32_t const mesh_count = gfxSceneGetObjectCount(scene_); - GfxTexture &texture = texture_atlas_[image_index]; + for (uint32_t i = 0; i < mesh_count; ++i) + { + Mesh mesh = {}; - DXGI_FORMAT format = image_ref->format; - uint32_t image_width = image_ref->width; - uint32_t image_height = image_ref->height; - uint32_t const image_mips = gfxCalculateMipCount(image_width, image_height); - uint32_t const image_channels = image_ref->channel_count; + mesh.vertex_offset_idx = (uint32_t)vertex_data_.size(); + mesh.index_offset_idx = (uint32_t)index_data_.size(); + mesh.index_count 
= (uint32_t)meshes[i].indices.size(); - texture = gfxCreateTexture2D(gfx_, image_width, image_height, format, image_mips); - texture.setName(gfxSceneGetObjectMetadata(scene, image_ref).getObjectName()); + uint32_t const mesh_index = gfxSceneGetObjectHandle(scene_, i); - if (!image_ref->width || !image_ref->height) - { - gfxCommandClearTexture(gfx_, texture); - } - else - { - uint8_t const *image_data = image_ref->data.data(); - - const uint64_t uncompressed_size = - (uint64_t)image_width * image_height * image_channels * image_ref->bytes_per_channel; - uint64_t texture_size = - !gfxImageIsFormatCompressed(*image_ref) ? uncompressed_size : image_ref->data.size(); - bool const mips = image_ref->flags & kGfxImageFlag_HasMipLevels; - if (mips && !gfxImageIsFormatCompressed(*image_ref)) + if (mesh_index >= mesh_data_.size()) { - texture_size += texture_size / 3; + mesh_data_.resize((size_t)mesh_index + 1); } - texture_size = GFX_MIN(texture_size, image_ref->data.size()); - GfxBuffer texture_data = gfxCreateBuffer(gfx_, texture_size, image_data, kGfxCpuAccess_Write); - gfxCommandCopyBufferToTexture(gfx_, texture, texture_data); - if (!mips && !gfxImageIsFormatCompressed(*image_ref)) gfxCommandGenerateMips(gfx_, texture); - gfxDestroyBuffer(gfx_, texture_data); - } - } + mesh_data_[mesh_index] = mesh; + for (size_t j = 0; j < meshes[i].indices.size(); ++j) + index_data_.push_back(meshes[i].indices[j]); + + for (size_t j = 0; j < meshes[i].vertices.size(); ++j) + { + Vertex vertex = {}; - GfxMesh const *meshes = gfxSceneGetObjects(scene); - uint32_t const mesh_count = gfxSceneGetObjectCount(scene); + vertex.position = float4(meshes[i].vertices[j].position, 1.0f); + vertex.normal = float4(meshes[i].vertices[j].normal, 0.0f); + vertex.uv = float2(meshes[i].vertices[j].uv); - for (uint32_t i = 0; i < mesh_count; ++i) - { - Mesh mesh = {}; + vertex_data_.push_back(vertex); + } + } - mesh.material_index = (uint32_t)meshes[i].material; - mesh.vertex_offset = 
(uint32_t)vertex_data_.size() * sizeof(Vertex); - mesh.vertex_stride = sizeof(Vertex); - mesh.index_offset = (uint32_t)index_data_.size() * sizeof(uint32_t); - mesh.index_stride = sizeof(uint32_t); - mesh.index_count = (uint32_t)meshes[i].indices.size(); + mesh_buffer_ = gfxCreateBuffer(gfx_, (uint32_t)mesh_data_.size(), mesh_data_.data()); + index_buffer_ = gfxCreateBuffer(gfx_, (uint32_t)index_data_.size(), index_data_.data()); + vertex_buffer_ = + gfxCreateBuffer(gfx_, (uint32_t)vertex_data_.size(), vertex_data_.data()); - uint32_t const mesh_index = gfxSceneGetObjectHandle(scene, i); + mesh_buffer_.setName("Capsaicin_MeshBuffer"); + index_buffer_.setName("Capsaicin_IndexBuffer"); + vertex_buffer_.setName("Capsaicin_VertexBuffer"); - if (mesh_index >= mesh_data_.size()) + // NVIDIA-specific fix + if (gfx_.getVendorId() == 0x10DEu) // NVIDIA { - mesh_data_.resize((size_t)mesh_index + 1); + vertex_buffer_.setStride(4); } - mesh_data_[mesh_index] = mesh; - for (size_t j = 0; j < meshes[i].indices.size(); ++j) - index_data_.push_back(meshes[i].indices[j]); + acceleration_structure_ = gfxCreateAccelerationStructure(gfx_); + acceleration_structure_.setName("Capsaicin_AccelerationStructure"); - for (size_t j = 0; j < meshes[i].vertices.size(); ++j) + GfxInstance const *instances = gfxSceneGetObjects(scene_); + uint32_t const instance_count = gfxSceneGetObjectCount(scene_); + + for (uint32_t i = 0; i < instance_count; ++i) { - Vertex vertex = {}; + Instance instance = {}; - vertex.position = float4(meshes[i].vertices[j].position, 1.0f); - vertex.normal = float4(meshes[i].vertices[j].normal, 0.0f); - vertex.uv = float2(meshes[i].vertices[j].uv); + GfxConstRef mesh_ref = instances[i].mesh; + GfxConstRef material_ref = instances[i].material; - vertex_data_.push_back(vertex); - } - } + uint32_t const instance_index = gfxSceneGetObjectHandle(scene_, i); - mesh_buffer_ = gfxCreateBuffer(gfx_, (uint32_t)mesh_data_.size(), mesh_data_.data()); - index_buffer_ = 
gfxCreateBuffer(gfx_, (uint32_t)index_data_.size(), index_data_.data()); - vertex_buffer_ = gfxCreateBuffer(gfx_, (uint32_t)vertex_data_.size(), vertex_data_.data()); + instance.mesh_index = (uint32_t)mesh_ref; + instance.material_index = (uint32_t)material_ref; + instance.transform_index = instance_index; - mesh_buffer_.setName("Capsaicin_MeshBuffer"); - index_buffer_.setName("Capsaicin_IndexBuffer"); - vertex_buffer_.setName("Capsaicin_VertexBuffer"); + if (instance_index >= instance_data_.size()) + { + instance_data_.resize((size_t)instance_index + 1); - acceleration_structure_ = gfxCreateAccelerationStructure(gfx_); - acceleration_structure_.setName("Capsaicin_AccelerationStructure"); + instance_min_bounds_.resize((size_t)instance_index + 1); + instance_max_bounds_.resize((size_t)instance_index + 1); + } - GfxInstance const *instances = gfxSceneGetObjects(scene); - uint32_t const instance_count = gfxSceneGetObjectCount(scene); + instance_data_[instance_index] = instance; - for (uint32_t i = 0; i < instance_count; ++i) - { - Instance instance = {}; + if (instance.transform_index >= transform_data_.size()) + { + transform_data_.resize((size_t)instance.transform_index + 1); + } - GfxConstRef mesh_ref = instances[i].mesh; + transform_data_[instance.transform_index] = instances[i].transform; - uint32_t const instance_index = gfxSceneGetObjectHandle(scene, i); + Mesh const &mesh = mesh_data_[(uint32_t)mesh_ref]; - instance.mesh_index = (uint32_t)mesh_ref; - instance.transform_index = instance_index; - instance.bx_id = (uint)-1; - instance.padding = 0; + uint32_t const index_count = (uint32_t)mesh_ref->indices.size(); + uint32_t const vertex_count = (uint32_t)mesh_ref->vertices.size(); - if (instance_index >= instance_data_.size()) - { - instance_data_.resize((size_t)instance_index + 1); + if (instance_index >= raytracing_primitives_.size()) + { + raytracing_primitives_.resize((size_t)instance_index + 1); + } - instance_min_bounds_.resize((size_t)instance_index + 1); 
- instance_max_bounds_.resize((size_t)instance_index + 1); - } + GfxRaytracingPrimitive &rt_mesh = raytracing_primitives_[instance_index]; - instance_data_[instance_index] = instance; + rt_mesh = gfxCreateRaytracingPrimitive(gfx_, acceleration_structure_); - if (instance.transform_index >= transform_data_.size()) - { - transform_data_.resize((size_t)instance.transform_index + 1); - } + GfxBuffer index_buffer = + gfxCreateBufferRange(gfx_, index_buffer_, mesh.index_offset_idx, index_count); + GfxBuffer vertex_buffer = + gfxCreateBufferRange(gfx_, vertex_buffer_, mesh.vertex_offset_idx, vertex_count); - transform_data_[instance.transform_index] = instances[i].transform; + uint32_t non_opaque = + !material_ref + || (material_ref->albedo.w >= 1.0f + && (!material_ref->albedo_map + || (material_ref->albedo_map->flags & kGfxImageFlag_HasAlphaChannel) + == 0)) + ? kGfxBuildRaytracingPrimitiveFlag_Opaque + : 0; - Mesh const &mesh = mesh_data_[(uint32_t)mesh_ref]; + gfxRaytracingPrimitiveBuild(gfx_, rt_mesh, index_buffer, vertex_buffer, 0, non_opaque); - uint32_t const index_count = (uint32_t)mesh_ref->indices.size(); - uint32_t const vertex_count = (uint32_t)mesh_ref->vertices.size(); + glm::mat4 const row_major_transform = glm::transpose(instances[i].transform); - if (instance_index >= raytracing_primitives_.size()) - { - raytracing_primitives_.resize((size_t)instance_index + 1); - } + gfxRaytracingPrimitiveSetTransform(gfx_, rt_mesh, &row_major_transform[0][0]); + gfxRaytracingPrimitiveSetInstanceID(gfx_, rt_mesh, instance_index); + gfxRaytracingPrimitiveSetInstanceContributionToHitGroupIndex( + gfx_, rt_mesh, instance_index * sbt_stride_in_entries_[kGfxShaderGroupType_Hit]); - GfxRaytracingPrimitive &rt_mesh = raytracing_primitives_[instance_index]; + gfxDestroyBuffer(gfx_, index_buffer); + gfxDestroyBuffer(gfx_, vertex_buffer); + } - rt_mesh = gfxCreateRaytracingPrimitive(gfx_, acceleration_structure_); + instance_buffer_ = + gfxCreateBuffer(gfx_, 
(uint32_t)instance_data_.size(), instance_data_.data()); + instance_buffer_.setName("Capsaicin_InstanceBuffer"); - GfxBuffer index_buffer = gfxCreateBufferRange( - gfx_, index_buffer_, mesh.index_offset / mesh.index_stride, index_count); - GfxBuffer vertex_buffer = gfxCreateBufferRange( - gfx_, vertex_buffer_, mesh.vertex_offset / mesh.vertex_stride, vertex_count); + transform_buffer_ = + gfxCreateBuffer(gfx_, (uint32_t)transform_data_.size(), transform_data_.data()); + transform_buffer_.setName("Capsaicin_TransformBuffer"); - uint32_t non_opaque = - !mesh_ref->material || !mesh_ref->material->albedo_map - || (mesh_ref->material->albedo_map->flags & kGfxImageFlag_HasAlphaChannel) == 0 - ? kGfxBuildRaytracingPrimitiveFlag_Opaque - : 0; + prev_transform_data_.resize(transform_data_.size()); - gfxRaytracingPrimitiveBuild(gfx_, rt_mesh, index_buffer, vertex_buffer, 0, non_opaque); + for (size_t i = 0; i < prev_transform_data_.size(); ++i) + { + prev_transform_data_[i] = transform_data_[i]; + } - glm::mat4 const row_major_transform = glm::transpose(instances[i].transform); + prev_transform_buffer_ = gfxCreateBuffer( + gfx_, (uint32_t)prev_transform_data_.size(), prev_transform_data_.data()); + prev_transform_buffer_.setName("Capsaicin_PrevTransformBuffer"); - gfxRaytracingPrimitiveSetTransform(gfx_, rt_mesh, &row_major_transform[0][0]); - gfxRaytracingPrimitiveSetInstanceID(gfx_, rt_mesh, instance_index); + gfxAccelerationStructureUpdate(gfx_, acceleration_structure_); - gfxDestroyBuffer(gfx_, index_buffer); - gfxDestroyBuffer(gfx_, vertex_buffer); + mesh_hash_ = mesh_hash; } - instance_buffer_ = - gfxCreateBuffer(gfx_, (uint32_t)instance_data_.size(), instance_data_.data()); - instance_buffer_.setName("Capsaicin_InstanceBuffer"); + GfxInstance const *instances = gfxSceneGetObjects(scene_); + uint32_t const instance_count = gfxSceneGetObjectCount(scene_); - transform_buffer_ = - gfxCreateBuffer(gfx_, (uint32_t)transform_data_.size(), transform_data_.data()); - 
transform_buffer_.setName("Capsaicin_TransformBuffer"); + // Check whether we need to re-build our transform data + size_t transform_hash = transform_hash_; + if (frame_index_ == 0 || animation) transform_hash = HashReduce(instances, instance_count); - prev_transform_data_.resize(transform_data_.size()); - - for (size_t i = 0; i < prev_transform_data_.size(); ++i) + if (transform_hash != transform_hash_ || mesh_updated_) { - prev_transform_data_[i] = transform_data_[i]; - } + transform_updated_ = true; + transform_hash_ = transform_hash; - prev_transform_buffer_ = gfxCreateBuffer( - gfx_, (uint32_t)prev_transform_data_.size(), prev_transform_data_.data()); - prev_transform_buffer_.setName("Capsaicin_PrevTransformBuffer"); + // Update our transforms + GfxBuffer transform_buffer = + allocateConstantBuffer((uint32_t)transform_data_.size()); + glm::mat4x3 *transform_data = (glm::mat4x3 *)gfxBufferGetData(gfx_, transform_buffer); - gfxAccelerationStructureUpdate(gfx_, acceleration_structure_); - - mesh_hash_ = mesh_hash; - } - - GfxInstance const *instances = gfxSceneGetObjects(scene); - uint32_t const instance_count = gfxSceneGetObjectCount(scene); + for (uint32_t i = 0; i < instance_count; ++i) + { + uint32_t const instance_index = gfxSceneGetObjectHandle(scene_, i); - // Check whether we need to re-build our transform data - size_t transform_hash = transform_hash_; - if (frame_index_ == 0 || animation) transform_hash = HashReduce(instances, instance_count); + if (instance_index >= instance_data_.size()) + { + continue; + } - transform_updated_ = false; - if (transform_hash != transform_hash_ || mesh_updated_) - { - transform_updated_ = true; - transform_hash_ = transform_hash; + GFX_ASSERT(instance_index < instance_min_bounds_.size()); + GFX_ASSERT(instance_index < instance_max_bounds_.size()); - // Update our transforms - GfxBuffer transform_buffer = allocateConstantBuffer((uint32_t)transform_data_.size()); - glm::mat4 *transform_data = (glm::mat4 
*)gfxBufferGetData(gfx_, transform_buffer); + Instance const &instance = instance_data_[instance_index]; - for (uint32_t i = 0; i < instance_count; ++i) - { - uint32_t const instance_index = gfxSceneGetObjectHandle(scene, i); + transform_data[instance.transform_index] = instances[i].transform; + transform_data_[instance.transform_index] = instances[i].transform; - if (instance_index >= instance_data_.size()) - { - continue; - } + if (instances[i].mesh) + { + GfxMesh const &mesh = *instances[i].mesh; - GFX_ASSERT(instance_index < instance_min_bounds_.size()); - GFX_ASSERT(instance_index < instance_max_bounds_.size()); + CalculateTransformedBounds(mesh.bounds_min, mesh.bounds_max, instances[i].transform, + instance_min_bounds_[instance_index], instance_max_bounds_[instance_index]); + } - Instance const &instance = instance_data_[instance_index]; + glm::mat4 const row_major_transform = glm::transpose(instances[i].transform); - transform_data[instance.transform_index] = instances[i].transform; - transform_data_[instance.transform_index] = instances[i].transform; + gfxRaytracingPrimitiveSetTransform( + gfx_, raytracing_primitives_[instance_index], &row_major_transform[0][0]); + } - if (instances[i].mesh) + // Update our acceleration structure { - GfxMesh const &mesh = *instances[i].mesh; + GfxCommandEvent const command_event(gfx_, "UpdateTLAS"); - CalculateTransformedBounds(mesh.bounds_min, mesh.bounds_max, instances[i].transform, - instance_min_bounds_[instance_index], instance_max_bounds_[instance_index]); + gfxCommandCopyBuffer(gfx_, transform_buffer_, transform_buffer); + gfxAccelerationStructureUpdate(gfx_, acceleration_structure_); + gfxDestroyBuffer(gfx_, transform_buffer); } - glm::mat4 const row_major_transform = glm::transpose(instances[i].transform); - - gfxRaytracingPrimitiveSetTransform( - gfx_, raytracing_primitives_[instance_index], &row_major_transform[0][0]); - } - - // Update our acceleration structure - { - GfxCommandEvent const command_event(gfx_, 
"UpdateTLAS"); - - gfxCommandCopyBuffer(gfx_, transform_buffer_, transform_buffer); - gfxAccelerationStructureUpdate(gfx_, acceleration_structure_); - gfxDestroyBuffer(gfx_, transform_buffer); - } - - // Set up our instance indirection table - instance_id_data_.resize(gfxSceneGetObjectCount(scene)); + // Set up our instance indirection table + instance_id_data_.resize(gfxSceneGetObjectCount(scene_)); - for (size_t i = 0; i < instance_id_data_.size(); ++i) - { - instance_id_data_[i] = gfxSceneGetObjectHandle(scene, (uint32_t)i); - } + for (size_t i = 0; i < instance_id_data_.size(); ++i) + { + instance_id_data_[i] = gfxSceneGetObjectHandle(scene_, (uint32_t)i); + } - GfxBuffer instance_id_buffer = allocateConstantBuffer((uint32_t)instance_id_data_.size()); - memcpy(gfxBufferGetData(gfx_, instance_id_buffer), instance_id_data_.data(), - instance_id_data_.size() * sizeof(uint32_t)); + GfxBuffer instance_id_buffer = + allocateConstantBuffer((uint32_t)instance_id_data_.size()); + memcpy(gfxBufferGetData(gfx_, instance_id_buffer), instance_id_data_.data(), + instance_id_data_.size() * sizeof(uint32_t)); - if (!instance_id_buffer_ || instance_id_buffer.getSize() != instance_id_buffer_.getSize()) - { - gfxDestroyBuffer(gfx_, instance_id_buffer_); - instance_id_buffer_ = gfxCreateBuffer(gfx_, (uint32_t)instance_id_data_.size()); - instance_id_buffer_.setName("Capsaicin_InstanceIDBuffer"); - } + if (!instance_id_buffer_ || instance_id_buffer.getSize() != instance_id_buffer_.getSize()) + { + gfxDestroyBuffer(gfx_, instance_id_buffer_); + instance_id_buffer_ = gfxCreateBuffer(gfx_, (uint32_t)instance_id_data_.size()); + instance_id_buffer_.setName("Capsaicin_InstanceIDBuffer"); + } - // Update our instance table - { - GfxCommandEvent const command_event(gfx_, "UpdateInstanceTable"); - gfxCommandCopyBuffer(gfx_, instance_id_buffer_, instance_id_buffer); - gfxDestroyBuffer(gfx_, instance_id_buffer); + // Update our instance table + { + GfxCommandEvent const command_event(gfx_, 
"UpdateInstanceTable"); + gfxCommandCopyBuffer(gfx_, instance_id_buffer_, instance_id_buffer); + gfxDestroyBuffer(gfx_, instance_id_buffer); + } } - } - - // Bind our global index and vertex data - gfxCommandBindIndexBuffer(gfx_, index_buffer_); - gfxCommandBindVertexBuffer(gfx_, vertex_buffer_); - // Update the components - for (auto const &component : components_) - { - component.second->setGfxContext(gfx_); - component.second->resetQueries(); - { - Component::TimedSection const timed_section(*component.second, component.second->getName()); - component.second->run(*this); - } - } + // Bind our global index and vertex data + gfxCommandBindIndexBuffer(gfx_, index_buffer_); + gfxCommandBindVertexBuffer(gfx_, vertex_buffer_); - // Execute our render techniques - for (auto const &render_technique : render_techniques_) - { - render_technique->setGfxContext(gfx_); - render_technique->resetQueries(); + // Update the components + for (auto const &component : components_) { - RenderTechnique::TimedSection const timed_section(*render_technique, render_technique->getName()); - render_technique->render(*this); + component.second->setGfxContext(gfx_); + component.second->resetQueries(); + { + Component::TimedSection const timed_section(*component.second, component.second->getName()); + component.second->run(*this); + } } - } - - // We've completed a new frame - ++frame_index_; -} -void CapsaicinInternal::render(GfxScene scene, RenderSettings &render_settings) -{ - bool rebuild_render_techniques = - (render_settings.renderer_ != render_settings_.renderer_ || renderer_ == nullptr); - - bool render_next_frame = true; - if (render_settings.play_mode_ == kPlayMode_FrameByFrame) - { - bool restart_animation = render_settings.play_from_start_; - if (restart_animation) + // Execute our render techniques + for (auto const &render_technique : render_techniques_) { - rebuild_render_techniques = true; // Rebuild technique caches - frame_index_ = 0; + render_technique->setGfxContext(gfx_); 
+ render_technique->resetQueries(); + { + RenderTechnique::TimedSection const timed_section( + *render_technique, render_technique->getName()); + render_technique->render(*this); + } } - render_next_frame = (frame_index_ < render_settings.play_to_frame_index_ || // Play next frames - restart_animation); - } - - render_settings_ = render_settings; - - if (rebuild_render_techniques) - { - render_next_frame = true; - setupRenderTechniques(render_settings); - } + // We've completed a new frame + ++frame_index_; - if (render_next_frame) - { - renderNextFrame(scene); + // Reset remaining update flags + environment_map_updated_ = false; + scene_updated_ = false; + camera_updated_ = false; } // Show debug visualizations if requested or blit kAOV_Color - if (render_settings_.debug_view_.empty() || render_settings_.debug_view_ == "None") + if (debug_view_.empty() || debug_view_ == "None") { const GfxCommandEvent command_event(gfx_, "Blit"); gfxProgramSetParameter(gfx_, blit_program_, "ColorBuffer", getAOVBuffer("Color")); @@ -1193,10 +1602,10 @@ void CapsaicinInternal::render(GfxScene scene, RenderSettings &render_settings) else { auto debugView = std::find_if(debug_views_.cbegin(), debug_views_.cend(), - [this](auto val) { return render_settings_.debug_view_ == val.first; }); + [this](auto val) { return debug_view_ == val.first; }); if (debugView == debug_views_.cend()) { - GFX_PRINTLN("Error: Invalid debug view requested: %s", render_settings_.debug_view_.data()); + GFX_PRINTLN("Error: Invalid debug view requested: %s", debug_view_.data()); const GfxCommandEvent command_event(gfx_, "DrawInvalidDebugView"); gfxCommandClearBackBuffer(gfx_); } @@ -1204,7 +1613,10 @@ void CapsaicinInternal::render(GfxScene scene, RenderSettings &render_settings) { // Output AOV auto aov = getAOVBuffer(debugView->first); - if (aov.getFormat() == DXGI_FORMAT_D32_FLOAT) + if (aov.getFormat() == DXGI_FORMAT_D32_FLOAT + || (aov.getFormat() == DXGI_FORMAT_R32_FLOAT + && (strstr(aov.getName(), 
"Depth") != nullptr + || strstr(aov.getName(), "depth") != nullptr))) { const GfxCommandEvent command_event(gfx_, "DrawDepthDebugView"); gfxProgramSetParameter(gfx_, debug_depth_program_, "DepthBuffer", getAOVBuffer("Depth")); @@ -1218,8 +1630,7 @@ void CapsaicinInternal::render(GfxScene scene, RenderSettings &render_settings) // If tonemapping is enabled then we allow it to tonemap the AOV into the Debug buffer and // then output from there auto const format = aov.getFormat(); - if (render_settings_.hasOption("tonemap_enable") - && render_settings_.getOption("tonemap_enable") + if (hasOption("tonemap_enable") && getOption("tonemap_enable") && (format == DXGI_FORMAT_R32G32B32A32_FLOAT || format == DXGI_FORMAT_R32G32B32_FLOAT || format == DXGI_FORMAT_R16G16B16A16_FLOAT || format == DXGI_FORMAT_R11G11B10_FLOAT)) { @@ -1241,50 +1652,244 @@ void CapsaicinInternal::render(GfxScene scene, RenderSettings &render_settings) } } - // Dump + // Dump buffers while (dump_requests_.size() > 0) { - auto &[dump_file_path, dump_aov] = dump_requests_.front(); + auto const &[dump_file_path, dump_aov] = dump_requests_.front(); if (hasAOVBuffer(dump_aov)) { - const GfxCommandEvent command_event(gfx_, "Dump AOV '%s'", dump_aov); + const GfxCommandEvent command_event(gfx_, "Dump AOV '%s'", dump_aov.c_str()); dumpBuffer(dump_file_path.c_str(), getAOVBuffer(dump_aov)); } dump_requests_.pop_front(); } - while (dump_in_flight_buffers_.size() > 0) + uint32_t dump_available_buffer_count = 0; + for (auto &dump_in_flight_buffer : dump_in_flight_buffers_) { - auto &[dump_buffer, dump_buffer_width, dump_buffer_height, dump_file_path, dump_frame_index] = - dump_in_flight_buffers_.front(); + uint32_t dump_frame_index = std::get<4>(dump_in_flight_buffer); if (frame_index_ > dump_frame_index + kGfxConstant_BackBufferCount) { - saveImage(dump_buffer, dump_buffer_width, dump_buffer_height, dump_file_path.c_str()); - gfxDestroyBuffer(gfx_, dump_buffer); - dump_in_flight_buffers_.pop_front(); + 
dump_available_buffer_count++; } else { + // BE CAREFUL: dump_frame_index is monotonically increasing break; } } + + ThreadPool().Dispatch( + [&](uint32_t buffer_index) { + auto const &[dump_buffer, dump_buffer_width, dump_buffer_height, dump_file_path, + dump_frame_index] = dump_in_flight_buffers_[buffer_index]; + saveImage(dump_buffer, dump_buffer_width, dump_buffer_height, dump_file_path.c_str()); + }, + dump_available_buffer_count, 1); + + for (uint32_t available_buffer_index = 0; available_buffer_index < dump_available_buffer_count; + available_buffer_index++) + { + gfxDestroyBuffer(gfx_, std::get<0>(dump_in_flight_buffers_.front())); + dump_in_flight_buffers_.pop_front(); + } + + // Dump cameras + ThreadPool().Dispatch( + [&](uint32_t camera_index) { + auto const &[dump_file_path, dump_jittered] = dump_camera_requests_[camera_index]; + dumpCamera(dump_file_path.c_str(), camera_matrices_[dump_jittered], + dump_jittered ? camera_jitter_x_ : 0.f, dump_jittered ? camera_jitter_y_ : 0.f); + }, + (uint32_t)dump_camera_requests_.size(), 1); + + dump_camera_requests_.clear(); } -void CapsaicinInternal::terminate() +void CapsaicinInternal::renderGUI(bool readOnly) { - gfxFinish(gfx_); // flush & sync + if (!ImGui::GetCurrentContext()) + { + static bool warned; + if (!warned) + GFX_PRINT_ERROR(kGfxResult_InvalidOperation, + "No ImGui context was supplied on initialization; cannot call `Capsaicin::RenderGUI()'"); + warned = true; + return; // no ImGui context was supplied on initialization + } - if (dump_in_flight_buffers_.size() > 0) + ImGui::Text("Selected device : %s", gfx_.getName()); + ImGui::Separator(); + const uint32_t deltaLightCount = getDeltaLightCount(); + const uint32_t areaLightCount = getAreaLightCount(); + const uint32_t envLightCount = getEnvironmentLightCount(); + const uint32_t triangleCount = getTriangleCount(); + const uint64_t bvhDataSize = getBvhDataSize(); + ImGui::Text("Triangle Count : %u", triangleCount); + ImGui::Text("Light Count : %u", 
areaLightCount + deltaLightCount + envLightCount); + ImGui::Text(" Area Light Count : %u", areaLightCount); + ImGui::Text(" Delta Light Count : %u", deltaLightCount); + ImGui::Text(" Environment Light Count : %u", envLightCount); + ImGui::Text("BVH Data Size : %.1f MiB", bvhDataSize / (1024.0 * 1024.0)); + ImGui::Text("Render Resolution : %ux%u", getWidth(), getHeight()); + + if (!readOnly) { - while (dump_in_flight_buffers_.size() > 0) + // Display renderer specific options + if (ImGui::CollapsingHeader("Renderer Settings", ImGuiTreeNodeFlags_DefaultOpen)) { - auto &[dump_buffer, dump_buffer_width, dump_buffer_height, dump_file_path, dump_frame] = - dump_in_flight_buffers_.front(); - saveImage(dump_buffer, dump_buffer_width, dump_buffer_height, dump_file_path.c_str()); - gfxDestroyBuffer(gfx_, dump_buffer); - dump_in_flight_buffers_.pop_front(); + for (auto const &component : components_) + { + component.second->renderGUI(*this); + } + for (auto const &render_technique : render_techniques_) + { + render_technique->renderGUI(*this); + } + } + ImGui::Separator(); + } + + if (ImGui::CollapsingHeader("Profiling", ImGuiTreeNodeFlags_DefaultOpen)) + { + bool children = false; + size_t maxStringSize = 0; + float totalTimestampTime = 0.0f; + auto getTimestamps = [&](Timeable *timeable) -> void { + const uint32_t timestamp_query_count = timeable->getTimestampQueryCount(); + + if (!timestamp_query_count) + { + return; // no profiling info available + } + + bool hasChildren = timestamp_query_count > 1; + children = children || hasChildren; + const ImGuiTreeNodeFlags flags = + (hasChildren ? 
ImGuiTreeNodeFlags_None : ImGuiTreeNodeFlags_Leaf); + + const auto ×tamp_queries = timeable->getTimestampQueries(); + auto total_query_duration = gfxTimestampQueryGetDuration(gfx_, timestamp_queries[0].query); + totalTimestampTime += total_query_duration; + + if (ImGui::TreeNodeEx(timeable->getName().data(), flags, "%-20s: %.3f ms", + timeable->getName().data(), total_query_duration)) + { + maxStringSize = std::max(maxStringSize, timeable->getName().length()); + if (hasChildren) + { + for (uint32_t i = 1; i < timestamp_query_count; ++i) + { + ImGui::TreeNodeEx(std::to_string(i).c_str(), + ImGuiTreeNodeFlags_Leaf | ImGuiTreeNodeFlags_NoTreePushOnOpen, "%-17s: %.3f ms", + timestamp_queries[i].name.data(), + gfxTimestampQueryGetDuration(gfx_, timestamp_queries[i].query)); + } + } + ImGui::TreePop(); + } + }; + for (auto const &component : components_) + { + getTimestamps(&*component.second); + } + for (auto const &render_technique : render_techniques_) + { + getTimestamps(&*render_technique); + } + if (ImGui::TreeNodeEx( + "Total", ImGuiTreeNodeFlags_Leaf, "%-20s: %.3f ms", "Total", totalTimestampTime)) + { + ImGui::TreePop(); + } + + ImGui::Separator(); + + const std::string graphName = std::format("{:.2f}", frame_time_ * 1000.0) + " ms (" + + std::format("{:.2f}", 1.0f / frame_time_) + " fps)"; + + ImGui::PushID("Total frame time"); + std::string text = "Total frame time"; + size_t additionalSpace = maxStringSize > text.size() ? maxStringSize - text.size() : 0; + if (children) + { + text.insert(0, " "); + } + for (size_t i = 0; i < additionalSpace + 1; ++i) + { + text.append(" "); + } + text.append(":"); + ImGui::Text(text.data()); + ImGui::SameLine(); + ImGui::PlotLines("", Graph::GetValueAtIndex, &frameGraph, frameGraph.getValueCount(), 0, + graphName.c_str(), 0.0f, FLT_MAX, ImVec2(150, 20)); + ImGui::PopID(); + + ImGui::PushID("Frame"); + text = "Frame"; + additionalSpace = maxStringSize > text.size() ? 
maxStringSize - text.size() : 0; + if (children) + { + text.insert(0, " "); + } + for (size_t i = 0; i < additionalSpace + 1; ++i) + { + text.append(" "); + } + text.append(":"); + ImGui::Text(text.data()); + ImGui::SameLine(); + ImGui::Text(std::to_string(frame_index_).c_str()); + ImGui::PopID(); + } + + if (!readOnly) + { + if (ImGui::CollapsingHeader("Render Options", ImGuiTreeNodeFlags_None)) + { + for (auto &i : options_) + { + if (std::holds_alternative(i.second)) + { + ImGui::Checkbox(i.first.data(), std::get_if(&(i.second))); + } + else if (std::holds_alternative(i.second)) + { + uint32_t *option = std::get_if(&(i.second)); + ImGui::DragInt(i.first.data(), reinterpret_cast(option), 1, 0); + } + else if (std::holds_alternative(i.second)) + { + ImGui::DragInt(i.first.data(), std::get_if(&(i.second)), 1); + } + else if (std::holds_alternative(i.second)) + { + ImGui::DragFloat(i.first.data(), std::get_if(&(i.second)), 5e-3f); + } + } } } +} + +void CapsaicinInternal::terminate() +{ + gfxFinish(gfx_); // flush & sync + + // Dump remaining buffers, they are all available after gfxFinish + ThreadPool().Dispatch( + [&](uint32_t buffer_index) { + auto &[dump_buffer, dump_buffer_width, dump_buffer_height, dump_file_path, dump_frame_index] = + dump_in_flight_buffers_[buffer_index]; + saveImage(dump_buffer, dump_buffer_width, dump_buffer_height, dump_file_path.c_str()); + }, + (uint32_t)dump_in_flight_buffers_.size(), 1); + + while (dump_in_flight_buffers_.size() > 0) + { + gfxDestroyBuffer(gfx_, std::get<0>(dump_in_flight_buffers_.front())); + dump_in_flight_buffers_.pop_front(); + } gfxDestroyKernel(gfx_, blit_kernel_); gfxDestroyProgram(gfx_, blit_program_); @@ -1308,6 +1913,7 @@ void CapsaicinInternal::terminate() gfxDestroyTexture(gfx_, environment_buffer_); gfxDestroySamplerState(gfx_, linear_sampler_); + gfxDestroySamplerState(gfx_, linear_wrap_sampler_); gfxDestroySamplerState(gfx_, nearest_sampler_); gfxDestroySamplerState(gfx_, anisotropic_sampler_); @@ 
-1344,48 +1950,50 @@ void CapsaicinInternal::terminate() render_techniques_.clear(); components_.clear(); renderer_ = nullptr; + + gfxDestroyScene(scene_); + scene_ = {}; } -std::pair> CapsaicinInternal::getProfiling() noexcept +void CapsaicinInternal::reloadShaders() noexcept { - float total_frame_time = 0.0f; - std::vector timestamps; + // Instead of just recompiling kernels we re-initialise all component/techniques. This has the side effect + // of not only recompiling kernels but also re-initialising old data that may no longer contain correct + // values + gfxFinish(gfx_); // flush & sync - auto getTimestamps = [&total_frame_time, ×tamps, this](Timeable *timeable) -> void { - const uint32_t timestamp_query_count = timeable->getTimestampQueryCount(); + // Reset the component/techniques + for (auto const &i : components_) + { + i.second->setGfxContext(gfx_); + i.second->terminate(); + } + for (auto const &i : render_techniques_) + { + i->setGfxContext(gfx_); + i->terminate(); + } - if (!timestamp_query_count) - { - return; // no profiling info available - } + resetRenderState(); - std::vector nodeTimestamps; - TimestampQuery const *timestamp_queries = timeable->getTimestampQueries(); - total_frame_time += gfxTimestampQueryGetDuration(gfx_, timestamp_queries[0].query); - for (uint32_t i = 0; i < timestamp_query_count; ++i) + // Re-initialise the components/techniques + for (auto const &i : components_) + { + if (!i.second->init(*this)) { - nodeTimestamps.emplace_back( - timestamp_queries[i].name, gfxTimestampQueryGetDuration(gfx_, timestamp_queries[i].query)); + GFX_PRINTLN("Error: Failed to initialise component: %s", i.first.data()); } - timestamps.emplace_back(timeable->getName(), std::move(nodeTimestamps)); - }; - for (auto const &component : components_) - { - getTimestamps(&*component.second); } - for (auto const &render_technique : render_techniques_) + for (auto const &i : render_techniques_) { - getTimestamps(&*render_technique); + if (!i->init(*this)) + 
{ + GFX_PRINTLN("Error: Failed to initialise render technique: %s", i->getName().data()); + } } - return std::make_pair(total_frame_time, timestamps); -} - -std::vector CapsaicinInternal::GetRenderers() noexcept -{ - return Renderer::getNames(); } -void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) noexcept +void CapsaicinInternal::setupRenderTechniques(std::string_view const &name) noexcept { // Clear any existing AOVs for (auto &i : aov_buffers_) @@ -1399,7 +2007,7 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n gfxFinish(gfx_); // flush & sync // Delete old AOVS, debug views and buffers - render_settings_.options_.clear(); + options_.clear(); components_.clear(); for (auto &i : shared_buffers_) { @@ -1415,16 +2023,21 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n aov_clear_buffers_.clear(); debug_views_.clear(); debug_views_.emplace_back("None", nullptr); + debug_view_ = "None"; + renderer_name_ = ""; + renderer_ = nullptr; + resetPlaybackState(); // Create the new renderer - renderer_ = Renderer::make(render_settings.renderer_); + renderer_ = RendererFactory::make(name); if (renderer_) { - render_techniques_ = std::move(renderer_->setupRenderTechniques(render_settings)); + render_techniques_ = std::move(renderer_->setupRenderTechniques(options_)); + renderer_name_ = name; } else { - GFX_PRINTLN("Error: Unknown renderer requested: %s", render_settings.renderer_.data()); + GFX_PRINTLN("Error: Unknown renderer requested: %s", name.data()); return; } @@ -1432,29 +2045,9 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n // Get render technique options for (auto const &i : render_techniques_) { - render_settings_.options_.merge(i->getRenderOptions()); - } - - // Update render options based on passed in settings - // Loop through and update to any values stored in passed in render settings - for (auto &i : render_settings_.options_) - 
{ - if (auto j = render_settings.options_.find(i.first); j != render_settings.options_.end()) - { - if (j->second.index() == i.second.index()) - { - i.second = j->second; - } - } - else - { - // Update user version with required changed options - render_settings.options_.emplace(i.first, i.second); - } + options_.merge(i->getRenderOptions()); } - } - { // Get requested components std::vector requestedComponents; for (auto const &i : render_techniques_) @@ -1474,7 +2067,7 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n for (auto &i : requestedComponents) { // Create the new component - auto component = Component::make(i); + auto component = ComponentFactory::make(i); if (component) { components_.try_emplace(i, std::move(component)); @@ -1512,7 +2105,7 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n for (auto &i : newRequestedComponents) { // Create the new component - auto component = Component::make(i); + auto component = ComponentFactory::make(i); if (component) { components_.try_emplace(i, std::move(component)); @@ -1528,14 +2121,14 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n // Get component options for (auto const &i : components_) { - render_settings_.options_.merge(i.second->getRenderOptions()); + options_.merge(i.second->getRenderOptions()); } // Update render options based on passed in settings // Loop through and update to any values stored in passed in render settings - for (auto &i : render_settings_.options_) + for (auto &i : options_) { - if (auto j = render_settings.options_.find(i.first); j != render_settings.options_.end()) + if (auto j = options_.find(i.first); j != options_.end()) { if (j->second.index() == i.second.index()) { @@ -1545,7 +2138,7 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n else { // Update user version with required changed options - render_settings.options_.emplace(i.first, 
i.second); + options_.emplace(i.first, i.second); } } } @@ -1588,11 +2181,11 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n for (auto &i : requestedBuffers) { // Create new texture - GfxBuffer buffer = gfxCreateBuffer(gfx_, i.second.size); - std::string name = "Capsaicin_"; - name += i.first; - name += "Buffer"; - buffer.setName(name.c_str()); + GfxBuffer buffer = gfxCreateBuffer(gfx_, i.second.size); + std::string bufferName = "Capsaicin_"; + bufferName += i.first; + bufferName += "Buffer"; + buffer.setName(bufferName.c_str()); shared_buffers_.emplace_back(i.first, buffer); } @@ -1623,6 +2216,7 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n }; aovList requestedAOVs = defaultAOVs; std::unordered_map backupAOVs; + aovList optionalAOVs; for (auto const &i : render_techniques_) { for (auto &j : i->getAOVs()) @@ -1672,21 +2266,32 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n newParams.flags = k->second.flags; } } - // Add the new AOV to requested list - requestedAOVs.try_emplace(j.name, std::move(newParams)); - // Check if also a backup AOV - if (!j.backup_name.empty()) + if (j.flags & AOV::Optional) { - if (auto pos = backupAOVs.find(j.backup_name); - pos != backupAOVs.end() && pos->second != j.name) + optionalAOVs.try_emplace(j.name, std::move(newParams)); + if (!j.backup_name.empty()) { - GFX_PRINTLN( - "Error: Cannot create multiple different backups with same name: %s, %s", - j.name.data(), j.backup_name.data()); + GFX_PRINTLN("Error: Requested backup of optional AOV: %s", j.name.data()); } - else + } + else + { + // Add the new AOV to requested list + requestedAOVs.try_emplace(j.name, std::move(newParams)); + // Check if also a backup AOV + if (!j.backup_name.empty()) { - backupAOVs.emplace(j.backup_name, j.name); + if (auto pos = backupAOVs.find(j.backup_name); + pos != backupAOVs.end() && pos->second != j.name) + { + GFX_PRINTLN( + "Error: Cannot 
create multiple different backups with same name: %s, %s", + j.name.data(), j.backup_name.data()); + } + else + { + backupAOVs.emplace(j.backup_name, j.name); + } } } } @@ -1734,15 +2339,61 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n } } + // Merge optional AOVS + for (auto &i : optionalAOVs) + { + if (auto j = requestedAOVs.find(i.first); j != requestedAOVs.end()) + { + // Update existing format if it doesn't have one + if (j->second.format == DXGI_FORMAT_UNKNOWN) + { + j->second.format = i.second.format; + } + // Validate that requested values match the existing ones + else if (i.second.format != j->second.format && i.second.format != DXGI_FORMAT_UNKNOWN) + { + GFX_PRINTLN("Error: Requested AOV with different formats: %s", i.first.data()); + } + if (((i.second.flags & AOV::Clear) && (j->second.flags & AOV::Accumulate)) + || ((i.second.flags & AOV::Accumulate) && (j->second.flags & AOV::Clear))) + { + GFX_PRINTLN("Error: Requested AOV with different clear settings: %s", i.first.data()); + } + + // Add backup name if requested + if (!i.second.backup.empty()) + { + if (j->second.backup.empty()) + { + j->second.backup = i.second.backup; + } + else if (j->second.backup != i.second.backup) + { + GFX_PRINTLN("Error: Requested AOV with different backup names: %s, %2", + i.first.data(), j->second.backup.data()); + } + } + // Add clear/accumulate flag if requested + if (i.second.flags & AOV::Clear) + { + j->second.flags = AOV::Flags(j->second.flags | AOV::Clear); + } + else if (i.second.flags & AOV::Accumulate) + { + j->second.flags = AOV::Flags(j->second.flags | AOV::Accumulate); + } + } + } + // Create all requested AOVs for (auto &i : requestedAOVs) { // Create new texture - GfxTexture texture = gfxCreateTexture2D(gfx_, i.second.format); - std::string name = "Capsaicin_"; - name += i.first; - name += "AOV"; - texture.setName(name.c_str()); + GfxTexture texture = gfxCreateTexture2D(gfx_, i.second.format); + std::string bufferName = 
"Capsaicin_"; + bufferName += i.first; + bufferName += "AOV"; + texture.setName(bufferName.c_str()); aov_buffers_.emplace_back(i.first, texture); // Add to backup list @@ -1798,6 +2449,8 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n } } + // If no scene currently loaded then delay initialisation till scene load + if (!!scene_) { // Initialise all components for (auto const &i : components_) @@ -1818,6 +2471,41 @@ void CapsaicinInternal::setupRenderTechniques(RenderSettings &render_settings) n GFX_PRINTLN("Error: Failed to initialise render technique: %s", i->getName().data()); } } + + // Reset flags as everything just got forced reset anyway + mesh_updated_ = false; + transform_updated_ = false; + environment_map_updated_ = false; + scene_updated_ = false; + camera_updated_ = false; + } +} + +void CapsaicinInternal::resetPlaybackState() noexcept +{ + // Reset frame index + frame_index_ = 0; + // Reset frame time + auto wallTime = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now().time_since_epoch()); + current_time_ = static_cast(wallTime.count()) / 1000000.0; + frame_time_ = 0.0f; +} + +void CapsaicinInternal::resetRenderState() noexcept +{ + // Reset frame index to signal component/techniques an initialisation is required/occurred + frame_index_ = 0; + + // Reset the AOV history + { + GfxCommandEvent const command_event(gfx_, "ResetPreviousGBuffers"); + + for (auto &i : aov_backup_buffers_) + { + gfxCommandClearTexture(gfx_, i.second); + } } } + } // namespace Capsaicin diff --git a/src/core/src/capsaicin/capsaicin_internal.h b/src/core/src/capsaicin/capsaicin_internal.h index 34d3e8c..9a5e87c 100644 --- a/src/core/src/capsaicin/capsaicin_internal.h +++ b/src/core/src/capsaicin/capsaicin_internal.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. 
All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,9 +22,12 @@ THE SOFTWARE. #pragma once #include "gpu_shared.h" +#include "graph.h" #include "renderer.h" #include +#include +#include namespace Capsaicin { @@ -40,37 +43,166 @@ class CapsaicinInternal GfxScene getScene() const; uint32_t getWidth() const; uint32_t getHeight() const; - uint32_t getFrameIndex() const; char const *getShaderPath() const; - double getTime() const; - void setTime(double time); - bool getAnimate() const; - void setAnimate(bool animate); + /** + * Get the current frame index (starts at zero) + * @return The index of the current frame to/being rendered. + */ + uint32_t getFrameIndex() const noexcept; + + /** + * Get the elapsed time since the last render call. + * @return The elapsed frame time (seconds) + */ + double getFrameTime() const noexcept; + + /** + * Get the average frame time. + * @return The elapsed frame time (seconds) + */ + double getAverageFrameTime() const noexcept; + + /** + * Check if the current scene has any usable animations. + * @return True if animations are present, False otherwise. + */ + bool hasAnimation() const noexcept; + + /** + * Set the current playback play/paused state. + * @param paused True to pause animation, False to play. + */ + void setPaused(bool paused) noexcept; + + /** + * Get the current animation play/paused state. + * @return True if playback is paused, False otherwise. + */ + bool getPaused() const noexcept; + + /** + * Set the current playback mode. + * @param playMode The new playback mode (False to playback in real-time mode, True uses fixed frame + * rate). + */ + void setFixedFrameRate(bool playMode) noexcept; + + /** + * Set the current fixed rate frame time. + * @param fixed_frame_time A duration in seconds. 
+ */ + void setFixedFrameTime(double fixed_frame_time) noexcept; + + /** + * Get current playback mode. + * @return True if using fixed frame rate, False is using real-time. + */ + bool getFixedFrameRate() const noexcept; + + /** + * Restart playback to start of animation. + */ + void restartPlayback() noexcept; + + /** + * Increase current playback speed by double. + */ + void increasePlaybackSpeed() noexcept; + + /** + * Decrease current playback speed by half. + */ + void decreasePlaybackSpeed() noexcept; + + /** + * Get the current playback speed. + * @return The current playback speed. + */ + double getPlaybackSpeed() const noexcept; + + /** + * Reset the playback speed to default. + */ + void resetPlaybackSpeed() noexcept; + + /** + * Step playback forward by specified number of frames. + * @param frames The number of frames to step forward. + */ + void stepPlaybackForward(uint32_t frames) noexcept; + + /** + * Step playback backward by specified number of frames. + * @param frames The number of frames to step backward. + */ + void stepPlaybackBackward(uint32_t frames) noexcept; + + /** + * Set the playback to forward/rewind. + * @param rewind Set to True to rewind, False to playback forward. + */ + void setPlayRewind(bool rewind) noexcept; + + /** + * Get the current playback forward/rewind state. + * @return True if in rewind, False if forward. + */ + bool getPlayRewind() const noexcept; + + /** + * Set the current render state. Pausing prevents any new frames from being rendered. + * @param paused True to pause rendering. + */ + void setRenderPaused(bool paused) noexcept; + + /** + * Get the current render paused state. + * @return True if rendering is paused, False otherwise. + */ + bool getRenderPaused() const noexcept; + + /** + */ + void stepJitterFrameIndex(uint32_t frames) noexcept; /** * Check if the scenes mesh data was changed this frame. * @return True if mesh data has changed. 
*/ - bool getMeshesUpdated() const; + bool getMeshesUpdated() const noexcept; /** * Check if the scenes instance transform data was changed this frame. * @return True if instance data has changed. */ - bool getTransformsUpdated() const; + bool getTransformsUpdated() const noexcept; + + /** + * Check if the scene was changed this frame. + * @return True if scene has changed. + */ + bool getSceneUpdated() const noexcept; + + /** + * Check if the scene camera was changed this frame. + * @note Only flags change in which camera is active, this does not track changes to any specific cameras + * parameters. + * @return True if camera has changed. + */ + bool getCameraUpdated() const noexcept; /** * Check if the environment map was changed this frame. * @return True if environment map has changed. */ - bool getEnvironmentMapUpdated() const; + bool getEnvironmentMapUpdated() const noexcept; /** * Gets the list of currently available AOVs. * @returns The AOV list. */ - std::vector getAOVs() noexcept; + std::vector getAOVs() const noexcept; /** * Query if a AOV buffer currently exists. @@ -86,12 +218,6 @@ class CapsaicinInternal */ GfxTexture getAOVBuffer(std::string_view const &aov) const noexcept; - /** - * Gets the list of currently available debug views. - * @returns The debug view list. - */ - std::vector getDebugViews() noexcept; - /** * Checks whether a debug view is of an AOV. * @param view The debug view to check. @@ -139,6 +265,186 @@ class CapsaicinInternal return std::dynamic_pointer_cast(getComponent(static_cast(toStaticString()))); } + /** + * Gets the list of supported renderers. + * @returns The renderers list. + */ + static std::vector GetRenderers() noexcept; + + /** + * Gets the name of the currently set renderer. + * @returns The current renderer name. + */ + std::string_view getCurrentRenderer() const noexcept; + + /** + * Sets the current renderer. + * @param name The name of the renderer to set (must be one of the options from GetRenderers()). 
+ * @returns True if successful, False otherwise. + */ + bool setRenderer(std::string_view const &name) noexcept; + + /** + * Gets the currently set scene. + * @returns The current scene name. + */ + std::vector const &getCurrentScenes() const noexcept; + + /** + * Sets the current scene. + * @param name The name of the scene file. + * @returns True if successful, False otherwise. + */ + bool setScenes(std::vector const &names) noexcept; + + /** + * Gets the list of cameras available in the current scene. + * @returns The cameras list. + */ + std::vector getSceneCameras() const noexcept; + + /** + * Gets the name of the currently set scene camera. + * @returns The current camera name. + */ + std::string_view getSceneCurrentCamera() const noexcept; + + /** + * Gets the current scenes camera. + * @returns The requested camera object. + */ + GfxRef getSceneCamera() const noexcept; + + /** + * Sets the current scenes camera. + * @param name The name of the camera to set (must be one of the options from getSceneCameras()). + * @returns True if successful, False otherwise. + */ + bool setSceneCamera(std::string_view const &name) noexcept; + + /** + * Gets the currently set environment map. + * @returns The current environment map name. + */ + std::string getCurrentEnvironmentMap() const noexcept; + + /** + * Sets the current scene environment map. + * @param name The name of the image file (blank to disable environment map). + * @returns True if successful, False otherwise. + */ + bool setEnvironmentMap(std::string const &name) noexcept; + + /** + * Gets the list of currently available debug views. + * @returns The debug view list. + */ + std::vector getDebugViews() const noexcept; + + /** + * Gets the currently set debug view. + * @returns The debug view string (empty string if none selected). + */ + std::string_view getCurrentDebugView() const noexcept; + + /** + * Sets the current debug view. 
+ * @param name The name of the debug view to set (must be one of the options from GetDebugViews()). + * @returns True if successful, False otherwise. + */ + bool setDebugView(std::string_view const &name) noexcept; + + /** + * Gets render options currently in use. + * @returns The render options. + */ + RenderOptionList const &getOptions() const noexcept; + RenderOptionList &getOptions() noexcept; + + /** + * Checks if an options exists with the specified type. + * @tparam T Generic type parameter of the requested option. + * @param name The name of the option to get. + * @returns True if options is found and has correct type, False otherwise. + */ + template + bool hasOption(std::string_view const &name) noexcept + { + auto &options = getOptions(); + if (auto i = options.find(name); i != options.end()) + { + return std::holds_alternative(i->second); + } + return false; + } + + /** + * Gets an option from internal options list. + * @tparam T Generic type parameter of the requested option. + * @param name The name of the option to get. + * @returns The options value (nullptr if option does not exists or typename does not match). + */ + template + T const &getOption(std::string_view const &name) const noexcept + { + if (auto i = options_.find(name); i != options_.end()) + { + if (std::holds_alternative(i->second)) + { + return *std::get_if(&(i->second)); + } + } + GFX_PRINTLN("Error: Unknown settings options requested: %s", name.data()); + static T unknown; + return unknown; + } + + /** + * Gets a reference to an option from internal options list. + * @tparam T Generic type parameter of the requested option. + * @param name The name of the option to get. + * @returns The options value (nullptr if option does not exists or typename does not match). 
+ */ + template + T &getOption(std::string_view const &name) noexcept + { + auto &options = getOptions(); + if (auto i = options.find(name); i != options.end()) + { + if (std::holds_alternative(i->second)) + { + return *std::get_if(&(i->second)); + } + } + GFX_PRINTLN("Error: Unknown settings options requested: %s", name.data()); + static T unknown; + return unknown; + } + + /** + * Sets an options value in the internal options list. + * If the option does not exists it is created. + * @tparam T Generic type parameter of the requested option. + * @param name The name of the option to set. + * @param value The new value of the option. + */ + template + void setOption(std::string_view const &name, const T value) noexcept + { + auto &options = getOptions(); + if (auto i = options.find(name); i != options.end()) + { + if (std::holds_alternative(i->second)) + { + *std::get_if(&(i->second)) = value; + } + } + else + { + options.emplace(name, value); + } + } + glm::vec4 getInvDeviceZ() const; glm::vec3 getPreViewTranslation() const; GfxTexture getEnvironmentBuffer() const; @@ -166,17 +472,34 @@ class CapsaicinInternal GfxBuffer getCameraMatricesBuffer(bool jittered = false) const; /** - * Gets render settings currently in use. - * @returns The render settings. + * Gets count of enabled delta lights (point,spot,direction) in current scene. + * @returns The delta light count. */ - RenderSettings const &getRenderSettings() const; - RenderSettings &getRenderSettings(); + uint32_t getDeltaLightCount() const noexcept; + + /** + * Gets count of enabled area lights in current scene. + * @returns The area light count. + */ + uint32_t getAreaLightCount() const noexcept; /** * Gets count of enabled environment lights in current scene. * @returns The environment light count. */ - uint32_t getEnvironmentLightCount() const; + uint32_t getEnvironmentLightCount() const noexcept; + + /** + * Gets count of number of triangles present in current scene. + * @returns The triangle count. 
+ */ + uint32_t getTriangleCount() const noexcept; + + /** + * Gets size of the acceleration structure (in bytes). + * @returns The acceleration structure size. + */ + uint64_t getBvhDataSize() const noexcept; GfxBuffer getInstanceBuffer() const; Instance const *getInstanceData() const; @@ -186,10 +509,10 @@ class CapsaicinInternal GfxBuffer getInstanceIdBuffer() const; uint32_t const *getInstanceIdData() const; - glm::mat4 const *getTransformData() const; - GfxBuffer getTransformBuffer() const; - GfxBuffer getPrevTransformBuffer() const; - glm::mat4 const *getPrevTransformData() const; + glm::mat4x3 const *getTransformData() const; + GfxBuffer getTransformBuffer() const; + GfxBuffer getPrevTransformBuffer() const; + glm::mat4x3 const *getPrevTransformData() const; GfxBuffer getMaterialBuffer() const; Material const *getMaterialData() const; @@ -197,6 +520,7 @@ class CapsaicinInternal GfxTexture const *getTextures() const; uint32_t getTextureCount() const; GfxSamplerState getLinearSampler() const; + GfxSamplerState getLinearWrapSampler() const; GfxSamplerState getNearestSampler() const; GfxSamplerState getAnisotropicSampler() const; @@ -214,6 +538,8 @@ class CapsaicinInternal GfxAccelerationStructure getAccelerationStructure() const; + inline uint32_t getSbtStrideInEntries(GfxShaderGroupType type) const { return sbt_stride_in_entries_[type]; } + /** * Calculate and return the AABB surrounding current scene contents. * @returns The scene bounds (min, max). @@ -233,25 +559,30 @@ class CapsaicinInternal /** * Initializes Capsaicin. Must be called before any other functions. * @param gfx The gfx context to use inside Capsaicin. + * @param imgui_context (Optional) The ImGui context. */ - void initialize(GfxContext gfx); + void initialize(GfxContext gfx, ImGuiContext *imgui_context); /** * Render the current frame. - * @param scene The scene to render. - * @param [in,out] render_settings The render settings to use during rendering. 
*/ - void render(GfxScene scene, RenderSettings &render_settings); + void render(); + + /** + * Render UI elements related to current internal state + * Must be called between ImGui::Begin() and ImGui::End(). + * @param readOnly (Optional) True to only display read only data, False to display controls accepting + * user input. + */ + void renderGUI(bool readOnly = false); /** Terminates this object */ void terminate(); /** - * Gets the profiling information for each timed section from the current frame. - * @returns The total frame time as well as timestamps for each sub-section (see NodeTimestamps for - * details). + * Reload all shader code currently in use */ - std::pair> getProfiling() noexcept; + void reloadShaders() noexcept; /** * Saves an AOV buffer to disk. @@ -268,20 +599,30 @@ class CapsaicinInternal void dumpAnyBuffer(char const *file_path, GfxTexture dump_buffer); /** - * Gets the list of supported renderers that can be set inside RenderSettings. - * @returns The renderers list. + * Saves current camera attributes to disk. + * @param file_path Full pathname to the file to save as. + * @param jittered Jittered camera or not. */ - static std::vector GetRenderers() noexcept; + void dumpCamera(char const *file_path, bool jittered); private: /** * Sets up the render techniques for the currently set renderer. * This will setup any required AOVs, views or buffers required for all specified render techniques. - * @param [in,out] render_settings The render settings used to setup. + * @param name Name of the renderer to setup. */ - void setupRenderTechniques(RenderSettings &render_settings) noexcept; + void setupRenderTechniques(std::string_view const &name) noexcept; - void renderNextFrame(GfxScene scene); + /** + * Reset current frame index and duration state. + * This should be called whenever and renderer or scene changes are made. + */ + void resetPlaybackState() noexcept; + + /** + * Reset internal data such as AOVs and history to initial state. 
+ */ + void resetRenderState() noexcept; void dumpBuffer(char const *file_path, GfxTexture dump_buffer); void saveImage(GfxBuffer dump_buffer, uint32_t dump_buffer_width, uint32_t dump_buffer_height, @@ -290,6 +631,8 @@ class CapsaicinInternal char const *exr_file_path); void saveJPG(GfxBuffer dump_buffer, uint32_t dump_buffer_width, uint32_t dump_buffer_height, char const *jpg_file_path); + void dumpCamera(char const *file_path, CameraMatrices const &camera_matrices, float camera_jitter_x, + float camera_jitter_y); size_t mesh_hash_ = 0; size_t transform_hash_ = 0; @@ -297,26 +640,49 @@ class CapsaicinInternal bool mesh_updated_ = true; bool transform_updated_ = true; bool environment_map_updated_ = true; + bool scene_updated_ = true; + bool camera_updated_ = true; - GfxContext gfx_; /**< The graphics context to be used. */ - GfxScene scene_; /**< The scene to be rendered. */ - double time_ = 0.0f; /**< The elapsed time (in secs). */ - bool animate_ = true; /**< Whether to animate the scene. */ - uint32_t frame_index_ = 0; + GfxContext gfx_; /**< The graphics context to be used. */ std::string shader_path_; uint32_t buffer_width_ = 0; uint32_t buffer_height_ = 0; - GfxCamera camera_; /**< The camera to be used for drawing. */ + GfxScene scene_; /**< The scene to be rendered. 
*/ + GfxTexture environment_buffer_; + std::vector scene_files_; + std::string environment_map_file_; + + uint32_t frame_index_ = 0; /**< Current frame number (incremented each render call) */ + uint32_t jitter_frame_index_ = ~0u; /**< Current jitter frame number */ + double current_time_ = 0.0; /**< Current wall clock time used for timing (seconds) */ + double frame_time_ = 0.0; /**< Elapsed frame time for most recent frame (seconds) */ + + bool play_paused_ = true; /**< Current animation play/paused state (True if paused) */ + bool play_fixed_framerate_ = false; /**< Current animation playback mode (True if fixed frame rate) */ + double play_time_ = 0.0f; /**< Current animation absolute playback position (s) */ + double play_time_old_ = -1.0f; /**< Previous animation absolute playback position (s) */ + double play_fixed_frame_time_ = 1.0f / 30.0f; /**< Frame time used with fixed frame rate mode */ + double play_speed_ = 1.0f; /**< Current playback speed */ + bool play_rewind_ = false; /**< Current rewind state (True if rewinding) */ + bool render_paused_ = false; /**< Current render paused state (True to pause rendering of new frames) */ + CameraMatrices camera_matrices_[2]; /**< Unjittered and jittered matrices */ - RenderSettings render_settings_; /**< The settings to be used for rendering. */ + float camera_jitter_x_; + float camera_jitter_y_; + + RenderOptionList options_; /**< Options for controlling the operation of each render technique */ + std::vector> render_techniques_; /**< The list of render techniques to be applied. */ std::map> components_; /**< The list of render techniques to be applied. 
*/ + std::string_view renderer_name_; /**< Currently used renderer string name */ std::unique_ptr renderer_ = nullptr; /**< Currently used renderer */ using debug_views = std::vector>; - debug_views debug_views_; /**< List of available debug views */ + debug_views debug_views_; /**< List of available debug views */ + std::string_view debug_view_; /**< The debug view to use (get available from GetDebugViews() - + "None" or empty for default behaviour) */ GfxKernel blit_kernel_; /**< The kernel to blit the color buffer to the back buffer. */ GfxProgram blit_program_; /**< The program to blit the color buffer to the back buffer. */ @@ -335,36 +701,41 @@ class CapsaicinInternal GfxBuffer constant_buffer_pools_[kGfxConstant_BackBufferCount]; uint64_t constant_buffer_pool_cursor_ = 0; - GfxConstRef environment_map_; - GfxTexture environment_buffer_; - - GfxBuffer camera_matrices_buffer_[2]; /**< Unjittered and jittered camera matrices */ - std::vector instance_data_; - GfxBuffer instance_buffer_; - std::vector instance_min_bounds_; - std::vector instance_max_bounds_; - std::vector instance_id_data_; - GfxBuffer instance_id_buffer_; - std::vector transform_data_; - GfxBuffer transform_buffer_; - std::vector prev_transform_data_; - GfxBuffer prev_transform_buffer_; - std::vector material_data_; - GfxBuffer material_buffer_; - std::vector texture_atlas_; - GfxSamplerState linear_sampler_; - GfxSamplerState nearest_sampler_; - GfxSamplerState anisotropic_sampler_; - std::vector mesh_data_; - GfxBuffer mesh_buffer_; - std::vector index_data_; - GfxBuffer index_buffer_; /**< The buffer storing all indices so it can be access via RT. 
*/ - std::vector vertex_data_; + GfxBuffer camera_matrices_buffer_[2]; /**< Unjittered and jittered camera matrices */ + std::vector instance_data_; + GfxBuffer instance_buffer_; + std::vector instance_min_bounds_; + std::vector instance_max_bounds_; + std::vector instance_id_data_; + GfxBuffer instance_id_buffer_; + std::vector transform_data_; + GfxBuffer transform_buffer_; + std::vector prev_transform_data_; + GfxBuffer prev_transform_buffer_; + std::vector material_data_; + GfxBuffer material_buffer_; + std::vector texture_atlas_; + GfxSamplerState linear_sampler_; + GfxSamplerState linear_wrap_sampler_; + GfxSamplerState nearest_sampler_; + GfxSamplerState anisotropic_sampler_; + std::vector mesh_data_; + GfxBuffer mesh_buffer_; + std::vector index_data_; + GfxBuffer index_buffer_; /**< The buffer storing all indices so it can be access via RT. */ + std::vector vertex_data_; GfxBuffer vertex_buffer_; /**< The buffer storing all vertices so it can be access via RT. */ GfxAccelerationStructure acceleration_structure_; std::vector raytracing_primitives_; + uint32_t sbt_stride_in_entries_[kGfxShaderGroupType_Count] = {}; + + // Scene statistics for currently loaded scene + uint32_t triangle_count_ = 0; + + Graph frameGraph; /**< The stored frame history graph */ - std::deque> dump_requests_; + std::deque> dump_requests_; + std::deque> dump_camera_requests_; std::deque> dump_in_flight_buffers_; GfxKernel dump_copy_to_buffer_kernel_; GfxProgram dump_copy_to_buffer_program_; diff --git a/src/core/src/capsaicin/capsaicin_internal_dump.cpp b/src/core/src/capsaicin/capsaicin_internal_dump.cpp index 2fca642..abc0caa 100644 --- a/src/core/src/capsaicin/capsaicin_internal_dump.cpp +++ b/src/core/src/capsaicin/capsaicin_internal_dump.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,20 +21,21 @@ THE SOFTWARE. ********************************************************************/ #include "capsaicin_internal.h" -#pragma warning(push) -#pragma warning(disable : 4018) -#define TINYEXR_IMPLEMENTATION +#include +#include #include -#pragma warning(pop) - -#define STB_IMAGE_WRITE_IMPLEMENTATION #include namespace Capsaicin { void CapsaicinInternal::dumpAOVBuffer(char const *file_path, std::string_view const &aov) { - dump_requests_.push_back({file_path, aov}); + dump_requests_.push_back({file_path, std::string(aov)}); +} + +void CapsaicinInternal::dumpCamera(char const *file_path, bool jittered) +{ + dump_camera_requests_.push_back({file_path, jittered}); } void CapsaicinInternal::dumpAnyBuffer(char const *file_path, GfxTexture dump_buffer) @@ -49,7 +50,7 @@ void CapsaicinInternal::dumpBuffer(char const *dump_file_path, GfxTexture dumped dumped_buffer.getWidth() ? dumped_buffer.getWidth() : gfxGetBackBufferWidth(gfx_); uint32_t dump_buffer_height = dumped_buffer.getHeight() ? 
dumped_buffer.getHeight() : gfxGetBackBufferHeight(gfx_); - uint64_t dump_buffer_size = dump_buffer_width * dump_buffer_height * 4 * sizeof(float); + uint64_t dump_buffer_size = (uint64_t)dump_buffer_width * dump_buffer_height * 4 * sizeof(float); GfxBuffer dump_copy_buffer = gfxCreateBuffer(gfx_, dump_buffer_size, nullptr, kGfxCpuAccess_None); dump_copy_buffer.setName("Capsaicin_DumpCopyBuffer"); @@ -96,9 +97,9 @@ void CapsaicinInternal::saveImage( void CapsaicinInternal::saveEXR( GfxBuffer dump_buffer, uint32_t dump_buffer_width, uint32_t dump_buffer_height, char const *exr_file_path) { - char const channel_names[] = {'A', 'B', 'G', 'R'}; + char const channel_names[] = {'B', 'G', 'R'}; int const channel_count = ARRAYSIZE(channel_names); - static_assert(channel_count > 0 && channel_count <= 4); + static_assert(channel_count > 0 && channel_count <= 3); // Header std::vector channel_infos; @@ -115,7 +116,7 @@ void CapsaicinInternal::saveEXR( EXRHeader exr_header; InitEXRHeader(&exr_header); - exr_header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE; + exr_header.compression_type = TINYEXR_COMPRESSIONTYPE_PIZ; exr_header.num_channels = channel_count; exr_header.channels = &channel_infos[0]; exr_header.pixel_types = &pixel_types[0]; @@ -131,13 +132,12 @@ void CapsaicinInternal::saveEXR( std::vector images; for (char channel_name : channel_names) { - int dump_channel_offset; + int dump_channel_offset = 0; switch (channel_name) { case 'R': dump_channel_offset = 0; break; case 'G': dump_channel_offset = 1; break; case 'B': dump_channel_offset = 2; break; - case 'A': dump_channel_offset = 3; break; default: assert(false); } @@ -184,7 +184,7 @@ void CapsaicinInternal::saveJPG( const uint32_t image_pixel_count = dump_buffer_width * dump_buffer_height; std::vector image_data; - image_data.resize(image_width * image_height * 4); + image_data.resize((size_t)image_width * image_height * 3); for (size_t pixel_index = 0; pixel_index < image_pixel_count; ++pixel_index) { @@ 
-193,16 +193,126 @@ void CapsaicinInternal::saveJPG( glm::clamp(dump_buffer_data[4 * pixel_index + channel_offset], 0.f, 1.f) * 255.f); }; - image_data[4 * pixel_index + 0] = quantize(0); - image_data[4 * pixel_index + 1] = quantize(1); - image_data[4 * pixel_index + 2] = quantize(2); - image_data[4 * pixel_index + 3] = quantize(3); + image_data[3 * pixel_index + 0] = quantize(0); + image_data[3 * pixel_index + 1] = quantize(1); + image_data[3 * pixel_index + 2] = quantize(2); } - int ret = stbi_write_jpg(jpg_file_path, image_width, image_height, 4, image_data.data(), 90); + int ret = stbi_write_jpg(jpg_file_path, image_width, image_height, 3, image_data.data(), 90); if (ret == 0) { GFX_PRINT_ERROR(kGfxResult_InternalError, "Can't save '%s'", jpg_file_path); } } + +// clang-format off + +void CapsaicinInternal::dumpCamera(char const *json_file_path, CameraMatrices const &camera_matrices, float camera_jitter_x, float camera_jitter_y) +{ + const auto& _c = getCamera(); + const auto& _0 = camera_matrices.view; +#if 0 + // BE CAREFUL: previous frame matrices are tweaked for computing motion vectors + const auto& _1 = camera_matrices.view_prev; +#endif + const auto& _2 = camera_matrices.inv_view; + const auto& _3 = camera_matrices.projection; +#if 0 + // BE CAREFUL: previous frame matrices are tweaked for computing motion vectors + const auto& _4 = camera_matrices.projection_prev; +#endif + const auto& _5 = camera_matrices.inv_projection; + const auto& _6 = camera_matrices.view_projection; +#if 0 + // BE CAREFUL: previous frame matrices are tweaked for computing motion vectors + const auto& _7 = camera_matrices.view_projection_prev; +#endif + const auto& _8 = camera_matrices.inv_view_projection; + + auto formatMatrix = [](const glm::mat4x4& matrix) -> std::string { + std::ostringstream json_matrix; + json_matrix + << matrix[0][0] << ", " << matrix[0][1] << ", " << matrix[0][2] << ", " << matrix[0][3] << ", " + << matrix[1][0] << ", " << matrix[1][1] << ", " << 
matrix[1][2] << ", " << matrix[1][3] << ", " + << matrix[2][0] << ", " << matrix[2][1] << ", " << matrix[2][2] << ", " << matrix[2][3] << ", " + << matrix[3][0] << ", " << matrix[3][1] << ", " << matrix[3][2] << ", " << matrix[3][3]; + return json_matrix.str(); + }; + + auto formatVector = [](const glm::vec3& vector) -> std::string { + std::ostringstream json_vector; + json_vector + << vector[0] << ", " << vector[1] << ", " << vector[2]; + return json_vector.str(); + }; + + std::ofstream json_file(json_file_path); + if (json_file.is_open()) + { + json_file + << "{" << '\n' + << " \"type\": \"perpective\"," << '\n' + << " \"eye\": [" << '\n' + << " " << formatVector(_c.eye) << '\n' + << " ]," << '\n' + << " \"center\": [" << '\n' + << " " << formatVector(_c.center) << '\n' + << " ]," << '\n' + << " \"up\": [" << '\n' + << " " << formatVector(_c.up) << '\n' + << " ]," << '\n' + << " \"aspect\": " << _c.aspect << ", " << '\n' + << " \"fovY\": " << _c.fovY << ", " << '\n' + << " \"nearZ\": " << _c.nearZ << ", " << '\n' + << " \"farZ\": " << _c.farZ << ", " << '\n' + << " \"jitterX\": " << camera_jitter_x << ", " << '\n' + << " \"jitterY\": " << camera_jitter_y << ", " << '\n' + << " \"view\": [" << '\n' + << " " << formatMatrix(_0) << '\n' + << " ]," << '\n' + #if 0 + // BE CAREFUL: previous frame matrices are tweaked for computing motion vectors + << " \"view_prev\": [" << '\n' + << " " << formatMatrix(_1) << '\n' + << " ]," << '\n' + #endif + << " \"inv_view\": [" << '\n' + << " " << formatMatrix(_2) << '\n' + << " ]," << '\n' + << " \"projection\": [" << '\n' + << " " << formatMatrix(_3) << '\n' + << " ]," << '\n' + #if 0 + // BE CAREFUL: previous frame matrices are tweaked for computing motion vectors + << " \"projection_prev\": [" << '\n' + << " " << formatMatrix(_4) << '\n' + << " ]," << '\n' + #endif + << " \"inv_projection\": [" << '\n' + << " " << formatMatrix(_5) << '\n' + << " ]," << '\n' + << " \"view_projection\": [" << '\n' + << " " << formatMatrix(_6) << 
'\n' + << " ]," << '\n' + #if 0 + // BE CAREFUL: previous frame matrices are tweaked for computing motion vectors + << " \"view_projection_prev\": [" << '\n' + << " " << formatMatrix(_7) << '\n' + << " ]," << '\n' + #endif + << " \"inv_view_projection\": [" << '\n' + << " " << formatMatrix(_8) << '\n' + << " ]" << '\n' + << "}" << std::endl; + + json_file.close(); + } + else + { + GFX_PRINT_ERROR(kGfxResult_InternalError, "Can't write to '%s'", json_file_path); + } +} + +// clang-format on + } // namespace Capsaicin diff --git a/src/core/src/capsaicin/capsaicin_internal_types.h b/src/core/src/capsaicin/capsaicin_internal_types.h index 76e9d00..e121d52 100644 --- a/src/core/src/capsaicin/capsaicin_internal_types.h +++ b/src/core/src/capsaicin/capsaicin_internal_types.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,11 +21,10 @@ THE SOFTWARE. 
********************************************************************/ #pragma once -#include "capsaicin.h" - #include #include #include +#include #include namespace Capsaicin @@ -50,6 +49,7 @@ struct AOV Clear = 1 << 1, /**< True to clear buffer every frame */ Accumulate = 1 << 2, /**< True to allow the buffer to accumulate over frames (this is used for error checking to prevent the frame being cleared) */ + Optional = 1 << 3, /**< True if AOV should only be used if another non-optional request is made */ } flags = None; const DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; /**< The internal buffer format (If using read then @@ -100,7 +100,8 @@ using ComponentList = std::vector; #define RENDER_OPTION_GET(variable, ret, options) \ ret.variable = *std::get_if(&options.at(#variable)); -#define COMPONENT_MAKE(type) Component::Registrar::registeredName<> +#define COMPONENT_MAKE(type) ComponentFactory::Registrar::registeredName<> -using RenderOptionList = std::map; +using option = std::variant; +using RenderOptionList = std::map; } // namespace Capsaicin diff --git a/src/core/src/capsaicin/common_functions.inl b/src/core/src/capsaicin/common_functions.inl index 4c63f0b..5e3ec3c 100644 --- a/src/core/src/capsaicin/common_functions.inl +++ b/src/core/src/capsaicin/common_functions.inl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,8 +21,6 @@ THE SOFTWARE. 
********************************************************************/ #pragma once -#include "capsaicin.h" - namespace Capsaicin { static constexpr float const kPi = 3.14159265358979323846f; diff --git a/src/core/src/capsaicin/convolve_ibl.comp b/src/core/src/capsaicin/convolve_ibl.comp index 2078cfa..9c5adef 100644 --- a/src/core/src/capsaicin/convolve_ibl.comp +++ b/src/core/src/capsaicin/convolve_ibl.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/capsaicin/convolve_ibl.frag b/src/core/src/capsaicin/convolve_ibl.frag index 6e59cf0..24b3e1d 100644 --- a/src/core/src/capsaicin/convolve_ibl.frag +++ b/src/core/src/capsaicin/convolve_ibl.frag @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/capsaicin/convolve_ibl.vert b/src/core/src/capsaicin/convolve_ibl.vert index f2b72a8..8ef084a 100644 --- a/src/core/src/capsaicin/convolve_ibl.vert +++ b/src/core/src/capsaicin/convolve_ibl.vert @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/capsaicin/dump_copy_aov_to_buffer.comp b/src/core/src/capsaicin/dump_copy_aov_to_buffer.comp index 5d49fd4..86926df 100644 --- a/src/core/src/capsaicin/dump_copy_aov_to_buffer.comp +++ b/src/core/src/capsaicin/dump_copy_aov_to_buffer.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/capsaicin/factory.h b/src/core/src/capsaicin/factory.h index 5ab84e3..3613862 100644 --- a/src/core/src/capsaicin/factory.h +++ b/src/core/src/capsaicin/factory.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -25,6 +25,7 @@ THE SOFTWARE. #include #include +#include #include namespace Capsaicin @@ -33,36 +34,48 @@ namespace Capsaicin * A object factory class. * Used to create self-registering types that can be dynamically retrieved using the factory @make function. * @tparam Base Type of the base parent class. - * @tparam Args Variadic parameters required for type constructor. 
* @example - * //The base parent class for types to be added to the factory should inherit from Factory - * class Shape : public Factory + * // The base parent class for types to be added to the factory should create a factor handler by inheriting + * from Factory + * class ShapeFactory : public Factory * { - * public: - * Shape(Key) {} //Required constructor * } * // The child classes then should inherit from Base::Registrar - * class Circle : public Shape::Registrar + * class Circle : public Shape, public ShapeFactory::Registrar * { } */ -template +template class Factory { friend Base; public: + using FunctionType = std::unique_ptr (*)() noexcept; + Factory() noexcept = default; + + /** + * Gets the list of internal types and corresponding type names. + * @return The list of names and type constructor functions. + */ + static auto &GetList() noexcept + { + static std::unordered_map list; + return list; + } + /** * Makes a new instance of a requested type. - * @tparam Args2 Variadic parameters to pass to type constructor. - * @param name The name of the type of create. - * @param args The optional arguments to pass to the types constructor. + * @param name The name of the type of create. + * @param args The optional arguments to pass to the types constructor. * @return A pointer to the newly created type (nullptr if type does not exist). */ - template - static std::unique_ptr make(std::string_view const &name, Args2 &&...args) noexcept + static std::unique_ptr make(std::string_view const &name) noexcept { auto &list = GetList(); - if (auto i = list.find(name); i != list.cend()) return i->second(std::forward(args)...); + if (auto i = list.find(name); i != list.cend()) + { + return i->second(); + } return nullptr; } @@ -85,7 +98,7 @@ class Factory * @tparam T Generic type parameter of the child type to register. 
*/ template - class Registrar : public Base + class Registrar { template static constexpr bool isDefined = false; @@ -107,85 +120,31 @@ class Factory */ static bool registerType() noexcept { - Factory::GetList()[registeredName] = [](Args... args) noexcept -> std::unique_ptr { - return std::make_unique(std::forward(args)...); - }; - return true; - } - - static bool registered; /**< Internal boolean used to force @registerType to be called */ - - private: - Registrar() noexcept - : Base(Key {}) - { - (void)registered; - } - }; - - /** - * Class used to encapsulate self-registration using internal consteval name. - * @tparam T Generic type parameter of the child type to register. - */ - template - class RegistrarName : public Base - { - public: - friend T; - - /** - * Registers the type to the factory. - * @return True if it succeeds, false if it fails. - */ - static bool registerType() noexcept - { - auto name = T::Name; - Factory::GetList()[name] = [](Args... args) noexcept -> std::unique_ptr { - return std::make_unique(std::forward(args)...); + Factory::GetList()[registeredName] = []() noexcept -> std::unique_ptr { + if constexpr (std::is_constructible_v) + { + return std::make_unique(registeredName); + } + else + { + return std::make_unique(); + } }; return true; } static bool registered; /**< Internal boolean used to force @registerType to be called */ - private: - RegistrarName() noexcept - : Base(Key {}, T::Name) - { - (void)registered; - } - }; - -private: - class Key - { - Key() noexcept {}; - template - friend class Registrar; - template - friend class RegistrarName; + Registrar() noexcept { (void)registered; } }; - - using FunctionType = std::unique_ptr (*)(Args...) noexcept; - Factory() noexcept = default; - - /** - * Gets the list of internal types and corresponding type names. - * @return The list of names and type constructor functions. 
- */ - static auto &GetList() noexcept - { - static std::unordered_map list; - return list; - } }; -template +template template -bool Factory::Registrar::registered = Factory::Registrar::registerType(); +bool Factory::Registrar::registered = Factory::Registrar::registerType(); -template -template -bool Factory::RegistrarName::registered = - Factory::RegistrarName::registerType(); +#define MANUALLY_REGISTER_TO_FACTORY(FactoryName, TypeName) \ + class TypeName##Register : public FactoryName::Registrar \ + {}; \ + static TypeName##Register register##TypeName; } // namespace Capsaicin diff --git a/src/core/src/capsaicin/graph.cpp b/src/core/src/capsaicin/graph.cpp new file mode 100644 index 0000000..c67b8ec --- /dev/null +++ b/src/core/src/capsaicin/graph.cpp @@ -0,0 +1,80 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+#include "graph.h"
+
+namespace Capsaicin
+{
+/** @return The fixed number of slots in the circular value buffer. */
+uint32_t Graph::getValueCount() const noexcept
+{
+    return static_cast<uint32_t>(values.size());
+}
+
+/** Stores @p value in the slot under the cursor, then advances the cursor with wrap-around. */
+void Graph::addValue(float value) noexcept
+{
+    values[current] = value;
+    current         = (current + 1) % getValueCount();
+}
+
+/** @return The value stored by the most recent addValue() call. */
+float Graph::getLastAddedValue() const noexcept
+{
+    return getValueAtIndex((current == 0 ? getValueCount() : current) - 1); // cursor is one past the newest
+}
+
+/** @return The value held in slot @p index; the index is not range-checked. */
+float Graph::getValueAtIndex(uint32_t index) const noexcept
+{
+    return values[index];
+}
+
+/** @return The sum of all slots divided by the number of non-zero slots, or 0 if no slot is non-zero. */
+float Graph::getAverageValue() const noexcept
+{
+    double   runningCount = 0.0;
+    uint32_t validFrames  = 0;
+    for (float const value : values)
+    {
+        runningCount += static_cast<double>(value);
+        validFrames += (value != 0.0f) ? 1u : 0u;
+    }
+    if (validFrames == 0) return 0.0f; // fresh or reset graph: avoid 0/0, which would return NaN
+    return static_cast<float>(runningCount / static_cast<double>(validFrames));
+}
+
+/** Moves the cursor back to slot 0 and sets every stored value to zero. */
+void Graph::reset() noexcept
+{
+    current = 0;
+    values.fill(0.0f);
+}
+
+/** Untyped-pointer accessor: @p object must point to a Graph; @p index 0 reads the slot at the cursor. */
+float Graph::GetValueAtIndex(void *object, int32_t index) noexcept
+{
+    Graph const  &graph = *static_cast<Graph const *>(object);
+    int32_t const size  = static_cast<int32_t>(graph.values.size());
+    int32_t const slot  = static_cast<int32_t>(graph.current) + index - size; // may be negative
+    return graph.values[slot < 0 ? slot + size : slot]; // wrap negatives back into the buffer
+}
+} // namespace Capsaicin
diff --git a/src/core/src/capsaicin/graph.h b/src/core/src/capsaicin/graph.h
new file mode 100644
index 0000000..3a652b1
--- /dev/null
+++ b/src/core/src/capsaicin/graph.h
@@ -0,0 +1,46 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+#pragma once
+
+#include  // NOTE(review): header name is missing; presumably <array>, since std::array is used below - confirm
+
+namespace Capsaicin
+{
+class Graph // Fixed-size circular buffer of float values with a write cursor
+{
+public:
+    Graph() noexcept = default;
+
+    uint32_t getValueCount() const noexcept; /**< Returns the number of slots in the values buffer */
+    void addValue(float value) noexcept; /**< Writes value at the cursor, then advances the cursor (wrapping) */
+    float getLastAddedValue() const noexcept; /**< Returns the slot just before the cursor (most recent write) */
+    float getValueAtIndex(uint32_t index) const noexcept; /**< Returns the slot at index; no range check */
+    float getAverageValue() const noexcept; /**< Returns the sum of all slots divided by the count of non-zero slots */
+    void reset() noexcept; /**< Sets the cursor to 0 and fills the buffer with zeros */
+
+    static float GetValueAtIndex(void *object, int32_t index) noexcept; /**< Reads a slot of the Graph passed as void*; index is relative to the cursor */
+
+private:
+    uint32_t current = 0; /**< The current cursor into values circular buffer */
+    std::array values = {0.0}; /**< The stored list of values. NOTE(review): the std::array template arguments are missing here - restore from the original header */
+};
+} // namespace Capsaicin
diff --git a/src/core/src/capsaicin/hash_reduce.h b/src/core/src/capsaicin/hash_reduce.h
index 7336d80..bcd7be4 100644
--- a/src/core/src/capsaicin/hash_reduce.h
+++ b/src/core/src/capsaicin/hash_reduce.h
@@ -1,5 +1,5 @@
 /**********************************************************************
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -109,7 +109,6 @@ struct hash { size_t hash = 0x12345678u; - Capsaicin::HashCombine(hash, (uint64_t)value.material); Capsaicin::HashCombine(hash, value.bounds_min); Capsaicin::HashCombine(hash, value.bounds_max); Capsaicin::HashCombine(hash, value.vertices.size()); @@ -127,6 +126,7 @@ struct hash size_t hash = 0x12345678u; Capsaicin::HashCombine(hash, (uint64_t)value.mesh); + Capsaicin::HashCombine(hash, (uint64_t)value.material); Capsaicin::HashCombine(hash, value.transform); return hash; diff --git a/src/core/src/capsaicin/static_string.h b/src/core/src/capsaicin/static_string.h index a292a59..14885a1 100644 --- a/src/core/src/capsaicin/static_string.h +++ b/src/core/src/capsaicin/static_string.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,6 +21,7 @@ THE SOFTWARE. 
********************************************************************/ #pragma once +#include #include #include #include @@ -311,12 +312,16 @@ class StaticString constexpr size_t rfind(char ch, size_t start = Size - 1) const noexcept { if (start > Size - 1) return npos; - for (size_t i = start; i >= 0; --i) + for (size_t i = start; ; --i) { if (dataArray[i] == ch) { return i; } + if (i == 0) + { + break; + } } return npos; } @@ -333,7 +338,7 @@ class StaticString StaticString const &subString, const size_t start = Size - Size2) const noexcept { if (Size < Size2 || start > Size - Size2) return npos; - for (size_t i = start; i >= 0; --i) + for (size_t i = start; ; --i) { for (size_t j = 0; j < Size2; ++j) { @@ -346,6 +351,10 @@ class StaticString return i; } } + if (i == 0) + { + break; + } } return npos; } diff --git a/src/core/src/capsaicin/thread_pool.cpp b/src/core/src/capsaicin/thread_pool.cpp index 5184ef3..4ad0cfc 100644 --- a/src/core/src/capsaicin/thread_pool.cpp +++ b/src/core/src/capsaicin/thread_pool.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/capsaicin/thread_pool.h b/src/core/src/capsaicin/thread_pool.h index dc524af..f5862ff 100644 --- a/src/core/src/capsaicin/thread_pool.h +++ b/src/core/src/capsaicin/thread_pool.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/capsaicin/timeable.cpp b/src/core/src/capsaicin/timeable.cpp index 2b933a6..7f62dcb 100644 --- a/src/core/src/capsaicin/timeable.cpp +++ b/src/core/src/capsaicin/timeable.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -61,9 +61,9 @@ uint32_t Timeable::getTimestampQueryCount() const noexcept return queryCount; } -TimestampQuery const *Timeable::getTimestampQueries() const noexcept +std::vector const &Timeable::getTimestampQueries() const noexcept { - return queries.data(); + return queries; } void Timeable::resetQueries() noexcept diff --git a/src/core/src/capsaicin/timeable.h b/src/core/src/capsaicin/timeable.h index 92c4a30..b5d4a06 100644 --- a/src/core/src/capsaicin/timeable.h +++ b/src/core/src/capsaicin/timeable.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -68,7 +68,7 @@ class Timeable * Gets timestamp queries. * @returns The timestamp queries. 
*/ - virtual TimestampQuery const *getTimestampQueries() const noexcept; + virtual std::vector const &getTimestampQueries() const noexcept; /** Resets the timed section queries */ virtual void resetQueries() noexcept; @@ -89,6 +89,6 @@ class Timeable std::vector queries; /**< The array of timestamp queries. */ uint32_t queryCount = 0; /**< The number of timestamp queries. */ GfxContext gfx_; /**< The rendering context to be used. */ - std::string_view name_; /**< The name of the render technique. */ + std::string_view name_; /**< The name of the timeable. */ }; } // namespace Capsaicin diff --git a/src/core/src/components/blue_noise_sampler/blue_noise_sampler.cpp b/src/core/src/components/blue_noise_sampler/blue_noise_sampler.cpp index 656208f..20763e0 100644 --- a/src/core/src/components/blue_noise_sampler/blue_noise_sampler.cpp +++ b/src/core/src/components/blue_noise_sampler/blue_noise_sampler.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,14 +27,18 @@ THE SOFTWARE. namespace Capsaicin { +/** Constructor. 
*/ + +inline BlueNoiseSampler::BlueNoiseSampler() noexcept + : Component(Name) +{} + BlueNoiseSampler::~BlueNoiseSampler() noexcept { - gfxDestroyBuffer(gfx_, sobolBuffer); - gfxDestroyBuffer(gfx_, rankingTileBuffer); - gfxDestroyBuffer(gfx_, scramblingTileBuffer); + terminate(); } -bool BlueNoiseSampler::init(CapsaicinInternal const &capsaicin) noexcept +bool BlueNoiseSampler::init([[maybe_unused]] CapsaicinInternal const &capsaicin) noexcept { sobolBuffer = gfxCreateBuffer(gfx_, sizeof(Sobol256x256), Sobol256x256); rankingTileBuffer = gfxCreateBuffer(gfx_, sizeof(RankingTiles), RankingTiles); @@ -42,13 +46,20 @@ bool BlueNoiseSampler::init(CapsaicinInternal const &capsaicin) noexcept return true; } -void BlueNoiseSampler::run(CapsaicinInternal &capsaicin) noexcept +void BlueNoiseSampler::run([[maybe_unused]] CapsaicinInternal &capsaicin) noexcept { // Nothing to do } +void BlueNoiseSampler::terminate() noexcept +{ + gfxDestroyBuffer(gfx_, sobolBuffer); + gfxDestroyBuffer(gfx_, rankingTileBuffer); + gfxDestroyBuffer(gfx_, scramblingTileBuffer); +} + void BlueNoiseSampler::addProgramParameters( - CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept + [[maybe_unused]] CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept { gfxProgramSetParameter(gfx_, program, "g_SobolBuffer", sobolBuffer); gfxProgramSetParameter(gfx_, program, "g_RankingTile", rankingTileBuffer); diff --git a/src/core/src/components/blue_noise_sampler/blue_noise_sampler.h b/src/core/src/components/blue_noise_sampler/blue_noise_sampler.h index 9e41fd0..38565c7 100644 --- a/src/core/src/components/blue_noise_sampler/blue_noise_sampler.h +++ b/src/core/src/components/blue_noise_sampler/blue_noise_sampler.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -25,18 +25,20 @@ THE SOFTWARE. namespace Capsaicin { -class BlueNoiseSampler : public Component::RegistrarName +class BlueNoiseSampler + : public Component + , public ComponentFactory::Registrar { public: static constexpr std::string_view Name = "BlueNoiseSampler"; - /** Constructor. */ - BlueNoiseSampler() noexcept {} - BlueNoiseSampler(BlueNoiseSampler const &) noexcept = delete; BlueNoiseSampler(BlueNoiseSampler &&) noexcept = default; + /** Constructor. */ + BlueNoiseSampler() noexcept; + /** Destructor. */ virtual ~BlueNoiseSampler() noexcept; @@ -55,6 +57,11 @@ class BlueNoiseSampler : public Component::RegistrarName */ void run(CapsaicinInternal &capsaicin) noexcept override; + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + /** * Add the required program parameters to a shader based on current settings. * @param capsaicin Current framework context. diff --git a/src/core/src/components/blue_noise_sampler/blue_noise_sampler.hlsl b/src/core/src/components/blue_noise_sampler/blue_noise_sampler.hlsl index 15f3fe3..ed1ab78 100644 --- a/src/core/src/components/blue_noise_sampler/blue_noise_sampler.hlsl +++ b/src/core/src/components/blue_noise_sampler/blue_noise_sampler.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -52,6 +52,18 @@ float samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp(in in return (0.5f + value) / 256.0f; } +float BlueNoise_Sample1D(in uint2 pixel, in uint sample_index, in uint dimension_offset) +{ + // https://blog.demofox.org/2017/10/31/animating-noise-for-integration-over-time/ + float s = samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp(pixel.x, pixel.y, 0, dimension_offset); + + return fmod(s + (sample_index & 255) * GOLDEN_RATIO, 1.0f); +} + +float BlueNoise_Sample1D(in uint2 pixel, in uint sample_index) +{ + return BlueNoise_Sample1D(pixel, sample_index, 0); +} float2 BlueNoise_Sample2D(in uint2 pixel, in uint sample_index, in uint dimension_offset) { @@ -67,4 +79,19 @@ float2 BlueNoise_Sample2D(in uint2 pixel, in uint sample_index) return BlueNoise_Sample2D(pixel, sample_index, 0); } +float3 BlueNoise_Sample3D(in uint2 pixel, in uint sample_index, in uint dimension_offset) +{ + // https://blog.demofox.org/2017/10/31/animating-noise-for-integration-over-time/ + float3 s = float3(samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp(pixel.x, pixel.y, 0, dimension_offset + 0), + samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp(pixel.x, pixel.y, 0, dimension_offset + 1), + samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp(pixel.x, pixel.y, 0, dimension_offset + 2)); + + return fmod(s + (sample_index & 255) * GOLDEN_RATIO, 1.0f); +} + +float3 BlueNoise_Sample3D(in uint2 pixel, in uint sample_index) +{ + return BlueNoise_Sample3D(pixel, sample_index, 0); +} + #endif // BLUE_NOISE_SAMPLER_HLSL diff --git a/src/core/src/components/brdf_lut/brdf_lut.comp b/src/core/src/components/brdf_lut/brdf_lut.comp new file mode 100644 index 0000000..daa5492 --- /dev/null +++ b/src/core/src/components/brdf_lut/brdf_lut.comp 
@@ -0,0 +1,86 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+
+#ifndef BRDF_LUT_HLSL
+#define BRDF_LUT_HLSL
+
+// NOTE: these are not used, but are needed for shader compilation
+uint g_FrameIndex;
+Texture2D g_TextureMaps[] : register(space99);
+SamplerState g_TextureSampler;
+
+#include "../../materials/material_sampling.hlsl"
+
+uint g_LutSize; // Width and height of the LUT in texels (dispatch bound and UV divisor below)
+RWTexture2D g_LutBuffer; // Output LUT, one write per texel. NOTE(review): element type looks stripped - confirm
+uint g_SampleSize; // Number of samples accumulated per texel
+
+// Returns position of i-th element in 2D Hammersley Point Set of N elements
+float2 Hammersley2D(uint i, uint N)
+{
+    // Radical inverse based on http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html
+    uint bits = (i << 16u) | (i >> 16u); // swap the two 16-bit halves,
+    bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); // then adjacent bits,
+    bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); // bit pairs,
+    bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); // nibbles
+    bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); // and bytes: all 32 bits are now reversed
+    float rdi = float(bits) * 2.3283064365386963e-10; // scale by 2^-32
+    return float2(float(i) / float(N), rdi);
+}
+
+[numthreads(8, 8, 1)] // 8x8 threads per group; each thread computes one LUT texel
+void ComputeBrdfLut(uint2 did : SV_DispatchThreadID)
+{
+    if (any(did >= g_LutSize)) // skip threads that fall outside the LUT
+    {
+        return;
+    }
+
+    float2 uv = (did + 0.5f) / g_LutSize; // texel centre, strictly inside (0, 1)
+
+    float dotNV = uv.x; // horizontal axis: cosine between normal and view direction
+    float roughness = uv.y; // vertical axis: roughness
+    float alpha = roughness * roughness;
+    float3 wo = float3(sqrt(1.0f - dotNV * dotNV), 0.0f, dotNV); // unit view vector in a frame whose z axis is the normal
+
+    float2 lut_value = 0.0f;
+    for (uint i = 0; i < g_SampleSize; ++i)
+    {
+        float2 xi = Hammersley2D(i, g_SampleSize);
+        float3 wi = sampleGGX(alpha, wo, xi); // not defined in this file; presumably from material_sampling.hlsl
+
+        float3 h = normalize(wo + wi); // half vector between the two directions
+
+        float dotHV = saturate(dot(h, wo));
+        float dotNH = clamp(h.z, -1.0f, 1.0f);
+        float dotNL = clamp(wi.z, -1.0f, 1.0f);
+        float3 F;
+        float3 FGD = evaluateGGX(alpha, alpha * alpha, 0.0f, dotHV, dotNH, dotNL, dotNV, F);
+        float3 GD = FGD / F; // NOTE(review): no guard against F == 0 - confirm evaluateGGX never outputs a zero F
+
+        float pdf = sampleGGXPDF(alpha * alpha, dotNH, dotNV, wo); // NOTE(review): used as a divisor below - confirm it cannot be 0
+
+        lut_value += float2(GD.x, FGD.x) * saturate(dotNL) / pdf; // saturate(dotNL) = abs(dotNL) * Heaviside function for the upper hemisphere.
+    }
+    lut_value /= float(g_SampleSize); // average over all samples
+
+    g_LutBuffer[did] = lut_value;
+}
+
+#endif
diff --git a/src/core/src/components/brdf_lut/brdf_lut.cpp b/src/core/src/components/brdf_lut/brdf_lut.cpp
new file mode 100644
index 0000000..2ecbd57
--- /dev/null
+++ b/src/core/src/components/brdf_lut/brdf_lut.cpp
@@ -0,0 +1,81 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+#include "brdf_lut.h"
+
+#include "capsaicin_internal.h"
+
+namespace Capsaicin
+{
+BrdfLut::BrdfLut() noexcept // deliberately not 'inline': this .cpp holds the only definition
+    : Component(Name)
+{}
+
+BrdfLut::~BrdfLut() noexcept
+{
+    terminate();
+}
+
+bool BrdfLut::init(CapsaicinInternal const &capsaicin) noexcept
+{
+    brdf_lut_buffer_ = gfxCreateTexture2D(gfx_, brdf_lut_size_, brdf_lut_size_, DXGI_FORMAT_R16G16_FLOAT);
+    brdf_lut_buffer_.setName("Capsaicin_BrdfLut_LutBuffer");
+
+    GfxProgram const brdf_lut_program =
+        gfxCreateProgram(gfx_, "components/brdf_lut/brdf_lut", capsaicin.getShaderPath());
+    GfxKernel const brdf_lut_kernel = gfxCreateComputeKernel(gfx_, brdf_lut_program, "ComputeBrdfLut");
+
+    gfxProgramSetParameter(gfx_, brdf_lut_program, "g_LutBuffer", brdf_lut_buffer_);
+    gfxProgramSetParameter(gfx_, brdf_lut_program, "g_LutSize", brdf_lut_size_);
+    gfxProgramSetParameter(gfx_, brdf_lut_program, "g_SampleSize", brdf_lut_sample_size_);
+
+    // Compute BRDF LUT once in initialization
+    uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, brdf_lut_kernel);
+    uint32_t const num_groups_x = (brdf_lut_size_ + num_threads[0] - 1) / num_threads[0]; // round up
+    uint32_t const num_groups_y = (brdf_lut_size_ + num_threads[1] - 1) / num_threads[1]; // round up
+    gfxCommandBindKernel(gfx_, brdf_lut_kernel);
+    gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1);
+
+    // The program and kernel are only needed for the single dispatch above
+    gfxDestroyKernel(gfx_, brdf_lut_kernel);
+    gfxDestroyProgram(gfx_, brdf_lut_program);
+    return true;
+}
+
+void BrdfLut::run([[maybe_unused]] CapsaicinInternal &capsaicin) noexcept
+{
+    // Nothing to do
+}
+
+void BrdfLut::terminate() noexcept
+{
+    gfxDestroyTexture(gfx_, brdf_lut_buffer_);
+    brdf_lut_buffer_ = {}; // forget the handle so a repeated call (e.g. from the destructor) does not destroy it again
+}
+
+void BrdfLut::addProgramParameters(
+    [[maybe_unused]] CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept
+{
+    gfxProgramSetParameter(gfx_, program, "g_LutBuffer", brdf_lut_buffer_);
+    gfxProgramSetParameter(gfx_, program, "g_LutSize", brdf_lut_size_);
+}
+} // namespace Capsaicin
diff --git a/src/core/src/components/brdf_lut/brdf_lut.h b/src/core/src/components/brdf_lut/brdf_lut.h
new file mode 100644
index 0000000..761a051
--- /dev/null
+++ b/src/core/src/components/brdf_lut/brdf_lut.h
@@ -0,0 +1,75 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+#pragma once
+
+#include "components/component.h"
+
+namespace Capsaicin
+{
+
+class BrdfLut // Component that owns a BRDF look-up texture and binds it to shader programs
+    : public Component
+    , public ComponentFactory::Registrar // NOTE(review): template arguments look stripped - presumably Registrar<BrdfLut>; confirm
+{
+public:
+    static constexpr std::string_view Name = "BrdfLut";
+
+    /** Constructor. */
+    BrdfLut() noexcept;
+
+    ~BrdfLut() noexcept; /**< Destructor; calls terminate(). */
+
+    /**
+     * Initialise any internal data or state.
+     * @note This is automatically called by the framework after construction and should be used to create
+     * any required CPU|GPU resources.
+     * @param capsaicin Current framework context.
+     * @return True if initialisation succeeded, False otherwise.
+     */
+    bool init(CapsaicinInternal const &capsaicin) noexcept override;
+
+    /**
+     * Run internal operations.
+     * @param [in,out] capsaicin Current framework context.
+     */
+    void run(CapsaicinInternal &capsaicin) noexcept override;
+
+    /**
+     * Destroy any used internal resources and shutdown.
+     */
+    void terminate() noexcept override;
+
+    /**
+     * Add the required program parameters to a shader based on current settings.
+     * @param capsaicin Current framework context.
+     * @param program The shader program to bind parameters to.
+     */
+    void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept;
+
+private:
+    GfxTexture brdf_lut_buffer_; /**< The look-up texture; created in init() and destroyed in terminate(). */
+
+    uint32_t brdf_lut_size_ = 32; /**< Width and height of the look-up texture in texels. */
+    uint32_t brdf_lut_sample_size_ = 4096; /**< Value passed to the LUT shader as g_SampleSize. */
+};
+
+} // namespace Capsaicin
diff --git a/src/core/src/components/component.cpp b/src/core/src/components/component.cpp
index b938ad7..0cd80fc 100644
--- a/src/core/src/components/component.cpp
+++ b/src/core/src/components/component.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -24,7 +24,7 @@ THE SOFTWARE.
namespace Capsaicin { -Component::Component(Key, std::string_view const &name) noexcept +Component::Component(std::string_view const &name) noexcept : Timeable(name) {} @@ -42,4 +42,6 @@ BufferList Component::getBuffers() const noexcept { return {}; } + +void Component::renderGUI([[maybe_unused]] CapsaicinInternal &capsaicin) const noexcept {} } // namespace Capsaicin diff --git a/src/core/src/components/component.h b/src/core/src/components/component.h index b991914..4958062 100644 --- a/src/core/src/components/component.h +++ b/src/core/src/components/component.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -28,18 +28,16 @@ THE SOFTWARE. namespace Capsaicin { class CapsaicinInternal; -class RenderTechnique; /** A abstract component class used to encapsulate shared operations between render techniques. */ -class Component - : public Factory - , public Timeable +class Component : public Timeable { Component(Component const &) = delete; Component &operator=(Component const &) = delete; + Component() = delete; public: - Component(Key, std::string_view const &name) noexcept; + Component(std::string_view const &name) noexcept; virtual ~Component() = default; @@ -82,6 +80,20 @@ class Component */ virtual void run(CapsaicinInternal &capsaicin) noexcept = 0; + /** + * Destroy any used internal resources and shutdown. + */ + virtual void terminate() noexcept = 0; + + /** + * Render GUI options. + * @param [in,out] capsaicin The current capsaicin context. 
+ */ + virtual void renderGUI(CapsaicinInternal &capsaicin) const noexcept; + protected: }; + +class ComponentFactory : public Factory +{}; } // namespace Capsaicin diff --git a/src/core/src/components/light_sampler/gather_area_lights.geom b/src/core/src/components/light_builder/gather_area_lights.geom similarity index 51% rename from src/core/src/components/light_sampler/gather_area_lights.geom rename to src/core/src/components/light_builder/gather_area_lights.geom index a9e9081..7ff73a0 100644 --- a/src/core/src/components/light_sampler/gather_area_lights.geom +++ b/src/core/src/components/light_builder/gather_area_lights.geom @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,30 +21,31 @@ THE SOFTWARE. 
********************************************************************/ #include "../../lights/lights_shared.h" -#include "../../math/geometry.hlsl" +#include "../../geometry/geometry.hlsl" #include "../../math/pack.hlsl" RWStructuredBuffer g_LightBuffer; -RWStructuredBuffer g_LightBufferSize; +RWStructuredBuffer g_LightBufferSize; +RWStructuredBuffer g_LightInstanceBuffer; +RWStructuredBuffer g_LightInstancePrimitiveBuffer; StructuredBuffer g_MaterialBuffer; -Texture2D g_TextureMaps[] : register(space99); -SamplerState g_LinearSampler; +Texture2D g_TextureMaps[] : register(space99); +SamplerState g_TextureSampler; +uint g_LightCount; struct Params { - float4 position : SV_Position; - float2 uv : TEXCOORD; - uint materialID : MATERIAL_ID; + float4 position : SV_Position; + float2 uv : TEXCOORD; + uint instanceID : INSTANCE_ID; + uint materialID : MATERIAL_ID; }; -[maxvertexcount(3)] -void main(in triangle Params params[3], in uint primitiveID : SV_PrimitiveID, inout TriangleStream triangle_stream) +bool CheckIsEmissive(in uint material_index, in float2 uv0, in float2 uv1, in float2 uv2, out float4 radiance) { - uint material_index = params[0].materialID; - // Get any light maps - float4 radiance = g_MaterialBuffer[material_index].emissivity; + radiance = g_MaterialBuffer[material_index].emissivity; uint tex = asuint(radiance.w); float3 emissivity = radiance.xyz; if (tex != uint(-1)) @@ -54,30 +55,67 @@ void main(in triangle Params params[3], in uint primitiveID : SV_PrimitiveID, in g_TextureMaps[NonUniformResourceIndex(tex)].GetDimensions(size.x, size.y); // Approximate ray cone projection (ray tracing gems chapter 20) - float2 edgeUV0 = params[1].uv - params[0].uv; - float2 edgeUV1 = params[2].uv - params[0].uv; + float2 edgeUV0 = uv1 - uv0; + float2 edgeUV1 = uv2 - uv0; float area_uv = size.x * size.y * abs(edgeUV0.x * edgeUV1.y - edgeUV1.x * edgeUV0.y); float offset = 0.5f * log2(area_uv); // Calculate texture LOD based on projected area - float2 uv = 
interpolate(params[0].uv, params[1].uv, params[2].uv, (1.0f / 3.0f).xx); - emissivity *= g_TextureMaps[NonUniformResourceIndex(tex)].SampleLevel(g_LinearSampler, uv, offset).xyz; + float2 uv = interpolate(uv0, uv1, uv2, (1.0f / 3.0f).xx); + emissivity *= g_TextureMaps[NonUniformResourceIndex(tex)].SampleLevel(g_TextureSampler, uv, offset).xyz; } - bool is_emissive = any(emissivity > 0.0f); + return any(emissivity > 0.0f); +} + +[maxvertexcount(3)] +void CountAreaLights(in triangle Params params[3], in uint primitiveID : SV_PrimitiveID, inout TriangleStream triangle_stream) +{ + uint instance_index = params[0].instanceID; + uint material_index = params[0].materialID; + + // Check whether the primitive is emissive + float4 radiance; + bool is_emissive = + CheckIsEmissive(material_index, params[0].uv, params[1].uv, params[2].uv, radiance); + // Compute number of items to append for the whole wave - uint lane_append_offset = WavePrefixCountBits(is_emissive); uint append_count = WaveActiveCountBits(is_emissive); - // Update the output location for this whole wave - uint append_offset; if (WaveIsFirstLane()) { - // this way, we only issue one atomic for the entire wave, which reduces contention - // and keeps the output data for each lane in this wave together in the output buffer - InterlockedAdd(g_LightBufferSize[0], append_count, append_offset); + // This way, we only issue one atomic for the entire wave, + // which reduces contention + InterlockedAdd(g_LightBufferSize[0], append_count); } - append_offset = WaveReadLaneFirst(append_offset); // broadcast value - append_offset += lane_append_offset; // and add in the offset for this lane + + // Write out the number of emissive primitives + uint idx = primitiveID + g_LightInstanceBuffer[instance_index]; + g_LightInstancePrimitiveBuffer[idx] = (is_emissive ? 
1 : 0); + + // This is useless since there's no pixel shader nor render target, + // but required by DXC so the shader compiles + if (is_emissive) + { + triangle_stream.Append(params[0]); + triangle_stream.Append(params[1]); + triangle_stream.Append(params[2]); + + triangle_stream.RestartStrip(); + } +} + +[maxvertexcount(3)] +void ScatterAreaLights(in triangle Params params[3], in uint primitiveID : SV_PrimitiveID, inout TriangleStream triangle_stream) +{ + uint instance_index = params[0].instanceID; + uint material_index = params[0].materialID; + + // Check whether the primitive is emissive + float4 radiance; + bool is_emissive = + CheckIsEmissive(material_index, params[0].uv, params[1].uv, params[2].uv, radiance); + + // Write the area light to memory if needed if (is_emissive) { Light light; @@ -86,6 +124,8 @@ void main(in triangle Params params[3], in uint primitiveID : SV_PrimitiveID, in light.v2 = float4(params[1].position.xyz, packUVs(params[1].uv)); light.v3 = float4(params[2].position.xyz, packUVs(params[2].uv)); + uint idx = primitiveID + g_LightInstanceBuffer[instance_index]; + uint append_offset = (g_LightInstancePrimitiveBuffer[idx] += g_LightCount); g_LightBuffer[append_offset] = light; // write to the offset location for this lane triangle_stream.Append(params[0]); diff --git a/src/core/src/components/light_sampler/gather_area_lights.vert b/src/core/src/components/light_builder/gather_area_lights.vert similarity index 64% rename from src/core/src/components/light_sampler/gather_area_lights.vert rename to src/core/src/components/light_builder/gather_area_lights.vert index 20f505e..e8bc3af 100644 --- a/src/core/src/components/light_sampler/gather_area_lights.vert +++ b/src/core/src/components/light_builder/gather_area_lights.vert @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. 
All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,33 +21,45 @@ THE SOFTWARE. ********************************************************************/ #include "../../lights/lights_shared.h" +#include "../../math/transform.hlsl" StructuredBuffer g_MeshBuffer; StructuredBuffer g_InstanceBuffer; -StructuredBuffer g_TransformBuffer; +StructuredBuffer g_TransformBuffer; StructuredBuffer g_InstanceIDBuffer; struct Params { - float4 position : SV_Position; - float2 uv : TEXCOORD; - uint materialID : MATERIAL_ID; + float4 position : SV_Position; + float2 uv : TEXCOORD; + uint instanceID : INSTANCE_ID; + uint materialID : MATERIAL_ID; }; -Params main(in Vertex vertex, in uint drawID : gfx_DrawID) +Params VS(in Vertex vertex, in uint drawID) { uint instanceID = g_InstanceIDBuffer[drawID]; Instance instance = g_InstanceBuffer[instanceID]; - Mesh mesh = g_MeshBuffer[instance.mesh_index]; - float4x4 transform = g_TransformBuffer[instance.transform_index]; - float3 position = mul(transform, float4(vertex.position.xyz, 1.0f)).xyz; + float3x4 transform = g_TransformBuffer[instance.transform_index]; + float3 position = transformPoint(vertex.position.xyz, transform); Params params; - params.position = float4(position, 1.0f); - params.uv = vertex.uv.xy; - params.materialID = mesh.material_index; + params.position = float4(position, 1.0f); + params.uv = vertex.uv.xy; + params.instanceID = instanceID; + params.materialID = instance.material_index; return params; } + +Params CountAreaLights(in Vertex vertex, in uint drawID : gfx_DrawID) +{ + return VS(vertex, drawID); +} + +Params ScatterAreaLights(in Vertex vertex, in uint drawID : gfx_DrawID) +{ + return VS(vertex, drawID); +} diff --git a/src/core/src/components/light_sampler/light_sampler.cpp b/src/core/src/components/light_builder/light_builder.cpp similarity index 58% rename from 
src/core/src/components/light_sampler/light_sampler.cpp rename to src/core/src/components/light_builder/light_builder.cpp index 33bd50e..aa32dee 100644 --- a/src/core/src/components/light_sampler/light_sampler.cpp +++ b/src/core/src/components/light_builder/light_builder.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ********************************************************************/ -#include "light_sampler.h" +#include "light_builder.h" #include "capsaicin_internal.h" #include "hash_reduce.h" @@ -28,20 +28,16 @@ THE SOFTWARE. namespace Capsaicin { -LightSampler::~LightSampler() noexcept -{ - gfxDestroyBuffer(gfx_, lightBuffer); - gfxDestroyBuffer(gfx_, lightCountBuffer); - for (auto &i : lightCountBufferTemp) - { - gfxDestroyBuffer(gfx_, i.second); - } +LightBuilder::LightBuilder() noexcept + : Component(Name) +{} - gfxDestroyKernel(gfx_, gatherAreaLightsKernel); - gfxDestroyProgram(gfx_, gatherAreaLightsProgram); +LightBuilder::~LightBuilder() noexcept +{ + terminate(); } -RenderOptionList LightSampler::getRenderOptions() noexcept +RenderOptionList LightBuilder::getRenderOptions() noexcept { RenderOptionList newOptions; newOptions.emplace(RENDER_OPTION_MAKE(delta_light_enable, options)); @@ -50,20 +46,21 @@ RenderOptionList LightSampler::getRenderOptions() noexcept return newOptions; } -LightSampler::RenderOptions LightSampler::convertOptions(RenderSettings const &settings) noexcept +LightBuilder::RenderOptions LightBuilder::convertOptions(RenderOptionList const &options) noexcept { RenderOptions newOptions; - 
RENDER_OPTION_GET(delta_light_enable, newOptions, settings.options_) - RENDER_OPTION_GET(area_light_enable, newOptions, settings.options_) - RENDER_OPTION_GET(environment_light_enable, newOptions, settings.options_) + RENDER_OPTION_GET(delta_light_enable, newOptions, options) + RENDER_OPTION_GET(area_light_enable, newOptions, options) + RENDER_OPTION_GET(environment_light_enable, newOptions, options) return newOptions; } -bool LightSampler::init(CapsaicinInternal const &capsaicin) noexcept +bool LightBuilder::init(CapsaicinInternal const &capsaicin) noexcept { gatherAreaLightsProgram = - gfxCreateProgram(gfx_, "components/light_sampler/gather_area_lights", capsaicin.getShaderPath()); - gatherAreaLightsKernel = gfxCreateGraphicsKernel(gfx_, gatherAreaLightsProgram); + gfxCreateProgram(gfx_, "components/light_builder/gather_area_lights", capsaicin.getShaderPath()); + countAreaLightsKernel = gfxCreateGraphicsKernel(gfx_, gatherAreaLightsProgram, "CountAreaLights"); + scatterAreaLightsKernel = gfxCreateGraphicsKernel(gfx_, gatherAreaLightsProgram, "ScatterAreaLights"); lightCountBuffer = gfxCreateBuffer(gfx_, 1); lightCountBuffer.setName("LightCountBuffer"); @@ -76,70 +73,77 @@ bool LightSampler::init(CapsaicinInternal const &capsaicin) noexcept std::string name = "AreaLightCountCopyBuffer"; name += std::to_string(i); buffer.setName(name.c_str()); - lightCountBufferTemp.emplace_back(0, buffer); + lightCountBufferTemp.emplace_back(false, buffer); } + lightHash = 0; return !!gatherAreaLightsProgram; } -void LightSampler::run(CapsaicinInternal &capsaicin) noexcept +void LightBuilder::run(CapsaicinInternal &capsaicin) noexcept { - auto const optionsNew = convertOptions(capsaicin.getRenderSettings()); - auto scene = capsaicin.getScene(); + auto optionsNew = convertOptions(capsaicin.getOptions()); + auto scene = capsaicin.getScene(); // Check if meshes were updated - uint32_t oldAreaLightTotal = areaLightTotal; - if (capsaicin.getMeshesUpdated()) + std::vector 
lightInstancePrimitiveCount; + if (capsaicin.getMeshesUpdated() || capsaicin.getFrameIndex() == 0) { - areaLightTotal = 0; - GfxMesh const *meshes = gfxSceneGetObjects(scene); - uint32_t const meshCount = gfxSceneGetObjectCount(scene); - - for (uint32_t i = 0; i < meshCount; ++i) + areaLightTotal = 0; + lightInstancePrimitiveCount.resize(gfxSceneGetObjectCount(scene)); + for (uint32_t i = 0; i < gfxSceneGetObjectCount(scene); ++i) { - if (meshes[i].material && gfxMaterialIsEmissive(*meshes[i].material)) + auto const &instance = gfxSceneGetObjects(scene)[i]; + if (instance.mesh && instance.material && gfxMaterialIsEmissive(*instance.material)) { - areaLightTotal += (uint32_t)meshes[i].indices.size() / 3; + lightInstancePrimitiveCount[i] = areaLightTotal; + areaLightTotal += (uint32_t)instance.mesh->indices.size() / 3; } } } // Check whether we need to update lighting structures size_t oldLightHash = lightHash; - if (capsaicin.getFrameIndex() == 0 || capsaicin.getAnimate()) + if (!capsaicin.getPaused() || capsaicin.getFrameIndex() == 0) lightHash = Capsaicin::HashReduce( gfxSceneGetObjects(scene), gfxSceneGetObjectCount(scene)); // Get last valid area light count value const uint32_t bufferIndex = gfxGetBackBufferIndex(gfx_); - if (lightCountBufferTemp[bufferIndex].first != 0) + if (lightCountBufferTemp[bufferIndex].first) { areaLightCount = *gfxBufferGetData(gfx_, lightCountBufferTemp[bufferIndex].second); + areaLightCount -= deltaLightCount + environmentMapCount; + lightCountBufferTemp[bufferIndex].first = false; } - auto environmentMap = capsaicin.getEnvironmentBuffer(); - lightsUpdated = false; + auto environmentMap = capsaicin.getEnvironmentBuffer(); uint32_t oldDeltaLightCount = deltaLightCount; deltaLightCount = (optionsNew.delta_light_enable) ? gfxSceneGetObjectCount(scene) : 0; uint32_t oldAreaLightMaxCount = areaLightMaxCount; areaLightMaxCount = (optionsNew.area_light_enable) ? 
areaLightTotal : 0; - lightSettingChanged = - ((options.delta_light_enable != optionsNew.delta_light_enable) - && (deltaLightCount != oldDeltaLightCount)) - || (options.area_light_enable != optionsNew.area_light_enable - && areaLightMaxCount != oldAreaLightMaxCount) - || (options.environment_light_enable != optionsNew.environment_light_enable && !!environmentMap); - options.delta_light_enable = optionsNew.delta_light_enable && deltaLightCount > 0; - options.area_light_enable = optionsNew.area_light_enable && areaLightMaxCount > 0; - options.environment_light_enable = optionsNew.environment_light_enable && !!environmentMap; + + // Disable lights that are not found in scene + /*optionsNew.delta_light_enable = optionsNew.delta_light_enable && deltaLightCount > 0; + optionsNew.area_light_enable = optionsNew.area_light_enable && areaLightMaxCount > 0; + optionsNew.environment_light_enable = optionsNew.environment_light_enable && !!environmentMap;*/ + + lightsUpdated = false; + lightBufferIndex = (1 - lightBufferIndex); + lightSettingChanged = options.delta_light_enable != optionsNew.delta_light_enable + || options.area_light_enable != optionsNew.area_light_enable + || options.environment_light_enable != optionsNew.environment_light_enable; + options = optionsNew; if (oldLightHash != lightHash || (capsaicin.getEnvironmentMapUpdated() && options.environment_light_enable) - || (oldAreaLightMaxCount != areaLightMaxCount) || lightSettingChanged + || (oldAreaLightMaxCount != areaLightMaxCount) || (oldDeltaLightCount != deltaLightCount) + || lightSettingChanged || (areaLightMaxCount > 0 && (capsaicin.getMeshesUpdated() || capsaicin.getTransformsUpdated()))) { lightsUpdated = true; // Update lights + uint32_t lightCount; { TimedSection const timedSection(*this, "UpdateLights"); @@ -147,10 +151,12 @@ void LightSampler::run(CapsaicinInternal &capsaicin) noexcept // Add the environment map to the light list // Note: other parts require that the environment map is always first in 
the list + environmentMapCount = 0; if (!!environmentMap && options.environment_light_enable) { Light light = MakeEnvironmentLight(environmentMap.getWidth(), environmentMap.getHeight()); allLightData.push_back(light); + environmentMapCount = 1; } // Add delta lights to the list @@ -189,23 +195,28 @@ void LightSampler::run(CapsaicinInternal &capsaicin) noexcept } const uint32_t numLights = areaLightMaxCount + (uint32_t)allLightData.size(); - if (lightBuffer.getCount() < numLights) - { - // Create light buffer - gfxDestroyBuffer(gfx_, lightBuffer); - lightBuffer = gfxCreateBuffer(gfx_, numLights); - lightBuffer.setName("Capsaicin_AllLightBuffer"); - } + if (lightBuffers->getCount() < numLights) + for (uint32_t i = 0; i < ARRAYSIZE(lightBuffers); ++i) + { + char buffer[64]; + GFX_SNPRINTF(buffer, sizeof(buffer), "Capsaicin_AllLightBuffer%u", i); + + // Create light buffer + gfxDestroyBuffer(gfx_, lightBuffers[i]); + + lightBuffers[i] = gfxCreateBuffer(gfx_, numLights); + lightBuffers[i].setName(buffer); + } if (!allLightData.empty()) { // Copy delta lights to start of buffer (after any environment maps) GfxBuffer const upload_buffer = gfxCreateBuffer( gfx_, (uint32_t)allLightData.size(), allLightData.data(), kGfxCpuAccess_Write); gfxCommandCopyBuffer( - gfx_, lightBuffer, 0, upload_buffer, 0, allLightData.size() * sizeof(Light)); + gfx_, lightBuffers[lightBufferIndex], 0, upload_buffer, 0, allLightData.size() * sizeof(Light)); gfxDestroyBuffer(gfx_, upload_buffer); } - uint32_t lightCount = (uint32_t)allLightData.size(); + lightCount = (uint32_t)allLightData.size(); gfxCommandClearBuffer(gfx_, lightCountBuffer, lightCount); } @@ -225,12 +236,29 @@ void LightSampler::run(CapsaicinInternal &capsaicin) noexcept D3D12_DRAW_INDEXED_ARGUMENTS *drawCommands = (D3D12_DRAW_INDEXED_ARGUMENTS *)gfxBufferGetData(gfx_, drawCommandBuffer); + if (!lightInstancePrimitiveCount.empty()) + { + // Create light mesh buffer + gfxDestroyBuffer(gfx_, lightInstanceBuffer); + 
lightInstanceBuffer = + gfxCreateBuffer(gfx_, static_cast(lightInstancePrimitiveCount.size()), + lightInstancePrimitiveCount.data()); + lightInstanceBuffer.setName("Capsaicin_LightInstanceBuffer"); + } + if (lightInstancePrimitiveBuffer.getCount() < areaLightMaxCount) + { + // Create light mesh primitive buffer + gfxDestroyBuffer(gfx_, lightInstancePrimitiveBuffer); + lightInstancePrimitiveBuffer = gfxCreateBuffer(gfx_, areaLightMaxCount); + lightInstancePrimitiveBuffer.setName("Capsaicin_LightInstancePrimitiveBuffer"); + } + for (uint32_t i = 0; i < instanceCount; ++i) { GfxConstRef instanceRef = gfxSceneGetObjectHandle(scene, i); - if (!instanceRef->mesh || !instanceRef->mesh->material - || !gfxMaterialIsEmissive(*instanceRef->mesh->material)) + if (!instanceRef->mesh || !instanceRef->material + || !gfxMaterialIsEmissive(*instanceRef->material)) { continue; // not an emissive primitive } @@ -242,17 +270,20 @@ void LightSampler::run(CapsaicinInternal &capsaicin) noexcept drawCommands[drawCommandIndex].IndexCountPerInstance = mesh.index_count; drawCommands[drawCommandIndex].InstanceCount = 1; - drawCommands[drawCommandIndex].StartIndexLocation = mesh.index_offset / mesh.index_stride; - drawCommands[drawCommandIndex].BaseVertexLocation = mesh.vertex_offset / mesh.vertex_stride; + drawCommands[drawCommandIndex].StartIndexLocation = mesh.index_offset_idx; + drawCommands[drawCommandIndex].BaseVertexLocation = mesh.vertex_offset_idx; drawCommands[drawCommandIndex].StartInstanceLocation = drawCommandIndex; instanceIDData[drawCommandIndex] = instanceIndex; } - gfxProgramSetParameter(gfx_, gatherAreaLightsProgram, "g_LightBuffer", lightBuffer); + gfxProgramSetParameter(gfx_, gatherAreaLightsProgram, "g_LightBuffer", lightBuffers[lightBufferIndex]); gfxProgramSetParameter(gfx_, gatherAreaLightsProgram, "g_LightBufferSize", lightCountBuffer); + gfxProgramSetParameter( + gfx_, gatherAreaLightsProgram, "g_LightInstanceBuffer", lightInstanceBuffer); + 
gfxProgramSetParameter(gfx_, gatherAreaLightsProgram, "g_LightInstancePrimitiveBuffer", + lightInstancePrimitiveBuffer); - gfxProgramSetParameter(gfx_, gatherAreaLightsProgram, "g_MeshBuffer", capsaicin.getMeshBuffer()); gfxProgramSetParameter( gfx_, gatherAreaLightsProgram, "g_InstanceBuffer", capsaicin.getInstanceBuffer()); gfxProgramSetParameter( @@ -263,30 +294,34 @@ void LightSampler::run(CapsaicinInternal &capsaicin) noexcept gfxProgramSetParameter(gfx_, gatherAreaLightsProgram, "g_InstanceIDBuffer", instanceIDBuffer); gfxProgramSetParameter(gfx_, gatherAreaLightsProgram, "g_TextureMaps", capsaicin.getTextures(), capsaicin.getTextureCount()); - gfxProgramSetParameter( - gfx_, gatherAreaLightsProgram, "g_TextureSampler", capsaicin.getNearestSampler()); + gfx_, gatherAreaLightsProgram, "g_TextureSampler", capsaicin.getLinearSampler()); + gfxProgramSetParameter(gfx_, gatherAreaLightsProgram, "g_LightCount", lightCount); - gfxCommandBindKernel(gfx_, gatherAreaLightsKernel); + gfxCommandBindKernel(gfx_, countAreaLightsKernel); + gfxCommandMultiDrawIndexedIndirect(gfx_, drawCommandBuffer, drawCommandCount); + gfxCommandScanSum( + gfx_, kGfxDataType_Uint, lightInstancePrimitiveBuffer, lightInstancePrimitiveBuffer); + gfxCommandBindKernel(gfx_, scatterAreaLightsKernel); gfxCommandMultiDrawIndexedIndirect(gfx_, drawCommandBuffer, drawCommandCount); gfxDestroyBuffer(gfx_, instanceIDBuffer); gfxDestroyBuffer(gfx_, drawCommandBuffer); // If we actually have a change in the number of lights then we need to invalidate previous count - // history If all that happened is a change in transforms then we can ignore + // history. 
If all that happened is a change in transforms then we can ignore if (oldAreaLightMaxCount != areaLightMaxCount || capsaicin.getMeshesUpdated()) { for (auto &i : lightCountBufferTemp) { - i.first = 0; + i.first = false; } areaLightCount = areaLightMaxCount; } // Begin copy of new value (will take 'bufferIndex' number of frames to become valid) gfxCommandCopyBuffer(gfx_, lightCountBufferTemp[bufferIndex].second, lightCountBuffer); - lightCountBufferTemp[bufferIndex].first = areaLightMaxCount; + lightCountBufferTemp[bufferIndex].first = true; } else { @@ -298,14 +333,55 @@ void LightSampler::run(CapsaicinInternal &capsaicin) noexcept areaLightCount = 0; } } + else + { + // Lights haven't changed since last frame, so simply copy the previous light data across. + gfxCommandCopyBuffer(gfx_, lightBuffers[lightBufferIndex], lightBuffers[1 - lightBufferIndex]); + } +} + +void LightBuilder::terminate() noexcept +{ + for (GfxBuffer &lightBuffer : lightBuffers) + { + gfxDestroyBuffer(gfx_, lightBuffer); + lightBuffer = {}; + } + gfxDestroyBuffer(gfx_, lightCountBuffer); + lightCountBuffer = {}; + gfxDestroyBuffer(gfx_, lightInstanceBuffer); + lightInstanceBuffer = {}; + gfxDestroyBuffer(gfx_, lightInstancePrimitiveBuffer); + lightInstancePrimitiveBuffer = {}; + for (auto &i : lightCountBufferTemp) + { + gfxDestroyBuffer(gfx_, i.second); + i.second = {}; + } + lightCountBufferTemp.clear(); + + gfxDestroyKernel(gfx_, countAreaLightsKernel); + countAreaLightsKernel = {}; + gfxDestroyKernel(gfx_, scatterAreaLightsKernel); + scatterAreaLightsKernel = {}; + gfxDestroyProgram(gfx_, gatherAreaLightsProgram); + gatherAreaLightsProgram = {}; +} + +void LightBuilder::renderGUI(CapsaicinInternal &capsaicin) const noexcept +{ + ImGui::Checkbox("Enable Delta Lights", &capsaicin.getOption("delta_light_enable")); + ImGui::Checkbox("Enable Area Lights", &capsaicin.getOption("area_light_enable")); + ImGui::Checkbox("Enable Environment Lights", &capsaicin.getOption("environment_light_enable")); 
} -bool LightSampler::needsRecompile(CapsaicinInternal const &capsaicin) const noexcept +bool LightBuilder::needsRecompile([[maybe_unused]] CapsaicinInternal const &capsaicin) const noexcept { return getLightSettingsUpdated(); } -std::vector LightSampler::getShaderDefines(CapsaicinInternal const &capsaicin) const noexcept +std::vector LightBuilder::getShaderDefines( + [[maybe_unused]] CapsaicinInternal const &capsaicin) const noexcept { std::vector baseDefines; if (!options.delta_light_enable) @@ -323,33 +399,37 @@ std::vector LightSampler::getShaderDefines(CapsaicinInternal const return baseDefines; } -void LightSampler::addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept +void LightBuilder::addProgramParameters( + [[maybe_unused]] CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept { gfxProgramSetParameter(gfx_, program, "g_LightBufferSize", lightCountBuffer); - gfxProgramSetParameter(gfx_, program, "g_LightBuffer", lightBuffer); + gfxProgramSetParameter(gfx_, program, "g_LightBuffer", lightBuffers[lightBufferIndex]); + gfxProgramSetParameter(gfx_, program, "g_PrevLightBuffer", lightBuffers[1 - lightBufferIndex]); + gfxProgramSetParameter(gfx_, program, "g_LightInstanceBuffer", lightInstanceBuffer); + gfxProgramSetParameter(gfx_, program, "g_LightInstancePrimitiveBuffer", lightInstancePrimitiveBuffer); } -uint32_t LightSampler::getAreaLightCount() const +uint32_t LightBuilder::getAreaLightCount() const { return areaLightCount; } -uint32_t LightSampler::getDeltaLightCount() const +uint32_t LightBuilder::getDeltaLightCount() const { return deltaLightCount; } -uint32_t LightSampler::getLightCount() const +uint32_t LightBuilder::getLightCount() const { return areaLightCount + deltaLightCount + environmentMapCount; } -bool LightSampler::getLightsUpdated() const +bool LightBuilder::getLightsUpdated() const { return lightsUpdated; } -bool LightSampler::getLightSettingsUpdated() const +bool 
LightBuilder::getLightSettingsUpdated() const { return lightSettingChanged; } diff --git a/src/core/src/components/light_builder/light_builder.h b/src/core/src/components/light_builder/light_builder.h new file mode 100644 index 0000000..b85eb7b --- /dev/null +++ b/src/core/src/components/light_builder/light_builder.h @@ -0,0 +1,171 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#pragma once + +#include "components/component.h" + +namespace Capsaicin +{ +class LightBuilder + : public Component + , public ComponentFactory::Registrar +{ +public: + static constexpr std::string_view Name = "LightBuilder"; + + /** Constructor. 
*/ + LightBuilder() noexcept; + + LightBuilder(LightBuilder const &) noexcept = delete; + + LightBuilder(LightBuilder &&) noexcept = default; + + /** Destructor. */ + virtual ~LightBuilder() noexcept; + + /* + * Gets configuration options for current technique. + * @return A list of all valid configuration options. + */ + RenderOptionList getRenderOptions() noexcept; + + struct RenderOptions + { + bool delta_light_enable = true; /**< True to enable delta light in light sampling */ + bool area_light_enable = true; /**< True to enable area lights in light sampling */ + bool environment_light_enable = true; /**< True to enable environment lights in light sampling */ + }; + + /** + * Convert render options to internal options format. + * @param options Current render options. + * @returns The options converted. + */ + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; + + /** + * Initialise any internal data or state. + * @note This is automatically called by the framework after construction and should be used to create + * any required CPU|GPU resources. + * @param capsaicin Current framework context. + * @returns True if initialisation succeeded, False otherwise. + */ + bool init(CapsaicinInternal const &capsaicin) noexcept override; + + /** + * Run internal operations. + * @param [in,out] capsaicin Current framework context. + */ + void run(CapsaicinInternal &capsaicin) noexcept override; + + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + + /** + * Render GUI options. + * @param [in,out] capsaicin The current capsaicin context. + */ + void renderGUI(CapsaicinInternal &capsaicin) const noexcept override; + + /** + * Check to determine if any kernels using light sampler code need to be (re)compiled. + * @param capsaicin Current framework context. + * @returns True if an update occurred requiring internal updates to be performed. 
+ */ + virtual bool needsRecompile(CapsaicinInternal const &capsaicin) const noexcept; + + /** + * Get the list of shader defines that should be passed to any kernel that uses the lightSampler. + * @param capsaicin Current framework context. + * @returns A vector with each required define. + */ + virtual std::vector getShaderDefines(CapsaicinInternal const &capsaicin) const noexcept; + + /** + * Add the required program parameters to a shader based on current settings. + * @param capsaicin Current framework context. + * @param program The shader program to bind parameters to. + */ + virtual void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept; + + /** + * Gets count of enabled area lights in current scene. + * @returns The area light count. + */ + uint32_t getAreaLightCount() const; + + /** + * Gets count of enabled delta lights (point,spot,direction) in current scene. + * @returns The delta light count. + */ + uint32_t getDeltaLightCount() const; + + /** + * Gets approximate light count within the light buffer. + * The light count is a maximum upper bound of possible lights in the light list. Since lights are culled + * on the GPU it takes several frames for the exact value to be read back. + * This should not be used in any shader operations as @getLightCountBuffer() should be used instead. + * @returns The light count. + */ + uint32_t getLightCount() const; + + /** + * Check if the scene's lighting data was changed this frame. + * @returns True if light data has changed. + */ + bool getLightsUpdated() const; + + /** + * Check if the light settings have changed (i.e. enabled/disabled lights). + * @returns True if light settings have changed.
+ */ + bool getLightSettingsUpdated() const; + +private: + RenderOptions options; + + uint32_t areaLightTotal = 0; /**< Number of area lights in meshes (may not be all enabled) */ + size_t lightHash = 0; + uint32_t areaLightMaxCount = 0; /**< Max number of area lights in light buffer */ + uint32_t areaLightCount = 0; /**< Approximate number of area lights in light buffer */ + uint32_t deltaLightCount = 0; /**< Number of delta lights in light buffer */ + uint32_t environmentMapCount = 0; /**< Number of environment map lights in buffer */ + uint32_t lightBufferIndex = 0; /**< Index of currently active light buffer */ + + bool lightsUpdated = true; + bool lightSettingChanged = true; + + GfxBuffer lightBuffers[2]; /**< Buffers used to hold all light list */ + GfxBuffer lightCountBuffer; /**< Buffer used to hold number of lights in light buffer */ + GfxBuffer lightInstanceBuffer; /**< Buffer used to hold the offset of the instance primitives */ + GfxBuffer + lightInstancePrimitiveBuffer; /**< Buffer used to hold the light identifier per emissive primitive */ + std::vector> + lightCountBufferTemp; /**< Buffer used to copy light count into cpu memory */ + + GfxKernel countAreaLightsKernel; + GfxKernel scatterAreaLightsKernel; + GfxProgram gatherAreaLightsProgram; +}; +} // namespace Capsaicin diff --git a/src/core/src/components/light_builder/light_builder.hlsl b/src/core/src/components/light_builder/light_builder.hlsl new file mode 100644 index 0000000..8ccf9f8 --- /dev/null +++ b/src/core/src/components/light_builder/light_builder.hlsl @@ -0,0 +1,99 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#ifndef LIGHT_SAMPLER_HLSL +#define LIGHT_SAMPLER_HLSL + +#include "../../lights/lights.hlsl" + +// Requires the following data to be defined in any shader that uses this file +StructuredBuffer g_LightBufferSize; +StructuredBuffer g_LightBuffer; +RWStructuredBuffer g_LightInstanceBuffer; +RWStructuredBuffer g_LightInstancePrimitiveBuffer; + +/** + * Check if the current scene has any lights. + * @returns True if the light buffer contains at least one light. + */ +bool hasLights() +{ + return (g_LightBufferSize[0] > 0); +} + +/** + * Get number of lights. + * @returns The number of lights currently in the scene. + */ +uint getNumberLights() +{ + return g_LightBufferSize[0]; +} + +/** + * Get a light corresponding to a light index. + * @param index The index of the light to retrieve (range [0, getNumberLights())). + * @returns The light stored at the requested index.
+ */ +Light getLight(uint index) +{ + return g_LightBuffer[index]; +} + +/** + * Check if the current scene has an environment light. + * @returns True if environment light could be found. + */ +bool hasEnvironmentLight() +{ + if (g_LightBufferSize[0] == 0) + { + return false; + } + // Assumes that the environment light is always first + Light selectedLight = g_LightBuffer[0]; + return selectedLight.get_light_type() == kLight_Environment; +} + +/** + * Get the current environment map. + * @note This is only valid if the scene contains a valid environment map. + * @returns The environment map. + */ +LightEnvironment getEnvironmentLight() +{ + return MakeLightEnvironment(g_LightBuffer[0]); +} + +/** + * Get the light ID for a specific area light. + * @note The inputs are not checked to ensure they actually map to a valid emissive surface. + * @param instanceIndex Instance ID of requested area light. + * @param primitiveIndex Primitive ID of requested area light. + * @returns The light ID, undefined if inputs are invalid. + */ +uint getAreaLightIndex(uint instanceIndex, uint primitiveIndex) +{ + return g_LightInstancePrimitiveBuffer[primitiveIndex + g_LightInstanceBuffer[instanceIndex]]; +} + +#endif diff --git a/src/core/src/components/light_sampler/light_sampler.h b/src/core/src/components/light_sampler/light_sampler.h index 816e31b..65aaf30 100644 --- a/src/core/src/components/light_sampler/light_sampler.h +++ b/src/core/src/components/light_sampler/light_sampler.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,133 +21,65 @@ THE SOFTWARE. 
********************************************************************/ #pragma once +#include "capsaicin_internal_types.h" #include "components/component.h" +#include "factory.h" namespace Capsaicin { -class LightSampler : public Component::RegistrarName -{ -public: - static constexpr std::string_view Name = "LightSampler"; - - /** Constructor. */ - LightSampler() noexcept {} - - LightSampler(LightSampler const &) noexcept = delete; - - LightSampler(LightSampler &&) noexcept = default; - - /** Destructor. */ - virtual ~LightSampler() noexcept; - - /* - * Gets configuration options for current technique. - * @return A list of all valid configuration options. - */ - RenderOptionList getRenderOptions() noexcept; - - struct RenderOptions - { - bool delta_light_enable = true; /**< True to enable delta light in light sampling */ - bool area_light_enable = false; /**< True to enable area lights in light sampling */ - bool environment_light_enable = true; /**< True to enable environment lights in light sampling */ - }; +class CapsaicinInternal; - /** - * Convert render settings to internal options format. - * @param settings Current render settings. - * @returns The options converted. - */ - static RenderOptions convertOptions(RenderSettings const &settings) noexcept; - - /** - * Initialise any internal data or state. - * @note This is automatically called by the framework after construction and should be used to create - * any required CPU|GPU resources. - * @param capsaicin Current framework context. - * @returns True if initialisation succeeded, False otherwise. - */ - bool init(CapsaicinInternal const &capsaicin) noexcept override; +class LightSampler : public Component +{ + LightSampler(LightSampler const &) = delete; + LightSampler &operator=(LightSampler const &) = delete; - /** - * Run internal operations. - * @param [in,out] capsaicin Current framework context. 
- */ - void run(CapsaicinInternal &capsaicin) noexcept override; +public: + using Component::Component; + using Component::getBuffers; + using Component::getComponents; + using Component::getRenderOptions; + using Component::init; + using Component::renderGUI; + using Component::run; + using Component::terminate; /** * Check to determine if any kernels using light sampler code need to be (re)compiled. * @param capsaicin Current framework context. - * @returns True if an update occurred requiring internal updates to be performed. + * @return True if an update occurred requiring internal updates to be performed. */ - virtual bool needsRecompile(CapsaicinInternal const &capsaicin) const noexcept; + virtual bool needsRecompile(CapsaicinInternal const &capsaicin) const noexcept = 0; /** - * Get the list of shader defines that should be passed to any kernel that uses the lightSampler. + * Get the list of shader defines that should be passed to any kernel that uses this lightSampler. * @param capsaicin Current framework context. - * @returns A vector with each required define. + * @return A vector with each required define. */ - virtual std::vector getShaderDefines(CapsaicinInternal const &capsaicin) const noexcept; + virtual std::vector getShaderDefines(CapsaicinInternal const &capsaicin) const noexcept = 0; /** * Add the required program parameters to a shader based on current settings. * @param capsaicin Current framework context. * @param program The shader program to bind parameters to. */ - virtual void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept; - - /** - * Gets count of enabled area lights in current scene. - * @returns The area light count. - */ - uint32_t getAreaLightCount() const; - - /** - * Gets count of enabled delta lights (point,spot,direction) in current scene. - * @returns The delta light count. - */ - uint32_t getDeltaLightCount() const; - - /** - * Gets approximate light count within the light buffer. 
- * The light count is a maximum upper bound of possible lights in the light list. Since lights are culled - * on the GPU it takes several frames for the exact value to be read back. - * This should not be used in ant shader operations as @getLightCountBuffer() should be used instead. - * @returns The light count. - */ - uint32_t getLightCount() const; + virtual void addProgramParameters( + CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept = 0; /** * Check if the scenes lighting data was changed this frame. + * @param capsaicin Current framework context. * @returns True if light data has changed. */ - bool getLightsUpdated() const; + virtual bool getLightsUpdated(CapsaicinInternal const &capsaicin) const noexcept = 0; /** - * Check if the light settings have changed (i.e. enabled/disabled lights). - * @returns True if light settings have changed. + * Get the name of the header file used in HLSL code to include necessary sampler functions. + * @return String name of the HLSL header include. 
*/ - bool getLightSettingsUpdated() const; - -private: - RenderOptions options; - - uint32_t areaLightTotal = 0; /**< Number of area lights in meshes (may not be all enabled) */ - size_t lightHash = 0; - uint32_t areaLightMaxCount = 0; /**< Max number of area lights in light buffer */ - uint32_t areaLightCount = 0; /**< Approximate number of area lights in light buffer */ - uint32_t deltaLightCount = 0; /**< Number of delta lights in light buffer */ - uint32_t environmentMapCount = 0; /**< Number of environment map lights in buffer */ - - bool lightsUpdated = true; - bool lightSettingChanged = true; - - GfxBuffer lightBuffer; /**< Buffer used to hold all light list */ - GfxBuffer lightCountBuffer; /**< Buffer used to hold number of lights in light buffer */ - std::vector> - lightCountBufferTemp; /**< Buffer used to copy light count into cpu memory */ - - GfxKernel gatherAreaLightsKernel; - GfxProgram gatherAreaLightsProgram; + virtual std::string_view getHeaderFile() const noexcept = 0; }; + +class LightSamplerFactory : public Factory +{}; } // namespace Capsaicin diff --git a/src/core/src/components/light_sampler/light_sampler.hlsl b/src/core/src/components/light_sampler/light_sampler.hlsl index 8028948..295ad67 100644 --- a/src/core/src/components/light_sampler/light_sampler.hlsl +++ b/src/core/src/components/light_sampler/light_sampler.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,66 +20,4 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
********************************************************************/ -#ifndef LIGHT_SAMPLER_HLSL -#define LIGHT_SAMPLER_HLSL - -#include "../../lights/lights.hlsl" - -// Requires the following data to be defined in any shader that uses this file -StructuredBuffer g_LightBufferSize; -StructuredBuffer g_LightBuffer; - -/** - * Check if the current scene has an environment light. - * @returns True if environment light could be found. - */ -bool hasLights() -{ - return (g_LightBufferSize[0] > 0); -} - -/** - * Get number of lights. - * @returns The number of lights currently in the scene. - */ -uint getNumberLights() -{ - return g_LightBufferSize[0]; -} - -/** - * Get a light corresponding to a light index. - * @param index The index of the light to retrieve (range [0, getNumberLights())). - * @returns The number of lights currently in the scene. - */ -Light getLight(uint index) -{ - return g_LightBuffer[index]; -} - -/** - * Check if the current scene has an environment light. - * @returns True if environment light could be found. - */ -bool hasEnvironmentLight() -{ - if (g_LightBufferSize[0] == 0) - { - return false; - } - // Assumes that the environment light is always first - Light selectedLight = g_LightBuffer[0]; - return selectedLight.get_light_type() == kLight_Environment; -} - -/** - * Get the current environment map. - * @note This is only valid if the scene contains a valid environment map. - * @returns The environment map. 
- */ -LightEnvironment getEnvironmentLight() -{ - return MakeLightEnvironment(g_LightBuffer[0]); -} - -#endif +#include LIGHT_SAMPLER_HEADER diff --git a/src/core/src/components/light_sampler/light_sampler_switcher.cpp b/src/core/src/components/light_sampler/light_sampler_switcher.cpp new file mode 100644 index 0000000..e3f3ba7 --- /dev/null +++ b/src/core/src/components/light_sampler/light_sampler_switcher.cpp @@ -0,0 +1,192 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#include "light_sampler_switcher.h" + +#include "capsaicin_internal.h" +#include "light_sampler.h" + +#include + +namespace Capsaicin +{ +LightSamplerSwitcher::LightSamplerSwitcher() noexcept + : Component(Name) +{} + +LightSamplerSwitcher::~LightSamplerSwitcher() noexcept +{ + terminate(); +} + +RenderOptionList LightSamplerSwitcher::getRenderOptions() noexcept +{ + RenderOptionList newOptions; + newOptions.emplace(RENDER_OPTION_MAKE(light_sampler_type, options)); + for (auto &i : LightSamplerFactory::getNames()) + { + for (auto &j : LightSamplerFactory::make(i)->getRenderOptions()) + { + if (std::find(newOptions.cbegin(), newOptions.cend(), j) == newOptions.cend()) + { + // Add the new component to requested list + newOptions.emplace(std::move(j)); + } + } + } + return newOptions; +} + +LightSamplerSwitcher::RenderOptions LightSamplerSwitcher::convertOptions( + RenderOptionList const &options) noexcept +{ + RenderOptions newOptions; + RENDER_OPTION_GET(light_sampler_type, newOptions, options) + return newOptions; +} + +ComponentList LightSamplerSwitcher::getComponents() const noexcept +{ + ComponentList components; + // Loop through all possible light samplers and get used components + for (auto &i : LightSamplerFactory::getNames()) + { + for (auto &j : LightSamplerFactory::make(i)->getComponents()) + { + if (std::find(components.cbegin(), components.cend(), j) == components.cend()) + { + // Add the new component to requested list + components.emplace_back(std::move(j)); + } + } + } + return components; +} + +bool LightSamplerSwitcher::init(CapsaicinInternal const &capsaicin) noexcept +{ + options = convertOptions(capsaicin.getOptions()); + // Initialise the requested light sampler + auto newSampler = LightSamplerFactory::make(LightSamplerFactory::getNames()[options.light_sampler_type]); + std::swap(currentSampler, newSampler); + currentSampler->setGfxContext(gfx_); + 
currentSampler->init(capsaicin); + return true; +} + +void LightSamplerSwitcher::run(CapsaicinInternal &capsaicin) noexcept +{ + samplerChanged = false; + auto const optionsNew = convertOptions(capsaicin.getOptions()); + if (optionsNew.light_sampler_type != options.light_sampler_type) + { + samplerChanged = true; + currentSampler->terminate(); + // Initialise the requested light sampler + auto newSampler = + LightSamplerFactory::make(LightSamplerFactory::getNames()[optionsNew.light_sampler_type]); + std::swap(currentSampler, newSampler); + currentSampler->setGfxContext(gfx_); + currentSampler->init(capsaicin); + } + options = optionsNew; + currentSampler->run(capsaicin); +} + +void LightSamplerSwitcher::terminate() noexcept +{ + if (currentSampler) { currentSampler->terminate(); } // null until init() has run; the destructor also calls this +} + +void LightSamplerSwitcher::renderGUI(CapsaicinInternal &capsaicin) const noexcept +{ + // Select which light sampler to use + std::string samplerString; + auto samplerList = LightSamplerFactory::getNames(); + for (auto &i : samplerList) + { + samplerString += i; + samplerString += '\0'; + } + ImGui::Combo("Light Sampler", + reinterpret_cast(&capsaicin.getOption("light_sampler_type")), + samplerString.c_str()); + return currentSampler->renderGUI(capsaicin); +} + +bool LightSamplerSwitcher::needsRecompile(CapsaicinInternal const &capsaicin) const noexcept +{ + return samplerChanged || currentSampler->needsRecompile(capsaicin); +} + +std::vector LightSamplerSwitcher::getShaderDefines( + CapsaicinInternal const &capsaicin) const noexcept +{ + auto ret = currentSampler->getShaderDefines(capsaicin); + std::string samplerHeader = "LIGHT_SAMPLER_HEADER="; + samplerHeader += currentSampler->getHeaderFile(); + ret.emplace_back(samplerHeader); + return ret; +} + +void LightSamplerSwitcher::addProgramParameters( + CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept +{ + currentSampler->addProgramParameters(capsaicin, program); +} + +bool LightSamplerSwitcher::getLightsUpdated(CapsaicinInternal const 
&capsaicin) const noexcept +{ + return samplerChanged || currentSampler->getLightsUpdated(capsaicin); +} + +uint32_t LightSamplerSwitcher::getTimestampQueryCount() const noexcept +{ + return Timeable::getTimestampQueryCount() + currentSampler->getTimestampQueryCount(); +} + +std::vector const &LightSamplerSwitcher::getTimestampQueries() const noexcept +{ + static std::vector tempQueries; + // Need to add child queries to the current list of queries + tempQueries = queries; + auto const &childQueries = currentSampler->getTimestampQueries(); + tempQueries.insert(tempQueries.begin() + this->queryCount, childQueries.begin(), childQueries.end()); + return tempQueries; +} + +void LightSamplerSwitcher::resetQueries() noexcept +{ + Timeable::resetQueries(); + currentSampler->resetQueries(); +} + +void LightSamplerSwitcher::setGfxContext(GfxContext const &gfx) noexcept +{ + gfx_ = gfx; + if (currentSampler) + { + currentSampler->setGfxContext(gfx); + } +} + +} // namespace Capsaicin diff --git a/src/core/src/components/light_sampler/light_sampler_switcher.h b/src/core/src/components/light_sampler/light_sampler_switcher.h new file mode 100644 index 0000000..ab2a7f6 --- /dev/null +++ b/src/core/src/components/light_sampler/light_sampler_switcher.h @@ -0,0 +1,152 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#pragma once + +#include "capsaicin_internal.h" +#include "components/light_sampler/light_sampler.h" +#include "render_technique.h" + +namespace Capsaicin +{ +class LightSamplerSwitcher + : public Component + , public ComponentFactory::Registrar +{ +public: + static constexpr std::string_view Name = "LightSamplerSwitcher"; + + LightSamplerSwitcher(LightSamplerSwitcher const &) noexcept = delete; + + LightSamplerSwitcher(LightSamplerSwitcher &&) noexcept = default; + + /** Constructor. */ + LightSamplerSwitcher() noexcept; + + /** Destructor. */ + ~LightSamplerSwitcher() noexcept; + + /* + * Gets configuration options for current technique. + * @return A list of all valid configuration options. + */ + RenderOptionList getRenderOptions() noexcept override; + + struct RenderOptions + { + uint32_t light_sampler_type = 1; /**< The light sampler to use */ + }; + + /** + * Convert render options to internal options format. + * @param options Current render options. + * @returns The options converted. + */ + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; + + /** + * Gets a list of any shared components used by the current render technique. + * @return A list of all supported components. + */ + ComponentList getComponents() const noexcept override; + + /** + * Initialise any internal data or state. 
+ * @note This is automatically called by the framework after construction and should be used to create + * any required CPU|GPU resources. + * @param capsaicin Current framework context. + * @return True if initialisation succeeded, False otherwise. + */ + bool init(CapsaicinInternal const &capsaicin) noexcept override; + + /** + * Run internal operations. + * @param [in,out] capsaicin Current framework context. + */ + void run(CapsaicinInternal &capsaicin) noexcept override; + + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + + /** + * Render GUI options. + * @param [in,out] capsaicin The current capsaicin context. + */ + void renderGUI(CapsaicinInternal &capsaicin) const noexcept override; + + /** + * Check to determine if any kernels using light sampler code need to be (re)compiled. + * @note Must be called before run(). + * @param capsaicin Current framework context. + * @return True if an update occurred requiring internal updates to be performed. + */ + bool needsRecompile(CapsaicinInternal const &capsaicin) const noexcept; + + /** + * Get the list of shader defines that should be passed to any kernel that uses this lightSampler. + * @param capsaicin Current framework context. + * @return A vector with each required define. + */ + std::vector getShaderDefines(CapsaicinInternal const &capsaicin) const noexcept; + + /** + * Add the required program parameters to a shader based on current settings. + * @param capsaicin Current framework context. + * @param program The shader program to bind parameters to. + */ + void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept; + + /** + * Check if the scenes lighting data was changed this frame. + * @param capsaicin Current framework context. + * @returns True if light data has changed. + */ + bool getLightsUpdated(CapsaicinInternal const &capsaicin) const noexcept; + + /** + * Gets number of timestamp queries. 
+ * @returns The timestamp query count. + */ + uint32_t getTimestampQueryCount() const noexcept override; + + /** + * Gets timestamp queries. + * @returns The timestamp queries. + */ + std::vector const &getTimestampQueries() const noexcept override; + + /** Resets the timed section queries */ + void resetQueries() noexcept override; + + /** + * Sets internal graphics context + * @param gfx The gfx context. + */ + void setGfxContext(GfxContext const &gfx) noexcept override; + +private: + RenderOptions options; + std::unique_ptr currentSampler = nullptr; /**< The currently active light sampler */ + bool samplerChanged = true; /**< Flag indicating if a sampler change has occurred */ +}; +} // namespace Capsaicin diff --git a/src/core/src/components/light_sampler/light_sampler_uniform.hlsl b/src/core/src/components/light_sampler/light_sampler_uniform.hlsl deleted file mode 100644 index 76fc0f5..0000000 --- a/src/core/src/components/light_sampler/light_sampler_uniform.hlsl +++ /dev/null @@ -1,117 +0,0 @@ -/********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ - -#ifndef LIGHT_SAMPLER_UNIFORM_HLSL -#define LIGHT_SAMPLER_UNIFORM_HLSL - -/* -// Requires the following data to be defined in any shader that uses this file -TextureCube g_EnvironmentBuffer; -Texture2D g_TextureMaps[] : register(space99); -SamplerState g_LinearSampler; -*/ - -#include "light_sampler.hlsl" -#include "../../lights/light_sampling.hlsl" -#include "../../lights/reservoir.hlsl" - -/** - * Get a sample light. - * @tparam RNG The type of random number sampler to be used. - * @param randomNG Random number sampler used to sample light. - * @param position Current position on surface. - * @param normal Shading normal vector at current position. - * @param lightPDF (Out) The PDF for the calculated sample (is equal to zero if no valid samples could be found). - * @returns The index of the new light sample - */ -template -uint sampleLights(/*inout TODO: dxc crash*/ RNG randomNG, float3 position, float3 normal, out float lightPDF) -{ - float totalLights = getNumberLights(); - - // Return invalid sample if there are no lights - if (totalLights == 0) - { - lightPDF = 0.0f; - return 0; - } - - // Choose a light to sample from - lightPDF = 1.0f / totalLights; - uint lightIndex = randomNG.randInt(totalLights); - return lightIndex; -} - -/** - * Calculate the PDF of sampling a given light. - * @param position The position on the surface currently being shaded. - * @returns The calculated PDF with respect to the light. - */ -float sampleLightPDF(float3 position) -{ - return 1.0f / getNumberLights(); -} - -/** - * Sample multiple lights into a reservoir. - * @tparam numSampledLights Number of lights to sample. 
- * @tparam RNG The type of random number sampler to be used. - * @param randomNG Random number sampler used to sample light. - * @param position Current position on surface. - * @param normal Shading normal vector at current position. - * @param viewDirection View direction vector at current position. - * @param solidAngle Solid angle around view direction of visible ray cone. - * @param material Material for current surface position. - * @returns Reservoir containing combined samples. - */ -template -Reservoir sampleLightListCone(/*inout TODO: dxc crash*/ RNG randomNG, float3 position, float3 normal, float3 viewDirection, float solidAngle, MaterialBRDF material) -{ - // Check if we actually have any lights - const uint totalLights = getNumberLights(); - const uint numLights = min(totalLights, numSampledLights); - - // Return invalid sample if there are no lights - if (numLights == 0) - { - return MakeReservoir(); - } - - // Create reservoir updater - ReservoirUpdater updater = MakeReservoirUpdater(); - - // Loop through until we have the requested number of lights - float lightPDF = 1.0f / totalLights; - for (uint lightsAdded = 0; lightsAdded < numLights; ++lightsAdded) - { - // Choose a light to sample from - const uint lightIndex = randomNG.randInt(totalLights); - - // Add the light sample to the reservoir - updateReservoir(updater, randomNG, lightIndex, lightPDF, material, position, normal, viewDirection, solidAngle); - } - - // Get finalised reservoir for return - return getUpdatedReservoir(updater); -} - -#endif // LIGHT_SAMPLER_UNIFORM_HLSL diff --git a/src/core/src/components/light_sampler_bounds/light_sampler_bounds.comp b/src/core/src/components/light_sampler_bounds/light_sampler_bounds.comp deleted file mode 100644 index c770add..0000000 --- a/src/core/src/components/light_sampler_bounds/light_sampler_bounds.comp +++ /dev/null @@ -1,227 +0,0 @@ -/********************************************************************** -Copyright (c) 2023 Advanced Micro 
Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ - -#include "light_sampler_bounds_shared.h" - -ConstantBuffer g_LightSampler_Constants; -RWStructuredBuffer g_DispatchCommandBuffer; - -TextureCube g_EnvironmentBuffer; -Texture2D g_TextureMaps[] : register(space99); -SamplerState g_TextureSampler; // Is a linear sampler - -uint g_FrameIndex; - -#include "light_sampler_bounds.hlsl" -#include "../../lights/light_sampling_volume.hlsl" -#include "../../math/random.hlsl" - -#ifdef LIGHTSAMPLERBOUNDS_USE_THRESHOLD -#define STOCHASTIC_THRESHOLD_RADIANCE (1.0f / 2048.0f) -#endif - -/** - * Get the bounding box for a specific grid cell. - * @param cellID The ID of the grid cell. - * @param maxBB (Out) The return bounding box max values. - * @return The bounding box min values. 
- */ -float3 getCellBB(uint3 cellID, out float3 maxBB) -{ - const float3 minBB = ((float3)cellID * g_LightSampler_Configuration[0].cellSize) + g_LightSampler_Configuration[0].sceneMin; - maxBB = minBB + g_LightSampler_Configuration[0].cellSize; - return minBB; -} - -#define THREADX 4 -#define THREADY 4 -#define THREADZ 4 - -/** - * Create required internal configuration values. - * Calculates the required number of grid cells that need to be dispatched through an indirect call to Build. - */ -[numthreads(1, 1, 1)] -void CalculateBounds() -{ - const uint3 dispatch = uint3(THREADX, THREADY, THREADZ); - // Get the scene bounds - const float3 sceneMin = g_LightSampler_MinBounds[0]; - const float3 sceneMax = g_LightSampler_MaxBounds[0]; - - // Ensure each cell is square - const float3 sceneExtent = sceneMax - sceneMin; - const float largestAxis = max(sceneExtent.x, max(sceneExtent.y, sceneExtent.z)); - const float cellScale = largestAxis / g_LightSampler_Constants.maxCellsPerAxis; - const float3 cellNum = ceil(sceneExtent / cellScale); - - // Clamp max number of lights to those actually available - const uint lightsPerCell = min(g_LightSampler_Constants.maxNumLightsPerCell, getNumberLights()); - - // Update internal configuration values - g_LightSampler_Configuration[0].numCells = uint4((uint3)cellNum, lightsPerCell); - g_LightSampler_Configuration[0].cellSize = sceneExtent / cellNum; - g_LightSampler_Configuration[0].sceneMin = sceneMin; - g_LightSampler_Configuration[0].sceneExtent = sceneExtent; - - // Get total number of grid cells - uint3 groups = g_LightSampler_Configuration[0].numCells.xyz; - - groups = (groups + dispatch - 1.xxx) / dispatch; - g_DispatchCommandBuffer[0].num_groups_x = groups.x; - g_DispatchCommandBuffer[0].num_groups_y = groups.y; - g_DispatchCommandBuffer[0].num_groups_z = groups.z; -} - -/** - * Build an internal grid. 
- */ -[numthreads(THREADX, THREADY, THREADZ)] -void Build(in uint3 did : SV_DispatchThreadID, in uint gid : SV_GroupIndex) -{ - // Called per cell - const uint3 cellID = did; - if (any(cellID >= g_LightSampler_Configuration[0].numCells.xyz)) - { - return; - } - - // Calculate the bounding box for the current cell - float3 maxBB; - const float3 minBB = getCellBB(cellID, maxBB); - - // Clamp number of lights between max available and total valid lights - float totalLights = getNumberLights(); - const uint maxLightsPerCell = min(g_LightSampler_Configuration[0].numCells.w, totalLights); - - // Loop through all lights - const uint cellIndex = LightSamplerBounds::getCellIndex(cellID); - const uint startIndex = cellIndex + 1; - Random randomNG = MakeRandom(cellIndex, g_FrameIndex); - uint storedLights = 0; //Num of stored lights in cell - float totalWeight = 0.0f; -#ifndef LIGHTSAMPLERBOUNDS_USE_CDF - float Wsum = 0.0f; - uint M = 0; -#endif - for (uint lightIndex = 0; lightIndex < totalLights; ++lightIndex) - { - // Calculate sampled contribution for light - Light selectedLight = getLight(lightIndex); - float y = evaluateLightVolume(selectedLight, minBB, maxBB); - - // Only add to the cells light list if it actually contributes to the lighting -#ifdef LIGHTSAMPLERBOUNDS_USE_THRESHOLD - float probAdd = min(y / STOCHASTIC_THRESHOLD_RADIANCE, 1); - y = max(y, STOCHASTIC_THRESHOLD_RADIANCE); -#endif - -#ifndef LIGHTSAMPLERBOUNDS_USE_CDF - float weight = y * totalLights; // targetPDF / sourcePDF where sourcePDF = 1/numLights - Wsum += weight; - ++M; - if (storedLights < maxLightsPerCell) - { -#ifdef LIGHTSAMPLERBOUNDS_USE_THRESHOLD - // Uses stochastic sampling - if (probAdd >= randomNG.rand()) -#endif - { - g_LightSampler_CellsIndex[startIndex + storedLights] = lightIndex; - g_LightSampler_CellsCDF[startIndex + storedLights] = y; - ++storedLights; - } - } - else - { - if (((randomNG.rand() * Wsum) <= weight)) - { - //A-Chao randomly replace an existing item - uint 
replaceIndex = randomNG.randInt(maxLightsPerCell); - g_LightSampler_CellsIndex[startIndex + replaceIndex] = lightIndex; - g_LightSampler_CellsCDF[startIndex + replaceIndex] = y; - } - } -#else -#ifdef LIGHTSAMPLERBOUNDS_USE_THRESHOLD - // Uses stochastic sampling - if (probAdd >= randomNG.rand()) -#endif - { - // Store only the most important lights - totalWeight += y; - if (storedLights < maxLightsPerCell) - { - ++storedLights; - g_LightSampler_CellsIndex[cellIndex + storedLights] = lightIndex; - g_LightSampler_CellsCDF[cellIndex + storedLights] = y; - } - else - { - // Find the lowest contributing light and replace - uint smallestLight = -1; - float smallestCDF = y; - uint writeIndex = startIndex + storedLights; - for (uint light = startIndex; light < writeIndex; ++light) - { - if (g_LightSampler_CellsCDF[light] < smallestCDF) - { - smallestLight = light; - smallestCDF = g_LightSampler_CellsCDF[light]; - } - } - if (smallestLight != -1) - { - g_LightSampler_CellsIndex[smallestLight] = lightIndex; - g_LightSampler_CellsCDF[smallestLight] = y; - } - } - } -#endif - } - - // Add table for cells light list - g_LightSampler_CellsIndex[cellIndex] = storedLights; - -#ifndef LIGHTSAMPLERBOUNDS_USE_CDF - // Write out total sample weight - g_LightSampler_CellsCDF[cellIndex] = Wsum / (float)M; -#else - // Convert to CDF - float runningCDF = 0.0f; - for (uint i = startIndex; i <= cellIndex + storedLights; ++i) - { - runningCDF = runningCDF + g_LightSampler_CellsCDF[i]; - g_LightSampler_CellsCDF[i] = runningCDF; - } - float maxCDF = runningCDF; - // Normalise CDF - for (uint j = startIndex; j < cellIndex + storedLights; ++j) - { - g_LightSampler_CellsCDF[j] /= maxCDF; - } - g_LightSampler_CellsCDF[cellIndex + storedLights] = 1.0f; - - // Write out max cdf to cell table - g_LightSampler_CellsCDF[cellIndex] = maxCDF / totalWeight; -#endif -} diff --git a/src/core/src/components/light_sampler_bounds/light_sampler_bounds.cpp 
b/src/core/src/components/light_sampler_bounds/light_sampler_bounds.cpp deleted file mode 100644 index d502f12..0000000 --- a/src/core/src/components/light_sampler_bounds/light_sampler_bounds.cpp +++ /dev/null @@ -1,379 +0,0 @@ -/********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ - -#include "light_sampler_bounds.h" - -#include "capsaicin_internal.h" -#include "components/stratified_sampler/stratified_sampler.h" -#include "light_sampler_bounds_shared.h" - -namespace Capsaicin -{ -LightSamplerBounds::~LightSamplerBounds() noexcept -{ - terminate(); -} - -RenderOptionList LightSamplerBounds::getRenderOptions() noexcept -{ - RenderOptionList newOptions; - newOptions.emplace(RENDER_OPTION_MAKE(light_bounds_num_cells, options)); - newOptions.emplace(RENDER_OPTION_MAKE(light_bounds_lights_per_cell, options)); - newOptions.emplace(RENDER_OPTION_MAKE(light_bounds_threshold, options)); - newOptions.emplace(RENDER_OPTION_MAKE(light_bounds_cdf, options)); - newOptions.emplace(RENDER_OPTION_MAKE(light_bounds_uniform_sample, options)); - return newOptions; -} - -LightSamplerBounds::RenderOptions LightSamplerBounds::convertOptions(RenderSettings const &settings) noexcept -{ - RenderOptions newOptions; - RENDER_OPTION_GET(light_bounds_num_cells, newOptions, settings.options_) - RENDER_OPTION_GET(light_bounds_lights_per_cell, newOptions, settings.options_) - RENDER_OPTION_GET(light_bounds_threshold, newOptions, settings.options_) - RENDER_OPTION_GET(light_bounds_cdf, newOptions, settings.options_) - RENDER_OPTION_GET(light_bounds_uniform_sample, newOptions, settings.options_) - return newOptions; -} - -ComponentList LightSamplerBounds::getComponents() const noexcept -{ - ComponentList components; - components.emplace_back(COMPONENT_MAKE(LightSampler)); - components.emplace_back(COMPONENT_MAKE(StratifiedSampler)); - return components; -} - -bool LightSamplerBounds::init(CapsaicinInternal const &capsaicin) noexcept -{ - initKernels(capsaicin); - - boundsLengthBuffer = gfxCreateBuffer(gfx_, 1); - boundsLengthBuffer.setName("Capsaicin_LightSamplerBounds_BoundsCountBuffer"); - - initBoundsBuffers(); - - reducerMin.initialise(capsaicin, GPUReduce::Type::Float3, GPUReduce::Operation::Min); - 
reducerMax.initialise(capsaicin, GPUReduce::Type::Float3, GPUReduce::Operation::Max); - - struct LightSamplingConfiguration - { - uint4 numCells; - float3 cellSize; - float pack; - float3 sceneMin; - float pack2; - float3 sceneExtent; - }; - - configBuffer = gfxCreateBuffer(gfx_, 1); - configBuffer.setName("Capsaicin_LightSamplerBounds_ConfigBuffer"); - - initLightIndexBuffer(); - - dispatchCommandBuffer = gfxCreateBuffer(gfx_, 1); - dispatchCommandBuffer.setName("Capsaicin_LightSamplerBounds_DispatchCommandBuffer"); - - return !!boundsProgram; -} - -void LightSamplerBounds::run(CapsaicinInternal &capsaicin) noexcept -{ - // Nothing to do as requires explicit call to LightSamplerBounds::update() -} - -void LightSamplerBounds::reserveBoundsValues(uint32_t reserve, std::type_info const &caller) noexcept -{ - boundsReservations.emplace(caller.hash_code(), reserve); - // Determine if current buffer needs to be reallocated - uint32_t elements = 0; - for (auto &i : boundsReservations) - { - elements += i.second; - } - boundsMaxLength = elements / 32; // Currently assumes wavefront size of 32 and reduces due to use of - // WaveActiveMin in shader code - - if (boundsMinBuffer.getCount() < boundsMaxLength && boundsMaxLength > 0) - { - gfxDestroyBuffer(gfx_, boundsMinBuffer); - gfxDestroyBuffer(gfx_, boundsMaxBuffer); - initBoundsBuffers(); - } -} - -void LightSamplerBounds::setBounds( - std::pair const &bounds, std::type_info const &caller) noexcept -{ - // Add to internal host reservations - boundsHostReservations.emplace(caller.hash_code(), bounds); - - // Add an additional reserve spot in device buffer so that this can coexist with reserveBoundsValues - reserveBoundsValues(1, this); -} - -void LightSamplerBounds::update(CapsaicinInternal &capsaicin, RenderTechnique &parent) noexcept -{ - // Update internal options - auto const &renderSettings = capsaicin.getRenderSettings(); - auto const optionsNew = convertOptions(renderSettings); - auto lightSampler = 
capsaicin.getComponent(); - auto stratified_sampler = capsaicin.getComponent(); - - recompileFlag = optionsNew.light_bounds_cdf != options.light_bounds_cdf - || optionsNew.light_bounds_uniform_sample != options.light_bounds_uniform_sample - || optionsNew.light_bounds_threshold != options.light_bounds_threshold - || lightSampler->needsRecompile(capsaicin); - lightingChanged = lightSampler->getLightsUpdated(); - options = optionsNew; - - if (options.light_bounds_uniform_sample) - { - // If using uniform sampling skip building structure - return; - } - - const uint32_t numCells = options.light_bounds_num_cells; - const uint32_t lightsPerCell = options.light_bounds_lights_per_cell - 1; - const uint lightDataLength = numCells * numCells * numCells * lightsPerCell; - if (lightIndexBuffer.getCount() < lightDataLength && lightDataLength > 0) - { - gfxDestroyBuffer(gfx_, lightIndexBuffer); - initLightIndexBuffer(); - } - - if (recompileFlag) - { - gfxDestroyKernel(gfx_, calculateBoundsKernel); - gfxDestroyKernel(gfx_, buildKernel); - gfxDestroyProgram(gfx_, boundsProgram); - - initKernels(capsaicin); - } - - // Calculate host side maximum bounds - bool hostUpdated = false; - if (!boundsHostReservations.empty()) - { - // Sum up everything in boundsHostReservations - std::pair newBounds = boundsHostReservations[0]; - for (auto &i : - std::ranges::subrange(++boundsHostReservations.cbegin(), boundsHostReservations.cend())) - { - newBounds.first = glm::min(newBounds.first, i.second.first); - newBounds.second = glm::max(newBounds.second, i.second.second); - } - - // Check if the host side bounds needs to be uploaded - if (newBounds != currentBounds) - { - currentBounds = newBounds; - hostUpdated = true; - // Check if there are also any device side reservations, if so then upload the host value to the - // last slot so that it will participate in reduceMinMax but wont be overwritten each frame - if (boundsReservations.size() > 1) - { - // Copy to last element boundsMinBuffer and 
boundsMaxBuffer - GfxBuffer uploadMinBuffer = - gfxCreateBuffer(gfx_, sizeof(float) * 3, &newBounds.first, kGfxCpuAccess_Write); - gfxCommandCopyBuffer(gfx_, boundsMinBuffer, ((size_t)boundsMaxLength - 1) * sizeof(float) * 3, - uploadMinBuffer, 0, sizeof(float) * 3); - GfxBuffer uploadMaxBuffer = - gfxCreateBuffer(gfx_, sizeof(float) * 3, &newBounds.second, kGfxCpuAccess_Write); - gfxCommandCopyBuffer(gfx_, boundsMaxBuffer, ((size_t)boundsMaxLength - 1) * sizeof(float) * 3, - uploadMaxBuffer, 0, sizeof(float) * 3); - } - else - { - GfxBuffer uploadMinBuffer = - gfxCreateBuffer(gfx_, sizeof(float) * 3, &newBounds.first, kGfxCpuAccess_Write); - gfxCommandCopyBuffer(gfx_, boundsMinBuffer, 0, uploadMinBuffer, 0, sizeof(float) * 3); - GfxBuffer uploadMaxBuffer = - gfxCreateBuffer(gfx_, sizeof(float) * 3, &newBounds.second, kGfxCpuAccess_Write); - gfxCommandCopyBuffer(gfx_, boundsMaxBuffer, 0, uploadMaxBuffer, 0, sizeof(float) * 3); - } - } - } - - // Update constants buffer - GfxBuffer samplingConstants = capsaicin.allocateConstantBuffer(1); - LightSamplingConstants constantData = {}; - constantData.maxCellsPerAxis = options.light_bounds_num_cells; - constantData.maxNumLightsPerCell = - options.light_bounds_lights_per_cell - 1; //-1 as need space for table header - gfxBufferGetData(gfx_, samplingConstants)[0] = constantData; - - // Add program parameters - addProgramParameters(capsaicin, boundsProgram); - gfxProgramSetParameter(gfx_, boundsProgram, "g_DispatchCommandBuffer", dispatchCommandBuffer); - gfxProgramSetParameter(gfx_, boundsProgram, "g_LightSampler_Constants", samplingConstants); - - gfxProgramSetParameter(gfx_, boundsProgram, "g_EnvironmentBuffer", capsaicin.getEnvironmentBuffer()); - gfxProgramSetParameter( - gfx_, boundsProgram, "g_TextureMaps", capsaicin.getTextures(), capsaicin.getTextureCount()); - gfxProgramSetParameter(gfx_, boundsProgram, "g_TextureSampler", capsaicin.getLinearSampler()); - - gfxProgramSetParameter(gfx_, boundsProgram, 
"g_FrameIndex", capsaicin.getFrameIndex()); - - stratified_sampler->addProgramParameters(capsaicin, boundsProgram); - - // Create the light sampling structure bounds by reducing all values stored in the boundsMin|MaxBuffers - { - RenderTechnique::TimedSection const timedSection(parent, "CalculateLightSamplerBounds"); - - if (boundsReservations.size() > (boundsHostReservations.empty() ? 0 : 1)) - { - // Reduce Min/Max - reducerMin.reduceIndirect(boundsMinBuffer, boundsLengthBuffer, boundsMaxLength); - reducerMax.reduceIndirect(boundsMaxBuffer, boundsLengthBuffer, boundsMaxLength); - } - } - - // Calculate the required configuration values - { - RenderTechnique::TimedSection const timedSection(parent, "CalculateLightSamplerConfiguration"); - - if (boundsReservations.size() > (boundsHostReservations.empty() ? 0 : 1) || hostUpdated) - { - // Calculate the required configuration values - gfxCommandBindKernel(gfx_, calculateBoundsKernel); - gfxCommandDispatch(gfx_, 1, 1, 1); - } - } - - // Create the light sampling structure - { - RenderTechnique::TimedSection const timedSection(parent, "BuildLightSampler"); - - // Build the sampling structure - gfxCommandBindKernel(gfx_, buildKernel); - gfxCommandDispatchIndirect(gfx_, dispatchCommandBuffer); - } - - // Release constant buffer - gfxDestroyBuffer(gfx_, samplingConstants); - - // Clear boundsLengthBuffer - gfxCommandClearBuffer(gfx_, boundsLengthBuffer, 0); -} - -bool LightSamplerBounds::needsRecompile(CapsaicinInternal const &capsaicin) const noexcept -{ - return recompileFlag; -} - -std::vector LightSamplerBounds::getShaderDefines( - CapsaicinInternal const &capsaicin) const noexcept -{ - auto lightSampler = capsaicin.getComponent(); - std::vector baseDefines(std::move(lightSampler->getShaderDefines(capsaicin))); - if (options.light_bounds_threshold) - { - baseDefines.push_back("LIGHTSAMPLERBOUNDS_USE_THRESHOLD"); - } - if (options.light_bounds_cdf) - { - baseDefines.push_back("LIGHTSAMPLERBOUNDS_USE_CDF"); - } - if 
(options.light_bounds_uniform_sample) - { - baseDefines.push_back("LIGHTSAMPLERBOUNDS_USE_UNIFORM_SAMPLING"); - } - return baseDefines; -} - -void LightSamplerBounds::addProgramParameters( - CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept -{ - auto lightSampler = capsaicin.getComponent(); - lightSampler->addProgramParameters(capsaicin, program); - - // Bind the light sampling shader parameters - gfxProgramSetParameter(gfx_, program, "g_LightSampler_BoundsLength", boundsLengthBuffer); - gfxProgramSetParameter(gfx_, program, "g_LightSampler_MinBounds", boundsMinBuffer); - gfxProgramSetParameter(gfx_, program, "g_LightSampler_MaxBounds", boundsMaxBuffer); - gfxProgramSetParameter(gfx_, program, "g_LightSampler_Configuration", configBuffer); - gfxProgramSetParameter(gfx_, program, "g_LightSampler_CellsIndex", lightIndexBuffer); - gfxProgramSetParameter(gfx_, program, "g_LightSampler_CellsCDF", lightCDFBuffer); -} - -bool LightSamplerBounds::getLightsUpdated() const -{ - return lightingChanged; -} - -void LightSamplerBounds::terminate() noexcept -{ - gfxDestroyBuffer(gfx_, boundsLengthBuffer); - gfxDestroyBuffer(gfx_, boundsMinBuffer); - gfxDestroyBuffer(gfx_, boundsMaxBuffer); - - gfxDestroyBuffer(gfx_, configBuffer); - gfxDestroyBuffer(gfx_, lightIndexBuffer); - gfxDestroyBuffer(gfx_, lightCDFBuffer); - - gfxDestroyBuffer(gfx_, dispatchCommandBuffer); - - gfxDestroyKernel(gfx_, calculateBoundsKernel); - gfxDestroyKernel(gfx_, buildKernel); - gfxDestroyProgram(gfx_, boundsProgram); -} - -bool LightSamplerBounds::initKernels(CapsaicinInternal const &capsaicin) noexcept -{ - boundsProgram = gfxCreateProgram( - gfx_, "components/light_sampler_bounds/light_sampler_bounds", capsaicin.getShaderPath()); - auto baseDefines(std::move(getShaderDefines(capsaicin))); - std::vector defines; - for (auto &i : baseDefines) - { - defines.push_back(i.c_str()); - } - calculateBoundsKernel = gfxCreateComputeKernel(gfx_, boundsProgram, "CalculateBounds"); - 
buildKernel = gfxCreateComputeKernel( - gfx_, boundsProgram, "Build", defines.data(), static_cast(defines.size())); - - return !!buildKernel; -} - -bool LightSamplerBounds::initBoundsBuffers() noexcept -{ - boundsMinBuffer = gfxCreateBuffer(gfx_, boundsMaxLength); - boundsMinBuffer.setName("Capsaicin_LightSamplerBounds_BoundsMinBuffer"); - boundsMaxBuffer = gfxCreateBuffer(gfx_, boundsMaxLength); - boundsMaxBuffer.setName("Capsaicin_LightSamplerBounds_BoundsMaxBuffer"); - return !!boundsMaxBuffer; -} - -bool LightSamplerBounds::initLightIndexBuffer() noexcept -{ - const uint32_t numCells = options.light_bounds_num_cells; - const uint32_t lightsPerCell = options.light_bounds_lights_per_cell; - const uint lightDataLength = numCells * numCells * numCells * lightsPerCell; - - lightIndexBuffer = gfxCreateBuffer(gfx_, lightDataLength); - lightIndexBuffer.setName("Capsaicin_LightSamplerBounds_IndexBuffer"); - lightCDFBuffer = gfxCreateBuffer(gfx_, lightDataLength); - lightCDFBuffer.setName("Capsaicin_LightSamplerBounds_CDFBuffer"); - return !!lightCDFBuffer; -} -} // namespace Capsaicin diff --git a/src/core/src/components/light_sampler_bounds/light_sampler_bounds.hlsl b/src/core/src/components/light_sampler_bounds/light_sampler_bounds.hlsl deleted file mode 100644 index 376d382..0000000 --- a/src/core/src/components/light_sampler_bounds/light_sampler_bounds.hlsl +++ /dev/null @@ -1,363 +0,0 @@ -/********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ - -#ifndef LIGHT_BOUNDS_SAMPLER_HLSL -#define LIGHT_BOUNDS_SAMPLER_HLSL - -/* -// Requires the following data to be defined in any shader that uses this file -TextureCube g_EnvironmentBuffer; -Texture2D g_TextureMaps[] : register(space99); -SamplerState g_LinearSampler; -*/ - -#ifdef LIGHTSAMPLERBOUNDS_USE_UNIFORM_SAMPLING -#include "../light_sampler/light_sampler_uniform.hlsl" - -/** Dummy function call that does nothing */ -void LightBounds_StorePosition(in float3 position) -{} -#else -#include "light_sampler_bounds_shared.h" - -RWStructuredBuffer g_LightSampler_Configuration; -RWStructuredBuffer g_LightSampler_CellsIndex; -RWStructuredBuffer g_LightSampler_CellsCDF; - -// If using GPU based bounds calculation then also: -RWStructuredBuffer g_LightSampler_BoundsLength; -RWStructuredBuffer g_LightSampler_MinBounds; -RWStructuredBuffer g_LightSampler_MaxBounds; - -#include "../light_sampler/light_sampler.hlsl" -#include "../../lights/light_sampling.hlsl" -#include "../../lights/reservoir.hlsl" -#include "../../materials/material_evaluation.hlsl" -namespace LightSamplerBounds -{ - /** - * Calculate the start index for a requested cell within a continuous 1D buffer. - * @param cell Index of requested cell (0-indexed). - * @return The index of the cell. - */ - uint getCellIndex(uint3 cell) - { - const uint3 numCells = g_LightSampler_Configuration[0].numCells.xyz; - const uint maxLightsPerCell = g_LightSampler_Configuration[0].numCells.w; - uint index = cell.x + numCells.x * (cell.y + numCells.y * cell.z); - index *= (maxLightsPerCell + 1); // There is 1 extra slot used for cell table header - return index; - } - - /** - * Calculate which cell a position falls within. - * @param position The world space position. - * @return The index of the grid cell. 
- */ - uint3 getCellFromPosition(float3 position) - { - const float3 numCells = (float3)g_LightSampler_Configuration[0].numCells.xyz; - float3 relativePos = position - g_LightSampler_Configuration[0].sceneMin; - relativePos /= g_LightSampler_Configuration[0].sceneExtent; - const uint3 cell = clamp(floor(relativePos * numCells), 0.0f, numCells - 1.0f.xxx); - return cell; - } - - /** - * Get the current index into the light sampling cell buffer for a given position. - * @param position Current position on surface. - * @return The index of the current cell. - */ - uint getCellIndexFromPosition(float3 position) - { - // Calculate which cell we are in based on input point - const uint3 cell = getCellFromPosition(position); - - // Calculate position of current cell in output buffer - const uint cellIndex = getCellIndex(cell); - return cellIndex; - } - - /** - * Get the current index into the light sampling cell buffer for a given jittered position. - * @note The current position will be jittered by +- half the current cell size. - * @tparam RNG The type of random number sampler to be used. - * @param position Current position on surface. - * @return The index of the current cell. - */ - template - uint getCellIndexFromJitteredPosition(float3 position, inout RNG randomNG) - { - // Jitter current position by +-half cell size - position += (randomNG.rand3() - 0.5f) * g_LightSampler_Configuration[0].cellSize; - - return getCellIndexFromPosition(position); - } - - /** - * Perform a search of the light list for a light with CDF closest to a given value. - * @param startIndex The index of the first item to start searching at. - * @param numLights The number of values to search through. - * @param value The CDF value of item to find. - * @return The index of the sampled light. 
- */ - uint binarySearch(uint startIndex, uint numLights, float value) - { - // Search through looking for last element with cdf >= value - uint first = 0; - uint len = numLights; - while (len > 0) - { - uint halfed = len >> 1; - uint middle = first + halfed; - // Bisect range based on value at middle - if (g_LightSampler_CellsCDF[startIndex + middle] < value) - { - first = middle + 1; - len -= halfed + 1; - } - else - { - len = halfed; - } - } - const uint sampledIndexBase = min(first, numLights - 1); - // Add cell index to found position - const uint sampledIndex = sampledIndexBase + startIndex; - return sampledIndex; - } - - /** - * Sample the index and PDF for a sampled light. - * @tparam RNG The type of random number sampler to be used. - * @param randomNG Random number sampler used to sample light. - * @param cellIndex The index of the current cell. - * @param numLights The number of lights in the cell. - * @param normal Shading normal vector at current position. - * @param lightPDF (Out) The PDF for the calculated sample. - * @return The index of the sampled light. 
- */ - template - uint getSampledLight(inout RNG randomNG, uint cellIndex, uint numLights, float3 normal, out float lightPDF) - { -#ifndef LIGHTSAMPLERBOUNDS_USE_CDF - // Get total weight (=Wsum / M) - const float totalWeight = g_LightSampler_CellsCDF[cellIndex]; - - // Collapse reservoir down to a single sample - const uint startIndex = cellIndex + 1; - uint firstLightIndex = startIndex; - uint sampledIndex = g_LightSampler_CellsIndex[firstLightIndex]; - float sampledPDF = g_LightSampler_CellsCDF[firstLightIndex]; - float Wsum = sampledPDF; - float j = randomNG.rand(); - float pNone = 1.0f; - for (uint currentIndex = startIndex + 1; currentIndex < startIndex + numLights; ++currentIndex) - { - float targetPDF = g_LightSampler_CellsCDF[currentIndex]; - Wsum += targetPDF; - float p = targetPDF / Wsum; - j -= p * pNone; - pNone = pNone * (1.0f - p); - if (j <= 0.0f) - { - sampledIndex = g_LightSampler_CellsIndex[currentIndex]; - sampledPDF = targetPDF; - j = randomNG.rand(); - pNone = 1.0f; - } - } - - // Final result has PDF = (Wsum / M) / targetPDF - lightPDF = sampledPDF / totalWeight; - return sampledIndex; -#else - const uint startIndex = cellIndex + 1; - const uint sampledIndex = binarySearch(startIndex, numLights, randomNG.rand()); - - // Calculate pdf, The pdf is the contribution of the given light divided by the total contribution of all lights multiplied by the number of lights - // This is actually just the difference between the current cdf and the previous - const float previousCDF = (sampledIndex > startIndex) ? g_LightSampler_CellsCDF[sampledIndex - 1] : 0.0f; - lightPDF = g_LightSampler_CellsCDF[sampledIndex] - previousCDF; - lightPDF *= g_LightSampler_CellsCDF[cellIndex]; - return g_LightSampler_CellsIndex[sampledIndex]; -#endif - } -} - -/** - * Records the position of future light lookups. - * @param position Current position on surface. 
- */ -void LightBounds_StorePosition(in float3 position) -{ - const float3 position_min = WaveActiveMin(position); - const float3 position_max = WaveActiveMax(position); - if (WaveIsFirstLane()) - { - uint offset; - InterlockedAdd(g_LightSampler_BoundsLength[0], 1, offset); - g_LightSampler_MinBounds[offset] = position_min; - g_LightSampler_MaxBounds[offset] = position_max; - } -} - -/** - * Get a sample light. - * @tparam RNG The type of random number sampler to be used. - * @param randomNG Random number sampler used to sample light. - * @param position Current position on surface. - * @param normal Shading normal vector at current position. - * @param lightPDF (Out) The PDF for the calculated sample (is equal to zero if no valid samples could be found). - * @returns The index of the new light sample - */ -template -uint sampleLights(inout RNG randomNG, float3 position, float3 normal, out float lightPDF) -{ - // Get the current cell buffer index - const uint cellIndex = LightSamplerBounds::getCellIndexFromJitteredPosition(position, randomNG); - const uint numLights = g_LightSampler_CellsIndex[cellIndex]; - - // Return invalid sample if the cell doesn't contain any lights - if (numLights == 0) - { - lightPDF = 0.0f; - return 0; - } - - // Choose a light to sample from - uint lightIndex = LightSamplerBounds::getSampledLight(randomNG, cellIndex, numLights, normal, lightPDF); - return lightIndex; -} - -/** - * Calculate the PDF of sampling a given light. - * @param position The position on the surface currently being shaded. - * @returns The calculated PDF with respect to the light. 
- */ -float sampleLightPDF(float3 position) -{ - // Get the current cell buffer index - const uint cellIndex = LightSamplerBounds::getCellIndexFromPosition(position); - const uint numLights = g_LightSampler_CellsIndex[cellIndex]; - - // TODO: This is technically incorrect as it doesnt take into account the specific PDF for the current light as we need to know what light we actually hit which is currently not available. - return 1.0f / numLights; -} - -/** - * Sample multiple lights into a reservoir. - * @tparam numSampledLights Number of lights to sample. - * @tparam RNG The type of random number sampler to be used. - * @param randomNG Random number sampler used to sample light. - * @param position Current position on surface. - * @param normal Shading normal vector at current position. - * @param viewDirection View direction vector at current position. - * @param solidAngle Solid angle around view direction of visible ray cone. - * @param material Material for current surface position. - * @returns Reservoir containing combined samples. 
- */ -template -Reservoir sampleLightListCone(/*inout TODO: dxc crash*/ RNG randomNG, float3 position, float3 normal, float3 viewDirection, float solidAngle, MaterialBRDF material) -{ - // Get the current cell buffer index - const uint cellIndex = LightSamplerBounds::getCellIndexFromJitteredPosition(position, randomNG); - const uint numLights = g_LightSampler_CellsIndex[cellIndex]; - const uint newLights = min(numLights, numSampledLights); - - // Return invalid sample if the cell doesn't contain any lights - if (numLights == 0) - { - return MakeReservoir(); - } - - // Create reservoir updater - ReservoirUpdater updater = MakeReservoirUpdater(); - -#ifndef LIGHTSAMPLERBOUNDS_USE_CDF - uint sampledIndexes[numSampledLights]; // Final samples - float sampledPDFs[numSampledLights]; - - // Collapse reservoir down to N samples where N=numSampledLights - const uint startIndex = cellIndex + 1; - uint currentIndex = startIndex; - uint lightsAdded = 0; - float Wsum = 0; - for (; currentIndex < startIndex + newLights; ++currentIndex) - { - // Must pre-fill the N sized final reservoir - sampledIndexes[lightsAdded] = g_LightSampler_CellsIndex[currentIndex]; - float targetPDF = g_LightSampler_CellsCDF[currentIndex]; - sampledPDFs[lightsAdded] = targetPDF; - Wsum += targetPDF; - ++lightsAdded; - } - // Use A-Chao with jumps to sample remainder of cells light list - float j = randomNG.rand(); - float pNone = 1.0f; - for (; currentIndex < startIndex + numLights; ++currentIndex) - { - float targetPDF = g_LightSampler_CellsCDF[currentIndex]; - Wsum += targetPDF; - float p = targetPDF / Wsum; - j -= p * pNone; - pNone = pNone * (1.0f - p); - if (j <= 0.0f) - { - uint replaceIndex = randomNG.randInt(numSampledLights); - sampledIndexes[replaceIndex] = g_LightSampler_CellsIndex[currentIndex]; - sampledPDFs[replaceIndex] = targetPDF; - j = randomNG.rand(); - pNone = 1.0f; - } - } - - // Get total weight (=Wsum / M) - const float totalWeight = g_LightSampler_CellsCDF[cellIndex]; - const 
float modifierPDF = lightsAdded / totalWeight; - // Loop through the collapsed reservoir and add samples - for (uint i = 0; i < lightsAdded; ++i) - { - // Add the light sample to the reservoir - uint lightIndex = sampledIndexes[i]; - float lightPDF = sampledPDFs[i] * modifierPDF; - updateReservoir(updater, randomNG, lightIndex, lightPDF, material, position, normal, viewDirection, solidAngle); - } -#else - // Loop through until we have the requested number of lights - float lightPDF; - for (uint lightsAdded = 0; lightsAdded < newLights; ++lightsAdded) - { - // Choose a light to sample from - uint lightIndex = LightSamplerBounds::getSampledLight(randomNG, cellIndex, numLights, normal, lightPDF); - - // Add the light sample to the reservoir - updateReservoir(updater, randomNG, lightIndex, lightPDF, material, position, normal, viewDirection, solidAngle); - } -#endif - - // Get finalised reservoir for return - return getUpdatedReservoir(updater); -} -#endif // LIGHTSAMPLERBOUNDS_USE_UNIFORM_SAMPLING - -#endif // LIGHT_BOUNDS_SAMPLER_HLSL diff --git a/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid.hlsl b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid.hlsl new file mode 100644 index 0000000..e277231 --- /dev/null +++ b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid.hlsl @@ -0,0 +1,199 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+#ifndef LIGHT_SAMPLER_GRID_HLSL
+#define LIGHT_SAMPLER_GRID_HLSL
+
+namespace LightSamplerGrid
+{
+    /**
+     * Calculate the start index for a requested cell within a continuous 1D buffer.
+     * @param cell Index of requested cell (0-indexed).
+     * @return The index of the first slot belonging to the cell (linear cell index * numCells.w).
+     */
+    uint getCellIndex(uint3 cell)
+    {
+        const uint3 numCells = g_LightSampler_Configuration[0].numCells.xyz;
+        const uint maxLightsPerCell = g_LightSampler_Configuration[0].numCells.w;
+        uint index = cell.x + numCells.x * (cell.y + numCells.y * cell.z);
+        index *= maxLightsPerCell;
+        return index;
+    }
+
+    /**
+     * Calculate the start index for a requested cell within a continuous 1D buffer when using octahedron sampling.
+     * @param cell Index of requested cell (0-indexed).
+     * @param face Face of octahedron based on shading normal (0..7, see getCellFace).
+     * @return The index of the first slot belonging to the cell face (8 faces are stored per cell).
+     */
+    uint getCellOctaIndex(uint3 cell, uint face)
+    {
+        const uint3 numCells = g_LightSampler_Configuration[0].numCells.xyz;
+        const uint maxLightsPerCell = g_LightSampler_Configuration[0].numCells.w;
+        uint index = cell.x + numCells.x * (cell.y + numCells.y * cell.z);
+        index *= 8;
+        index += face;
+        index *= maxLightsPerCell;
+        return index;
+    }
+
+    /**
+     * Calculate which octahedron face a given direction is oriented within.
+     * @param normal The direction vector.
+     * @return The index of the octahedron face.
+     */
+    uint getCellFace(float3 normal)
+    {
+        // Faces are mapped to the integer values 0..7 where the 3 bits that
+        // make up the integer value are taken from the sign bits of the normal direction
+        // with the x component mapping to bit 0, y component to bit 1 and z to bit 2
+        bool3 faceOrientation = normal < 0.0f.xxx;
+        uint index = faceOrientation.x ? 0x1 : 0;
+        index += faceOrientation.y ? 0x2 : 0;
+        index += faceOrientation.z ? 0x4 : 0;
+        return index;
+    }
+
+    /**
+     * Calculate the octahedron face normal.
+     * @param cellFace The cell face to get normal for (inverse of the bit mapping used by getCellFace).
+     * @return The normalised normal direction.
+     */
+    float3 getCellNormal(uint cellFace)
+    {
+        const float3 faceNormal = normalize(float3(cellFace & 0x1 ? -1.0f : 1.0f,
+            cellFace & 0x2 ? -1.0f : 1.0f,
+            cellFace & 0x4 ? -1.0f : 1.0f));
+        return faceNormal;
+    }
+
+    /**
+     * Calculate which cell a position falls within.
+     * @param position The world space position.
+     * @return The index of the grid cell (clamped so positions outside the scene bounds map to a border cell).
+     */
+    uint3 getCellFromPosition(float3 position)
+    {
+        const float3 numCells = (float3)g_LightSampler_Configuration[0].numCells.xyz;
+        float3 relativePos = position - g_LightSampler_Configuration[0].sceneMin;
+        relativePos /= g_LightSampler_Configuration[0].sceneExtent;
+        const uint3 cell = clamp(floor(relativePos * numCells), 0.0f, numCells - 1.0f.xxx);
+        return cell;
+    }
+
+    /**
+     * Calculate which cell a position falls within after offsetting it by (rand3() - 0.25) of a cell per axis.
+     * @tparam RNG The type of random number sampler to be used.
+     * @param position The world space position.
+     * @param randomNG The random number generator used to produce the jitter.
+     * @return The index of the grid cell.
+     */
+    template<typename RNG>
+    uint3 getCellFromJitteredPosition(float3 position, inout RNG randomNG)
+    {
+        // NOTE(review): nominally a +-quarter cell jitter, but (rand3() - 0.25) is asymmetric if rand3() is uniform in [0, 1) - confirm intent
+        position += (randomNG.rand3() - 0.25f) * g_LightSampler_Configuration[0].cellSize;
+
+        return getCellFromPosition(position);
+    }
+
+    /**
+     * Get the current index into the light sampling cell buffer for a given position.
+     * @param position Current position on surface.
+     * @return The index of the current cell.
+     */
+    uint getCellIndexFromPosition(float3 position)
+    {
+        // Calculate which cell we are in based on input point
+        const uint3 cell = getCellFromPosition(position);
+
+        // Calculate position of current cell in output buffer
+        const uint cellIndex = getCellIndex(cell);
+        return cellIndex;
+    }
+
+    /**
+     * Get the current index into the light sampling octahedron cell buffer for a given position.
+     * @param position Current position on surface.
+     * @param normal Shading normal vector at current position.
+     * @return The index of the current cell.
+     */
+    uint getCellOctaIndexFromPosition(float3 position, float3 normal)
+    {
+        // Calculate which cell we are in based on input point
+        const uint3 cell = getCellFromPosition(position);
+
+        // Calculate position of current cell in output buffer
+        const uint cellIndex = getCellOctaIndex(cell, getCellFace(normal));
+        return cellIndex;
+    }
+
+    /**
+     * Get the bounding box for a specific grid cell.
+     * @param cellID The ID of the grid cell.
+     * @param extent (Out) The return bounding box size.
+     * @return The bounding box min values.
+     */
+    float3 getCellBB(uint3 cellID, out float3 extent)
+    {
+        const float3 minBB = ((float3)cellID * g_LightSampler_Configuration[0].cellSize) + g_LightSampler_Configuration[0].sceneMin;
+        extent = g_LightSampler_Configuration[0].cellSize;
+        return minBB;
+    }
+
+    /**
+     * Get the current index into the light sampling cell buffer for a given jittered position.
+     * @note The position is offset by (rand3() - 0.25) of a cell per axis; NOTE(review): asymmetric if rand3() is in [0, 1) - confirm intent.
+     * @tparam RNG The type of random number sampler to be used.
+     * @param position Current position on surface.
+     * @param randomNG The random number generator.
+     * @return The index of the current cell.
+     */
+    template<typename RNG>
+    uint getCellIndexFromJitteredPosition(float3 position, inout RNG randomNG)
+    {
+        // Offset the lookup position by (rand3() - 0.25) cell sizes per axis (see @note)
+        position += (randomNG.rand3() - 0.25f) * g_LightSampler_Configuration[0].cellSize;
+
+        return getCellIndexFromPosition(position);
+    }
+
+    /**
+     * Get the current index into the light sampling octahedron cell buffer for a given jittered position.
+     * @note The position is offset by (rand3() - 0.25) of a cell per axis; NOTE(review): asymmetric if rand3() is in [0, 1) - confirm intent.
+     * @tparam RNG The type of random number sampler to be used.
+     * @param position Current position on surface.
+     * @param normal Shading normal vector at current position.
+     * @param randomNG The random number generator.
+     * @return The index of the current cell.
+     */
+    template<typename RNG>
+    uint getCellOctaIndexFromJitteredPosition(float3 position, float3 normal, inout RNG randomNG)
+    {
+        // Offset the lookup position by (rand3() - 0.25) cell sizes per axis (see @note)
+        position += (randomNG.rand3() - 0.25f) * g_LightSampler_Configuration[0].cellSize;
+
+        return getCellOctaIndexFromPosition(position, normal);
+    }
+}
+
+#endif // LIGHT_SAMPLER_GRID_HLSL
diff --git a/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.comp b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.comp
new file mode 100644
index 0000000..1b277c4
--- /dev/null
+++ b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.comp
@@ -0,0 +1,145 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+#include "light_sampler_grid_shared.h"
+
+TextureCube g_EnvironmentBuffer;
+Texture2D g_TextureMaps[] : register(space99);
+SamplerState g_TextureSampler; // Is a linear sampler
+
+uint g_FrameIndex;
+
+#ifdef LIGHTSAMPLERCDF_USE_THRESHOLD
+#define THRESHOLD_RADIANCE (1.0f / 2048.0f)
+#endif
+
+#include "light_sampler_grid_cdf.hlsl"
+#include "../../math/random.hlsl"
+
+#define THREADX 4
+#define THREADY 4
+#define THREADZ 4
+
+/**
+ * Build the light list and normalised CDF of one grid cell (or one octahedron face of a cell) per thread.
+ */
+[numthreads(THREADX, THREADY, THREADZ)]
+void Build(in uint3 did : SV_DispatchThreadID)
+{
+#ifdef LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
+    // Called per cell per octahedron face
+    const uint3 cellID = uint3(did.x / 8, did.y, did.z);
+#else
+    // Called per cell
+    const uint3 cellID = did;
+#endif // LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
+    if (any(cellID >= g_LightSampler_Configuration[0].numCells.xyz))
+    {
+        return;
+    }
+
+    // Calculate the bounding box for the current cell
+    float3 extent;
+    const float3 minBB = LightSamplerGrid::getCellBB(cellID, extent);
+
+    // Get total and max supported number of lights (slot 0 of every cell is the header, hence the -1)
+    uint totalLights = getNumberLights();
+    const uint maxLightsPerCell = g_LightSampler_Configuration[0].numCells.w - 1;
+
+    // Loop through all lights
+#ifdef LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
+    const uint cellFace = did.x - (cellID.x * 8);
+    uint cellIndex = LightSamplerGrid::getCellOctaIndex(cellID, cellFace);
+    const float3 normal = LightSamplerGrid::getCellNormal(cellFace);
+#else
+    const uint cellIndex = LightSamplerGrid::getCellIndex(cellID);
+#endif // LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
+    const uint startIndex = cellIndex + 1;
+    Random randomNG = MakeRandom(cellIndex, g_FrameIndex);
+    uint storedLights = 0; // Number of lights currently stored in the cell
+    float totalWeight = 0.0f;
+    for (uint lightIndex = 0; lightIndex < totalLights; ++lightIndex)
+    {
+        // Calculate sampled contribution for light
+        Light selectedLight = getLight(lightIndex);
+#ifdef LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
+        float y = sampleLightVolumeNormal(selectedLight, minBB, extent, normal);
+#else
+        float y = sampleLightVolume(selectedLight, minBB, extent);
+#endif // LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
+
+        if (y > 0.0f)
+        {
+            // Store only the most important lights (totalWeight still accumulates every contributing light)
+            totalWeight += y;
+            if (storedLights < maxLightsPerCell)
+            {
+                ++storedLights;
+                g_LightSampler_CellsIndex[cellIndex + storedLights] = lightIndex;
+                g_LightSampler_CellsCDF[cellIndex + storedLights] = y;
+            }
+            else
+            {
+                // Find the lowest contributing light and replace
+                uint smallestLight = -1;
+                float smallestCDF = y;
+                uint writeIndex = startIndex + storedLights;
+                for (uint light = startIndex; light < writeIndex; ++light)
+                {
+                    if (g_LightSampler_CellsCDF[light] < smallestCDF)
+                    {
+                        smallestLight = light;
+                        smallestCDF = g_LightSampler_CellsCDF[light];
+                    }
+                }
+                if (smallestLight != -1)
+                {
+                    g_LightSampler_CellsIndex[smallestLight] = lightIndex;
+                    g_LightSampler_CellsCDF[smallestLight] = y;
+                }
+            }
+        }
+    }
+
+    // Add table for cells light list
+    g_LightSampler_CellsIndex[cellIndex] = storedLights;
+
+    // Convert to CDF
+    float runningCDF = 0.0f;
+    for (uint i = startIndex; i <= cellIndex + storedLights; ++i)
+    {
+        runningCDF = runningCDF + g_LightSampler_CellsCDF[i];
+        g_LightSampler_CellsCDF[i] = runningCDF;
+    }
+    // Normalise CDF (runningCDF is zero when no light contributes to the cell, so avoid dividing by it)
+    float recipMaxCDF = (runningCDF > 0.0f) ? 1.0f / runningCDF : 0.0f;
+    for (uint j = startIndex; j < cellIndex + storedLights; ++j)
+    {
+        g_LightSampler_CellsCDF[j] *= recipMaxCDF;
+    }
+    g_LightSampler_CellsCDF[cellIndex + storedLights] = 1.0f;
+
+    // Write out the stored fraction of the total weight to the cell table (0 rather than NaN for an empty cell)
+#ifndef LIGHTSAMPLERCDF_HAS_ALL_LIGHTS
+    g_LightSampler_CellsCDF[cellIndex] = (totalWeight > 0.0f) ? runningCDF / totalWeight : 0.0f;
+#endif
+}
diff --git a/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.cpp b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.cpp
new file mode 100644
index 0000000..df244d8
--- /dev/null
+++
b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.cpp
@@ -0,0 +1,305 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+#include "light_sampler_grid_cdf.h"
+
+#include "../light_builder/light_builder.h"
+#include "capsaicin_internal.h"
+
+namespace Capsaicin
+{
+LightSamplerGridCDF::LightSamplerGridCDF() noexcept
+    : LightSampler(Name)
+{}
+
+LightSamplerGridCDF::~LightSamplerGridCDF() noexcept
+{
+    terminate();
+}
+
+RenderOptionList LightSamplerGridCDF::getRenderOptions() noexcept
+{
+    RenderOptionList newOptions;
+    newOptions.emplace(RENDER_OPTION_MAKE(light_grid_cdf_num_cells, options));
+    newOptions.emplace(RENDER_OPTION_MAKE(light_grid_cdf_lights_per_cell, options));
+    newOptions.emplace(RENDER_OPTION_MAKE(light_grid_cdf_threshold, options));
+    newOptions.emplace(RENDER_OPTION_MAKE(light_grid_cdf_octahedron_sampling, options));
+    newOptions.emplace(RENDER_OPTION_MAKE(light_grid_cdf_centroid_build, options));
+    return newOptions;
+}
+
+LightSamplerGridCDF::RenderOptions LightSamplerGridCDF::convertOptions(
+    RenderOptionList const &options) noexcept
+{
+    RenderOptions newOptions;
+    RENDER_OPTION_GET(light_grid_cdf_num_cells, newOptions, options)
+    RENDER_OPTION_GET(light_grid_cdf_lights_per_cell, newOptions, options)
+    RENDER_OPTION_GET(light_grid_cdf_threshold, newOptions, options)
+    RENDER_OPTION_GET(light_grid_cdf_octahedron_sampling, newOptions, options)
+    RENDER_OPTION_GET(light_grid_cdf_centroid_build, newOptions, options)
+    return newOptions;
+}
+
+ComponentList LightSamplerGridCDF::getComponents() const noexcept
+{
+    ComponentList components;
+    components.emplace_back(COMPONENT_MAKE(LightBuilder));
+    return components;
+}
+
+bool LightSamplerGridCDF::init(CapsaicinInternal const &capsaicin) noexcept
+{
+    bool const kernelsValid = initKernels(capsaicin); // run() dispatches buildKernel, so a failed build must fail init
+
+    configBuffer = gfxCreateBuffer<LightSamplingConfiguration>(gfx_, 1);
+    configBuffer.setName("Capsaicin_LightSamplerGridCDF_ConfigBuffer");
+
+    return kernelsValid && !!boundsProgram;
+}
+
+void LightSamplerGridCDF::run(CapsaicinInternal &capsaicin) noexcept
+{
+    // Update internal options
+    auto const optionsNew   = convertOptions(capsaicin.getOptions());
+    auto       lightBuilder = capsaicin.getComponent<LightBuilder>();
+
+    recompileFlag =
+        optionsNew.light_grid_cdf_threshold != options.light_grid_cdf_threshold
+        || optionsNew.light_grid_cdf_octahedron_sampling != options.light_grid_cdf_octahedron_sampling
+        || optionsNew.light_grid_cdf_centroid_build != options.light_grid_cdf_centroid_build
+        || (optionsNew.light_grid_cdf_lights_per_cell != options.light_grid_cdf_lights_per_cell
+            && (optionsNew.light_grid_cdf_lights_per_cell == 0
+                || options.light_grid_cdf_lights_per_cell == 0))
+        || lightBuilder->needsRecompile(capsaicin);
+    lightsUpdatedFlag =
+        optionsNew.light_grid_cdf_octahedron_sampling != options.light_grid_cdf_octahedron_sampling
+        || optionsNew.light_grid_cdf_lights_per_cell != options.light_grid_cdf_lights_per_cell
+        || optionsNew.light_grid_cdf_num_cells != options.light_grid_cdf_num_cells
+        || optionsNew.light_grid_cdf_centroid_build != options.light_grid_cdf_centroid_build;
+    options = optionsNew;
+
+    if (recompileFlag)
+    {
+        gfxDestroyKernel(gfx_, buildKernel);
+        gfxDestroyProgram(gfx_, boundsProgram);
+
+        initKernels(capsaicin);
+    }
+
+    if (capsaicin.getMeshesUpdated() || capsaicin.getTransformsUpdated() || lightsUpdatedFlag
+        || lightBuilder->getLightsUpdated() || recompileFlag || capsaicin.getFrameIndex() == 0
+        || config.numCells.x == 0 /*i.e. uninitialised*/)
+    {
+        // Update the light sampler using scene bounds
+        auto [sceneMin, sceneMax] = capsaicin.getSceneBounds();
+
+        // Ensure each cell is square, keeping at least 1 cell per axis so a scene that is flat along an axis stays valid
+        const float3 sceneExtent = sceneMax - sceneMin;
+        float const  largestAxis = glm::max(sceneExtent.x, glm::max(sceneExtent.y, sceneExtent.z));
+        float const  cellScale   = largestAxis / options.light_grid_cdf_num_cells;
+        const float3 cellNum     = glm::max(float3(1.0f), ceil(sceneExtent / cellScale));
+
+        // Clamp max number of lights to those actually available
+        uint lightsPerCell =
+            (options.light_grid_cdf_lights_per_cell == 0)
+                ? lightBuilder->getLightCount()
+                : glm::min(options.light_grid_cdf_lights_per_cell, lightBuilder->getLightCount());
+        lightsPerCell += 1; // There is 1 extra slot used for cell table header
+
+        // Create updated configuration
+        config.numCells    = uint4((uint3)cellNum, lightsPerCell);
+        config.cellSize    = sceneExtent / cellNum;
+        config.sceneMin    = sceneMin;
+        config.sceneExtent = sceneExtent;
+
+        GfxBuffer const uploadBuffer =
+            gfxCreateBuffer<LightSamplingConfiguration>(gfx_, 1, &config, kGfxCpuAccess_Write);
+        gfxCommandCopyBuffer(gfx_, configBuffer, uploadBuffer);
+        gfxDestroyBuffer(gfx_, uploadBuffer);
+    }
+
+    uint lightDataLength = config.numCells.x * config.numCells.y * config.numCells.z * config.numCells.w;
+    if (options.light_grid_cdf_octahedron_sampling)
+    {
+        lightDataLength *= 8;
+    }
+    if (lightIndexBuffer.getCount() < lightDataLength && lightDataLength > 0)
+    {
+        gfxDestroyBuffer(gfx_, lightIndexBuffer);
+        gfxDestroyBuffer(gfx_, lightCDFBuffer);
+
+        lightIndexBuffer = gfxCreateBuffer<uint>(gfx_, lightDataLength);
+        lightIndexBuffer.setName("Capsaicin_LightSamplerGridCDF_IndexBuffer");
+        lightCDFBuffer = gfxCreateBuffer<float>(gfx_, lightDataLength);
+        lightCDFBuffer.setName("Capsaicin_LightSamplerGridCDF_CDFBuffer");
+
+        lightsUpdatedFlag = true;
+    }
+
+    // Create the light sampling structure
+    if (lightBuilder->getLightsUpdated() || lightsUpdatedFlag || recompileFlag)
+    {
+        RenderTechnique::TimedSection const timedSection(*this, "BuildLightSampler");
+
+        // Add program parameters
+        addProgramParameters(capsaicin, boundsProgram);
+
+        gfxProgramSetParameter(gfx_, boundsProgram, "g_EnvironmentBuffer", capsaicin.getEnvironmentBuffer());
+        gfxProgramSetParameter(
+            gfx_, boundsProgram, "g_TextureMaps", capsaicin.getTextures(), capsaicin.getTextureCount());
+        gfxProgramSetParameter(gfx_, boundsProgram, "g_TextureSampler", capsaicin.getLinearSampler());
+
+        gfxProgramSetParameter(gfx_, boundsProgram, "g_FrameIndex", capsaicin.getFrameIndex());
+
+        // Get total number of grid cells
+        uint3 groups = (uint3)config.numCells;
+        if (options.light_grid_cdf_octahedron_sampling)
+        {
+            groups.x *= 8;
+        }
+
+        // Build the sampling structure (one thread per cell, or per cell face with octahedron sampling)
+        uint32_t const *numThreads = gfxKernelGetNumThreads(gfx_, buildKernel);
+        uint32_t const  numGroupsX = (groups.x + numThreads[0] - 1) / numThreads[0];
+        uint32_t const  numGroupsY = (groups.y + numThreads[1] - 1) / numThreads[1];
+        uint32_t const  numGroupsZ = (groups.z + numThreads[2] - 1) / numThreads[2];
+        gfxCommandBindKernel(gfx_, buildKernel);
+        gfxCommandDispatch(gfx_, numGroupsX, numGroupsY, numGroupsZ);
+    }
+}
+
+void LightSamplerGridCDF::terminate() noexcept
+{
+    gfxDestroyBuffer(gfx_, configBuffer);
+    configBuffer = {};
+    gfxDestroyBuffer(gfx_, lightIndexBuffer);
+    lightIndexBuffer = {};
+    gfxDestroyBuffer(gfx_, lightCDFBuffer);
+    lightCDFBuffer = {};
+
+    gfxDestroyKernel(gfx_, buildKernel);
+    buildKernel = {};
+    gfxDestroyProgram(gfx_, boundsProgram);
+    boundsProgram = {};
+}
+
+void LightSamplerGridCDF::renderGUI(CapsaicinInternal &capsaicin) const noexcept
+{
+    if (ImGui::CollapsingHeader("Light Sampler Settings", ImGuiTreeNodeFlags_None))
+    {
+        auto lightBuilder = capsaicin.getComponent<LightBuilder>();
+        ImGui::DragInt("Max Cells per Axis",
+            (int32_t *)&capsaicin.getOption<uint32_t>("light_grid_cdf_num_cells"), 1, 1, 128);
+        bool autoLights    = capsaicin.getOption<uint32_t>("light_grid_cdf_lights_per_cell") == 0;
+        auto currentLights = lightBuilder->getLightCount();
+        if (autoLights)
+        {
+            ImGui::BeginDisabled();
+            ImGui::DragInt("Number Lights per Cell", (int32_t *)&currentLights, 1, 1, currentLights);
+            ImGui::EndDisabled();
+        }
+        else
+        {
+            ImGui::DragInt("Number Lights per Cell",
+                (int32_t *)&capsaicin.getOption<uint32_t>("light_grid_cdf_lights_per_cell"), 1, 1,
+                currentLights);
+        }
+        ImGui::SameLine();
+        if (ImGui::Checkbox("Auto", &autoLights))
+        {
+            capsaicin.setOption("light_grid_cdf_lights_per_cell", autoLights ? 0 : currentLights);
+        }
+        ImGui::Checkbox(
+            "Cull Low Contributing Lights", &capsaicin.getOption<bool>("light_grid_cdf_threshold"));
+        // ImGui::Checkbox("Octahedral Sampling",
+        //    &capsaicin.getOption<bool>("light_grid_cdf_octahedron_sampling"));
+        ImGui::Checkbox("Fast Centroid Build", &capsaicin.getOption<bool>("light_grid_cdf_centroid_build"));
+    }
+}
+
+bool LightSamplerGridCDF::needsRecompile([[maybe_unused]] CapsaicinInternal const &capsaicin) const noexcept
+{
+    return recompileFlag;
+}
+
+std::vector<std::string> LightSamplerGridCDF::getShaderDefines(
+    CapsaicinInternal const &capsaicin) const noexcept
+{
+    auto lightBuilder = capsaicin.getComponent<LightBuilder>();
+    std::vector<std::string> baseDefines(lightBuilder->getShaderDefines(capsaicin)); // No std::move: the call already yields a temporary
+    if (options.light_grid_cdf_threshold)
+    {
+        baseDefines.push_back("LIGHTSAMPLERCDF_USE_THRESHOLD");
+    }
+    if (options.light_grid_cdf_octahedron_sampling)
+    {
+        baseDefines.push_back("LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING");
+    }
+    if (options.light_grid_cdf_centroid_build)
+    {
+        baseDefines.push_back("LIGHT_SAMPLE_VOLUME_CENTROID");
+    }
+    if (options.light_grid_cdf_lights_per_cell == 0)
+    {
+        baseDefines.push_back("LIGHTSAMPLERCDF_HAS_ALL_LIGHTS");
+    }
+    return baseDefines;
+}
+
+void LightSamplerGridCDF::addProgramParameters(
+    CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept
+{
+    auto lightBuilder = capsaicin.getComponent<LightBuilder>();
+    lightBuilder->addProgramParameters(capsaicin, program);
+
+    // Bind the light sampling shader parameters
+    gfxProgramSetParameter(gfx_, program, "g_LightSampler_Configuration", configBuffer);
+    gfxProgramSetParameter(gfx_, program, "g_LightSampler_CellsIndex", lightIndexBuffer);
+    gfxProgramSetParameter(gfx_, program, "g_LightSampler_CellsCDF", lightCDFBuffer);
+}
+
+bool LightSamplerGridCDF::getLightsUpdated(CapsaicinInternal const &capsaicin) const noexcept
+{
+    auto lightBuilder = capsaicin.getComponent<LightBuilder>();
+    return lightsUpdatedFlag || lightBuilder->getLightsUpdated();
+}
+
+std::string_view LightSamplerGridCDF::getHeaderFile() const noexcept
+{
+    return std::string_view("\"../../components/light_sampler_grid_cdf/light_sampler_grid_cdf.hlsl\"");
+}
+
+bool LightSamplerGridCDF::initKernels(CapsaicinInternal const &capsaicin) noexcept
+{
+    boundsProgram = gfxCreateProgram(
+        gfx_, "components/light_sampler_grid_cdf/light_sampler_grid_cdf", capsaicin.getShaderPath());
+    auto const baseDefines = getShaderDefines(capsaicin); // Owns the strings that 'defines' points into
+    std::vector<char const *> defines;
+    for (auto &i : baseDefines)
+    {
+        defines.push_back(i.c_str());
+    }
+    buildKernel = gfxCreateComputeKernel(
+        gfx_, boundsProgram, "Build", defines.data(), static_cast<uint32_t>(defines.size()));
+    return !!buildKernel;
+}
+} // namespace Capsaicin
diff --git a/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.h b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.h
new file mode 100644
index 0000000..ea166cb
--- /dev/null
+++ b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.h
@@ -0,0 +1,159 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+#pragma once
+
+#include "capsaicin_internal.h"
+#include "components/component.h"
+#include "components/light_sampler/light_sampler.h"
+#include "light_sampler_grid_shared.h"
+
+namespace Capsaicin
+{
+class LightSamplerGridCDF
+    : public LightSampler
+    , public ComponentFactory::Registrar<LightSamplerGridCDF>
+    , public LightSamplerFactory::Registrar<LightSamplerGridCDF>
+{
+public:
+    static constexpr std::string_view Name = "LightSamplerGridCDF";
+
+    LightSamplerGridCDF(LightSamplerGridCDF const &) noexcept = delete;
+
+    LightSamplerGridCDF(LightSamplerGridCDF &&) noexcept = default;
+
+    /** Constructor. */
+    LightSamplerGridCDF() noexcept;
+
+    /** Destructor. */
+    ~LightSamplerGridCDF() noexcept;
+
+    /**
+     * Gets configuration options for current technique.
+     * @return A list of all valid configuration options.
+     */
+    RenderOptionList getRenderOptions() noexcept override;
+
+    struct RenderOptions
+    {
+        uint32_t light_grid_cdf_num_cells = 16; /**< Maximum number of grid cells along any axis */
+        uint32_t light_grid_cdf_lights_per_cell =
+            0; /**< Maximum number of lights to store per grid cell (0 causes all lights to be included)*/
+        bool light_grid_cdf_threshold = false; /**< Use a cutoff threshold value for light samples */
+        bool light_grid_cdf_octahedron_sampling =
+            false; /**< Use octahedron sampling for each cell to also sample by direction */
+        bool light_grid_cdf_centroid_build =
+            false; /**< Use faster but simpler cell centroid sampling during build */
+    };
+
+    /**
+     * Convert render options to internal options format.
+     * @param options Current render options.
+     * @returns The options converted.
+     */
+    static RenderOptions convertOptions(RenderOptionList const &options) noexcept;
+
+    /**
+     * Gets a list of any shared components used by the current render technique.
+     * @return A list of all supported components.
+     */
+    ComponentList getComponents() const noexcept override;
+
+    /**
+     * Initialise any internal data or state.
+     * @note This is automatically called by the framework after construction and should be used to create
+     * any required CPU|GPU resources.
+     * @param capsaicin Current framework context.
+     * @return True if initialisation succeeded, False otherwise.
+     */
+    bool init(CapsaicinInternal const &capsaicin) noexcept override;
+
+    /**
+     * Run internal operations.
+     * @param [in,out] capsaicin Current framework context.
+     */
+    void run(CapsaicinInternal &capsaicin) noexcept override;
+
+    /**
+     * Destroy any used internal resources and shutdown.
+     */
+    void terminate() noexcept override;
+
+    /**
+     * Render GUI options.
+     * @param [in,out] capsaicin The current capsaicin context.
+     */
+    void renderGUI(CapsaicinInternal &capsaicin) const noexcept override;
+
+    /**
+     * Check to determine if any kernels using light sampler code need to be (re)compiled.
+     * @note Returns the flag computed by the most recent LightSamplerGridCDF::run(); NOTE(review): the original note said "must be called before run()" - confirm the intended call order.
+     * @param capsaicin Current framework context.
+     * @return True if an update occurred requiring internal updates to be performed.
+     */
+    bool needsRecompile(CapsaicinInternal const &capsaicin) const noexcept override;
+
+    /**
+     * Get the list of shader defines that should be passed to any kernel that uses this lightSampler.
+     * @note Also includes values from the default lightBuilder.
+     * @param capsaicin Current framework context.
+     * @return A vector with each required define.
+     */
+    std::vector<std::string> getShaderDefines(CapsaicinInternal const &capsaicin) const noexcept override;
+
+    /**
+     * Add the required program parameters to a shader based on current settings.
+     * @note Also includes values from the default lightBuilder.
+     * @param capsaicin Current framework context.
+     * @param program   The shader program to bind parameters to.
+     */
+    void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept override;
+
+    /**
+     * Check if the scene's lighting data was changed this frame.
+     * @param capsaicin Current framework context.
+     * @returns True if light data has changed.
+     */
+    bool getLightsUpdated(CapsaicinInternal const &capsaicin) const noexcept override;
+
+    /**
+     * Get the name of the header file used in HLSL code to include necessary sampler functions.
+     * @return String name of the HLSL header include.
+     */
+    std::string_view getHeaderFile() const noexcept override;
+
+private:
+    bool initKernels(CapsaicinInternal const &capsaicin) noexcept; // (Re)creates boundsProgram and buildKernel
+
+    RenderOptions options;
+    bool          recompileFlag =
+        false; /**< Flag to indicate if option change requires a shader recompile this frame */
+    bool lightsUpdatedFlag = false; /**< Flag to indicate if option change affects light samples */
+
+    LightSamplingConfiguration config = {uint4 {0}, float3 {0}, float3 {0}, float3 {0}};
+    GfxBuffer configBuffer;     /**< Buffer used to hold LightSamplingConfiguration */
+    GfxBuffer lightIndexBuffer; /**< Buffer used to hold light indexes for all lights in each cell */
+    GfxBuffer lightCDFBuffer;   /**< Buffer used to hold light CDF for all lights in each cell */
+
+    GfxProgram boundsProgram;
+    GfxKernel  buildKernel;
+};
+} // namespace Capsaicin
diff --git a/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.hlsl b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.hlsl
new file mode 100644
index 0000000..43d625c
--- /dev/null
+++ b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_cdf.hlsl
@@ -0,0 +1,291 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
/**
 * Light sampler that selects lights from per-cell light lists using a pre-built CDF.
 *
 * Buffer layout as read by this class:
 *  - g_LightSampler_CellsIndex[cellIndex] holds the number of lights stored for the cell, followed by
 *    that many light indices starting at cellIndex + 1.
 *  - g_LightSampler_CellsCDF uses the same indexing: the CDF value of each stored light starts at
 *    cellIndex + 1, and the value at cellIndex itself is a per-cell factor that is multiplied into the
 *    PDF when LIGHTSAMPLERCDF_HAS_ALL_LIGHTS is not defined.
 */
class LightSamplerGridCDF
{
    Random randomNG;

    /**
     * Perform a search of the light list for a light with CDF closest to a given value.
     * @param startIndex The index of the first item to start searching at.
     * @param numLights  The number of values to search through (callers only pass values > 0).
     * @param value      The CDF value of item to find.
     * @return The index (into the cell buffers) of the sampled light.
     */
    uint binarySearch(uint startIndex, uint numLights, float value)
    {
        // Lower-bound search: find the first element whose cdf is >= value
        uint first = 0;
        uint len = numLights;
        while (len > 0)
        {
            uint halfed = len >> 1;
            uint middle = first + halfed;
            // Bisect range based on value at middle
            if (g_LightSampler_CellsCDF[startIndex + middle] < value)
            {
                first = middle + 1;
                len -= halfed + 1;
            }
            else
            {
                len = halfed;
            }
        }
        // If every stored cdf value is < value then first == numLights, so clamp to the last entry
        const uint sampledIndexBase = min(first, numLights - 1);
        // Add cell index to found position
        const uint sampledIndex = sampledIndexBase + startIndex;
        return sampledIndex;
    }

    /**
     * Sample the index and PDF for a sampled light.
     * @param cellIndex The index of the current cell.
     * @param numLights The number of lights in the cell.
     * @param position  Current position on surface (not read by this function).
     * @param normal    Shading normal vector at current position (not read by this function).
     * @param lightPDF  (Out) The PDF for the calculated sample.
     * @return The index of the sampled light.
     */
    uint getSampledLight(uint cellIndex, uint numLights, float3 position, float3 normal, out float lightPDF)
    {
        // The light entries for a cell start one element after the cell header
        const uint startIndex = cellIndex + 1;
        const uint sampledIndex = binarySearch(startIndex, numLights, randomNG.rand());

        // The pdf of the selected entry is the difference between its cdf value and the previous
        // entry's cdf value (or zero for the first entry in the cell)
        const float previousCDF = (sampledIndex > startIndex) ? g_LightSampler_CellsCDF[sampledIndex - 1] : 0.0f;
        lightPDF = g_LightSampler_CellsCDF[sampledIndex] - previousCDF;
#ifndef LIGHTSAMPLERCDF_HAS_ALL_LIGHTS
        // Scale by the per-cell factor stored in the cell header
        lightPDF *= g_LightSampler_CellsCDF[cellIndex];
#endif
        return g_LightSampler_CellsIndex[sampledIndex];
    }

    /**
     * Get a sample light.
     * @param position Current position on surface.
     * @param normal   Shading normal vector at current position.
     * @param lightPDF (Out) The PDF for the calculated sample (is equal to zero if no valid samples could be found).
     * @returns The index of the new light sample
     */
    uint sampleLights(float3 position, float3 normal, out float lightPDF)
    {
        // Get the current cell buffer index
#ifdef LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
        const uint cellIndex = LightSamplerGrid::getCellOctaIndexFromJitteredPosition(position, normal, randomNG);
#else
        const uint cellIndex = LightSamplerGrid::getCellIndexFromJitteredPosition(position, randomNG);
#endif
        const uint numLights = g_LightSampler_CellsIndex[cellIndex];

        // Return invalid sample if the cell doesn't contain any lights
        if (numLights == 0)
        {
            lightPDF = 0.0f;
            return 0;
        }

        // Choose a light to sample from
        uint lightIndex = getSampledLight(cellIndex, numLights, position, normal, lightPDF);
        return lightIndex;
    }

    /**
     * Calculate the PDF of sampling a given light.
     * @param lightID  The index of the given light.
     * @param position The position on the surface currently being shaded.
     * @param normal   Shading normal vector at current position.
     * @returns The calculated PDF with respect to the light.
     */
    float sampleLightPDF(uint lightID, float3 position, float3 normal)
    {
        // Calculate which cell we are in based on input point
        const uint3 cell = LightSamplerGrid::getCellFromJitteredPosition(position, randomNG);

        // Calculate position of current cell in output buffer
#ifdef LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
        const uint cellIndex = LightSamplerGrid::getCellOctaIndex(cell, LightSamplerGrid::getCellFace(normal));
#else
        const uint cellIndex = LightSamplerGrid::getCellIndex(cell);
#endif

#ifndef LIGHTSAMPLERCDF_HAS_ALL_LIGHTS
        // For CDF sampling the probability is based on finding it in the current CDF list
        const uint numLights = g_LightSampler_CellsIndex[cellIndex];
        const uint startIndex = cellIndex + 1;
        for (uint currentIndex = startIndex; currentIndex < startIndex + numLights; ++currentIndex)
        {
            if (g_LightSampler_CellsIndex[currentIndex] == lightID)
            {
                // Same calculation as getSampledLight: cdf difference scaled by the per-cell factor
                const float sourcePDF = g_LightSampler_CellsCDF[currentIndex];
                const float previousCDF = (currentIndex > startIndex) ? g_LightSampler_CellsCDF[currentIndex - 1] : 0.0f;
                float lightPDF = sourcePDF - previousCDF;
                lightPDF *= g_LightSampler_CellsCDF[cellIndex];
                return lightPDF;
            }
        }
        // A light not in the CDF list cannot be selected by this sampler. FLT_EPSILON is returned
        // instead of an exact zero.
        // NOTE(review): presumably this is to keep callers that divide by the PDF finite - confirm.
        return FLT_EPSILON;
#else
        // Instead of searching for the light in the list we just re-calculate its weight as this
        // makes it a constant time operation.
        // Calculate the bounding box for the current cell
        float3 extent;
        const float3 minBB = LightSamplerGrid::getCellBB(cell, extent);
        Light selectedLight = getLight(lightID);
#ifdef LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
        float weight = sampleLightVolumeNormal(selectedLight, minBB, extent, normal);
#else
        float weight = sampleLightVolume(selectedLight, minBB, extent);
#endif // LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
        return weight;
#endif
    }

    /**
     * Sample multiple lights into a reservoir.
     * @tparam numSampledLights Number of lights to sample.
     * @param position      Current position on surface.
     * @param normal        Shading normal vector at current position.
     * @param viewDirection View direction vector at current position.
     * @param material      Material for current surface position.
     * @returns Reservoir containing combined samples.
     */
    template<uint numSampledLights>
    Reservoir sampleLightList(float3 position, float3 normal, float3 viewDirection, MaterialBRDF material)
    {
        // Get the current cell buffer index
#ifdef LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
        const uint cellIndex = LightSamplerGrid::getCellOctaIndexFromJitteredPosition(position, normal, randomNG);
#else
        const uint cellIndex = LightSamplerGrid::getCellIndexFromJitteredPosition(position, randomNG);
#endif
        const uint numLights = g_LightSampler_CellsIndex[cellIndex];
        // Never take more samples than there are lights stored in the cell
        const uint newLights = min(numLights, numSampledLights);

        // Return invalid sample if the cell doesn't contain any lights
        if (numLights == 0)
        {
            return MakeReservoir();
        }

        // Create reservoir updater
        ReservoirUpdater updater = MakeReservoirUpdater();

        // Loop through until we have the requested number of lights
        for (uint i = 0; i < newLights; ++i)
        {
            // Choose a light to sample from
            float lightPDF;
            uint lightIndex = getSampledLight(cellIndex, numLights, position, normal, lightPDF);

            // Add the light sample to the reservoir
            updateReservoir(updater, randomNG, lightIndex, lightPDF, material, position, normal, viewDirection);
        }

        // Get finalised reservoir for return
        return updater.reservoir;
    }

    /**
     * Sample multiple lights into a reservoir using cone angle.
     * @tparam numSampledLights Number of lights to sample.
     * @param position      Current position on surface.
     * @param normal        Shading normal vector at current position.
     * @param viewDirection View direction vector at current position.
     * @param solidAngle    Solid angle around view direction of visible ray cone.
     * @param material      Material for current surface position.
     * @returns Reservoir containing combined samples.
     */
    template<uint numSampledLights>
    Reservoir sampleLightListCone(float3 position, float3 normal, float3 viewDirection, float solidAngle, MaterialBRDF material)
    {
        // Get the current cell buffer index
#ifdef LIGHTSAMPLERCDF_USE_OCTAHEDRON_SAMPLING
        const uint cellIndex = LightSamplerGrid::getCellOctaIndexFromJitteredPosition(position, normal, randomNG);
#else
        const uint cellIndex = LightSamplerGrid::getCellIndexFromJitteredPosition(position, randomNG);
#endif
        const uint numLights = g_LightSampler_CellsIndex[cellIndex];
        // Never take more samples than there are lights stored in the cell
        const uint newLights = min(numLights, numSampledLights);

        // Return invalid sample if the cell doesn't contain any lights
        if (numLights == 0)
        {
            return MakeReservoir();
        }

        // Create reservoir updater
        ReservoirUpdater updater = MakeReservoirUpdater();

        // Loop through until we have the requested number of lights
        for (uint i = 0; i < newLights; ++i)
        {
            // Choose a light to sample from
            float lightPDF;
            uint lightIndex = getSampledLight(cellIndex, numLights, position, normal, lightPDF);

            // Add the light sample to the reservoir
            updateReservoirCone(updater, randomNG, lightIndex, lightPDF, material, position, normal, viewDirection, solidAngle);
        }

        // Get finalised reservoir for return
        return updater.reservoir;
    }
};

/**
 * Create a light sampler that draws its random numbers from the supplied generator.
 * @param random Random number generator copied into the sampler.
 * @return The initialised sampler.
 */
LightSamplerGridCDF MakeLightSampler(Random random)
{
    LightSamplerGridCDF ret;
    ret.randomNG = random;
    return ret;
}

typedef LightSamplerGridCDF LightSampler;
a/src/core/src/components/light_sampler_bounds/light_sampler_bounds_shared.h +++ b/src/core/src/components/light_sampler_grid_cdf/light_sampler_grid_shared.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ********************************************************************/ -#ifndef LIGHT_BOUNDS_SAMPLER_SHARED_H -#define LIGHT_BOUNDS_SAMPLER_SHARED_H +#ifndef LIGHT_SAMPLER_GRID_SHARED_H +#define LIGHT_SAMPLER_GRID_SHARED_H #include "../../gpu_shared.h" @@ -31,6 +31,10 @@ struct LightSamplingConstants uint maxNumLightsPerCell; }; +#ifdef __cplusplus +# pragma warning(push) +# pragma warning(disable : 4324) // structure was padded due to alignment specifier +#endif struct LightSamplingConfiguration { uint4 numCells; /*< Number of cells in the x,y,z directions respectively */ @@ -45,5 +49,8 @@ struct LightSamplingConfiguration #endif float3 sceneExtent; /*< World space size of the scene bounding box (sceneMax - sceneMin) */ }; +#ifdef __cplusplus +#pragma warning(pop) +#endif #endif diff --git a/src/core/src/components/light_sampler_grid_stream/light_sampler_grid_stream.comp b/src/core/src/components/light_sampler_grid_stream/light_sampler_grid_stream.comp new file mode 100644 index 0000000..92fc08b --- /dev/null +++ b/src/core/src/components/light_sampler_grid_stream/light_sampler_grid_stream.comp @@ -0,0 +1,261 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#include "../light_sampler_grid_cdf/light_sampler_grid_shared.h" + +ConstantBuffer g_LightSampler_Constants; +RWStructuredBuffer g_DispatchCommandBuffer; + +TextureCube g_EnvironmentBuffer; +Texture2D g_TextureMaps[] : register(space99); +SamplerState g_TextureSampler; // Is a linear sampler + +uint g_FrameIndex; + +#include "light_sampler_grid_stream.hlsl" +#include "../../math/random.hlsl" + +#define THREADX 128 +#define THREADY 1 +#define THREADZ 1 + +#ifdef LIGHTSAMPLERSTREAM_RES_MANYLIGHTS +groupshared uint lightsIDs[THREADX / 16]; //Assume 16 as smallest possible wave size +groupshared float lightsWeights[THREADX / 16]; +groupshared float lightsTotalWeights[THREADX / 16]; + +#define LS_GRID_STREAM_THREADREDUCE 128 /**< Size of thread group when using parallel build */ +#endif + +/** + * Create required internal configuration values. 
/**
 * Create required internal configuration values.
 * Reads the scene bounds from element 0 of g_LightSampler_MinBounds / g_LightSampler_MaxBounds, writes the
 * grid layout to g_LightSampler_Configuration[0] and writes the number of thread groups required by the
 * indirect call to Build into g_DispatchCommandBuffer[0].
 */
[numthreads(1, 1, 1)]
void CalculateBounds()
{
    // Scene bounding box and its size
    const float3 boundsLo = g_LightSampler_MinBounds[0];
    const float3 boundsHi = g_LightSampler_MaxBounds[0];
    const float3 boundsSize = boundsHi - boundsLo;

    // Use one cell edge length for every axis, chosen so the longest axis gets maxCellsPerAxis cells
    const float longestSide = max(boundsSize.x, max(boundsSize.y, boundsSize.z));
    const float cellEdge = longestSide / g_LightSampler_Constants.maxCellsPerAxis;
    const float3 cellsPerAxis = ceil(boundsSize / cellEdge);
    const uint3 gridDims = (uint3)cellsPerAxis;

    // There is no point storing more entries per cell than there are lights
    const uint storedPerCell = min(g_LightSampler_Constants.maxNumLightsPerCell, getNumberLights());

    // Publish the grid layout
    g_LightSampler_Configuration[0].numCells = uint4(gridDims, storedPerCell);
    g_LightSampler_Configuration[0].cellSize = boundsSize / cellsPerAxis;
    g_LightSampler_Configuration[0].sceneMin = boundsLo;
    g_LightSampler_Configuration[0].sceneExtent = boundsSize;

    // Number of Build threads needed for each cell along x
    uint threadsPerCell = storedPerCell;
#ifdef LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING
    threadsPerCell *= 8;
#endif
#ifdef LIGHTSAMPLERSTREAM_RES_MANYLIGHTS
    threadsPerCell *= LS_GRID_STREAM_THREADREDUCE;
#endif
    uint3 threadCount = gridDims;
    threadCount.x *= threadsPerCell;

    // Round the thread count up to whole thread groups
    const uint3 groupSize = uint3(THREADX, THREADY, THREADZ);
    const uint3 groupCount = (threadCount + groupSize - 1.xxx) / groupSize;
    g_DispatchCommandBuffer[0].num_groups_x = groupCount.x;
    g_DispatchCommandBuffer[0].num_groups_y = groupCount.y;
    g_DispatchCommandBuffer[0].num_groups_z = groupCount.z;
}
/**
 * Build an internal grid.
 * Each thread streams through a strided subset of the light list and keeps a single light using weighted
 * reservoir sampling against the bounding box of its grid cell. The selected light index is written to
 * g_LightSampler_CellsIndex and the associated weights to g_LightSampler_CellsReservoirs.
 * When LIGHTSAMPLERSTREAM_RES_MANYLIGHTS is defined, LS_GRID_STREAM_THREADREDUCE threads cooperate on each
 * stored reservoir and their partial results are merged before the write.
 * @param did Dispatch thread ID; x encodes cell x, (optional) octahedron face and reservoir, y/z the cell y/z.
 * @param gid Flattened index of the thread within its thread group.
 */
[numthreads(THREADX, THREADY, THREADZ)]
void Build(in uint3 did : SV_DispatchThreadID, in uint gid : SV_GroupIndex)
{
    uint reservoirsPerCell = g_LightSampler_Configuration[0].numCells.w;
#ifdef LIGHTSAMPLERSTREAM_RES_MANYLIGHTS
    // Use multiple threads to generate reservoirs then collapse them into final
    // Called per cell per reservoir per parallel thread
    reservoirsPerCell *= LS_GRID_STREAM_THREADREDUCE;
#endif
    uint3 cellID = did;
#ifdef LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING
    // Called per cell per reservoir per octahedron face
    const uint resCellOffset = 8 * reservoirsPerCell;
    cellID.x /= resCellOffset;
#else
    // Called per cell per reservoir
    cellID.x /= reservoirsPerCell;
#endif
    // Discard threads that fall outside the grid (dispatch size is rounded up to whole groups)
    if (any(cellID >= g_LightSampler_Configuration[0].numCells.xyz))
    {
        return;
    }

    // Check reservoirID against total lights
    const uint totalLights = getNumberLights();
#ifdef LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING
    // Split the remainder of did.x within the cell into octahedron face and per-face reservoir ID
    const uint cellFace = (did.x - (cellID.x * resCellOffset)) / reservoirsPerCell;
    uint reservoirID = did.x - (cellID.x * resCellOffset) - (cellFace * reservoirsPerCell);
#else
    uint reservoirID = did.x - (cellID.x * reservoirsPerCell);
#endif

    // Determine actual write location for current reservoir
#ifdef LIGHTSAMPLERSTREAM_RES_MANYLIGHTS
    const uint reservoirIndex = reservoirID / LS_GRID_STREAM_THREADREDUCE;
    // Ensure that reservoirs contain the same samples as when building with the non-parallel builds.
    // This is important in order to distribute the potentially high importance lights stored near
    // start of light list across all stored reservoirs. It does come at cost of preventing linear
    // memory access from neighbouring threads
    reservoirID = reservoirIndex + ((reservoirID - (reservoirIndex * LS_GRID_STREAM_THREADREDUCE)) * g_LightSampler_Configuration[0].numCells.w);
#else
    const uint reservoirIndex = reservoirID;
#endif
    if (reservoirIndex >= totalLights || reservoirID >= reservoirsPerCell)
    {
        return;
    }

    // Calculate the bounding box for the current cell
    float3 extent;
    const float3 minBB = LightSamplerGrid::getCellBB(cellID, extent);

#ifdef LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING
    uint cellIndex = LightSamplerGrid::getCellOctaIndex(cellID, cellFace) + reservoirIndex;
    const float3 normal = LightSamplerGrid::getCellNormal(cellFace);
#else
    const uint cellIndex = LightSamplerGrid::getCellIndex(cellID) + reservoirIndex;
#endif // LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING

    // Loop through all lights and sample
    Random randomNG = MakeRandom(cellIndex, g_FrameIndex);
    uint storedLight = -1; // -1 wraps to the largest uint and is what gets written if no light is selected
    float storedLightWeight = 0.0f;
    float totalWeight = 0.0f;
    float j = randomNG.rand();
    float pNone = 1.0f;
    // Each thread visits lights reservoirID, reservoirID + reservoirsPerCell, ...
    for (uint lightIndex = reservoirID; lightIndex < totalLights; lightIndex += reservoirsPerCell)
    {
        // Calculate sampled contribution for light
        Light selectedLight = getLight(lightIndex);
#ifdef LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING
        float sampleWeight = sampleLightVolumeNormal(selectedLight, minBB, extent, normal);
#else
        float sampleWeight = sampleLightVolume(selectedLight, minBB, extent);
#endif // LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING
        // Must avoid 0 samples at the start of the stream to avoid division by 0
        if (sampleWeight > 0.0f)
        {
            // weight = targetPDF/sourcePDF where sourcePDF = 1/numLights
            // The numLights term cancels out later on
            totalWeight += sampleWeight;
            float p = sampleWeight / totalWeight;
            // j is consumed across successive candidates; a new random number is only drawn after a
            // candidate has been accepted
            j -= p * pNone;
            pNone *= (1.0f - p);
            if (j <= 0.0f)
            {
                storedLight = lightIndex;
                storedLightWeight = sampleWeight;
                j = randomNG.rand();
                pNone = 1.0f;
            }
        }
    }

#ifdef LIGHTSAMPLERSTREAM_RES_MANYLIGHTS
    // Must get all parallel reservoirs and collapse into a single one.
    // This is done by generating a CDF of all lights total weights and importance sampling it
    // (WavePrefixSum excludes the current lane, so adding totalWeight makes the sum inclusive)
    const float waveCDF = WavePrefixSum(totalWeight) + totalWeight;
    // Get total weight of all lanes
    totalWeight = WaveActiveSum(totalWeight);
    j = randomNG.rand();
    // A sample is considered to be the final sample if it passes the reservoir sample check and
    // it is the first reservoir within the wave to do so
    bool valid = (j * totalWeight) <= waveCDF;
    uint isFirstCheck = WavePrefixCountBits(valid);
    // If the parallel thread count is equal to the wave size then we can just use wave
    // instructions to collapse the full group. Otherwise we must use group shared memory
    // to combine samples across waves.
    if (!valid || isFirstCheck != 0)
    {
        return;
    }
    if (WaveGetLaneCount() != LS_GRID_STREAM_THREADREDUCE)
    {
        // Only 1 thread is valid for each wave and it writes its sample out to the LDS
        // The first valid thread in the first wave then reads in all the samples from LDS and combines them
        const uint waveID = gid / WaveGetLaneCount();
        // Need to use LDS to combine waves
        lightsTotalWeights[waveID] = totalWeight;
        lightsWeights[waveID] = storedLightWeight;
        lightsIDs[waveID] = storedLight;
        // NOTE(review): this barrier is only reached by threads that survived the early returns above,
        // i.e. it executes under divergent control flow - confirm this is safe on all target hardware.
        GroupMemoryBarrierWithGroupSync();
        if (waveID != 0)
        {
            return;
        }
        const uint waveCount = LS_GRID_STREAM_THREADREDUCE / WaveGetLaneCount();
        // Merge reservoirs from each wave using standard reservoir merge
        storedLight = -1;
        storedLightWeight = 0.0f;
        totalWeight = 0.0f;
        j = randomNG.rand();
        pNone = 1.0f;
        for (uint i = 0; i < waveCount; ++i)
        {
            // Each wave's summed weight acts as the weight of the sample that wave selected
            float sampleWeight = lightsTotalWeights[i];
            if (sampleWeight > 0.0f)
            {
                totalWeight += sampleWeight;
                float p = sampleWeight / totalWeight;
                j -= p * pNone;
                pNone *= (1.0f - p);
                if (j <= 0.0f)
                {
                    storedLight = lightsIDs[i];
                    storedLightWeight = lightsWeights[i];
                    j = randomNG.rand();
                    pNone = 1.0f;
                }
            }
        }
    }
    else
    {
        // The parallel group size is equal to the wave size so the thread with the valid sample can
        // just write direct to memory
    }
#endif
    // Write out store data
    g_LightSampler_CellsIndex[cellIndex] = storedLight;
#ifdef LIGHTSAMPLERSTREAM_RES_USE_RESAMPLE
    // Store the ratio totalWeight / storedLightWeight, or 0 when no light was selected
    float storeValue = (storedLightWeight > 0.0f) ? totalWeight / storedLightWeight : 0.0f;
    g_LightSampler_CellsReservoirs[cellIndex] = storeValue;
#else
    g_LightSampler_CellsReservoirs[cellIndex] = float2(storedLightWeight, totalWeight);
#endif
}
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#include "light_sampler_grid_stream.h" + +#include "../light_builder/light_builder.h" +#include "../light_sampler_grid_cdf/light_sampler_grid_shared.h" +#include "capsaicin_internal.h" + +namespace Capsaicin +{ +LightSamplerGridStream::LightSamplerGridStream() noexcept + : LightSampler(Name) +{} + +LightSamplerGridStream::~LightSamplerGridStream() noexcept +{ + terminate(); +} + +RenderOptionList LightSamplerGridStream::getRenderOptions() noexcept +{ + RenderOptionList newOptions; + newOptions.emplace(RENDER_OPTION_MAKE(light_grid_stream_num_cells, options)); + newOptions.emplace(RENDER_OPTION_MAKE(light_grid_stream_lights_per_cell, options)); + newOptions.emplace(RENDER_OPTION_MAKE(light_grid_stream_octahedron_sampling, options)); + newOptions.emplace(RENDER_OPTION_MAKE(light_grid_stream_resample, options)); + newOptions.emplace(RENDER_OPTION_MAKE(light_grid_stream_merge_type, options)); + newOptions.emplace(RENDER_OPTION_MAKE(light_grid_stream_parallel_build, options)); + newOptions.emplace(RENDER_OPTION_MAKE(light_grid_stream_centroid_build, options)); + return newOptions; +} + +LightSamplerGridStream::RenderOptions LightSamplerGridStream::convertOptions( + RenderOptionList const &options) noexcept +{ + RenderOptions newOptions; + RENDER_OPTION_GET(light_grid_stream_num_cells, newOptions, options) + RENDER_OPTION_GET(light_grid_stream_lights_per_cell, newOptions, options) + RENDER_OPTION_GET(light_grid_stream_octahedron_sampling, newOptions, options) + RENDER_OPTION_GET(light_grid_stream_resample, newOptions, options) + RENDER_OPTION_GET(light_grid_stream_merge_type, newOptions, options) + 
/**
 * Gets a list of any shared components used by this light sampler.
 * @return A list containing the LightBuilder component.
 */
ComponentList LightSamplerGridStream::getComponents() const noexcept
{
    ComponentList components;
    components.emplace_back(COMPONENT_MAKE(LightBuilder));
    return components;
}

/**
 * Initialise any internal data or state.
 * Creates the kernels, the bounds buffers, the two min/max reducers, the configuration buffer, the light
 * index buffer and the indirect dispatch command buffer.
 * @param capsaicin Current framework context.
 * @return True if boundsProgram is valid after initialisation, False otherwise.
 */
bool LightSamplerGridStream::init(CapsaicinInternal const &capsaicin) noexcept
{
    // NOTE(review): any result returned by initKernels is not checked here; success is judged solely
    // from boundsProgram at the end of this function.
    initKernels(capsaicin);

    // NOTE(review): the element type template argument of the gfxCreateBuffer calls in this function is
    // not visible in this listing - confirm the intended element types against the original source.
    boundsLengthBuffer = gfxCreateBuffer(gfx_, 1);
    boundsLengthBuffer.setName("Capsaicin_LightSamplerGridStream_BoundsCountBuffer");

    initBoundsBuffers();

    // One reducer each for the minimum and maximum corner of the bounds
    reducerMin.initialise(capsaicin, GPUReduce::Type::Float3, GPUReduce::Operation::Min);
    reducerMax.initialise(capsaicin, GPUReduce::Type::Float3, GPUReduce::Operation::Max);

    // Function-local declaration that hides any LightSamplingConfiguration declared at outer scope.
    // NOTE(review): presumably this spells out the GPU-side padding (pack, pack2) explicitly - confirm
    // it still matches the layout in light_sampler_grid_shared.h.
    struct LightSamplingConfiguration
    {
        uint4  numCells;
        float3 cellSize;
        float  pack;
        float3 sceneMin;
        float  pack2;
        float3 sceneExtent;
    };

    configBuffer = gfxCreateBuffer(gfx_, 1);
    configBuffer.setName("Capsaicin_LightSamplerGrid_ConfigBuffer");

    initLightIndexBuffer();

    dispatchCommandBuffer = gfxCreateBuffer(gfx_, 1);
    dispatchCommandBuffer.setName("Capsaicin_LightSamplerGrid_DispatchCommandBuffer");

    return !!boundsProgram;
}
un-initialised*/) + { + // Update the light sampler using scene bounds + auto sceneBounds = capsaicin.getSceneBounds(); + setBounds(sceneBounds, this); + } + + update(capsaicin, this); + } +} + +void LightSamplerGridStream::terminate() noexcept +{ + gfxDestroyBuffer(gfx_, boundsLengthBuffer); + boundsLengthBuffer = {}; + gfxDestroyBuffer(gfx_, boundsMinBuffer); + boundsMinBuffer = {}; + gfxDestroyBuffer(gfx_, boundsMaxBuffer); + boundsMaxBuffer = {}; + + gfxDestroyBuffer(gfx_, configBuffer); + configBuffer = {}; + gfxDestroyBuffer(gfx_, lightIndexBuffer); + lightIndexBuffer = {}; + gfxDestroyBuffer(gfx_, lightReservoirBuffer); + lightReservoirBuffer = {}; + + gfxDestroyBuffer(gfx_, dispatchCommandBuffer); + dispatchCommandBuffer = {}; + + gfxDestroyKernel(gfx_, calculateBoundsKernel); + calculateBoundsKernel = {}; + gfxDestroyKernel(gfx_, buildKernel); + buildKernel = {}; + gfxDestroyProgram(gfx_, boundsProgram); + boundsProgram = {}; +} + +void LightSamplerGridStream::renderGUI(CapsaicinInternal &capsaicin) const noexcept +{ + if (ImGui::CollapsingHeader("Light Sampler Settings", ImGuiTreeNodeFlags_None)) + { + ImGui::DragInt("Max Cells per Axis", + (int32_t *)&capsaicin.getOption("light_grid_stream_num_cells"), 1, 1, 128); + auto lightBuilder = capsaicin.getComponent(); + ImGui::DragInt("Number Lights per Cell", + (int32_t *)&capsaicin.getOption("light_grid_stream_lights_per_cell"), 1, 1, + lightBuilder->getLightCount()); + // ImGui::Checkbox("Octahedral Sampling", + // &capsaicin.getOption("light_grid_stream_octahedron_sampling")); + ImGui::Checkbox( + "Fast Centroid Build", &capsaicin.getOption("light_grid_stream_centroid_build")); + ImGui::Combo("Merge Algorithm", + reinterpret_cast(&capsaicin.getOption("light_grid_stream_merge_type")), + "Random Select\0Without Replacement\0With Replacement"); + ImGui::Checkbox("Intermediate Resample", &capsaicin.getOption("light_grid_stream_resample")); + ImGui::Checkbox("Parallel Build", 
&capsaicin.getOption("light_grid_stream_parallel_build")); + } +} + +void LightSamplerGridStream::reserveBoundsValues(uint32_t reserve, std::type_info const &caller) noexcept +{ + boundsReservations.emplace(caller.hash_code(), reserve); + // Determine if current buffer needs to be reallocated + uint32_t elements = 0; + for (auto &i : boundsReservations) + { + elements += i.second; + } + boundsMaxLength = elements / 32; // Currently assumes wavefront size of 32 and reduces due to use of + // WaveActiveMin in shader code + + if (boundsMinBuffer.getCount() < boundsMaxLength && boundsMaxLength > 0) + { + gfxDestroyBuffer(gfx_, boundsMinBuffer); + gfxDestroyBuffer(gfx_, boundsMaxBuffer); + initBoundsBuffers(); + } +} + +void LightSamplerGridStream::setBounds( + std::pair const &bounds, std::type_info const &caller) noexcept +{ + // Add to internal host reservations + boundsHostReservations.emplace(caller.hash_code(), bounds); + + // Add an additional reserve spot in device buffer so that this can coexist with reserveBoundsValues + reserveBoundsValues(1, this); +} + +void LightSamplerGridStream::update(CapsaicinInternal &capsaicin, Timeable *parent) noexcept +{ + // Update internal options + auto const optionsNew = convertOptions(capsaicin.getOptions()); + auto lightBuilder = capsaicin.getComponent(); + + // Check if many lights kernel should be run. This requires greater than 128 lights per reservoir as + // otherwise there will be empty reservoirs. Note: The 128 must match the LS_GRID_STREAM_THREADREDUCE + // value which is currently set to the largest possible wave size. If targeting a specific platform then + // matching this value with the wave size will give better results. 
+ bool manyLights = options.light_grid_stream_parallel_build + && (lightBuilder->getLightCount() > 128 * options.light_grid_stream_lights_per_cell); + + recompileFlag = + optionsNew.light_grid_stream_octahedron_sampling != options.light_grid_stream_octahedron_sampling + || optionsNew.light_grid_stream_resample != options.light_grid_stream_resample + || optionsNew.light_grid_stream_merge_type != options.light_grid_stream_merge_type + || optionsNew.light_grid_stream_centroid_build != options.light_grid_stream_centroid_build + || usingManyLights != manyLights || lightBuilder->needsRecompile(capsaicin); + lightsUpdatedFlag = + optionsNew.light_grid_stream_octahedron_sampling != options.light_grid_stream_octahedron_sampling + || optionsNew.light_grid_stream_lights_per_cell != options.light_grid_stream_lights_per_cell + || optionsNew.light_grid_stream_num_cells != options.light_grid_stream_num_cells + || optionsNew.light_grid_stream_centroid_build != options.light_grid_stream_centroid_build + || usingManyLights != manyLights; + options = optionsNew; + usingManyLights = manyLights; + + const uint32_t numCells = options.light_grid_stream_num_cells; + uint lightDataLength = numCells * numCells * numCells * options.light_grid_stream_lights_per_cell; + if (options.light_grid_stream_octahedron_sampling) + { + lightDataLength *= 8; + } + if (lightIndexBuffer.getCount() < lightDataLength && lightDataLength > 0) + { + gfxDestroyBuffer(gfx_, lightIndexBuffer); + gfxDestroyBuffer(gfx_, lightReservoirBuffer); + initLightIndexBuffer(); + } + + if (recompileFlag) + { + gfxDestroyKernel(gfx_, calculateBoundsKernel); + gfxDestroyKernel(gfx_, buildKernel); + gfxDestroyProgram(gfx_, boundsProgram); + + initKernels(capsaicin); + } + + // Calculate host side maximum bounds + bool hostUpdated = false; + if (!boundsHostReservations.empty()) + { + // Sum up everything in boundsHostReservations + std::pair newBounds = boundsHostReservations[0]; + for (auto &i : + 
std::ranges::subrange(++boundsHostReservations.cbegin(), boundsHostReservations.cend())) + { + newBounds.first = glm::min(newBounds.first, i.second.first); + newBounds.second = glm::max(newBounds.second, i.second.second); + } + + // Check if the host side bounds needs to be uploaded + if (newBounds != currentBounds) + { + currentBounds = newBounds; + hostUpdated = true; + // Check if there are also any device side reservations, if so then upload the host value to the + // last slot so that it will participate in reduceMinMax but wont be overwritten each frame + if (boundsReservations.size() > 1) + { + // Copy to last element boundsMinBuffer and boundsMaxBuffer + GfxBuffer uploadMinBuffer = + gfxCreateBuffer(gfx_, 3, &newBounds.first, kGfxCpuAccess_Write); + gfxCommandCopyBuffer(gfx_, boundsMinBuffer, ((size_t)boundsMaxLength - 1) * sizeof(float) * 3, + uploadMinBuffer, 0, sizeof(float) * 3); + GfxBuffer uploadMaxBuffer = + gfxCreateBuffer(gfx_, 3, &newBounds.second, kGfxCpuAccess_Write); + gfxCommandCopyBuffer(gfx_, boundsMaxBuffer, ((size_t)boundsMaxLength - 1) * sizeof(float) * 3, + uploadMaxBuffer, 0, sizeof(float) * 3); + gfxDestroyBuffer(gfx_, uploadMinBuffer); + gfxDestroyBuffer(gfx_, uploadMaxBuffer); + } + else + { + GfxBuffer uploadMinBuffer = + gfxCreateBuffer(gfx_, 3, &newBounds.first, kGfxCpuAccess_Write); + gfxCommandCopyBuffer(gfx_, boundsMinBuffer, 0, uploadMinBuffer, 0, sizeof(float) * 3); + GfxBuffer uploadMaxBuffer = + gfxCreateBuffer(gfx_, 3, &newBounds.second, kGfxCpuAccess_Write); + gfxCommandCopyBuffer(gfx_, boundsMaxBuffer, 0, uploadMaxBuffer, 0, sizeof(float) * 3); + gfxDestroyBuffer(gfx_, uploadMinBuffer); + gfxDestroyBuffer(gfx_, uploadMaxBuffer); + } + } + } + + // Update constants buffer + GfxBuffer samplingConstants = capsaicin.allocateConstantBuffer(1); + LightSamplingConstants constantData = {}; + constantData.maxCellsPerAxis = numCells; + constantData.maxNumLightsPerCell = options.light_grid_stream_lights_per_cell; + 
gfxBufferGetData(gfx_, samplingConstants)[0] = constantData; + + // Add program parameters + addProgramParameters(capsaicin, boundsProgram); + gfxProgramSetParameter(gfx_, boundsProgram, "g_DispatchCommandBuffer", dispatchCommandBuffer); + gfxProgramSetParameter(gfx_, boundsProgram, "g_LightSampler_Constants", samplingConstants); + + gfxProgramSetParameter(gfx_, boundsProgram, "g_EnvironmentBuffer", capsaicin.getEnvironmentBuffer()); + gfxProgramSetParameter( + gfx_, boundsProgram, "g_TextureMaps", capsaicin.getTextures(), capsaicin.getTextureCount()); + gfxProgramSetParameter(gfx_, boundsProgram, "g_TextureSampler", capsaicin.getLinearSampler()); + + gfxProgramSetParameter(gfx_, boundsProgram, "g_FrameIndex", capsaicin.getFrameIndex()); + + // Create the light sampling structure bounds by reducing all values stored in the boundsMin|MaxBuffers + if (boundsReservations.size() > (boundsHostReservations.empty() ? 0 : 1)) + { + RenderTechnique::TimedSection const timedSection(*parent, "CalculateLightSamplerBounds"); + // Reduce Min/Max + reducerMin.reduceIndirect(boundsMinBuffer, boundsLengthBuffer, boundsMaxLength); + reducerMax.reduceIndirect(boundsMaxBuffer, boundsLengthBuffer, boundsMaxLength); + // Clear boundsLengthBuffer + gfxCommandClearBuffer(gfx_, boundsLengthBuffer, 0); + } + + // Calculate the required configuration values + if (boundsReservations.size() > (boundsHostReservations.empty() ? 
0 : 1) || hostUpdated + || lightsUpdatedFlag || recompileFlag) + { + RenderTechnique::TimedSection const timedSection(*parent, "CalculateLightSamplerConfiguration"); + // Calculate the required configuration values + gfxCommandBindKernel(gfx_, calculateBoundsKernel); + gfxCommandDispatch(gfx_, 1, 1, 1); + } + + // Create the light sampling structure + { + RenderTechnique::TimedSection const timedSection(*parent, "BuildLightSampler"); + + // Build the sampling structure + gfxCommandBindKernel(gfx_, buildKernel); + gfxCommandDispatchIndirect(gfx_, dispatchCommandBuffer); + } + + // Release constant buffer + gfxDestroyBuffer(gfx_, samplingConstants); +} + +bool LightSamplerGridStream::needsRecompile( + [[maybe_unused]] CapsaicinInternal const &capsaicin) const noexcept +{ + return recompileFlag; +} + +std::vector LightSamplerGridStream::getShaderDefines( + CapsaicinInternal const &capsaicin) const noexcept +{ + auto lightBuilder = capsaicin.getComponent(); + std::vector baseDefines(std::move(lightBuilder->getShaderDefines(capsaicin))); + if (options.light_grid_stream_octahedron_sampling) + { + baseDefines.push_back("LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING"); + } + if (options.light_grid_stream_resample) + { + baseDefines.push_back("LIGHTSAMPLERSTREAM_RES_USE_RESAMPLE"); + } + if (options.light_grid_stream_merge_type == 1) + { + baseDefines.push_back("LIGHTSAMPLERSTREAM_RES_FAST_MERGE"); + } + else if (options.light_grid_stream_merge_type == 0) + { + baseDefines.push_back("LIGHTSAMPLERSTREAM_RES_RANDOM_MERGE"); + } + if (options.light_grid_stream_centroid_build) + { + baseDefines.push_back("LIGHT_SAMPLE_VOLUME_CENTROID"); + } + if (usingManyLights) + { + baseDefines.push_back("LIGHTSAMPLERSTREAM_RES_MANYLIGHTS"); + } + return baseDefines; +} + +void LightSamplerGridStream::addProgramParameters( + CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept +{ + auto lightBuilder = capsaicin.getComponent(); + lightBuilder->addProgramParameters(capsaicin, 
program); + + // Bind the light sampling shader parameters + gfxProgramSetParameter(gfx_, program, "g_LightSampler_BoundsLength", boundsLengthBuffer); + gfxProgramSetParameter(gfx_, program, "g_LightSampler_MinBounds", boundsMinBuffer); + gfxProgramSetParameter(gfx_, program, "g_LightSampler_MaxBounds", boundsMaxBuffer); + gfxProgramSetParameter(gfx_, program, "g_LightSampler_Configuration", configBuffer); + gfxProgramSetParameter(gfx_, program, "g_LightSampler_CellsIndex", lightIndexBuffer); + gfxProgramSetParameter(gfx_, program, "g_LightSampler_CellsReservoirs", lightReservoirBuffer); +} + +bool LightSamplerGridStream::getLightsUpdated(CapsaicinInternal const &capsaicin) const noexcept +{ + auto lightBuilder = capsaicin.getComponent(); + return lightsUpdatedFlag || lightBuilder->getLightsUpdated(); +} + +std::string_view LightSamplerGridStream::getHeaderFile() const noexcept +{ + return std::string_view("\"../../components/light_sampler_grid_stream/light_sampler_grid_stream.hlsl\""); +} + +bool LightSamplerGridStream::initKernels(CapsaicinInternal const &capsaicin) noexcept +{ + boundsProgram = gfxCreateProgram( + gfx_, "components/light_sampler_grid_stream/light_sampler_grid_stream", capsaicin.getShaderPath()); + auto baseDefines(std::move(getShaderDefines(capsaicin))); + std::vector defines; + for (auto &i : baseDefines) + { + defines.push_back(i.c_str()); + } + calculateBoundsKernel = gfxCreateComputeKernel( + gfx_, boundsProgram, "CalculateBounds", defines.data(), static_cast(defines.size())); + buildKernel = gfxCreateComputeKernel( + gfx_, boundsProgram, "Build", defines.data(), static_cast(defines.size())); + return !!buildKernel; +} + +bool LightSamplerGridStream::initBoundsBuffers() noexcept +{ + boundsMinBuffer = gfxCreateBuffer(gfx_, boundsMaxLength); + boundsMinBuffer.setName("Capsaicin_LightSamplerGrid_BoundsMinBuffer"); + boundsMaxBuffer = gfxCreateBuffer(gfx_, boundsMaxLength); + 
boundsMaxBuffer.setName("Capsaicin_LightSamplerGrid_BoundsMaxBuffer"); + gfxCommandClearBuffer(gfx_, boundsLengthBuffer, 0); + return !!boundsMaxBuffer; +} + +bool LightSamplerGridStream::initLightIndexBuffer() noexcept +{ + const uint32_t numCells = options.light_grid_stream_num_cells; + const uint32_t lightsPerCell = options.light_grid_stream_lights_per_cell; + uint lightDataLength = numCells * numCells * numCells * lightsPerCell; + if (options.light_grid_stream_octahedron_sampling) + { + lightDataLength *= 8; + } + + lightIndexBuffer = gfxCreateBuffer(gfx_, lightDataLength); + lightIndexBuffer.setName("Capsaicin_LightSamplerGrid_IndexBuffer"); + lightReservoirBuffer = + gfxCreateBuffer(gfx_, lightDataLength); // Only need half as much if use resampling + lightReservoirBuffer.setName("Capsaicin_LightSamplerGrid_ReservoirBuffer"); + return !!lightReservoirBuffer; +} +} // namespace Capsaicin diff --git a/src/core/src/components/light_sampler_bounds/light_sampler_bounds.h b/src/core/src/components/light_sampler_grid_stream/light_sampler_grid_stream.h similarity index 55% rename from src/core/src/components/light_sampler_bounds/light_sampler_bounds.h rename to src/core/src/components/light_sampler_grid_stream/light_sampler_grid_stream.h index 25cce26..a137f7e 100644 --- a/src/core/src/components/light_sampler_bounds/light_sampler_bounds.h +++ b/src/core/src/components/light_sampler_grid_stream/light_sampler_grid_stream.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,49 +21,62 @@ THE SOFTWARE. 
********************************************************************/ #pragma once -#include "../light_sampler/light_sampler.h" -#include "capsaicin.h" -#include "render_technique.h" +#include "capsaicin_internal.h" +#include "components/component.h" +#include "components/light_sampler/light_sampler.h" +#include "components/light_sampler_grid_cdf/light_sampler_grid_cdf.h" #include "utilities/gpu_reduce.h" namespace Capsaicin { -class LightSamplerBounds : public Component::RegistrarName +class LightSamplerGridStream + : public LightSampler + , public ComponentFactory::Registrar + , public LightSamplerFactory::Registrar { public: - static constexpr std::string_view Name = "LightSamplerBounds"; + static constexpr std::string_view Name = "LightSamplerGridStream"; - /** Constructor. */ - LightSamplerBounds() noexcept {} + LightSamplerGridStream(LightSamplerGridStream const &) noexcept = delete; - LightSamplerBounds(LightSamplerBounds const &) noexcept = delete; + LightSamplerGridStream(LightSamplerGridStream &&) noexcept = default; - LightSamplerBounds(LightSamplerBounds &&) noexcept = default; + /** Constructor. */ + LightSamplerGridStream() noexcept; /** Destructor. */ - ~LightSamplerBounds() noexcept; + ~LightSamplerGridStream() noexcept; /* * Gets configuration options for current technique. * @return A list of all valid configuration options. 
*/ - RenderOptionList getRenderOptions() noexcept; + RenderOptionList getRenderOptions() noexcept override; struct RenderOptions { - uint32_t light_bounds_num_cells = 16; /**< Maximum number of grid cells along any axis */ - uint32_t light_bounds_lights_per_cell = 32; /**< Maximum number of lights to store per grid cell */ - bool light_bounds_threshold = true; /**< Use a cutoff threshold value for light samples */ - bool light_bounds_cdf = true; /**< Use CDF or weighted reservoir sampling */ - bool light_bounds_uniform_sample = false; /**< Use uniform sampling instead of bounds structure */ + uint32_t light_grid_stream_num_cells = 16; /**< Maximum number of grid cells along any axis */ + uint32_t light_grid_stream_lights_per_cell = + 64; /**< Maximum number of lights to store per grid cell */ + bool light_grid_stream_octahedron_sampling = + false; /**< Use octahedron sampling for each cell to also sample by direction */ + bool light_grid_stream_resample = + true; /**< Use resampling based on local illumination when using reservoir sampling */ + uint32_t light_grid_stream_merge_type = + 1; /**< Select merge type, 0: Random reservoir selection, 1: Merge without replacement, 2: Merge + with replacement (allows high importance lights to be multiply sampled) */ + bool light_grid_stream_parallel_build = + false; /**< Use faster reservoir parallel build on scene with large light count */ + bool light_grid_stream_centroid_build = + false; /**< Use faster but simpler cell centroid sampling during build */ }; /** - * Convert render settings to internal options format. - * @param settings Current render settings. + * Convert render options to internal options format. + * @param options Current render options. * @returns The options converted. 
*/ - static RenderOptions convertOptions(RenderSettings const &settings) noexcept; + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; /** * Gets a list of any shared components used by the current render technique. @@ -86,10 +99,21 @@ class LightSamplerBounds : public Component::RegistrarName */ void run(CapsaicinInternal &capsaicin) noexcept override; + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + + /** + * Render GUI options. + * @param [in,out] capsaicin The current capsaicin context. + */ + void renderGUI(CapsaicinInternal &capsaicin) const noexcept override; + /** * Reserve memory used for calculating bounds min/max values at runtime. - * When using the hlsl `LightBounds_StorePosition` function adequate storage must be reserved in order to - * store the values by using this function. Alternatively LightSamplerBounds::setBounds() can be used to + * When using the hlsl `requestLightSampleLocation` function adequate storage must be reserved in order to + * store the values by using this function. Alternatively setBounds() can be used to * directly set the bounds from the host. * @param reserve The number of position values to reserve space for. * @param caller The technique or component that is reserving memory. @@ -97,67 +121,73 @@ class LightSamplerBounds : public Component::RegistrarName void reserveBoundsValues(uint32_t reserve, std::type_info const &caller) noexcept; template - void reserveBoundsValues(uint32_t reserve, T const *const caller) noexcept + void reserveBoundsValues(uint32_t reserve, [[maybe_unused]] T const *const caller) noexcept { reserveBoundsValues(reserve, typeid(T)); } /** * Directly set the bounds that the sampler covers. - * Alternatively if calculating bounds dynamically at runtime the - * LightSamplerBounds::reserveBoundsValues() can be used instead. directly set the bounds from the host. 
+ * Alternatively if calculating bounds dynamically at runtime the reserveBoundsValues() function can be + * used instead. This function can be skipped if just using whole scene bounds. * @param bounds The volume bounds. */ void setBounds(std::pair const &bounds, std::type_info const &caller) noexcept; template - void setBounds(std::pair const &bounds, T const *const caller) noexcept + void setBounds(std::pair const &bounds, [[maybe_unused]] T const *const) noexcept { setBounds(bounds, typeid(T)); } /** * Perform operations to update internal data structure. - * @note Must be called after all hlsl `LightBounds_StorePosition` calls have been made and before any - * hlsl `sampleLightListCone` calls are made + * @note Must be called after all hlsl `requestLightSampleLocation` calls have been made and before any + * hlsl `sampleLightList` calls are made. If not using `requestLightSampleLocation` then this function + * can be ignored. * @param [in,out] capsaicin Current framework context. * @param [in,out] parent The parent render technique that is using the light sampler. */ - void update(CapsaicinInternal &capsaicin, RenderTechnique &parent) noexcept; + void update(CapsaicinInternal &capsaicin, Timeable *parent) noexcept; /** * Check to determine if any kernels using light sampler code need to be (re)compiled. - * @note Must be called before LightSamplerBounds::run(). + * @note Must be called before run(). * @param capsaicin Current framework context. * @return True if an update occurred requiring internal updates to be performed. */ - bool needsRecompile(CapsaicinInternal const &capsaicin) const noexcept; + bool needsRecompile(CapsaicinInternal const &capsaicin) const noexcept override; /** * Get the list of shader defines that should be passed to any kernel that uses this lightSampler. - * @note Also includes values from the default lightSampler. + * @note Also includes values from the default lightBuilder. * @param capsaicin Current framework context. 
* @return A vector with each required define. */ - std::vector getShaderDefines(CapsaicinInternal const &capsaicin) const noexcept; + std::vector getShaderDefines(CapsaicinInternal const &capsaicin) const noexcept override; /** * Add the required program parameters to a shader based on current settings. - * @note Also includes values from the default lightSampler. + * @note Also includes values from the default lightBuilder. * @param capsaicin Current framework context. * @param program The shader program to bind parameters to. */ - void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept; + void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept override; /** * Check if the scenes lighting data was changed this frame. + * @param capsaicin Current framework context. * @returns True if light data has changed. */ - bool getLightsUpdated() const; + bool getLightsUpdated(CapsaicinInternal const &capsaicin) const noexcept override; -private: - void terminate() noexcept; + /** + * Get the name of the header file used in HLSL code to include necessary sampler functions. + * @return String name of the HLSL header include. + */ + std::string_view getHeaderFile() const noexcept override; +private: bool initKernels(CapsaicinInternal const &capsaicin) noexcept; bool initBoundsBuffers() noexcept; bool initLightIndexBuffer() noexcept; @@ -165,14 +195,13 @@ class LightSamplerBounds : public Component::RegistrarName RenderOptions options; bool recompileFlag = false; /**< Flag to indicate if option change requires a shader recompile this frame */ - bool lightingChanged = - false; /**< Flag to indicate if lighting has changed (e.g. 
lights added/removed/moved) */ - - std::map boundsReservations; /**< List of any reservations made using - LightSamplerBounds::reserveBoundsValues() */ - std::map> boundsHostReservations; /**< List of any reservations - made using - LightSamplerBounds::reserveBoundsValues() */ + bool lightsUpdatedFlag = false; /**< Flag to indicate if option change affects light samples */ + bool usingManyLights = false; /**< Flag indicating if many lights parallel build is in use */ + + std::map + boundsReservations; /**< List of any reservations made using reserveBoundsValues() */ + std::map> + boundsHostReservations; /**< List of any reservations made using setBounds() */ std::pair currentBounds = std::make_pair(float3(std::numeric_limits::max()), float3(std::numeric_limits::lowest())); /**< Current calculated bounds for all host bounds in boundsHostReservations */ @@ -184,9 +213,9 @@ class LightSamplerBounds : public Component::RegistrarName GPUReduce reducerMin; /**< Helper to perform GPU parallel reduceMin */ GPUReduce reducerMax; /**< Helper to perform GPU parallel reduceMax */ - GfxBuffer configBuffer; /**< Buffer used to hold LightSamplingConfiguration */ - GfxBuffer lightIndexBuffer; /**< Buffer used to hold light indexes for all lights in each cell */ - GfxBuffer lightCDFBuffer; /**< Buffer used to hold light CDF for all lights in each cell */ + GfxBuffer configBuffer; /**< Buffer used to hold LightSamplingConfiguration */ + GfxBuffer lightIndexBuffer; /**< Buffer used to hold light indexes for all lights in each cell */ + GfxBuffer lightReservoirBuffer; /**< Buffer used to hold light reservoirs for each cell */ GfxBuffer dispatchCommandBuffer; diff --git a/src/core/src/components/light_sampler_grid_stream/light_sampler_grid_stream.hlsl b/src/core/src/components/light_sampler_grid_stream/light_sampler_grid_stream.hlsl new file mode 100644 index 0000000..f0b4945 --- /dev/null +++ b/src/core/src/components/light_sampler_grid_stream/light_sampler_grid_stream.hlsl
@@ -0,0 +1,415 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+#ifndef LIGHT_SAMPLER_GRID_STREAM_HLSL
+#define LIGHT_SAMPLER_GRID_STREAM_HLSL
+
+/*
+// Requires the following data to be defined in any shader that uses this file
+TextureCube g_EnvironmentBuffer;
+Texture2D g_TextureMaps[] : register(space99);
+SamplerState g_TextureSampler;
+*/
+
+#include "../light_sampler_grid_cdf/light_sampler_grid_shared.h"
+
+RWStructuredBuffer g_LightSampler_Configuration;
+RWStructuredBuffer g_LightSampler_CellsIndex;
+#ifdef LIGHTSAMPLERSTREAM_RES_USE_RESAMPLE
+RWStructuredBuffer g_LightSampler_CellsReservoirs;
+#else
+RWStructuredBuffer g_LightSampler_CellsReservoirs;
+#endif
+
+// If using GPU based bounds calculation then also:
+RWStructuredBuffer g_LightSampler_BoundsLength;
+RWStructuredBuffer g_LightSampler_MinBounds;
+RWStructuredBuffer g_LightSampler_MaxBounds;
+
+#include "../light_sampler_grid_cdf/light_sampler_grid.hlsl"
+#include "../light_builder/light_builder.hlsl"
+#include "../../lights/light_sampling.hlsl"
+#include "../../lights/reservoir.hlsl"
+#include "../../materials/material_evaluation.hlsl"
+#include "../../lights/light_sampling_volume.hlsl"
+#include "../../math/random.hlsl"
+
+struct LightSamplerGridStream
+{
+    Random randomNG;
+
+    /**
+     * Sample the index and PDF for a sampled light.
+     * @param cellIndex The index of the current cell.
+     * @param numLights The number of lights in the cell.
+     * @param position Current position on surface.
+     * @param normal Shading normal vector at current position.
+     * @param lightPDF (Out) The PDF for the calculated sample.
+     * @return The index of the sampled light.
+     */
+    uint getSampledLight(uint cellIndex, uint numLights, float3 position, float3 normal, out float lightPDF)
+    {
+#ifndef LIGHTSAMPLERSTREAM_RES_RANDOM_MERGE
+        // Collapse reservoir down to a single sample
+        uint storedLight = -1;
+        float storedLightWeight = 0.0f;
+        float totalWeight = 0.0f;
+        float j = randomNG.rand();
+        float pNone = 1.0f;
+        for (uint currentIndex = cellIndex; currentIndex < cellIndex + numLights; ++currentIndex)
+        {
+# ifdef LIGHTSAMPLERSTREAM_RES_USE_RESAMPLE
+            // Use Sampling Importance Resampling to calculate new targetPDF
+            float sampleWeightMod = g_LightSampler_CellsReservoirs[currentIndex]; //Contains totalWeight/sampleTargetPDF
+            uint sampledLight = g_LightSampler_CellsIndex[currentIndex];
+            float sampleTargetPDF = sampleLightPointNormal(getLight(sampledLight), position, normal);
+            float sampleWeight = sampleTargetPDF * sampleWeightMod;
+# else
+            float2 reservoirWeights = g_LightSampler_CellsReservoirs[currentIndex];
+            // Merge using totalWeight of input reservoir as sample weight
+            float sampleWeight = reservoirWeights.y;
+            float sampleTargetPDF = reservoirWeights.x;
+# endif
+            // Must avoid 0 samples at the start of the stream to avoid division by 0
+            if (sampleWeight > 0.0f)
+            {
+                totalWeight += sampleWeight;
+                float p = sampleWeight / totalWeight;
+                j -= p * pNone;
+                pNone *= (1.0f - p);
+                if (j <= 0.0f)
+                {
+# ifdef LIGHTSAMPLERSTREAM_RES_USE_RESAMPLE
+                    storedLight = sampledLight;
+# else
+                    storedLight = g_LightSampler_CellsIndex[currentIndex];
+# endif
+                    storedLightWeight = sampleTargetPDF;
+                    j = randomNG.rand();
+                    pNone = 1.0f;
+                }
+            }
+        }
+
+        // Final result has PDF = M * targetPDF / totalWeight
+        // The M term cancels out with weight calculation resulting in just targetPDF/totalWeight
+        lightPDF = (totalWeight > 0.0f) ? storedLightWeight / totalWeight : 0.0f;
+        return storedLight;
+#else
+        // Just randomly select one of the reservoirs
+        uint storedLight = randomNG.randInt(numLights);
+        uint currentIndex = cellIndex + storedLight;
+        storedLight = g_LightSampler_CellsIndex[currentIndex];
+# ifdef LIGHTSAMPLERSTREAM_RES_USE_RESAMPLE
+        float sampleWeightMod = g_LightSampler_CellsReservoirs[currentIndex];
+        float sampleTargetPDF = sampleLightPointNormal(getLight(storedLight), position, normal);
+        float sampleWeight = sampleTargetPDF * sampleWeightMod;
+# else
+        float2 reservoirWeights = g_LightSampler_CellsReservoirs[currentIndex];
+        float sampleWeight = reservoirWeights.y;
+        float sampleTargetPDF = reservoirWeights.x;
+# endif
+        lightPDF = (sampleWeight > 0.0f) ? sampleTargetPDF / (sampleWeight * (float)numLights) : 0.0f;
+        return storedLight;
+#endif // LIGHTSAMPLERSTREAM_RES_RANDOM_MERGE
+    }
+
+    /**
+     * Get a sample light.
+     * @param position Current position on surface.
+     * @param normal Shading normal vector at current position.
+     * @param lightPDF (Out) The PDF for the calculated sample (is equal to zero if no valid samples could be found).
+     * @returns The index of the new light sample
+     */
+    uint sampleLights(float3 position, float3 normal, out float lightPDF)
+    {
+        // Get the current cell buffer index
+#ifdef LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING
+        const uint cellIndex = LightSamplerGrid::getCellOctaIndexFromJitteredPosition(position, normal, randomNG);
+#else
+        const uint cellIndex = LightSamplerGrid::getCellIndexFromJitteredPosition(position, randomNG);
+#endif
+        const uint numLights = min(g_LightSampler_Configuration[0].numCells.w, getNumberLights());
+
+        // Return invalid sample if the cell doesn't contain any lights
+        if (numLights == 0)
+        {
+            lightPDF = 0.0f;
+            return 0;
+        }
+
+        // Choose a light to sample from
+        uint lightIndex = LightSamplerGridStream::getSampledLight(cellIndex, numLights, position, normal, lightPDF);
+        return lightIndex;
+    }
+
+    /**
+     * Calculate the PDF of sampling a given light.
+     * @param lightID The index of the given light.
+     * @param position The position on the surface currently being shaded.
+     * @param normal Shading normal vector at current position.
+     * @returns The calculated PDF with respect to the light.
+     */
+    float sampleLightPDF(uint lightID, float3 position, float3 normal)
+    {
+        // Calculate position of current cell in output buffer
+#ifdef LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING
+        const uint cellIndex = LightSamplerGrid::getCellOctaIndexFromJitteredPosition(position, normal, randomNG);
+#else
+        const uint cellIndex = LightSamplerGrid::getCellIndexFromJitteredPosition(position, randomNG);
+#endif
+        const uint numLights = min(g_LightSampler_Configuration[0].numCells.w, getNumberLights());
+
+        // Using reservoir resampling the probability of sampling a given index is based on its
+        // weight with respect to the total cell weight.
+        float totalWeight = 0.0f;
+        float finalTargetPDF = FLT_EPSILON;
+
+        for (uint currentIndex = cellIndex; currentIndex < cellIndex + numLights; ++currentIndex)
+        {
+            uint sampledLight = g_LightSampler_CellsIndex[currentIndex];
+#ifdef LIGHTSAMPLERSTREAM_RES_USE_RESAMPLE
+            float sampleWeightMod = g_LightSampler_CellsReservoirs[currentIndex]; // Weight lives in the reservoir buffer, not the index buffer
+            float sampleTargetPDF = sampleLightPointNormal(getLight(sampledLight), position, normal);
+            float sampleWeight = sampleTargetPDF * sampleWeightMod;
+#else
+            float2 reservoirWeights = g_LightSampler_CellsReservoirs[currentIndex];
+            // Merge using totalWeight of input reservoir as sample weight
+            float sampleWeight = reservoirWeights.y;
+            float sampleTargetPDF = reservoirWeights.x;
+#endif // LIGHTSAMPLERSTREAM_RES_USE_RESAMPLE
+            if (sampledLight == lightID)
+            {
+                finalTargetPDF = sampleTargetPDF;
+#ifdef LIGHTSAMPLERSTREAM_RES_RANDOM_MERGE
+                const float divisor = sampleWeight * (float)numLights;
+                return divisor != 0.0f ? finalTargetPDF / divisor : 0.0f;
+#endif
+            }
+            totalWeight += sampleWeight;
+        }
+#ifndef LIGHTSAMPLERSTREAM_RES_RANDOM_MERGE
+        return (totalWeight != 0.0f) ? finalTargetPDF / totalWeight : 0.0f;
+#else
+        return finalTargetPDF;
+#endif // LIGHTSAMPLERSTREAM_RES_RANDOM_MERGE
+    }
+
+    /**
+     * Sample multiple lights into a reservoir.
+     * @tparam numSampledLights Number of lights to sample.
+     * @param position Current position on surface.
+     * @param normal Shading normal vector at current position.
+     * @param viewDirection View direction vector at current position.
+     * @param material Material for current surface position.
+     * @returns Reservoir containing combined samples.
+     */
+    template
+    Reservoir sampleLightList(float3 position, float3 normal, float3 viewDirection, MaterialBRDF material)
+    {
+        // Get the current cell buffer index
+#ifdef LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING
+        const uint cellIndex = LightSamplerGrid::getCellOctaIndexFromJitteredPosition(position, normal, randomNG);
+#else
+        const uint cellIndex = LightSamplerGrid::getCellIndexFromJitteredPosition(position, randomNG);
+#endif
+        const uint numLights = min(g_LightSampler_Configuration[0].numCells.w, getNumberLights());
+        const uint newLights = min(numLights, numSampledLights);
+
+        // Return invalid sample if the cell doesn't contain any lights
+        if (numLights == 0)
+        {
+            return MakeReservoir();
+        }
+
+        // Create reservoir updater
+        ReservoirUpdater updater = MakeReservoirUpdater();
+
+        // Loop through until we have the requested number of lights
+#ifdef LIGHTSAMPLERSTREAM_RES_FAST_MERGE
+        const uint lightsPerSegment = (numLights + numSampledLights - 1) / numSampledLights;
+        const uint maxCellIndex = cellIndex + numLights;
+        uint startIndex = cellIndex;
+        struct LightResSample
+        {
+            uint lightIndex;
+            float lightPDF;
+        };
+        LightResSample lightSamples[numSampledLights];
+        uint lightsAdded = 0;
+#endif
+        for (uint i = 0; i < newLights; ++i)
+        {
+            // Choose a light to sample from
+            float lightPDF;
+#ifndef LIGHTSAMPLERSTREAM_RES_FAST_MERGE
+            uint lightIndex = LightSamplerGridStream::getSampledLight(cellIndex, numLights, position, normal, lightPDF);
+
+            // Add the light sample to the reservoir
+            updateReservoir(updater, randomNG, lightIndex, lightPDF, material, position, normal, viewDirection);
+#else
+            // Divide total light range into newLights segments and only collapse those (clamped to the cell range)
+            uint endIndex = startIndex + lightsPerSegment;
+            uint segmentLights = min(endIndex, maxCellIndex) - min(startIndex, maxCellIndex);
+            uint lightIndex = LightSamplerGridStream::getSampledLight(startIndex, segmentLights, position, normal, lightPDF);
+            // Increment current cell index
+            startIndex = endIndex;
+            if (lightIndex == -1)
+            {
+                continue;
+            }
+            lightSamples[lightsAdded].lightIndex = lightIndex;
+            lightSamples[lightsAdded].lightPDF = lightPDF;
+            ++lightsAdded;
+#endif
+        }
+
+#ifdef LIGHTSAMPLERSTREAM_RES_FAST_MERGE
+        for (uint i = 0; i < lightsAdded; ++i)
+        {
+            // We are now resampling without replacement so the sample PDFs need to be corrected.
+            // The actual light PDF is affected by the value of M(lightsAdded) which may not always be equal to newLights
+            // with the final PDF being scaled by the number of actual final valid reservoirs
+            float lightPDF = lightSamples[i].lightPDF / (float)lightsAdded;
+            // Add the light sample to the reservoir
+            updateReservoir(updater, randomNG, lightSamples[i].lightIndex, lightPDF, material, position, normal, viewDirection);
+        }
+#endif
+
+        // Get finalised reservoir for return
+        return updater.reservoir;
+    }
+
+    /**
+     * Sample multiple lights into a reservoir using cone angle.
+     * @tparam numSampledLights Number of lights to sample.
+     * @param position Current position on surface.
+     * @param normal Shading normal vector at current position.
+     * @param viewDirection View direction vector at current position.
+     * @param solidAngle Solid angle around view direction of visible ray cone.
+     * @param material Material for current surface position.
+     * @returns Reservoir containing combined samples.
+     */
+    template
+    Reservoir sampleLightListCone(float3 position, float3 normal, float3 viewDirection, float solidAngle, MaterialBRDF material)
+    {
+        // Get the current cell buffer index
+#ifdef LIGHTSAMPLERSTREAM_USE_OCTAHEDRON_SAMPLING
+        const uint cellIndex = LightSamplerGrid::getCellOctaIndexFromJitteredPosition(position, normal, randomNG);
+#else
+        const uint cellIndex = LightSamplerGrid::getCellIndexFromJitteredPosition(position, randomNG);
+#endif
+        const uint numLights = min(g_LightSampler_Configuration[0].numCells.w, getNumberLights());
+        const uint newLights = min(numLights, numSampledLights);
+
+        // Return invalid sample if the cell doesn't contain any lights
+        if (numLights == 0)
+        {
+            return MakeReservoir();
+        }
+
+        // Create reservoir updater
+        ReservoirUpdater updater = MakeReservoirUpdater();
+
+        // Loop through until we have the requested number of lights
+#ifdef LIGHTSAMPLERSTREAM_RES_FAST_MERGE
+        const uint lightsPerSegment = (numLights + numSampledLights - 1) / numSampledLights;
+        const uint maxCellIndex = cellIndex + numLights;
+        uint startIndex = cellIndex;
+        struct LightResSample
+        {
+            uint lightIndex;
+            float lightPDF;
+        };
+        LightResSample lightSamples[numSampledLights];
+        uint lightsAdded = 0;
+#endif
+        for (uint i = 0; i < newLights; ++i)
+        {
+            // Choose a light to sample from
+            float lightPDF;
+#ifndef LIGHTSAMPLERSTREAM_RES_FAST_MERGE
+            uint lightIndex = LightSamplerGridStream::getSampledLight(cellIndex, numLights, position, normal, lightPDF);
+
+            // Add the light sample to the reservoir
+            updateReservoirCone(updater, randomNG, lightIndex, lightPDF, material, position, normal, viewDirection, solidAngle);
+#else
+            // Divide total light range into newLights segments and only collapse those (clamped to the cell range)
+            uint endIndex = startIndex + lightsPerSegment;
+            uint segmentLights = min(endIndex, maxCellIndex) - min(startIndex, maxCellIndex);
+            uint lightIndex = LightSamplerGridStream::getSampledLight(startIndex, segmentLights, position, normal, lightPDF);
+            // Increment current cell index
+            startIndex = endIndex;
+            if (lightIndex == -1)
+            {
+                continue;
+            }
+            lightSamples[lightsAdded].lightIndex = lightIndex;
+            lightSamples[lightsAdded].lightPDF = lightPDF;
+            ++lightsAdded;
+#endif
+        }
+
+#ifdef LIGHTSAMPLERSTREAM_RES_FAST_MERGE
+        for (uint i = 0; i < lightsAdded; ++i)
+        {
+            // We are now resampling without replacement so the sample PDFs need to be corrected.
+            // The actual light PDF is affected by the value of M(lightsAdded) which may not always be equal to newLights
+            // with the final PDF being scaled by the number of actual final valid reservoirs
+            float lightPDF = lightSamples[i].lightPDF / (float)lightsAdded;
+            // Add the light sample to the reservoir
+            updateReservoirCone(updater, randomNG, lightSamples[i].lightIndex, lightPDF, material, position, normal, viewDirection, solidAngle);
+        }
+#endif
+
+        // Get finalised reservoir for return
+        return updater.reservoir;
+    }
+};
+
+/**
+ * Records the position of future light lookups.
+ * @param position Current position on surface.
+ */ +void requestLightSampleLocation(in float3 position) +{ + const float3 position_min = WaveActiveMin(position); + const float3 position_max = WaveActiveMax(position); + if (WaveIsFirstLane()) + { + uint offset; + InterlockedAdd(g_LightSampler_BoundsLength[0], 1, offset); + g_LightSampler_MinBounds[offset] = position_min; + g_LightSampler_MaxBounds[offset] = position_max; + } +} + +LightSamplerGridStream MakeLightSampler(Random random) +{ + LightSamplerGridStream ret; + ret.randomNG = random; + return ret; +} + +typedef LightSamplerGridStream LightSampler; + +#endif // LIGHT_SAMPLER_GRID_STREAM_HLSL diff --git a/src/core/src/components/light_sampler_uniform/light_sampler_uniform.cpp b/src/core/src/components/light_sampler_uniform/light_sampler_uniform.cpp new file mode 100644 index 0000000..efa60d5 --- /dev/null +++ b/src/core/src/components/light_sampler_uniform/light_sampler_uniform.cpp @@ -0,0 +1,87 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#include "light_sampler_uniform.h" + +#include "../light_builder/light_builder.h" +#include "capsaicin_internal.h" + +namespace Capsaicin +{ +LightSamplerUniform::LightSamplerUniform() noexcept + : LightSampler(Name) +{} + +LightSamplerUniform::~LightSamplerUniform() noexcept +{ + terminate(); +} + +ComponentList LightSamplerUniform::getComponents() const noexcept +{ + ComponentList components; + components.emplace_back(COMPONENT_MAKE(LightBuilder)); + return components; +} + +bool LightSamplerUniform::init([[maybe_unused]] CapsaicinInternal const &capsaicin) noexcept +{ + return true; +} + +void LightSamplerUniform::run([[maybe_unused]] CapsaicinInternal &capsaicin) noexcept {} + +bool LightSamplerUniform::needsRecompile(CapsaicinInternal const &capsaicin) const noexcept +{ + auto lightBuilder = capsaicin.getComponent(); + return lightBuilder->needsRecompile(capsaicin); +} + +std::vector LightSamplerUniform::getShaderDefines( + CapsaicinInternal const &capsaicin) const noexcept +{ + auto lightBuilder = capsaicin.getComponent(); + std::vector baseDefines(std::move(lightBuilder->getShaderDefines(capsaicin))); + return baseDefines; +} + +void LightSamplerUniform::addProgramParameters( + CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept +{ + auto lightBuilder = capsaicin.getComponent(); + lightBuilder->addProgramParameters(capsaicin, program); +} + +bool LightSamplerUniform::getLightsUpdated(CapsaicinInternal const &capsaicin) const noexcept +{ + auto lightBuilder = capsaicin.getComponent(); + return lightBuilder->getLightsUpdated(); +} + +std::string_view LightSamplerUniform::getHeaderFile() const noexcept +{ + 
return std::string_view("\"../../components/light_sampler_uniform/light_sampler_uniform.hlsl\""); +} + +void LightSamplerUniform::terminate() noexcept {} + +} // namespace Capsaicin diff --git a/src/core/src/components/light_sampler_uniform/light_sampler_uniform.h b/src/core/src/components/light_sampler_uniform/light_sampler_uniform.h new file mode 100644 index 0000000..a3e8c91 --- /dev/null +++ b/src/core/src/components/light_sampler_uniform/light_sampler_uniform.h @@ -0,0 +1,112 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ +#pragma once + +#include "capsaicin_internal.h" +#include "components/component.h" +#include "components/light_sampler/light_sampler.h" + +namespace Capsaicin +{ +class LightSamplerUniform + : public LightSampler + , public ComponentFactory::Registrar + , public LightSamplerFactory::Registrar +{ +public: + static constexpr std::string_view Name = "LightSamplerUniform"; + + LightSamplerUniform(LightSamplerUniform const &) noexcept = delete; + + LightSamplerUniform(LightSamplerUniform &&) noexcept = default; + + /** Constructor. */ + LightSamplerUniform() noexcept; + + /** Destructor. */ + ~LightSamplerUniform() noexcept; + + /** + * Gets a list of any shared components used by the current render technique. + * @return A list of all supported components. + */ + ComponentList getComponents() const noexcept override; + + /** + * Initialise any internal data or state. + * @note This is automatically called by the framework after construction and should be used to create + * any required CPU|GPU resources. + * @param capsaicin Current framework context. + * @return True if initialisation succeeded, False otherwise. + */ + bool init(CapsaicinInternal const &capsaicin) noexcept override; + + /** + * Run internal operations. + * @param [in,out] capsaicin Current framework context. + */ + void run(CapsaicinInternal &capsaicin) noexcept override; + + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + + /** + * Check to determine if any kernels using light sampler code need to be (re)compiled. + * @param capsaicin Current framework context. + * @return True if an update occurred requiring internal updates to be performed. + */ + bool needsRecompile(CapsaicinInternal const &capsaicin) const noexcept override; + + /** + * Get the list of shader defines that should be passed to any kernel that uses this lightSampler. 
+ * @note Also includes values from the default lightBuilder. + * @param capsaicin Current framework context. + * @return A vector with each required define. + */ + std::vector getShaderDefines(CapsaicinInternal const &capsaicin) const noexcept override; + + /** + * Add the required program parameters to a shader based on current settings. + * @note Also includes values from the default lightBuilder. + * @param capsaicin Current framework context. + * @param program The shader program to bind parameters to. + */ + void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept override; + + /** + * Check if the scenes lighting data was changed this frame. + * @param capsaicin Current framework context. + * @returns True if light data has changed. + */ + bool getLightsUpdated(CapsaicinInternal const &capsaicin) const noexcept override; + + /** + * Get the name of the header file used in HLSL code to include necessary sampler functions. + * @return String name of the HLSL header include. + */ + virtual std::string_view getHeaderFile() const noexcept override; + +private: +}; +} // namespace Capsaicin diff --git a/src/core/src/components/light_sampler_uniform/light_sampler_uniform.hlsl b/src/core/src/components/light_sampler_uniform/light_sampler_uniform.hlsl new file mode 100644 index 0000000..77ec054 --- /dev/null +++ b/src/core/src/components/light_sampler_uniform/light_sampler_uniform.hlsl @@ -0,0 +1,171 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#ifndef LIGHT_SAMPLER_UNIFORM_HLSL +#define LIGHT_SAMPLER_UNIFORM_HLSL + +/* +// Requires the following data to be defined in any shader that uses this file +TextureCube g_EnvironmentBuffer; +Texture2D g_TextureMaps[] : register(space99); +SamplerState g_TextureSampler; +*/ + +#include "../../lights/light_sampling.hlsl" +#include "../../lights/reservoir.hlsl" +#include "../../math/random.hlsl" + +struct LightSamplerUniform +{ + Random randomNG; + + /** + * Get a sample light. + * @param position Current position on surface. + * @param normal Shading normal vector at current position. + * @param lightPDF (Out) The PDF for the calculated sample (is equal to zero if no valid samples could be found). 
+ * @returns The index of the new light sample + */ + uint sampleLights(float3 position, float3 normal, out float lightPDF) + { + float totalLights = getNumberLights(); + + // Return invalid sample if there are no lights + if (totalLights == 0) + { + lightPDF = 0.0f; + return 0; + } + + // Choose a light to sample from + lightPDF = 1.0f / totalLights; + uint lightIndex = randomNG.randInt(totalLights); + return lightIndex; + } + + /** + * Calculate the PDF of sampling a given light. + * @param lightID The index of the given light. + * @param position The position on the surface currently being shaded. + * @param normal Shading normal vector at current position. + * @returns The calculated PDF with respect to the light. + */ + float sampleLightPDF(uint lightID, float3 position, float3 normal) + { + return 1.0f / getNumberLights(); + } + + /** + * Sample multiple lights into a reservoir. + * @tparam numSampledLights Number of lights to sample. + * @param position Current position on surface. + * @param normal Shading normal vector at current position. + * @param viewDirection View direction vector at current position. + * @param solidAngle Solid angle around view direction of visible ray cone. + * @param material Material for current surface position. + * @returns Reservoir containing combined samples. 
+ */ + template + Reservoir sampleLightList(float3 position, float3 normal, float3 viewDirection, MaterialBRDF material) + { + // Check if we actually have any lights + const uint totalLights = getNumberLights(); + const uint numLights = numSampledLights; + + // Return invalid sample if there are no lights + if (numLights == 0) + { + return MakeReservoir(); + } + + // Create reservoir updater + ReservoirUpdater updater = MakeReservoirUpdater(); + + // Loop through until we have the requested number of lights + float lightPDF = 1.0f / totalLights; + + for (uint lightsAdded = 0; lightsAdded < numLights; ++lightsAdded) + { + // Choose a light to sample from + const uint lightIndex = randomNG.randInt(totalLights); + + // Add the light sample to the reservoir + updateReservoir(updater, randomNG, lightIndex, lightPDF, material, position, normal, viewDirection); + } + + // Get finalised reservoir for return + return updater.reservoir; + } + + /** + * Sample multiple lights into a reservoir using cone angle. + * @tparam numSampledLights Number of lights to sample. + * @param position Current position on surface. + * @param normal Shading normal vector at current position. + * @param viewDirection View direction vector at current position. + * @param solidAngle Solid angle around view direction of visible ray cone. + * @param material Material for current surface position. + * @returns Reservoir containing combined samples. 
+ */ + template + Reservoir sampleLightListCone(float3 position, float3 normal, float3 viewDirection, float solidAngle, MaterialBRDF material) + { + // Check if we actually have any lights + const uint totalLights = getNumberLights(); + const uint numLights = numSampledLights; + + // Return invalid sample if there are no lights + if (numLights == 0) + { + return MakeReservoir(); + } + + // Create reservoir updater + ReservoirUpdater updater = MakeReservoirUpdater(); + + // Loop through until we have the requested number of lights + float lightPDF = 1.0f / totalLights; + + for (uint lightsAdded = 0; lightsAdded < numLights; ++lightsAdded) + { + // Choose a light to sample from + const uint lightIndex = randomNG.randInt(totalLights); + + // Add the light sample to the reservoir + updateReservoirCone(updater, randomNG, lightIndex, lightPDF, material, position, normal, viewDirection, solidAngle); + } + + // Get finalised reservoir for return + return updater.reservoir; + } +}; + +LightSamplerUniform MakeLightSampler(Random random) +{ + LightSamplerUniform ret; + ret.randomNG = random; + return ret; +} + +typedef LightSamplerUniform LightSampler; + +#endif // LIGHT_SAMPLER_UNIFORM_HLSL diff --git a/src/core/src/components/prefilter_ibl/prefilter_ibl.cpp b/src/core/src/components/prefilter_ibl/prefilter_ibl.cpp new file mode 100644 index 0000000..86b918d --- /dev/null +++ b/src/core/src/components/prefilter_ibl/prefilter_ibl.cpp @@ -0,0 +1,125 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#include "prefilter_ibl.h" + +#include "capsaicin_internal.h" + +#define _USE_MATH_DEFINES +#include +#include + +namespace Capsaicin +{ +PrefilterIBL::PrefilterIBL() noexcept + : Component(Name) +{} + +PrefilterIBL::~PrefilterIBL() noexcept +{ + terminate(); +} + +bool PrefilterIBL::init(CapsaicinInternal const &capsaicin) noexcept +{ + prefilter_ibl_buffer_ = gfxCreateTextureCube( + gfx_, prefilter_ibl_buffer_size_, DXGI_FORMAT_R16G16B16A16_FLOAT, prefilter_ibl_buffer_mips_); + prefilter_ibl_buffer_.setName("Capsaicin_PrefilterIBL_PrefilterIBLBuffer"); + + prefilter_ibl_program_ = + gfxCreateProgram(gfx_, "components/prefilter_ibl/prefilter_ibl", capsaicin.getShaderPath()); + + // init prefiltered IBL + prefilterIBL(capsaicin); + + return true; +} + +void PrefilterIBL::run(CapsaicinInternal &capsaicin) noexcept +{ + // update prefilted IBL + if (capsaicin.getEnvironmentMapUpdated()) + { + prefilterIBL(capsaicin); + } +} + +void PrefilterIBL::terminate() noexcept +{ + gfxDestroyProgram(gfx_, prefilter_ibl_program_); + gfxDestroyTexture(gfx_, prefilter_ibl_buffer_); +} + +void PrefilterIBL::addProgramParameters( + [[maybe_unused]] CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept +{ + gfxProgramSetParameter(gfx_, program, "g_PrefilteredEnvironmentBuffer", prefilter_ibl_buffer_); +} + +void PrefilterIBL::prefilterIBL(CapsaicinInternal const &capsaicin) noexcept +{ + glm::dvec3 const forward_vectors[] = {glm::dvec3(-1.0, 0.0, 0.0), glm::dvec3(1.0, 0.0, 0.0), + glm::dvec3(0.0, 1.0, 0.0), glm::dvec3(0.0, -1.0, 0.0), glm::dvec3(0.0, 0.0, -1.0), + glm::dvec3(0.0, 0.0, 1.0)}; + + glm::dvec3 const up_vectors[] = {glm::dvec3(0.0, -1.0, 0.0), glm::dvec3(0.0, -1.0, 0.0), + glm::dvec3(0.0, 0.0, -1.0), glm::dvec3(0.0, 0.0, 1.0), glm::dvec3(0.0, -1.0, 0.0), + glm::dvec3(0.0, -1.0, 0.0)}; + + for (uint32_t mip_level = 0; mip_level < prefilter_ibl_buffer_mips_; ++mip_level) + { + for 
(uint32_t cubemap_face = 0; cubemap_face < 6; ++cubemap_face) + { + GfxDrawState draw_sky_state = {}; + gfxDrawStateSetColorTarget(draw_sky_state, 0, prefilter_ibl_buffer_, mip_level, cubemap_face); + + GfxKernel prefilter_ibl_kernel = + gfxCreateGraphicsKernel(gfx_, prefilter_ibl_program_, draw_sky_state, "PrefilterIBL"); + + uint32_t const buffer_dimensions[] = {std::max(prefilter_ibl_buffer_size_ >> mip_level, 1u), + std::max(prefilter_ibl_buffer_size_ >> mip_level, 1u)}; + + glm::dmat4 const view = + glm::lookAt(glm::dvec3(0.0), forward_vectors[cubemap_face], up_vectors[cubemap_face]); + glm::dmat4 const proj = glm::perspective(M_PI / 2.0, 1.0, 0.1, 1e4); + glm::mat4 const view_proj_inv = glm::mat4(glm::inverse(proj * view)); + + float const roughness = mip_level / float(prefilter_ibl_buffer_mips_ - 1); + + gfxProgramSetParameter(gfx_, prefilter_ibl_program_, "g_BufferDimensions", buffer_dimensions); + gfxProgramSetParameter(gfx_, prefilter_ibl_program_, "g_ViewProjectionInverse", view_proj_inv); + gfxProgramSetParameter( + gfx_, prefilter_ibl_program_, "g_EnvironmentBuffer", capsaicin.getEnvironmentBuffer()); + gfxProgramSetParameter( + gfx_, prefilter_ibl_program_, "g_LinearSampler", capsaicin.getLinearSampler()); + gfxProgramSetParameter(gfx_, prefilter_ibl_program_, "g_Roughness", roughness); + gfxProgramSetParameter(gfx_, prefilter_ibl_program_, "g_SampleSize", prefilter_ibl_sample_size_); + + gfxCommandBindKernel(gfx_, prefilter_ibl_kernel); + gfxCommandDraw(gfx_, 3); + + gfxDestroyKernel(gfx_, prefilter_ibl_kernel); + } + } +} + +} // namespace Capsaicin diff --git a/src/core/src/components/prefilter_ibl/prefilter_ibl.frag b/src/core/src/components/prefilter_ibl/prefilter_ibl.frag new file mode 100644 index 0000000..ef2acee --- /dev/null +++ b/src/core/src/components/prefilter_ibl/prefilter_ibl.frag @@ -0,0 +1,79 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. 
All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
********************************************************************/

// NOTE: these are not used, but are needed for shader compilation
uint         g_FrameIndex;
Texture2D    g_TextureMaps[] : register(space99);
SamplerState g_TextureSampler;

#include "../../materials/material_sampling.hlsl"
#include "../../math/transform.hlsl"

uint2        g_BufferDimensions;
float4x4     g_ViewProjectionInverse;
TextureCube  g_EnvironmentBuffer;
SamplerState g_LinearSampler;
float        g_Roughness;
uint         g_SampleSize;

/**
 * Get the position of the i-th element of a 2D Hammersley point set of N elements.
 * @param i Index of the requested point.
 * @param N Total number of points in the set.
 * @returns The point; x is i/N and y is the base-2 radical inverse of i.
 */
float2 Hammersley2D(uint i, uint N)
{
    // Radical inverse based on http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html
    // Reverses the 32 bits of i by swapping progressively smaller groups of bits
    uint bits = (i << 16u) | (i >> 16u);
    bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
    bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
    bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
    bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
    float rdi = float(bits) * 2.3283064365386963e-10; // scale by 2^-32
    return float2(float(i) / float(N), rdi);
}

/**
 * Pixel shader that filters the environment map for the cube face/mip currently bound as render target.
 * Directions are generated with sampleGGX around the texel direction (which is used as both the view
 * and the normal direction) and the environment is accumulated with each sample weighted by the clamped
 * z component of its local direction.
 * @param pos Pixel position.
 * @returns The filtered environment colour in xyz, w is always 1.
 */
float4 PrefilterIBL(in float4 pos : SV_Position) : SV_Target
{
    // Reconstruct the direction through this pixel
    float2 uv  = pos.xy / g_BufferDimensions;
    float2 ndc = 2.0f * uv - 1.0f;

    float3 world = transformPointProjection(float3(ndc, 1.0f), g_ViewProjectionInverse);

    float3 wo        = normalize(world);
    float3 normal    = wo;
    float  roughness = g_Roughness * g_Roughness;
    float  alpha     = roughness * roughness;

    // Rotation into the local frame used for sampling; its inverse is the same for every sample so it
    // is calculated once up front
    Quaternion localRotation   = QuaternionRotationZ(normal);
    Quaternion inverseRotation = localRotation.inverse();
    float3     wo_local        = normalize(localRotation.transform(wo));

    float3 color        = 0.0f;
    float  total_weight = 0.0f;
    for (uint i = 0; i < g_SampleSize; ++i)
    {
        float2 xi       = Hammersley2D(i, g_SampleSize);
        float3 wi_local = sampleGGX(alpha, wo_local, xi);
        float3 wi       = normalize(inverseRotation.transform(wi_local));
        float  weight   = saturate(wi_local.z);
        // X and Z are negated before the lookup
        // NOTE(review): presumably to match the orientation of the environment cube map - confirm
        wi.x = -wi.x;
        wi.z = -wi.z;
        color += weight * g_EnvironmentBuffer.SampleLevel(g_LinearSampler, wi, 0.0f).xyz;
        total_weight += weight;
    }

    // Normalise by the accumulated weight. If nothing contributed (no samples requested or every sample
    // had zero weight) the division would be 0/0 and write NaNs into the cube map, so leave the
    // result black instead.
    if (total_weight > 0.0f)
    {
        color /= total_weight;
    }

    return float4(color, 1.0f);
}
diff --git a/src/core/src/components/prefilter_ibl/prefilter_ibl.h b/src/core/src/components/prefilter_ibl/prefilter_ibl.h new file mode 100644 index 0000000..788c06d --- /dev/null +++ b/src/core/src/components/prefilter_ibl/prefilter_ibl.h @@ -0,0 +1,82 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ +#pragma once + +#include "components/component.h" + +namespace Capsaicin +{ +class PrefilterIBL + : public Component + , public ComponentFactory::Registrar +{ +public: + static constexpr std::string_view Name = "PrefilterIBL"; + + PrefilterIBL(PrefilterIBL const &) noexcept = delete; + + PrefilterIBL(PrefilterIBL &&) noexcept = default; + + /** Constructor. */ + PrefilterIBL() noexcept; + + ~PrefilterIBL() noexcept; + + /** + * Initialise any internal data or state. + * @note This is automatically called by the framework after construction and should be used to create + * any required CPU|GPU resources. + * @param capsaicin Current framework context. + * @return True if initialisation succeeded, False otherwise. + */ + bool init(CapsaicinInternal const &capsaicin) noexcept override; + + /** + * Run internal operations. + * @param [in,out] capsaicin Current framework context. + */ + void run(CapsaicinInternal &capsaicin) noexcept override; + + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + + /** + * Add the required program parameters to a shader based on current settings. + * @param capsaicin Current framework context. + * @param program The shader program to bind parameters to. 
+ */ + + void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept; + +private: + void prefilterIBL(CapsaicinInternal const &capsaicin) noexcept; + + GfxProgram prefilter_ibl_program_; + GfxTexture prefilter_ibl_buffer_; + + uint32_t prefilter_ibl_buffer_size_ = 1024; + uint32_t prefilter_ibl_buffer_mips_ = 5; + uint32_t prefilter_ibl_sample_size_ = 1024; +}; +} // namespace Capsaicin diff --git a/src/core/src/components/prefilter_ibl/prefilter_ibl.vert b/src/core/src/components/prefilter_ibl/prefilter_ibl.vert new file mode 100644 index 0000000..d8b2900 --- /dev/null +++ b/src/core/src/components/prefilter_ibl/prefilter_ibl.vert @@ -0,0 +1,26 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +float4 PrefilterIBL(in uint idx : SV_VertexID) : SV_Position +{ + return 1.0f - float4(4.0f * (idx & 1), 4.0f * (idx >> 1), 1.0f, 0.0f); +} diff --git a/src/core/src/components/stratified_sampler/stratified_sampler.cpp b/src/core/src/components/stratified_sampler/stratified_sampler.cpp index d37a863..fbfabb1 100644 --- a/src/core/src/components/stratified_sampler/stratified_sampler.cpp +++ b/src/core/src/components/stratified_sampler/stratified_sampler.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -29,10 +29,27 @@ THE SOFTWARE. namespace Capsaicin { +StratifiedSampler::StratifiedSampler() noexcept + : Component(Name) +{} + StratifiedSampler::~StratifiedSampler() noexcept { - gfxDestroyBuffer(gfx_, seedBuffer); - gfxDestroyBuffer(gfx_, sobolBuffer); + terminate(); +} + +RenderOptionList StratifiedSampler::getRenderOptions() noexcept +{ + RenderOptionList newOptions; + newOptions.emplace(RENDER_OPTION_MAKE(stratified_sampler_deterministic, options)); + return newOptions; +} + +StratifiedSampler::RenderOptions StratifiedSampler::convertOptions(RenderOptionList const &options) noexcept +{ + RenderOptions newOptions; + RENDER_OPTION_GET(stratified_sampler_deterministic, newOptions, options) + return newOptions; } bool StratifiedSampler::init(CapsaicinInternal const &capsaicin) noexcept @@ -41,12 +58,23 @@ bool StratifiedSampler::init(CapsaicinInternal const &capsaicin) noexcept sizeof(uint32_t) * std::max(capsaicin.getWidth(), 1920u) * std::max(capsaicin.getHeight(), 1080u); std::vector seedBufferData; - std::random_device rd; - std::mt19937 gen(rd()); - 
std::uniform_int_distribution distrib(0, std::numeric_limits::max()); - for (uint32_t i = 0; i < seedBufferSize; i += 4) + seedBufferData.reserve(seedBufferSize); + if (options.stratified_sampler_deterministic) { - seedBufferData.push_back(distrib(gen)); + std::mt19937 gen(5489U); + for (uint32_t i = 0; i < seedBufferSize; i += 4) + { + seedBufferData.push_back(gen()); + } + } + else + { + std::random_device rd; + std::mt19937 gen(rd()); + for (uint32_t i = 0; i < seedBufferSize; i += 4) + { + seedBufferData.push_back(gen()); + } } seedBuffer = gfxCreateBuffer(gfx_, (uint32_t)seedBufferData.size(), seedBufferData.data()); seedBuffer.setName("StratifiedSampler_SeedBuffer"); @@ -61,11 +89,16 @@ bool StratifiedSampler::init(CapsaicinInternal const &capsaicin) noexcept void StratifiedSampler::run(CapsaicinInternal &capsaicin) noexcept { + // Check for option changed + auto const optionsNew = convertOptions(capsaicin.getOptions()); + bool update = optionsNew.stratified_sampler_deterministic != options.stratified_sampler_deterministic; + options = optionsNew; + // Check if seed buffer needs to be re-initialised uint64_t const seedBufferSize = sizeof(uint32_t) * std::max(capsaicin.getWidth(), 1920u) * std::max(capsaicin.getHeight(), 1080u); - if (seedBufferSize > seedBuffer.getSize()) + if (update || seedBufferSize > seedBuffer.getSize()) { GfxCommandEvent const command_event(gfx_, "InitStratifiedSampler"); @@ -75,8 +108,16 @@ void StratifiedSampler::run(CapsaicinInternal &capsaicin) noexcept } } +void StratifiedSampler::terminate() noexcept +{ + gfxDestroyBuffer(gfx_, seedBuffer); + seedBuffer = {}; + gfxDestroyBuffer(gfx_, sobolBuffer); + sobolBuffer = {}; +} + void StratifiedSampler::addProgramParameters( - CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept + [[maybe_unused]] CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept { gfxProgramSetParameter(gfx_, program, "g_SeedBuffer", seedBuffer); gfxProgramSetParameter(gfx_, 
program, "g_SobolXorsBuffer", sobolBuffer); diff --git a/src/core/src/components/stratified_sampler/stratified_sampler.h b/src/core/src/components/stratified_sampler/stratified_sampler.h index 8ccd5dc..c3aa754 100644 --- a/src/core/src/components/stratified_sampler/stratified_sampler.h +++ b/src/core/src/components/stratified_sampler/stratified_sampler.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -25,20 +25,40 @@ THE SOFTWARE. namespace Capsaicin { -class StratifiedSampler : public Component::RegistrarName +class StratifiedSampler + : public Component + , public ComponentFactory::Registrar { public: static constexpr std::string_view Name = "StratifiedSampler"; - /** Constructor. */ - StratifiedSampler() noexcept {} - StratifiedSampler(StratifiedSampler const &) noexcept = delete; StratifiedSampler(StratifiedSampler &&) noexcept = default; + /** Constructor. */ + StratifiedSampler() noexcept; + /** Destructor. */ - virtual ~StratifiedSampler() noexcept; + ~StratifiedSampler() noexcept; + + /** + * Gets configuration options for current component. + * @return A list of all valid configuration options. + */ + RenderOptionList getRenderOptions() noexcept override; + + struct RenderOptions + { + bool stratified_sampler_deterministic = true; /**< Use deterministic seeding of random numbers */ + }; + + /** + * Convert render options to internal options format. + * @param options Current render options. + * @returns The options converted. + */ + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; /** * Initialise any internal data or state. 
@@ -55,14 +75,20 @@ class StratifiedSampler : public Component::RegistrarName */ void run(CapsaicinInternal &capsaicin) noexcept override; + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + /** * Add the required program parameters to a shader based on current settings. * @param capsaicin Current framework context. * @param program The shader program to bind parameters to. */ - virtual void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept; + void addProgramParameters(CapsaicinInternal const &capsaicin, GfxProgram program) const noexcept; private: + RenderOptions options; GfxBuffer seedBuffer; GfxBuffer sobolBuffer; }; diff --git a/src/core/src/components/stratified_sampler/stratified_sampler.hlsl b/src/core/src/components/stratified_sampler/stratified_sampler.hlsl index 45a531e..394b3f9 100644 --- a/src/core/src/components/stratified_sampler/stratified_sampler.hlsl +++ b/src/core/src/components/stratified_sampler/stratified_sampler.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -123,8 +123,8 @@ class StratifiedSampler { dimension = (dimension <= 62) ? dimension : 0; uint val = NoExport::stochasticSobol(index, seed, ++dimension, 64); - // Note: Uses 4294967808 instead of 2^32 in order to ensure [0.0, 1.0) mapping due to floating point rounding error. - float ret = (float)val * (1.0f / 4294967808.0f); + // Note: Use the upper 24 bits to avoid a bias due to floating point rounding error. 
+ float ret = (float)(val >> 8) * 0x1.0p-24f; return ret; } @@ -138,7 +138,7 @@ class StratifiedSampler // Get next Sobol values uint val0 = NoExport::stochasticSobol(index, seed, ++dimension, 64); uint val1 = NoExport::stochasticSobol(index, seed, ++dimension, 64); - return float2(val0, val1) * (1.0f / 4294967808.0f).xx; + return float2(val0 >> 8, val1 >> 8) * 0x1.0p-24f.xx; } /** @@ -152,7 +152,7 @@ class StratifiedSampler uint val0 = NoExport::stochasticSobol(index, seed, ++dimension, 64); uint val1 = NoExport::stochasticSobol(index, seed, ++dimension, 64); uint val2 = NoExport::stochasticSobol(index, seed, ++dimension, 64); - return float3(val0, val1, val2) * (1.0f / 4294967808.0f).xxx; + return float3(val0 >> 8, val1 >> 8, val2 >> 8) * 0x1.0p-24f.xxx; } }; @@ -217,7 +217,7 @@ class StratifiedSampler1D float rand() { uint val = randInt(); - float ret = (float)val * (1.0f / 4294967808.0f); + float ret = (float)(val >> 8) * 0x1.0p-24f; return ret; } }; @@ -276,7 +276,7 @@ class StratifiedSampler2D // Get next Sobol values uint val0 = NoExport::stochasticSobol(index, seed, dimension, 2); uint val1 = NoExport::stochasticSobol(index++, seed, dimension + 1, 2); - return float2(val0, val1) * (1.0f / 4294967808.0f).xx; + return float2(val0 >> 8, val1 >> 8) * 0x1.0p-24f.xx; } }; diff --git a/src/core/src/components/stratified_sampler/stratified_sampler_data.h b/src/core/src/components/stratified_sampler/stratified_sampler_data.h index 6a3eb98..b5cb1f0 100644 --- a/src/core/src/components/stratified_sampler/stratified_sampler_data.h +++ b/src/core/src/components/stratified_sampler/stratified_sampler_data.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/math/geometry.hlsl b/src/core/src/geometry/geometry.hlsl similarity index 60% rename from src/core/src/math/geometry.hlsl rename to src/core/src/geometry/geometry.hlsl index 9bfaabc..d905de7 100644 --- a/src/core/src/math/geometry.hlsl +++ b/src/core/src/geometry/geometry.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,14 +23,14 @@ THE SOFTWARE. #ifndef GEOMETRY_HLSL #define GEOMETRY_HLSL -#include "math.hlsl" +#include "../math/math.hlsl" /** * Offset ray function from Ray Tracing Gems (chapter 6). * @note Offsets ray start position from surface to prevent self intersections * @param position Current position. * @param normal Geometric normal vector at current position. - * @return The offset position. + * @returns The offset position. 
*/ float3 offsetPosition(const float3 position, const float3 normal) { @@ -46,42 +46,36 @@ float3 offsetPosition(const float3 position, const float3 normal) return select(abs(position) < origin.xxx, posOffset, positionI); } -float3 transformDirection(const float3 direction, const float4x4 transform) -{ - // https://github.com/graphitemaster/normals_revisited - float4x4 result; -#define minor(m, r0, r1, r2, c0, c1, c2) \ - (m[c0][r0] * (m[c1][r1] * m[c2][r2] - m[c1][r2] * m[c2][r1]) \ - - m[c1][r0] * (m[c0][r1] * m[c2][r2] - m[c0][r2] * m[c2][r1]) \ - + m[c2][r0] * (m[c0][r1] * m[c1][r2] - m[c0][r2] * m[c1][r1])) - result[0][0] = minor(transform, 1, 2, 3, 1, 2, 3); - result[1][0] = -minor(transform, 1, 2, 3, 0, 2, 3); - result[2][0] = minor(transform, 1, 2, 3, 0, 1, 3); - result[3][0] = -minor(transform, 1, 2, 3, 0, 1, 2); - result[0][1] = -minor(transform, 0, 2, 3, 1, 2, 3); - result[1][1] = minor(transform, 0, 2, 3, 0, 2, 3); - result[2][1] = -minor(transform, 0, 2, 3, 0, 1, 3); - result[3][1] = minor(transform, 0, 2, 3, 0, 1, 2); - result[0][2] = minor(transform, 0, 1, 3, 1, 2, 3); - result[1][2] = -minor(transform, 0, 1, 3, 0, 2, 3); - result[2][2] = minor(transform, 0, 1, 3, 0, 1, 3); - result[3][2] = -minor(transform, 0, 1, 3, 0, 1, 2); - result[0][3] = -minor(transform, 0, 1, 2, 1, 2, 3); - result[1][3] = minor(transform, 0, 1, 2, 0, 2, 3); - result[2][3] = -minor(transform, 0, 1, 2, 0, 1, 3); - result[3][3] = minor(transform, 0, 1, 2, 0, 1, 2); - return mul(result, float4(direction, 0.0f)).xyz; -#undef minor // cleanup -} - +/** + * Determine the location within a 2D triangle using barycentric coordinates. + * @param v0 Triangles first vertex. + * @param v1 Triangles second vertex. + * @param v2 Triangles third vertex. + * @param barycentrics Barycentric weights applied to v1 (x) and v2 (y); v0 is weighted by (1 - x - y). + * @returns The position at the barycentric coordinates. 
+ */ float2 interpolate(const float2 v0, const float2 v1, const float2 v2, const float2 barycentrics) { return (1.0f - barycentrics.x - barycentrics.y) * v0 + barycentrics.x * v1 + barycentrics.y * v2; } +/** + * Determine the location within a 3D triangle using barycentric coordinates. + * @param v0 Triangles first vertex. + * @param v1 Triangles second vertex. + * @param v2 Triangles third vertex. + * @param barycentrics Barycentric weights applied to v1 (x) and v2 (y); v0 is weighted by (1 - x - y). + * @returns The position at the barycentric coordinates. + */ float3 interpolate(const float3 v0, const float3 v1, const float3 v2, const float2 barycentrics) { return (1.0f - barycentrics.x - barycentrics.y) * v0 + barycentrics.x * v1 + barycentrics.y * v2; } +/** + * Calculate the 2D motion vector between two homogeneous positions. + * Each position is divided by its w component and scaled by 0.5; the y axis of the difference is flipped. + * @param current_pos Position in the current frame (presumably clip space - TODO confirm against callers). + * @param previous_pos Position in the previous frame (presumably clip space - TODO confirm against callers). + * @returns The offset from the previous position to the current position. 
+ */ +float2 CalculateMotionVector(float4 current_pos, float4 previous_pos) +{ + float2 current_uv = 0.5f * current_pos.xy / current_pos.w; + float2 previous_uv = 0.5f * previous_pos.xy / previous_pos.w; + + return (current_uv - previous_uv) * float2(1.0f, -1.0f); +} + #endif // GEOMETRY_HLSL diff --git a/src/core/src/geometry/intersection.hlsl b/src/core/src/geometry/intersection.hlsl new file mode 100644 index 0000000..1771d50 --- /dev/null +++ b/src/core/src/geometry/intersection.hlsl @@ -0,0 +1,118 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#ifndef INTERSECTION_HLSL +#define INTERSECTION_HLSL + +/* +// Requires the following data to be defined in any shader that uses this file +StructuredBuffer g_InstanceBuffer; +StructuredBuffer g_MeshBuffer; +StructuredBuffer g_TransformBuffer; +StructuredBuffer g_IndexBuffer; +StructuredBuffer g_VertexBuffer; +StructuredBuffer g_MaterialBuffer; +RaytracingAccelerationStructure g_Scene; +uint g_FrameIndex; +*/ + +#include "geometry.hlsl" +#include "mesh.hlsl" +#include "ray_tracing.hlsl" +#include "../materials/materials.hlsl" +#include "../math/hash.hlsl" + +/** + * Check whether a hit passes the alpha mask of the material on the hit surface. + * Back facing hits are rejected when the bits of the materials normal_alpha_side.z value are zero. + * @param hit_info Data describing the hit to test. + * @return True if the alpha value at the hit location is greater than 0.5, False otherwise. + */ +bool AlphaTest(HitInfo hit_info) +{ + // Get instance information for current object + Instance instance = g_InstanceBuffer[hit_info.instanceIndex]; + + // Get material + Material material = g_MaterialBuffer[instance.material_index]; + + // Check back facing + // We currently only check back facing on alpha flagged surfaces as a performance optimisation. For normal + // geometry we should never intersect the back side of any opaque objects due to visibility being occluded + // by the front of the object (situations where the camera is inside an object are ignored). 
+ if (!hit_info.frontFace && asuint(material.normal_alpha_side.z) == 0) + { + return false; + } + + // Get vertices + Mesh mesh = g_MeshBuffer[instance.mesh_index + hit_info.geometryIndex]; + TriangleUV vertices = fetchVerticesUV(mesh, hit_info.primitiveIndex); + + // Calculate UV coordinates + float2 uv = interpolate(vertices.uv0, vertices.uv1, vertices.uv2, hit_info.barycentrics); + MaterialAlpha mask = MakeMaterialAlpha(material, uv); + + // Check the alpha mask + return mask.alpha > 0.5f; +} + +#ifdef DISABLE_ALPHA_TESTING +typedef RayQuery ClosestRayQuery; +typedef RayQuery ShadowRayQuery; + +template +RayQueryType TraceRay(RayDesc incommingRay) +{ + RayQueryType ray_query; + ray_query.TraceRayInline(g_Scene, RAY_FLAG_NONE, 0xFFu, incommingRay); + while (ray_query.Proceed()) + { + } + + return ray_query; +} +#else // DISABLE_ALPHA_TESTING +typedef RayQuery ClosestRayQuery; +typedef RayQuery ShadowRayQuery; + +template +RayQueryType TraceRay(RayDesc incommingRay) +{ + RayQueryType ray_query; + ray_query.TraceRayInline(g_Scene, RAY_FLAG_NONE, 0xFFu, incommingRay); + while (ray_query.Proceed()) + { + if (ray_query.CandidateType() == CANDIDATE_NON_OPAQUE_TRIANGLE) + { + if (AlphaTest(GetHitInfoRtInlineCandidate(ray_query))) + { + ray_query.CommitNonOpaqueTriangleHit(); + } + } + else + { + // Should never get here as we don't support non-triangle geometry + // However if this conditional is removed the driver crashes + ray_query.Abort(); + } + } + + return ray_query; +} +#endif // DISABLE_ALPHA_TESTING + +#endif // INTERSECTION_HLSL diff --git a/src/core/src/geometry/mesh.hlsl b/src/core/src/geometry/mesh.hlsl new file mode 100644 index 0000000..aa065a2 --- /dev/null +++ b/src/core/src/geometry/mesh.hlsl @@ -0,0 +1,211 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#ifndef MESH_HLSL +#define MESH_HLSL + +/* +// Requires the following data to be defined in any shader that uses this file +StructuredBuffer g_IndexBuffer; +StructuredBuffer g_VertexBuffer; +*/ + +#include "../gpu_shared.h" + +struct Triangle +{ + float3 v0; + float3 v1; + float3 v2; +}; + +struct TriangleUV +{ + float3 v0; + float3 v1; + float3 v2; + float2 uv0; + float2 uv1; + float2 uv2; +}; + +struct TriangleNorm +{ + float3 v0; + float3 v1; + float3 v2; + float3 n0; + float3 n1; + float3 n2; +}; + +struct TriangleNormUV +{ + float3 v0; + float3 v1; + float3 v2; + float3 n0; + float3 n1; + float3 n2; + float2 uv0; + float2 uv1; + float2 uv2; +}; + +struct UVs +{ + float2 uv0; + float2 uv1; + float2 uv2; +}; + +/** + * Fetch the vertices for a given triangle. + * @param mesh The mesh the triangle is located within. 
+ * @param primitiveIndex The index of the primitive within the mesh. + * @return The triangle data. + */ +Triangle fetchVertices(Mesh mesh, uint primitiveIndex) +{ + // Get index buffer values + uint i0 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 0] + mesh.vertex_offset_idx; + uint i1 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 1] + mesh.vertex_offset_idx; + uint i2 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 2] + mesh.vertex_offset_idx; + + // Get vertex values from buffers + float3 v0 = g_VertexBuffer[i0].position.xyz; + float3 v1 = g_VertexBuffer[i1].position.xyz; + float3 v2 = g_VertexBuffer[i2].position.xyz; + + Triangle ret = {v0, v1, v2}; + return ret; +} + +/** + * Fetch the vertices and UVs for a given triangle. + * @param mesh The mesh the triangle is located within. + * @param primitiveIndex The index of the primitive within the mesh. + * @return The triangle data. + */ +TriangleUV fetchVerticesUV(Mesh mesh, uint primitiveIndex) +{ + // Get index buffer values + uint i0 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 0] + mesh.vertex_offset_idx; + uint i1 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 1] + mesh.vertex_offset_idx; + uint i2 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 2] + mesh.vertex_offset_idx; + + // Get vertex values from buffers + float3 v0 = g_VertexBuffer[i0].position.xyz; + float3 v1 = g_VertexBuffer[i1].position.xyz; + float3 v2 = g_VertexBuffer[i2].position.xyz; + + // Get UV values from buffers + float2 uv0 = g_VertexBuffer[i0].uv; + float2 uv1 = g_VertexBuffer[i1].uv; + float2 uv2 = g_VertexBuffer[i2].uv; + + TriangleUV ret = {v0, v1, v2, uv0, uv1, uv2}; + return ret; +} + +/** + * Fetch the vertices and normals for a given triangle. + * @param mesh The mesh the triangle is located within. + * @param primitiveIndex The index of the primitive within the mesh. + * @return The triangle data. 
+ */ +TriangleNorm fetchVerticesNorm(Mesh mesh, uint primitiveIndex) +{ + // Get index buffer values + uint i0 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 0] + mesh.vertex_offset_idx; + uint i1 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 1] + mesh.vertex_offset_idx; + uint i2 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 2] + mesh.vertex_offset_idx; + + // Get vertex values from buffers + float3 v0 = g_VertexBuffer[i0].position.xyz; + float3 v1 = g_VertexBuffer[i1].position.xyz; + float3 v2 = g_VertexBuffer[i2].position.xyz; + + // Get normal values from buffers + float3 n0 = g_VertexBuffer[i0].normal.xyz; + float3 n1 = g_VertexBuffer[i1].normal.xyz; + float3 n2 = g_VertexBuffer[i2].normal.xyz; + + TriangleNorm ret = {v0, v1, v2, n0, n1, n2}; + return ret; +} + +/** + * Fetch the vertices, normals and UVs for a given triangle. + * @param mesh The mesh the triangle is located within. + * @param primitiveIndex The index of the primitive within the mesh. + * @return The triangle data. 
+ */ +TriangleNormUV fetchVerticesNormUV(Mesh mesh, uint primitiveIndex) +{ + // Get index buffer values + uint i0 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 0] + mesh.vertex_offset_idx; + uint i1 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 1] + mesh.vertex_offset_idx; + uint i2 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 2] + mesh.vertex_offset_idx; + + // Get vertex values from buffers + float3 v0 = g_VertexBuffer[i0].position.xyz; + float3 v1 = g_VertexBuffer[i1].position.xyz; + float3 v2 = g_VertexBuffer[i2].position.xyz; + + // Get normal values from buffers + float3 n0 = g_VertexBuffer[i0].normal.xyz; + float3 n1 = g_VertexBuffer[i1].normal.xyz; + float3 n2 = g_VertexBuffer[i2].normal.xyz; + + // Get UV values from buffers + float2 uv0 = g_VertexBuffer[i0].uv; + float2 uv1 = g_VertexBuffer[i1].uv; + float2 uv2 = g_VertexBuffer[i2].uv; + + TriangleNormUV ret = {v0, v1, v2, n0, n1, n2, uv0, uv1, uv2}; + return ret; +} + +/** + * Fetch the UVs for a given triangle. + * @param mesh The mesh the triangle is located within. + * @param primitiveIndex The index of the primitive within the mesh. + * @return The triangle data. 
+ */ +UVs fetchUVs(Mesh mesh, uint primitiveIndex) +{ + // Get index buffer values + uint i0 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 0] + mesh.vertex_offset_idx; + uint i1 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 1] + mesh.vertex_offset_idx; + uint i2 = g_IndexBuffer[mesh.index_offset_idx + 3 * primitiveIndex + 2] + mesh.vertex_offset_idx; + + // Get UV values from buffers + float2 uv0 = g_VertexBuffer[i0].uv; + float2 uv1 = g_VertexBuffer[i1].uv; + float2 uv2 = g_VertexBuffer[i2].uv; + + UVs ret = {uv0, uv1, uv2}; + return ret; +} + +#endif // MESH_HLSL diff --git a/src/core/src/geometry/mis.hlsl b/src/core/src/geometry/mis.hlsl new file mode 100644 index 0000000..710b891 --- /dev/null +++ b/src/core/src/geometry/mis.hlsl @@ -0,0 +1,95 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#ifndef MIS_HLSL +#define MIS_HLSL + +/** + * Balanced heuristic used in MIS weight calculation. + * @param fPDF The PDF of the sampled value. + * @param gPDF The PDF of the MIS weighting value. + * @return The calculated weight. + */ +float balanceHeuristic(float fPDF, float gPDF) +{ + return fPDF / (fPDF + gPDF); +} + +/** + * Balanced heuristic used in MIS weight calculation over 3 input functions. + * @param fPDF The PDF of the sampled value. + * @param gPDF The PDF of the MIS weighting value. + * @param hPDF The PDF of the second MIS weighting value. + * @return The calculated weight. + */ +float balanceHeuristic(float fPDF, float gPDF, float hPDF) +{ + // The denominator sums the PDF of every sampling strategy exactly once + return fPDF / (fPDF + gPDF + hPDF); +} + +/** + * Power heuristic used in MIS weight calculation. + * @param fPDF The PDF of the sampled value. + * @param gPDF The PDF of the MIS weighting value. + * @return The calculated weight. + */ +float powerHeuristic(float fPDF, float gPDF) +{ + return (fPDF * fPDF) / (fPDF * fPDF + gPDF * gPDF); +} + +/** + * Power heuristic used in MIS weight calculation over 3 input functions. + * @param fPDF The PDF of the sampled value. + * @param gPDF The PDF of the MIS weighting value. + * @param hPDF The PDF of the second MIS weighting value. + * @return The calculated weight. + */ +float powerHeuristic(float fPDF, float gPDF, float hPDF) +{ + return (fPDF * fPDF) / (fPDF * fPDF + gPDF * gPDF + hPDF * hPDF); +} + +/** + * Heuristic used in MIS weight calculation. + * @param fPDF The PDF of the sampled value. + * @param gPDF The PDF of the MIS weighting value. + * @return The calculated weight. + */ +float heuristicMIS(float fPDF, float gPDF) +{ + return balanceHeuristic(fPDF, gPDF); + //return powerHeuristic(fPDF, gPDF); +} + +/** + * Heuristic used in MIS weight calculation over 3 input functions. + * @param fPDF The PDF of the sampled value. + * @param gPDF The PDF of the MIS weighting value. + * @param hPDF The PDF of the second MIS weighting value. + * @return The calculated weight. 
+ */ +float heuristicMIS(float fPDF, float gPDF, float hPDF) +{ + return balanceHeuristic(fPDF, gPDF, hPDF); + //return powerHeuristic(fPDF, gPDF, hPDF); +} + +#endif // MIS_HLSL diff --git a/src/core/src/geometry/path_tracing.hlsl b/src/core/src/geometry/path_tracing.hlsl new file mode 100644 index 0000000..f42c3c1 --- /dev/null +++ b/src/core/src/geometry/path_tracing.hlsl @@ -0,0 +1,583 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#ifndef PATH_TRACING_HLSL +#define PATH_TRACING_HLSL + +#include "path_tracing_shared.h" + +#include "../components/stratified_sampler/stratified_sampler.hlsl" +#include "../geometry/intersection.hlsl" +#include "../geometry/mis.hlsl" +#include "../geometry/ray_intersection.hlsl" +#include "../materials/material_sampling.hlsl" +#include "../math/transform.hlsl" +#include "../math/random.hlsl" + +#ifndef USE_INLINE_RT +#define USE_INLINE_RT 1 +#endif + +/** + * The default payload for shading functions is just a standard float3 radiance. + * However if USE_CUSTOM_HIT_FUNCTIONS is defined by any code including this header then instead + * the payload will be the user supplied CustomPayLoad struct. It is also expected that if defined the + * user must also provide shadePathMissCustom, shadePathHitCustom and shadeLightHit functions to + * be used in place of the defaults. + */ +#ifdef USE_CUSTOM_HIT_FUNCTIONS +typedef CustomPayLoad pathPayload; +#else +typedef float3 pathPayload; +#endif + +struct ShadowRayPayload +{ + bool visible; +}; + +struct PathData +{ + LightSampler lightSampler; /**< Sampler used for lighting (essentially just wraps a random number generator) */ + StratifiedSampler randomStratified; /**< Stratified random number generator instance */ + float3 throughput; /**< Accumulated ray throughput for current path segment */ + pathPayload radiance; /**< Accumulated radiance for the current path segment */ + float samplePDF; /**< The PDF of the last sampled BRDF */ + float3 normal; /**< The surface normal at the location the current path originated from */ +#ifdef ENABLE_NEE_RESERVOIR_SAMPLING + float3 sampleReflectance; /**< The evaluated material at the point at the start of the path */ +#endif + uint bounce; /**< Bounce depth of current path segment */ + float3 origin; /**< Return value for new path segment start location */ + float3 direction; /**< Return value for new path 
segment direction */ + bool terminated; /**< Return value to indicate that the current path has terminated */ +}; + +/** + * Calculate any radiance from a missed path segment. + * @param ray The traced ray that missed any surfaces. + * @param currentBounce The current number of bounces along path for current segment. + * @param lightSampler Light sampler. + * @param normal Shading normal vector at start of path segment. + * @param samplePDF The PDF of sampling the current paths direction. + * @param throughput The current paths combined throughput. + * @param sampleReflectance Reflectance used to weight the light PDF (Only present if ENABLE_NEE_RESERVOIR_SAMPLING is defined). + * @param [in,out] radiance The combined radiance. Any new radiance is added to the existing value and returned. + */ +void shadePathMiss(RayDesc ray, uint currentBounce, inout LightSampler lightSampler, float3 normal, float samplePDF, + float3 throughput, +#ifdef ENABLE_NEE_RESERVOIR_SAMPLING + float3 sampleReflectance, +#endif + inout float3 radiance) +{ +#if !defined(DISABLE_NON_NEE) && !defined(DISABLE_ENVIRONMENT_LIGHTS) +# ifdef DISABLE_DIRECT_LIGHTING + if (currentBounce == 1) return; +# endif // DISABLE_DIRECT_LIGHTING + if (hasEnvironmentLight()) + { + // If nothing was hit then load the environment map + LightEnvironment light = getEnvironmentLight(); + float3 lightRadiance = evaluateEnvironmentLight(light, ray.Direction); + if (currentBounce != 0) + { + // Add lighting contribution +# ifndef DISABLE_NEE + // Account for light contribution along sampled direction + float lightPDF = sampleEnvironmentLightPDF(light, ray.Direction, float3(0.0f.xxx)); + lightPDF *= lightSampler.sampleLightPDF(0, ray.Origin, normal); +# ifdef ENABLE_NEE_RESERVOIR_SAMPLING + float weight = luminance(sampleReflectance * lightRadiance); // This must match Reservoir_EvaluateTargetPdf + lightPDF = (weight != 0.0f)? 
lightPDF / weight : 0.0f; +# endif + if (lightPDF != 0.0f) + { + float weight = heuristicMIS(samplePDF, lightPDF); + radiance += throughput * lightRadiance * weight.xxx; + } +# else // !DISABLE_NEE + radiance += throughput * lightRadiance; +# endif // !DISABLE_NEE + } + else + { + radiance += throughput * lightRadiance; + } + } +#endif // !DISABLE_NON_NEE && !DISABLE_ENVIRONMENT_LIGHTS +} + +/** + * Calculate any radiance from a hit path segment. + * @param ray The traced ray that hit a surface. + * @param hitData Data associated with the hit surface. + * @param iData Retrieved data associated with the hit surface. + * @param lightSampler Light sampler. + * @param currentBounce The current number of bounces along path for current segment. + * @param normal Shading normal vector at start of path segment (Only valid if bounce > 0). + * @param samplePDF The PDF of sampling the current paths direction. + * @param throughput The current paths combined throughput. + * @param sampleReflectance Reflectance used to weight the light PDF (Only present if ENABLE_NEE_RESERVOIR_SAMPLING is defined). + * @param [in,out] radiance The combined radiance. Any new radiance is added to the existing value and returned. 
+ */
+void shadePathHit(RayDesc ray, HitInfo hitData, IntersectData iData, inout LightSampler lightSampler,
+    uint currentBounce, float3 normal, float samplePDF, float3 throughput,
+#ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+    float3 sampleReflectance, /**< Reflectance used to rebuild the reservoir target PDF for the sampled direction */
+#endif
+    inout float3 radiance)
+{
+#if !defined(DISABLE_NON_NEE) && !defined(DISABLE_AREA_LIGHTS)
+#    ifdef DISABLE_DIRECT_LIGHTING
+    if (currentBounce == 1) {/*ignore emissive hit*/} else {
+#    endif // DISABLE_DIRECT_LIGHTING
+
+    // Get material emissive values
+    MaterialEmissive materialEmissive = MakeMaterialEmissive(iData.material, iData.uv);
+    if (any(materialEmissive.emissive > 0.0f))
+    {
+        // The emissive value of the hit surface is the radiance carried back along the ray
+        float3 lightRadiance = materialEmissive.emissive;
+        if (currentBounce != 0)
+        {
+            // Beyond the camera ray the contribution is MIS weighted whenever NEE is compiled in
+#    ifndef DISABLE_NEE
+            // Rebuild the hit triangle as an area light so its sampling PDF can be queried
+            LightArea emissiveLight = MakeLightArea(iData.vertex0, iData.vertex1, iData.vertex2,
+                // The following parameters are irrelevant for calculating PDF
+                0.0f.xxxx, 0.0f, 0.0f, 0.0f);
+
+            float lightPDF = sampleAreaLightPDF(emissiveLight, ray.Origin, iData.position);
+            lightPDF *= lightSampler.sampleLightPDF(getAreaLightIndex(hitData.instanceIndex, hitData.primitiveIndex), ray.Origin, normal);
+#        ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+            float3 newRadiance = evaluateAreaLight(emissiveLight, iData.barycentrics);
+            float weight = luminance(sampleReflectance * newRadiance); // This must match Reservoir_EvaluateTargetPdf
+            lightPDF = (weight != 0.0f)? lightPDF / weight : 0.0f;
+#        endif
+            if (lightPDF != 0.0f) // A zero light PDF skips the contribution entirely
+            {
+                float weight = heuristicMIS(samplePDF, lightPDF);
+                radiance += throughput * lightRadiance * weight.xxx;
+            }
+#else // DISABLE_NEE
+            radiance += throughput * lightRadiance;
+#endif // !DISABLE_NEE
+        }
+        else
+        {
+            radiance += throughput * lightRadiance;
+        }
+    }
+#    ifdef DISABLE_DIRECT_LIGHTING
+    }
+#    endif // DISABLE_DIRECT_LIGHTING
+#endif // !DISABLE_NON_NEE && !DISABLE_AREA_LIGHTS
+}
+
+/**
+ * Add the radiance arriving from a sampled light along a light (shadow) ray.
+ * @param ray The ray towards the sampled light (its direction is the light direction used to evaluate the BRDF).
+ * @param material Material data describing BRDF of surface.
+ * @param normal Shading normal vector at current position.
+ * @param viewDirection Outgoing ray view direction.
+ * @param throughput The current paths combined throughput.
+ * @param lightPDF The PDF of sampling the returned light direction.
+ * @param radianceLi The radiance visible along sampled light.
+ * @param selectedLight The light that was selected for sampling.
+ * @param [in,out] radiance The combined radiance. Any new radiance is added to the existing value and returned.
+ */
+void shadeLightHit(RayDesc ray, MaterialBRDF material, float3 normal, float3 viewDirection, float3 throughput,
+    float lightPDF, float3 radianceLi, Light selectedLight, inout float3 radiance)
+{
+#ifdef DISABLE_NON_NEE
+    float3 sampleReflectance = evaluateBRDF(material, normal, viewDirection, ray.Direction);
+    radiance += throughput * sampleReflectance * radianceLi / lightPDF.xxx;
+#else
+    // Evaluate BRDF for new light direction and calculate combined PDF for current sample
+    float3 sampleReflectance;
+    float samplePDF = sampleBRDFPDFAndEvalute(material, normal, viewDirection, ray.Direction, sampleReflectance);
+    if (samplePDF != 0.0f)
+    {
+        bool deltaLight = isDeltaLight(selectedLight);
+        float weight = (!deltaLight) ? heuristicMIS(lightPDF, samplePDF) : 1.0f; // Delta lights are not MIS weighted
+        radiance += throughput * sampleReflectance * radianceLi * (weight / lightPDF).xxx;
+    }
+#endif // DISABLE_NON_NEE
+}
+
+/**
+ * Calculates a new light ray direction from a surface by sampling the scenes lighting.
+ * No ray is returned when the sampled light has a zero PDF or lies behind the surface.
+ * @param material Material data describing BRDF of surface.
+ * @param randomStratified Random number sampler used to sample light.
+ * @param lightSampler Light sampler.
+ * @param position Current position on surface.
+ * @param normal Shading normal vector at current position.
+ * @param geometryNormal Surface normal vector at current position.
+ * @param viewDirection Outgoing ray view direction.
+ * @param ray (Out) The ray containing the new light ray parameters.
+ * @param lightPDF (Out) The PDF of sampling the returned light direction.
+ * @param radianceLi (Out) The radiance visible along sampled light.
+ * @param selectedLight (Out) The light that was selected for sampling.
+ * @return True if light path was generated, False if no ray returned.
+ */
+bool sampleLightsNEEDirection(MaterialBRDF material, inout StratifiedSampler randomStratified, LightSampler lightSampler,
+    float3 position, float3 normal, float3 geometryNormal, float3 viewDirection, out RayDesc ray, out float lightPDF, out float3 radianceLi, out Light selectedLight)
+{
+#ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+    Reservoir res = lightSampler.sampleLightList<8>(position, normal, viewDirection, material);
+    uint lightIndex = res.lightSample.index;
+    lightPDF = (res.W != 0.0f) ? rcp(res.W) : 0.0f; // Need to invert so it can be used in MIS
+#else
+    uint lightIndex = lightSampler.sampleLights(position, normal, lightPDF);
+#endif
+
+    if (lightPDF == 0.0f)
+    {
+        return false;
+    }
+
+    // Evaluate the selected light to get its radiance, direction and position
+    float3 lightPosition;
+    float3 lightDirection;
+    selectedLight = getLight(lightIndex);
+#ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+    radianceLi = evaluateLightSampled(selectedLight, position, res.lightSample.sampleParams, lightDirection, lightPosition);
+#else
+    float sampledLightPDF;
+    float2 unused;
+    radianceLi = sampleLight(selectedLight, randomStratified, position, normal, lightDirection, sampledLightPDF, lightPosition, unused);
+
+    // Combine the PDF of choosing the light with the PDF of the sample on that light
+    lightPDF *= sampledLightPDF;
+#endif
+
+    // Early discard lights behind surface (tested against both the geometric and the shading normal)
+    if (dot(lightDirection, geometryNormal) < 0.0f || dot(lightDirection, normal) < 0.0f || lightPDF == 0.0f)
+    {
+        return false;
+    }
+
+    // Create shadow ray, it stops at the light when the light has a position and is unbounded otherwise
+    ray.Origin = position;
+    ray.Direction = lightDirection;
+    ray.TMin = 0.0f;
+    ray.TMax = hasLightPosition(selectedLight) ? length(lightPosition - position) : FLT_MAX;
+    return true;
+}
+
+/**
+ * Calculates radiance from a new light ray direction from a surface by sampling the scenes lighting.
+ * A shadow ray is traced towards one sampled light and radiance is only added when that light is visible.
+ * @param material Material data describing BRDF of surface.
+ * @param randomStratified Random number sampler used to sample light.
+ * @param lightSampler Light sampler.
+ * @param position Current position on surface.
+ * @param normal Shading normal vector at current position.
+ * @param geometryNormal Surface normal vector at current position.
+ * @param viewDirection Outgoing ray view direction.
+ * @param throughput The current paths combined throughput.
+ * @param [in,out] radiance The combined radiance. Any new radiance is added to the existing value and returned.
+ */
+void sampleLightsNEE(MaterialBRDF material, inout StratifiedSampler randomStratified, LightSampler lightSampler,
+    float3 position, float3 normal, float3 geometryNormal, float3 viewDirection, float3 throughput, inout pathPayload radiance)
+{
+    // Pick a light and build the shadow ray towards it, nothing is added when no usable sample exists
+    RayDesc shadowRay;
+    float lightPDF;
+    float3 radianceLi;
+    Light selectedLight;
+    bool sampled = sampleLightsNEEDirection(material, randomStratified, lightSampler, position, normal, geometryNormal, viewDirection, shadowRay, lightPDF, radianceLi, selectedLight);
+    if (!sampled)
+    {
+        return;
+    }
+
+    // Trace the shadow ray to find out whether the sampled light is visible
+#if USE_INLINE_RT
+    ShadowRayQuery shadowQuery = TraceRay(shadowRay);
+    bool lightVisible = (shadowQuery.CommittedStatus() == COMMITTED_NOTHING);
+#else
+    ShadowRayPayload shadowPayload = {false};
+    TraceRay(g_Scene, SHADOW_RAY_FLAGS, 0xFFu, 1, 0, 1, shadowRay, shadowPayload);
+    bool lightVisible = shadowPayload.visible;
+#endif
+    if (!lightVisible)
+    {
+        return;
+    }
+    // Add the contribution of the unoccluded light
+#ifdef USE_CUSTOM_HIT_FUNCTIONS
+    shadeLightHitCustom(shadowRay, material, normal, viewDirection, throughput, lightPDF, radianceLi, selectedLight, radiance);
+#else
+    shadeLightHit(shadowRay, material, normal, viewDirection, throughput, lightPDF, radianceLi, selectedLight, radiance);
+#endif
+}
+
+/**
+ * Calculate the next segment along a path after a valid surface hit.
+ * @param materialBRDF The material on the hit surface.
+ * @param randomStratified Random number sampler used for sampling.
+ * @param lightSampler Light sampler.
+ * @param currentBounce The current number of bounces along path for current segment.
+ * @param minBounces The minimum number of allowed bounces along path segment before termination.
+ * @param maxBounces The maximum number of allowed bounces along path segment.
+ * @param normal Shading normal vector at current position.
+ * @param geometryNormal Surface normal vector at current position.
+ * @param viewDirection Outgoing ray view direction.
+ * @param [in,out] throughput Combined throughput for current path.
+ * @param [out] rayDirection New outgoing path segment direction.
+ * @param [out] samplePDF The PDF of sampling the new paths direction.
+ * @return True if path has new segment, False if path should be terminated.
+ */
+bool pathNext(MaterialBRDF materialBRDF, inout StratifiedSampler randomStratified,
+    inout LightSampler lightSampler, uint currentBounce, uint minBounces, uint maxBounces, float3 normal,
+    float3 geometryNormal, float3 viewDirection, inout float3 throughput, out float3 rayDirection, out float samplePDF
+#ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+    , out float3 sampleReflectance /**< [out] The reflectance returned by sampleBRDF for the new direction */
+#endif
+)
+{
+    // Sample BRDF to get next ray direction (maxBounces is not used here, the caller enforces it)
+#ifndef ENABLE_NEE_RESERVOIR_SAMPLING
+    float3 sampleReflectance;
+#endif
+    rayDirection = sampleBRDF(materialBRDF, randomStratified, normal, viewDirection, sampleReflectance, samplePDF);
+
+    // Prevent tracing directions below the surface (also guards the division by samplePDF below)
+    if (dot(geometryNormal, rayDirection) <= 0.0f || samplePDF == 0.0f)
+    {
+        return false;
+    }
+
+    // Add sampling weight to current weight
+    throughput *= sampleReflectance / samplePDF.xxx;
+
+    // Russian Roulette early termination, survivors are divided by their survival probability to stay unbiased
+    if (currentBounce > minBounces)
+    {
+        float rrSample = min(hmax(throughput), 1.0f); // A probability cannot exceed 1, unclamped it would darken paths that always survive
+        if (rrSample <= lightSampler.randomNG.rand())
+        {
+            return false;
+        }
+        throughput /= rrSample.xxx;
+    }
+    return true;
+}
+
+/**
+ * Handle case when a traced ray hits a surface.
+ * @param ray The traced ray that hit a surface.
+ * @param hitData Data associated with the hit surface.
+ * @param iData Retrieved data associated with the hit surface.
+ * @param randomStratified Random number sampler used for sampling.
+ * @param lightSampler Light sampler.
+ * @param currentBounce The current number of bounces along path for current segment.
+ * @param minBounces The minimum number of allowed bounces along path segment before termination.
+ * @param maxBounces The maximum number of allowed bounces along path segment.
+ * @param [in,out] normal Shading normal vector at path segments origin (returns shading normal at current position).
+ * @param [in,out] samplePDF The PDF of sampling the current path segments direction (returns the PDF of sampling the new paths direction).
+ * @param [in,out] throughput Combined throughput for current path.
+ * @param [in,out] radiance The visible radiance contribution of the path hit.
+ * @return True if path has new segment, False if path should be terminated.
+ */
+bool pathHit(inout RayDesc ray, HitInfo hitData, IntersectData iData, inout StratifiedSampler randomStratified,
+    inout LightSampler lightSampler, uint currentBounce, uint minBounces, uint maxBounces, inout float3 normal,
+    inout float samplePDF, inout float3 throughput,
+#ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+    inout float3 sampleReflectance, /**< In: reflectance of the current segments sample, out: reflectance of the new one */
+#endif
+    inout pathPayload radiance)
+{
+    // Shade current position (adds any emission of the hit surface, the call is assembled from the fragments below)
+#ifdef USE_CUSTOM_HIT_FUNCTIONS
+    shadePathHitCustom(ray, hitData, iData, lightSampler, currentBounce, normal, samplePDF, throughput,
+#else
+    shadePathHit(ray, hitData, iData, lightSampler, currentBounce, normal, samplePDF, throughput,
+#endif
+#ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+        sampleReflectance,
+#endif
+        radiance);
+
+    // Terminate early if no more bounces
+    if (currentBounce == maxBounces)
+    {
+        return false;
+    }
+
+    float3 viewDirection = -ray.Direction;
+    // Stop if surface normal places ray behind surface (note surface normal != geometric normal)
+    // Currently disabled due to incorrect normals generated by normal mapping when not using displacement/parallax
+    //if (dot(iData.normal, viewDirection) <= 0.0f)
+    //{
+    //    return false;
+    //}
+
+    // Offset the intersection position to prevent self intersection on generated rays
+    float3 offsetOrigin = offsetPosition(iData.position, iData.geometryNormal);
+
+    MaterialBRDF materialBRDF = MakeMaterialBRDF(iData.material, iData.uv);
+#ifdef DISABLE_ALBEDO_MATERIAL
+    // Disable material albedo if requested (only the hit of the camera ray is replaced by a constant albedo)
+    if (currentBounce == 0)
+    {
+        materialBRDF.albedo = 0.3f.xxx;
+#    ifndef DISABLE_SPECULAR_MATERIALS
+        materialBRDF.F0 = 0.0f.xxx;
+#    endif // !DISABLE_SPECULAR_MATERIALS
+    }
+#endif // DISABLE_ALBEDO_MATERIAL
+
+#ifndef DISABLE_NEE
+#    ifdef DISABLE_DIRECT_LIGHTING
+    // Disable direct lighting if requested (the condition guards the braced block that follows)
+    if (currentBounce > 0)
+#    endif // DISABLE_DIRECT_LIGHTING
+    {
+        // Sample a single light
+        sampleLightsNEE(materialBRDF, randomStratified, lightSampler, offsetOrigin,
+            iData.normal, iData.geometryNormal, viewDirection, throughput, radiance);
+    }
+#endif // DISABLE_NEE
+
+    // Sample BRDF to get next ray direction (this also updates throughput and samplePDF)
+    float3 rayDirection;
+    bool ret = pathNext(materialBRDF, randomStratified, lightSampler, currentBounce, minBounces, maxBounces,
+        iData.normal, iData.geometryNormal, viewDirection, throughput, rayDirection, samplePDF
+#ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+        , sampleReflectance
+#endif
+        );
+
+    // Update path information (written even when the path terminates, callers must check the return value)
+    ray.Origin = offsetOrigin;
+    ray.Direction = rayDirection;
+    ray.TMin = 0.0f;
+    ray.TMax = FLT_MAX;
+    normal = iData.normal;
+    return ret;
+}
+
+/**
+ * Trace a new path.
+ * @param ray The ray for the first path segment.
+ * @param randomStratified Random number sampler used for sampling.
+ * @param lightSampler Light sampler.
+ * @param currentBounce The current number of bounces along path for current segment.
+ * @param minBounces The minimum number of allowed bounces along path segment before termination.
+ * @param maxBounces The maximum number of allowed bounces along path segment.
+ * @param normal The shading normal of the current surface (Only valid if bounce > 0).
+ * @param throughput Initial combined throughput for current path.
+ * @param [in,out] radiance The visible radiance contribution of the path hit.
+ */
+void tracePath(RayDesc ray, inout StratifiedSampler randomStratified, inout LightSampler lightSampler,
+    uint currentBounce, uint minBounces, uint maxBounces, float3 normal, float3 throughput, inout pathPayload radiance)
+{
+    // Initialise per-sample path tracing values (locals for inline ray tracing, a payload struct otherwise)
+#if USE_INLINE_RT
+    float samplePDF = 1.0f; // The PDF of the last sampled BRDF
+#    ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+    float3 sampleReflectance = 0.0f;
+#    endif
+#else
+    PathData pathData;
+    pathData.radiance = radiance;
+    pathData.throughput = throughput;
+    pathData.samplePDF = 1.0f;
+    pathData.terminated = false;
+    pathData.lightSampler = lightSampler;
+    pathData.randomStratified = randomStratified;
+#endif
+
+    for (uint bounce = currentBounce; bounce <= maxBounces; ++bounce)
+    {
+        // Trace the ray through the scene
+#if USE_INLINE_RT
+        ClosestRayQuery rayQuery = TraceRay(ray);
+
+        // Check for valid intersection, a miss adds any environment radiance and ends the path
+        if (rayQuery.CommittedStatus() == COMMITTED_NOTHING)
+        {
+#    ifdef USE_CUSTOM_HIT_FUNCTIONS
+            shadePathMissCustom(ray, bounce, lightSampler, normal, samplePDF, throughput
+#    else
+            shadePathMiss(ray, bounce, lightSampler, normal, samplePDF, throughput
+#    endif
+#    ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+                , sampleReflectance
+#    endif
+                , radiance);
+            break;
+        }
+        else
+        {
+            // Get the intersection data, pathHit shades the hit and updates ray for the next segment
+            HitInfo hitData = GetHitInfoRtInlineCommitted(rayQuery);
+            IntersectData iData = MakeIntersectData(hitData);
+            if (!pathHit(ray, hitData, iData, randomStratified, lightSampler,
+                bounce, minBounces, maxBounces, normal, samplePDF, throughput
+#    ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+                , sampleReflectance
+#    endif
+                , radiance))
+            {
+                break;
+            }
+        }
+#else
+        pathData.bounce = bounce;
+        TraceRay(g_Scene, CLOSEST_RAY_FLAGS, 0xFFu, 0, 0, 0, ray, pathData);
+        // Create new ray from the origin and direction written into the payload
+        ray.Origin = pathData.origin;
+        ray.Direction = pathData.direction;
+        ray.TMin = 0.0f;
+        ray.TMax = FLT_MAX;
+
+        if (pathData.terminated)
+        {
+            break;
+        }
+#endif
+    }
+
+#if !USE_INLINE_RT
+    radiance = pathData.radiance; // NOTE(review): only radiance is copied back, the samplers held in pathData are not - confirm intended
+#endif
+}
+
+/**
+ * Trace a new path from beginning (bounce 0, zero start normal and a throughput of one).
+ * @param ray The ray for the first path segment.
+ * @param randomStratified Random number sampler used for sampling.
+ * @param lightSampler Light sampler.
+ * @param minBounces The minimum number of allowed bounces along path segment before termination.
+ * @param maxBounces The maximum number of allowed bounces along path segment.
+ * @param [in,out] radiance The visible radiance contribution of the path hit.
+ */
+void traceFullPath(RayDesc ray, inout StratifiedSampler randomStratified, inout LightSampler lightSampler,
+    uint minBounces, uint maxBounces, inout pathPayload radiance)
+{
+    tracePath(ray, randomStratified, lightSampler, 0, minBounces, maxBounces, 0.0f.xxx, 1.0f.xxx, radiance);
+}
+
+#endif // PATH_TRACING_HLSL
diff --git a/src/core/src/geometry/path_tracing_shared.h b/src/core/src/geometry/path_tracing_shared.h
new file mode 100644
index 0000000..df190b4
--- /dev/null
+++ b/src/core/src/geometry/path_tracing_shared.h
@@ -0,0 +1,142 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+#ifndef PATH_TRACING_SHARED_H
+#define PATH_TRACING_SHARED_H
+
+#include "../gpu_shared.h"
+
+#ifdef __cplusplus
+#    pragma warning(push)
+#    pragma warning(disable : 4324) // structure was padded due to alignment specifier
+#endif
+struct RayCamera
+{
+    float3 origin; /**< The ray starting position */
+    float3 directionTL; /**< The direction to the top left of the virtual screen */
+    float3 directionX; /**< The virtual screens horizontal direction (length of 1 pixel - left->right)*/
+    float3 directionY; /**< The virtual screens vertical direction (length of 1 pixel - top->bottom)*/
+    float2 range; /**< The rays near and far distances */
+};
+
+struct Camera
+{
+    float3 origin; /**< The camera position */
+    float3 lookAt; /**< The position the camera looks at (view direction is lookAt - origin) */
+    float3 up; /**< The up direction used to derive the horizontal screen direction */
+
+    float aspect; /**< Ratio of horizontal to vertical screen size */
+    float fovY; /**< The full vertical field of view angle (radians, it is halved and passed to tan) */
+    float nearZ; /**< The near distance (also the distance of the virtual screen from the origin) */
+    float farZ; /**< The far distance */
+};
+#ifdef __cplusplus
+#    pragma warning(pop)
+#endif
+
+/**
+ * Converts a camera description to corresponding ray generation camera format.
+ * @param camera The camera to convert.
+ * @param width The screen width.
+ * @param height The screen height.
+ * @returns The created ray camera.
+ */ +static inline RayCamera caclulateRayCamera(Camera camera, uint32_t width, uint32_t height) +{ + float3 origin = camera.origin; + float2 range = float2(camera.nearZ, camera.farZ); + + // Get the size of the screen in the X and Y screen direction + float size = tan(camera.fovY / 2.0f); + size *= range.x; + float sizeHalfX = size * camera.aspect; + float sizeHalfY = size; + + // Generate view direction + float3 forward = camera.lookAt - origin; + forward = normalize(forward); + // Generate proper horizontal direction + float3 right = cross(forward, camera.up); + right = normalize(right); + // Generate proper up direction + float3 down = cross(forward, right); + // Normalize vectors + down = normalize(down); + + // Set each of the camera vectors to an orthonormal basis + float3 directionX = right; + float3 directionY = down; + float3 directionZ = forward; + + // Get weighted distance vector + directionZ = directionZ * range.x; + + // Get the Scaled Horizontal and up vectors + directionX *= sizeHalfX; + directionY *= sizeHalfY; + + // Offset the direction vector + float3 directionTL = directionZ - directionX - directionY; + + // Scale the direction X and Y vectors from half size + directionX += directionX; + directionY += directionY; + + // Scale the X and Y vectors to be pixel length + directionX /= (float)width; + directionY /= (float)height; + + RayCamera ret = {origin, directionTL, directionX, directionY, range}; + return ret; +} + +#ifndef __cplusplus +/** + * Generate a primary ray originating from the camera for a given pixel. + * @param pixel Requested pixel (pixel center is at 0.5 +-0.5) + * @param rayCamera Camera raytracing parameters. + * @return The generated ray. 
+ */
+RayDesc generateCameraRay(float2 pixel, in RayCamera rayCamera)
+{
+    // Offsets from the top left corner of the virtual screen to the requested
+    // pixel, built from the per-pixel step vectors
+    float3 stepRight = pixel.x * rayCamera.directionX;
+    float3 stepDown = pixel.y * rayCamera.directionY;
+
+    // Vector from the camera origin through the pixel (not yet unit length)
+    float3 toPixel = (stepRight + stepDown) + rayCamera.directionTL;
+
+    RayDesc cameraRay;
+    // The ray starts at the camera position and points through the pixel
+    cameraRay.Origin = rayCamera.origin;
+    cameraRay.Direction = normalize(toPixel);
+
+    // Only the span between the near and far distances is traced
+    cameraRay.TMin = rayCamera.range.x;
+    cameraRay.TMax = rayCamera.range.y;
+
+    return cameraRay;
+}
+#endif
+
+#endif // PATH_TRACING_SHARED_H
diff --git a/src/core/src/geometry/ray_intersection.hlsl b/src/core/src/geometry/ray_intersection.hlsl
new file mode 100644
index 0000000..0027817
--- /dev/null
+++ b/src/core/src/geometry/ray_intersection.hlsl
@@ -0,0 +1,174 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+#ifndef RAY_INTERSECTION_HLSL
+#define RAY_INTERSECTION_HLSL
+
+/*
+// Requires the following data to be defined in any shader that uses this file
+StructuredBuffer<Instance> g_InstanceBuffer;
+StructuredBuffer<Mesh> g_MeshBuffer;
+StructuredBuffer<float3x4> g_TransformBuffer;
+StructuredBuffer g_IndexBuffer; // element type not visible in this file - TODO confirm
+StructuredBuffer g_VertexBuffer; // element type not visible in this file - TODO confirm
+StructuredBuffer<Material> g_MaterialBuffer;
+
+TextureCube g_EnvironmentBuffer;
+Texture2D g_TextureMaps[];
+SamplerState g_TextureSampler; // Should be a linear sampler
+*/
+
+#include "../materials/materials.hlsl"
+#include "../geometry/geometry.hlsl"
+#include "../geometry/mesh.hlsl"
+#include "../geometry/ray_tracing.hlsl"
+#include "../math/transform.hlsl"
+
+/** Data representing a surface intersection with full details */
+struct IntersectData
+{
+    Material material; /**< The material associated with intersected surface */
+    float2 uv; /**< The texture UV values at intersected position */
+    float3 vertex0; /**< The surface triangles first vertex (after the instance transform) */
+    float3 vertex1; /**< The surface triangles second vertex (after the instance transform) */
+    float3 vertex2; /**< The surface triangles third vertex (after the instance transform) */
+    float2 uv0; /**< The uv coordinate at the first vertex */
+    float2 uv1; /**< The uv coordinate at the second vertex */
+    float2 uv2; /**< The uv coordinate at the third vertex */
+    float3 position; /**< The ray intersection location */
+    float3 normal; /**< The shading normal at position */
+    float3 geometryNormal; /**< The normal of actual intersected triangle at position */
+    float2 barycentrics; /**< The barycentric coordinates within the intersected primitive */
+};
+
+/**
+ * Determine the complete intersection data for a ray hit.
+ * @param hitData The intersection information for a ray hit.
+ * @return The data associated with the intersection.
+ */
+IntersectData MakeIntersectData(HitInfo hitData)
+{
+    // Get instance information for current object
+    Instance instance = g_InstanceBuffer[hitData.instanceIndex];
+    Mesh mesh = g_MeshBuffer[instance.mesh_index + hitData.geometryIndex];
+    float3x4 transform = g_TransformBuffer[instance.transform_index];
+    float3x3 normalTransform = getNormalTransform((float3x3)transform);
+
+    // Fetch vertex data
+    TriangleNormUV triData = fetchVerticesNormUV(mesh, hitData.primitiveIndex);
+
+    IntersectData iData;
+    // Set material
+    iData.material = g_MaterialBuffer[instance.material_index];
+    // Calculate UV coordinates
+    iData.uv = interpolate(triData.uv0, triData.uv1, triData.uv2, hitData.barycentrics);
+    // Add vertex information needed for lights
+    iData.vertex0 = transformPoint(triData.v0, transform).xyz;
+    iData.vertex1 = transformPoint(triData.v1, transform).xyz;
+    iData.vertex2 = transformPoint(triData.v2, transform).xyz;
+    iData.uv0 = triData.uv0;
+    iData.uv1 = triData.uv1;
+    iData.uv2 = triData.uv2;
+    // Calculate intersection position
+    iData.position = interpolate(iData.vertex0, iData.vertex1, iData.vertex2, hitData.barycentrics);
+    // Calculate geometry normal from the untransformed edges (assume CCW winding), flipped for back face hits
+    float3 edge10 = triData.v1 - triData.v0;
+    float3 edge20 = triData.v2 - triData.v0;
+    float3 localGeometryNormal = cross(edge10, edge20) * (hitData.frontFace ? 1.0f : -1.0f);
+    // Calculate shading normal from the vertex normals, flipped for back face hits
+    float3 normal = interpolate(triData.n0, triData.n1, triData.n2, hitData.barycentrics) * (hitData.frontFace ? 1.0f : -1.0f);
+    iData.normal = normal;
+    // Check for normal mapping (the texture index is stored as uint bits in a float, uint(-1) means no map)
+    uint normalTex = asuint(iData.material.normal_alpha_side.x);
+    if (normalTex != uint(-1))
+    {
+        // Get normal from texture map and remap it from [0, 1] to [-1, 1]
+        float3 normalTan = 2.0f * g_TextureMaps[NonUniformResourceIndex(normalTex)].SampleLevel(g_TextureSampler, iData.uv, 0.0f).xyz - 1.0f;
+        normal = normalize(normal);
+        // Ensure normal is in same hemisphere as geometry normal (This is required when non-uniform negative(mirrored) scaling is applied to a backface surface)
+        normal = dot(normal, normalize(localGeometryNormal)) >= 0.0f ? normal : -normal;
+
+        // Calculate tangent and bi-tangent basis vectors
+        float2 edgeUV1 = triData.uv1 - triData.uv0;
+        float2 edgeUV2 = triData.uv2 - triData.uv0;
+        float determinate = edgeUV1.x * edgeUV2.y - edgeUV1.y * edgeUV2.x;
+        // If the determinant is zero then the UV edge matrix is non-invertible, a zero length sampled normal is skipped too
+        if (determinate != 0.0f && dot(normalTan, normalTan) > 0.0f)
+        {
+            determinate = rcp(determinate);
+            float3 tangentBasis = (edge10 * edgeUV2.yyy - edge20 * edgeUV1.yyy) * determinate;
+            float3 bitangentBasis = (edge20 * edgeUV1.xxx - edge10 * edgeUV2.xxx) * determinate;
+
+            // Gram-Schmidt orthogonalise tangent
+            float3 tangent = normalize(tangentBasis - normal * dot(normal, tangentBasis));
+            float3 bitangent = cross(normal, tangent);
+
+            // Correct handedness - NOTE(review): this negates the bitangent when it already agrees with the UV derived one, confirm convention
+            bitangent = dot(bitangent, bitangentBasis) >= 0.0f ? -bitangent : bitangent;
+
+            // Convert from tangent space
+            float3x3 tbn = float3x3(tangent, bitangent, normal);
+            iData.normal = mul(normalTan, tbn);
+        }
+    }
+    iData.geometryNormal = normalize(mul(normalTransform, localGeometryNormal));
+    iData.normal = normalize(mul(normalTransform, iData.normal));
+    iData.barycentrics = hitData.barycentrics;
+    return iData;
+}
+
+/**
+ * Determine the complete intersection data for a ray hit using visibility buffer information (both normals are stored as supplied).
+ * @param hitData The intersection information for a ray hit.
+ * @return The data associated with the intersection.
+ */
+IntersectData MakeIntersectData(HitInfo hitData, float3 geometryNormal, float3 shadingNormal)
+{
+    // Resolve the hit instance to its mesh and transform, then load the hit triangle
+    Instance hitInstance = g_InstanceBuffer[hitData.instanceIndex];
+    Mesh hitMesh = g_MeshBuffer[hitInstance.mesh_index + hitData.geometryIndex];
+    float3x4 instanceTransform = g_TransformBuffer[hitInstance.transform_index];
+    TriangleNormUV tri = fetchVerticesNormUV(hitMesh, hitData.primitiveIndex);
+
+    IntersectData result;
+    result.material = g_MaterialBuffer[hitInstance.material_index];
+    result.barycentrics = hitData.barycentrics;
+
+    // Normals are stored exactly as supplied by the caller
+    result.geometryNormal = geometryNormal;
+    result.normal = shadingNormal;
+
+    // Per-vertex texture coordinates and their value at the hit location
+    result.uv0 = tri.uv0;
+    result.uv1 = tri.uv1;
+    result.uv2 = tri.uv2;
+    result.uv = interpolate(tri.uv0, tri.uv1, tri.uv2, hitData.barycentrics);
+
+    // Triangle vertices after the instance transform and the hit position between them
+    result.vertex0 = transformPoint(tri.v0, instanceTransform).xyz;
+    result.vertex1 = transformPoint(tri.v1, instanceTransform).xyz;
+    result.vertex2 = transformPoint(tri.v2, instanceTransform).xyz;
+    result.position = interpolate(result.vertex0, result.vertex1, result.vertex2, hitData.barycentrics);
+    return result;
+}
+
+#endif // RAY_INTERSECTION_HLSL
diff --git a/src/core/src/geometry/ray_tracing.hlsl b/src/core/src/geometry/ray_tracing.hlsl
new file mode 100644
index 0000000..2498465
--- /dev/null
+++ b/src/core/src/geometry/ray_tracing.hlsl
@@ -0,0 +1,80 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+
+#ifndef RAY_TRACING_HLSL
+#define RAY_TRACING_HLSL
+
+struct HitInfo // Identifies what a ray hit and where on the primitive it was hit
+{
+    uint instanceIndex; /**< Index of the hit instance */
+    uint geometryIndex; /**< Index of the hit geometry within the instance */
+    uint primitiveIndex; /**< Index of the hit primitive within the geometry */
+    float2 barycentrics; /**< Barycentric coordinates of the hit within the triangle */
+    bool frontFace; /**< True when the front face of the triangle was hit */
+};
+
+RayDesc GetRayDescRt() // Rebuilds the current ray from the ray tracing system values
+{
+    RayDesc ray;
+    ray.Origin = WorldRayOrigin();
+    ray.TMin = RayTMin();
+    ray.Direction = WorldRayDirection();
+    ray.TMax = RayTCurrent(); // The ray is cut off at the current hit distance
+    return ray;
+}
+
+HitInfo GetHitInfoRt(in BuiltInTriangleIntersectionAttributes attr) // Hit information taken from the ray tracing system values
+{
+    HitInfo hit_info;
+    hit_info.instanceIndex = InstanceIndex();
+    hit_info.geometryIndex = GeometryIndex();
+    hit_info.primitiveIndex = PrimitiveIndex();
+    hit_info.barycentrics = attr.barycentrics;
+    hit_info.frontFace = HitKind() == HIT_KIND_TRIANGLE_FRONT_FACE;
+    return hit_info;
+}
+
+template // NOTE(review): the template parameter list (and any RayQuery argument) is missing from this listing - confirm against the repository
+HitInfo GetHitInfoRtInlineCommitted(in RayQuery ray_query) // Hit information of the committed hit of an inline ray query
+{
+    HitInfo hit_info;
+    hit_info.instanceIndex = ray_query.CommittedInstanceIndex();
+    hit_info.primitiveIndex = ray_query.CommittedPrimitiveIndex();
+    hit_info.geometryIndex = ray_query.CommittedGeometryIndex();
+    hit_info.barycentrics = ray_query.CommittedTriangleBarycentrics();
+    hit_info.frontFace = ray_query.CommittedTriangleFrontFace();
+    return hit_info;
+}
+
+template // NOTE(review): the template parameter list (and any RayQuery argument) is missing from this listing - confirm against the repository
+HitInfo GetHitInfoRtInlineCandidate(in RayQuery ray_query) // Hit information of the current candidate hit of an inline ray query
+{
+    HitInfo hit_info;
+    hit_info.instanceIndex = ray_query.CandidateInstanceIndex();
+    hit_info.primitiveIndex = ray_query.CandidatePrimitiveIndex();
+    hit_info.geometryIndex = ray_query.CandidateGeometryIndex();
+    hit_info.barycentrics = ray_query.CandidateTriangleBarycentrics();
+    hit_info.frontFace = ray_query.CandidateTriangleFrontFace();
+    return hit_info;
+}
+
+#endif // RAY_TRACING_HLSL
diff --git a/src/core/src/gpu_shared.h b/src/core/src/gpu_shared.h
index 9ced004..409c80c 100644
--- a/src/core/src/gpu_shared.h
+++ b/src/core/src/gpu_shared.h
@@ -1,5 +1,5 @@
 /**********************************************************************
-Copyright (c) 2023 Advanced Micro
Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -24,27 +24,48 @@ THE SOFTWARE. #ifdef __cplusplus -# include "gfx_scene.h" - # include # include typedef uint32_t uint; -typedef glm::ivec2 int2; -typedef glm::ivec4 int4; +typedef glm::ivec2 int2; +typedef glm::aligned_ivec3 int3; +typedef glm::ivec4 int4; typedef glm::uvec2 uint2; typedef glm::aligned_uvec3 uint3; typedef glm::uvec4 uint4; +typedef glm::i8vec2 byte2; +typedef glm::aligned_i8vec3 byte3; +typedef glm::i8vec4 byte4; + +typedef glm::u8vec2 ubyte2; +typedef glm::aligned_u8vec3 ubyte3; +typedef glm::u8vec4 ubyte4; + +typedef glm::i16vec2 short2; +typedef glm::aligned_i16vec3 short3; +typedef glm::i16vec4 short4; + +typedef glm::u16vec2 ushort2; +typedef glm::aligned_u16vec3 ushort3; +typedef glm::u16vec4 ushort4; + typedef glm::aligned_vec4 float4; typedef glm::aligned_vec3 float3; typedef glm::aligned_vec2 float2; +typedef glm::dvec4 double4; +typedef glm::dvec3 double3; +typedef glm::dvec2 double2; + typedef glm::bool3 bool3; -typedef glm::mat4 float4x4; +typedef glm::mat4 float4x4; +typedef glm::mat4x3 float3x4; +typedef glm::mat3 float3x3; # define SEMANTIC(X) @@ -72,12 +93,36 @@ struct DrawCommand uint padding; }; +struct GpuVirtualAddressRange +{ + uint64_t start_address; + uint64_t size_in_bytes; +}; + +struct GpuVirtualAddressRangeAndStride +{ + uint64_t start_address; + uint64_t size_in_bytes; + uint64_t stride_in_bytes; +}; + +struct DispatchRaysCommand +{ + GpuVirtualAddressRange ray_generation_shader_record; + GpuVirtualAddressRangeAndStride miss_shader_table; + GpuVirtualAddressRangeAndStride hit_group_table; + GpuVirtualAddressRangeAndStride callable_shader_table; + uint width; + uint height; + uint depth; + uint padding[3]; +}; + struct Instance { uint mesh_index; + uint 
material_index; uint transform_index; - uint bx_id; - uint padding; }; struct Material @@ -86,18 +131,13 @@ struct Material float4 emissivity; // .xyz = emissivity, .w = emissivity_map float4 metallicity_roughness; // .x = metallicity, .y = metallicity_map, .z = roughness, .w = roughness_map - float4 normal_ao; // .x = normal_map, .y = ao_map, .zw = unused padding + float4 normal_alpha_side; // .x = normal_map, .y = alpha, .z = double_sided, .w = unused padding }; struct Mesh { - uint material_index; - uint vertex_buffer; - uint vertex_offset; // in bytes - uint vertex_stride; // in bytes - uint index_buffer; - uint index_offset; // in bytes - uint index_stride; // in bytes + uint vertex_offset_idx; + uint index_offset_idx; uint index_count; }; @@ -119,7 +159,9 @@ struct CameraMatrices float4x4 inv_projection; float4x4 view_projection; float4x4 view_projection_prev; + float4x4 inv_view_projection_prev; float4x4 inv_view_projection; + float4x4 reprojection; }; #ifdef __cplusplus @@ -128,4 +170,4 @@ struct CameraMatrices #include "lights/lights_shared.h" -#endif // GPU_SHARED_H +#endif // GPU_SHARED_H diff --git a/src/core/src/lights/light_evaluation.hlsl b/src/core/src/lights/light_evaluation.hlsl index 447bff4..f4c5cf6 100644 --- a/src/core/src/lights/light_evaluation.hlsl +++ b/src/core/src/lights/light_evaluation.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ THE SOFTWARE. 
#define LIGHT_EVALUATION_HLSL #include "lights.hlsl" -#include "../math/geometry.hlsl" +#include "../geometry/geometry.hlsl" #include "../math/math_constants.hlsl" #include "../math/sampling.hlsl" #include "../math/pack.hlsl" @@ -37,6 +37,10 @@ Texture2D g_TextureMaps[] : register(space99); SamplerState g_TextureSampler; */ +#if (!defined(DISABLE_AREA_LIGHTS) && (!defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS))) || !defined(DISABLE_DELTA_LIGHTS) || (!defined(DISABLE_ENVIRONMENT_LIGHTS) && (!defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_AREA_LIGHTS))) +# define HAS_MULTIPLE_LIGHT_TYPES +#endif + /** * Get the emitted light from a given area light. * @param light The light to be sampled. @@ -54,7 +58,11 @@ float3 evaluateAreaLight(LightArea light, float2 barycentric) { // Determine texture UVs float2 uv = interpolate(light.uv0, light.uv1, light.uv2, barycentric); - emissivity *= g_TextureMaps[NonUniformResourceIndex(emissivityTex)].SampleLevel(g_TextureSampler, uv, 0.0f).xyz; + float4 textureValue = g_TextureMaps[NonUniformResourceIndex(emissivityTex)].SampleLevel(g_TextureSampler, uv, 0.0f); + emissivity *= textureValue.xyz; + + // Combine with texture alpha map + emissivity *= textureValue.www; } return emissivity; } @@ -106,13 +114,18 @@ float3 evaluateAreaLightCone(LightArea light, float2 barycentric, float3 positio // Calculate surface area of triangle float lightArea = 0.5f * lightNormalLength; + // Calculate texture LOD based on projected area float dotDN = abs(dot(direction, lightNormal)); width = min(lightArea, width); //Clamp so cannot be greater than actual size of triangle float angle = log2(width / dotDN); float lod = offset + angle; - // Calculate texture LOD based on projected area - emissivity *= g_TextureMaps[NonUniformResourceIndex(emissivityTex)].SampleLevel(g_TextureSampler, uv, lod).xyz; + // Get texture emission + float4 textureValue = g_TextureMaps[NonUniformResourceIndex(emissivityTex)].SampleLevel(g_TextureSampler, 
uv, lod); + emissivity *= textureValue.xyz; + + // Combine with texture alpha map + emissivity *= textureValue.www; } return emissivity; } @@ -207,27 +220,32 @@ float3 evaluateDirectionalLight(LightDirectional light) */ float3 evaluateLight(Light selectedLight, float3 position, float3 direction) { -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (selectedLight.get_light_type() == kLight_Area) +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + return 0.0f.xxx; +#else +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Get the area light LightArea light = MakeLightArea(selectedLight); - // TODO calculate intersection of ray (position, direction) with triangle to calculate UV - // This^ is going to be slow so potentially look at storing this value or just stick with the below hack - float2 lightUV = float2(0.5f, 0.5f); + // Calculating intersection of ray (position, direction) with triangle to calculate UV is expensive so we approximate to the triangle center. 
+ float2 lightUV = (1.0f / 3.0f).xx; // Evaluate the selected area light return evaluateAreaLight(light, lightUV); } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else -# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (selectedLight.get_light_type() == kLight_Point) +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point) { // Get the point light LightPoint light = MakeLightPoint(selectedLight); @@ -235,7 +253,7 @@ float3 evaluateLight(Light selectedLight, float3 position, float3 direction) // Evaluate the selected point light return evaluatePointLight(light, position); } - else if (selectedLight.get_light_type() == kLight_Spot) + else if (lightType == kLight_Spot) { // Get the spot light LightSpot light = MakeLightSpot(selectedLight); @@ -244,9 +262,9 @@ float3 evaluateLight(Light selectedLight, float3 position, float3 direction) return evaluateSpotLight(light, position); } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (selectedLight.get_light_type() == kLight_Direction) -# endif +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { // Get the directional light LightDirectional light = MakeLightDirectional(selectedLight); @@ -254,12 +272,12 @@ float3 evaluateLight(Light selectedLight, float3 position, float3 direction) // Evaluate the selected directional light return evaluateDirectionalLight(light); } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS else -# endif -#endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS - /*selectedLight.get_light_type() == kLight_Environment*/ +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ { // Get the environment light LightEnvironment light = MakeLightEnvironment(selectedLight); @@ -267,9 +285,7 @@ float3 evaluateLight(Light selectedLight, float3 position, 
float3 direction) // Evaluate the environment map return evaluateEnvironmentLight(light, direction); } -#endif -#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) - return 0.0f.xxx; +# endif // DISABLE_ENVIRONMENT_LIGHTS #endif } @@ -283,27 +299,32 @@ float3 evaluateLight(Light selectedLight, float3 position, float3 direction) */ float3 evaluateLightCone(Light selectedLight, float3 position, float3 direction, float solidAngle) { -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (selectedLight.get_light_type() == kLight_Area) +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + return 0.0f.xxx; +#else +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Get the area light LightArea light = MakeLightArea(selectedLight); - // TODO calculate intersection of ray (position, direction) with triangle to calculate UV - // This^ is going to be slow so potentially look at storing this value or just stick with the below hack - float2 lightUV = float2(0.5f, 0.5f); + // Calculating intersection of ray (position, direction) with triangle to calculate UV is expensive so we approximate to the triangle center. 
+ float2 lightUV = (1.0f / 3.0f).xx; // Evaluate the selected area light return evaluateAreaLightCone(light, lightUV, position, solidAngle); } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else -# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (selectedLight.get_light_type() == kLight_Point) +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point) { // Get the point light LightPoint light = MakeLightPoint(selectedLight); @@ -311,7 +332,7 @@ float3 evaluateLightCone(Light selectedLight, float3 position, float3 direction, // Evaluate the selected point light return evaluatePointLight(light, position); } - else if (selectedLight.get_light_type() == kLight_Spot) + else if (lightType == kLight_Spot) { // Get the spot light LightSpot light = MakeLightSpot(selectedLight); @@ -320,9 +341,9 @@ float3 evaluateLightCone(Light selectedLight, float3 position, float3 direction, return evaluateSpotLight(light, position); } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (selectedLight.get_light_type() == kLight_Direction) -# endif +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { // Get the directional light LightDirectional light = MakeLightDirectional(selectedLight); @@ -330,12 +351,12 @@ float3 evaluateLightCone(Light selectedLight, float3 position, float3 direction, // Evaluate the selected directional light return evaluateDirectionalLight(light); } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS else -# endif -#endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS - /*selectedLight.get_light_type() == kLight_Environment*/ +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ { // Get the environment light LightEnvironment light = MakeLightEnvironment(selectedLight); @@ -343,78 +364,84 @@ float3 
evaluateLightCone(Light selectedLight, float3 position, float3 direction, // Evaluate the environment map return evaluateEnvironmentLightCone(light, direction, solidAngle); } -#endif -#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) - return 0.0f.xxx; +# endif // DISABLE_ENVIRONMENT_LIGHTS #endif } /** - * Get the incident light from a sampled light using a uv value returned from @sampleLightUnorm + * Get the incident light from a sampled light using sample values returned from @sampleLight * @param selectedLight The light that was sampled. * @param position Current position on surface to get direction from. - * @param sampleParams UV values returned from @sampleLightUnorm. + * @param sampleParams UV values returned from @sampleLight. * @param lightDirection (Out) The direction to the sampled light. * @param lightPosition (Out) The position of the sampled light (contains invalid data in case of directional or environment lights). * @return The visible light. 
*/ float3 evaluateLightSampled(Light selectedLight, float3 position, float2 sampleParams, out float3 lightDirection, out float3 lightPosition) { -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (selectedLight.get_light_type() == kLight_Area) +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + lightDirection = 0.0f.xxx; + lightPosition = 0.0f.xxx; + return 0.0f.xxx; +#else +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Get the area light LightArea light = MakeLightArea(selectedLight); // Calculate direction - lightPosition = interpolate(light.v0.xyz, light.v1.xyz, light.v2.xyz, sampleParams); + lightPosition = interpolate(light.v0, light.v1, light.v2, sampleParams); lightDirection = normalize(lightPosition - position); // Evaluate the selected area light return evaluateAreaLight(light, sampleParams); } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else -# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (selectedLight.get_light_type() == kLight_Point) +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point) { // Get the point light LightPoint light = MakeLightPoint(selectedLight); // Calculate direction - lightPosition = light.position.xyz; + lightPosition = light.position; lightDirection = normalize(lightPosition - position); // Evaluate the selected point light return evaluatePointLight(light, position); } - else if (selectedLight.get_light_type() == kLight_Spot) + else if (lightType == kLight_Spot) { // Get the spot light LightSpot light = MakeLightSpot(selectedLight); // Calculate direction - 
lightPosition = light.position.xyz; + lightPosition = light.position; lightDirection = normalize(lightPosition - position); // Evaluate the selected spot light return evaluateSpotLight(light, position); } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (selectedLight.get_light_type() == kLight_Direction) -# endif +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { // Get the directional light LightDirectional light = MakeLightDirectional(selectedLight); - lightDirection = light.direction.xyz; + lightDirection = light.direction; // Evaluate the selected directional light return evaluateDirectionalLight(light); } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS else -# endif -#endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS - /*selectedLight.get_light_type() == kLight_Environment*/ +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ { // Convert stored uv back to direction lightDirection = MapToSphere(sampleParams); @@ -423,19 +450,15 @@ float3 evaluateLightSampled(Light selectedLight, float3 position, float2 sampleP // Evaluate the environment map return evaluateEnvironmentLight(light, lightDirection); } -#endif -#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) - lightDirection = 0.0f.xxx; - lightPosition = 0.0f.xxx; - return 0.0f.xxx; +# endif // DISABLE_ENVIRONMENT_LIGHTS #endif } /** - * Get the incident light from a sampled light using a uv value returned from @sampleLightUnorm + * Get the incident light from a sampled light using a sample values returned from @sampleLightCone * @param selectedLight The light that was sampled. * @param position Current position on surface to get direction from. - * @param sampleParams UV values returned from @sampleLightUnorm. + * @param sampleParams UV values returned from @sampleLight. 
* @param solidAngle Solid angle of visible light surface, used for evaluating across light surface. * @param lightDirection (Out) The direction to the sampled light. * @param lightPosition (Out) The position of the sampled light (contains invalid data in case of directional or environment lights). @@ -443,61 +466,69 @@ float3 evaluateLightSampled(Light selectedLight, float3 position, float2 sampleP */ float3 evaluateLightConeSampled(Light selectedLight, float3 position, float2 sampleParams, float solidAngle, out float3 lightDirection, out float3 lightPosition) { -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (selectedLight.get_light_type() == kLight_Area) +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + lightDirection = 0.0f.xxx; + lightPosition = 0.0f.xxx; + return 0.0f.xxx; +#else +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Get the area light LightArea light = MakeLightArea(selectedLight); // Calculate direction - lightPosition = interpolate(light.v0.xyz, light.v1.xyz, light.v2.xyz, sampleParams); + lightPosition = interpolate(light.v0, light.v1, light.v2, sampleParams); lightDirection = normalize(lightPosition - position); // Evaluate the selected area light return evaluateAreaLightCone(light, sampleParams, position, solidAngle); } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else -# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (selectedLight.get_light_type() == kLight_Point) +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point) { // Get the point light LightPoint light = 
MakeLightPoint(selectedLight); // Calculate direction - lightPosition = light.position.xyz; + lightPosition = light.position; lightDirection = normalize(lightPosition - position); // Evaluate the selected point light return evaluatePointLight(light, position); } - else if (selectedLight.get_light_type() == kLight_Spot) + else if (lightType == kLight_Spot) { // Get the spot light LightSpot light = MakeLightSpot(selectedLight); // Calculate direction - lightPosition = light.position.xyz; + lightPosition = light.position; lightDirection = normalize(lightPosition - position); // Evaluate the selected spot light return evaluateSpotLight(light, position); } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (selectedLight.get_light_type() == kLight_Direction) -# endif +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { // Get the directional light LightDirectional light = MakeLightDirectional(selectedLight); - lightDirection = light.direction.xyz; + lightDirection = light.direction; // Evaluate the selected directional light return evaluateDirectionalLight(light); } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS else -# endif -#endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS - /*selectedLight.get_light_type() == kLight_Environment*/ +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ { // Convert stored uv back to direction lightDirection = MapToSphere(sampleParams); @@ -506,11 +537,7 @@ float3 evaluateLightConeSampled(Light selectedLight, float3 position, float2 sam // Evaluate the environment map return evaluateEnvironmentLightCone(light, lightDirection, solidAngle); } -#endif -#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) - lightDirection = 0.0f.xxx; - lightPosition = 0.0f.xxx; - return 0.0f.xxx; +# endif // DISABLE_ENVIRONMENT_LIGHTS #endif } diff --git a/src/core/src/lights/light_sampling.hlsl 
b/src/core/src/lights/light_sampling.hlsl index b49f6b0..e631f7b 100644 --- a/src/core/src/lights/light_sampling.hlsl +++ b/src/core/src/lights/light_sampling.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -24,20 +24,10 @@ THE SOFTWARE. #define LIGHT_SAMPLING_HLSL #include "light_evaluation.hlsl" +#include "../geometry/geometry.hlsl" #include "../math/math_constants.hlsl" -#include "../math/geometry.hlsl" #include "../math/sampling.hlsl" -/* -// Requires the following data to be defined in any shader that uses this file -TextureCube g_EnvironmentBuffer; -Texture2D g_TextureMaps[] : register(space99); - -SamplerState g_LinearSampler; - -// + data inherited from stratified_sampler.hlsl -*/ - /** * Sample the direction, PDF and position on a given area light. * @param light The light to be sampled. 
@@ -61,11 +51,15 @@ float3 sampleAreaLight(LightArea light, float2 samples, float3 position, out flo float3 edge1 = light.v1 - light.v0; float3 edge2 = light.v2 - light.v0; float3 lightCross = cross(edge1, edge2); - float3 lightNormal = normalize(lightCross); // Calculate surface area of triangle - float lightArea = 0.5f * length(lightCross); - - // Offset the light position to prevent incorrect intersections when casting the lights shadow ray + float lightNormalLength = length(lightCross); + float3 lightNormal = lightCross / lightNormalLength.xxx; + float lightArea = 0.5f * lightNormalLength; + + // We support back face triangles so we must flip the normal as required + // we cant use abs() later as we must have correct normal for offsetting position + float signedDist = dot(position - light.v0, lightNormal); + lightNormal *= sign(signedDist).xxx; lightPosition = offsetPosition(lightPosition, lightNormal); // Determine light direction @@ -93,14 +87,15 @@ float sampleAreaLightPDF(LightArea light, float3 shadingPosition, float3 lightPo float3 edge1 = light.v1 - light.v0; float3 edge2 = light.v2 - light.v0; float3 lightCross = cross(edge1, edge2); - float3 lightNormal = normalize(lightCross); // Calculate surface area of triangle - float lightArea = 0.5f * length(lightCross); + float lightNormalLength = length(lightCross); + float3 lightNormal = lightCross / lightNormalLength.xxx; + float lightArea = 0.5f * lightNormalLength; // Evaluate PDF for current position and direction float3 lightVector = shadingPosition - lightPosition; float3 lightDirection = normalize(lightVector); - float pdf = saturate(dot(lightNormal, lightDirection)) * lightArea; + float pdf = saturate(abs(dot(lightNormal, lightDirection))) * lightArea; pdf = (pdf != 0.0f) ? 
dot(lightVector, lightVector) / pdf : 0.0f; return pdf; } @@ -116,7 +111,6 @@ float sampleAreaLightPDF(LightArea light, float3 shadingPosition, float3 lightPo float3 sampleEnvironmentLight(LightEnvironment light, float2 samples, float3 normal, out float pdf) { // Currently just uses a uniform spherical sample - // TODO improve sampling based on image contents // Sample uniform sphere float z = 1.0f - 2.0f * samples.x; @@ -204,6 +198,20 @@ float3 samplePointLight(LightPoint light, float3 position, out float pdf, out fl return direction; } +/** + * Calculate the PDF of sampling an point light. + * @param light The light to be sampled. + * @param position The current surface position. + * @return The calculated PDF with respect to the light. + */ +float samplePointLightPDF(LightPoint light, float3 position) +{ + // PDF is a constant as there is only 1 possible direction to the light. + // The PDF is either 1 or 0 depending on if the light is within the specified range. + float3 direction = light.position - position; + return (length(direction) <= light.range) ? 1.0f : 0.0f; +} + /** * Sample the direction, PDF and position for a spot light. * @param light The light to be sampled. @@ -218,14 +226,40 @@ float3 sampleSpotLight(LightSpot light, float3 position, out float pdf, out floa float3 direction = light.position - position; float directionLength = length(direction); direction = direction / directionLength; + + // Cone attenuation + float lightAngle = dot(light.direction, direction); + float angularAttenuation = saturate(lightAngle * light.angleCutoffScale + light.angleCutoffOffset); + // PDF is a constant as there is only 1 possible direction to the light. - // The PDF is either 1 or 0 depending on if the light is within the specified range. - pdf = (directionLength <= light.range) ? 1.0f : 0.0f; + // The PDF is either 1 or 0 depending on if the light is within the specified range and the point is within the cone. 
+ pdf = (angularAttenuation > 0.0f && directionLength <= light.range) ? 1.0f : 0.0f; + // Set light position lightPosition = light.position; return direction; } +/** + * Calculate the PDF of sampling a spot light. + * @param light The light to be sampled. + * @param position The current surface position. + * @return The calculated PDF with respect to the light. + */ +float sampleSpotLightPDF(LightSpot light, float3 position) +{ + // Calculate direction to the light (kept unnormalized so its length can be tested against the light range) + float3 direction = light.position - position; + float lightAngle = dot(light.direction, normalize(direction)); + + // Cone attenuation (needs the cosine of a unit length direction, matching sampleSpotLight) + float angularAttenuation = saturate(lightAngle * light.angleCutoffScale + light.angleCutoffOffset); + + // PDF is a constant as there is only 1 possible direction to the light. + // The PDF is either 1 or 0 depending on if the light is within the specified range and the point is within the cone. + return (angularAttenuation > 0.0f && length(direction) <= light.range) ? 1.0f : 0.0f; +} + /** * Sample the direction and PDF for a directional light. * @param light The light to be sampled. @@ -239,6 +273,97 @@ float3 sampleDirectionalLight(LightDirectional light, out float pdf) return light.direction; } +/** + * Calculate the PDF of sampling a directional light. + * @param light The light to be sampled. + * @return The calculated PDF with respect to the light. + */ +float sampleDirectionalLightPDF(LightDirectional light) +{ + // Direction lights have a constant effect over entire scene + return 1.0f; +} + +/** + * Calculate the PDF of sampling a given light. + * @param selectedLight The light that was sampled. + * @param position The position on the surface currently being shaded + * @param normal Shading normal vector at current position. + * @param lightDirection The sampled direction to the light. + * @param lightPosition The position on the surface of the light. + * @return The calculated PDF with respect to the light. 
+ */ +float sampleLightPdf(Light selectedLight, float3 position, float3 normal, float3 lightDirection, float3 lightPosition) +{ +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + return 0.0f; +#else + float solidAnglePdf; +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); +# endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif + { + // Get the area light + LightArea light = MakeLightArea(selectedLight); + + solidAnglePdf = sampleAreaLightPDF(light, position, lightPosition); + } +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + else +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point) + { + // Get the point light + LightPoint light = MakeLightPoint(selectedLight); + + solidAnglePdf = samplePointLightPDF(light, position); + + } + else if (lightType == kLight_Spot) + { + // Get the spot light + LightSpot light = MakeLightSpot(selectedLight); + + solidAnglePdf = sampleSpotLightPDF(light, position); + } + else +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif + { + // Get the directional light + LightDirectional light = MakeLightDirectional(selectedLight); + + solidAnglePdf = sampleDirectionalLightPDF(light); + } +# ifndef DISABLE_ENVIRONMENT_LIGHTS + else +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ + { + // Get the environment light + LightEnvironment light = MakeLightEnvironment(selectedLight); + + solidAnglePdf = sampleEnvironmentLightPDF(light, lightDirection, normal); + } +# endif // DISABLE_ENVIRONMENT_LIGHTS + // Discard lights behind surface + if (dot(lightDirection, normal) < 0.0f) + { + solidAnglePdf = 0.0f; + } + return solidAnglePdf; +#endif +} + /** * Sample the direction and PDF 
for a specified light. * @tparam RNG The type of random number sampler to be used. @@ -255,14 +380,23 @@ float3 sampleDirectionalLight(LightDirectional light, out float pdf) template float3 sampleLight(Light selectedLight, inout RNG random, float3 position, float3 normal, out float3 lightDirection, out float lightPDF, out float3 lightPosition, out float2 sampleParams) { +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + lightDirection = 0.0f.xxx; + lightPDF = 0.0f; + sampleParams = 0.0f.xx; + return 0.0f.xxx; +#else float3 radiance; -#if !defined(DISABLE_AREA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); +# endif +# if !defined(DISABLE_AREA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) float2 randomValues = random.rand2(); -#endif -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (selectedLight.get_light_type() == kLight_Area) # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Get the area light LightArea light = MakeLightArea(selectedLight); @@ -270,20 +404,14 @@ float3 sampleLight(Light selectedLight, inout RNG random, float3 position, float // Sample the selected area light lightDirection = sampleAreaLight(light, randomValues, position, lightPDF, lightPosition, sampleParams); - // Early discard back facing area lights - //if (lightPDF == 0.0f) - //{ - // return 0.0f.xxx; - //} - radiance = evaluateAreaLight(light, sampleParams); } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else -# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (selectedLight.get_light_type() == kLight_Point) +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef 
DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point) { // Get the point light LightPoint light = MakeLightPoint(selectedLight); @@ -292,7 +420,9 @@ float3 sampleLight(Light selectedLight, inout RNG random, float3 position, float lightDirection = samplePointLight(light, position, lightPDF, lightPosition); radiance = evaluatePointLight(light, position); - } else if (selectedLight.get_light_type() == kLight_Spot) { + } + else if (lightType == kLight_Spot) + { // Get the spot light LightSpot light = MakeLightSpot(selectedLight); @@ -302,9 +432,9 @@ float3 sampleLight(Light selectedLight, inout RNG random, float3 position, float radiance = evaluateSpotLight(light, position); } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (selectedLight.get_light_type() == kLight_Direction) -# endif +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { // Get the directional light LightDirectional light = MakeLightDirectional(selectedLight); @@ -314,12 +444,12 @@ float3 sampleLight(Light selectedLight, inout RNG random, float3 position, float radiance = evaluateDirectionalLight(light); } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS else -# endif -#endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS - /*selectedLight.get_light_type() == kLight_Environment*/ +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ { // Get the environment light LightEnvironment light = MakeLightEnvironment(selectedLight); @@ -332,18 +462,16 @@ float3 sampleLight(Light selectedLight, inout RNG random, float3 position, float // Pack light direction into storable UV sampleParams = MapToSphereInverse(lightDirection); } -#endif -#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) - lightDirection = 0.0f.xxx; - lightPDF = 0.0f; - sampleParams = 0.0f.xx; - return 0.0f.xxx; -#else +# endif // DISABLE_ENVIRONMENT_LIGHTS // Discard lights behind surface 
if (dot(lightDirection, normal) < 0.0f) { lightPDF = 0.0f; } + if (lightPDF == 0.0f) + { + return 0.0f.xxx; + } return radiance; #endif } @@ -365,14 +493,23 @@ float3 sampleLight(Light selectedLight, inout RNG random, float3 position, float template float3 sampleLightCone(Light selectedLight, inout RNG random, float3 position, float3 normal, float solidAngle, out float3 lightDirection, out float lightPDF, out float3 lightPosition, out float2 sampleParams) { +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + lightDirection = 0.0f.xxx; + lightPDF = 0.0f; + sampleParams = 0.0f.xx; + return 0.0f.xxx; +#else float3 radiance; -#if !defined(DISABLE_AREA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); +# endif +# if !defined(DISABLE_AREA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) float2 randomValues = random.rand2(); -#endif -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (selectedLight.get_light_type() == kLight_Area) # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Get the area light LightArea light = MakeLightArea(selectedLight); @@ -382,12 +519,12 @@ float3 sampleLightCone(Light selectedLight, inout RNG random, float3 position, f radiance = evaluateAreaLightCone(light, sampleParams, position, solidAngle); } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else -# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (selectedLight.get_light_type() == kLight_Point) +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point) { // Get the point light LightPoint light = MakeLightPoint(selectedLight); @@ -396,7 
+533,9 @@ float3 sampleLightCone(Light selectedLight, inout RNG random, float3 position, f lightDirection = samplePointLight(light, position, lightPDF, lightPosition); radiance = evaluatePointLight(light, position); - } else if (selectedLight.get_light_type() == kLight_Spot) { + } + else if (lightType == kLight_Spot) + { // Get the spot light LightSpot light = MakeLightSpot(selectedLight); @@ -406,9 +545,9 @@ float3 sampleLightCone(Light selectedLight, inout RNG random, float3 position, f radiance = evaluateSpotLight(light, position); } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (selectedLight.get_light_type() == kLight_Direction) -# endif +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { // Get the directional light LightDirectional light = MakeLightDirectional(selectedLight); @@ -418,12 +557,12 @@ float3 sampleLightCone(Light selectedLight, inout RNG random, float3 position, f radiance = evaluateDirectionalLight(light); } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS else -# endif -#endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS - /*selectedLight.get_light_type() == kLight_Environment*/ +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ { // Get the environment light LightEnvironment light = MakeLightEnvironment(selectedLight); @@ -436,13 +575,7 @@ float3 sampleLightCone(Light selectedLight, inout RNG random, float3 position, f // Pack light direction into storable UV sampleParams = MapToSphereInverse(lightDirection); } -#endif -#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) - lightDirection = 0.0f.xxx; - lightPDF = 0.0f; - sampleParams = 0.0f.xx; - return 0.0f.xxx; -#else +# endif // DISABLE_ENVIRONMENT_LIGHTS // Discard lights behind surface if (dot(lightDirection, normal) < 0.0f) { @@ -458,11 +591,11 @@ float3 sampleLightCone(Light selectedLight, inout RNG random, float3 
position, f /** * Sample the direction and PDF for a specified light. + * @note This differs from `sampleLight` in that it does not require knowing the surface normal. * @tparam RNG The type of random number sampler to be used. * @param selectedLight The light to be sampled. * @param random Random number sampler used to sample light. * @param position Current position on surface. - * @param solidAngle Solid angle around view direction of visible ray cone. * @param lightDirection (Out) The sampled direction to the light. * @param lightPDF (Out) The PDF for the calculated sample. * @param lightPosition (Out) The position of the light sample (only valid if sampled light has a position). @@ -470,32 +603,40 @@ float3 sampleLightCone(Light selectedLight, inout RNG random, float3 position, f * @return The radiance returned from sampled light direction. */ template -float3 sampleLightConeUnorm(Light selectedLight, inout RNG random, float3 position, float solidAngle, out float3 lightDirection, out float lightPDF, out float3 lightPosition, out float2 sampleParams) +float3 sampleLightUnorm(Light selectedLight, inout RNG random, float3 position, out float3 lightDirection, out float lightPDF, out float3 lightPosition, out float2 sampleParams) { +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + lightDirection = 0.0f.xxx; + lightPDF = 0.0f; + sampleParams = 0.0f.xx; + return 0.0f.xxx; +#else float3 radiance; -#if !defined(DISABLE_AREA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); +# endif +# if !defined(DISABLE_AREA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) float2 randomValues = random.rand2(); -#endif -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (selectedLight.get_light_type() == kLight_Area) # endif +# ifndef DISABLE_AREA_LIGHTS +# if 
!defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Get the area light LightArea light = MakeLightArea(selectedLight); // Sample the selected area light lightDirection = sampleAreaLight(light, randomValues, position, lightPDF, lightPosition, sampleParams); - //TODO: lightPDF is incorrect when sampling over a solid_angle - radiance = evaluateAreaLightCone(light, sampleParams, position, solidAngle); + radiance = evaluateAreaLight(light, sampleParams); } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else -# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (selectedLight.get_light_type() == kLight_Point) +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point) { // Get the point light LightPoint light = MakeLightPoint(selectedLight); @@ -504,7 +645,9 @@ float3 sampleLightConeUnorm(Light selectedLight, inout RNG random, float3 positi lightDirection = samplePointLight(light, position, lightPDF, lightPosition); radiance = evaluatePointLight(light, position); - } else if (selectedLight.get_light_type() == kLight_Spot) { + } + else if (lightType == kLight_Spot) + { // Get the spot light LightSpot light = MakeLightSpot(selectedLight); @@ -514,9 +657,9 @@ float3 sampleLightConeUnorm(Light selectedLight, inout RNG random, float3 positi radiance = evaluateSpotLight(light, position); } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (selectedLight.get_light_type() == kLight_Direction) -# endif +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { // Get the directional light LightDirectional light = MakeLightDirectional(selectedLight); @@ -526,12 +669,12 @@ float3 sampleLightConeUnorm(Light selectedLight, inout RNG random, float3 positi radiance = evaluateDirectionalLight(light); } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef 
DISABLE_ENVIRONMENT_LIGHTS else -# endif -#endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS - /*selectedLight.get_light_type() == kLight_Environment*/ +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ { // Get the environment light LightEnvironment light = MakeLightEnvironment(selectedLight); @@ -539,18 +682,12 @@ float3 sampleLightConeUnorm(Light selectedLight, inout RNG random, float3 positi // Sample the environment map lightDirection = sampleEnvironmentLight(light, randomValues, lightPDF); - radiance = evaluateEnvironmentLightCone(light, lightDirection, solidAngle); + radiance = evaluateEnvironmentLight(light, lightDirection); // Pack light direction into storable UV sampleParams = MapToSphereInverse(lightDirection); } -#endif -#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) - lightDirection = 0.0f.xxx; - lightPDF = 0.0f; - sampleParams = 0.0f.xx; - return 0.0f.xxx; -#else +# endif // DISABLE_ENVIRONMENT_LIGHTS if (lightPDF == 0.0f) { return 0.0f.xxx; @@ -561,10 +698,12 @@ float3 sampleLightConeUnorm(Light selectedLight, inout RNG random, float3 positi /** * Sample the direction and PDF for a specified light. + * @note This differs from `sampleLightCone` in that it does not require knowing the surface normal. * @tparam RNG The type of random number sampler to be used. * @param selectedLight The light to be sampled. * @param random Random number sampler used to sample light. * @param position Current position on surface. + * @param solidAngle Solid angle around view direction of visible ray cone. * @param lightDirection (Out) The sampled direction to the light. * @param lightPDF (Out) The PDF for the calculated sample. * @param lightPosition (Out) The position of the light sample (only valid if sampled light has a position). 
@@ -572,16 +711,25 @@ float3 sampleLightConeUnorm(Light selectedLight, inout RNG random, float3 positi * @return The radiance returned from sampled light direction. */ template -float3 sampleLightUnorm(Light selectedLight, inout RNG random, float3 position, out float3 lightDirection, out float lightPDF, out float3 lightPosition, out float2 sampleParams) +float3 sampleLightConeUnorm(Light selectedLight, inout RNG random, float3 position, float solidAngle, out float3 lightDirection, out float lightPDF, out float3 lightPosition, out float2 sampleParams) { +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + lightDirection = 0.0f.xxx; + lightPDF = 0.0f; + sampleParams = 0.0f.xx; + return 0.0f.xxx; +#else float3 radiance; -#if !defined(DISABLE_AREA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); +# endif +# if !defined(DISABLE_AREA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) float2 randomValues = random.rand2(); -#endif -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (selectedLight.get_light_type() == kLight_Area) # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Get the area light LightArea light = MakeLightArea(selectedLight); @@ -589,14 +737,14 @@ float3 sampleLightUnorm(Light selectedLight, inout RNG random, float3 position, // Sample the selected area light lightDirection = sampleAreaLight(light, randomValues, position, lightPDF, lightPosition, sampleParams); - radiance = evaluateAreaLight(light, sampleParams); + radiance = evaluateAreaLightCone(light, sampleParams, position, solidAngle); } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else 
-# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (selectedLight.get_light_type() == kLight_Point) +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point) { // Get the point light LightPoint light = MakeLightPoint(selectedLight); @@ -605,7 +753,9 @@ float3 sampleLightUnorm(Light selectedLight, inout RNG random, float3 position, lightDirection = samplePointLight(light, position, lightPDF, lightPosition); radiance = evaluatePointLight(light, position); - } else if (selectedLight.get_light_type() == kLight_Spot) { + } + else if (lightType == kLight_Spot) + { // Get the spot light LightSpot light = MakeLightSpot(selectedLight); @@ -615,9 +765,9 @@ float3 sampleLightUnorm(Light selectedLight, inout RNG random, float3 position, radiance = evaluateSpotLight(light, position); } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (selectedLight.get_light_type() == kLight_Direction) -# endif +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { // Get the directional light LightDirectional light = MakeLightDirectional(selectedLight); @@ -627,12 +777,12 @@ float3 sampleLightUnorm(Light selectedLight, inout RNG random, float3 position, radiance = evaluateDirectionalLight(light); } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS else -# endif -#endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS - /*selectedLight.get_light_type() == kLight_Environment*/ +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ { // Get the environment light LightEnvironment light = MakeLightEnvironment(selectedLight); @@ -640,69 +790,98 @@ float3 sampleLightUnorm(Light selectedLight, inout RNG random, float3 position, // Sample the environment map lightDirection = sampleEnvironmentLight(light, randomValues, lightPDF); - radiance = evaluateEnvironmentLight(light, lightDirection); + radiance = evaluateEnvironmentLightCone(light, lightDirection, 
solidAngle); // Pack light direction into storable UV sampleParams = MapToSphereInverse(lightDirection); } -#endif -#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) - lightDirection = 0.0f.xxx; - lightPDF = 0.0f; - sampleParams = 0.0f.xx; - return 0.0f.xxx; -#else +# endif // DISABLE_ENVIRONMENT_LIGHTS + if (lightPDF == 0.0f) + { + return 0.0f.xxx; + } return radiance; #endif } /** - * Get the direction to a sampled light using a uv value returned from @sampleLightSampled. - * @param light The light that was sampled. - * @param sampleParams UV values returned from @sampleLightSampled. - * @param position Current position on surface to get direction from. + * Get the direction to a sampled light using a sample values returned from `sampleLight`. + * @param light The light that was sampled. + * @param sampleParams UV values returned from `sampleLight`. + * @param position Current position on surface to get direction from. + * @param lightPosition (Out) The position of the light sample (only valid if sampled light has a position). * @return The updated light direction. 
*/ -float3 sampledLightUnpack(Light light, float2 sampleParams, float3 position) +float3 sampledLightUnpack(Light light, float2 sampleParams, float3 position, out float3 lightPosition) { -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (light.get_light_type() == kLight_Area) +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + return 0.0f.xxx; +#else +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = light.get_light_type(); # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Calculate direction - float3 lightPosition = interpolate(light.v1.xyz, light.v2.xyz, light.v3.xyz, sampleParams); + lightPosition = interpolate(light.v1.xyz, light.v2.xyz, light.v3.xyz, sampleParams); + + // Need to offset position + float3 edge1 = light.v2.xyz - light.v1.xyz; + float3 edge2 = light.v3.xyz - light.v1.xyz; + float3 lightCross = cross(edge1, edge2); + float lightNormalLength = length(lightCross); + float3 lightNormal = lightCross / lightNormalLength.xxx; + float signedDist = dot(position - light.v1.xyz, lightNormal); + lightNormal *= sign(signedDist).xxx; + lightPosition = offsetPosition(lightPosition, lightNormal); + return normalize(lightPosition - position); } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else -# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (light.get_light_type() < kLight_Direction) /* Faster check for point or spot light */ +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType < kLight_Direction) /* Faster check for point or spot light */ { // Calculate direction - return normalize(light.v1.xyz - position); - } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (light.get_light_type() == 
kLight_Direction) -# endif + lightPosition = light.v1.xyz; + return normalize(lightPosition - position); + } + else +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { return light.v2.xyz; } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS else -# endif -#endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS { // Convert stored uv back to direction return MapToSphere(sampleParams); } +# endif // DISABLE_ENVIRONMENT_LIGHTS #endif -#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) - return 0.0f.xxx; -#endif +} + +/** + * Get the direction to a sampled light using a sample values returned from @sampleLightSampled. + * @param light The light that was sampled. + * @param sampleParams UV values returned from `sampleLight`. + * @param position Current position on surface to get direction from. + * @return The updated light direction. + */ +float3 sampledLightUnpack(Light light, float2 sampleParams, float3 position) +{ + float3 unused; + return sampledLightUnpack(light, sampleParams, position, unused); } #endif // LIGHT_SAMPLING_HLSL diff --git a/src/core/src/lights/light_sampling_volume.hlsl b/src/core/src/lights/light_sampling_volume.hlsl index 340d4d6..839bc6e 100644 --- a/src/core/src/lights/light_sampling_volume.hlsl +++ b/src/core/src/lights/light_sampling_volume.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ THE SOFTWARE. 
#define LIGHT_SAMPLING_VOLUME_HLSL #include "light_evaluation.hlsl" -#include "../math/geometry.hlsl" +#include "../geometry/geometry.hlsl" #include "../math/color.hlsl" /* @@ -38,41 +38,55 @@ StructuredBuffer g_LightBuffer; StructuredBuffer g_LightBufferSize; */ +/* + * Supports the following config values: + * LIGHT_SAMPLE_VOLUME_CENTROID = Sample volumes only at single position at centroid of volume + * THRESHOLD_RADIANCE = A threshold value used to cull area lights, if defined then additional checks + * are performed to cull lights based on the size of the sphere of influence defined by the radius at + * which the lights contribution drop below the threshold value + */ + /** * Calculate the combined luminance(Y) of a light taken within a bounding box. * @param selectedLight The light to sample. * @param minBB Bounding box minimum values. - * @param maxBB Bounding box maximum values. + * @param extent Bounding box size. * @return The calculated combined luminance. */ -float evaluateLightVolume(Light selectedLight, float3 minBB, float3 maxBB) +float sampleLightVolume(Light selectedLight, float3 minBB, float3 extent) { +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + return 0.0f; +#else float3 radiance; -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (selectedLight.get_light_type() == kLight_Area) +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Get the area light LightArea light = MakeLightArea(selectedLight); // Get light position at approximate midpoint - float3 lightPosition = interpolate(light.v0, light.v1, light.v2, 0.3333333333333f.xx); + float3 lightPosition = interpolate(light.v0, light.v1, light.v2, (1.0f / 3.0f).xx); float3 emissivity 
= light.emissivity.xyz; -#ifdef THRESHOLD_RADIANCE +# ifdef THRESHOLD_RADIANCE // Quick cull based on range of sphere falloff - float3 extent = (maxBB - minBB) * 0.5f.xxx; - float3 center = minBB + extent; - float radiusSqr = dot(extent, extent); + float3 extentCentre = extent * 0.5f.xxx; + float3 centre = minBB + extentCentre; + float radiusSqr = dot(extentCentre, extentCentre); float radius = sqrt(radiusSqr); const float range = sqrt(max(emissivity.x, max(emissivity.y, emissivity.z)) / THRESHOLD_RADIANCE); - float3 lightDirection = center - lightPosition; + float3 lightDirection = centre - lightPosition; if (length(lightDirection) > (radius + range)) { return 0.0f; } -#endif +# endif // THRESHOLD_RADIANCE uint emissivityTex = asuint(light.emissivity.w); if (emissivityTex != uint(-1)) @@ -95,52 +109,75 @@ float evaluateLightVolume(Light selectedLight, float3 minBB, float3 maxBB) float3 lightCross = cross(edge1, edge2); // Calculate surface area of triangle float lightNormalLength = length(lightCross); - float3 lightNormal = lightCross / lightNormalLength; + float3 lightNormal = lightCross / lightNormalLength.xxx; float lightArea = 0.5f * lightNormalLength; +# ifdef LIGHT_SAMPLE_VOLUME_CENTROID + // Evaluate radiance at cell centre +# ifndef THRESHOLD_RADIANCE + float3 centre = minBB + (extent * 0.5f.xxx); +# endif + float3 lightVector = centre - lightPosition; + float lightLengthSqr = lengthSqr(lightVector); + float pdf = saturate(abs(dot(lightNormal, lightVector * rsqrt(lightLengthSqr).xxx))) * lightArea; + pdf = pdf / (lightLengthSqr + FLT_EPSILON); + radiance = emissivity * pdf; +# else // Contribution is emission scaled by surface area converted to solid angle + // The light is sampled at all 8 corners of the AABB and then interpolated to fill in the internal volume + float3 maxBB = minBB + extent; float3 lightVector = minBB - lightPosition; - float pdf = saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + float lightLengthSqr 
= rcp(lengthSqr(lightVector)); + float pdf = saturate(abs(dot(lightNormal, lightVector * sqrt(lightLengthSqr).xxx))) * lightLengthSqr; lightVector = float3(minBB.x, minBB.y, maxBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + pdf += saturate(abs(dot(lightNormal, lightVector * sqrt(lightLengthSqr).xxx))) * lightLengthSqr; lightVector = float3(minBB.x, maxBB.y, minBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + pdf += saturate(abs(dot(lightNormal, lightVector * sqrt(lightLengthSqr).xxx))) * lightLengthSqr; lightVector = float3(minBB.x, maxBB.y, maxBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + pdf += saturate(abs(dot(lightNormal, lightVector * sqrt(lightLengthSqr).xxx))) * lightLengthSqr; lightVector = float3(maxBB.x, minBB.y, minBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + pdf += saturate(abs(dot(lightNormal, lightVector * sqrt(lightLengthSqr).xxx))) * lightLengthSqr; lightVector = float3(maxBB.x, minBB.y, maxBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + pdf += saturate(abs(dot(lightNormal, lightVector * sqrt(lightLengthSqr).xxx))) * lightLengthSqr; lightVector = float3(maxBB.x, maxBB.y, minBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + pdf += saturate(abs(dot(lightNormal, lightVector * sqrt(lightLengthSqr).xxx))) * lightLengthSqr; lightVector = maxBB - 
lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); - radiance = (emissivity * (lightArea / 8.0f)) * pdf; + lightLengthSqr = rcp(lengthSqr(lightVector)); + pdf += saturate(abs(dot(lightNormal, lightVector * sqrt(lightLengthSqr).xxx))) * lightLengthSqr; + radiance = (emissivity * (lightArea * 0.125f)) * pdf; +# endif // LIGHT_SAMPLE_VOLUME_CENTROID } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else -# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (selectedLight.get_light_type() == kLight_Point || selectedLight.get_light_type() == kLight_Spot) +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point || lightType == kLight_Spot) { // Get the point light LightPoint light = MakeLightPoint(selectedLight); // Quick cull based on range of sphere - float3 extent = (maxBB - minBB) * 0.5f.xxx; - float3 center = minBB + extent; - float radiusSqr = dot(extent, extent); + float3 extentCentre = extent * 0.5f.xxx; + float3 centre = minBB + extentCentre; + float radiusSqr = dot(extentCentre, extentCentre); float radius = sqrt(radiusSqr); - float3 lightDirection = center - light.position; + float3 lightDirection = centre - light.position; if (length(lightDirection) > (radius + light.range)) { return 0.0f; } - if (selectedLight.get_light_type() == kLight_Spot) + if (lightType == kLight_Spot) { // Check if spot cone intersects current cell + // Uses fast cone-sphere test (Hale) bool intersect = false; float3 coneNormal = selectedLight.v2.xyz; float sinAngle = selectedLight.v2.w; @@ -161,28 +198,47 @@ float evaluateLightVolume(Light selectedLight, float3 minBB, float3 maxBB) } } +# ifdef LIGHT_SAMPLE_VOLUME_CENTROID + // Evaluate radiance at cell centre + float dist = distance(light.position, centre); + float distMod = dist / light.range; + float rad = saturate(1.0f - 
(distMod * distMod * distMod * distMod)) / (dist * dist); + radiance = light.intensity * rad.xxx; +# else // LIGHT_SAMPLE_VOLUME_CENTROID // For each corner of the cell evaluate the radiance - float dist2 = distanceSqr(light.position, minBB); - float rad = 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(minBB.x, minBB.y, maxBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(minBB.x, maxBB.y, minBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(minBB.x, maxBB.y, maxBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(maxBB.x, minBB.y, minBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(maxBB.x, minBB.y, maxBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(maxBB.x, maxBB.y, minBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, maxBB); - radiance = light.intensity * (rad / 8.0f).xxx; + float3 maxBB = minBB + extent; + float recipRange = 1.0f / light.range; + float dist = distance(light.position, minBB); + float distMod = dist * recipRange; + float rad = saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + dist = distance(light.position, float3(minBB.x, minBB.y, maxBB.z)); + distMod = dist * recipRange; + rad += saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + dist = distance(light.position, float3(minBB.x, maxBB.y, minBB.z)); + distMod = dist * recipRange; + rad += saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + dist = distance(light.position, float3(minBB.x, maxBB.y, maxBB.z)); + distMod = dist * recipRange; + rad += saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + dist = distance(light.position, float3(maxBB.x, minBB.y, minBB.z)); + distMod = dist * recipRange; + rad += saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + dist = 
distance(light.position, float3(maxBB.x, minBB.y, maxBB.z)); + distMod = dist * recipRange; + rad += saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + dist = distance(light.position, float3(maxBB.x, maxBB.y, minBB.z)); + distMod = dist * recipRange; + rad += saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + dist = distance(light.position, maxBB); + distMod = dist * recipRange; + rad += saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + radiance = light.intensity * (rad * 0.125f).xxx; +# endif // LIGHT_SAMPLE_VOLUME_CENTROID } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (selectedLight.get_light_type() == kLight_Direction) -# endif +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { // Get the directional light LightDirectional light = MakeLightDirectional(selectedLight); @@ -190,12 +246,12 @@ float evaluateLightVolume(Light selectedLight, float3 minBB, float3 maxBB) // Directional light is constant at all points radiance = light.irradiance; } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS else -# endif -#endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS - /*selectedLight.get_light_type() == kLight_Environment*/ +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ { // Get the environment light LightEnvironment light = MakeLightEnvironment(selectedLight); @@ -209,37 +265,70 @@ float evaluateLightVolume(Light selectedLight, float3 minBB, float3 maxBB) radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(0.0f, -1.0f, 0.0f), light.lods).xyz; radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(1.0f, 0.0f, 0.0f), light.lods).xyz; radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(-1.0f, 0.0f, 0.0f), light.lods).xyz; - radiance *= FOUR_PI; + radiance *= FOUR_PI / 6.0f; } -#endif -#if defined(DISABLE_AREA_LIGHTS) && 
defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) - return 0.0f; -#else +# endif // DISABLE_ENVIRONMENT_LIGHTS return luminance(radiance); #endif } /** - * Calculate the combined luminance(Y) of a light taken within a bounding box. + * Calculate the combined luminance(Y) of a light taken within a bounding box visible from a surface orientation. * @param selectedLight The light to sample. * @param minBB Bounding box minimum values. - * @param maxBB Bounding box maximum values. + * @param extent Bounding box size. * @param normal The face normal of the bounding box region. * @return The calculated combined luminance. */ -float evaluateLightVolumeNormal(Light selectedLight, float3 minBB, float3 maxBB, float3 normal) +float sampleLightVolumeNormal(Light selectedLight, float3 minBB, float3 extent, float3 normal) { +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + return 0.0f; +#else float3 radiance; -#ifndef DISABLE_AREA_LIGHTS -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) - if (selectedLight.get_light_type() == kLight_Area) +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif { // Get the area light LightArea light = MakeLightArea(selectedLight); + // Get light position at approximate midpoint + float3 lightPosition = interpolate(light.v0, light.v1, light.v2, (1.0f / 3.0f).xx); + + // Check if inside AABB + float3 maxBB = minBB + extent; + float3 extentCentre = extent * 0.5f.xxx; + float3 centre = minBB + extentCentre; + bool insideAABB = all(lightPosition >= minBB) && all(lightPosition <= maxBB); + + if (!insideAABB) + { + // Cull by visibility by checking if triangle is above plane + if (dot(light.v0 - centre, normal) <= -0.7071f && dot(light.v1 - centre, normal) <= 
-0.7071f && dot(light.v2 - centre, normal) <= -0.7071f) + { + return 0.0f; + } + } + float3 emissivity = light.emissivity.xyz; +# ifdef THRESHOLD_RADIANCE + // Quick cull based on range of sphere falloff + float radiusSqr = dot(extentCentre, extentCentre); + float radius = sqrt(radiusSqr); + const float range = sqrt(max(emissivity.x, max(emissivity.y, emissivity.z)) / THRESHOLD_RADIANCE); + float3 lightDirection = centre - lightPosition; + if (length(lightDirection) > (radius + range)) + { + return 0.0f; + } +# endif // THRESHOLD_RADIANCE + uint emissivityTex = asuint(light.emissivity.w); if (emissivityTex != uint(-1)) { @@ -261,56 +350,100 @@ float evaluateLightVolumeNormal(Light selectedLight, float3 minBB, float3 maxBB, float3 lightCross = cross(edge1, edge2); // Calculate surface area of triangle float lightNormalLength = length(lightCross); - float3 lightNormal = lightCross / lightNormalLength; + float3 lightNormal = lightCross / lightNormalLength.xxx; float lightArea = 0.5f * lightNormalLength; +# ifdef LIGHT_SAMPLE_VOLUME_CENTROID + // Evaluate radiance at cell centre + float3 lightVector = centre - lightPosition; + float lightLengthSqr = lengthSqr(lightVector); + float pdf = saturate(abs(dot(lightNormal, lightVector * rsqrt(lightLengthSqr).xxx))) * lightArea; + pdf = pdf / (lightLengthSqr + FLT_EPSILON); + radiance = emissivity * pdf; +# else // LIGHT_SAMPLE_VOLUME_CENTROID // Contribution is emission scaled by surface area converted to solid angle - float3 lightPosition = interpolate(light.v0, light.v1, light.v2, 0.3333333333333f.xx); + // The light is sampled at all 8 corners of the AABB and then interpolated to fill in the internal volume float3 lightVector = minBB - lightPosition; - float pdf = saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + float lightLengthSqr = rcp(lengthSqr(lightVector)); + lightVector *= sqrt(lightLengthSqr).xxx; + float pdf = saturate(abs(dot(lightNormal, lightVector))) * lightLengthSqr * 
(!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); lightVector = float3(minBB.x, minBB.y, maxBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + lightVector *= sqrt(lightLengthSqr).xxx; + pdf += saturate(abs(dot(lightNormal, lightVector))) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); lightVector = float3(minBB.x, maxBB.y, minBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + lightVector *= sqrt(lightLengthSqr).xxx; + pdf += saturate(abs(dot(lightNormal, lightVector))) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); lightVector = float3(minBB.x, maxBB.y, maxBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + lightVector *= sqrt(lightLengthSqr).xxx; + pdf += saturate(abs(dot(lightNormal, lightVector))) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); lightVector = float3(maxBB.x, minBB.y, minBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + lightVector *= sqrt(lightLengthSqr).xxx; + pdf += saturate(abs(dot(lightNormal, lightVector))) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 
0.0f : 1.0f); lightVector = float3(maxBB.x, minBB.y, maxBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + lightVector *= sqrt(lightLengthSqr).xxx; + pdf += saturate(abs(dot(lightNormal, lightVector))) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); lightVector = float3(maxBB.x, maxBB.y, minBB.z) - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); + lightLengthSqr = rcp(lengthSqr(lightVector)); + lightVector *= sqrt(lightLengthSqr).xxx; + pdf += saturate(abs(dot(lightNormal, lightVector))) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); lightVector = maxBB - lightPosition; - pdf += saturate(dot(lightNormal, normalize(lightVector))) / dot(lightVector, lightVector); - radiance = (emissivity * (lightArea / 8.0f)) * pdf; + lightLengthSqr = rcp(lengthSqr(lightVector)); + lightVector *= sqrt(lightLengthSqr).xxx; + pdf += saturate(abs(dot(lightNormal, lightVector))) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 
0.0f : 1.0f); + radiance = (emissivity * (lightArea * 0.125f)) * pdf; +# endif // LIGHT_SAMPLE_VOLUME_CENTROID } -# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) else -# endif -#endif -#ifndef DISABLE_DELTA_LIGHTS - if (selectedLight.get_light_type() == kLight_Point || selectedLight.get_light_type() == kLight_Spot) +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point || lightType == kLight_Spot) { // Get the point light LightPoint light = MakeLightPoint(selectedLight); + // Check if inside AABB + float3 maxBB = minBB + extent; + float3 extentCentre = extent * 0.5f.xxx; + float3 centre = minBB + extentCentre; + const bool insideAABB = all(light.position >= minBB) && all(light.position <= maxBB); + + // Cull by visibility by checking if triangle is above plane + if (!insideAABB && dot(light.position - centre, normal) <= -0.7071f) + { + return 0.0f; + } + // Quick cull based on range of sphere - float3 extent = (maxBB - minBB) * 0.5f.xxx; - float3 center = minBB + extent; - float radiusSqr = dot(extent, extent); + float radiusSqr = dot(extentCentre, extentCentre); float radius = sqrt(radiusSqr); - float3 lightDirection = center - light.position; + float3 lightDirection = centre - light.position; if (length(lightDirection) > (radius + light.range)) { return 0.0f; } - if (selectedLight.get_light_type() == kLight_Spot) + + if (lightType == kLight_Spot) { // Check if spot cone intersects current cell + // Uses fast cone-sphere test (Hale) bool intersect = false; float3 coneNormal = selectedLight.v2.xyz; float sinAngle = selectedLight.v2.w; float tanAngleSqPlusOne = selectedLight.v3.z; + + // Fast check to cull lights based on cell normal + if (dot(coneNormal, normal) <= -0.7071f) + { + return 0.0f; + } + if (dot(lightDirection + (coneNormal * sinAngle * radius), coneNormal) < 0.0f) { float3 cd = sinAngle * 
lightDirection - coneNormal * radius; @@ -327,34 +460,83 @@ float evaluateLightVolumeNormal(Light selectedLight, float3 minBB, float3 maxBB, } } +# ifdef LIGHT_SAMPLE_VOLUME_CENTROID + // Evaluate radiance at cell centre + float dist = distance(light.position, centre); + float distMod = dist / light.range; + float rad = saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + radiance = light.intensity * rad.xxx; +# else // LIGHT_SAMPLE_VOLUME_CENTROID // For each corner of the cell evaluate the radiance - float dist2 = distanceSqr(light.position, minBB); - float rad = 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(minBB.x, minBB.y, maxBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(minBB.x, maxBB.y, minBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(minBB.x, maxBB.y, maxBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(maxBB.x, minBB.y, minBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(maxBB.x, minBB.y, maxBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, float3(maxBB.x, maxBB.y, minBB.z)); - rad += 1.0f / (dist2 + 1); - dist2 = distanceSqr(light.position, maxBB); - radiance = light.intensity * (rad / 8.0f).xxx; + float recipRange = 1.0f / light.range; + float3 lightVector = minBB - light.position; + float lightLengthSqr = lengthSqr(lightVector); + float dist = sqrt(lightLengthSqr); + lightLengthSqr = rcp(lightLengthSqr); + lightVector *= dist.xxx; + float distMod = dist * recipRange; + float pdf = saturate(1.0f - (distMod * distMod * distMod * distMod)) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 
0.0f : 1.0f); + lightLengthSqr = lengthSqr(lightVector); + dist = sqrt(lightLengthSqr); + lightLengthSqr = rcp(lightLengthSqr); + lightVector *= dist.xxx; + distMod = dist * recipRange; + pdf += saturate(1.0f - (distMod * distMod * distMod * distMod)) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); + lightVector = float3(minBB.x, maxBB.y, minBB.z) - light.position; + lightLengthSqr = lengthSqr(lightVector); + dist = sqrt(lightLengthSqr); + lightLengthSqr = rcp(lightLengthSqr); + lightVector *= dist.xxx; + distMod = dist * recipRange; + pdf += saturate(1.0f - (distMod * distMod * distMod * distMod)) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); + lightVector = float3(minBB.x, maxBB.y, maxBB.z) - light.position; + lightLengthSqr = lengthSqr(lightVector); + dist = sqrt(lightLengthSqr); + lightLengthSqr = rcp(lightLengthSqr); + lightVector *= dist.xxx; + distMod = dist * recipRange; + pdf += saturate(1.0f - (distMod * distMod * distMod * distMod)) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); + lightVector = float3(maxBB.x, minBB.y, minBB.z) - light.position; + lightLengthSqr = lengthSqr(lightVector); + dist = sqrt(lightLengthSqr); + lightLengthSqr = rcp(lightLengthSqr); + lightVector *= dist.xxx; + distMod = dist * recipRange; + pdf += saturate(1.0f - (distMod * distMod * distMod * distMod)) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); + lightVector = float3(maxBB.x, minBB.y, maxBB.z) - light.position; + lightLengthSqr = lengthSqr(lightVector); + dist = sqrt(lightLengthSqr); + lightLengthSqr = rcp(lightLengthSqr); + lightVector *= dist.xxx; + distMod = dist * recipRange; + pdf += saturate(1.0f - (distMod * distMod * distMod * distMod)) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 
0.0f : 1.0f); + lightVector = float3(maxBB.x, maxBB.y, minBB.z) - light.position; + lightLengthSqr = lengthSqr(lightVector); + dist = sqrt(lightLengthSqr); + lightLengthSqr = rcp(lightLengthSqr); + lightVector *= dist.xxx; + distMod = dist * recipRange; + pdf += saturate(1.0f - (distMod * distMod * distMod * distMod)) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); + lightVector = maxBB - light.position; + lightLengthSqr = lengthSqr(lightVector); + dist = sqrt(lightLengthSqr); + lightLengthSqr = rcp(lightLengthSqr); + lightVector *= dist.xxx; + distMod = dist * recipRange; + pdf += saturate(1.0f - (distMod * distMod * distMod * distMod)) * lightLengthSqr * (!insideAABB && dot(lightVector, normal) >= 0.7071f ? 0.0f : 1.0f); + radiance = light.intensity * 0.125f * pdf; +# endif // LIGHT_SAMPLE_VOLUME_CENTROID } else -# ifndef DISABLE_ENVIRONMENT_LIGHTS - if (selectedLight.get_light_type() == kLight_Direction) -# endif +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif { // Get the directional light LightDirectional light = MakeLightDirectional(selectedLight); // Fast check to cull lights based on cell normal - if (dot(light.direction, normal) < 0.0f) + if (dot(light.direction, normal) <= -0.7071f) { return 0.0f; } @@ -362,12 +544,252 @@ float evaluateLightVolumeNormal(Light selectedLight, float3 minBB, float3 maxBB, // Directional light is constant at all points radiance = light.irradiance; } -# ifndef DISABLE_ENVIRONMENT_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS else +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ + { + // Get the environment light + LightEnvironment light = MakeLightEnvironment(selectedLight); + + // Environment light is constant at all points so just sample the environment map at + // lower mip levels to get combined contribution + // Due to normal based sampling the directions straddle multiple cube faces + 
radiance = 0.0f; + float count = 0.0f; + if (normal.z != 0) + { + radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(0.0f, 0.0f, normal.z), light.lods).xyz; + ++count; + } + if (normal.y != 0) + { + radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(0.0f, normal.y, 0.0f), light.lods).xyz; + ++count; + } + if (normal.x != 0) + { + radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(normal.x, 0.0f, 0.0f), light.lods).xyz; + ++count; + } + radiance *= FOUR_PI / count; + } +# endif // DISABLE_ENVIRONMENT_LIGHTS + return luminance(radiance); +#endif +} + +/** + * Calculate the combined luminance(Y) of a light taken at a specific location. + * @param selectedLight The light to sample. + * @param position Current position on surface. + * @return The calculated combined luminance. + */ +float sampleLightPoint(Light selectedLight, float3 position) +{ +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + return 0.0f; +#else + float3 radiance; +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); # endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif + { + // Get the area light + LightArea light = MakeLightArea(selectedLight); + + // Get light position at approximate midpoint + float3 lightPosition = interpolate(light.v0, light.v1, light.v2, (1.0f / 3.0f).xx); + + float3 emissivity = light.emissivity.xyz; + uint emissivityTex = asuint(light.emissivity.w); + if (emissivityTex != uint(-1)) + { + float2 edgeUV0 = light.uv1 - light.uv0; + float2 edgeUV1 = light.uv2 - light.uv0; + // Get texture dimensions in order to determine LOD of visible solid angle + float2 size; + g_TextureMaps[NonUniformResourceIndex(emissivityTex)].GetDimensions(size.x, size.y); + float areaUV = size.x * size.y * abs(edgeUV0.x * edgeUV1.y - edgeUV1.x * edgeUV0.y); + 
float lod = 0.5f * log2(areaUV); + + float2 uv = interpolate(light.uv0, light.uv1, light.uv2, 0.3333333333333f.xx); + emissivity *= g_TextureMaps[NonUniformResourceIndex(emissivityTex)].SampleLevel(g_TextureSampler, uv, lod).xyz; + } + + // Calculate lights surface normal vector + float3 edge1 = light.v1 - light.v0; + float3 edge2 = light.v2 - light.v0; + float3 lightCross = cross(edge1, edge2); + // Calculate surface area of triangle + float lightNormalLength = length(lightCross); + float3 lightNormal = lightCross / lightNormalLength.xxx; + float lightArea = 0.5f * lightNormalLength; + + // Evaluate radiance at specified point + float3 lightVector = position - lightPosition; + float lightLengthSqr = lengthSqr(lightVector); + lightVector *= rsqrt(lightLengthSqr).xxx; + float pdf = saturate(abs(dot(lightNormal, lightVector))) * lightArea; + pdf = pdf / (lightLengthSqr + FLT_EPSILON); + radiance = emissivity * pdf; + } +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + else +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point || lightType == kLight_Spot) + { + // Get the point light + LightPoint light = MakeLightPoint(selectedLight); + + // Evaluate radiance at specified point + float3 lightVector = light.position - position; + float dist = length(lightVector); + lightVector /= dist; + float distMod = dist / light.range; + float rad = saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + radiance = light.intensity * rad.xxx; + } + else +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif + { + // Get the directional light + LightDirectional light = MakeLightDirectional(selectedLight); + + // Directional light is constant at all points + radiance = light.irradiance; + } +# ifndef DISABLE_ENVIRONMENT_LIGHTS + else +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ + { + // 
Get the environment light + LightEnvironment light = MakeLightEnvironment(selectedLight); + + // Environment light is constant at all points so just sample the environment map at + // lower mip levels to get combined contribution + // Due to use of cube map all 6 sides must be individually sampled + radiance = g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(0.0f, 0.0f, 1.0f), light.lods).xyz; + radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(0.0f, 0.0f, -1.0f), light.lods).xyz; + radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(0.0f, 1.0f, 0.0f), light.lods).xyz; + radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(0.0f, -1.0f, 0.0f), light.lods).xyz; + radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(1.0f, 0.0f, 0.0f), light.lods).xyz; + radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(-1.0f, 0.0f, 0.0f), light.lods).xyz; + radiance *= FOUR_PI / 6.0f; + } +# endif // DISABLE_ENVIRONMENT_LIGHTS + return luminance(radiance); #endif -#ifndef DISABLE_ENVIRONMENT_LIGHTS - /*selectedLight.get_light_type() == kLight_Environment*/ +} + +/** + * Calculate the combined luminance(Y) of a light taken at a specific location visible from a surface orientation. + * @param selectedLight The light to sample. + * @param position Current position on surface. + * @param normal Shading normal vector at current position. + * @return The calculated combined luminance. 
+ */ +float sampleLightPointNormal(Light selectedLight, float3 position, float3 normal) +{ +#if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) + return 0.0f; +#else + float3 radiance; +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); +# endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif + { + // Get the area light + LightArea light = MakeLightArea(selectedLight); + + // Get light position at approximate midpoint + float3 lightPosition = interpolate(light.v0, light.v1, light.v2, (1.0f / 3.0f).xx); + + float3 emissivity = light.emissivity.xyz; + uint emissivityTex = asuint(light.emissivity.w); + if (emissivityTex != uint(-1)) + { + float2 edgeUV0 = light.uv1 - light.uv0; + float2 edgeUV1 = light.uv2 - light.uv0; + // Get texture dimensions in order to determine LOD of visible solid angle + float2 size; + g_TextureMaps[NonUniformResourceIndex(emissivityTex)].GetDimensions(size.x, size.y); + float areaUV = size.x * size.y * abs(edgeUV0.x * edgeUV1.y - edgeUV1.x * edgeUV0.y); + float lod = 0.5f * log2(areaUV); + + float2 uv = interpolate(light.uv0, light.uv1, light.uv2, 0.3333333333333f.xx); + emissivity *= g_TextureMaps[NonUniformResourceIndex(emissivityTex)].SampleLevel(g_TextureSampler, uv, lod).xyz; + } + + // Calculate lights surface normal vector + float3 edge1 = light.v1 - light.v0; + float3 edge2 = light.v2 - light.v0; + float3 lightCross = cross(edge1, edge2); + // Calculate surface area of triangle + float lightNormalLength = length(lightCross); + float3 lightNormal = lightCross / lightNormalLength.xxx; + float lightArea = 0.5f * lightNormalLength; + + // Evaluate radiance at specified point + float3 lightVector = position - lightPosition; + float lightLengthSqr = lengthSqr(lightVector); + lightVector *= rsqrt(lightLengthSqr).xxx; + float pdf = 
saturate(abs(dot(lightNormal, lightVector))) * lightArea; + pdf = pdf / (lightLengthSqr + FLT_EPSILON); + radiance = emissivity * pdf * saturate(-dot(lightVector, normal)); + } +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + else +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point || lightType == kLight_Spot) + { + // Get the point light + LightPoint light = MakeLightPoint(selectedLight); + + // Evaluate radiance at specified point + float3 lightVector = light.position - position; + float dist = length(lightVector); + lightVector /= dist; + float distMod = dist / light.range; + float rad = saturate(1.0f - (distMod * distMod * distMod * distMod)) / (dist * dist); + radiance = light.intensity * rad.xxx * saturate(dot(lightVector, normal)); + } + else +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif + { + // Get the directional light + LightDirectional light = MakeLightDirectional(selectedLight); + + // Directional light is constant at all points + radiance = light.irradiance * saturate(dot(light.direction, normal)); + } +# ifndef DISABLE_ENVIRONMENT_LIGHTS + else +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ { // Get the environment light LightEnvironment light = MakeLightEnvironment(selectedLight); @@ -376,26 +798,99 @@ float evaluateLightVolumeNormal(Light selectedLight, float3 minBB, float3 maxBB, // lower mip levels to get combined contribution // Due to normal based sampling the directions straddle multiple cube faces radiance = 0.0f; - if (normal.z > 0) + float count = 0.0f; + if (normal.z != 0) { radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(0.0f, 0.0f, normal.z), light.lods).xyz; + ++count; } - if (normal.y > 0) + if (normal.y != 0) { radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(0.0f, normal.y, 0.0f), light.lods).xyz; + ++count; 
} - if (normal.x > 0) + if (normal.x != 0) { radiance += g_EnvironmentBuffer.SampleLevel(g_TextureSampler, float3(normal.x, 0.0f, 0.0f), light.lods).xyz; + ++count; } - radiance *= TWO_PI; + radiance *= TWO_PI / count; } +# endif // DISABLE_ENVIRONMENT_LIGHTS + return luminance(radiance); #endif +} + + +/** + * Calculate a quick weighting based on the cosine light angle. + * @param selectedLight The light to sample. + * @param position Current position on surface. + * @param normal Shading normal vector at current position. + * @return The calculated angle weight. + */ +float sampleLightPointNormalFast(Light selectedLight, float3 position, float3 normal) +{ #if defined(DISABLE_AREA_LIGHTS) && defined(DISABLE_DELTA_LIGHTS) && defined(DISABLE_ENVIRONMENT_LIGHTS) return 0.0f; #else - return luminance(radiance); +# ifdef HAS_MULTIPLE_LIGHT_TYPES + LightType lightType = selectedLight.get_light_type(); +# endif +# ifndef DISABLE_AREA_LIGHTS +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + if (lightType == kLight_Area) +# endif + { + // Get the area light + LightArea light = MakeLightArea(selectedLight); + + // Get light position at approximate midpoint + float3 lightPosition = interpolate(light.v0, light.v1, light.v2, (1.0f / 3.0f).xx); + + // Evaluate radiance at specified point + float3 lightVector = normalize(lightPosition - position); + return saturate(dot(lightVector, normal)); + } +# if !defined(DISABLE_DELTA_LIGHTS) || !defined(DISABLE_ENVIRONMENT_LIGHTS) + else +# endif +# endif // DISABLE_AREA_LIGHTS +# ifndef DISABLE_DELTA_LIGHTS + if (lightType == kLight_Point || lightType == kLight_Spot) + { + // Get the point light + LightPoint light = MakeLightPoint(selectedLight); + + // Evaluate radiance at specified point + float3 lightVector = light.position - position; + return saturate(dot(lightVector, normal)); + } + else +# ifndef DISABLE_ENVIRONMENT_LIGHTS + if (lightType == kLight_Direction) +# endif + { + // Get the directional light 
+ LightDirectional light = MakeLightDirectional(selectedLight); + + // Directional light is constant at all points + return saturate(dot(light.direction, normal)); + } +# ifndef DISABLE_ENVIRONMENT_LIGHTS + else +# endif +# endif // DISABLE_DELTA_LIGHTS +# ifndef DISABLE_ENVIRONMENT_LIGHTS + /*lightType == kLight_Environment*/ + { + // Get the environment light + LightEnvironment light = MakeLightEnvironment(selectedLight); + + return 1.0f; + } +# endif // DISABLE_ENVIRONMENT_LIGHTS #endif } -#endif // LIGHT_SAMPLING_HLSL +#endif // LIGHT_SAMPLING_VOLUME_HLSL diff --git a/src/core/src/lights/lights.hlsl b/src/core/src/lights/lights.hlsl index ec5fee8..ebbfbd5 100644 --- a/src/core/src/lights/lights.hlsl +++ b/src/core/src/lights/lights.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -121,7 +121,7 @@ struct LightSpot float3 intensity; /**< The light luminous intensity (lm/sr) */ float3 direction; /**< The light world space direction to the light */ float angleCutoffScale; /**< The light angle cutoff scale (1 / (cos(innerAngle) - cos(outerAngle))) */ - float angleCutoffOffset; /**< The light angle cutoff offset (-cos(outerAngle) * angleCutoofScale) */ + float angleCutoffOffset; /**< The light angle cutoff offset (-cos(outerAngle) * angleCutoffScale) */ }; /** diff --git a/src/core/src/lights/lights_shared.h b/src/core/src/lights/lights_shared.h index bcffb9f..00cf9d4 100644 --- a/src/core/src/lights/lights_shared.h +++ b/src/core/src/lights/lights_shared.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -104,7 +104,7 @@ inline Light MakeAreaLight(float3 radiance, float3 vertex1, float3 vertex2, floa { // Create the new light Light light; - light.radiance = float4(radiance, glm::uintBitsToFloat(-1)); + light.radiance = float4(radiance, glm::uintBitsToFloat(UINT_MAX)); light.v1 = float4(vertex1, 0.0f); light.v2 = float4(vertex2, 0.0f); light.v3 = float4(vertex3, 0.0f); @@ -145,9 +145,9 @@ inline Light MakeAreaLight(float3 radiance, float3 vertex1, float3 vertex2, floa inline Light MakePointLight(float3 intensity, float3 position, float range) { Light light; - light.radiance = float4(intensity, glm::uintBitsToFloat(-1)); + light.radiance = float4(intensity, glm::uintBitsToFloat(UINT_MAX)); light.v1 = float4(position, range); - light.v3 = float4(float3(0.0f), glm::uintBitsToFloat(kLight_Point)); + light.v3 = float4(float3(0.0f), glm::uintBitsToFloat(static_cast(kLight_Point))); return light; } @@ -171,11 +171,11 @@ inline Light MakeSpotLight(float3 intensity, float3 position, float range, float float lightAngleOffset = cosOutter * lightAngleScale; float tanAngleSqPlusOne = 1.0f + (tanAngle * tanAngle); Light light; - light.radiance = float4(intensity, glm::uintBitsToFloat(-1)); + light.radiance = float4(intensity, glm::uintBitsToFloat(UINT_MAX)); light.v1 = float4(position, range); light.v2 = float4(normalize(direction), sinAngle); - light.v3 = - float4(lightAngleScale, lightAngleOffset, tanAngleSqPlusOne, glm::uintBitsToFloat(kLight_Spot)); + light.v3 = float4(lightAngleScale, lightAngleOffset, tanAngleSqPlusOne, + glm::uintBitsToFloat(static_cast(kLight_Spot))); return light; } @@ -189,9 +189,9 @@ inline Light MakeSpotLight(float3 intensity, float3 position, float range, float inline Light MakeDirectionalLight(float3 radiance, float3 
direction, float range) { Light light; - light.radiance = float4(radiance, glm::uintBitsToFloat(-1)); + light.radiance = float4(radiance, glm::uintBitsToFloat(UINT_MAX)); light.v2 = float4(direction, range); - light.v3 = float4(float3(0.0f), glm::uintBitsToFloat(kLight_Direction)); + light.v3 = float4(float3(0.0f), glm::uintBitsToFloat(static_cast(kLight_Direction))); return light; } @@ -205,8 +205,8 @@ inline Light MakeEnvironmentLight(uint width, uint height) { Light light; uint lod = glm::findMSB(glm::max(width, height)); - light.radiance = float4(float3(glm::uintBitsToFloat(lod), 0.0f, 0.0f), glm::uintBitsToFloat(-1)); - light.v3 = float4(float3(0.0f), glm::uintBitsToFloat(kLight_Environment)); + light.radiance = float4(float3(glm::uintBitsToFloat(lod), 0.0f, 0.0f), glm::uintBitsToFloat(UINT_MAX)); + light.v3 = float4(float3(0.0f), glm::uintBitsToFloat(static_cast(kLight_Environment))); return light; } #endif diff --git a/src/core/src/lights/reservoir.hlsl b/src/core/src/lights/reservoir.hlsl index ec60cd2..77f1eb3 100644 --- a/src/core/src/lights/reservoir.hlsl +++ b/src/core/src/lights/reservoir.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,7 @@ THE SOFTWARE. 
#ifndef RESERVOIR_HLSL #define RESERVOIR_HLSL -#include "../components/light_sampler/light_sampler.hlsl" +#include "../components/light_builder/light_builder.hlsl" #include "../lights/light_sampling.hlsl" #include "../materials/material_evaluation.hlsl" #include "../math/math_constants.hlsl" @@ -58,9 +58,8 @@ struct Reservoir { // Spatiotemporal reservoir resampling for real-time ray tracing with dynamic direct lighting - Bitterli et al LightSample lightSample; /**< Current light sample information */ - uint M; /**< Number of samples in the reservoir (M) */ + float M; /**< Confidence weight used for MIS. This value is propotional to the number of samples in the reservoir (M) */ float W; /**< Weight of current sample (W) */ - float Wsum; /**< Running sum of the weights of streamed samples (Wsum) */ /** * Checks whether a reservoir contains a valid sample. @@ -70,15 +69,6 @@ struct Reservoir { return (M > 0 && W < FLT_MAX); } - - /** - * Get the source PDF for current sample. - * @returns The source PDF value. - */ - float getSourcePDF() - { - return (M > 0 && W > 0.0f) ? Wsum / (M * W) : 0.0f; - } }; /** @@ -92,15 +82,13 @@ Reservoir MakeReservoir() reservoir.lightSample.sampleParams = float2(0.0f, 0.0f); reservoir.M = 0; reservoir.W = 0.0f; - reservoir.Wsum = 0.0f; return reservoir; - } // Clamps the number of resampled samples on the previous reservoir to limit the temporal bias. -void Reservoir_ClampPrevious(Reservoir current_reservoir, inout Reservoir previous_reservoir) +void Reservoir_ClampPrevious(inout Reservoir previous_reservoir) { - previous_reservoir.M = min(previous_reservoir.M, 5.0f * current_reservoir.M); + previous_reservoir.M = min(previous_reservoir.M, 20.0f); } // Evaluates the target PDF, i.e., the luminance of the unshadowed illumination. 
@@ -114,24 +102,9 @@ float Reservoir_EvaluateTargetPdf(float3 view_direction, float3 normal, Material struct ReservoirUpdater { Reservoir reservoir; /**< The internal reservoir to update (do not access directly) */ - float sourcePDF; /**< The PDF of the chosen sample (S) */ + float targetPDF; /**< The (unnormalized) target distribution of the chosen sample (S) */ }; -/** - * Creates a reservoir updater from an existing reservoir. - * @param reservoir The reservoir to update. - * @returns The new reservoir updater. - */ -ReservoirUpdater MakeReservoirUpdater(Reservoir reservoir) -{ - ReservoirUpdater ret = - { - reservoir, - reservoir.getSourcePDF(), - }; - return ret; -} - /** * Creates a reservoir updater for a new reservoir. * @returns The new reservoir updater. @@ -147,19 +120,77 @@ ReservoirUpdater MakeReservoirUpdater() } /** - * Get the updated reservoir from reservoir updater. - * @note Retrieving the reservoir causes its RIS weights to be updated accordingly. - * This function should be called once after all updated operations have been performed. - * @param updater The reservoir updater to retrieve the reservoir from. - * @returns The updated reservoir. + * Update a reservoir by passing it a new light sample and that samples contribution. + * @tparam RNG The type of random number sampler to be used. + * @param updater Reservoir updater containing the reservoir to update. + * @param randomNG Random number sampler used to sample light. + * @param lightIndex Index of the current light to sample. + * @param sampledLightPDF The combined PDF for the light sample. + * @param material Material data describing BRDF. + * @param normal Shading normal vector at current position (must be normalised). + * @param viewDirection Outgoing ray view direction (must be normalised). + * @param radiance Total radiance received from the light. + * @param lightDirection Direction towards the light (must be normalised). 
+ * @param sampleParams UV values that can be used to recalculate light parameters using @sampledLightUnpack. */ -Reservoir getUpdatedReservoir(ReservoirUpdater updater) +template +void updateReservoirRadiance(inout ReservoirUpdater updater, inout RNG randomNG, uint lightIndex, float sampledLightPDF, MaterialBRDF material, float3 normal, float3 viewDirection, float3 radiance, float3 lightDirection, float2 sampleParams) { - if (updater.reservoir.M > 0) + // Evaluate the sampling function for the new sample + const float f = (sampledLightPDF != 0.0f) ? Reservoir_EvaluateTargetPdf(viewDirection, normal, material, lightDirection, radiance) : 0.0f; + + // Compute MIS weights + const float misWeight1 = updater.reservoir.M / (updater.reservoir.M + 1.0f); + const float misWeight2 = 1.0f / (updater.reservoir.M + 1.0f); + + // Compute reservoir resampling weights + const float weight1 = misWeight1 * updater.targetPDF * updater.reservoir.W; + const float weight2 = misWeight2 * f / max(sampledLightPDF, FLT_MIN); + const float weightSum = weight1 + weight2; + + // Check if new sample should replace the existing sample + if ((randomNG.rand() * weightSum) < weight2) { - updater.reservoir.W = (updater.sourcePDF > 0.0f ? updater.reservoir.Wsum / (updater.reservoir.M * updater.sourcePDF) : 0.0f); + // Update internal values to add new sample + updater.reservoir.lightSample.index = lightIndex; + updater.reservoir.lightSample.sampleParams = sampleParams; + updater.targetPDF = f; } - return updater.reservoir; + + // Update the contribution weight + updater.reservoir.W = weightSum / max(updater.targetPDF, FLT_MIN); + + // Increment number of samples + updater.reservoir.M += 1.0f; +} + +/** + * Update a reservoir by passing it a new light sample. + * @tparam RNG The type of random number sampler to be used. + * @param updater Reservoir updater containing the reservoir to update. + * @param randomNG Random number sampler used to sample light. 
+ * @param lightIndex Index of the current light to sample. + * @param lightPDF The PDF for the light sample. + * @param material Material data describing BRDF. + * @param position Current position on surface. + * @param normal Shading normal vector at current position (must be normalised). + * @param viewDirection Outgoing ray view direction (must be normalised). + */ +template +void updateReservoir(inout ReservoirUpdater updater, inout RNG randomNG, uint lightIndex, float lightPDF, MaterialBRDF material, float3 position, float3 normal, float3 viewDirection) +{ + // Sample light + Light light = getLight(lightIndex); + float sampledLightPDF; + float3 lightDirection; + float2 sampleParams; + float3 unused; + float3 radiance = sampleLight(light, randomNG, position, normal, lightDirection, sampledLightPDF, unused, sampleParams); + + // Combine PDFs + sampledLightPDF *= lightPDF; + + updateReservoirRadiance(updater, randomNG, lightIndex, sampledLightPDF, material, normal, viewDirection, radiance, lightDirection, sampleParams); } /** @@ -176,7 +207,7 @@ Reservoir getUpdatedReservoir(ReservoirUpdater updater) * @param solidAngle Solid angle around view direction of visible ray cone. 
*/ template -void updateReservoir(inout ReservoirUpdater updater, inout RNG randomNG, uint lightIndex, float lightPDF, MaterialBRDF material, float3 position, float3 normal, float3 viewDirection, float solidAngle) +void updateReservoirCone(inout ReservoirUpdater updater, inout RNG randomNG, uint lightIndex, float lightPDF, MaterialBRDF material, float3 position, float3 normal, float3 viewDirection, float solidAngle) { // Sample light Light light = getLight(lightIndex); @@ -186,31 +217,112 @@ void updateReservoir(inout ReservoirUpdater updater, inout RNG randomNG, uint li float3 unused; float3 radiance = sampleLightCone(light, randomNG, position, normal, solidAngle, lightDirection, sampledLightPDF, unused, sampleParams); + // Combine PDFs + sampledLightPDF *= lightPDF; + + updateReservoirRadiance(updater, randomNG, lightIndex, sampledLightPDF, material, normal, viewDirection, radiance, lightDirection, sampleParams); +} + +/** + * Merge 2 reservoirs together using pre-calculated light contribution. + * @tparam RNG The type of random number sampler to be used. + * @param updater Reservoir updater containing the first reservoir to update. + * @param reservoir2 The second reservoir to add to the first. + * @param randomNG Random number sampler used to sample light. + * @param material Material data describing BRDF. + * @param normal Shading normal vector at current position (must be normalised). + * @param viewDirection Outgoing ray view direction (must be normalised). + * @param radiance Total radiance received from the light. + * @param lightDirection Direction towards the light (must be normalised). 
+ */ +template +void mergeReservoirsRadiance(inout ReservoirUpdater updater, Reservoir reservoir2, inout RNG randomNG, MaterialBRDF material, float3 normal, float3 viewDirection, float3 radiance, float3 lightDirection) +{ + // Evaluate the sampling function for the new sample + const float f = Reservoir_EvaluateTargetPdf(viewDirection, normal, material, lightDirection, radiance); + + // Compute MIS weights. + const float misWeight1 = updater.reservoir.M / (updater.reservoir.M + reservoir2.M); + const float misWeight2 = reservoir2.M / (updater.reservoir.M + reservoir2.M); + + // Compute reservoir resampling weights + const float weight1 = misWeight1 * updater.targetPDF * updater.reservoir.W; + const float weight2 = misWeight2 * f * reservoir2.W; + const float weightSum = weight1 + weight2; + + // Check if new sample should replace the existing sample + if ((randomNG.rand() * weightSum) < weight2) + { + // Update internal values to add new sample + updater.reservoir.lightSample = reservoir2.lightSample; + updater.targetPDF = f; + } + + // Update the contribution weight. + updater.reservoir.W = weightSum / max(updater.targetPDF, FLT_MIN); + // Increment number of samples - updater.reservoir.M += 1; + updater.reservoir.M += reservoir2.M; // This differs from the pseudocode from the original paper as that appears to be incorrect +} + +/** + * Merge 2 reservoirs together using pre-calculated target distributions. + * @tparam RNG The type of random number sampler to be used. + * @param updater Reservoir updater containing the first reservoir to update. + * @param reservoir2 The second reservoir to add to the first. + * @param randomNG Random number sampler used to sample light. + * @param pdf11 Target distribution: p_{domain1 -> domain1}(x_1). + * @param pdf12 Target distribution: p_{domain2 -> domain1}(x_2). + * @param pdf21 Target distribution: p_{domain1 -> domain2}(x_1). + * @param pdf22 Target distribution: p_{domain2 -> domain2}(x_2). 
+ */ +template +void mergeReservoirsRadianceTalbotMIS(inout ReservoirUpdater updater, Reservoir reservoir2, inout RNG randomNG, float pdf11, float pdf12, float pdf21, float pdf22) +{ + // Compute Talbot MIS weights. + const float misWeight1 = updater.reservoir.M * pdf11 / max(updater.reservoir.M * pdf11 + reservoir2.M * pdf21, FLT_MIN); + const float misWeight2 = reservoir2.M * pdf22 / max(updater.reservoir.M * pdf12 + reservoir2.M * pdf22, FLT_MIN); - // Discard any invalid values - if (any(radiance > 0.0f) && lightPDF != 0.0f) + // Compute reservoir resampling weights + const float weight1 = misWeight1 * pdf11 * updater.reservoir.W; + const float weight2 = misWeight2 * pdf12 * reservoir2.W; + const float weightSum = weight1 + weight2; + + // Check if new sample should replace the existing sample + if (randomNG.rand() * weightSum < weight2) { - // Combine PDFs - sampledLightPDF *= lightPDF; - - // Evaluate the sampling function for the new sample - const float f = Reservoir_EvaluateTargetPdf(viewDirection, normal, material, lightDirection, radiance); - const float w = f / sampledLightPDF; - - // Increment the total weight of all samples in the reservoir - updater.reservoir.Wsum += w; - - // Check if new sample should replace the existing sample - if ((randomNG.rand() * updater.reservoir.Wsum) < w) - { - // Update internal values to add new sample - updater.reservoir.lightSample.index = lightIndex; - updater.reservoir.lightSample.sampleParams = sampleParams; - updater.sourcePDF = f; - } + // Update internal values to add new sample + updater.reservoir.lightSample = reservoir2.lightSample; + updater.targetPDF = pdf12; } + + // Update the contribution weight. + updater.reservoir.W = weightSum / max(updater.targetPDF, FLT_MIN); + + // Increment number of samples + updater.reservoir.M += reservoir2.M; +} + +/** + * Merge 2 reservoirs together. + * @tparam RNG The type of random number sampler to be used. 
+ * @param updater Reservoir updater containing the first reservoir to update. + * @param reservoir2 The second reservoir to add to the first. + * @param randomNG Random number sampler used to sample light. + * @param material Material data describing BRDF. + * @param position Current position on surface. + * @param normal Shading normal vector at current position (must be normalised). + * @param viewDirection Outgoing ray view direction (must be normalised). + */ +template +void mergeReservoirs(inout ReservoirUpdater updater, Reservoir reservoir2, inout RNG randomNG, MaterialBRDF material, float3 position, float3 normal, float3 viewDirection) +{ + float3 lightDirection; + const Light selectedLight = getLight(reservoir2.lightSample.index); + float3 lightPosition; + float3 radiance = evaluateLightSampled(selectedLight, position, reservoir2.lightSample.sampleParams, lightDirection, lightPosition); + + mergeReservoirsRadiance(updater, reservoir2, randomNG, material, normal, viewDirection, radiance, lightDirection); } /** @@ -226,35 +338,57 @@ void updateReservoir(inout ReservoirUpdater updater, inout RNG randomNG, uint li * @param solidAngle Solid angle around view direction of visible ray cone. 
*/ template -void mergeReservoirs(inout ReservoirUpdater updater, Reservoir reservoir2, inout RNG randomNG, MaterialBRDF material, float3 position, float3 normal, float3 viewDirection, float solidAngle) +void mergeReservoirsCone(inout ReservoirUpdater updater, Reservoir reservoir2, inout RNG randomNG, MaterialBRDF material, float3 position, float3 normal, float3 viewDirection, float solidAngle) { float3 lightDirection; const Light selectedLight = getLight(reservoir2.lightSample.index); float3 lightPosition; float3 radiance = evaluateLightConeSampled(selectedLight, position, reservoir2.lightSample.sampleParams, solidAngle, lightDirection, lightPosition); - // Increment number of samples - updater.reservoir.M += reservoir2.M; // This differs from the pseudocode from the original paper as that appears to be incorrect + mergeReservoirsRadiance(updater, reservoir2, randomNG, material, normal, viewDirection, radiance, lightDirection); +} - // Discard any invalid values - if (any(radiance > 0.0f)) - { - // Evaluate the sampling function for the new sample - const float f = Reservoir_EvaluateTargetPdf(viewDirection, normal, material, lightDirection, radiance); - const float w = f * reservoir2.W * reservoir2.M; - - // Increment the total weight of all samples in the reservoir - updater.reservoir.Wsum += w; - - // Check if new sample should replace the existing sample - if ((randomNG.rand() * updater.reservoir.Wsum) < w) - { - // Update internal values to add new sample - updater.reservoir.lightSample.index = reservoir2.lightSample.index; - updater.reservoir.lightSample.sampleParams = reservoir2.lightSample.sampleParams; - updater.sourcePDF = f; - } - } +/** + * Merge 2 reservoirs together. + * @tparam RNG The type of random number sampler to be used. + * @param updater Reservoir updater containing the first reservoir to update. + * @param reservoir2 The second reservoir to add to the first. + * @param randomNG Random number sampler used to sample light. 
+ * @param material1 Material data describing BRDF at the current position. + * @param position1 Current position on surface. + * @param normal1 Shading normal vector at current position (must be normalised). + * @param viewDirection1 Outgoing ray view direction from the current position (must be normalised). + * @param material2 Material data describing BRDF at the current position. + * @param position2 Candiadte sample position on surface. + * @param normal2 Shading normal vector at the candidate sample position (must be normalised). + * @param viewDirection2 Outgoing ray view direction from the candidate sample position (must be normalised). + * @param solidAngle Solid angle around view direction of visible ray cone. + */ +template +void mergeReservoirsConeTalbotMIS(inout ReservoirUpdater updater, Reservoir reservoir2, inout RNG randomNG, MaterialBRDF material1, float3 position1, float3 normal1, float3 viewDirection1, MaterialBRDF material2, float3 position2, float3 normal2, float3 viewDirection2, float solidAngle) +{ + const Light selectedLight1 = getLight(updater.reservoir.lightSample.index); + const Light selectedLight2 = getLight(reservoir2.lightSample.index); + + float3 lightDirection12; + float3 lightPosition12; + const float3 radiance12 = evaluateLightConeSampled(selectedLight2, position1, reservoir2.lightSample.sampleParams, solidAngle, lightDirection12, lightPosition12); + + float3 lightDirection21; + float3 lightPosition21; + const float3 radiance21 = evaluateLightConeSampled(selectedLight1, position2, updater.reservoir.lightSample.sampleParams, solidAngle, lightDirection21, lightPosition21); + + float3 lightDirection22; + float3 lightPosition22; + const float3 radiance22 = evaluateLightConeSampled(selectedLight2, position2, reservoir2.lightSample.sampleParams, solidAngle, lightDirection22, lightPosition22); + + // Evaluate the target distributions. 
+ const float pdf11 = updater.targetPDF; + const float pdf12 = Reservoir_EvaluateTargetPdf(viewDirection1, normal1, material1, lightDirection12, radiance12); + const float pdf21 = Reservoir_EvaluateTargetPdf(viewDirection2, normal2, material2, lightDirection21, radiance21); + const float pdf22 = Reservoir_EvaluateTargetPdf(viewDirection2, normal2, material2, lightDirection22, radiance22); + + mergeReservoirsRadianceTalbotMIS(updater, reservoir2, randomNG, pdf11, pdf12, pdf21, pdf22); } /** @@ -266,8 +400,8 @@ uint4 packReservoir(Reservoir reservoir) { return uint4(reservoir.lightSample.index, f32tof16(reservoir.lightSample.sampleParams.x) | (f32tof16(reservoir.lightSample.sampleParams.y) << 16), - reservoir.M, - f32tof16(reservoir.W) | (f32tof16(reservoir.Wsum) << 16)); + asuint(reservoir.W), + f32tof16(reservoir.M)); } /** @@ -280,9 +414,8 @@ Reservoir unpackReservoir(uint4 reservoirData) Reservoir reservoir; reservoir.lightSample.index = reservoirData.x; reservoir.lightSample.sampleParams = float2(f16tof32(reservoirData.y & 0xFFFFu), f16tof32(reservoirData.y >> 16)); - reservoir.M = reservoirData.z; - reservoir.W = f16tof32(reservoirData.w & 0xFFFFu); - reservoir.Wsum = f16tof32(reservoirData.w >> 16); + reservoir.W = asfloat(reservoirData.z); + reservoir.M = f16tof32(reservoirData.w & 0xFFFFu); return reservoir; } #endif diff --git a/src/core/src/materials/material_evaluation.hlsl b/src/core/src/materials/material_evaluation.hlsl index 0068846..623464a 100644 --- a/src/core/src/materials/material_evaluation.hlsl +++ b/src/core/src/materials/material_evaluation.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,11 +22,6 @@ THE SOFTWARE. #ifndef MATERIAL_EVALUATION_HLSL #define MATERIAL_EVALUATION_HLSL -/* -// Requires the following data to be defined in any shader that uses this file -Texture2D g_TextureMaps[] : register(space99); -SamplerState g_LinearSampler; -*/ #include "materials.hlsl" #include "../math/math_constants.hlsl" @@ -34,31 +29,36 @@ SamplerState g_LinearSampler; /** * Calculates schlick fresnel term. * @param F0 The fresnel reflectance an grazing angle. - * @param angle The angle between view and half-vector. + * @param dotHV The dot product of the half-vector and view direction (range [-1, 1]). * @return The calculated fresnel term. */ -float3 fresnel(float3 F0, float angle) +float3 fresnel(float3 F0, float dotHV) { + // The half-vector may be incorrectly flipped or invisible to the view direction in some cases, and thus + // dotHV may be negative. For this case, we use abs(dotHV) to correct flipping and avoid NaN. float3 F90 = 1.0f.xxx; - return F0 + (F90 - F0) * pow(1.0f - angle, 5.0f); + return F0 + (F90 - F0) * pow(1.0f - saturate(abs(dotHV)), 5.0f); } /** * Calculates the amount to modify the diffuse component of a combined BRDF. * @param f Pre-calculated fresnel value. - * @param dotHV The dot product of the half-vector and view direction. + * @param dotHV The dot product of the half-vector and view direction (range [-1, 1]). * @return The amount to modify diffuse component by. */ float3 diffuseCompensationTerm(float3 f, float dotHV) { // PBR Diffuse Lighting for GGX + Smith Microsurfaces - Hammon 2017 - return (1.0f.xxx - f) * 1.05 * (1.0f - pow(1.0f - dotHV, 5.0f)); + + // The half-vector may be incorrectly flipped or invisible to the view direction in some cases, and thus + // dotHV may be negative. For this case, we use abs(dotHV) to correct flipping and avoid NaN. 
+ return (1.0f.xxx - f) * 1.05 * (1.0f - pow(1.0f - saturate(abs(dotHV)), 5.0f)); } /** * Calculates the amount to modify the diffuse component of a combined BRDF. * @param F0 The fresnel reflectance at grazing angle. - * @param dotHV The dot product of the half-vector and view direction. + * @param dotHV The dot product of the half-vector and view direction (range [-1, 1]). * @return The amount to modify diffuse component by. */ float3 diffuseCompensation(float3 F0, float dotHV) @@ -69,11 +69,17 @@ float3 diffuseCompensation(float3 F0, float dotHV) /** * Evaluate the Trowbridge-Reitz Normal Distribution Function. * @param roughnessAlphaSqr The NDF roughness value squared. - * @param dotNH The dot product of the normal and half vector. + * @param dotNH The dot product of the normal and half vector (range [-1, 1]). * @return The calculated NDF value. */ float evaluateNDFTrowbridgeReitz(float roughnessAlphaSqr, float dotNH) { + // Heaviside function for microfacet normal in the upper hemisphere. + if (dotNH < 0.0f) + { + return 0.0f; + } + float denom = dotNH * dotNH * (roughnessAlphaSqr - 1.0f) + 1.0f; float d = roughnessAlphaSqr / (PI * denom * denom); return d; @@ -82,15 +88,18 @@ float evaluateNDFTrowbridgeReitz(float roughnessAlphaSqr, float dotNH) /** * Evaluate the GGX Visibility function. * @param roughnessAlphaSqr The GGX roughness value squared. - * @param dotNL The dot product of the normal and light direction. - * @param dotNV The dot product of the normal and view direction. + * @param dotNL The dot product of the normal and light direction (range [-1, 1]). + * @param dotNV The dot product of the normal and view direction (range [-1, 1]). * @return The calculated visibility value. */ float evaluateVisibilityGGX(float roughnessAlphaSqr, float dotNL, float dotNV) { + // The masking-shadowing function is indefinite for back-facing shading normals. + // So we use abs(dotNL) and abs(dotNV) for this case. 
+ // This is hacky, but still satisfies the reciprocity and energy conservation. float rMod = 1.0f - roughnessAlphaSqr; - float recipG1 = dotNL + sqrt(roughnessAlphaSqr + (rMod * dotNL * dotNL)); - float recipG2 = dotNV + sqrt(roughnessAlphaSqr + (rMod * dotNV * dotNV)); + float recipG1 = abs(dotNL) + sqrt(roughnessAlphaSqr + (rMod * dotNL * dotNL)); + float recipG2 = abs(dotNV) + sqrt(roughnessAlphaSqr + (rMod * dotNV * dotNV)); float recipV = recipG1 * recipG2; return recipV; } @@ -100,10 +109,10 @@ float evaluateVisibilityGGX(float roughnessAlphaSqr, float dotNL, float dotNV) * @param roughnessAlpha The GGX roughness value. * @param roughnessAlphaSqr The GGX roughness value squared. * @param F0 The fresnel reflectance at grazing angle. - * @param dotHV The dot product of the half-vector and view direction. - * @param dotNH The dot product of the normal and half vector. - * @param dotNL The dot product of the normal and light direction. - * @param dotNV The dot product of the normal and view direction. + * @param dotHV The dot product of the half-vector and view direction (range [-1, 1]). + * @param dotNH The dot product of the normal and half vector (range [-1, 1]). + * @param dotNL The dot product of the normal and light direction (range [-1, 1]). + * @param dotNV The dot product of the normal and view direction (range [-1, 1]). * @param fresnelOut (Out) The returned fresnel value. * @return The calculated reflectance. */ @@ -134,10 +143,10 @@ float3 evaluateLambert(float3 albedo) /** * Evaluate the combined BRDF. * @param material Material data describing BRDF. - * @param dotNH The dot product of the normal and half vector. - * @param dotNL The dot product of the normal and light direction. - * @param dotHV The dot product of the half-vector and view direction. - * @param dotNV The dot product of the normal and view direction. + * @param dotHV The dot product of the half-vector and view direction (range [-1, 1]). 
+ * @param dotNH The dot product of the normal and half vector (range [-1, 1]). + * @param dotNL The dot product of the normal and light direction (range [-1, 1]). + * @param dotNV The dot product of the normal and view direction (range [-1, 1]). * @return The calculated reflectance. */ float3 evaluateBRDF(MaterialBRDF material, float dotHV, float dotNH, float dotNL, float dotNV) @@ -145,22 +154,61 @@ float3 evaluateBRDF(MaterialBRDF material, float dotHV, float dotNH, float dotNL // Calculate diffuse component float3 diffuse = evaluateLambert(material.albedo); -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS // Calculate specular component float3 f; float3 specular = evaluateGGX(material.roughnessAlpha, material.roughnessAlphaSqr, material.F0, dotHV, dotNH, dotNL, dotNV, f); // Add the weight of the diffuse compensation term diffuse *= diffuseCompensationTerm(f, dotHV); - float3 brdf = (specular + diffuse) * dotNL; + float3 brdf = (specular + diffuse) * saturate(dotNL); // saturate(dotNL) = abs(dotNL) * Heaviside function for the upper hemisphere. #else // Add the weight of the diffuse compensation term to prevent excessive brightness compared to specular diffuse *= diffuseCompensation(fresnel(0.04f.xxx, dotHV), dotHV); - float3 brdf = diffuse * dotNL; + float3 brdf = diffuse * saturate(dotNL); #endif return brdf; } +/** + * Evaluate the diffuse component of the BRDF. + * @param material Material data describing BRDF. + * @param dotHV The dot product of the half-vector and view direction (range [-1, 1]). + * @param dotNL The dot product of the normal and light direction (range [-1, 1]). + * @return The calculated reflectance. 
+ */ +float3 evaluateBRDFDiffuse(MaterialBRDF material, float dotHV, float dotNL) +{ + // Calculate diffuse component + float3 diffuse = evaluateLambert(material.albedo); + + // Add the weight of the diffuse compensation term to prevent excessive brightness compared to specular + diffuse *= diffuseCompensation(fresnel(0.04f.xxx, dotHV), dotHV); + float3 brdf = diffuse * saturate(dotNL); + return brdf; +} + +#ifndef DISABLE_SPECULAR_MATERIALS +/** + * Evaluate the specular component of the BRDF. + * @param material Material data describing BRDF. + * @param dotNH The dot product of the normal and half vector (range [-1, 1]). + * @param dotNL The dot product of the normal and light direction (range [-1, 1]). + * @param dotHV The dot product of the half-vector and view direction (range [-1, 1]). + * @param dotNV The dot product of the normal and view direction (range [-1, 1]). + * @return The calculated reflectance. + */ +float3 evaluateBRDFSpecular(MaterialBRDF material, float dotHV, float dotNH, float dotNL, float dotNV) +{ + // Calculate specular component + float3 f; + float3 specular = evaluateGGX(material.roughnessAlpha, material.roughnessAlphaSqr, material.F0, dotHV, dotNH, dotNL, dotNV, f); + + float3 brdf = specular * saturate(dotNL); // saturate(dotNL) = abs(dotNL) * Heaviside function for the upper hemisphere. + return brdf; +} +#endif + /** * Evaluate the combined BRDF. * @param material Material data describing BRDF. @@ -169,35 +217,51 @@ float3 evaluateBRDF(MaterialBRDF material, float dotHV, float dotNH, float dotNL * @param lightDirection The direction to the sampled light (must be normalised). * @return The calculated reflectance. 
*/ -float3 evaluateBRDF(MaterialBRDF material, float3 normal, float3 viewDirection, float3 lightDirection) +float3 evaluateBRDF(MaterialBRDF material, float3 normal, float3 viewDirection, float3 lightDirection, out float3 diffuse, out float3 specular) { // Calculate diffuse component - float3 diffuse = evaluateLambert(material.albedo); + diffuse = evaluateLambert(material.albedo); // Calculate shading angles - float dotNL = saturate(dot(normal, lightDirection)); -#ifndef DISABLE_SPECULAR_LIGHTING + float dotNL = clamp(dot(normal, lightDirection), -1.0f, 1.0f); // Calculate half vector float3 halfVector = normalize(viewDirection + lightDirection); float dotHV = saturate(dot(halfVector, viewDirection)); - float dotNH = saturate(dot(normal, halfVector)); - float dotNV = saturate(dot(normal, viewDirection)); +#ifndef DISABLE_SPECULAR_MATERIALS + float dotNH = clamp(dot(normal, halfVector), -1.0f, 1.0f); + float dotNV = clamp(dot(normal, viewDirection), -1.0f, 1.0f); // Calculate specular component float3 f; - float3 specular = evaluateGGX(material.roughnessAlpha, material.roughnessAlphaSqr, material.F0, dotHV, dotNH, dotNL, dotNV, f); + specular = evaluateGGX(material.roughnessAlpha, material.roughnessAlphaSqr, material.F0, dotHV, dotNH, dotNL, dotNV, f); // Add the weight of the diffuse compensation term + specular *= saturate(dotNL); // saturate(dotNL) = abs(dotNL) * Heaviside function for the upper hemisphere. 
+ diffuse *= saturate(dotNL); diffuse *= diffuseCompensationTerm(f, dotHV); - float3 brdf = (specular + diffuse) * dotNL; + float3 brdf = specular + diffuse; #else // Add the weight of the diffuse compensation term to prevent excessive brightness compared to specular - float3 halfVector = normalize(viewDirection + lightDirection); - float dotHV = saturate(dot(halfVector, viewDirection)); diffuse *= diffuseCompensation(0.04f.xxx, dotHV); - float3 brdf = diffuse * dotNL; + specular = 0.f; + float3 brdf = diffuse * saturate(dotNL); #endif return brdf; } +/** + * Evaluate the combined BRDF. + * @param material Material data describing BRDF. + * @param normal Shading normal vector at current position (must be normalised). + * @param viewDirection Outgoing ray view direction (must be normalised). + * @param lightDirection The direction to the sampled light (must be normalised). + * @return The calculated reflectance. + */ +float3 evaluateBRDF(MaterialBRDF material, float3 normal, float3 viewDirection, float3 lightDirection) +{ + float3 diffuse; + float3 specular; + return evaluateBRDF(material, normal, viewDirection, lightDirection, diffuse, specular); +} + #endif // MATERIAL_EVALUATION_HLSL diff --git a/src/core/src/materials/material_sampling.hlsl b/src/core/src/materials/material_sampling.hlsl index 3f1b756..09e10b1 100644 --- a/src/core/src/materials/material_sampling.hlsl +++ b/src/core/src/materials/material_sampling.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,12 +27,21 @@ THE SOFTWARE. 
#include "../math/quaternion.hlsl" #include "../math/color.hlsl" -// Allows for view directions in top hemisphere (N.V >0) -float3 sampleGGXVNDFUpper(float roughnessAlpha, float3 viewDirection, float2 samples) +/** + * Calculate a sampled direction for the GGX BRDF using Heitz VNDF sampling. + * @note All calculations are done in the surfaces local tangent space. Only allows + * for view directions in top hemisphere (N.V>0). + * @param roughnessAlpha The GGX roughness value. + * @param localView Outgoing ray view direction (in local space). + * @param samples Random number samples used to sample BRDF. + * @return The sampled direction in local space. + */ +float3 sampleGGXVNDFUpper(float roughnessAlpha, float3 localView, float2 samples) { // A Simpler and Exact Sampling Routine for the GGX Distribution of Visible Normals - Heitz 2017 + // Stretch the view vector as if roughness==1 - float3 stretchedView = normalize(float3(roughnessAlpha * viewDirection.xy, viewDirection.z)); + float3 stretchedView = normalize(float3(roughnessAlpha * localView.xy, localView.z)); // Create an orthonormal basis (requires that viewDirection is always above surface i.e viewDirection.geomNormal > 0)) float3 T1 = (stretchedView.z < 0.9999) ? normalize(cross(stretchedView, float3(0.0f, 0.0f, 1.0f))) : float3(1.0f, 0.0f, 0.0f); float3 T2 = cross(T1, stretchedView); @@ -50,12 +59,20 @@ float3 sampleGGXVNDFUpper(float roughnessAlpha, float3 viewDirection, float2 sam return normal; } -// Allows for view directions in complete sphere -float3 sampleGGXVNDFFull(float roughnessAlpha, float3 viewDirection, float2 samples) +/** + * Calculate a sampled direction for the GGX BRDF using Heitz VNDF sampling of full sphere. + * @note All calculations are done in the surfaces local tangent space. + * @param roughnessAlpha The GGX roughness value. + * @param localView Outgoing ray view direction (in local space). + * @param samples Random number samples used to sample BRDF. 
+ * @return The sampled direction in local space. + */ +float3 sampleGGXVNDFFull(float roughnessAlpha, float3 localView, float2 samples) { // Sampling the GGX Distribution of Visible Normals - Heitz 2018 + // Stretch the view vector as if roughness==1 - float3 stretchedView = normalize(float3(roughnessAlpha * viewDirection.xy, viewDirection.z)); + float3 stretchedView = normalize(float3(roughnessAlpha * localView.xy, localView.z)); // Create an orthonormal basis (with special case if cross product is zero) float lengthSqr = dot(stretchedView.xy, stretchedView.xy); float3 T1 = lengthSqr > 0 ? float3(-stretchedView.y, stretchedView.x, 0.0f) * rsqrt(lengthSqr) : float3(1.0f, 0.0f, 0.0f); @@ -76,17 +93,68 @@ float3 sampleGGXVNDFFull(float roughnessAlpha, float3 viewDirection, float2 samp } /** - * Calculate the sampling direction for a GGX BRDF. + * Calculate a sampled direction for the GGX BRDF using spherical cap sampling. + * @note All calculations are done in the surfaces local tangent space. + * @param roughnessAlpha The GGX roughness value. + * @param localView Outgoing ray view direction (in local space). + * @param samples Random number samples used to sample BRDF. + * @return The sampled direction in local space. 
+ */ +float3 sampleGGXVNDFSphericalCap(float roughnessAlpha, float3 localView, float2 samples) +{ + // Sampling Visible GGX Normals with Spherical Caps - Jonathan Dupuy and Anis Benyoub 2023 + // https://doi.org/10.1111/cgf.14867 + + // Stretch the view vector as if roughness==1 + float3 wiStd = normalize(float3(roughnessAlpha * localView.xy, localView.z)); + + float phi = 2.0f * PI * samples.y; + float z = mad(-wiStd.z, samples.x, 1.0f - samples.x); + float sinTheta = sqrt(saturate(1.0f - z * z)); + float x = sinTheta * cos(phi); + float y = sinTheta * sin(phi); + float3 c = float3(x, y, z); + float3 wmStd = c + wiStd; + + // Convert normal to un-stretched and normalise + float3 wm = normalize(float3(roughnessAlpha * wmStd.xy, wmStd.z)); + return wm; +} + +/** + * Calculate a sampled direction for the GGX BRDF using bounded spherical cap sampling. + * @note All calculations are done in the surfaces local tangent space. * @param roughnessAlpha The GGX roughness value. - * @param viewDirection Outgoing ray view direction (in local space). + * @param localView Outgoing ray view direction (in local space). * @param samples Random number samples used to sample BRDF. * @return The sampled direction in local space. */ -float3 sampleGGXVNDF(float roughnessAlpha, float3 viewDirection, float2 samples) +float3 sampleGGXVNDFBounded(float roughnessAlpha, float3 localView, float2 samples) { - //Can change between Upper or Full - return sampleGGXVNDFUpper(roughnessAlpha, viewDirection, samples); - //return sampleGGXVNDFFull(roughnessAlpha, viewDirection, samples); + // Bounded VNDF Sampling for Smith-GGX Reflections - Kenta Eto and Yusuke Tokuyoshi 2023 + // https://doi.org/10.1145/3610543.3626163 + + // Stretch the view vector as if roughness==1 + float3 wiStd = normalize(float3(roughnessAlpha * localView.xy, localView.z)); + + float phi = 2.0f * PI * samples.y; + float a = roughnessAlpha; // Use a = saturate(min(roughnessAlpha.x, roughnessAlpha.y)) for anisotropic roughness. 
+ float s = 1.0f + sign(1.0f - a) * length(float2(localView.x, localView.y)); + float a2 = a * a; + float s2 = s * s; + float k = (1.0f - a2) * s2 / (s2 + a2 * localView.z * localView.z); + float b = localView.z > 0.0f ? k * wiStd.z : wiStd.z; + + float z = mad(-b, samples.x, 1.0f - samples.x); + float sinTheta = sqrt(saturate(1.0f - z * z)); + float x = sinTheta * cos(phi); + float y = sinTheta * sin(phi); + float3 c = float3(x, y, z); + float3 wmStd = c + wiStd; + + // Convert normal to un-stretched and normalise + float3 wm = normalize(float3(roughnessAlpha * wmStd.xy, wmStd.z)); + return wm; } /** @@ -107,37 +175,90 @@ float3 sampleHemisphere(float2 samples) * Calculate a sampled direction for the GGX BRDF. * @note All calculations are done in the surfaces local tangent space * @param roughnessAlpha The GGX roughness value. - * @param viewDirection Outgoing ray view direction (in local space). + * @param localView Outgoing ray view direction (in local space). * @param samples Random number samples used to sample BRDF. * @return The sampled direction in local space. */ -float3 sampleGGX(float roughnessAlpha, float3 viewDirection, float2 samples) +float3 sampleGGX(float roughnessAlpha, float3 localView, float2 samples) { // Sample the local space micro-facet normal - float3 sampledNormal = sampleGGXVNDF(roughnessAlpha, viewDirection, samples); + // float3 sampledNormal = sampleGGXVNDFUpper(roughnessAlpha, localView, samples); + // float3 sampledNormal = sampleGGXVNDFFull(roughnessAlpha, localView, samples); // Use this for shading normals. + // float3 sampledNormal = sampleGGXVNDFSphericalCap(roughnessAlpha, localView, samples); + float3 sampledNormal = sampleGGXVNDFBounded(roughnessAlpha, localView, samples); // Calculate light direction - float3 sampledLight = reflect(-viewDirection, sampledNormal); + float3 sampledLight = reflect(-localView, sampledNormal); return sampledLight; } /** - * Calculate the PDF for given values for the GGX BRDF. 
+ * Calculate the BVNDF sampling PDF for given values for the GGX BRDF. * @param roughnessAlphaSqr The GGX roughness value squared. - * @param dotNH The dot product of the local normal and half vector. - * @param dotNL The dot product of the local normal and light direction. + * @param dotNH The dot product of the local normal and half vector (range [-1, 1]). * @return The calculated PDF. */ -float sampleGGXPDF(float roughnessAlphaSqr, float dotNH, float dotNV) +float sampleGGXVNDFPDF(float roughnessAlphaSqr, float dotNH, float dotNV) { - // PDF of VNDF distribution is G1(V)D(H)/4N.V - // Calculate NDF function float d = evaluateNDFTrowbridgeReitz(roughnessAlphaSqr, dotNH); - // Calculate GGX Visibility G1 function - float recipG1 = sqrt(roughnessAlphaSqr + ((1.0f - roughnessAlphaSqr) * dotNV * dotNV)) + dotNV; - return d / (2.0f * recipG1); + float dotNV2 = saturate(dotNV * dotNV); + float s = roughnessAlphaSqr * (1.0f - dotNV2); + float t = sqrt(s + dotNV2); + + // Calculate the normalization factor considering backfacing shading normals. + // [Tokuyoshi 2021 "Unbiased VNDF Sampling for Backfacing Shading Normals"] + // https://gpuopen.com/download/publications/Unbiased_VNDF_Sampling_for_Backfacing_Shading_Normals.pdf + // The normalization factor for the Smith-GGX VNDF is (t + dotNV) / 2. + // But t + dotNV can have catastrophic cancellation when dotNV < 0. + // Therefore, we avoid the catastrophic cancellation by equivarently rewriting the form as follows: + // t + dotNV = (t + dotNV) * (t - dotNV) / (t - dotNV) = s / (t - dotNV). + // In this implementation, we clamp dotNV for the case in abs(dotNV) > 1. + float recipNormFactor = dotNV >= 0.0f ? t + saturate(dotNV) : s / (t + saturate(abs(dotNV))); + return d / (2.0f * recipNormFactor); +} + +/** + * Calculate the BVNDF sampling PDF for given values for the GGX BRDF. + * @param roughnessAlphaSqr The GGX roughness value squared. 
+ * @param dotNH The dot product of the local normal and half vector (range [-1, 1]). + * @param localView Outgoing ray view direction (in local space). + * @return The calculated PDF. + */ +float sampleGGXVNDFBoundedPDF(float roughnessAlphaSqr, float dotNH, float3 localView) +{ + // Kenta Eto and Yusuke Tokuyoshi. 2023. Bounded VNDF Sampling for Smith-GGX Reflections. SIGGRAPH Asia 2023 Technical Communications. https://doi.org/10.1145/3610543.3626163 + float ndf = evaluateNDFTrowbridgeReitz(roughnessAlphaSqr, dotNH); + float roughnessAlpha = sqrt(roughnessAlphaSqr); + float2 ai = roughnessAlpha * localView.xy; + float len2 = dot(ai, ai); + float t = sqrt(len2 + localView.z * localView.z); + if (localView.z >= 0.0f) + { + float a = roughnessAlpha; // Use a = saturate(min(roughnessAlpha.x, roughnessAlpha.y)) for anisotropic roughness. + float s = 1.0f + sign(1.0f - a) * length(float2(localView.x, localView.y)); + float a2 = a * a; + float s2 = s * s; + float k = (1.0f - a2) * s2 / (s2 + a2 * localView.z * localView.z); + return ndf / (2.0f * (k * localView.z + t)); + } + return ndf * (t - localView.z) / (2.0f * len2); +} + +/** + * Calculate the PDF for given values for the GGX BRDF. + * @param roughnessAlphaSqr The GGX roughness value squared. + * @param dotNH The dot product of the local normal and half vector (range [-1, 1]). + * @param dotNV The dot product of the local normal and light direction (range [-1, 1]). + * @param localView Outgoing ray view direction (in local space). + * @return The calculated PDF. + */ +float sampleGGXPDF(float roughnessAlphaSqr, float dotNH, float dotNV, float3 localView) +{ + // Can change the sampling method + // return sampleGGXVNDFPDF(roughnessAlphaSqr, dotNH, dotNV); + return sampleGGXVNDFBoundedPDF(roughnessAlphaSqr, dotNH, localView); } /** @@ -173,24 +294,24 @@ float3 sampleLambert(float3 albedo, float2 samples) /** * Calculate the PDF for given values for the Lambert BRDF. 
- * @param dotNL The dot product of the local normal and view direction. + * @param dotNL The dot product of the local normal and view direction (range [-1, 1]). * @return The calculated PDF. */ float sampleLambertPDF(float dotNL) { - return dotNL / PI; + return saturate(dotNL) / PI; // PDF for the upper hemisphere. } /** * Calculates the probability of selecting the specular component over the diffuse component of a BRDF. * @param F0 The fresnel reflectance at grazing angle. - * @param dotHV The dot product of the half-vector and view direction. + * @param dotHV The dot product of the half-vector and view direction (range [-1, 1]). * @param albedo The diffuse colour term. * @return The probability of selecting the specular direction. */ float calculateBRDFProbability(float3 F0, float dotHV, float3 albedo) { -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS // To determine if we are sampling the diffuse or the specular component of the BRDF we need a way to // weight each components contributions. To do this we use a Fresnel blend using the diffuseCompensation // for the diffuse component @@ -210,18 +331,19 @@ float calculateBRDFProbability(float3 F0, float dotHV, float3 albedo) /** * Calculate the PDF for given values for the combined BRDF. - * @param material Material data describing BRDF. - * @param dotNH The dot product of the normal and half vector. - * @param dotNL The dot product of the normal and light direction. - * @param dotHV The dot product of the half-vector and view direction. - * @param dotNV The dot product of the normal and view direction. + * @param material Material data describing BRDF. + * @param dotNH The dot product of the normal and half vector (range [-1, 1]). + * @param dotNL The dot product of the normal and light direction (range [-1, 1]). + * @param dotHV The dot product of the half-vector and view direction (range [-1, 1]). + * @param dotNV The dot product of the normal and view direction (range [-1, 1]). 
+ * @param localView Outgoing ray view direction (in local space). * @return The calculated PDF. */ -float sampleBRDFPDF(MaterialBRDF material, float dotNH, float dotNL, float dotHV, float dotNV) +float sampleBRDFPDF(MaterialBRDF material, float dotNH, float dotNL, float dotHV, float dotNV, float3 localView) { -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS float probabilityBRDF = calculateBRDFProbability(material.F0, dotHV, material.albedo); - float pdf = lerp(sampleLambertPDF(dotNL), sampleGGXPDF(material.roughnessAlphaSqr, dotNH, dotNV), probabilityBRDF); + float pdf = lerp(sampleLambertPDF(dotNL), sampleGGXPDF(material.roughnessAlphaSqr, dotNH, dotNV, localView), probabilityBRDF); #else float pdf = sampleLambertPDF(dotNL); #endif @@ -231,16 +353,17 @@ float sampleBRDFPDF(MaterialBRDF material, float dotNH, float dotNL, float dotHV /** * Calculate the PDF for given values for the combined BRDF when the sampling probability is already known. * @param material Material data describing BRDF. - * @param dotNH The dot product of the normal and half vector. - * @param dotNL The dot product of the normal and light direction. - * @param dotNV The dot product of the normal and view direction. + * @param dotNH The dot product of the normal and half vector (range [-1, 1]). + * @param dotNL The dot product of the normal and light direction (range [-1, 1]). + * @param dotNV The dot product of the normal and view direction (range [-1, 1]). * @param probabilityBRDF The calculated probability of selecting the specular direction. + * @param localView Outgoing ray view direction (in local space). * @return The calculated PDF. 
*/ -float sampleBRDFPDF2(MaterialBRDF material, float dotNH, float dotNL, float dotNV, float probabilityBRDF) +float sampleBRDFPDF2(MaterialBRDF material, float dotNH, float dotNL, float dotNV, float probabilityBRDF, float3 localView) { -#ifndef DISABLE_SPECULAR_LIGHTING - float pdf = lerp(sampleLambertPDF(dotNL), sampleGGXPDF(material.roughnessAlphaSqr, dotNH, dotNV), probabilityBRDF); +#ifndef DISABLE_SPECULAR_MATERIALS + float pdf = lerp(sampleLambertPDF(dotNL), sampleGGXPDF(material.roughnessAlphaSqr, dotNH, dotNV, localView), probabilityBRDF); #else float pdf = sampleLambertPDF(dotNL); #endif @@ -260,36 +383,78 @@ float sampleBRDFPDFAndEvalute(MaterialBRDF material, float3 normal, float3 viewD float3 lightDirection, out float3 reflectance) { // Evaluate BRDF for new light direction - float dotNL = saturate(dot(normal, lightDirection)); + float dotNL = clamp(dot(normal, lightDirection), -1.0f, 1.0f); // Calculate half vector float3 halfVector = normalize(viewDirection + lightDirection); // Calculate shading angles float dotHV = saturate(dot(halfVector, viewDirection)); - float dotNH = saturate(dot(normal, halfVector)); - float dotNV = saturate(dot(normal, viewDirection)); + float dotNH = clamp(dot(normal, halfVector), -1.0f, 1.0f); + float dotNV = clamp(dot(normal, viewDirection), -1.0f, 1.0f); reflectance = evaluateBRDF(material, dotHV, dotNH, dotNL, dotNV); + // Transform the view direction into the surfaces tangent coordinate space (oriented so that z axis is aligned to normal) + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(viewDirection); + // Calculate combined PDF for current sample // Note: has some duplicated calculations in evaluateBRDF and sampleBRDFPDF - float samplePDF = sampleBRDFPDF(material, dotNH, dotNL, dotHV, dotNV); + float samplePDF = sampleBRDFPDF(material, dotNH, dotNL, dotHV, dotNV, localView); return samplePDF; +} + +/** + * Calculate the PDF and evaluate radiance for given values 
for the diffuse and specular BRDF components separately. + * @param material Material data describing BRDF. + * @param normal Shading normal vector at current position. + * @param viewDirection Outgoing ray view direction. + * @param lightDirection Incoming ray light direction. + * @param reflectanceDiffuse (Out) Evaluated diffuse reflectance associated with the sampled ray direction. + * @param reflectanceSpecular (Out) Evaluated specular reflectance associated with the sampled ray direction. + * @return The calculated PDF. + */ +float sampleBRDFPDFAndEvaluteSplit(MaterialBRDF material, float3 normal, float3 viewDirection, + float3 lightDirection, out float3 reflectanceDiffuse, out float3 reflectanceSpecular) +{ + // Evaluate BRDF for new light direction + float dotNL = clamp(dot(normal, lightDirection), -1.0f, 1.0f); + // Calculate half vector + float3 halfVector = normalize(viewDirection + lightDirection); + // Calculate shading angles + float dotHV = saturate(dot(halfVector, viewDirection)); + float dotNH = clamp(dot(normal, halfVector), -1.0f, 1.0f); + float dotNV = clamp(dot(normal, viewDirection), -1.0f, 1.0f); + reflectanceDiffuse = evaluateBRDFDiffuse(material, dotHV, dotNL); +#ifndef DISABLE_SPECULAR_MATERIALS + reflectanceSpecular = evaluateBRDFSpecular(material, dotHV, dotNH, dotNL, dotNV); +#else + reflectanceSpecular = 0.0f.xxx; +#endif + // Transform the view direction into the surfaces tangent coordinate space (oriented so that z axis is aligned to normal) + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(viewDirection); + + // Calculate combined PDF for current sample + // Note: has some duplicated calculations in evaluateBRDF and sampleBRDFPDF + float samplePDF = sampleBRDFPDF(material, dotNH, dotNL, dotHV, dotNV, localView); + return samplePDF; } /** * Calculates a reflected ray direction from a surface by sampling its BRDF. * @tparam RNG The type of random number sampler to be used. 
- * @param material Material data describing BRDF of surface. - * @param randomNG Random number sampler used to sample BRDF. - * @param normal Shading normal vector at current position. - * @param viewDirection Outgoing ray view direction. - * @param reflectance (Out) Evaluated reflectance associated with the sampled ray direction. - * @param pdf (Out) PDF weight associated with the sampled ray direction. + * @param material Material data describing BRDF of surface. + * @param randomNG Random number sampler used to sample BRDF. + * @param normal Shading normal vector at current position. + * @param viewDirection Outgoing ray view direction. + * @param reflectance (Out) Evaluated reflectance associated with the sampled ray direction. + * @param pdf (Out) PDF weight associated with the sampled ray direction. + * @param specularSampled (Out) True if the specular component was sampled, False if diffuse. * @return The new outgoing light ray direction. */ template -float3 sampleBRDF(MaterialBRDF material, inout RNG randomNG, float3 normal, float3 viewDirection, - out float3 reflectance, out float pdf) +float3 sampleBRDFType(MaterialBRDF material, inout RNG randomNG, float3 normal, float3 viewDirection, + out float3 reflectance, out float pdf, out bool specularSampled) { // Transform the view direction into the surfaces tangent coordinate space (oriented so that z axis is aligned to normal) Quaternion localRotation = QuaternionRotationZ(normal); @@ -298,13 +463,14 @@ float3 sampleBRDF(MaterialBRDF material, inout RNG randomNG, float3 normal, floa // Check which BRDF component to sample float3 newLight; float2 samples = randomNG.rand2(); -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS float3 specularLightDirection = calculateGGXSpecularDirection(normal, viewDirection, sqrt(material.roughnessAlpha)); float3 specularHalfVector = normalize(viewDirection + specularLightDirection); // Calculate shading angles float specularDotHV = 
saturate(dot(specularHalfVector, viewDirection)); float probabilityBRDF = calculateBRDFProbability(material.F0, specularDotHV, material.albedo); - if (randomNG.rand() < probabilityBRDF) + specularSampled = randomNG.rand() < probabilityBRDF; + if (specularSampled) { // Sample specular BRDF component newLight = sampleGGX(material.roughnessAlpha, localView, samples); @@ -317,22 +483,23 @@ float3 sampleBRDF(MaterialBRDF material, inout RNG randomNG, float3 normal, floa #else // Sample diffuse BRDF component newLight = sampleLambert(material.albedo, samples); + specularSampled = false; #endif // Evaluate BRDF for new light direction - float dotNL = saturate(newLight.z); + float dotNL = clamp(newLight.z, -1.0f, 1.0f); // Calculate half vector float3 halfVector = normalize(localView + newLight); // Calculate shading angles float dotHV = saturate(dot(halfVector, localView)); - float dotNH = saturate(halfVector.z); - float dotNV = saturate(localView.z); + float dotNH = clamp(halfVector.z, -1.0f, 1.0f); + float dotNV = clamp(localView.z, -1.0f, 1.0f); reflectance = evaluateBRDF(material, dotHV, dotNH, dotNL, dotNV); // Calculate combined PDF for current sample // Note: has some duplicated calculations in evaluateBRDF and sampleBRDFPDF -#ifndef DISABLE_SPECULAR_LIGHTING - pdf = sampleBRDFPDF2(material, dotNH, dotNL, dotNV, probabilityBRDF); +#ifndef DISABLE_SPECULAR_MATERIALS + pdf = sampleBRDFPDF2(material, dotNH, dotNL, dotNV, probabilityBRDF, localView); #else pdf = sampleLambertPDF(dotNL); #endif @@ -342,4 +509,178 @@ float3 sampleBRDF(MaterialBRDF material, inout RNG randomNG, float3 normal, floa return lightDirection; } +/** + * Calculates a reflected ray direction from a surface by sampling its BRDF. + * @tparam RNG The type of random number sampler to be used. + * @param material Material data describing BRDF of surface. + * @param randomNG Random number sampler used to sample BRDF. + * @param normal Shading normal vector at current position. 
+ * @param viewDirection Outgoing ray view direction. + * @param reflectance (Out) Evaluated reflectance associated with the sampled ray direction. + * @param pdf (Out) PDF weight associated with the sampled ray direction. + * @return The new outgoing light ray direction. + */ +template +float3 sampleBRDF(MaterialBRDF material, inout RNG randomNG, float3 normal, float3 viewDirection, + out float3 reflectance, out float pdf) +{ + bool unused; + return sampleBRDFType(material, randomNG, normal, viewDirection, reflectance, pdf, unused); +} + +/** + * Calculates a reflected ray direction from a surface by sampling its BRDFs diffuse component. + * @tparam RNG The type of random number sampler to be used. + * @param material Material data describing BRDF of surface. + * @param randomNG Random number sampler used to sample BRDF. + * @param normal Shading normal vector at current position. + * @param viewDirection Outgoing ray view direction. + * @param reflectance (Out) Evaluated reflectance associated with the sampled ray direction. + * @param pdf (Out) PDF weight associated with the sampled ray direction. + * @return The new outgoing light ray direction. 
+ */ +template +float3 sampleBRDFDiffuse(MaterialBRDF material, inout RNG randomNG, float3 normal, float3 viewDirection, + out float3 reflectance, out float pdf) +{ + // Transform the view direction into the surfaces tangent coordinate space (oriented so that z axis is aligned to normal) + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(viewDirection); + + // Check which BRDF component to sample + float3 newLight; + float2 samples = randomNG.rand2(); + + // Sample diffuse BRDF component + newLight = sampleLambert(material.albedo, samples); + + // Evaluate BRDF for new light direction + float dotNL = clamp(newLight.z, -1.0f, 1.0f); + // Calculate half vector + float3 halfVector = normalize(localView + newLight); + // Calculate shading angles + float dotHV = saturate(dot(halfVector, localView)); + float dotNH = clamp(halfVector.z, -1.0f, 1.0f); + float dotNV = clamp(localView.z, -1.0f, 1.0f); + reflectance = evaluateBRDF(material, dotHV, dotNH, dotNL, dotNV); + + // Calculate combined PDF for current sample + pdf = sampleLambertPDF(dotNL); + + // Transform the new direction back into world space + float3 lightDirection = normalize(localRotation.inverse().transform(newLight)); + return lightDirection; +} + +/** + * Calculate the PDF and evaluate radiance for given values for the diffuse BRDF component. + * @remark This should only be used for rays generated by sampleBRDFDiffuse as otherwise the PDF + * values will be incorrect. When combining diffuse+specular rays with NEE then 3 component form + * of MIS must be used. + * @param material Material data describing BRDF. + * @param normal Shading normal vector at current position. + * @param viewDirection Outgoing ray view direction. + * @param lightDirection Incoming ray light direction. + * @param reflectance (Out) Evaluated reflectance associated with the sampled ray direction. + * @return The calculated PDF. 
+ */ +float sampleBRDFPDFAndEvaluteDiffuse(MaterialBRDF material, float3 normal, float3 viewDirection, + float3 lightDirection, out float3 reflectance) +{ + // Evaluate BRDF for new light direction + float dotNL = clamp(dot(normal, lightDirection), -1.0f, 1.0f); + // Calculate half vector + float3 halfVector = normalize(viewDirection + lightDirection); + // Calculate shading angles + float dotHV = saturate(dot(halfVector, viewDirection)); + reflectance = evaluateBRDFDiffuse(material, dotHV, dotNL); + + // Calculate combined PDF for current sample + // Note: has some duplicated calculations in evaluateBRDF and sampleBRDFPDF + float samplePDF = sampleLambertPDF(dotNL); + return samplePDF; +} + +#ifndef DISABLE_SPECULAR_MATERIALS +/** + * Calculates a reflected ray direction from a surface by sampling its BRDFs specular component. + * @tparam RNG The type of random number sampler to be used. + * @param material Material data describing BRDF of surface. + * @param randomNG Random number sampler used to sample BRDF. + * @param normal Shading normal vector at current position. + * @param viewDirection Outgoing ray view direction. + * @param reflectance (Out) Evaluated reflectance associated with the sampled ray direction. + * @param pdf (Out) PDF weight associated with the sampled ray direction. + * @return The new outgoing light ray direction. 
+ */ +template +float3 sampleBRDFSpecular(MaterialBRDF material, inout RNG randomNG, float3 normal, float3 viewDirection, + out float3 reflectance, out float pdf) +{ + // Transform the view direction into the surfaces tangent coordinate space (oriented so that z axis is aligned to normal) + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(viewDirection); + + // Check which BRDF component to sample + float3 newLight; + float2 samples = randomNG.rand2(); + + // Sample specular BRDF component + newLight = sampleGGX(material.roughnessAlpha, localView, samples); + + // Evaluate BRDF for new light direction + float dotNL = clamp(newLight.z, -1.0f, 1.0f); + // Calculate half vector + float3 halfVector = normalize(localView + newLight); + // Calculate shading angles + float dotHV = saturate(dot(halfVector, localView)); + float dotNH = clamp(halfVector.z, -1.0f, 1.0f); + float dotNV = clamp(localView.z, -1.0f, 1.0f); + reflectance = evaluateBRDF(material, dotHV, dotNH, dotNL, dotNV); + + // Calculate combined PDF for current sample + // Note: has some duplicated calculations in evaluateBRDF and sampleBRDFPDF + pdf = sampleGGXPDF(material.roughnessAlphaSqr, dotNH, dotNV, localView); + + // Transform the new direction back into world space + float3 lightDirection = normalize(localRotation.inverse().transform(newLight)); + return lightDirection; +} + +/** + * Calculate the PDF and evaluate radiance for given values for the specular BRDF component. + * @remark This should only be used for rays generated by sampleBRDFSpecular as otherwise the PDF + * values will be incorrect. When combining diffuse+specular rays with NEE then 3 component form + * of MIS must be used. + * @param material Material data describing BRDF. + * @param normal Shading normal vector at current position. + * @param viewDirection Outgoing ray view direction. + * @param lightDirection Incoming ray light direction. 
+ * @param reflectance (Out) Evaluated reflectance associated with the sampled ray direction. + * @return The calculated PDF. + */ +float sampleBRDFPDFAndEvaluteSpecular(MaterialBRDF material, float3 normal, float3 viewDirection, + float3 lightDirection, out float3 reflectance) +{ + // Evaluate BRDF for new light direction + float dotNL = clamp(dot(normal, lightDirection), -1.0f, 1.0f); + // Calculate half vector + float3 halfVector = normalize(viewDirection + lightDirection); + // Calculate shading angles + float dotHV = saturate(dot(halfVector, viewDirection)); + float dotNH = clamp(dot(normal, halfVector), -1.0f, 1.0f); + float dotNV = clamp(dot(normal, viewDirection), -1.0f, 1.0f); + reflectance = evaluateBRDFSpecular(material, dotHV, dotNH, dotNL, dotNV); + + // Transform the view direction into the surfaces tangent coordinate space (oriented so that z axis is aligned to normal) + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(viewDirection); + + // Calculate combined PDF for current sample + // Note: has some duplicated calculations in evaluateBRDFSpecular and sampleGGXPDF + float samplePDF = sampleGGXPDF(material.roughnessAlphaSqr, dotNH, dotNV, localView); + return samplePDF; +} +#endif // !DISABLE_SPECULAR_MATERIALS + #endif // MATERIAL_SAMPLING_HLSL diff --git a/src/core/src/materials/materials.hlsl b/src/core/src/materials/materials.hlsl index 4c3eb8e..417fedb 100644 --- a/src/core/src/materials/materials.hlsl +++ b/src/core/src/materials/materials.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -34,7 +34,7 @@ SamplerState g_TextureSampler; struct MaterialEvaluated { float3 albedo; -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS float metallicity; float roughness; #endif @@ -56,7 +56,7 @@ MaterialEvaluated MakeMaterialEvaluated(Material material, float2 uv) albedo *= g_TextureMaps[NonUniformResourceIndex(albedoTex)].SampleLevel(g_TextureSampler, uv, 0.0f).xyz; } -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS float metallicity = material.metallicity_roughness.x; uint metallicityTex = asuint(material.metallicity_roughness.y); if (metallicityTex != uint(-1)) @@ -75,7 +75,7 @@ MaterialEvaluated MakeMaterialEvaluated(Material material, float2 uv) MaterialEvaluated ret = { albedo, -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS metallicity, roughness #endif @@ -87,7 +87,7 @@ MaterialEvaluated MakeMaterialEvaluated(Material material, float2 uv) struct MaterialBRDF { float3 albedo; -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS float roughnessAlpha; float3 F0; float roughnessAlphaSqr; @@ -104,7 +104,7 @@ struct MaterialBRDF MaterialBRDF MakeMaterialBRDF(MaterialEvaluated material) { float3 albedo = material.albedo; -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS // Calculate albedo/F0 using metallicity float3 F0 = lerp(0.04f.xxx, albedo, material.metallicity); albedo *= (1.0f - material.metallicity); @@ -117,7 +117,7 @@ MaterialBRDF MakeMaterialBRDF(MaterialEvaluated material) MaterialBRDF ret = { albedo, -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS roughnessAlpha, F0, roughnessAlphaSqr @@ -151,12 +151,11 @@ struct MaterialAlpha */ MaterialAlpha MakeMaterialAlpha(Material material, float2 uv) { - + float alpha = material.normal_alpha_side.y; uint albedoTex = 
asuint(material.albedo.w); - float alpha = 1.0f; if (albedoTex != uint(-1)) { - alpha = g_TextureMaps[NonUniformResourceIndex(albedoTex)].SampleLevel(g_TextureSampler, uv, 0.0f).w; + alpha *= g_TextureMaps[NonUniformResourceIndex(albedoTex)].SampleLevel(g_TextureSampler, uv, 0.0f).w; } MaterialAlpha ret = { @@ -217,7 +216,7 @@ MaterialBSDF MakeMaterialBSDF(Material material, float2 uv) MaterialBSDF ret = { materialBRDF.albedo, -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS materialBRDF.roughnessAlpha, materialBRDF.F0, materialBRDF.roughnessAlphaSqr, @@ -237,7 +236,7 @@ MaterialBRDF MakeMaterialBRDF(MaterialBSDF material) MaterialBRDF ret = { material.albedo, -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS material.roughnessAlpha, material.F0, material.roughnessAlphaSqr, @@ -259,4 +258,69 @@ MaterialEmissive MakeMaterialEmissive(MaterialBSDF material) }; return ret; } + +/** + * Packs a material into a compressed storage format. + * @param material Evaluated material. + * @return Packed data that can be unpacked using unpackMaterial. + */ +uint packMaterial(MaterialEvaluated material) +{ +#ifdef DISABLE_SPECULAR_MATERIALS + // Pack albedo color onto 10-10-10 format, i.e. 30 bits + uint albedo = (uint(pow(saturate(material.albedo.x), 1.0f / 2.2f) * 1023.0f) << 20) + | (uint(pow(saturate(material.albedo.y), 1.0f / 2.2f) * 1023.0f) << 10) + | (uint(pow(saturate(material.albedo.z), 1.0f / 2.2f) * 1023.0f)); + return albedo; +#else + // Pack albedo color onto 5-6-5 format, i.e. 
16 bits + uint albedo = (uint(pow(saturate(material.albedo.x), 1.0f / 2.2f) * 31.0f) << 11) + | (uint(pow(saturate(material.albedo.y), 1.0f / 2.2f) * 63.0f) << 5) + | (uint(pow(saturate(material.albedo.z), 1.0f / 2.2f) * 31.0f) << 0); + + // Pack metallicity and roughness onto 8 bits each + uint metallicityRoughness = (uint(saturate(material.metallicity) * 255.0f) << 8) + | (uint(saturate(material.roughness) * 255.0f) << 0); + + return (albedo << 16) | metallicityRoughness; +#endif +} + +/** + * Unpacks a material from a compressed storage format. + * @param packedMaterial The packed material created using packMaterial. + * @return BRDF material corresponding to the unpacked parameters. + */ +MaterialBRDF unpackMaterial(in uint packedMaterial) +{ + MaterialBRDF material; + +#ifdef DISABLE_SPECULAR_MATERIALS + // Unpack the albedo + material.albedo = float3( + pow(((packedMaterial >> 20) & 0x3FFu) / 1023.0f, 2.2f), + pow(((packedMaterial >> 10) & 0x3FFu) / 1023.0f, 2.2f), + pow(((packedMaterial) & 0x3FFu) / 1023.0f, 2.2f) + ); +#else + MaterialEvaluated material2; + // Unpack the albedo + uint albedo = (packedMaterial >> 16); + material2.albedo = float3( + pow(((albedo >> 11) & 0x1Fu) / 31.0f, 2.2f), + pow(((albedo >> 5) & 0x3Fu) / 63.0f, 2.2f), + pow(((albedo >> 0) & 0x1Fu) / 31.0f, 2.2f) + ); + + // Unpack the metallicity and roughness + uint metallicityRoughness = (packedMaterial & 0xFFFFu); + + material2.metallicity = ((metallicityRoughness >> 8) & 0xFFu) / 255.0f; + material2.roughness = ((metallicityRoughness >> 0) & 0xFFu) / 255.0f; + + material = MakeMaterialBRDF(material2); +#endif + return material; +} + #endif // MATERIALS_HLSL diff --git a/src/core/src/math/color.hlsl b/src/core/src/math/color.hlsl index 3868c25..ad784e6 100644 --- a/src/core/src/math/color.hlsl +++ b/src/core/src/math/color.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -73,6 +73,40 @@ float3 convertYCoCgToRGB(float3 color) + color.bbb * float3(-1.0f, 1.0f, -1.0f)); } +/** + * Encode a value using ITU Rec2100 Perceptual Quantizer (PQ) EOTF. + * @param value Input value to encode. + * @return The converted luminance value. + */ +float encodePQEOTF(float value) +{ + const float c1 = 0.8359375f; + const float c2 = 18.8515625f; + const float c3 = 18.6875f; + const float m1 = 0.1593017578125f; + const float m2 = 78.84375f; + + float powM2 = pow(value, 1.0f / m2); + return pow(max(powM2 - c1, 0) / (c2 - c3 * powM2), 1.0f / m1); +} + +/** + * Decode a value using ITU Rec2100 Perceptual Quantizer (PQ) EOTF. + * @param value Input value (should be luminance) to decode. + * @return The converted value. + */ +float decodePQEOTF(float value) +{ + const float c1 = 0.8359375f; + const float c2 = 18.8515625f; + const float c3 = 18.6875f; + const float m1 = 0.1593017578125f; + const float m2 = 78.84375f; + + float powM1 = pow(value, m1); + return pow((c1 + c2 * powM1) / (1.0f + c3 * powM1), m2); +} + /** * Tonemap an input colour using simple Reinhard. * @param color Input colour value to tonemap. diff --git a/src/core/src/math/hash.hlsl b/src/core/src/math/hash.hlsl index 98b5e29..29366e4 100644 --- a/src/core/src/math/hash.hlsl +++ b/src/core/src/math/hash.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,28 +23,192 @@ THE SOFTWARE. 
#ifndef HASH_HLSL
 #define HASH_HLSL
 
-// https://www.pcg-random.org/
-uint pcg(in uint v)
+#include "math.hlsl"
+
+/**
+ * Hash an input value based on PCG hashing function.
+ * @param value The input value to hash.
+ * @return The calculated hash value.
+ */
+uint pcgHash(uint value)
 {
-    uint state = v * 747796405u + 2891336453u;
+    // https://www.pcg-random.org/
+    uint state = value * 747796405u + 2891336453u;
     uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
-
     return (word >> 22u) ^ word;
 }
 
-// xxhash (https://github.com/Cyan4973/xxHash)
-// From: https://www.shadertoy.com/view/Xt3cDn
-uint xxhash32(in uint p)
+/**
+ * Hash two input values based on PCG hashing function.
+ * @param values The input values to hash.
+ * @return The calculated hash value (wrapping 32bit sum of the mixed lanes).
+ */
+uint pcgHash(uint2 values)
+{
+    values = values * 1664525u + 1013904223u;
+    values.x += values.y * 1664525u;
+    values.y += values.x * 1664525u;
+    values = values ^ (values >> 16u);
+    values.x += values.y * 1664525u;
+    values.y += values.x * 1664525u;
+    values = values ^ (values >> 16u);
+    return values.x + values.y; // integer sum: going through the float hadd() overloads would round away low bits
+}
+
+/**
+ * Hash three input values based on PCG hashing function.
+ * @param values The input values to hash.
+ * @return The calculated hash value (wrapping 32bit sum of the mixed lanes).
+ */
+uint pcgHash(uint3 values)
+{
+    // Hash Functions for GPU Rendering - Jarzynski
+    values = values * 1664525u + 1013904223u;
+    values.x += values.y * values.z;
+    values.y += values.z * values.x;
+    values.z += values.x * values.y;
+    values ^= values >> 16u;
+    values.x += values.y * values.z;
+    values.y += values.z * values.x;
+    values.z += values.x * values.y;
+    return values.x + values.y + values.z; // integer sum: going through the float hadd() overloads would round away low bits
+}
+
+/**
+ * Hash four input values based on PCG hashing function.
+ * @param values The input values to hash.
+ * @return The calculated hash value (wrapping 32bit sum of the mixed lanes).
+ */
+uint pcgHash(uint4 values)
+{
+    values = values * 1664525u + 1013904223u;
+    values.x += values.y * values.w;
+    values.y += values.z * values.x;
+    values.z += values.x * values.y;
+    values.w += values.y * values.z;
+    values ^= values >> 16u;
+    values.x += values.y * values.w;
+    values.y += values.z * values.x;
+    values.z += values.x * values.y;
+    values.w += values.y * values.z;
+    return values.x + values.y + values.z + values.w; // integer sum: going through the float hadd() overloads would round away low bits
+}
+
+/**
+ * Hash an input value based on xxHash hashing function.
+ * @param value The input value to hash.
+ * @return The calculated hash value.
+ */
+uint xxHash(uint value)
+{
+    // xxhash (https://github.com/Cyan4973/xxHash)
+    const uint prime32_2 = 2246822519u, prime32_3 = 3266489917u;
+    const uint prime32_4 = 668265263u, prime32_5 = 374761393u;
+    uint ret = value + prime32_5;
+    ret = prime32_4 * ((ret << 17) | (ret >> (32 - 17)));
+    ret = prime32_2 * (ret ^ (ret >> 15));
+    ret = prime32_3 * (ret ^ (ret >> 13));
+    return ret ^ (ret >> 16);
+}
+
+/**
+ * Hash two input values based on xxHash hashing function.
+ * @param values The input values to hash.
+ * @return The calculated hash value.
+ */
+uint xxHash(uint2 values)
+{
+    // xxhash (https://github.com/Cyan4973/xxHash)
+    const uint prime32_2 = 2246822519u, prime32_3 = 3266489917u;
+    const uint prime32_4 = 668265263u, prime32_5 = 374761393u;
+    uint ret = values.y + prime32_5 + values.x * prime32_3;
+    ret = prime32_4 * ((ret << 17) | (ret >> (32 - 17)));
+    ret = prime32_2 * (ret ^ (ret >> 15));
+    ret = prime32_3 * (ret ^ (ret >> 13));
+    return ret ^ (ret >> 16);
+}
+
+/**
+ * Hash three input values based on xxHash hashing function.
+ * @param values The input values to hash.
+ * @return The calculated hash value.
+ */ +uint xxHash(uint3 values) +{ + // xxhash (https://github.com/Cyan4973/xxHash) + const uint prime32_2 = 2246822519u, prime32_3 = 3266489917u; + const uint prime32_4 = 668265263u, prime32_5 = 374761393u; + uint ret = values.z + prime32_5 + values.x * prime32_3; + ret = prime32_4 * ((ret << 17) | (ret >> (32 - 17))); + ret += values.y * prime32_3; + ret = prime32_4 * ((ret << 17) | (ret >> (32 - 17))); + ret = prime32_2 * (ret ^ (ret >> 15)); + ret = prime32_3 * (ret ^ (ret >> 13)); + return ret ^ (ret >> 16); +} + +/** + * Hash four input values based on xxHash hashing function. + * @param values The input values to hash. + * @return The calculated hash value. + */ +uint xxHash(uint4 values) { - const uint PRIME32_2 = 2246822519U, PRIME32_3 = 3266489917U; - const uint PRIME32_4 = 668265263U, PRIME32_5 = 374761393U; + // xxhash (https://github.com/Cyan4973/xxHash) + const uint prime32_2 = 2246822519u, prime32_3 = 3266489917u; + const uint prime32_4 = 668265263u, prime32_5 = 374761393u; + uint ret = values.w + prime32_5 + values.x * prime32_3; + ret = prime32_4 * ((ret << 17) | (ret >> (32 - 17))); + ret += values.y * prime32_3; + ret = prime32_4 * ((ret << 17) | (ret >> (32 - 17))); + ret += values.z * prime32_3; + ret = prime32_4 * ((ret << 17) | (ret >> (32 - 17))); + ret = prime32_2 * (ret ^ (ret >> 15)); + ret = prime32_3 * (ret ^ (ret >> 13)); + return ret ^ (ret >> 16); +} - uint h32 = p + PRIME32_5; - h32 = PRIME32_4 * ((h32 << 17) | (h32 >> (32 - 17))); - h32 = PRIME32_2 * (h32 ^ (h32 >> 15)); - h32 = PRIME32_3 * (h32 ^ (h32 >> 13)); +/** + * Converts an input integer [0, UINT_MAX) to float [0, 1). + * @param value The input value to convert. + * @return The converted float value. + */ +float hashToFloat(uint value) +{ + // Note: Use the upper 24 bits to avoid a bias due to floating point rounding error. 
+ float ret = (float)(value >> 8) * 0x1.0p-24f; + return ret; +} - return h32 ^ (h32 >> 16); +/** + * Hash two input values based on trig hashing function. + * @param values The input values to hash. + * @return The calculated hash value. + */ +float trigHash(float2 value) +{ + // On generating random numbers, with help of y= [(a+x)sin(bx)] mod 1 - Rey + return frac(43757.5453f * sin(dot(value, float2(12.9898f, 78.233f)))); +} + +/** + * Hash three input values based on trig hashing function. + * @param values The input values to hash. + * @return The calculated hash value. + */ +float trigHash(float3 values) +{ + return trigHash(float2(trigHash(values.xy), values.z)); +} + +/** + * Hash four input values based on trig hashing function. + * @param values The input values to hash. + * @return The calculated hash value. + */ +float trigHash(float4 values) +{ + return trigHash(float3(trigHash(values.xy), values.z, values.w)); } #endif // HASH_HLSL diff --git a/src/core/src/math/math.hlsl b/src/core/src/math/math.hlsl index ffa75ac..9580f6a 100644 --- a/src/core/src/math/math.hlsl +++ b/src/core/src/math/math.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,8 +27,9 @@ THE SOFTWARE. /** * Clamps a value between (0, 1]. - * @note This prevents values form being clamped to exactly zero but instead to near zero - * @return The clamped value. + * @note This prevents values form being clamped to exactly zero but instead to near zero. + * @param value Value to clamp. + * @returns The clamped value. */ float clampRange(float value) { @@ -41,8 +42,9 @@ float3 clampRange(float3 value) /** * Clamps a value to be greater than epsilon. 
- * @note This prevents values form being clamped to exactly zero but instead to near zero - * @return The clamped value. + * @note This prevents values form being clamped to exactly zero but instead to near zero. + * @param value Value to clamp. + * @returns The clamped value. */ float clampMax(float value) { @@ -53,17 +55,30 @@ float3 clampMax(float3 value) return max(FLT_EPSILON.xxx, value); } - +/** + * Raises a value to the power of 2 i.e. squared. + * @param value Value to square. + * @returns The squared value. + */ float squared(const float value) { return value * value; } - float3 squared(const float3 value) { return value * value; } +/** + * Get the squared length of a vector. + * @param value Value to get squared length from. + * @returns The squared value. + */ +float lengthSqr(const float3 value) +{ + return dot(value, value); +} + float2 ndcToUv(const float2 ndc) { return 0.5f * ndc * float2(1.0f, -1.0f) + 0.5f; @@ -74,12 +89,27 @@ float2 uvToNdc(const float2 uv) return float2(uv.x, 1.0f - uv.y) * 2.0f - 1.0f; } +/** + * Get the squared distance between 2 points. + * @param a The first point. + * @param b The second point. + * @returns The squared distance. + */ float distanceSqr(float3 a, float3 b) { float3 c = a - b; return dot(c, c); } +/** + * Get the largest value of all elements in a vector. + * @param val The input vector. + * @returns The largest value. + */ +float hmax(float2 val) +{ + return max(val.x, val.y); +} float hmax(float3 val) { return max(val.x, max(val.y, val.z)); @@ -89,6 +119,15 @@ float hmax(float4 val) return max(max(val.x, val.z), max(val.y, val.w)); } +/** + * Get the smallest value of all elements in a vector. + * @param val The input vector. + * @returns The smallest value. 
+ */ +float hmin(float2 val) +{ + return min(val.x, val.y); +} float hmin(float3 val) { return min(val.x, min(val.y, val.z)); @@ -98,6 +137,24 @@ float hmin(float4 val) return min(min(val.x, val.z), min(val.y, val.w)); } +/** + * Sum all elements of a vector. + * @param val The input vector. + * @returns The combined value. + */ +float hadd(float2 val) +{ + return val.x + val.y; +} +float hadd(float3 val) +{ + return val.x + val.y + val.z; +} +float hadd(float4 val) +{ + return val.x + val.y + val.z + val.w; +} + #if __HLSL_VERSION < 2021 float2 select(bool2 a, float2 b, float2 c) { diff --git a/src/core/src/math/math_constants.hlsl b/src/core/src/math/math_constants.hlsl index 1ad8c2a..9909de2 100644 --- a/src/core/src/math/math_constants.hlsl +++ b/src/core/src/math/math_constants.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,31 +23,31 @@ THE SOFTWARE. 
#ifndef MATH_CONSTANTS_HLSL #define MATH_CONSTANTS_HLSL -#ifndef QUARTER_PI +#ifndef QUARTER_PI // pi/4 #define QUARTER_PI 0.78539816339744830961566084581988 #endif -#ifndef HALF_PI +#ifndef HALF_PI // pi/2 #define HALF_PI 1.5707963267948966192313216916398 #endif -#ifndef PI +#ifndef PI // pi #define PI 3.1415926535897932384626433832795 #endif -#ifndef TWO_PI +#ifndef TWO_PI // 2pi #define TWO_PI 6.283185307179586476925286766559 #endif -#ifndef FOUR_PI +#ifndef FOUR_PI // 4pi #define FOUR_PI 12.566370614359172953850573533118 #endif -#ifndef INV_TWO_PI +#ifndef INV_TWO_PI // 1/(2pi) #define INV_TWO_PI 0.15915494309189533576888376337251 #endif -#ifndef INV_FOUR_PI +#ifndef INV_FOUR_PI // 1/(4pi) #define INV_FOUR_PI 0.07957747154594766788444188168626 #endif diff --git a/src/core/src/math/pack.hlsl b/src/core/src/math/pack.hlsl index 7d58047..93fb1eb 100644 --- a/src/core/src/math/pack.hlsl +++ b/src/core/src/math/pack.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -25,43 +25,485 @@ THE SOFTWARE. #include "../gpu_shared.h" -uint packUnorm4x8(in float4 value) +/** + * Convert float value to single 8bit unorm. + * @note Input values are clamped to the [0, 1] range. + * @param value Input float value. + * @returns 8bit unorm in lower 8bits, high bits are all zero. + */ +uint packUnorm1x8(float value) +{ + uint packedValue = uint(saturate(value) * 255.0f); + return packedValue; +} + +/** + * Pack 2 float values to 8bit unorm values. + * @note Input values are clamped to the [0, 1] range. + * @param value Input float values to pack. + * @returns Packed 8bit unorms in lower bits, high bits are all zero. 
+ */ +uint packUnorm2x8(float2 value) +{ + uint2 packedValue = uint2(saturate(value) * 255.0f.xx); + return packedValue.x | (packedValue.y << 8); +} + +/** + * Pack 3 float values to 8bit unorm values. + * @note Input values are clamped to the [0, 1] range. + * @param value Input float values to pack. + * @returns Packed 8bit unorms in lower bits, high bits are all zero. + */ +uint packUnorm3x8(float3 value) +{ + uint3 packedValue = uint3(saturate(value) * 255.0f.xxx); + return packedValue.x | (packedValue.y << 8) | (packedValue.z << 16); +} + +/** + * Pack 4 float values to 8bit unorm values. + * @note Input values are clamped to the [0, 1] range. + * @param value Input float values to pack. + * @returns Packed 8bit unorms. + */ +uint packUnorm4x8(float4 value) +{ + uint4 packedValue = uint4(saturate(value) * 255.0f.xxxx); + return packedValue.x | (packedValue.y << 8) | (packedValue.z << 16) | (packedValue.w << 24); +} + +/** + * Convert 8bit unorm to float. + * @param packedValue Input unorm value to convert. + * @returns Converted float value (range [0,1]). + */ +float unpackUnorm1x8(uint packedValue) +{ + return float(packedValue & 0xFFu) * (1.0f / 255.0f); +} + +/** + * Convert 8bit unorms to floats. + * @param packedValue Input unorm values to convert. + * @returns Converted float values (range [0,1]). + */ +float2 unpackUnorm2x8(uint packedValue) +{ + uint2 value = uint2(packedValue, packedValue >> 8) & 0xFFu.xx; + return float2(value) * (1.0f / 255.0f).xx; +} + +/** + * Convert 8bit unorms to floats. + * @param packedValue Input unorm values to convert. + * @returns Converted float values (range [0,1]). + */ +float3 unpackUnorm3x8(uint packedValue) +{ + uint3 value = uint3(packedValue, packedValue >> 8, + packedValue >> 16) & 0xFFu.xxx; + return float3(value) * (1.0f / 255.0f).xxx; +} + +/** + * Convert 8bit unorms to floats. + * @param packedValue Input unorm values to convert. + * @returns Converted float values (range [0,1]). 
+ */
+float4 unpackUnorm4x8(uint packedValue)
+{
+    uint4 value = uint4(packedValue, packedValue >> 8, packedValue >> 16, packedValue >> 24) & 0xFFu.xxxx;
+    return float4(value) * (1.0f / 255.0f).xxxx;
+}
+
+/**
+ * Convert float value to single 8bit snorm.
+ * @note Input values are clamped to the [-1, 1] range; negative values are stored in two's complement.
+ * @param value Input float value.
+ * @returns 8bit snorm in lower 8bits, high bits are all zero.
+ */
+uint packSnorm1x8(float value)
+{
+    uint packedValue = asuint(int(clamp(value, -1.0f, 1.0f) * 127.0f + (0.5f * sign(value)))) & 0xFFu;
+    return packedValue;
+}
+
+/**
+ * Pack 2 float values to 8bit snorm values.
+ * @note Input values are clamped to the [-1, 1] range; negative values are stored in two's complement.
+ * @param value Input float values to pack.
+ * @returns Packed 8bit snorms in lower bits, high bits are all zero.
+ */
+uint packSnorm2x8(float2 value)
+{
+    uint2 packedValue = asuint(int2(clamp(value, -1.0f.xx, 1.0f.xx) * 127.0f.xx + (0.5f.xx * sign(value)))) & 0xFFu.xx;
+    return packedValue.x | (packedValue.y << 8);
+}
+
+/**
+ * Pack 3 float values to 8bit snorm values.
+ * @note Input values are clamped to the [-1, 1] range; negative values are stored in two's complement.
+ * @param value Input float values to pack.
+ * @returns Packed 8bit snorms in lower bits, high bits are all zero.
+ */
+uint packSnorm3x8(float3 value)
+{
+    uint3 packedValue = asuint(int3(clamp(value, -1.0f.xxx, 1.0f.xxx) * 127.0f.xxx + (0.5f.xxx * sign(value)))) & 0xFFu.xxx;
+    return packedValue.x | (packedValue.y << 8) | (packedValue.z << 16);
+}
+
+/**
+ * Pack 4 float values to 8bit snorm values.
+ * @note Input values are clamped to the [-1, 1] range; negative values are stored in two's complement.
+ * @param value Input float values to pack.
+ * @returns Packed 8bit snorms.
+ */
+uint packSnorm4x8(float4 value)
+{
+    uint4 packedValue = asuint(int4(clamp(value, -1.0f.xxxx, 1.0f.xxxx) * 127.0f.xxxx + (0.5f.xxxx * sign(value)))) & 0xFFu.xxxx;
+    return packedValue.x | (packedValue.y << 8) | (packedValue.z << 16) | (packedValue.w << 24);
+}
+
+/**
+ * Convert 8bit snorm to float.
+ * @param packedValue Input snorm value to convert.
+ * @returns Converted float value (range [-1,1]); the 8bit field is sign-extended and the -128 code maps to -1.
+ */
+float unpackSnorm1x8(uint packedValue)
+{
+    return max(float(asint(packedValue << 24) >> 24) * (1.0f / 127.0f), -1.0f);
+}
+
+/**
+ * Convert 8bit snorms to floats.
+ * @param packedValue Input snorm values to convert.
+ * @returns Converted float values (range [-1,1]); each 8bit field is sign-extended and the -128 code maps to -1.
+ */
+float2 unpackSnorm2x8(uint packedValue)
+{
+    int2 value = asint(uint2(packedValue << 24, packedValue << 16)) >> 24;
+    return max(float2(value) * (1.0f / 127.0f).xx, -1.0f.xx);
+}
+
+/**
+ * Convert 8bit snorms to floats.
+ * @param packedValue Input snorm values to convert.
+ * @returns Converted float values (range [-1,1]); each 8bit field is sign-extended and the -128 code maps to -1.
+ */
+float3 unpackSnorm3x8(uint packedValue)
+{
+    int3 value = asint(uint3(packedValue << 24, packedValue << 16,
+        packedValue << 8)) >> 24;
+    return max(float3(value) * (1.0f / 127.0f).xxx, -1.0f.xxx);
+}
+
+/**
+ * Convert 8bit snorms to floats.
+ * @param packedValue Input snorm values to convert.
+ * @returns Converted float values (range [-1,1]); each 8bit field is sign-extended and the -128 code maps to -1.
+ */
+float4 unpackSnorm4x8(uint packedValue)
+{
+    int4 value = asint(uint4(packedValue << 24, packedValue << 16, packedValue << 8, packedValue)) >> 24;
+    return max(float4(value) * (1.0f / 127.0f).xxxx, -1.0f.xxxx);
+}
+
+/**
+ * Convert float value to single 16bit unorm.
+ * @note Input values are clamped to the [0, 1] range.
+ * @param value Input float value.
+ * @returns 16bit unorm in lower 16bits, high bits are all zero.
+ */
+uint packUnorm1x16(float value)
+{
+    uint packedValue = uint(saturate(value) * 65535.0f);
+    return packedValue;
+}
+
+/**
+ * Pack 2 float values to 16bit unorm values.
+ * @note Input values are clamped to the [0, 1] range.
+ * @param value Input float values to pack.
+ * @returns Packed 16bit unorms
+ */
+uint packUnorm2x16(float2 value)
+{
+    uint2 packedValue = uint2(saturate(value) * 65535.0f);
+    return packedValue.x | (packedValue.y << 16);
+}
+
+/**
+ * Pack 3 float values to 16bit unorm values.
+ * @note Input values are clamped to the [0, 1] range.
+ * @param value Input float values to pack.
+ * @returns Packed 16bit unorms.
+ */
+uint2 packUnorm3x16(float3 value)
+{
+    uint3 packedValue = uint3(saturate(value) * 65535.0f);
+    return uint2(packedValue.x | (packedValue.y << 16), packedValue.z);
+}
+
+/**
+ * Pack 4 float values to 16bit unorm values.
+ * @note Input values are clamped to the [0, 1] range.
+ * @param value Input float values to pack.
+ * @returns Packed 16bit unorms.
+ */
+uint2 packUnorm4x16(float4 value)
+{
+    uint4 packedValue = uint4(saturate(value) * 65535.0f);
+    return uint2(packedValue.x | (packedValue.y << 16), packedValue.z | (packedValue.w << 16));
+}
+
+/**
+ * Convert 16bit unorm to float.
+ * @param packedValue Input unorm value to convert.
+ * @returns Converted float value (range [0,1]).
+ */
+float unpackUnorm1x16(uint packedValue)
+{
+    return float(packedValue & 0xFFFFu) * (1.0f / 65535.0f);
+}
+
+/**
+ * Convert 16bit unorms to floats.
+ * @param packedValue Input unorm values to convert.
+ * @returns Converted float values (range [0,1]).
+ */
+float2 unpackUnorm2x16(uint packedValue)
+{
+    uint2 value = uint2(packedValue, packedValue >> 16) & 0xFFFFu.xx;
+    return float2(value) * (1.0f / 65535.0f).xx;
+}
+
+/**
+ * Convert 16bit unorms to floats.
+ * @param packedValue Input unorm values to convert, laid out as returned by packUnorm3x16 (x|y<<16, z).
+ * @returns Converted float values (range [0,1]).
+ */
+float3 unpackUnorm3x16(uint2 packedValue)
+{
+    uint3 value = uint3(packedValue.x, packedValue.x >> 16,
+        packedValue.y) & 0xFFFFu.xxx;
+    return float3(value) * (1.0f / 65535.0f).xxx;
+}
+
+/**
+ * Convert 16bit unorms to floats.
+ * @param packedValue Input unorm values to convert, laid out as returned by packUnorm4x16 (x|y<<16, z|w<<16).
+ * @returns Converted float values (range [0,1]).
+ */
+float4 unpackUnorm4x16(uint2 packedValue)
+{
+    uint4 value = uint4(packedValue.x, packedValue.x >> 16,
+        packedValue.y, packedValue.y >> 16) & 0xFFFFu.xxxx;
+    return float4(value) * (1.0f / 65535.0f).xxxx;
+}
+
+/**
+ * Convert float value to single 16bit snorm.
+ * @note Input values are clamped to the [-1, 1] range.
+ * @param value Input float value.
+ * @returns 16bit snorm in lower 16bits, high bits are all zero.
+ */
+uint packSnorm1x16(float value)
+{
+    uint packedValue = asuint(int(clamp(value, -1.0f, 1.0f) * 32767.0f + (0.5f * sign(value)))) & 0xFFFFu;
+    return packedValue;
+}
+
+/**
+ * Pack 2 float values to 16bit snorm values.
+ * @note Input values are clamped to the [-1, 1] range; negative values are stored in two's complement.
+ * @param value Input float values to pack.
+ * @returns Packed 16bit snorms
+ */
+uint packSnorm2x16(float2 value)
+{
+    uint2 packedValue = asuint(int2(clamp(value, -1.0f.xx, 1.0f.xx) * 32767.0f.xx + (0.5f.xx * sign(value)))) & 0xFFFFu.xx;
+    return packedValue.x | (packedValue.y << 16);
+}
+
+/**
+ * Pack 3 float values to 16bit snorm values.
+ * @note Input values are clamped to the [-1, 1] range; negative values are stored in two's complement.
+ * @param value Input float values to pack.
+ * @returns Packed 16bit snorms (x|y<<16, z).
+ */
+uint2 packSnorm3x16(float3 value)
 {
-    uint4 packed_value = uint4(saturate(value) * 255.0f);
+    uint3 packedValue = asuint(int3(clamp(value, -1.0f.xxx, 1.0f.xxx) * 32767.0f.xxx + (0.5f.xxx * sign(value)))) & 0xFFFFu.xxx;
+    return uint2(packedValue.x | (packedValue.y << 16), packedValue.z);
+}
+
+/**
+ * Pack 4 float values to 16bit snorm values.
+ * @note Input values are clamped to the [-1, 1] range; negative values are stored in two's complement.
+ * @param value Input float values to pack.
+ * @returns Packed 16bit snorms (x|y<<16, z|w<<16).
+ */
+uint2 packSnorm4x16(float4 value)
+{
+    uint4 packedValue = asuint(int4(clamp(value, -1.0f.xxxx, 1.0f.xxxx) * 32767.0f.xxxx + (0.5f.xxxx * sign(value)))) & 0xFFFFu.xxxx;
+    return uint2(packedValue.x | (packedValue.y << 16), packedValue.z | (packedValue.w << 16));
+}
 
-    return (packed_value.x << 0) | (packed_value.y << 8) | (packed_value.z << 16) | (packed_value.w << 24);
+/**
+ * Convert 16bit snorm to float.
+ * @param packedValue Input snorm value to convert.
+ * @returns Converted float value (range [-1,1]); the 16bit field is sign-extended and the -32768 code maps to -1.
+ */
+float unpackSnorm1x16(uint packedValue)
+{
+    return max(float(asint(packedValue << 16) >> 16) * (1.0f / 32767.0f), -1.0f);
+}
+
+/**
+ * Convert 16bit snorms to floats.
+ * @param packedValue Input snorm values to convert.
+ * @returns Converted float values (range [-1,1]); each 16bit field is sign-extended and the -32768 code maps to -1.
+ */
+float2 unpackSnorm2x16(uint packedValue)
+{
+    int2 value = asint(uint2(packedValue << 16, packedValue)) >> 16;
+    return max(float2(value) * (1.0f / 32767.0f).xx, -1.0f.xx);
+}
+
+/**
+ * Convert 16bit snorms to floats.
+ * @param packedValue Input snorm values to convert, laid out as returned by packSnorm3x16 (x|y<<16, z).
+ * @returns Converted float values (range [-1,1]); each 16bit field is sign-extended and the -32768 code maps to -1.
+ */
+float3 unpackSnorm3x16(uint2 packedValue)
+{
+    int3 value = asint(uint3(packedValue.x << 16, packedValue.x,
+        packedValue.y << 16)) >> 16;
+    return max(float3(value) * (1.0f / 32767.0f).xxx, -1.0f.xxx);
+}
+
+/**
+ * Convert 16bit snorms to floats.
+ * @param packedValue Input snorm values to convert, laid out as returned by packSnorm4x16 (x|y<<16, z|w<<16).
+ * @returns Converted float values (range [-1,1]); each 16bit field is sign-extended and the -32768 code maps to -1.
+ */
+float4 unpackSnorm4x16(uint2 packedValue)
+{
+    int4 value = asint(uint4(packedValue.x << 16, packedValue.x, packedValue.y << 16, packedValue.y)) >> 16;
+    return max(float4(value) * (1.0f / 32767.0f).xxxx, -1.0f.xxxx);
+}
+
+/**
+ * Pack 2 float values as half precision.
+ * @param value Input float values to pack.
+ * @returns Packed 16bit half values.
+ */
+uint packHalf2(float2 value)
+{
+    uint packedValue = f32tof16(value.x) | (f32tof16(value.y) << 16);
+    return packedValue;
+}
+
+/**
+ * Pack 3 float values as half precision.
+ * @param value Input float values to pack.
+ * @returns Packed 16bit half values.
+ */
+uint2 packHalf3(float3 value)
+{
+    uint2 packedValue = uint2(f32tof16(value.x) | (f32tof16(value.y) << 16),
+        f32tof16(value.z));
+    return packedValue;
 }
 
-float4 unpackUnorm4x8(in uint packed_value)
+/**
+ * Pack 4 float values as half precision.
+ * @param value Input float values to pack.
+ * @returns Packed 16bit half values.
+ */
+uint2 packHalf4(float4 value)
 {
-    uint4 value = uint4((packed_value >> 0) & 0xFFu, (packed_value >> 8) & 0xFFu,
-        (packed_value >> 16) & 0xFFu, (packed_value >> 24) & 0xFFu);
+    uint2 packedValue = uint2(f32tof16(value.x) | (f32tof16(value.y) << 16),
+        f32tof16(value.z) | (f32tof16(value.w) << 16));
+    return packedValue;
+}
 
-    return value / 255.0f;
+/**
+ * Unpack 2 half precision values to floats.
+ * @param packedValue Input packed 16bit half values (as returned by packHalf2).
+ * @returns Converted float values.
+ */
+float2 unpackHalf2(uint packedValue)
+{
+    return float2(f16tof32(packedValue & 0xFFFFu), f16tof32(packedValue >> 16));
 }
 
-uint packNormal(in float3 normal)
+/**
+ * Unpack 3 half precision values to floats.
+ * @param packedValue Input packed 16bit half values (as returned by packHalf3).
+ * @returns Converted float values.
+ */
+float3 unpackHalf3(uint2 packedValue)
 {
-    return packUnorm4x8(float4(0.5f * normal + 0.5, 0.0f));
+    return float3(f16tof32(packedValue.x & 0xFFFFu), f16tof32(packedValue.x >> 16),
+        f16tof32(packedValue.y & 0xFFFFu));
 }
 
-float3 unpackNormal(in uint packed_normal)
+/**
+ * Unpack 4 half precision values to floats.
+ * @param packedValue Input packed 16bit half values (as returned by packHalf4).
+ * @returns Converted float values.
+ */
+float4 unpackHalf4(uint2 packedValue)
 {
-    return 2.0f * unpackUnorm4x8(packed_normal).xyz - 1.0f;
+    return float4(f16tof32(packedValue.x & 0xFFFFu), f16tof32(packedValue.x >> 16),
+        f16tof32(packedValue.y & 0xFFFFu), f16tof32(packedValue.y >> 16));
 }
 
-float packUVs(in float2 uv)
+/**
+ * Pack normal vector values to 10bit snorm values.
+ * @note Input values are clamped to the [-1, 1] range; negative values are stored in two's complement.
+ * @param value Input float values to pack.
+ * @returns Packed 10bit snorms in lower bits, high bits are all zero.
+ */
+uint packNormal(float3 value)
 {
-    uint packed_uv = (f32tof16(uv.x) << 16) | f32tof16(uv.y);
+    uint3 packedValue = asuint(int3(clamp(value, -1.0f.xxx, 1.0f.xxx) * 511.0f.xxx + (0.5f.xxx * sign(value)))) & 0x3FFu.xxx;
+    return packedValue.x | (packedValue.y << 10) | (packedValue.z << 20);
+}
 
-    return asfloat(packed_uv);
+/**
+ * Convert 10bit snorms to normal vector.
+ * @param packedValue Input snorm values to convert.
+ * @returns Converted float values (range [-1,1]); each 10bit field is sign-extended and the -512 code maps to -1.
+ */
+float3 unpackNormal(uint packedValue)
+{
+    int3 value = asint(uint3(packedValue << 22, packedValue << 12,
+        packedValue << 2)) >> 22;
+    return max(float3(value) * (1.0f / 511.0f).xxx, -1.0f.xxx);
 }
 
-float2 unpackUVs(in float packed_uv)
+/**
+ * Pack UV values to 16bit floats.
+ * @param value Input float values to pack.
+ * @returns Packed 16bit half precision values as single float.
+ */
+float packUVs(float2 value)
 {
-    uint uv = asuint(packed_uv);
+    return asfloat(packHalf2(value));
+}
 
-    return float2(f16tof32(uv >> 16), f16tof32(uv & 0xFFFFu));
+/**
+ * Convert 16bit halfs to UV values.
+ * @param packedValue Input packed half values (as returned by packUVs) to convert.
+ * @returns Converted float values.
+ */
+float2 unpackUVs(float packedValue)
+{
+    uint uv = asuint(packedValue);
+    return float2(unpackHalf2(uv));
 }
 
 /**
@@ -117,4 +559,90 @@ float3 unpackFloat3(uint packed)
     float z = f16tof32((packed >> 17) & 0x7FE0);
     return float3(x, y, z);
 }
+
+
+/**
+ * Load 2 elements from the buffer.
+ * @param buffer Buffer to be read.
+ * @param index Index of fetch.
+ * @return Requested result.
+ */
+uint2 Load2(in RWStructuredBuffer<uint> buffer, in uint index)
+{
+    uint2 value;
+    value.x = buffer[2 * index + 0];
+    value.y = buffer[2 * index + 1];
+    return value;
+}
+
+/**
+ * Load 3 elements from the buffer.
+ * @param buffer Buffer to be read.
+ * @param index Index of fetch.
+ * @return Requested result.
+ */ +uint3 Load3(in RWStructuredBuffer buffer, in uint index) +{ + uint3 value; + value.x = buffer[3 * index + 0]; + value.y = buffer[3 * index + 1]; + value.z = buffer[3 * index + 2]; + return value; +} + +/** + * Load 4 elements from the buffer. + * @param buffer Buffer to be read. + * @param index Index of fetch. + * @return Requested result. + */ +uint4 Load4(in RWStructuredBuffer buffer, in uint index) +{ + uint4 value; + value.x = buffer[4 * index + 0]; + value.y = buffer[4 * index + 1]; + value.z = buffer[4 * index + 2]; + value.w = buffer[4 * index + 3]; + return value; +} + +/** + * Store 2 elements into the buffer. + * @param buffer Buffer to be written. + * @param index Index for writing. + * @param value Value to be stored. + */ +void Store2(in RWStructuredBuffer buffer, in uint index, in uint2 value) +{ + buffer[2 * index + 0] = value.x; + buffer[2 * index + 1] = value.y; +} + +/** + * Store 3 elements into the buffer. + * @param buffer Buffer to be written. + * @param index Index for writing. + * @param value Value to be stored. + */ +void Store3(in RWStructuredBuffer buffer, in uint index, in uint3 value) +{ + buffer[3 * index + 0] = value.x; + buffer[3 * index + 1] = value.y; + buffer[3 * index + 2] = value.z; +} + +/** + * Store 4 elements into the buffer. + * @param buffer Buffer to be written. + * @param index Index for writing. + * @param value Value to be stored. + */ +void Store4(in RWStructuredBuffer buffer, in uint index, in uint4 value) +{ + buffer[4 * index + 0] = value.x; + buffer[4 * index + 1] = value.y; + buffer[4 * index + 2] = value.z; + buffer[4 * index + 3] = value.w; +} + #endif // PACK_HLSL diff --git a/src/core/src/math/quaternion.hlsl b/src/core/src/math/quaternion.hlsl index bf9d1f5..159a516 100644 --- a/src/core/src/math/quaternion.hlsl +++ b/src/core/src/math/quaternion.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. 
All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/math/random.hlsl b/src/core/src/math/random.hlsl index ab150f9..5e56882 100644 --- a/src/core/src/math/random.hlsl +++ b/src/core/src/math/random.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -57,8 +57,8 @@ class Random */ float rand() { - // Note: Uses 4294967808 instead of 2^32 in order to ensure [0.0, 1.0) mapping due to floating point rounding error. - float ret = (float)randInt() * (1.0f / 4294967808.0f); + // Note: Use the upper 24 bits to avoid a bias due to floating point rounding error. + float ret = (float)(randInt() >> 8) * 0x1.0p-24f; return ret; } diff --git a/src/core/src/math/sampling.hlsl b/src/core/src/math/sampling.hlsl index 6d6aa2d..a27c3e7 100644 --- a/src/core/src/math/sampling.hlsl +++ b/src/core/src/math/sampling.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -26,6 +26,68 @@ THE SOFTWARE. #include "math_constants.hlsl" #include "math.hlsl" +/** + * Transforms a 3D vector to a position in the unit square. + * @param direction The input direction (must be normalised). 
+ * @returns The 2D mapped value [0, 1]. + */ +float2 mapToHemiOctahedron(float3 direction) +{ + // Modified version of "Fast Equal-Area Mapping of the (Hemi)Sphere using SIMD" - Clarberg + float3 absDir = abs(direction); + + float radius = sqrt(1.0f - absDir.z); + float a = hmax(absDir.xy); + float b = hmin(absDir.xy); + b = a == 0.0f ? 0.0f : b / a; + + float phi = atan(b) * (2.0f / PI); + phi = (absDir.x >= absDir.y) ? phi : 1.0f - phi; + + float t = phi * radius; + float s = radius - t; + float2 st = float2(s, t); + st *= sign(direction).xy; + + // Since we only care about the hemisphere above the surface we rescale and center the output + // value range so that it occupies the whole unit square + st = float2(st.x + st.y, st.x - st.y); + + // Transform from [-1,1] to [0,1] + st = 0.5f.xx * st + 0.5f.xx; + + return st; +} + +/** + * Transforms a mapped position in the unit square back to a 3D direction vector. + * @param mapped The mapped position created using mapToHemiOctahedron. + * @returns The 3D direction vector. + */ +float3 mapToHemiOctahedronInverse(float2 mapped) +{ + // Transform from [0,1] to [-1,1] + float2 st = 2.0f.xx * mapped - 1.0f.xx; + + // Transform from unit square to diamond corresponding to +hemisphere + st = float2(st.x + st.y, st.x - st.y) * 0.5f; + + float2 absMapped = abs(st); + float distance = 1.0f - hadd(absMapped); + float radius = 1.0f - abs(distance); + + float phi = (radius == 0.0f) ?
0.0f : QUARTER_PI * ((absMapped.y - absMapped.x) / radius + 1.0f); + float radiusSqr = radius * radius; + float sinTheta = radius * sqrt(2.0f - radiusSqr); + float sinPhi, cosPhi; + sincos(phi, sinPhi, cosPhi); + float x = sinTheta * sign(st.x) * cosPhi; + float y = sinTheta * sign(st.y) * sinPhi; + float z = sign(distance) * (1.0f - radiusSqr); + + return float3(x, y, z); +} + void GetOrthoVectors(in float3 n, out float3 b1, out float3 b2) { bool sel = abs(n.z) > 0; diff --git a/src/core/src/math/spherical_harmonics.hlsl b/src/core/src/math/spherical_harmonics.hlsl index c9cbda3..bc65918 100644 --- a/src/core/src/math/spherical_harmonics.hlsl +++ b/src/core/src/math/spherical_harmonics.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/math/transform.hlsl b/src/core/src/math/transform.hlsl new file mode 100644 index 0000000..d0a4cf1 --- /dev/null +++ b/src/core/src/math/transform.hlsl @@ -0,0 +1,103 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#ifndef TRANSFORM_HLSL +#define TRANSFORM_HLSL + +#include "math.hlsl" + +/** + * Determine a transformation matrix to correctly transform normal vectors. + * @param transform The original transform matrix. + * @returns The new transform matrix. + */ +float3x3 getNormalTransform(float3x3 transform) +{ + // The transform for a normal is transpose(inverse(M)) + // The inverse is calculated as [1/det(A)]*transpose(C) where C is the cofactor matrix + // This simplifies down to [1/det(A)]*C + float3x3 result; + result._m00 = determinant(float2x2(transform._m11_m12, transform._m21_m22)); + result._m01 = -determinant(float2x2(transform._m10_m12, transform._m20_m22)); + result._m02 = determinant(float2x2(transform._m10_m11, transform._m20_m21)); + result._m10 = -determinant(float2x2(transform._m01_m02, transform._m21_m22)); + result._m11 = determinant(float2x2(transform._m00_m02, transform._m20_m22)); + result._m12 = -determinant(float2x2(transform._m00_m01, transform._m20_m21)); + result._m20 = determinant(float2x2(transform._m01_m02, transform._m11_m12)); + result._m21 = -determinant(float2x2(transform._m00_m02, transform._m10_m12)); + result._m22 = determinant(float2x2(transform._m00_m01, transform._m10_m11)); + const float3 det3 = transform._m00_m01_m02 * result._m00_m01_m02; + const float det = 1.0f / hadd(det3); + return (result * det); +} + +/** + * Transform a normal vector. 
+ * @note This correctly handles converting the transform to operate correctly on a surface normal. + * @param normal The normal vector. + * @param transform The transform matrix. + * @returns The transformed normal. + */ +float3 transformNormal(const float3 normal, const float3x4 transform) +{ + const float3x3 normalTransform = getNormalTransform((float3x3)transform); + return mul(normalTransform, normal); +} + +/** + * Transform a 3D direction vector. + * @param values The direction vector. + * @param transform The transform matrix. + * @returns The transformed direction vector. + */ +float3 transformVector(const float3 values, const float3x4 transform) +{ + return mul((float3x3)transform, values); +} + +/** + * Transform a 3D point by an affine matrix. + * @param values The point to transform. + * @param transform The transform matrix. + * @returns The transformed point. + */ +float3 transformPoint(const float3 values, const float3x4 transform) +{ + return mul(transform, float4(values, 1.0f)); +} + +/** + * Transform a 3D point. + * @note This version of transforming a point assumes a non-affine matrix and will handle + * normalisation of the result by the 'w' component. + * @param values The point to transform. + * @param transform The transform matrix. + * @returns The transformed point. + */ +float3 transformPointProjection(const float3 values, const float4x4 transform) +{ + float4 ret = mul(transform, float4(values, 1.0f)); + ret.xyz /= ret.w; // perspective divide + return ret.xyz; +} + +#endif // TRANSFORM_HLSL diff --git a/src/core/src/mesh.hlsl b/src/core/src/mesh.hlsl deleted file mode 100644 index 428d9d8..0000000 --- a/src/core/src/mesh.hlsl +++ /dev/null @@ -1,71 +0,0 @@ -/********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#ifndef MESH_H -#define MESH_H - -// Fetches the transform at the given index. -float4x4 FetchTransform(in uint transform_index) -{ - float4x4 transform; - - float4 m0 = g_TransformBuffer[4 * transform_index + 0]; - float4 m1 = g_TransformBuffer[4 * transform_index + 1]; - float4 m2 = g_TransformBuffer[4 * transform_index + 2]; - float4 m3 = g_TransformBuffer[4 * transform_index + 3]; - - transform[0] = float4(m0.x, m1.x, m2.x, m3.x); - transform[1] = float4(m0.y, m1.y, m2.y, m3.y); - transform[2] = float4(m0.z, m1.z, m2.z, m3.z); - transform[3] = float4(m0.w, m1.w, m2.w, m3.w); - - return transform; -} - -// Fetches the vertices of the given mesh primitive. 
-void FetchVertices(in Mesh mesh, in uint primitive_index, out float3 v0, out float3 v1, out float3 v2) -{ - uint3 indices = g_IndexBuffers[0].Load3(mesh.index_offset + 3 * primitive_index * mesh.index_stride); - - v0 = asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.x * mesh.vertex_stride)); - v1 = asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.y * mesh.vertex_stride)); - v2 = asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.z * mesh.vertex_stride)); -} - -// Fetches the vertices of the given mesh primitive. -void FetchVertices(in Mesh mesh, in uint primitive_index, out Vertex v0, out Vertex v1, out Vertex v2) -{ - uint3 indices = g_IndexBuffers[0].Load3(mesh.index_offset + 3 * primitive_index * mesh.index_stride); - - v0.position = float4(asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.x * mesh.vertex_stride)), 1.0f); - v1.position = float4(asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.y * mesh.vertex_stride)), 1.0f); - v2.position = float4(asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.z * mesh.vertex_stride)), 1.0f); - - v0.normal = float4(asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.x * mesh.vertex_stride + 16)), 0.0f); - v1.normal = float4(asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.y * mesh.vertex_stride + 16)), 0.0f); - v2.normal = float4(asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.z * mesh.vertex_stride + 16)), 0.0f); - - v0.uv = asfloat(g_VertexBuffers[0].Load2(mesh.vertex_offset + indices.x * mesh.vertex_stride + 32)); - v1.uv = asfloat(g_VertexBuffers[0].Load2(mesh.vertex_offset + indices.y * mesh.vertex_stride + 32)); - v2.uv = asfloat(g_VertexBuffers[0].Load2(mesh.vertex_offset + indices.z * mesh.vertex_stride + 32)); -} - -#endif // MESH_H diff --git a/src/core/src/render_techniques/atmosphere/atmosphere.comp b/src/core/src/render_techniques/atmosphere/atmosphere.comp index 65623eb..11937b9 100644 --- 
a/src/core/src/render_techniques/atmosphere/atmosphere.comp +++ b/src/core/src/render_techniques/atmosphere/atmosphere.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -19,7 +19,9 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ********************************************************************/ + #include "atmosphere.hlsl" +#include "../../math/transform.hlsl" float3 g_Eye; uint g_FaceIndex; @@ -41,11 +43,10 @@ void DrawAtmosphere(in uint2 did : SV_DispatchThreadID) float2 uv = (did + 0.5f) / g_BufferDimensions; float2 ndc = 2.0f * uv - 1.0f; - float4 world = mul(g_ViewProjectionInverse, float4(ndc, 1.0f, 1.0f)); - world /= world.w; // perspective divide + float3 world = transformPointProjection(float3(ndc, 1.0f), g_ViewProjectionInverse); float3 ray_origin = g_Eye; - float3 ray_direction = normalize(world.xyz - g_Eye); + float3 ray_direction = normalize(world - g_Eye); float ray_length = 1e9f; float t = g_FrameIndex / 360.0f; diff --git a/src/core/src/render_techniques/atmosphere/atmosphere.cpp b/src/core/src/render_techniques/atmosphere/atmosphere.cpp index 17490e1..d39e270 100644 --- a/src/core/src/render_techniques/atmosphere/atmosphere.cpp +++ b/src/core/src/render_techniques/atmosphere/atmosphere.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -55,10 +55,10 @@ RenderOptionList Atmosphere::getRenderOptions() noexcept return newOptions; } -Atmosphere::RenderOptions Atmosphere::convertOptions(RenderSettings const &settings) noexcept +Atmosphere::RenderOptions Atmosphere::convertOptions(RenderOptionList const &options) noexcept { RenderOptions newOptions; - RENDER_OPTION_GET(atmosphere_enable, newOptions, settings.options_) + RENDER_OPTION_GET(atmosphere_enable, newOptions, options) return newOptions; } @@ -73,7 +73,7 @@ bool Atmosphere::init(CapsaicinInternal const &capsaicin) noexcept void Atmosphere::render(CapsaicinInternal &capsaicin) noexcept { - options = convertOptions(capsaicin.getRenderSettings()); + options = convertOptions(capsaicin.getOptions()); if (!options.atmosphere_enable) return; GfxTexture environment_buffer = capsaicin.getEnvironmentBuffer(); @@ -147,7 +147,7 @@ void Atmosphere::render(CapsaicinInternal &capsaicin) noexcept } } -void Atmosphere::terminate() +void Atmosphere::terminate() noexcept { gfxDestroyProgram(gfx_, atmosphere_program_); gfxDestroyKernel(gfx_, draw_atmosphere_kernel_); diff --git a/src/core/src/render_techniques/atmosphere/atmosphere.h b/src/core/src/render_techniques/atmosphere/atmosphere.h index faffb8c..2fc3b88 100644 --- a/src/core/src/render_techniques/atmosphere/atmosphere.h +++ b/src/core/src/render_techniques/atmosphere/atmosphere.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -43,11 +43,11 @@ class Atmosphere : public RenderTechnique }; /** - * Convert render settings to internal options format. - * @param settings Current render settings. + * Convert render options to internal options format. + * @param options Current render options. * @returns The options converted. */ - static RenderOptions convertOptions(RenderSettings const &settings) noexcept; + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; /** * Initialise any internal data or state. @@ -64,9 +64,12 @@ class Atmosphere : public RenderTechnique */ void render(CapsaicinInternal &capsaicin) noexcept override; -protected: - void terminate(); + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; +protected: RenderOptions options; GfxProgram atmosphere_program_; GfxKernel draw_atmosphere_kernel_; diff --git a/src/core/src/render_techniques/gi10/gi10.comp b/src/core/src/render_techniques/gi10/gi10.comp index 552bfd9..aa19c6b 100644 --- a/src/core/src/render_techniques/gi10/gi10.comp +++ b/src/core/src/render_techniques/gi10/gi10.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,6 +20,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ********************************************************************/ +#ifndef USE_INLINE_RT +#define USE_INLINE_RT 1 +#endif + #include "gi10_shared.h" //! 
@@ -36,37 +40,43 @@ int2 g_BlurDirection; uint2 g_BufferDimensions; uint g_UseDirectLighting; float3 g_PreViewTranslation; +float g_Exposure; Texture2D g_DepthBuffer; -Texture2D g_NormalBuffer; -Texture2D g_DetailsBuffer; +Texture2D g_GeometryNormalBuffer; +Texture2D g_ShadingNormalBuffer; Texture2D g_VelocityBuffer; +Texture2D g_RoughnessBuffer; Texture2D g_OcclusionAndBentNormalBuffer; Texture2D g_NearFieldGlobalIlluminationBuffer; Texture2D g_VisibilityBuffer; Texture2D g_PreviousDepthBuffer; Texture2D g_PreviousNormalBuffer; Texture2D g_PreviousDetailsBuffer; +Texture2D g_PreviousRoughnessBuffer; StructuredBuffer g_CountBuffer; -ByteAddressBuffer g_IndexBuffers[] : register(space1); -ByteAddressBuffer g_VertexBuffers[] : register(space2); - +StructuredBuffer g_IndexBuffer; +StructuredBuffer g_VertexBuffer; StructuredBuffer g_MeshBuffer; StructuredBuffer g_InstanceBuffer; StructuredBuffer g_MaterialBuffer; -StructuredBuffer g_TransformBuffer; +StructuredBuffer g_TransformBuffer; -RWTexture2D g_IrradianceBuffer; -RWStructuredBuffer g_DrawCommandBuffer; -RWStructuredBuffer g_DispatchCommandBuffer; -RWTexture2D g_GlobalIlluminationBuffer; -Texture2D g_PrevCombinedIlluminationBuffer; +RWTexture2D g_IrradianceBuffer; +RWTexture2D g_ReflectionBuffer; +Texture2D g_PreviousReflectionBuffer; +RWStructuredBuffer g_DrawCommandBuffer; +RWStructuredBuffer g_DispatchCommandBuffer; +RWStructuredBuffer g_DispatchRaysCommandBuffer; +RWTexture2D g_GlobalIlluminationBuffer; +Texture2D g_PrevCombinedIlluminationBuffer; RaytracingAccelerationStructure g_Scene; TextureCube g_EnvironmentBuffer; +TextureCube g_PrefilteredEnvironmentBuffer; Texture2D g_TextureMaps[] : register(space99); SamplerState g_NearestSampler; @@ -76,6 +86,8 @@ ConstantBuffer g_GI10Constants; ConstantBuffer g_ScreenProbesConstants; ConstantBuffer g_HashGridCacheConstants; ConstantBuffer g_WorldSpaceReSTIRConstants; +ConstantBuffer g_GlossyReflectionsConstants; +ConstantBuffer g_GlossyReflectionsAtrousConstants; 
#define g_ViewProjection g_GI10Constants.view_proj #define g_PreviousViewProjection g_GI10Constants.view_proj_prev @@ -87,95 +99,52 @@ ConstantBuffer g_WorldSpaceReSTIRConstants; //! GI-1.0 shader includes. //! -#include "../../components/light_sampler_bounds/light_sampler_bounds.hlsl" +#include "../../components/light_sampler_grid_stream/light_sampler_grid_stream.hlsl" #include "../../components/blue_noise_sampler/blue_noise_sampler.hlsl" -#include "../../components/stratified_sampler/stratified_sampler.hlsl" +#include "../../geometry/intersection.hlsl" +#include "../../geometry/geometry.hlsl" +#include "../../geometry/mesh.hlsl" #include "../../lights/lights.hlsl" #include "../../materials/materials.hlsl" #include "../../materials/material_sampling.hlsl" #include "../../math/color.hlsl" -#include "../../math/geometry.hlsl" #include "../../math/hash.hlsl" #include "../../math/spherical_harmonics.hlsl" -#include "../../mesh.hlsl" #include "../../math/pack.hlsl" +#include "../../math/random.hlsl" #include "gi10.hlsl" #include "screen_probes.hlsl" #include "hash_grid_cache.hlsl" #include "world_space_restir.hlsl" +#include "glossy_reflections.hlsl" #include "gi_denoiser.hlsl" -#ifdef USE_ALPHA_TESTING -typedef RayQuery // TODO: RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES seems to cause the driver to crash -ClosestRayQuery; +#define MAX_HIT_DISTANCE 1e9f -typedef RayQuery -ShadowRayQuery; - -template -RayQueryType TraceRay(RayDesc ray_desc) +struct PopulateScreenProbesPayload { - RayQueryType ray_query; - ray_query.TraceRayInline(g_Scene, RAY_FLAG_NONE, 0xFFu, ray_desc); - while (ray_query.Proceed()) - { - if (ray_query.CandidateType() == CANDIDATE_NON_OPAQUE_TRIANGLE) - { - // Get the intersection data - uint instanceIndex = ray_query.CandidateInstanceIndex(); - uint geometryIndex = ray_query.CandidateGeometryIndex(); - uint primitiveIndex = ray_query.CandidatePrimitiveIndex(); - float2 barycentrics = ray_query.CandidateTriangleBarycentrics(); - - // Get instance information 
for current object - Instance instance = g_InstanceBuffer[instanceIndex]; - Mesh mesh = g_MeshBuffer[instance.mesh_index + geometryIndex]; - - // Get UV values from buffers - Vertex v0, v1, v2; - FetchVertices(mesh, primitiveIndex, v0, v1, v2); - - // Set material - Material material = g_MaterialBuffer[mesh.material_index]; - // Calculate UV coordinates - float2 uv = interpolate(v0.uv, v1.uv, v2.uv, barycentrics); - MaterialAlpha mask = MakeMaterialAlpha(material, uv); - - // Check the alpha mask - // Approximates alpha blending - if (mask.alpha >= 0.5f) - { - ray_query.CommitNonOpaqueTriangleHit(); - } - } - else - { - // Should never get here as we don't support non-triangle geometry - // However if this conditional is removed the driver crashes - ray_query.Abort(); - } - } + uint2 seed; + float3 sky_sample; + float hit_dist; +}; - return ray_query; -} -#else // USE_ALPHA_TESTING -typedef RayQuery -ClosestRayQuery; - -typedef RayQuery -ShadowRayQuery; - -template -RayQueryType TraceRay(RayDesc ray_desc) +struct PopulateCellsPayload { - RayQueryType ray_query; - ray_query.TraceRayInline(g_Scene, RAY_FLAG_NONE, 0xFFu, ray_desc); - while (ray_query.Proceed()) {} - - return ray_query; -} -#endif // USE_ALPHA_TESTING + uint query_index; + float3 world; + float3 normal; + float3 lighting; + Reservoir reservoir; +}; + +struct TraceReflectionsPayload +{ + int2 full_pos; + float3 radiance; + float2 s; + float hit_distance; +}; //! //! GI-1.0 kernels. 
@@ -196,6 +165,8 @@ void ClearCounters() g_HashGridCache_UpdateTileCountBuffer[0] = 0; g_HashGridCache_VisibilityCountBuffer[0] = 0; g_HashGridCache_VisibilityRayCountBuffer[0] = 0; + + g_GlossyReflections_RtSampleCountBuffer[0] = 0; } [numthreads(1, 1, 1)] @@ -230,6 +201,23 @@ void GenerateDispatch() g_DispatchCommandBuffer[0] = dispatch_command; } +[numthreads(1, 1, 1)] +void GenerateDispatchRays() +{ + DispatchRaysCommand dispatch_rays_command; + dispatch_rays_command.ray_generation_shader_record = g_GI10Constants.ray_generation_shader_record; + dispatch_rays_command.miss_shader_table = g_GI10Constants.miss_shader_table; + dispatch_rays_command.hit_group_table = g_GI10Constants.hit_group_table; + dispatch_rays_command.callable_shader_table = g_GI10Constants.callable_shader_table; + dispatch_rays_command.width = g_CountBuffer[0]; + dispatch_rays_command.height = 1; + dispatch_rays_command.depth = 1; + dispatch_rays_command.padding[0] = 0; + dispatch_rays_command.padding[1] = 0; + dispatch_rays_command.padding[2] = 0; + g_DispatchRaysCommandBuffer[0] = dispatch_rays_command; +} + //! //! Screen probes kernels. //! @@ -288,7 +276,7 @@ void InitCachedTileLRU(in uint did : SV_DispatchThreadID) void ReprojectScreenProbes(in uint2 did : SV_DispatchThreadID, in uint2 group_id : SV_GroupID, in uint2 local_id : SV_GroupThreadID, in uint local_index : SV_GroupIndex) { float depth = g_DepthBuffer.Load(int3(did, 0)).x; - float3 normal = (all(did < g_BufferDimensions) ? g_NormalBuffer.Load(int3(did, 0)).xyz : float3(0.0f, 0.0f, 0.0f)); + float3 normal = (all(did < g_BufferDimensions) ? g_GeometryNormalBuffer.Load(int3(did, 0)).xyz : float3(0.0f, 0.0f, 0.0f)); bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); normal = normalize(2.0f * normal - 1.0f); // decode normal @@ -362,7 +350,7 @@ void ReprojectScreenProbes(in uint2 did : SV_DispatchThreadID, in uint2 group_id if (local_lane != 0xFFFFu) { depth = g_DepthBuffer.Load(int3(seed, 0)).x; - normal = normalize(2.0f * g_NormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); + normal = normalize(2.0f * g_GeometryNormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); world_pos = InverseProject(g_ViewProjectionInverse, uv, depth); float2 probe_uv = (ScreenProbes_UnpackSeed(probe_mask) + 0.5f) / g_BufferDimensions; @@ -371,7 +359,7 @@ void ReprojectScreenProbes(in uint2 did : SV_DispatchThreadID, in uint2 group_id float3 probe_normal = normalize(2.0f * g_PreviousNormalBuffer.Load(int3(ScreenProbes_UnpackSeed(probe_mask), 0)).xyz - 1.0f); float4 probe_radiance = g_ScreenProbes_PreviousProbeBuffer[((ScreenProbes_UnpackSeed(probe_mask) / g_ScreenProbesConstants.probe_size) * g_ScreenProbesConstants.probe_size) + cell]; - float3 probe_direction = hemioct_to_float32x3(2.0f * (cell + 0.5f) / g_ScreenProbesConstants.probe_size - 1.0f); + float3 probe_direction = mapToHemiOctahedronInverse((cell + 0.5f) / g_ScreenProbesConstants.probe_size); float3 b1, b2; GetOrthoVectors(probe_normal, b1, b2); @@ -385,7 +373,7 @@ void ReprojectScreenProbes(in uint2 did : SV_DispatchThreadID, in uint2 group_id if (dot(normal, reprojected_dir) > 0.0f) { - float2 remap_uv = 0.5f * float32x3_to_hemioct(mul(reprojected_dir, CreateTBN(normal))) + 0.5f; + float2 remap_uv = mapToHemiOctahedron(mul(reprojected_dir, CreateTBN(normal))); uint2 remap_cell = uint2(remap_uv * g_ScreenProbesConstants.probe_size); uint remap_cell_index = remap_cell.x + remap_cell.y * g_ScreenProbesConstants.probe_size; uint4 remap_radiance = ScreenProbes_QuantizeRadiance(float4(probe_radiance.xyz, reprojected_len)); @@ -419,7 +407,7 @@ void ReprojectScreenProbes(in uint2 did : SV_DispatchThreadID, in uint2 group_id float3 radiance = total_radiance.xyz / max(total_radiance.w, 1.0f); 
float empty_cell_count = (g_ScreenProbesConstants.probe_size * g_ScreenProbesConstants.probe_size - total_radiance.w); - lds_ScreenProbes_RadianceBackup[0] = float4(radiance / max(empty_cell_count, 1.0f), 1e9f); + lds_ScreenProbes_RadianceBackup[0] = float4(radiance / max(empty_cell_count, 1.0f), MAX_HIT_DISTANCE); } GroupMemoryBarrierWithGroupSync(); @@ -511,12 +499,10 @@ void CountScreenProbes(in uint did : SV_DispatchThreadID) uint2 cached_probe = uint2(cached_tile_index % dims.x, cached_tile_index / dims.x); float3 world_pos = g_ScreenProbes_ProbeCachedTileIndexBuffer[cached_probe].xyz; - float4 homogeneous = mul(g_ViewProjection, float4(world_pos, 1.0f)); - - homogeneous.xyz /= homogeneous.w; // perspective divide - homogeneous.xy = 0.5f * float2(homogeneous.x, -homogeneous.y) + 0.5f; + float3 homogeneous = transformPointProjection(world_pos, g_ViewProjection); + homogeneous.xy = 0.5f * float2(homogeneous.x, -homogeneous.y) + 0.5f; - if (all(homogeneous.xyz > 0.0f) && all(homogeneous.xyz < 1.0f)) + if (all(homogeneous > 0.0f) && all(homogeneous < 1.0f)) { uint2 probe_count = (g_BufferDimensions + g_ScreenProbesConstants.probe_size - 1) / g_ScreenProbesConstants.probe_size; uint2 probe = uint2(homogeneous.xy * probe_count); @@ -564,7 +550,7 @@ void SpawnScreenProbes(in uint did : SV_DispatchThreadID) uint2 jitter = min(CalculateHaltonSequence(g_FrameIndex) * g_ScreenProbesConstants.probe_spawn_tile_size, g_ScreenProbesConstants.probe_spawn_tile_size - 1.0f); uint2 seed = min(probe * g_ScreenProbesConstants.probe_spawn_tile_size + jitter, g_BufferDimensions - 1); - float3 normal = g_NormalBuffer.Load(int3(seed, 0)).xyz; + float3 normal = g_GeometryNormalBuffer.Load(int3(seed, 0)).xyz; bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); if (!is_sky_pixel) @@ -626,7 +612,7 @@ void PatchScreenProbes(in uint did : SV_DispatchThreadID) uint2 jitter = min(CalculateHaltonSequence(g_FrameIndex) * g_ScreenProbesConstants.probe_size, g_ScreenProbesConstants.probe_size - 1.0f); uint2 seed = min(probe * g_ScreenProbesConstants.probe_size + jitter, g_BufferDimensions - 1); - float3 normal = g_NormalBuffer.Load(int3(seed, 0)).xyz; + float3 normal = g_GeometryNormalBuffer.Load(int3(seed, 0)).xyz; bool is_sky_pixel = (dot(normal, normal) == 0.0f ? true : false); if (is_sky_pixel) @@ -659,7 +645,7 @@ void SampleScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : SV float2 uv = (seed + 0.5f) / g_BufferDimensions; float depth = g_DepthBuffer.Load(int3(seed, 0)).x; - float3 normal = normalize(2.0f * g_NormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); + float3 normal = normalize(2.0f * g_GeometryNormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); float3 world_pos = InverseProject(g_ViewProjectionInverse, uv, depth); uint previous_probe_mask = kGI10_InvalidId; @@ -717,9 +703,9 @@ void SampleScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : SV continue; // prevent leaks from faraway probes } - float3 probe_normal = normalize(2.0f * g_NormalBuffer.Load(int3(ScreenProbes_UnpackSeed(probe_mask), 0)).xyz - 1.0f); + float3 probe_normal = normalize(2.0f * g_GeometryNormalBuffer.Load(int3(ScreenProbes_UnpackSeed(probe_mask), 0)).xyz - 1.0f); float4 probe_radiance = g_ScreenProbes_ProbeBuffer[((ScreenProbes_UnpackSeed(probe_mask) / g_ScreenProbesConstants.probe_size) * g_ScreenProbesConstants.probe_size) + cell]; - float3 probe_direction = hemioct_to_float32x3(2.0f * (cell + 0.5f) / g_ScreenProbesConstants.probe_size - 1.0f); + float3 probe_direction = mapToHemiOctahedronInverse((cell + 0.5f) / g_ScreenProbesConstants.probe_size); float3 b1, b2; GetOrthoVectors(probe_normal, b1, b2); @@ -741,7 +727,7 @@ void SampleScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : SV 
InterlockedAdd(lds_ScreenProbes_RadianceReuseSampleCount, 1); } - float2 remap_uv = 0.5f * float32x3_to_hemioct(mul(reprojected_dir, CreateTBN(normal))) + 0.5f; + float2 remap_uv = mapToHemiOctahedron(mul(reprojected_dir, CreateTBN(normal))); uint2 remap_cell = uint2(remap_uv * g_ScreenProbesConstants.probe_size); uint remap_cell_index = remap_cell.x + remap_cell.y * g_ScreenProbesConstants.probe_size; uint4 remap_radiance = ScreenProbes_QuantizeRadiance(float4(probe_radiance.xyz, reprojected_len)); @@ -765,7 +751,7 @@ void SampleScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : SV { float2 previous_probe_uv = (ScreenProbes_UnpackSeed(previous_probe_mask) + 0.5f) / g_BufferDimensions; float previous_probe_depth = g_DepthBuffer.Load(int3(ScreenProbes_UnpackSeed(previous_probe_mask), 0)).x; - float3 previous_probe_normal = normalize(2.0f * g_NormalBuffer.Load(int3(ScreenProbes_UnpackSeed(previous_probe_mask), 0)).xyz - 1.0f); + float3 previous_probe_normal = normalize(2.0f * g_GeometryNormalBuffer.Load(int3(ScreenProbes_UnpackSeed(previous_probe_mask), 0)).xyz - 1.0f); float3 previous_probe_pos = InverseProject(g_ViewProjectionInverse, previous_probe_uv, previous_probe_depth); uint evicted_probe_mask = kGI10_InvalidId; @@ -813,7 +799,7 @@ void SampleScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : SV } float4 cached_probe_radiance = g_ScreenProbes_ProbeCachedTileBuffer[cached_pos]; - float3 cached_probe_direction = hemioct_to_float32x3(2.0f * (cell + 0.5f) / g_ScreenProbesConstants.probe_size - 1.0f); + float3 cached_probe_direction = mapToHemiOctahedronInverse((cell + 0.5f) / g_ScreenProbesConstants.probe_size); float3 b1, b2; GetOrthoVectors(cached_probe_normal, b1, b2); @@ -827,7 +813,7 @@ void SampleScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : SV if (dot(normal, reprojected_dir) > 0.0f) { - float2 remap_uv = 0.5f * float32x3_to_hemioct(mul(reprojected_dir, CreateTBN(normal))) + 0.5f; + float2 remap_uv = 
mapToHemiOctahedron(mul(reprojected_dir, CreateTBN(normal))); uint2 remap_cell = uint2(remap_uv * g_ScreenProbesConstants.probe_size); uint remap_cell_index = remap_cell.x + remap_cell.y * g_ScreenProbesConstants.probe_size; uint4 remap_radiance = ScreenProbes_QuantizeRadiance(float4(cached_probe_radiance.xyz, reprojected_len)); @@ -910,7 +896,7 @@ void SampleScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : SV previous_radiance.w = -1.0f; } - float3 direction = hemioct_to_float32x3(2.0f * (cell + 0.5f) / g_ScreenProbesConstants.probe_size - 1.0f); + float3 direction = mapToHemiOctahedronInverse((cell + 0.5f) / g_ScreenProbesConstants.probe_size); float radiance = luminance(previous_radiance.xyz) * dot(direction, float3(0.0f, 0.0f, 1.0f)); ScreenProbes_ScanRadiance(local_id, radiance); // build our sampling CDF @@ -920,23 +906,195 @@ void SampleScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : SV return; // out of bounds } - StratifiedSampler strat_sampler = MakeStratifiedSampler(did, g_FrameIndex); + { + // get material + float4 visibility = g_VisibilityBuffer.Load(int3(seed, 0)); + float2 barycentrics = visibility.xy; + uint instanceID = asuint(visibility.z); + uint primitiveID = asuint(visibility.w); - uint sampled_cell_index = ScreenProbes_FindCellIndex(local_id, strat_sampler.rand()); - uint2 sampled_cell = (total_weight > 0.0f ? 
uint2(sampled_cell_index % g_ScreenProbesConstants.probe_size, sampled_cell_index / g_ScreenProbesConstants.probe_size) : cell); + Instance instance = g_InstanceBuffer[instanceID]; + Mesh mesh = g_MeshBuffer[instance.mesh_index]; + float3x4 transform = g_TransformBuffer[instance.transform_index]; - direction = hemioct_to_float32x3(2.0f * (sampled_cell + strat_sampler.rand2()) / g_ScreenProbesConstants.probe_size - 1.0f); + TriangleNormUV vertices = fetchVerticesNormUV(mesh, primitiveID); + float2 mesh_uv = interpolate(vertices.uv0, vertices.uv1, vertices.uv2, barycentrics); - float3 b1, b2; - GetOrthoVectors(normal, b1, b2); - direction = direction.x * b1 + direction.y * b2 + direction.z * normal; + float3 view_direction = normalize(g_Eye - world_pos); + float3 shading_normal = normalize(2.0f * g_ShadingNormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); + + Material material = g_MaterialBuffer[instance.material_index]; + MaterialBRDF materialBRDF = MakeMaterialBRDF(material, mesh_uv); + + // Transform the view direction into the surfaces tangent coordinate space (oriented so that z axis is aligned to normal) + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(view_direction); + + Random randomNG = MakeRandom(did, g_FrameIndex); + float2 samples = randomNG.rand2(); + +#ifndef DISABLE_SPECULAR_MATERIALS + // calculate diffuse, specular layer probability + float3 specularLightDirection = calculateGGXSpecularDirection(shading_normal, view_direction, sqrt(materialBRDF.roughnessAlpha)); + float3 specularHalfVector = normalize(view_direction + specularLightDirection); + float specularDotHV = saturate(dot(specularHalfVector, view_direction)); + float probabilityBRDF = calculateBRDFProbability(materialBRDF.F0, specularDotHV, materialBRDF.albedo); + + // specular layer case + if (randomNG.rand() < probabilityBRDF) + { + // sample direction by BRDF importance sampling + float3 newLight = sampleGGX(materialBRDF.roughnessAlpha, 
localView, samples); + direction = normalize(localRotation.inverse().transform(newLight)); + } + // diffuse layer case + else +#endif + { + // sample direction by ray guiding + uint sampled_cell_index = ScreenProbes_FindCellIndex(local_id, randomNG.rand()); + uint2 sampled_cell = (total_weight > 0.0f ? uint2(sampled_cell_index % g_ScreenProbesConstants.probe_size, sampled_cell_index / g_ScreenProbesConstants.probe_size) : cell); + direction = mapToHemiOctahedronInverse((sampled_cell + randomNG.rand2()) / g_ScreenProbesConstants.probe_size); + + float3 b1, b2; + GetOrthoVectors(normal, b1, b2); + direction = direction.x * b1 + direction.y * b2 + direction.z * normal; + } + } g_ScreenProbes_PreviousProbeBuffer[pos] = previous_radiance; g_ScreenProbes_ProbeSpawnSampleBuffer[did] = ScreenProbes_PackSample(direction); } -[numthreads(32, 1, 1)] -void PopulateScreenProbes(in uint did : SV_DispatchThreadID) +void PopulateScreenProbesHandleHit(uint did, inout PopulateScreenProbesPayload payload, RayDesc ray, HitInfo hit_info) +{ + HashGridCache_Data data; + data.eye_position = g_Eye; + data.hit_position = ray.Origin + payload.hit_dist * ray.Direction; + data.direction = ray.Direction; + data.hit_distance = payload.hit_dist; + + uint tile_index; + bool is_new_tile; + uint cell_index = HashGridCache_InsertCell(data, tile_index, is_new_tile); + + if (cell_index != kGI10_InvalidId) + { + // Bump the cell's decay to the max. 
now that it's been 'touched' + uint previous_tile_decay; + InterlockedExchange(g_HashGridCache_DecayTileBuffer[tile_index], g_FrameIndex, previous_tile_decay); + + HashGridCache_Visibility visibility; + visibility.is_front_face = hit_info.frontFace; + visibility.instance_index = hit_info.instanceIndex; + visibility.geometry_index = hit_info.geometryIndex; + visibility.primitive_index = hit_info.primitiveIndex; + visibility.barycentrics = hit_info.barycentrics; + + // We update the cell index for later passes + uint visibility_index; + InterlockedAdd(g_HashGridCache_VisibilityCountBuffer[0], 1, visibility_index); + g_HashGridCache_VisibilityBuffer[visibility_index] = HashGridCache_PackVisibility(visibility); + g_HashGridCache_VisibilityCellBuffer[visibility_index] = cell_index; + g_HashGridCache_VisibilityQueryBuffer[visibility_index] = did; + + // Write out bounds of visibility + requestLightSampleLocation(data.hit_position); + + // If this cell is inside a new tile, we need to add the tile to the packed storage and clear its cells. + if (is_new_tile) + { + uint packed_tile_index; + InterlockedAdd(g_HashGridCache_PackedTileCountBuffer[0], 1, packed_tile_index); + g_HashGridCache_PackedTileIndexBuffer[packed_tile_index] = tile_index; + + // Clear mip0 cells (others will be reset anyways by UpdateTiles) + for (int cell_offset = 0; cell_offset < g_HashGridCacheConstants.num_cells_per_tile_mip0; ++cell_offset) + { + uint cell_index = HashGridCache_CellIndex(cell_offset, tile_index); + g_HashGridCache_ValueBuffer[cell_index] = uint2(0, 0); + } + } + + // If we're the 1st invocation touching this cell (this frame), we want to clear the + // scratch storage that'll be used for atomically updating the radiance. + // The accumulation will be resolved in the 'UpdateTiles()' kernel to + // avoid integer overflow. 
+ if (is_new_tile || previous_tile_decay != g_FrameIndex) + { + uint update_tile_index; + InterlockedAdd(g_HashGridCache_UpdateTileCountBuffer[0], 1, update_tile_index); + g_HashGridCache_UpdateTileBuffer[update_tile_index] = tile_index; + } + +#ifdef DEBUG_HASH_CELLS + // For debugging purposes, we need to be able to retrieve the position + // & orientation of cells as we iterate the content of the cache. + // So, write the packed cell descriptor out to memory in this case. + if (is_new_tile) + { + // Clear debug cells (all mips) + for (int cell_offset = 0; cell_offset < g_HashGridCacheConstants.num_cells_per_tile; ++cell_offset) + { + uint cell_index = HashGridCache_CellIndex(cell_offset, tile_index); + g_HashGridCache_DebugCellBuffer[cell_index] = HashGridCache_ClearDebugCell(); + } + } + + float4 packed_debug_cell; + uint debug_cell_index = HashGridCache_PackDebugCell(data, tile_index, packed_debug_cell); + + // BE CAREFUL: writing to g_HashGridCache_DebugCellBuffer isn't atomic and several writings could occur + uint previous_cell_decay; + InterlockedExchange(g_HashGridCache_DecayCellBuffer[debug_cell_index], g_FrameIndex, previous_cell_decay); + if (previous_cell_decay != g_FrameIndex) + { + g_HashGridCache_DebugCellBuffer[debug_cell_index] = packed_debug_cell; + } +#endif // DEBUG_HASH_CELLS + } +} + +void PopulateScreenProbesHandleMiss(inout PopulateScreenProbesPayload payload, RayDesc ray) +{ + if (g_UseDirectLighting != 0) + { + payload.sky_sample = g_EnvironmentBuffer.SampleLevel(g_TextureSampler, ray.Direction, 0.0f).xyz; + } +} + +void PopulateScreenProbesTraceRayInline(uint did, inout PopulateScreenProbesPayload payload, RayDesc ray) +{ + ClosestRayQuery ray_query = TraceRay(ray); + + // If we hit some geometry, we append a new world-space hash-grid cache query + if (ray_query.CommittedStatus() == COMMITTED_NOTHING) + { + payload.hit_dist = ray_query.CommittedRayT(); + PopulateScreenProbesHandleMiss(payload, ray); + } + else + { + payload.hit_dist = 
ray_query.CommittedRayT(); + PopulateScreenProbesHandleHit(did, payload, ray, GetHitInfoRtInlineCommitted(ray_query)); + } +} + +void PopulateScreenProbesTraceRayRt(uint did, inout PopulateScreenProbesPayload payload, RayDesc ray) +{ + TraceRay(g_Scene, RAY_FLAG_NONE, 0xFFu, 0, 0, 0, ray, payload); +} + +void PopulateScreenProbesTraceRay(uint did, inout PopulateScreenProbesPayload payload, RayDesc ray) +{ +#if USE_INLINE_RT + return PopulateScreenProbesTraceRayInline(did, payload, ray); +#else + return PopulateScreenProbesTraceRayRt(did, payload, ray); +#endif +} + +void PopulateScreenProbes(uint did) { uint max_probe_spawn_count = (g_BufferDimensions.x + g_ScreenProbesConstants.probe_spawn_tile_size - 1) / g_ScreenProbesConstants.probe_spawn_tile_size * (g_BufferDimensions.y + g_ScreenProbesConstants.probe_spawn_tile_size - 1) / g_ScreenProbesConstants.probe_spawn_tile_size; @@ -953,7 +1111,7 @@ void PopulateScreenProbes(in uint did : SV_DispatchThreadID) // Read the visibility buffer and decode uint2 seed = ScreenProbes_UnpackSeed(g_ScreenProbes_ProbeSpawnBuffer[probe_index]); - float3 normal = g_NormalBuffer.Load(int3(seed, 0)).xyz; + float3 normal = g_GeometryNormalBuffer.Load(int3(seed, 0)).xyz; bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); if (is_sky_pixel) @@ -971,13 +1129,12 @@ void PopulateScreenProbes(in uint did : SV_DispatchThreadID) Instance instance = g_InstanceBuffer[instanceID]; Mesh mesh = g_MeshBuffer[instance.mesh_index]; - float4x4 transform = FetchTransform(instance.transform_index); + float3x4 transform = g_TransformBuffer[instance.transform_index]; - uint3 indices = g_IndexBuffers[0].Load3(mesh.index_offset + 3 * primitiveID * mesh.index_stride); - - float3 v0 = mul(transform, float4(asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.x * mesh.vertex_stride)), 1.0f)).xyz; - float3 v1 = mul(transform, float4(asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.y * mesh.vertex_stride)), 1.0f)).xyz; - float3 v2 = mul(transform, float4(asfloat(g_VertexBuffers[0].Load3(mesh.vertex_offset + indices.z * mesh.vertex_stride)), 1.0f)).xyz; + Triangle vertices = fetchVertices(mesh, primitiveID); + float3 v0 = transformPoint(vertices.v0, transform); + float3 v1 = transformPoint(vertices.v1, transform); + float3 v2 = transformPoint(vertices.v2, transform); float3 world = interpolate(v0, v1, v2, barycentrics); float3 origin = offsetPosition(world, normal); @@ -986,105 +1143,20 @@ void PopulateScreenProbes(in uint did : SV_DispatchThreadID) ray_desc.Direction = direction; ray_desc.Origin = origin; ray_desc.TMin = 0.0f; - ray_desc.TMax = 1e9f; - - ClosestRayQuery ray_query = TraceRay(ray_desc); - - float3 sky_sample = float3(0.0f, 0.0f, 0.0f); - - // If we hit some geometry, we append a new world-space hash-grid cache query - if (ray_query.CommittedStatus() == COMMITTED_NOTHING) - { - if (g_UseDirectLighting != 0) - { - sky_sample = g_EnvironmentBuffer.SampleLevel(g_TextureSampler, direction, 0.0f).xyz; - } - } - else - { - float hit_dist = ray_query.CommittedRayT(); - - HashGridCache_Data data; - data.eye_position = g_Eye; - data.hit_position = world + hit_dist * direction; - data.direction = direction; - data.hit_distance = hit_dist; - - uint tile_index; - bool 
is_new_tile; - uint cell_index = HashGridCache_InsertCell(data, tile_index, is_new_tile); - - if (cell_index != kGI10_InvalidId) - { - // Bump the cell's decay to the max. now that it's been 'touched' - uint previous_tile_decay; - InterlockedExchange(g_HashGridCache_DecayTileBuffer[tile_index], g_FrameIndex, previous_tile_decay); - - // We update the cell index for later passes - uint visibility_index; - InterlockedAdd(g_HashGridCache_VisibilityCountBuffer[0], 1, visibility_index); - g_HashGridCache_VisibilityBuffer[visibility_index] = HashGridCache_PackVisibility(ray_query); - g_HashGridCache_VisibilityCellBuffer[visibility_index] = cell_index; - g_HashGridCache_VisibilityQueryBuffer[visibility_index] = did; - - // Write out bounds of visibility - LightBounds_StorePosition(data.hit_position); - - // If this cell is inside a new tile, we need to add the tile to the packed storage and clear its cells. - if (is_new_tile) - { - uint packed_tile_index; - InterlockedAdd(g_HashGridCache_PackedTileCountBuffer[0], 1, packed_tile_index); - g_HashGridCache_PackedTileIndexBuffer[packed_tile_index] = tile_index; - - // Clear mip0 cells (others will be reset anyways by UpdateTiles) - for (int cell_offset = 0; cell_offset < g_HashGridCacheConstants.num_cells_per_tile_mip0; ++cell_offset) - { - uint cell_index = HashGridCache_CellIndex(cell_offset, tile_index); - g_HashGridCache_ValueBuffer[cell_index] = uint2(0, 0); - } - } - - // If we're the 1st invocation touching this cell (this frame), we want to clear the - // scratch storage that'll be used for atomically updating the radiance. - // The accumulation will be resolved in the 'UpdateTiles()' kernel to - // avoid integer overflow. 
- if (is_new_tile || previous_tile_decay != g_FrameIndex) - { - uint update_tile_index; - InterlockedAdd(g_HashGridCache_UpdateTileCountBuffer[0], 1, update_tile_index); - g_HashGridCache_UpdateTileBuffer[update_tile_index] = tile_index; - } - -#ifdef DEBUG_HASH_CELLS - // For debugging purposes, we need to be able to retrieve the position - // & orientation of cells as we iterate the content of the cache. - // So, write the packed cell descriptor out to memory in this case. - if (is_new_tile) - { - // Clear debug cells (all mips) - for (int cell_offset = 0; cell_offset < g_HashGridCacheConstants.num_cells_per_tile; ++cell_offset) - { - uint cell_index = HashGridCache_CellIndex(cell_offset, tile_index); - g_HashGridCache_DebugCellBuffer[cell_index] = HashGridCache_ClearDebugCell(); - } - } + ray_desc.TMax = MAX_HIT_DISTANCE; - float4 packed_debug_cell; - uint debug_cell_index = HashGridCache_PackDebugCell(data, tile_index, packed_debug_cell); + PopulateScreenProbesPayload payload; + payload.sky_sample = float3(0.0f, 0.0f, 0.0f); + payload.seed = seed; + PopulateScreenProbesTraceRay(did, payload, ray_desc); - // BE CAREFUL: writing to g_HashGridCache_DebugCellBuffer isn't atomic and several writings could occur - uint previous_cell_decay; - InterlockedExchange(g_HashGridCache_DecayCellBuffer[debug_cell_index], g_FrameIndex, previous_cell_decay); - if (previous_cell_decay != g_FrameIndex) - { - g_HashGridCache_DebugCellBuffer[debug_cell_index] = packed_debug_cell; - } -#endif // DEBUG_HASH_CELLS - } - } + g_ScreenProbes_ProbeSpawnRadianceBuffer[did] = ScreenProbes_PackRadiance(float4(payload.sky_sample, payload.hit_dist)); +} - g_ScreenProbes_ProbeSpawnRadianceBuffer[did] = ScreenProbes_PackRadiance(float4(sky_sample, ray_query.CommittedRayT())); +[numthreads(32, 1, 1)] +void PopulateScreenProbesMain(in uint did : SV_DispatchThreadID) +{ + PopulateScreenProbes(did); } [numthreads(64, 1, 1)] @@ -1117,12 +1189,12 @@ void BlendScreenProbes(in uint did : 
SV_DispatchThreadID, in uint local_id : SV_ // ... and accumulate into the sampled cells if (probe_index < probe_count) { - float3 normal = normalize(2.0f * g_NormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); + float3 normal = normalize(2.0f * g_GeometryNormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); float3 direction = ScreenProbes_UnpackSample(g_ScreenProbes_ProbeSpawnSampleBuffer[did]); float4 radiance = ScreenProbes_UnpackRadiance(g_ScreenProbes_ProbeSpawnRadianceBuffer[did]); uint4 quantized_radiance = ScreenProbes_QuantizeRadiance(radiance); - uint2 sampled_cell = uint2((0.5f * float32x3_to_hemioct(mul(direction, CreateTBN(normal))) + 0.5f) * g_ScreenProbesConstants.probe_size); + uint2 sampled_cell = uint2(mapToHemiOctahedron(mul(direction, CreateTBN(normal))) * g_ScreenProbesConstants.probe_size); uint sampled_cell_index = sampled_cell.x + sampled_cell.y * g_ScreenProbesConstants.probe_size; InterlockedAdd(lds_ScreenProbes_RadianceValues[(sampled_cell_index << 2) + 0], quantized_radiance.x); @@ -1153,7 +1225,7 @@ void BlendScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : SV_ float3 radiance = total_radiance.xyz / max(total_radiance.w, 1.0f); float empty_cell_count = (g_ScreenProbesConstants.probe_size * g_ScreenProbesConstants.probe_size - total_radiance.w); - lds_ScreenProbes_RadianceBackup[0] = float4(radiance / max(empty_cell_count, 1.0f), 1e9f); + lds_ScreenProbes_RadianceBackup[0] = float4(radiance / max(empty_cell_count, 1.0f), MAX_HIT_DISTANCE); } GroupMemoryBarrierWithGroupSync(); @@ -1223,7 +1295,7 @@ void BlendScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : SV_ float2 previous_uv = (ScreenProbes_UnpackSeed(previous_probe_mask) + 0.5f) / g_BufferDimensions; float previous_depth = g_DepthBuffer.Load(int3(ScreenProbes_UnpackSeed(previous_probe_mask), 0)).x; - float3 previous_normal = normalize(2.0f * g_NormalBuffer.Load(int3(ScreenProbes_UnpackSeed(previous_probe_mask), 0)).xyz - 1.0f); + float3 previous_normal = 
normalize(2.0f * g_GeometryNormalBuffer.Load(int3(ScreenProbes_UnpackSeed(previous_probe_mask), 0)).xyz - 1.0f); float3 previous_world = InverseProject(g_ViewProjectionInverse, previous_uv, previous_depth); g_ScreenProbes_ProbeCachedTileIndexBuffer[cached_probe] = float4(previous_world, asfloat(packNormal(previous_normal))); @@ -1301,12 +1373,12 @@ void FilterScreenProbes(in uint did : SV_DispatchThreadID) float2 uv = (seed + 0.5f) / g_BufferDimensions; float depth = g_DepthBuffer.Load(int3(seed, 0)).x; - float3 normal = normalize(2.0f * g_NormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); + float3 normal = normalize(2.0f * g_GeometryNormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); float3 world_pos = InverseProject(g_ViewProjectionInverse, uv, depth); float cell_size = distance(g_Eye, world_pos) * g_ScreenProbesConstants.cell_size; float4 radiance = g_ScreenProbes_PreviousProbeBuffer[pos]; - float3 direction = hemioct_to_float32x3(2.0f * (cell + 0.5f) / g_ScreenProbesConstants.probe_size - 1.0f); + float3 direction = mapToHemiOctahedronInverse((cell + 0.5f) / g_ScreenProbesConstants.probe_size); float hit_distance = radiance.w; float total_weight = 1.0f; @@ -1330,7 +1402,7 @@ void FilterScreenProbes(in uint did : SV_DispatchThreadID) uint2 probe_seed = ScreenProbes_UnpackSeed(probe_mask); float2 probe_uv = (probe_seed + 0.5f) / g_BufferDimensions; float probe_depth = g_DepthBuffer.Load(int3(probe_seed, 0)).x; - float3 probe_normal = normalize(2.0f * g_NormalBuffer.Load(int3(probe_seed, 0)).xyz - 1.0f); + float3 probe_normal = normalize(2.0f * g_GeometryNormalBuffer.Load(int3(probe_seed, 0)).xyz - 1.0f); float3 probe_world = InverseProject(g_ViewProjectionInverse, probe_uv, probe_depth); if (abs(dot(probe_world - world_pos, normal)) > cell_size || dot(direction, probe_normal) < 0.0f) @@ -1338,12 +1410,12 @@ void FilterScreenProbes(in uint did : SV_DispatchThreadID) continue; // oriented hemispheres do not overlap } - uint2 probe_cell = uint2((0.5f * 
float32x3_to_hemioct(mul(direction, CreateTBN(probe_normal))) + 0.5f) * g_ScreenProbesConstants.probe_size); + uint2 probe_cell = uint2(mapToHemiOctahedron(mul(direction, CreateTBN(probe_normal))) * g_ScreenProbesConstants.probe_size); uint2 probe_tile = (probe_seed / g_ScreenProbesConstants.probe_size); uint2 probe_pos = (probe_tile * g_ScreenProbesConstants.probe_size) + probe_cell; GetOrthoVectors(probe_normal, b1, b2); - float3 probe_direction = hemioct_to_float32x3(2.0f * (probe_cell + 0.5f) / g_ScreenProbesConstants.probe_size - 1.0f); + float3 probe_direction = mapToHemiOctahedronInverse((probe_cell + 0.5f) / g_ScreenProbesConstants.probe_size); probe_direction = normalize(probe_direction.x * b1 + probe_direction.y * b2 + probe_direction.z * probe_normal); float probe_hit_distance = min(g_ScreenProbes_PreviousProbeBuffer[probe_pos].w, hit_distance); @@ -1386,9 +1458,9 @@ void ProjectScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : S // Compute our SH probe if (probe_index < probe_count) { - float3 normal = normalize(2.0f * g_NormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); + float3 normal = normalize(2.0f * g_GeometryNormalBuffer.Load(int3(seed, 0)).xyz - 1.0f); float3 radiance = g_ScreenProbes_ProbeBuffer[probe_pos].xyz / g_ScreenProbesConstants.probe_size; - float3 direction = hemioct_to_float32x3(2.0f * (cell + 0.5f) / g_ScreenProbesConstants.probe_size - 1.0f); + float3 direction = mapToHemiOctahedronInverse((cell + 0.5f) / g_ScreenProbesConstants.probe_size); float3 b1, b2; GetOrthoVectors(normal, b1, b2); @@ -1439,13 +1511,17 @@ void ProjectScreenProbes(in uint did : SV_DispatchThreadID, in uint local_id : S void InterpolateScreenProbes(in uint2 did : SV_DispatchThreadID) { float depth = g_DepthBuffer.Load(int3(did, 0)).x; - float3 normal = g_NormalBuffer.Load(int3(did, 0)).xyz; + float3 normal = g_GeometryNormalBuffer.Load(int3(did, 0)).xyz; bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); if (is_sky_pixel || any(did >= g_BufferDimensions)) { g_GIDenoiser_ColorBuffer[did] = float4(0.0f, 0.0f, 0.0f, 1.0f); +#ifndef DISABLE_SPECULAR_MATERIALS + g_ReflectionBuffer[did] = float4(0.0f, 0.0f, 0.0f, 1.0f); +#endif // DISABLE_SPECULAR_MATERIALS + return; // discard sky pixels } @@ -1456,7 +1532,8 @@ void InterpolateScreenProbes(in uint2 did : SV_DispatchThreadID) float3 world_pos = InverseProject(g_ViewProjectionInverse, uv, depth); float cell_size = distance(g_Eye, world_pos) * g_ScreenProbesConstants.cell_size; - float2 s = BlueNoise_Sample2D(did, g_FrameIndex); + // do not use the same seed value with stochastic alpha testing + float2 s = BlueNoise_Sample2D(did, g_FrameIndex, 1); int2 jitter = (2.0f * s - 1.0f) * g_ScreenProbesConstants.probe_spawn_tile_size; uint2 new_pos = clamp(int2(did) + jitter, 0, int2(g_BufferDimensions) - 1); float2 new_uv = (new_pos + 0.5f) / g_BufferDimensions; @@ -1476,6 +1553,10 @@ void InterpolateScreenProbes(in uint2 did : SV_DispatchThreadID) { g_GIDenoiser_ColorBuffer[did] = float4(0.0f, 0.0f, 0.0f, 1.0f); +#ifndef DISABLE_SPECULAR_MATERIALS + g_ReflectionBuffer[did] = float4(0.0f, 0.0f, 0.0f, 1.0f); +#endif // DISABLE_SPECULAR_MATERIALS + return; // couldn't find any nearby probe... 
} @@ -1507,7 +1588,7 @@ void InterpolateScreenProbes(in uint2 did : SV_DispatchThreadID) else { w[i] = saturate(1.0f - abs(GetLinearDepth(probe_depth) - GetLinearDepth(depth)) / GetLinearDepth(depth)); - w[i] *= max(dot(normal, normalize(2.0f * g_NormalBuffer.Load(int3(probe_seed, 0)).xyz - 1.0f)), 0.0f); + w[i] *= max(dot(normal, normalize(2.0f * g_GeometryNormalBuffer.Load(int3(probe_seed, 0)).xyz - 1.0f)), 0.0f); w[i] = pow(w[i], 8.0f); // make it steep } } @@ -1529,7 +1610,7 @@ void InterpolateScreenProbes(in uint2 did : SV_DispatchThreadID) #ifndef HAS_OCCLUSION float ao = 1.0f; float3 irradiance = float3(0.0f, 0.0f, 0.0f); - normal = normalize(2.0f * g_DetailsBuffer.Load(int3(did, 0)).xyz - 1.0f); + normal = normalize(2.0f * g_ShadingNormalBuffer.Load(int3(did, 0)).xyz - 1.0f); #else // HAS_OCCLUSION float4 ao_and_bent_normal = g_OcclusionAndBentNormalBuffer.Load(int3(did, 0)); @@ -1548,6 +1629,73 @@ void InterpolateScreenProbes(in uint2 did : SV_DispatchThreadID) float denoiser_hint = (use_backup ? 0.0f : 1.0f); // hint to the denoiser that we should ideally not keep this sample... 
g_GIDenoiser_ColorBuffer[did] = float4(irradiance, denoiser_hint); + + // Perform "specular interpolation" for glossy reflectors +#ifndef DISABLE_SPECULAR_MATERIALS + { + float roughness = g_RoughnessBuffer.Load(int3(did, 0)).x; + + if (roughness <= g_GlossyReflectionsConstants.low_roughness_threshold) + { + if (GlossyReflections_QueueSample(did)) + { + uint rt_sample_index; + InterlockedAdd(g_GlossyReflections_RtSampleCountBuffer[0], 1, rt_sample_index); + + g_GlossyReflections_RtSampleBuffer[rt_sample_index] = GlossyReflections_PackSample(did); + } + + return; // raytrace low roughness pixels at half resolution + } + else if (roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + g_ReflectionBuffer[did] = float4(irradiance / PI, denoiser_hint); + + return; // fall back to diffuse on high roughness surfaces + } + + float3 radiance = float3(0.0f, 0.0f, 0.0f); + float3 view_direction = normalize(g_Eye - world_pos); + float3 detail_normal = normalize(2.0f * g_ShadingNormalBuffer.Load(int3(did, 0)).xyz - 1.0f); + + // Sample specular direction + Quaternion localRotation = QuaternionRotationZ(detail_normal); + float3 localView = localRotation.transform(view_direction); + float roughnessAlpha = max(0.000001f, squared(roughness)); + float3 newLight = sampleGGX(roughnessAlpha, localView, s); + float3 direction = normalize(localRotation.inverse().transform(newLight)); + + for (uint k = 0; k < 4; ++k) + { + uint2 probe_seed = ScreenProbes_UnpackSeed(probes[k]); + float3 probe_normal = normalize(2.0f * g_GeometryNormalBuffer.Load(int3(probe_seed, 0)).xyz - 1.0f); + + // Ignore directions in -Z hemisphere + if (dot(direction, probe_normal) < 0.0f) + continue; + float3 probe_direction = mul(direction, CreateTBN(probe_normal)); + + float2 probe_uv = mapToHemiOctahedron(probe_direction); + uint2 probe_tile = (probe_seed / g_ScreenProbesConstants.probe_size); + uint2 probe_pos = (probe_tile * g_ScreenProbesConstants.probe_size) + uint2(probe_uv * 
g_ScreenProbesConstants.probe_size); + + radiance += w[k] * g_ScreenProbes_ProbeBuffer[probe_pos].xyz; + } + + radiance.xyz /= (1.0f + radiance.xyz); + + if (g_GlossyReflectionsConstants.half_res && GlossyReflections_QueueSample(did)) + { + g_GlossyReflections_SpecularBuffer[did >> 1] = float4(radiance, denoiser_hint); + g_GlossyReflections_DirectionBuffer[did >> 1] = float4(direction, 0.f); // BE CAREFUL: no parallax correction + } + else + { + g_GlossyReflections_SpecularBuffer[did] = float4(radiance, denoiser_hint); + g_GlossyReflections_DirectionBuffer[did] = float4(direction, 0.f); // BE CAREFUL: no parallax correction + } + } +#endif // DISABLE_SPECULAR_MATERIALS } //! @@ -1599,8 +1747,67 @@ void PurgeTiles(in uint did : SV_DispatchThreadID) g_HashGridCache_PackedTileIndexBuffer[packed_tile_index] = tile_index; } -[numthreads(32, 1, 1)] -void PopulateCells(in uint did : SV_DispatchThreadID) +void PopulateCellsHandleHit(uint did, inout PopulateCellsPayload payload, RayDesc ray) +{ + payload.lighting = float3(0.0f, 0.0f, 0.0f); + + Reservoir reservoir = payload.reservoir; + // Update our reservoir cache for next frame's temporal resampling +#ifdef USE_RESAMPLING + reservoir.W = 0.0f; // invalidate the reservoir + + g_Reservoir_IndirectSampleReservoirBuffer[did] = packReservoir(reservoir); +#endif // USE_RESAMPLING +} + +void PopulateCellsHandleMiss(uint did, inout PopulateCellsPayload payload, RayDesc ray) +{ + float3 light_radiance = payload.lighting; + float light_weight = payload.reservoir.W; + uint probe_index = ScreenProbes_GetCellAndProbeIndex(payload.query_index).y; + uint2 seed = ScreenProbes_UnpackSeed(g_ScreenProbes_ProbeSpawnBuffer[probe_index]); + MaterialBRDF material = unpackMaterial(g_Reservoir_IndirectSampleMaterialBuffer[did]); + + // Recover the ray origin + float depth = g_DepthBuffer.Load(int3(seed, 0)).x; + float2 uv = (seed + 0.5f) / g_BufferDimensions; + float3 origin = InverseProject(g_ViewProjectionInverse, uv, depth); + + // And 
evaluate our lighting + payload.lighting = + evaluateBRDF(material, payload.normal, normalize(origin - payload.world), ray.Direction) + * light_radiance * light_weight; +} + +void PopulateCellsTraceRayInline(uint did, inout PopulateCellsPayload payload, RayDesc ray) +{ + ShadowRayQuery ray_query = TraceRay(ray); + + if (ray_query.CommittedStatus() == COMMITTED_NOTHING) + { + PopulateCellsHandleMiss(did, payload, ray); + } + else + { + PopulateCellsHandleHit(did, payload, ray); + } +} + +void PopulateCellsTraceRayRt(uint did, inout PopulateCellsPayload payload, RayDesc ray) +{ + TraceRay(g_Scene, RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH, 0xFFu, 0, 0, 0, ray, payload); +} + +void PopulateCellsTraceRay(uint did, inout PopulateCellsPayload payload, RayDesc ray) +{ +#if USE_INLINE_RT + return PopulateCellsTraceRayInline(did, payload, ray); +#else + return PopulateCellsTraceRayRt(did, payload, ray); +#endif +} + +void PopulateCells(uint did) { if (did >= g_HashGridCache_VisibilityRayCountBuffer[0]) { @@ -1611,33 +1818,26 @@ void PopulateCells(in uint did : SV_DispatchThreadID) HashGridCache_Visibility visibility = HashGridCache_UnpackVisibility(g_HashGridCache_VisibilityBuffer[visibility_index]); uint query_index = g_HashGridCache_VisibilityQueryBuffer[visibility_index]; - uint probe_index = ScreenProbes_GetCellAndProbeIndex(query_index).y; // Reconstruct world-space position and normal Instance instance = g_InstanceBuffer[visibility.instance_index]; Mesh mesh = g_MeshBuffer[instance.mesh_index + visibility.geometry_index]; - float4x4 transform = FetchTransform(instance.transform_index); - - Vertex v0, v1, v2; - FetchVertices(mesh, visibility.primitive_index, v0, v1, v2); + float3x4 transform = g_TransformBuffer[instance.transform_index]; - v0.position.xyz = mul(transform, float4(v0.position.xyz, 1.0f)).xyz - g_PreViewTranslation; - v1.position.xyz = mul(transform, float4(v1.position.xyz, 1.0f)).xyz - g_PreViewTranslation; - v2.position.xyz = mul(transform, 
float4(v2.position.xyz, 1.0f)).xyz - g_PreViewTranslation; + Triangle vertices = fetchVertices(mesh, visibility.primitive_index); - v0.normal.xyz = transformDirection(v0.normal.xyz, transform); - v1.normal.xyz = transformDirection(v1.normal.xyz, transform); - v2.normal.xyz = transformDirection(v2.normal.xyz, transform); - - float3 world = interpolate(v0.position.xyz, v1.position.xyz, v2.position.xyz, visibility.barycentrics); - float3 normal = (visibility.is_front_face ? 1.0f : -1.0f) * normalize(interpolate(v0.normal.xyz, v1.normal.xyz, v2.normal.xyz, visibility.barycentrics)); + vertices.v0 = transformPoint(vertices.v0, transform) - g_PreViewTranslation; + vertices.v1 = transformPoint(vertices.v1, transform) - g_PreViewTranslation; + vertices.v2 = transformPoint(vertices.v2, transform) - g_PreViewTranslation; + float3 world = interpolate(vertices.v0, vertices.v1, vertices.v2, visibility.barycentrics); + float3 edge10 = vertices.v1 - vertices.v0; + float3 edge20 = vertices.v2 - vertices.v0; + float3 normal = normalize(transformNormal(cross(edge10, edge20) * (visibility.is_front_face ? 1.0f : -1.0f), transform)); // Retrieve the light sample that we should use for our shadow ray - MaterialBRDF material = unpackMaterial(g_Reservoir_IndirectSampleMaterialBuffer[did]); Reservoir reservoir = unpackReservoir(g_Reservoir_IndirectSampleReservoirBuffer[did]); // Decode the light sample from our reservoir - float light_weight = reservoir.W; float3 direction, light_position; // Approximate visible light surface based on number of samples being used const float solid_angle = FOUR_PI / (kReservoir_SampleCount * 12000.0f); @@ -1651,37 +1851,19 @@ void PopulateCells(in uint did : SV_DispatchThreadID) ray_desc.TMin = 0.0f; ray_desc.TMax = hasLightPosition(selected_light) ? 
length(light_position - ray_desc.Origin) : FLT_MAX; - ShadowRayQuery ray_query = TraceRay(ray_desc); - - // Evaluate the radiance contribution - float3 lighting = float3(0.0f, 0.0f, 0.0f); - uint2 seed = ScreenProbes_UnpackSeed(g_ScreenProbes_ProbeSpawnBuffer[probe_index]); - - if (ray_query.CommittedStatus() == COMMITTED_NOTHING) - { - // Recover the ray origin - float depth = g_DepthBuffer.Load(int3(seed, 0)).x; - float2 uv = (seed + 0.5f) / g_BufferDimensions; - float3 origin = InverseProject(g_ViewProjectionInverse, uv, depth); - - // And evaluate our lighting - lighting += evaluateBRDF(material, normal, normalize(origin - world), direction) * light_radiance * light_weight; - } - else - { - // Update our reservoir cache for next frame's temporal resampling -#ifdef USE_RESAMPLING - reservoir.W = 0.0f; // invalidate the reservoir - - g_Reservoir_IndirectSampleReservoirBuffer[did] = packReservoir(reservoir); -#endif // USE_RESAMPLING - } + PopulateCellsPayload payload; + payload.query_index = query_index; + payload.world = world; + payload.normal = normal; + payload.lighting = light_radiance; + payload.reservoir = reservoir; + PopulateCellsTraceRay(did, payload, ray_desc); // And update the hash-grid cell payload uint cell_index = g_HashGridCache_VisibilityCellBuffer[visibility_index]; - uint4 quantized_radiance = HashGridCache_QuantizeRadiance(lighting); + uint4 quantized_radiance = HashGridCache_QuantizeRadiance(payload.lighting); - if (dot(lighting, lighting) > 0.0f) + if (dot(payload.lighting, payload.lighting) > 0.0f) { InterlockedAdd(g_HashGridCache_UpdateCellValueBuffer[4 * cell_index + 0], quantized_radiance.x); InterlockedAdd(g_HashGridCache_UpdateCellValueBuffer[4 * cell_index + 1], quantized_radiance.y); @@ -1696,10 +1878,16 @@ void PopulateCells(in uint did : SV_DispatchThreadID) if (is_bypass_cache) { - ScreenProbes_AccumulateRadiance(query_index, GIDenoiser_RemoveNaNs(lighting)); + ScreenProbes_AccumulateRadiance(query_index, 
GIDenoiser_RemoveNaNs(payload.lighting)); } } +[numthreads(32, 1, 1)] +void PopulateCellsMain(in uint did : SV_DispatchThreadID) +{ + PopulateCells(did); +} + // BE CAREFUL: we assume // UPDATE_TILES_NOT_SLICED_GROUP_* >= g_HashGridCacheConstants.size_tile_mip0 // UPDATE_TILES_NOT_SLICED_GROUP_* % g_HashGridCacheConstants.size_tile_mip0 == 0 @@ -1903,22 +2091,21 @@ void GenerateReservoirs(in uint did : SV_DispatchThreadID) // Reconstruct world-space position and normal Instance instance = g_InstanceBuffer[visibility.instance_index]; Mesh mesh = g_MeshBuffer[instance.mesh_index + visibility.geometry_index]; - float4x4 transform = FetchTransform(instance.transform_index); + float3x4 transform = g_TransformBuffer[instance.transform_index]; - Vertex v0, v1, v2; - FetchVertices(mesh, visibility.primitive_index, v0, v1, v2); + TriangleNormUV vertices = fetchVerticesNormUV(mesh, visibility.primitive_index); - v0.position.xyz = mul(transform, float4(v0.position.xyz, 1.0f)).xyz - g_PreViewTranslation; - v1.position.xyz = mul(transform, float4(v1.position.xyz, 1.0f)).xyz - g_PreViewTranslation; - v2.position.xyz = mul(transform, float4(v2.position.xyz, 1.0f)).xyz - g_PreViewTranslation; + vertices.v0 = transformPoint(vertices.v0, transform) - g_PreViewTranslation; + vertices.v1 = transformPoint(vertices.v1, transform) - g_PreViewTranslation; + vertices.v2 = transformPoint(vertices.v2, transform) - g_PreViewTranslation; - v0.normal.xyz = transformDirection(v0.normal.xyz, transform); - v1.normal.xyz = transformDirection(v1.normal.xyz, transform); - v2.normal.xyz = transformDirection(v2.normal.xyz, transform); + vertices.n0 = transformNormal(vertices.n0, transform); + vertices.n1 = transformNormal(vertices.n1, transform); + vertices.n2 = transformNormal(vertices.n2, transform); - float3 world = interpolate(v0.position.xyz, v1.position.xyz, v2.position.xyz, visibility.barycentrics); - float3 normal = (visibility.is_front_face ? 
1.0f : -1.0f) * normalize(interpolate(v0.normal.xyz, v1.normal.xyz, v2.normal.xyz, visibility.barycentrics)); - Material material = g_MaterialBuffer[mesh.material_index]; + float3 world = interpolate(vertices.v0, vertices.v1, vertices.v2, visibility.barycentrics); + float3 normal = (visibility.is_front_face ? 1.0f : -1.0f) * normalize(interpolate(vertices.n0, vertices.n1, vertices.n2, visibility.barycentrics)); + Material material = g_MaterialBuffer[instance.material_index]; // Recover the ray origin uint query_index = g_HashGridCache_VisibilityQueryBuffer[did]; @@ -1928,7 +2115,7 @@ void GenerateReservoirs(in uint did : SV_DispatchThreadID) float depth = g_DepthBuffer.Load(int3(seed, 0)).x; float2 uv = (seed + 0.5f) / g_BufferDimensions; float3 origin = InverseProject(g_ViewProjectionInverse, uv, depth); - float2 mesh_uv = interpolate(v0.uv.xy, v1.uv.xy, v2.uv.xy, visibility.barycentrics); + float2 mesh_uv = interpolate(vertices.uv0, vertices.uv1, vertices.uv2, visibility.barycentrics); // Patch the screen probes with some emissivity information: // We bypass the hash-grid cache entirely here, as adding emissive information to the cells @@ -1946,11 +2133,9 @@ void GenerateReservoirs(in uint did : SV_DispatchThreadID) // evaluated. // If successful, we inject the reprojected radiance into the cache so it can be re-used // by neighbor vertices but bypass the filtered readback as the sample is already denoised. 
-#ifdef HAS_FEEDBACK if (g_UseDirectLighting != 0) { - float4 homogeneous = mul(g_ViewProjection, float4(world, 1.0f)); - homogeneous.xyz /= homogeneous.w; // perspective divide + float3 homogeneous = transformPointProjection(world, g_ViewProjection); uv = 0.5f * float2(homogeneous.x, -homogeneous.y) + 0.5f; depth = homogeneous.z; @@ -1961,13 +2146,13 @@ void GenerateReservoirs(in uint did : SV_DispatchThreadID) if (all(previous_uv > 0.0f) && all(previous_uv < 1.0f)) { - homogeneous = mul(g_Reprojection, float4(2.0f * float2(uv.x, 1.0f - uv.y) - 1.0f, depth, 1.0f)); - homogeneous.z = GetLinearDepth(homogeneous.z / homogeneous.w); // perspective divide + float3 homogeneous2 = transformPointProjection(float3(2.0f * float2(uv.x, 1.0f - uv.y) - 1.0f, depth), g_Reprojection); + homogeneous2.z = GetLinearDepth(homogeneous2.z); float previous_depth = GetLinearDepth(g_PreviousDepthBuffer.SampleLevel(g_NearestSampler, previous_uv, 0.0f).x); float3 previous_normal = normalize(2.0f * g_PreviousNormalBuffer.SampleLevel(g_NearestSampler, previous_uv, 0.0f).xyz - 1.0f); - if (dot(previous_normal, normal) > 5e-1f && abs(previous_depth - homogeneous.z) / homogeneous.z < 5e-2f) + if (dot(previous_normal, normal) > 5e-1f && abs(previous_depth - homogeneous2.z) / homogeneous2.z < 5e-2f) { float3 previous_lighting = g_PrevCombinedIlluminationBuffer.SampleLevel(g_NearestSampler, previous_uv, 0.0f).xyz; @@ -1989,16 +2174,21 @@ void GenerateReservoirs(in uint did : SV_DispatchThreadID) } } } -#endif // HAS_FEEDBACK float3 view_direction = normalize(origin - world); // Sample new lights const float solid_angle = FOUR_PI / (kReservoir_SampleCount * 1.5f); MaterialEvaluated material2 = MakeMaterialEvaluated(material, mesh_uv); +#ifndef DISABLE_SPECULAR_MATERIALS + // Force diffuse lighting on secondary bounce to reduce noise + material2.metallicity = 0.0f; + material2.roughness = 1.0f; +#endif // DISABLE_SPECULAR_MATERIALS MaterialBRDF materialBRDF = MakeMaterialBRDF(material2); Random 
random = MakeRandom(did, g_FrameIndex); - Reservoir reservoir = sampleLightListCone(random, world, normal, view_direction, solid_angle, materialBRDF); + LightSampler lightSampler = MakeLightSampler(random); + Reservoir reservoir = lightSampler.sampleLightListCone( world, normal, view_direction, solid_angle, materialBRDF); if (!reservoir.isValid()) { @@ -2091,6 +2281,9 @@ void ResampleReservoirs(in uint did : SV_DispatchThreadID) MaterialBRDF material = unpackMaterial(g_Reservoir_IndirectSampleMaterialBuffer[did]); Reservoir reservoir = unpackReservoir(g_Reservoir_IndirectSampleReservoirBuffer[did]); + // Normalize the sample count M (a.k.a. confidence weight) with the initial sample count for simplicity. + reservoir.M = 1.0f; + Random random = MakeRandom(did, g_FrameIndex); // Locate our hash table cell @@ -2114,8 +2307,10 @@ void ResampleReservoirs(in uint did : SV_DispatchThreadID) uint offset = random.randInt(increment); // And combine the reservoirs - ReservoirUpdater updater = MakeReservoirUpdater(reservoir); float3 view_direction = normalize(origin - world); + const float solid_angle = FOUR_PI / (kReservoir_SampleCount * 12000.0f); + ReservoirUpdater updater = MakeReservoirUpdater(); + mergeReservoirsCone(updater, reservoir, random, material, world, normal, view_direction, solid_angle); for (uint i = 0; i < count; i += increment) { // Load up the iterated reservoir @@ -2134,9 +2329,8 @@ void ResampleReservoirs(in uint did : SV_DispatchThreadID) if (bilateral_weight < kReservoir_BilateralThreshold) continue; // And combine it with our current reservoir - Reservoir_ClampPrevious(reservoir, reservoir2); - const float solid_angle = FOUR_PI / (kReservoir_SampleCount * 12000.0f); - mergeReservoirs(updater, reservoir2, random, material, world, normal, view_direction, solid_angle); + Reservoir_ClampPrevious(reservoir2); + mergeReservoirsCone(updater, reservoir2, random, material, world, normal, view_direction, solid_angle); } if (!updater.reservoir.M) @@ -2145,57 
+2339,1258 @@ void ResampleReservoirs(in uint did : SV_DispatchThreadID) } // Finalize our reservoir - reservoir = getUpdatedReservoir(updater); + reservoir = updater.reservoir; // And write it out to memory g_Reservoir_IndirectSampleReservoirBuffer[did] = packReservoir(reservoir); } //! -//! GI-1.0 denoiser kernels. +//! Glossy reflections kernels. //! -[numthreads(8, 8, 1)] -void ReprojectGI(in uint2 did : SV_DispatchThreadID) +// BE CAREFUL: pick FP16 friendly values +#define TRACE_NONE_DISTANCE -1.f +#define TRACE_SKY_DISTANCE 65519.f + +void TraceReflectionsHandleHit(uint did, inout TraceReflectionsPayload payload, RayDesc ray, HitInfo hit_info, float hit_distance) { - if (all(did == 0)) - { - g_GIDenoiser_BlurSampleCountBuffer[0] = 0; - } + float3 hit_position = (ray.Origin + hit_distance * ray.Direction); - if (any(did >= g_BufferDimensions)) - { - return; // out of bounds - } + uint hit_instance_index = hit_info.instanceIndex; + uint hit_geometry_index = hit_info.geometryIndex; + uint hit_primitive_index = hit_info.primitiveIndex; + bool hit_is_front_face = hit_info.frontFace; + float2 hit_barycentrics = hit_info.barycentrics; - float4 color = g_GIDenoiser_ColorBuffer[did]; - float4 lighting = float4(0.0f, 0.0f, 0.0f, 0.0f); - float3 normal = g_DetailsBuffer.Load(int3(did, 0)).xyz; + Instance hit_instance = g_InstanceBuffer[hit_instance_index]; + Mesh hit_mesh = g_MeshBuffer[hit_instance.mesh_index + hit_geometry_index]; + float3x4 hit_transform = g_TransformBuffer[hit_instance.transform_index]; + Material hit_material = g_MaterialBuffer[hit_instance.material_index]; - float alpha_blend = 1.0f; - float color_delta = 0.0f; - float2 uv = (did + 0.5f) / g_BufferDimensions; - bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); + TriangleNormUV vertices2 = fetchVerticesNormUV(hit_mesh, hit_primitive_index); - if (!is_sky_pixel) + vertices2.n0 = transformNormal(vertices2.n0, hit_transform); + vertices2.n1 = transformNormal(vertices2.n1, hit_transform); + vertices2.n2 = transformNormal(vertices2.n2, hit_transform); + + float3 hit_normal = (hit_is_front_face ? 1.0f : -1.0f) * normalize(interpolate(vertices2.n0, vertices2.n1, vertices2.n2, hit_barycentrics)); + float2 hit_mesh_uv = interpolate(vertices2.uv0, vertices2.uv1, vertices2.uv2, hit_barycentrics); + + float3 homogeneous = transformPointProjection(hit_position, g_ViewProjection); + float2 uv = 0.5f * float2(homogeneous.x, -homogeneous.y) + 0.5f; + float depth = homogeneous.z; + + // Evaluate direct illumination from area light + if(g_UseDirectLighting != 0 && hit_is_front_face && dot(hit_material.emissivity.xyz, hit_material.emissivity.xyz) > 0.0f) { - float2 velocity = g_VelocityBuffer.SampleLevel(g_NearestSampler, uv, 0.0f).xy; - float2 previous_uv = (uv - velocity); + MaterialEmissive emissive = MakeMaterialEmissive(hit_material, hit_mesh_uv); + payload.hit_distance = hit_distance; + payload.radiance = emissive.emissive; + } + // Evaluate indirect illumination + else + { + // Use previous frame lighting when available + bool previous_frame_available = false; + if (all(uv > 0.0f) && all(uv < 1.0f) && depth > 0.0f && depth < 1.0f) + { + float2 previous_uv = uv - g_VelocityBuffer.SampleLevel(g_NearestSampler, uv, 0.0f).xy; + if (all(previous_uv > 0.0f) && all(previous_uv < 1.0f)) + { + float previous_depth = GetLinearDepth(g_PreviousDepthBuffer.SampleLevel(g_NearestSampler, previous_uv, 0.0f).x); + float3 previous_normal = normalize(2.0f * g_PreviousNormalBuffer.SampleLevel(g_NearestSampler, previous_uv, 0.0f).xyz - 1.0f); - float depth = g_DepthBuffer.Load(int3(did, 0)).x; - normal = 2.0f * normal - 1.0f; + float3 homogeneous2 = transformPointProjection(float3(2.0f * float2(uv.x, 1.0f - uv.y) - 1.0f, depth), 
g_Reprojection); + homogeneous2.z = GetLinearDepth(homogeneous2.z); - if (all(previous_uv > 0.0f) && all(previous_uv < 1.0f)) + if (abs(homogeneous2.z - previous_depth) / homogeneous2.z < 1e-2f && dot(hit_normal, previous_normal) > 0.95f) + { + float3 previous_lighting = g_PrevCombinedIlluminationBuffer.SampleLevel(g_NearestSampler, previous_uv, 0.0f).xyz; + payload.hit_distance = hit_distance; + payload.radiance = previous_lighting; + previous_frame_available = true; + } + } + } + + // Use hash grid cache when previous frame lighting is not available + if (!previous_frame_available) { - float3 world = InverseProject(g_GI10Constants.view_proj_inv, uv, depth); - float cell_size = distance(g_Eye, world) * g_ScreenProbesConstants.cell_size; + // jitter hit position to reduce tiled artifacts + float2 jitter = (2.0f * payload.s - 1.0f); + jitter *= HashGridCache_GetCellSize(hit_position); - float weight = 0.0f; - float2 texel_size = 1.0f / g_BufferDimensions; + float3 t, b; + GetOrthoVectors(hit_normal, t, b); - const float kOneOverSqrtOfTwo = 0.707107f; + HashGridCache_Data data; + data.eye_position = g_Eye; + data.hit_position = hit_position + jitter.x * t + jitter.y * b; + data.direction = ray.Direction; + data.hit_distance = hit_distance; - for (float y = -1.0f; y <= 1.0f; ++y) + uint tile_index; + uint cell_index = HashGridCache_FindCell(data, tile_index); + + if (cell_index != kGI10_InvalidId) + { + // Bump the cell's decay to the max. 
now that it's been 'touched' + uint previous_tile_decay; + InterlockedExchange(g_HashGridCache_DecayTileBuffer[tile_index], g_FrameIndex, previous_tile_decay); + + float4 li = HashGridCache_FilteredRadiance(cell_index, false); + payload.radiance = (li.xyz / max(li.w, 1.0f)); + payload.hit_distance = hit_distance; + } + } + } +} + +void TraceReflectionsHandleMiss(uint did, inout TraceReflectionsPayload payload, RayDesc ray) +{ + // Evaluate direct illumination from envmap + if (g_UseDirectLighting != 0) + { + payload.radiance = g_EnvironmentBuffer.SampleLevel(g_NearestSampler, ray.Direction, 0.0f).xyz; + payload.hit_distance = TRACE_SKY_DISTANCE; + } +} + +void TraceReflectionsTraceRayInline(uint did, inout TraceReflectionsPayload payload, RayDesc ray) +{ + ClosestRayQuery ray_query = TraceRay(ray); + + if (ray_query.CommittedStatus() == COMMITTED_TRIANGLE_HIT) + { + TraceReflectionsHandleHit(did, payload, ray, GetHitInfoRtInlineCommitted(ray_query), ray_query.CommittedRayT()); + } + else + { + TraceReflectionsHandleMiss(did, payload, ray); + } +} + +void TraceReflectionsTraceRayRt(uint did, inout TraceReflectionsPayload payload, RayDesc ray) +{ + TraceRay(g_Scene, RAY_FLAG_NONE, 0xFFu, 0, 0, 0, ray, payload); +} + +void TraceReflectionsTraceRay(uint did, inout TraceReflectionsPayload payload, RayDesc ray) +{ +#if USE_INLINE_RT + return TraceReflectionsTraceRayInline(did, payload, ray); +#else + return TraceReflectionsTraceRayRt(did, payload, ray); +#endif +} + +void TraceReflections(in uint did) +{ + if (did >= g_GlossyReflections_RtSampleCountBuffer[0]) + { + return; // out of bounds + } + + int2 full_pos = GlossyReflections_UnpackSample(g_GlossyReflections_RtSampleBuffer[did]); + float3 normal = g_GeometryNormalBuffer.Load(int3(full_pos, 0)).xyz; + bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); + + if (is_sky_pixel) + { + return; // discard sky pixels + } + + normal = normalize(2.0f * normal - 1.0f); + + // Texture coordinates + float4 visibility = g_VisibilityBuffer[full_pos]; + float2 barycentrics = visibility.xy; + uint instanceID = asuint(visibility.z); + uint primitiveID = asuint(visibility.w); + + Instance instance = g_InstanceBuffer[instanceID]; + Mesh mesh = g_MeshBuffer[instance.mesh_index]; + + Triangle vertices = fetchVertices(mesh, primitiveID); + + // Reconstruct world space position from barycentrics + float3x4 transform = g_TransformBuffer[instance.transform_index]; + vertices.v0 = transformPoint(vertices.v0, transform) - g_PreViewTranslation; + vertices.v1 = transformPoint(vertices.v1, transform) - g_PreViewTranslation; + vertices.v2 = transformPoint(vertices.v2, transform) - g_PreViewTranslation; + + // Frame + float2 uv = (full_pos + 0.5f) / g_BufferDimensions; + float depth = g_DepthBuffer.Load(int3(full_pos, 0)).x; + float3 world = interpolate(vertices.v0, vertices.v1, vertices.v2, barycentrics); + + // Robust ray origin offset + float3 origin = offsetPosition(world + g_PreViewTranslation, normal); + float3 view_direction = normalize(g_Eye - origin); + float3 detail_normal = normalize(2.0f * g_ShadingNormalBuffer.Load(int3(full_pos, 0)).xyz - 1.0f); + + float roughness = g_RoughnessBuffer.Load(int3(full_pos, 0)).x; + // do not use the same seed value with stochastic alpha testing + float2 s = BlueNoise_Sample2D(full_pos, g_FrameIndex, 2); + + // Sample specular direction + Quaternion localRotation = QuaternionRotationZ(detail_normal); + float3 localView = localRotation.transform(view_direction); + float roughnessAlpha = max(0.000001f, squared(roughness)); + float3 newLight = sampleGGX(roughnessAlpha, localView, s); + float3 direction = normalize(localRotation.inverse().transform(newLight)); + + // Get sampled direction PDF + float roughnessAlphaSqr = max(0.000001f, squared(roughnessAlpha)); + float3 halfVector = 
normalize(localView + newLight); + float dotNH = clamp(halfVector.z, -1.0f,1.0f); + float dotNV = clamp(localView.z, -1.0f,1.0f); + float specular_pdf = sampleGGXPDF(roughnessAlphaSqr, dotNH, dotNV, localView); + + RayDesc ray_desc; + ray_desc.Origin = origin; + ray_desc.Direction = direction; + ray_desc.TMin = 0.0f; + ray_desc.TMax = MAX_HIT_DISTANCE; + + TraceReflectionsPayload payload; + payload.full_pos = full_pos; + payload.radiance = 0.f; + payload.s = s; + payload.hit_distance = TRACE_NONE_DISTANCE; + + TraceReflectionsTraceRay(did, payload, ray_desc); + + payload.radiance /= (1.0f + payload.radiance); + + int2 half_pos = GlossyReflections_FullToHalfRes(full_pos); + g_GlossyReflections_SpecularBuffer[half_pos] = float4(payload.radiance, payload.hit_distance); + g_GlossyReflections_DirectionBuffer[half_pos] = payload.hit_distance > 0.f && payload.hit_distance < 100.f + ? float4(world + payload.hit_distance * direction - g_Eye, 1.f) + : float4(direction, 0.f); +} + +[numthreads(64, 1, 1)] +void TraceReflectionsMain(in uint did : SV_DispatchThreadID) +{ + TraceReflections(did); +} + +float RatioEstimator_GaussianFilter(in float x, in float radius) +{ + float sigma = 0.375f * radius; + return /*(radius / (sqrt(2.f * PI) * sigma)) */ exp(-(x * x) / (2.f * sigma * sigma)); +} + +[numthreads(8, 8, 1)] +void ResolveReflections_SplitRatioEstimatorX(in int2 did : SV_DispatchThreadID) +{ + int2 split_pos = did; + if (any(split_pos >= GlossyReflections_SplitRes())) + { + return; // out of bounds + } + + int2 full_pos = GlossyReflections_SplitToFullRes(split_pos); + float3 normal = g_ShadingNormalBuffer.Load(int3(full_pos, 0)).xyz; + float roughness = g_RoughnessBuffer.Load(int3(full_pos, 0)).x; + bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); + + if (is_sky_pixel || roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + return; // pixel was already updated + } + + normal = normalize(2.0f * normal - 1.0f); + + float2 uv = (full_pos + 0.5f) / GlossyReflections_FullRes(); + float center_depth = g_DepthBuffer.Load(int3(full_pos, 0)).x; + float3 center_world = InverseProject(g_GI10Constants.view_proj_inv, uv, center_depth); + float3 center_normal = normalize(2.f * g_GeometryNormalBuffer.Load(int3(full_pos, 0)).xyz - 1.f); + float center_view_depth = GetLinearDepth(center_depth); + float3 view_direction = normalize(g_Eye - center_world); + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(view_direction); + + float4 reflection_x = 0.f; + float3 reflection_average_squared_x = 0.f; + float reflection_weight_x = 0.f; + + // Micro-facet alpha is equal to roughness^2 + float roughnessAlpha = roughness * roughness; + roughnessAlpha = max(0.000001f, roughnessAlpha); // fix for GGX not being able to handle 0 roughness + float roughnessAlphaSqr = max(0.000001f, roughnessAlpha * roughnessAlpha); + + int2 half_pos = GlossyReflections_FullToHalfRes(full_pos); + int full_radius = GlossyReflections_FullRadius(); // Default was 7 + int half_radius = GlossyReflections_FullToHalfRadius(full_radius); + + for (int half_offset_x = -half_radius; half_offset_x <= +half_radius; half_offset_x += 1) + { + int2 half_sample_pos = half_pos + int2(half_offset_x, 0); + int2 full_sample_pos = GlossyReflections_HalfToFullRes(half_sample_pos); + if (any(full_sample_pos < 0) || any(full_sample_pos >= GlossyReflections_FullRes()) || + g_RoughnessBuffer.Load(int3(full_sample_pos, 0)).x > g_GlossyReflectionsConstants.high_roughness_threshold) + { + continue; // out of bounds + } + + // Compute pdf for reused sample (don't evaluate GGX) + float4 light_direction_or_hit_position = g_GlossyReflections_DirectionBuffer[half_sample_pos]; + float3 light_direction = 
light_direction_or_hit_position.w > 0.5f ? normalize(light_direction_or_hit_position.xyz + g_Eye - center_world) : light_direction_or_hit_position.xyz; + float3 halfVector = normalize(view_direction + light_direction.xyz); + float dotNH = clamp(dot(normal, halfVector), -1.0f, 1.0f); // Maybe we can remove this clamping. + float dotNV = clamp(dot(normal, view_direction), -1.0f, 1.0f); // Maybe we can remove this clamping. + float pdf_weight = sampleGGXPDF(roughnessAlphaSqr, dotNH, dotNV, localView); + + // Plane weight + float sample_depth = g_DepthBuffer.Load(int3(full_sample_pos, 0)).x; + float2 sample_uv = (full_sample_pos + 0.5f) / GlossyReflections_FullRes(); + float3 sample_world = InverseProject(g_GI10Constants.view_proj_inv, sample_uv, sample_depth); + float plane_weight = 1.f - saturate(abs(dot(center_normal, center_world - sample_world) / center_view_depth) * 200.0f - 0.0f); + + // Gaussian + int2 filter_pos = GlossyReflections_HalfToFullRes(half_sample_pos) - full_pos; + float filter_weight = RatioEstimator_GaussianFilter(filter_pos.x, full_radius); + + // Filter + // BE CAREFUL: adding the ratio estimator in the neighborhood helps with ghosting but reduce stability... + float4 radiance_and_hit_distance = g_GlossyReflections_SpecularBuffer[half_sample_pos]; + float weight = filter_weight * plane_weight * pdf_weight; + reflection_x += radiance_and_hit_distance * weight; + reflection_average_squared_x += radiance_and_hit_distance.xyz * radiance_and_hit_distance.xyz * weight; + reflection_weight_x += weight; + } + + // BE CAREFUL: we reuse a discarded sample (plane_weight near 0 on edges with upsampling) + // when no sample were used for removing black pixels at grazing angles on object borders... 
+ if (reflection_weight_x < 1e-3f) + { + for (int half_offset_x = -1; half_offset_x <= +1; half_offset_x += 1) + { + int2 half_sample_pos = half_pos + int2(half_offset_x, 0); + int2 full_sample_pos = GlossyReflections_HalfToFullRes(half_sample_pos); + if (any(full_sample_pos < 0) || any(full_sample_pos >= GlossyReflections_FullRes()) || + g_RoughnessBuffer.Load(int3(full_sample_pos, 0)).x > g_GlossyReflectionsConstants.high_roughness_threshold) + { + continue; // out of bounds + } + + // Filter + float4 radiance_and_hit_distance = g_GlossyReflections_SpecularBuffer[half_sample_pos]; + float weight = 1.f; + reflection_x += radiance_and_hit_distance * weight; + reflection_average_squared_x += radiance_and_hit_distance.xyz * radiance_and_hit_distance.xyz * weight; + reflection_weight_x += weight; + } + } + + if (reflection_weight_x > 0.f) + { + reflection_x /= reflection_weight_x; + reflection_average_squared_x /= reflection_weight_x; + } + + g_GlossyReflections_ReflectionsBufferX[split_pos] = reflection_x; + g_GlossyReflections_AverageSquaredBufferX[split_pos] = float4(sqrt(reflection_average_squared_x), 1.f); +} + +[numthreads(8, 8, 1)] +void ResolveReflections_SplitRatioEstimatorY(in uint2 did : SV_DispatchThreadID) +{ + int2 full_pos = did; + if (any(full_pos >= GlossyReflections_FullRes())) + { + return; // out of bounds + } + + float3 normal = g_ShadingNormalBuffer.Load(int3(full_pos, 0)).xyz; + float roughness = g_RoughnessBuffer.Load(int3(full_pos, 0)).x; + bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); + if (is_sky_pixel || roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + return; // pixel was already updated + } + + normal = normalize(2.0f * normal - 1.0f); + + float2 uv = (full_pos + 0.5f) / GlossyReflections_FullRes(); + float center_depth = g_DepthBuffer.Load(int3(full_pos, 0)).x; + float3 center_world = InverseProject(g_GI10Constants.view_proj_inv, uv, center_depth); + float3 center_normal = normalize(2.f * g_GeometryNormalBuffer.Load(int3(full_pos, 0)).xyz - 1.f); + float center_view_depth = GetLinearDepth(center_depth); + float3 view_direction = normalize(g_Eye - center_world); + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(view_direction); + + float4 reflection = 0.f; + float3 reflection_average_squared = 0.f; + float reflection_weight = 0.f; + + // Micro-facet alpha is equal to roughness^2 + float roughnessAlpha = roughness * roughness; + roughnessAlpha = max(0.000001f, roughnessAlpha); // fix for GGX not being able to handle 0 roughness + float roughnessAlphaSqr = max(0.000001f, roughnessAlpha * roughnessAlpha); + + int2 half_pos = GlossyReflections_FullToHalfRes(full_pos); + int full_radius = GlossyReflections_FullRadius(); // Default was 7 + int half_radius = GlossyReflections_FullToHalfRadius(full_radius); + // BE CAREFUL: needed for filtering out stripes with temporal accumulation (overblur for near mirror surfaces) + int2 half_jitter = floor((BlueNoise_Sample2D(full_pos, g_FrameIndex) - 0.5f) * 2 + 0.5f); + for (int half_offset_y = -half_radius; half_offset_y <= +half_radius; half_offset_y += 1) + { + int2 half_sample_pos = half_pos + int2(half_jitter.x, half_offset_y); + int2 full_sample_pos = GlossyReflections_HalfToFullRes(half_sample_pos); + if (any(full_sample_pos < 0) || any(full_sample_pos >= GlossyReflections_FullRes()) || + g_RoughnessBuffer.Load(int3(full_sample_pos, 0)).x > g_GlossyReflectionsConstants.high_roughness_threshold) + { + continue; 
// out of bounds + } + + // Compute pdf for reused sample (don't evaluate GGX) + float4 light_direction_or_hit_position = g_GlossyReflections_DirectionBuffer[half_sample_pos]; + float3 light_direction = light_direction_or_hit_position.w > 0.5f ? normalize(light_direction_or_hit_position.xyz + g_Eye - center_world) : light_direction_or_hit_position.xyz; + float3 halfVector = normalize(view_direction + light_direction.xyz); + float dotNH = clamp(dot(normal, halfVector), -1.0f, 1.0f); // Maybe we can remove this clamping. + float dotNV = clamp(dot(normal, view_direction), -1.0f, 1.0f); // Maybe we can remove this clamping. + float pdf_weight = sampleGGXPDF(roughnessAlphaSqr, dotNH, dotNV, localView); + + // Plane weight + float sample_depth = g_DepthBuffer.Load(int3(full_sample_pos, 0)).x; + float2 sample_uv = (full_sample_pos + 0.5f) / GlossyReflections_FullRes(); + float3 sample_world = InverseProject(g_GI10Constants.view_proj_inv, sample_uv, sample_depth); + float plane_weight = 1.f - saturate(abs(dot(center_normal, center_world - sample_world) / center_view_depth) * 200.0f - 0.0f); + + // Gaussian + int2 filter_pos = GlossyReflections_HalfToFullRes(half_sample_pos) - full_pos; + float filter_weight = RatioEstimator_GaussianFilter(filter_pos.y, full_radius); + + // Filter + // BE CAREFUL: adding the ratio estimator in the neighborhood helps with ghosting but reduce stability... 
+ int2 split_sample_pos = int2(full_sample_pos.x, half_sample_pos.y); + float4 reflection_x = g_GlossyReflections_ReflectionsBufferX[split_sample_pos]; + float weight = filter_weight * plane_weight * pdf_weight; + float3 reflection_average_squared_x = pow(g_GlossyReflections_AverageSquaredBufferX[split_sample_pos].xyz, 2.f); + reflection += reflection_x * weight; + reflection_average_squared += reflection_average_squared_x * weight; + reflection_weight += weight; + } + + // BE CAREFUL: we reuse discarded samples (plane_weight near 0 on edges with upsampling) + // when no sample were used for removing black pixels at grazing angles on object borders... + if (reflection_weight < 1e-3f) + { + for (int half_offset_y = -1; half_offset_y <= +1; half_offset_y += 1) + { + int2 half_sample_pos = half_pos + int2(0, half_offset_y); + int2 full_sample_pos = GlossyReflections_HalfToFullRes(half_sample_pos); + if (any(full_sample_pos < 0) || any(full_sample_pos >= GlossyReflections_FullRes()) || + g_RoughnessBuffer.Load(int3(full_sample_pos, 0)).x > g_GlossyReflectionsConstants.high_roughness_threshold) + { + continue; // out of bounds + } + + // Filter + int2 split_sample_pos = int2(full_sample_pos.x, half_sample_pos.y); + float4 reflection_x = g_GlossyReflections_ReflectionsBufferX[split_sample_pos]; + float3 reflection_average_squared_x = pow(g_GlossyReflections_AverageSquaredBufferX[split_sample_pos].xyz, 2.f); + float weight = 1.f; + reflection += reflection_x * weight; + reflection_average_squared += reflection_average_squared_x * weight; + reflection_weight += weight; + } + } + + if (reflection_weight > 0.f) + { + reflection /= reflection_weight; + reflection_average_squared /= reflection_weight; + } + + float3 reflection_average = reflection.xyz; + float3 reflection_std = sqrt(max(abs(reflection_average_squared - reflection_average * reflection_average), 0)); + + reflection.xyz /= max(1.0f - reflection.xyz, 1e-3f); + reflection_std.xyz /= max(1.0f - reflection_std.xyz, 
1e-3f); + + g_GlossyReflections_ReflectionsBuffer[full_pos] = reflection; + g_GlossyReflections_StandardDevBuffer[full_pos] = float4(reflection_std, 1.f); +} + +[numthreads(8, 8, 1)] +void ReprojectReflections(in uint2 did : SV_DispatchThreadID) +{ + uint2 full_pos = did; + if (any(full_pos >= GlossyReflections_FullRes())) + { + return; // out of bounds + } + + float4 color_and_hit_distance = g_GlossyReflections_ReflectionsBuffer[full_pos]; + float3 color = color_and_hit_distance.rgb; + float hit_distance = color_and_hit_distance.a; + + float4 lighting = float4(0.0f, 0.0f, 0.0f, 0.0f); + float alpha_blend = 1.0f; + float parallax_threshold = 0.0f; + + float2 uv = (full_pos + 0.5f) / g_BufferDimensions; + float3 normal = g_ShadingNormalBuffer.Load(int3(full_pos, 0)).xyz; + float roughness = g_RoughnessBuffer.Load(int3(full_pos, 0)).x; + bool is_sky_pixel = (dot(normal, normal) == 0.0f ? true : false); + normal = 2.0f * normal - 1.0f; + + if (is_sky_pixel || roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + g_GlossyReflections_ReflectionsBuffer[full_pos] = float4(color, 1.f); + return; // pixel was already updated + } + + { + float depth = g_DepthBuffer.Load(int3(full_pos, 0)).x; + float linear_depth = GetLinearDepth(depth); + float3 world = InverseProject(g_GI10Constants.view_proj_inv, uv, depth); + float3 ray_dir = world - g_Eye; + + // We start out with reconstructing the ray length + // This includes the portion from the camera to the reflecting surface as well as the portion from the surface to the hit position + float surface_depth = length(ray_dir); + float ray_length = surface_depth + hit_distance; + + // We then perform a parallax correction by shooting a ray of the same length "straight through" the reflecting surface + // and reprojecting the tip of that ray to the previous frame + float3 world_hit_position = g_Eye + (ray_dir / surface_depth) * ray_length; + + // Previous hit + float4 previous_hit_pos = mul(g_PreviousViewProjection, 
float4(world_hit_position, 1.0)); + float2 previous_hit_uv = float2(0.5f, -0.5f) * (previous_hit_pos.xy / previous_hit_pos.w) + 0.5f; + float3 previous_hit_normal = g_PreviousDetailsBuffer.SampleLevel(g_TextureSampler, previous_hit_uv, 0.0f).xyz; + previous_hit_normal = 2.f * previous_hit_normal - 1.f; + + float4 previous_hit_depths = g_PreviousDepthBuffer.Gather(g_TextureSampler, previous_hit_uv); // (-,+),(+,+),(+,-),(-,-) + float4 previous_hit_linear_depths = GetLinearDepth(previous_hit_depths); + float4 previous_hit_dissoclusions = exp(-abs(1.f - max(0.f, dot(normal, previous_hit_normal))) * 1.4f) * + exp(-abs(previous_hit_linear_depths - linear_depth.xxxx) / linear_depth.xxxx * 1.0f); + + float4 previous_hit_color_and_count = 0; + if (all(previous_hit_dissoclusions > 0.9)) + { + previous_hit_color_and_count = g_PreviousReflectionBuffer.SampleLevel(g_TextureSampler, previous_hit_uv, 0.0f); + } + else if (any(previous_hit_dissoclusions > 0.9f)) + { + int max_index = 0; + float max_dissoclusion = previous_hit_dissoclusions[max_index]; + for (int i = 1; i < 4; ++i) + { + if (previous_hit_dissoclusions[i] > max_dissoclusion) + { + max_index = i; + max_dissoclusion = previous_hit_dissoclusions[max_index]; + } + } + + int2 offsets[] = { + {0, 1}, + {1, 1}, + {1, 0}, + {0, 0} + }; + + int2 previous_hit_px = int2(previous_hit_uv * g_BufferDimensions) + offsets[max_index]; + previous_hit_color_and_count = g_PreviousReflectionBuffer.Load(int3(previous_hit_px, 0)); + } + + float3 previous_hit_color = previous_hit_color_and_count.xyz / max(previous_hit_color_and_count.w, 1.f); + float previous_hit_count = previous_hit_color_and_count.w; + + // Previous + float2 velocity = g_VelocityBuffer.SampleLevel(g_NearestSampler, uv, 0.0f).xy; + float2 previous_uv = (uv - velocity); + float3 previous_normal = g_PreviousDetailsBuffer.SampleLevel(g_TextureSampler, previous_uv, 0.0f).xyz; + previous_normal = 2.f * previous_normal - 1.f; + + float4 previous_depths = 
g_PreviousDepthBuffer.Gather(g_TextureSampler, previous_uv); // (-,+),(+,+),(+,-),(-,-) + float4 previous_linear_depths = GetLinearDepth(previous_depths); + float4 previous_dissoclusions = exp(-abs(1.f - max(0.f, dot(normal, previous_normal))) * 1.4f) * + exp(-abs(previous_linear_depths - linear_depth.xxxx) / linear_depth.xxxx * 1.0f); + + float4 previous_color_and_count = 0; + if (all(previous_dissoclusions > 0.9)) + { + previous_color_and_count = g_PreviousReflectionBuffer.SampleLevel(g_TextureSampler, previous_uv, 0.0f); + } + else if (any(previous_dissoclusions > 0.9f)) + { + int max_index = 0; + float max_dissoclusion = previous_dissoclusions[max_index]; + for (int i = 1; i < 4; ++i) + { + if (previous_dissoclusions[i] > max_dissoclusion) + { + max_index = i; + max_dissoclusion = previous_dissoclusions[max_index]; + } + } + + int2 offsets[] = { + {0, 1}, + {1, 1}, + {1, 0}, + {0, 0} + }; + + int2 previous_px = int2(previous_uv * g_BufferDimensions) + offsets[max_index]; + previous_color_and_count = g_PreviousReflectionBuffer.Load(int3(previous_px, 0)); + } + + float3 previous_color = previous_color_and_count.xyz / max(previous_color_and_count.w, 1.f); + float previous_count = previous_color_and_count.w; + + // Local neighborhood + float3 neighborhood_average = color; + float3 neighborhood_std = g_GlossyReflections_StandardDevBuffer[full_pos].xyz; + + // BE CAREFUL: depending on noise amount, this can create large blocky or boily artifacts (depends on ratio estimator filter) + float neighborhood_std_scale = 1.f; // 2.f == 75%, 3.f == 89%, 4.f == 94% + float3 neighborhood_min = neighborhood_average - neighborhood_std_scale * neighborhood_std; + float3 neighborhood_max = neighborhood_average + neighborhood_std_scale * neighborhood_std; + + // Dual source blending + // BE CAREFUL: this logic breaks if neighborhood_average isn't stable enough (too much input noise) + float luminance_diff_scale = -1e2f * exp2(g_Exposure); // BE CAREFUL: depends on Median heavily + 
float previous_hit_weight = (previous_hit_count > 0.f ? 1.f : 0.f) * saturate(exp2(luminance_diff_scale * luminance(previous_hit_color - neighborhood_average))); + float previous_weight = (previous_count > 0.f ? 1.f : 0.f) * saturate(exp2(luminance_diff_scale * luminance(previous_color - neighborhood_average))); + float dual_weight = max(previous_hit_weight + previous_weight, 1e-7f); + float3 dual_color = (previous_hit_weight * clamp(previous_hit_color, neighborhood_min, neighborhood_max) + + previous_weight * clamp(previous_color, neighborhood_min, neighborhood_max)) / dual_weight; + float dual_count = (previous_hit_weight * previous_hit_count + previous_weight * previous_count) / dual_weight; + + lighting = float4(dual_color * dual_count, dual_count); + } + + lighting += float4(color, 1.f); + + float2 vignette_uv = uv * (1.0f - uv.yx); + float vignette = pow(15.0f * vignette_uv.x * vignette_uv.y, 0.25f); + float max_sample_count = max(8.0f * vignette, 1.0f); + + if (lighting.w > max_sample_count) // evict old samples from the history + { + lighting *= (max_sample_count / lighting.w); + } + + g_ReflectionBuffer[did] = float4(GIDenoiser_RemoveNaNs(lighting.xyz), lighting.w); // Don't propagate NaNs +} + +float AtrousRatioEstimator_GaussianFilter(float x, float step) +{ + float sigma = 1.065f * step; // 1.f, 2.f, 4.f, 8.f... + return (step / (sqrt(2.f * PI) * sigma)) * exp(-(x * x) / (2.f * sigma * sigma)); +} + +[numthreads(8, 8, 1)] +void ResolveReflections_AtrousRatioEstimator_First(in uint2 did : SV_DispatchThreadID) +{ + int2 half_pos = did; + if (any(half_pos >= GlossyReflections_HalfRes())) + { + return; // out of bounds + } + + int2 full_pos = GlossyReflections_HalfToFullRes(half_pos); + float3 normal = g_ShadingNormalBuffer.Load(int3(full_pos, 0)).xyz; + float roughness = g_RoughnessBuffer.Load(int3(full_pos, 0)).x; + bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); + + if (is_sky_pixel || roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + return; // pixel was already updated + } + + normal = normalize(2.0f * normal - 1.0f); + + float2 uv = (full_pos + 0.5f) / GlossyReflections_FullRes(); + float center_depth = g_DepthBuffer.Load(int3(full_pos, 0)).x; + float3 center_world = InverseProject(g_GI10Constants.view_proj_inv, uv, center_depth); + float3 center_normal = normalize(2.f * g_GeometryNormalBuffer.Load(int3(full_pos, 0)).xyz - 1.f); + float center_view_depth = GetLinearDepth(center_depth); + float3 view_direction = normalize(g_Eye - center_world); + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(view_direction); + + float4 reflection_0 = 0.f; + float3 reflection_average_squared_0 = 0.f; + float reflection_weight_0 = 0.f; + + // Micro-facet alpha is equal to roughness^2 + float roughnessAlpha = roughness * roughness; + roughnessAlpha = max(0.000001f, roughnessAlpha); // fix for GGX not being able to handle 0 roughness + float roughnessAlphaSqr = max(0.000001f, roughnessAlpha * roughnessAlpha); + + int full_step = g_GlossyReflectionsAtrousConstants.full_step; + int full_radius = 2 * full_step; + int half_step = GlossyReflections_FullToHalfRadius(full_step); + int half_radius = GlossyReflections_FullToHalfRadius(full_radius); + for (int half_offset_x = -half_radius; half_offset_x <= +half_radius; half_offset_x += half_step) + { + for (int half_offset_y = -half_radius; half_offset_y <= +half_radius; half_offset_y += half_step) + { + int2 half_sample_pos = half_pos + int2(half_offset_x, half_offset_y); + int2 full_sample_pos = GlossyReflections_HalfToFullRes(half_sample_pos); + if (any(full_sample_pos < 0) || any(full_sample_pos >= GlossyReflections_FullRes())) + { + // BE CAREFUL: out of screen + continue; + } + + float sample_depth = g_DepthBuffer.Load(int3(full_sample_pos, 0)).x; + if (sample_depth >= 1.0) + { + // BE CAREFUL: we can't 
rely on plane_weight for sky pixels, it can create NaN values... + continue; + } + + float sample_roughness = g_RoughnessBuffer.Load(int3(full_sample_pos, 0)).x; + if (sample_roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + // BE CAREFUL: diffuse enought surfaces + continue; + } + + // Compute pdf for reused sample (don't evaluate GGX) + float4 light_direction_or_hit_position = g_GlossyReflections_DirectionBuffer[half_sample_pos]; + float3 light_direction = light_direction_or_hit_position.w > 0.5f ? normalize(light_direction_or_hit_position.xyz + g_Eye - center_world) : light_direction_or_hit_position.xyz; + float3 halfVector = normalize(view_direction + light_direction.xyz); + float dotNH = clamp(dot(normal, halfVector), -1.0f, 1.0f); // Maybe we can remove this clamping. + float dotNV = clamp(dot(normal, view_direction), -1.0f, 1.0f); // Maybe we can remove this clamping. + float pdf_weight = sampleGGXPDF(roughnessAlphaSqr, dotNH, dotNV, localView); + + // Plane weight + float2 sample_uv = (full_sample_pos + 0.5f) / GlossyReflections_FullRes(); + float3 sample_world = InverseProject(g_GI10Constants.view_proj_inv, sample_uv, sample_depth); + float plane_weight = 1.f - saturate(abs(dot(center_normal, center_world - sample_world) / center_view_depth) * 200.0f - 0.0f); + + // Gaussian + int2 filter_pos = GlossyReflections_HalfToFullRes(half_sample_pos) - full_pos; + float filter_weight = AtrousRatioEstimator_GaussianFilter(filter_pos.x, full_step) + * AtrousRatioEstimator_GaussianFilter(filter_pos.y, full_step); + + // Filter + // BE CAREFUL: adding the ratio estimator in the neighborhood helps with ghosting but reduce stability... 
+ float4 radiance_and_hit_distance = g_GlossyReflections_SpecularBuffer[half_sample_pos]; + float weight = filter_weight * plane_weight * pdf_weight; + reflection_0 += radiance_and_hit_distance * weight; + reflection_average_squared_0 += radiance_and_hit_distance.xyz * radiance_and_hit_distance.xyz * weight; + reflection_weight_0 += weight; + } + } + + if (reflection_weight_0 > 1e-3f) + { + reflection_0 /= reflection_weight_0; + reflection_average_squared_0 /= reflection_weight_0; + } + + g_GlossyReflections_ReflectionsBuffer0[half_pos] = reflection_0; + g_GlossyReflections_AverageSquaredBuffer0[half_pos] = float4(sqrt(reflection_average_squared_0), 1.f); +} + +[numthreads(8, 8, 1)] +void ResolveReflections_AtrousRatioEstimator_Iter(in uint2 did : SV_DispatchThreadID) +{ + int2 half_pos = did; + if (any(half_pos >= GlossyReflections_HalfRes())) + { + return; // out of bounds + } + + int2 full_pos = GlossyReflections_HalfToFullRes(half_pos); + float3 normal = g_ShadingNormalBuffer.Load(int3(full_pos, 0)).xyz; + float roughness = g_RoughnessBuffer.Load(int3(full_pos, 0)).x; + bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); + + if (is_sky_pixel || roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + return; // pixel was already updated + } + + normal = normalize(2.0f * normal - 1.0f); + + float2 uv = (full_pos + 0.5f) / GlossyReflections_FullRes(); + float center_depth = g_DepthBuffer.Load(int3(full_pos, 0)).x; + float3 center_world = InverseProject(g_GI10Constants.view_proj_inv, uv, center_depth); + float3 center_normal = normalize(2.f * g_GeometryNormalBuffer.Load(int3(full_pos, 0)).xyz - 1.f); + float center_view_depth = GetLinearDepth(center_depth); + float3 view_direction = normalize(g_Eye - center_world); + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(view_direction); + + float4 reflection_1 = 0.f; + float3 reflection_average_1 = 0.f; + float3 reflection_average_squared_1 = 0.f; + float reflection_weight_1 = 0.f; + float reflection_neighborhood_weight_1 = 0.f; + + // Micro-facet alpha is equal to roughness^2 + float roughnessAlpha = roughness * roughness; + roughnessAlpha = max(0.000001f, roughnessAlpha); // fix for GGX not being able to handle 0 roughness + float roughnessAlphaSqr = max(0.000001f, roughnessAlpha * roughnessAlpha); + + int full_step = g_GlossyReflectionsAtrousConstants.full_step; + int full_radius = 2 * full_step; + int half_step = GlossyReflections_FullToHalfRadius(full_step); + int half_radius = GlossyReflections_FullToHalfRadius(full_radius); + for (int half_offset_x = -half_radius; half_offset_x <= +half_radius; half_offset_x += half_step) + { + for (int half_offset_y = -half_radius; half_offset_y <= +half_radius; half_offset_y += half_step) + { + int2 half_sample_pos = half_pos + int2(half_offset_x, half_offset_y); + int2 full_sample_pos = GlossyReflections_HalfToFullRes(half_sample_pos); + if (any(full_sample_pos < 0) || any(full_sample_pos >= GlossyReflections_FullRes())) + { + // BE CAREFUL: out of screen + continue; + } + + float sample_depth = 
g_DepthBuffer.Load(int3(full_sample_pos, 0)).x; + if (sample_depth >= 1.0) + { + // BE CAREFUL: we can't rely on plane_weight for sky pixels, it can create NaN values... + continue; + } + + float sample_roughness = g_RoughnessBuffer.Load(int3(full_sample_pos, 0)).x; + if (sample_roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + // BE CAREFUL: diffuse enought surfaces + continue; + } + + // Compute pdf for reused sample (don't evaluate GGX) + float4 light_direction_or_hit_position = g_GlossyReflections_DirectionBuffer[half_sample_pos]; + float3 light_direction = light_direction_or_hit_position.w > 0.5f ? normalize(light_direction_or_hit_position.xyz + g_Eye - center_world) : light_direction_or_hit_position.xyz; + float3 halfVector = normalize(view_direction + light_direction.xyz); + float dotNH = clamp(dot(normal, halfVector), -1.0f, 1.0f); // Maybe we can remove this clamping. + float dotNV = clamp(dot(normal, view_direction), -1.0f, 1.0f); // Maybe we can remove this clamping. + float pdf_weight = sampleGGXPDF(roughnessAlphaSqr, dotNH, dotNV, localView); + + // Plane weight + float2 sample_uv = (full_sample_pos + 0.5f) / GlossyReflections_FullRes(); + float3 sample_world = InverseProject(g_GI10Constants.view_proj_inv, sample_uv, sample_depth); + float plane_weight = 1.f - saturate(abs(dot(center_normal, center_world - sample_world) / center_view_depth) * 200.0f - 0.0f); + + // Gaussian + int2 filter_pos = GlossyReflections_HalfToFullRes(half_sample_pos) - full_pos; + float filter_weight = AtrousRatioEstimator_GaussianFilter(filter_pos.x, full_step) + * AtrousRatioEstimator_GaussianFilter(filter_pos.y, full_step); + + // Filter + // BE CAREFUL: adding the ratio estimator in the neighborhood helps with ghosting but reduce stability... 
+ float4 reflection_0 = g_GlossyReflections_ReflectionsBuffer0[half_sample_pos]; + float3 reflection_average_squared_0 = pow(g_GlossyReflections_AverageSquaredBuffer0[half_sample_pos].xyz, 2.f); + float weight = filter_weight * plane_weight * pdf_weight; + reflection_1 += reflection_0 * weight; + reflection_average_squared_1 += reflection_average_squared_0 * weight; + reflection_weight_1 += weight; + } + } + + if (reflection_weight_1 > 1e-3f) + { + reflection_1 /= reflection_weight_1; + reflection_average_squared_1 /= reflection_weight_1; + } + + g_GlossyReflections_ReflectionsBuffer1[half_pos] = reflection_1; + g_GlossyReflections_AverageSquaredBuffer1[half_pos] = float4(sqrt(reflection_average_squared_1), 1.f); +} + +[numthreads(8, 8, 1)] +void ResolveReflections_AtrousRatioEstimator_Last(in uint2 did : SV_DispatchThreadID) +{ + int2 full_pos = did; + if (any(full_pos >= GlossyReflections_FullRes())) + { + return; // out of bounds + } + + float3 normal = g_ShadingNormalBuffer.Load(int3(full_pos, 0)).xyz; + float roughness = g_RoughnessBuffer.Load(int3(full_pos, 0)).x; + bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); + + if (is_sky_pixel || roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + return; // pixel was already updated + } + + normal = normalize(2.0f * normal - 1.0f); + + float2 uv = (full_pos + 0.5f) / GlossyReflections_FullRes(); + float center_depth = g_DepthBuffer.Load(int3(full_pos, 0)).x; + float3 center_world = InverseProject(g_GI10Constants.view_proj_inv, uv, center_depth); + float3 center_normal = normalize(2.f * g_GeometryNormalBuffer.Load(int3(full_pos, 0)).xyz - 1.f); + float center_view_depth = GetLinearDepth(center_depth); + float3 view_direction = normalize(g_Eye - center_world); + Quaternion localRotation = QuaternionRotationZ(normal); + float3 localView = localRotation.transform(view_direction); + + float4 reflection = 0.f; + float3 reflection_average_squared = 0.f; + float reflection_weight = 0.f; + + // Micro-facet alpha is equal to roughness^2 + float roughnessAlpha = roughness * roughness; + roughnessAlpha = max(0.000001f, roughnessAlpha); // fix for GGX not being able to handle 0 roughness + float roughnessAlphaSqr = max(0.000001f, roughnessAlpha * roughnessAlpha); + + int full_step = g_GlossyReflectionsAtrousConstants.full_step; + int full_radius = 2 * full_step; + int2 half_pos = GlossyReflections_FullToHalfRes(full_pos); + int half_step = GlossyReflections_FullToHalfRadius(full_step); + int half_radius = GlossyReflections_FullToHalfRadius(full_radius); + + // Roughness + float roughness_average = 0.f; + float roughness_average_squared = 0.f; + float roughness_weight = 0.f; + + for (int full_offset_x = -4; full_offset_x <= +4; full_offset_x += 2) + { + for (int full_offset_y = -4; full_offset_y <= +4; full_offset_y += 2) + { + int2 full_sample_pos = full_pos + int2(full_offset_x, full_offset_y); + if (any(full_sample_pos < 0) || any(full_sample_pos >= GlossyReflections_FullRes())) + continue; + + float roughness = g_RoughnessBuffer.Load(int3(full_sample_pos, 0)).x; + + roughness_average += roughness; + 
roughness_average_squared += roughness * roughness; + roughness_weight += 1.f; + } + } + + if (roughness_weight > 1e-3f) + { + roughness_average /= roughness_weight; + roughness_average_squared /= roughness_weight; + } + + float roughness_std = sqrt(max(abs(roughness_average_squared - roughness_average * roughness_average), 0.f)); + + // BE CAREFUL: half_step needed for filtering out patterns when roughness is varying with temporal accumulation + float jitter_scale = clamp((4.0 * roughness_std) / roughness_average, 0.f, 1.f); + int2 half_jitter = floor((BlueNoise_Sample2D(full_pos, g_FrameIndex) - 0.5f) * lerp(0.f, half_step, jitter_scale) + 0.5f); + + for (int half_offset_x = -half_radius; half_offset_x <= +half_radius; half_offset_x += half_step) + { + for (int half_offset_y = -half_radius; half_offset_y <= +half_radius; half_offset_y += half_step) + { + int2 half_sample_pos = half_pos + int2(half_offset_x, half_offset_y) + half_jitter; + int2 full_sample_pos = GlossyReflections_HalfToFullRes(half_sample_pos); + if (any(full_sample_pos < 0) || any(full_sample_pos >= GlossyReflections_FullRes())) + { + // BE CAREFUL: out of screen + continue; + } + + float sample_depth = g_DepthBuffer.Load(int3(full_sample_pos, 0)).x; + if (sample_depth >= 1.0) + { + // BE CAREFUL: we can't rely on plane_weight for sky pixels, it can create NaN values... + continue; + } + + float sample_roughness = g_RoughnessBuffer.Load(int3(full_sample_pos, 0)).x; + if (sample_roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + // BE CAREFUL: diffuse enought surfaces + continue; + } + + // Compute pdf for reused sample (don't evaluate GGX) + float4 light_direction_or_hit_position = g_GlossyReflections_DirectionBuffer[half_sample_pos]; + float3 light_direction = light_direction_or_hit_position.w > 0.5f ? 
normalize(light_direction_or_hit_position.xyz + g_Eye - center_world) : light_direction_or_hit_position.xyz; + float3 halfVector = normalize(view_direction + light_direction.xyz); + float dotNH = clamp(dot(normal, halfVector), -1.0f, 1.0f); // Maybe we can remove this clamping. + float dotNV = clamp(dot(normal, view_direction), -1.0f, 1.0f); // Maybe we can remove this clamping. + float pdf_weight = sampleGGXPDF(roughnessAlphaSqr, dotNH, dotNV, localView); + + // Plane weight + float2 sample_uv = (full_sample_pos + 0.5f) / GlossyReflections_FullRes(); + float3 sample_world = InverseProject(g_GI10Constants.view_proj_inv, sample_uv, sample_depth); + float plane_weight = 1.f - saturate(abs(dot(center_normal, center_world - sample_world) / center_view_depth) * 200.0f - 0.0f); + + // Gaussian + int2 filter_pos = GlossyReflections_HalfToFullRes(half_sample_pos) - full_pos; + float filter_weight = AtrousRatioEstimator_GaussianFilter(filter_pos.x, full_step) + * AtrousRatioEstimator_GaussianFilter(filter_pos.y, full_step); + + // Filter + // BE CAREFUL: adding the ratio estimator in the neighborhood helps with ghosting but reduce stability... 
+ float4 reflection_1 = g_GlossyReflections_ReflectionsBuffer1[half_sample_pos]; + float3 reflection_average_squared_1 = pow(g_GlossyReflections_AverageSquaredBuffer1[half_sample_pos].xyz, 2.f); + float weight = filter_weight * plane_weight * pdf_weight; + reflection += reflection_1 * weight; + reflection_average_squared += reflection_average_squared_1 * weight; + reflection_weight += weight; + } + } + + if (reflection_weight > 0.f) + { + reflection /= reflection_weight; + reflection_average_squared /= reflection_weight; + } + + float3 reflection_average = reflection.xyz; + float3 reflection_std = sqrt(max(abs(reflection_average_squared - reflection_average * reflection_average), 0)); + + reflection.xyz /= max(1.0f - reflection.xyz, 1e-3f); + reflection_std /= max(1.0f - reflection_std, 1e-3f); + + g_GlossyReflections_ReflectionsBuffer[full_pos] = reflection; + g_GlossyReflections_StandardDevBuffer[full_pos] = float4(reflection_std, 1.f); +} + +float MarkFireflies_BoxFilter(in int pos, in int radius) +{ + return abs(pos) <= radius ? 1.f : 0.f; +} + +[numthreads(8, 8, 1)] +void MarkFireflies(in uint2 did : SV_DispatchThreadID) +{ + int2 half_pos = did; + if (any(half_pos >= GlossyReflections_HalfRes())) + { + return; // out of bounds + } + + int2 full_pos = GlossyReflections_HalfToFullRes(half_pos); + float3 normal = g_ShadingNormalBuffer.Load(int3(full_pos, 0)).xyz; + float roughness = g_RoughnessBuffer.Load(int3(full_pos, 0)).x; + bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); + + if (is_sky_pixel || roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + return; // pixel was already updated + } + + float4 radiance_and_hit_distance = g_GlossyReflections_SpecularBuffer[half_pos]; + if (radiance_and_hit_distance.w < 0.f) + { + // TRACE_SKY_DISTANCE or TRACE_NONE_DISTANCE + g_GlossyReflections_FirefliesBuffer[half_pos] = 1.f; + return; + } + + float center_luminance = luminance(radiance_and_hit_distance.xyz); + float luminance_lower = 0.f; + float luminance_higher = 0.f; + + int full_radius = GlossyReflections_MarkFireflies_FullRadius(); // Default was 3 + int half_radius = GlossyReflections_FullToHalfRadius(full_radius); + for (int half_offset_x = -half_radius; half_offset_x <= +half_radius; half_offset_x += 1) + { + for (int half_offset_y = -half_radius; half_offset_y <= +half_radius; half_offset_y += 1) + { + if (half_offset_x == 0 && half_offset_y == 0) + continue; + + int2 half_sample_pos = int2(half_pos) + int2(half_offset_x, half_offset_y); + int2 full_sample_pos = GlossyReflections_HalfToFullRes(half_sample_pos); + if (any(full_sample_pos < 0) || any(full_sample_pos >= GlossyReflections_FullRes()) || + g_RoughnessBuffer.Load(int3(full_sample_pos, 0)).x > g_GlossyReflectionsConstants.high_roughness_threshold) + { + continue; // out of bounds + } + + float4 sample_radiance_and_hit_distance = g_GlossyReflections_SpecularBuffer[half_sample_pos]; + float sample_luminance = luminance(sample_radiance_and_hit_distance.xyz); + + int2 filter_pos = GlossyReflections_HalfToFullRes(half_sample_pos) - full_pos; + float sample_weight = MarkFireflies_BoxFilter(filter_pos.x, full_radius) + * MarkFireflies_BoxFilter(filter_pos.y, full_radius); + + if (sample_luminance < center_luminance) + luminance_higher += sample_weight; // BE CAREFUL: we count when center_luminance is higher + else if (sample_luminance > center_luminance) + luminance_lower += sample_weight; // BE CAREFUL: we count when center_luminance is lower + } 
+    }
+
+    float luminance_weight = luminance_lower + luminance_higher;
+
+    g_GlossyReflections_FirefliesBuffer[half_pos] =
+        luminance_lower < GlossyReflections_MarkFireflies_LowThreshold() * luminance_weight || // BE CAREFUL: these thresholds depends on radius
+        luminance_higher > GlossyReflections_MarkFireflies_HighThreshold() * luminance_weight ?
+        1.f : 0.f;
+}
+
+// Per-axis falloff used by CleanupFireflies: exp(-k * i^2 / (radius + 1)^2) with k = 1.
+//   i      : tap offset along one axis (in taps)
+//   radius : filter radius (in taps)
+// The 2D weight is obtained by multiplying the X and Y evaluations.
+float CleanupFireflies_NeighborhoodFilter(in float i, in float radius)
+{
+    const float k = 1.f; // BE CAREFUL: somehow truncated gaussian, k depends on radius...
+    return exp(-k * (i * i) / pow(radius + 1.0f, 2.0f));
+}
+
+// Replaces every pixel flagged in g_GlossyReflections_FirefliesBuffer (value >= 0.5) with a
+// weighted average of its neighbours in g_GlossyReflections_SpecularBuffer.
+// A neighbour's weight is the product of:
+//   - (1 - its own firefly flag), so flagged neighbours never contribute,
+//   - a plane weight that rejects samples lying off the centre pixel's geometric plane,
+//   - the separable CleanupFireflies_NeighborhoodFilter falloff.
+// If no neighbour is usable the centre value is passed through. Unflagged pixels are left untouched.
+[numthreads(8, 8, 1)]
+void CleanupFireflies(in uint2 did : SV_DispatchThreadID)
+{
+    int2 half_pos = did;
+    if (any(half_pos >= GlossyReflections_HalfRes()))
+    {
+        return; // out of bounds
+    }
+
+    // Only flagged pixels are rewritten
+    float mark_weight = g_GlossyReflections_FirefliesBuffer[did];
+    if (mark_weight < 0.5f)
+    {
+        return;
+    }
+
+    int2 full_pos = GlossyReflections_HalfToFullRes(half_pos);
+    float2 uv = (full_pos + 0.5f) / GlossyReflections_FullRes();
+    float center_depth = g_DepthBuffer[full_pos].x;
+    float3 center_world = InverseProject(g_GI10Constants.view_proj_inv, uv, center_depth);
+    float3 center_normal = normalize(2.f * g_GeometryNormalBuffer[full_pos].xyz - 1.f); // BE CAREFUL: no normal maps
+    float center_view_depth = GetLinearDepth(center_depth);
+
+    float4 specular = 0.f;
+    float specular_weight = 0.f;
+
+    int full_radius = GlossyReflections_CleanupFireflies_FullRadius(); // Default was 1
+    for (int offset_x = -full_radius; offset_x <= +full_radius; offset_x += 1)
+    {
+        for (int offset_y = -full_radius; offset_y <= +full_radius; offset_y += 1)
+        {
+            int2 half_sample_pos = int2(half_pos) + int2(offset_x, offset_y);
+
+            // Taps outside the buffer carry no data: reject them instead of letting
+            // their out-of-bounds reads take part in the average.
+            if (any(half_sample_pos < 0) || any(half_sample_pos >= GlossyReflections_HalfRes()))
+            {
+                continue;
+            }
+
+            int2 full_sample_pos = GlossyReflections_HalfToFullRes(half_sample_pos);
+
+            // Mark weight (0 for neighbours that are themselves flagged)
+            float sample_mark_weight = 1.f - g_GlossyReflections_FirefliesBuffer[half_sample_pos];
+
+            // Plane weight
+            float sample_depth = g_DepthBuffer.Load(int3(full_sample_pos, 0)).x;
+            float2 sample_uv = (full_sample_pos + 0.5f) / GlossyReflections_FullRes();
+            float3 sample_world = InverseProject(g_GI10Constants.view_proj_inv, sample_uv, sample_depth);
+            float plane_weight = 1.f - saturate(abs(dot(center_normal, center_world - sample_world) / center_view_depth) * 200.0f - 0.0f);
+
+            // Smooth
+            float filter_weight = CleanupFireflies_NeighborhoodFilter(offset_x, full_radius)
+                                * CleanupFireflies_NeighborhoodFilter(offset_y, full_radius);
+
+            // Filter
+            float weight = sample_mark_weight * plane_weight * filter_weight;
+
+            // A zero weight does not neutralise a NaN/Inf radiance (0 * Inf = NaN), so rejected
+            // samples must be skipped rather than multiplied by zero. Skipping before the load also
+            // avoids reading flagged neighbours, which other threads rewrite during this dispatch.
+            if (!(weight > 0.0f))
+            {
+                continue;
+            }
+
+            float4 radiance_and_hit_distance = g_GlossyReflections_SpecularBuffer[half_sample_pos];
+            specular += radiance_and_hit_distance * weight;
+            specular_weight += weight;
+        }
+    }
+
+    if (abs(specular_weight) < 1e-3f)
+    {
+        // BE CAREFUL: No sample could be used for cleanup, just pass through the center value
+        float4 radiance_and_hit_distance = g_GlossyReflections_SpecularBuffer[half_pos];
+        specular += radiance_and_hit_distance;
+        specular_weight += 1.f;
+    }
+
+    if (specular_weight > 0.0f)
+    {
+        specular /= specular_weight;
+    }
+
+    // BE CAREFUL: we don't want to propagate rare NaNs contained by g_GlossyReflections_SpecularBuffer
+    g_GlossyReflections_SpecularBuffer[half_pos] = GIDenoiser_RemoveNaNs(specular);
+}
+
+// Pass-through resolve used when no reflection denoiser runs: for every full-resolution pixel
+// that is neither sky (zero shading normal) nor above the high-roughness threshold, fetches the
+// matching entry of g_GlossyReflections_SpecularBuffer, applies radiance / max(1 - radiance, 1e-3)
+// and writes the NaN-free result to g_ReflectionBuffer with alpha 1.
+[numthreads(8, 8, 1)]
+void NoDenoiserReflections(in uint2 did : SV_DispatchThreadID)
+{
+    uint2 full_pos = did;
+    if (any(full_pos >= GlossyReflections_FullRes()))
+    {
+        return; // out of bounds
+    }
+
+    float3 normal = g_ShadingNormalBuffer.Load(int3(full_pos, 0)).xyz;
+    float roughness = g_RoughnessBuffer.Load(int3(full_pos, 0)).x;
+    bool is_sky_pixel = (dot(normal, normal) == 0.0f);
+
+    if (is_sky_pixel || roughness > g_GlossyReflectionsConstants.high_roughness_threshold)
+    {
+        return; // pixel was already updated
+    }
+
+    int2 half_pos = GlossyReflections_FullToHalfRes(full_pos);
+    float4 radiance_and_hit_distance = g_GlossyReflections_SpecularBuffer[half_pos];
+    float3 radiance = radiance_and_hit_distance.xyz;
+    float4 light_direction_and_pdf = g_GlossyReflections_DirectionBuffer[half_pos];
+    float specular_pdf = light_direction_and_pdf.w; // only consumed by the disabled branch below
+
+    radiance /= max(1.0f - radiance, 1e-3f);
+#if 0
+    // BE CAREFUL: this is needed to mimic the denoiser, but the noise is so strong (fireflies being visible over diffuse for matte surfaces)
+    // that temporal feedback brings the fireflies into global illumination and everything turns white
+    radiance /= max(specular_pdf, 1e-3f);
+#endif
+    g_ReflectionBuffer[full_pos] = float4(GIDenoiser_RemoveNaNs(radiance), 1.f); // Don't propagate NaNs
+}
+
+//!
+//! GI-1.0 denoiser kernels.
+//!
+
+[numthreads(8, 8, 1)]
+void ReprojectGI(in uint2 did : SV_DispatchThreadID)
+{
+    if (all(did == 0))
+    {
+        g_GIDenoiser_BlurSampleCountBuffer[0] = 0;
+    }
+
+    if (any(did >= g_BufferDimensions))
+    {
+        return; // out of bounds
+    }
+
+    float4 color = g_GIDenoiser_ColorBuffer[did];
+    float4 lighting = float4(0.0f, 0.0f, 0.0f, 0.0f);
+    float3 normal = g_ShadingNormalBuffer.Load(int3(did, 0)).xyz;
+
+    float alpha_blend = 1.0f;
+    float color_delta = 0.0f;
+    float2 uv = (did + 0.5f) / g_BufferDimensions;
+    bool is_sky_pixel = (dot(normal, normal) == 0.0f ?
true : false); + + if (!is_sky_pixel) + { + float2 velocity = g_VelocityBuffer.SampleLevel(g_NearestSampler, uv, 0.0f).xy; + float2 previous_uv = (uv - velocity); + + float depth = g_DepthBuffer.Load(int3(did, 0)).x; + normal = 2.0f * normal - 1.0f; + + if (all(previous_uv > 0.0f) && all(previous_uv < 1.0f)) + { + float3 world = InverseProject(g_GI10Constants.view_proj_inv, uv, depth); + float cell_size = distance(g_Eye, world) * g_ScreenProbesConstants.cell_size; + + cell_size *= lerp(1.0f, 5.0f, pow(1.0f - max(dot(normalize(g_Eye - world), normalize(normal)), 0.0f), 6.0f)); + + float weight = 0.0f; + float2 texel_size = 1.0f / g_BufferDimensions; + + const float kOneOverSqrtOfTwo = 0.707107f; + + for (float y = -1.0f; y <= 1.0f; ++y) { for (float x = -1.0f; x <= 1.0f; ++x) { @@ -2213,7 +3608,7 @@ void ReprojectGI(in uint2 did : SV_DispatchThreadID) float previous_depth = g_PreviousDepthBuffer.SampleLevel(g_NearestSampler, st, 0.0f).x; float3 previous_world = InverseProject(g_PreviousViewProjectionInverse, st, previous_depth); - if (abs(dot(previous_world - world, normal)) < cell_size && dot(normal, previous_normal) > 0.95f) + if (distance(world, previous_world) < cell_size && dot(normal, previous_normal) > 0.95f) { float subpixel_dist = distance(floor(st * g_BufferDimensions) + 0.5f, previous_uv * g_BufferDimensions); float w = saturate(1.0f - subpixel_dist * kOneOverSqrtOfTwo); @@ -2254,7 +3649,7 @@ void ReprojectGI(in uint2 did : SV_DispatchThreadID) float2 vignette_uv = uv * (1.0f - uv.yx); float vignette = pow(15.0f * vignette_uv.x * vignette_uv.y, 0.25f); - float max_sample_count = max(lerp(4.0f, 80.0f, alpha_blend) * vignette, 1.0f); + float max_sample_count = max(lerp(4.0f, 8.0f * min(abs(lighting.w), 8.0f), alpha_blend) * vignette, 1.0f); if (lighting.w > max_sample_count) // evict old samples from the history { @@ -2350,7 +3745,7 @@ void FilterGI(in uint2 did : SV_DispatchThreadID) float3 color = lighting.xyz / max(lighting.w, 1.0f); float center_depth = 
GetLinearDepth(g_DepthBuffer.Load(int3(did, 0)).x); - float3 center_normal = 2.0f * g_DetailsBuffer.Load(int3(did, 0)).xyz - 1.0f; + float3 center_normal = 2.0f * g_ShadingNormalBuffer.Load(int3(did, 0)).xyz - 1.0f; for (int r = -blur_radius; r <= blur_radius; ++r) { @@ -2360,7 +3755,7 @@ void FilterGI(in uint2 did : SV_DispatchThreadID) if (c.w > 0.0f) { float depth = GetLinearDepth(g_DepthBuffer.Load(int3(pos, 0)).x); - float3 normal = 2.0f * g_DetailsBuffer.Load(int3(pos, 0)).xyz - 1.0f; + float3 normal = 2.0f * g_ShadingNormalBuffer.Load(int3(pos, 0)).xyz - 1.0f; float depth_diff = 1.0f - (center_depth / depth); float depth_factor = exp2(-(lighting.w > 0.0f ? 2e2f : 2e1f) * abs(depth_diff)); @@ -2384,3 +3779,115 @@ void FilterGI(in uint2 did : SV_DispatchThreadID) g_GIDenoiser_ColorBuffer[did] = lighting; }
+
+// Zeroes the per-bucket overflow counter; one thread per bucket.
+[numthreads(64, 1, 1)]
+void ClearBucketOverflowCount(in uint did : SV_DispatchThreadID)
+{
+    uint bucket_index = did;
+    if (bucket_index >= g_HashGridCacheConstants.num_buckets)
+    {
+        return;
+    }
+
+    g_HashGridCache_BucketOverflowCountBuffer[bucket_index] = 0;
+}
+
+// Zeroes the bucket occupancy histogram; one thread per histogram bin.
+// Thread 0 additionally resets the free/used bucket counters.
+[numthreads(64, 1, 1)]
+void ClearBucketOccupancy(in uint did : SV_DispatchThreadID)
+{
+    uint bucket_occupancy = did;
+    if (bucket_occupancy >= g_HashGridCacheConstants.debug_bucket_occupancy_histogram_size)
+    {
+        return;
+    }
+
+    if (did == 0)
+    {
+        g_HashGridCache_FreeBucketCountBuffer[0] = 0;
+        g_HashGridCache_UsedBucketCountBuffer[0] = 0;
+    }
+
+    g_HashGridCache_BucketOccupancyBuffer[bucket_occupancy] = 0;
+}
+
+// Zeroes the bucket overflow histogram; one thread per histogram bin.
+[numthreads(64, 1, 1)]
+void ClearBucketOverflow(in uint did : SV_DispatchThreadID)
+{
+    uint bucket_overflow = did;
+    if (bucket_overflow >= g_HashGridCacheConstants.debug_bucket_overflow_histogram_size)
+    {
+        return;
+    }
+
+    g_HashGridCache_BucketOverflowBuffer[bucket_overflow] = 0;
+}
+
+// Accumulates debug statistics for one bucket per thread:
+//   - occupancy histogram, binned by the number of tiles found before the first zero hash,
+//   - overflow histogram, binned by the bucket's overflow counter,
+//   - free/used bucket counters.
+// Both histogram indices are clamped to the last bin of their histogram.
+[numthreads(64, 1, 1)]
+void BuildBucketStatistics(in uint did : SV_DispatchThreadID)
+{
+    uint bucket_index = did;
+    if (bucket_index >= g_HashGridCacheConstants.num_buckets)
+    {
+        return;
+    }
+
+    // Scan the bucket's tiles in order; bucket_offset ends up as the index of the first
+    // tile whose hash is zero, or num_tiles_per_bucket if none is.
+    uint bucket_offset;
+    for (bucket_offset = 0; bucket_offset < g_HashGridCacheConstants.num_tiles_per_bucket; ++bucket_offset)
+    {
+        uint tile_index = bucket_offset + bucket_index * g_HashGridCacheConstants.num_tiles_per_bucket;
+        uint current_hash = g_HashGridCache_HashBuffer[tile_index];
+        if (current_hash == 0)
+        {
+            break; // free tile
+        }
+    }
+
+    uint previous_value; // required by InterlockedAdd, never read
+    uint bucket_occupancy = min(bucket_offset, g_HashGridCacheConstants.debug_bucket_occupancy_histogram_size - 1);
+    InterlockedAdd(g_HashGridCache_BucketOccupancyBuffer[bucket_occupancy], 1, previous_value);
+
+    uint bucket_overflow_count = min(g_HashGridCache_BucketOverflowCountBuffer[bucket_index], g_HashGridCacheConstants.debug_bucket_overflow_histogram_size - 1);
+    InterlockedAdd(g_HashGridCache_BucketOverflowBuffer[bucket_overflow_count], 1, previous_value);
+
+    // A bucket whose first tile is already free counts as a free bucket
+    if (bucket_occupancy < 1)
+    {
+        InterlockedAdd(g_HashGridCache_FreeBucketCountBuffer[0], 1, previous_value);
+    }
+    else
+    {
+        InterlockedAdd(g_HashGridCache_UsedBucketCountBuffer[0], 1, previous_value);
+    }
+}
+
+// Converts the occupancy statistics to floats in g_HashGridCache_StatsBuffer:
+//   [0] free bucket count, [1] used bucket count (both written by thread 0),
+//   [2 + i] occupancy histogram bin i (one thread per bin).
+[numthreads(64, 1, 1)]
+void FormatBucketOccupancy(in uint did : SV_DispatchThreadID)
+{
+    uint bucket_occupancy = did;
+    if (bucket_occupancy >= g_HashGridCacheConstants.debug_bucket_occupancy_histogram_size)
+    {
+        return;
+    }
+
+    uint stats_cursor = 0;
+    if (did == 0)
+    {
+        g_HashGridCache_StatsBuffer[stats_cursor + 0] = float(g_HashGridCache_FreeBucketCountBuffer[0]);
+        g_HashGridCache_StatsBuffer[stats_cursor + 1] = float(g_HashGridCache_UsedBucketCountBuffer[0]);
+    }
+
+    stats_cursor += 2; // skip the two counters written above
+    g_HashGridCache_StatsBuffer[stats_cursor + bucket_occupancy] = float(g_HashGridCache_BucketOccupancyBuffer[bucket_occupancy]);
+}
+
+// Converts the overflow histogram to floats in g_HashGridCache_StatsBuffer, placed after
+// the two counters and the occupancy histogram (one thread per bin).
+[numthreads(64, 1, 1)]
+void FormatBucketOverflow(in uint did : SV_DispatchThreadID)
+{
+    uint bucket_overflow = did;
+    if (bucket_overflow >= g_HashGridCacheConstants.debug_bucket_overflow_histogram_size)
+    {
+        return;
+    }
+
+    uint stats_cursor = 2 +
g_HashGridCacheConstants.debug_bucket_occupancy_histogram_size; + g_HashGridCache_StatsBuffer[stats_cursor + bucket_overflow] = float(g_HashGridCache_BucketOverflowBuffer[bucket_overflow]); +} diff --git a/src/core/src/render_techniques/gi10/gi10.cpp b/src/core/src/render_techniques/gi10/gi10.cpp index 448f484..9260d62 100644 --- a/src/core/src/render_techniques/gi10/gi10.cpp +++ b/src/core/src/render_techniques/gi10/gi10.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,11 +23,31 @@ THE SOFTWARE. #include "capsaicin_internal.h" #include "components/blue_noise_sampler/blue_noise_sampler.h" -#include "components/light_sampler_bounds/light_sampler_bounds.h" +#include "components/brdf_lut/brdf_lut.h" +#include "components/light_sampler_grid_stream/light_sampler_grid_stream.h" +#include "components/prefilter_ibl/prefilter_ibl.h" #include "components/stratified_sampler/stratified_sampler.h" namespace Capsaicin { +char const *kPopulateScreenProbesRaygenShaderName = "PopulateScreenProbesRaygen"; +char const *kPopulateScreenProbesMissShaderName = "PopulateScreenProbesMiss"; +char const *kPopulateScreenProbesAnyHitShaderName = "PopulateScreenProbesAnyHit"; +char const *kPopulateScreenProbesClosestHitShaderName = "PopulateScreenProbesClosestHit"; +char const *kPopulateScreenProbesHitGroupName = "PopulateScreenProbesHitGroup"; + +char const *kPopulateCellsRaygenShaderName = "PopulateCellsRaygen"; +char const *kPopulateCellsMissShaderName = "PopulateCellsMiss"; +char const *kPopulateCellsAnyHitShaderName = "PopulateCellsAnyHit"; +char const *kPopulateCellsClosestHitShaderName = "PopulateCellsClosestHit"; +char const 
*kPopulateCellsHitGroupName = "PopulateCellsHitGroup"; + +char const *kTraceReflectionsRaygenShaderName = "TraceReflectionsRaygen"; +char const *kTraceReflectionsMissShaderName = "TraceReflectionsMiss"; +char const *kTraceReflectionsAnyHitShaderName = "TraceReflectionsAnyHit"; +char const *kTraceReflectionsClosestHitShaderName = "TraceReflectionsClosestHit"; +char const *kTraceReflectionsHitGroupName = "TraceReflectionsHitGroup"; + GI10::Base::Base(GI10 &gi10) : gfx_(gi10.gfx_) , self(gi10) @@ -35,23 +55,19 @@ GI10::Base::Base(GI10 &gi10) GI10::ScreenProbes::ScreenProbes(GI10 &gi10) : Base(gi10) - , probe_spawn_tile_size_(sampling_mode_ == kSamplingMode_QuarterSpp ? (probe_size_ << 1) - : sampling_mode_ == kSamplingMode_SixteenthSpp ? (probe_size_ << 2) - : probe_size_) , probe_buffer_index_(0) -{ -} +{} GI10::ScreenProbes::~ScreenProbes() { - for (GfxTexture probe_buffer : probe_buffers_) + for (GfxTexture &probe_buffer : probe_buffers_) gfxDestroyTexture(gfx_, probe_buffer); - for (GfxTexture probe_mask_buffer : probe_mask_buffers_) + for (GfxTexture &probe_mask_buffer : probe_mask_buffers_) gfxDestroyTexture(gfx_, probe_mask_buffer); - for (GfxBuffer probe_sh_buffer : probe_sh_buffers_) + for (GfxBuffer &probe_sh_buffer : probe_sh_buffers_) gfxDestroyBuffer(gfx_, probe_sh_buffer); - for (GfxBuffer probe_spawn_buffer : probe_spawn_buffers_) + for (GfxBuffer &probe_spawn_buffer : probe_spawn_buffers_) gfxDestroyBuffer(gfx_, probe_spawn_buffer); gfxDestroyBuffer(gfx_, probe_spawn_scan_buffer_); gfxDestroyBuffer(gfx_, probe_spawn_index_buffer_); @@ -64,7 +80,7 @@ GI10::ScreenProbes::~ScreenProbes() gfxDestroyBuffer(gfx_, probe_override_tile_count_buffer_); gfxDestroyTexture(gfx_, probe_cached_tile_buffer_); gfxDestroyTexture(gfx_, probe_cached_tile_index_buffer_); - for (GfxBuffer probe_cached_tile_lru_buffer : probe_cached_tile_lru_buffers_) + for (GfxBuffer &probe_cached_tile_lru_buffer : probe_cached_tile_lru_buffers_) gfxDestroyBuffer(gfx_, 
probe_cached_tile_lru_buffer); gfxDestroyBuffer(gfx_, probe_cached_tile_lru_flag_buffer_); gfxDestroyBuffer(gfx_, probe_cached_tile_lru_count_buffer_); @@ -100,7 +116,7 @@ void GI10::ScreenProbes::ensureMemoryIsAllocated(CapsaicinInternal const &capsai if (probe_buffers_->getWidth() != probe_buffer_width || probe_buffers_->getHeight() != probe_buffer_height) { - for (GfxTexture probe_buffer : probe_buffers_) + for (GfxTexture &probe_buffer : probe_buffers_) gfxDestroyTexture(gfx_, probe_buffer); for (uint32_t i = 0; i < ARRAYSIZE(probe_buffers_); ++i) @@ -117,7 +133,7 @@ void GI10::ScreenProbes::ensureMemoryIsAllocated(CapsaicinInternal const &capsai if (probe_mask_buffers_->getWidth() != probe_count[0] || probe_mask_buffers_->getHeight() != probe_count[1]) { - for (GfxTexture probe_mask_buffer : probe_mask_buffers_) + for (GfxTexture &probe_mask_buffer : probe_mask_buffers_) gfxDestroyTexture(gfx_, probe_mask_buffer); gfxCommandBindKernel(gfx_, self.clear_probe_mask_kernel_); @@ -150,7 +166,7 @@ void GI10::ScreenProbes::ensureMemoryIsAllocated(CapsaicinInternal const &capsai if (probe_sh_buffers_->getCount() != 9 * max_probe_count) { - for (GfxBuffer probe_sh_buffer : probe_sh_buffers_) + for (GfxBuffer &probe_sh_buffer : probe_sh_buffers_) gfxDestroyBuffer(gfx_, probe_sh_buffer); for (uint32_t i = 0; i < ARRAYSIZE(probe_sh_buffers_); ++i) @@ -165,7 +181,7 @@ void GI10::ScreenProbes::ensureMemoryIsAllocated(CapsaicinInternal const &capsai if (probe_spawn_buffers_->getCount() != max_probe_spawn_count) { - for (GfxBuffer probe_spawn_buffer : probe_spawn_buffers_) + for (GfxBuffer &probe_spawn_buffer : probe_spawn_buffers_) gfxDestroyBuffer(gfx_, probe_spawn_buffer); gfxDestroyBuffer(gfx_, probe_spawn_scan_buffer_); gfxDestroyBuffer(gfx_, probe_spawn_index_buffer_); @@ -231,7 +247,7 @@ void GI10::ScreenProbes::ensureMemoryIsAllocated(CapsaicinInternal const &capsai { gfxDestroyTexture(gfx_, probe_cached_tile_buffer_); gfxDestroyTexture(gfx_, 
probe_cached_tile_index_buffer_); - for (GfxBuffer probe_cached_tile_lru_buffer : probe_cached_tile_lru_buffers_) + for (GfxBuffer &probe_cached_tile_lru_buffer : probe_cached_tile_lru_buffers_) gfxDestroyBuffer(gfx_, probe_cached_tile_lru_buffer); gfxDestroyBuffer(gfx_, probe_cached_tile_lru_flag_buffer_); gfxDestroyBuffer(gfx_, probe_cached_tile_lru_count_buffer_); @@ -313,6 +329,7 @@ void GI10::ScreenProbes::ensureMemoryIsAllocated(CapsaicinInternal const &capsai GI10::HashGridCache::HashGridCache(GI10 &gi10) : Base(gi10) , max_ray_count_(0) + , num_buckets_(0) , num_cells_(0) , num_tiles_(0) , num_tiles_per_bucket_(0) @@ -359,32 +376,50 @@ GI10::HashGridCache::HashGridCache(GI10 &gi10) , radiance_cache_packed_tile_index_buffer1_( radiance_cache_hash_buffer_uint_[HASHGRIDCACHE_PACKEDTILEINDEXBUFFER1]) , radiance_cache_debug_cell_buffer_(radiance_cache_hash_buffer_float4_[HASHGRIDCACHE_DEBUGCELLBUFFER]) + , radiance_cache_debug_bucket_occupancy_buffer_( + radiance_cache_hash_buffer_uint_[HASHGRIDCACHE_BUCKETOCCUPANCYBUFFER]) + , radiance_cache_debug_bucket_overflow_count_buffer_( + radiance_cache_hash_buffer_uint_[HASHGRIDCACHE_BUCKETOVERFLOWCOUNTBUFFER]) + , radiance_cache_debug_bucket_overflow_buffer_( + radiance_cache_hash_buffer_uint_[HASHGRIDCACHE_BUCKETOVERFLOWBUFFER]) + , radiance_cache_debug_free_bucket_buffer_( + radiance_cache_hash_buffer_uint_[HASHGRIDCACHE_FREEBUCKETBUFFER]) + , radiance_cache_debug_used_bucket_buffer_( + radiance_cache_hash_buffer_uint_[HASHGRIDCACHE_USEDBUCKETBUFFER]) + , radiance_cache_debug_stats_buffer_(radiance_cache_hash_buffer_float_[HASHGRIDCACHE_STATSBUFFER]) {} GI10::HashGridCache::~HashGridCache() { - for (GfxBuffer buffer : radiance_cache_hash_buffer_uint_) + for (GfxBuffer &buffer : radiance_cache_hash_buffer_float_) + { + gfxDestroyBuffer(gfx_, buffer); + } + + for (GfxBuffer &buffer : radiance_cache_hash_buffer_uint_) + { + gfxDestroyBuffer(gfx_, buffer); + } + + for (GfxBuffer &buffer : 
radiance_cache_hash_buffer_uint2_) { gfxDestroyBuffer(gfx_, buffer); } - for (GfxBuffer buffer : radiance_cache_hash_buffer_uint2_) + for (GfxBuffer &buffer : radiance_cache_hash_buffer_float4_) { gfxDestroyBuffer(gfx_, buffer); } - for (GfxBuffer buffer : radiance_cache_hash_buffer_float4_) + for (GfxBuffer &buffer : radiance_cache_debug_stats_readback_buffers_) { gfxDestroyBuffer(gfx_, buffer); } } -void GI10::HashGridCache::ensureMemoryIsAllocated( - CapsaicinInternal const &capsaicin, RenderOptions const &options, std::string_view const &debug_view) +void GI10::HashGridCache::ensureMemoryIsAllocated([[maybe_unused]] CapsaicinInternal const &capsaicin, + RenderOptions const &options, std::string_view const &debug_view) { - uint32_t const buffer_width = capsaicin.getWidth(); - uint32_t const buffer_height = capsaicin.getHeight(); - uint32_t const max_ray_count = self.screen_probes_.max_ray_count; uint32_t const num_buckets = 1u << options.gi10_hash_grid_cache_num_buckets; uint32_t const num_tiles_per_bucket = 1u << options.gi10_hash_grid_cache_num_tiles_per_bucket; @@ -392,8 +427,7 @@ void GI10::HashGridCache::ensureMemoryIsAllocated( uint32_t const size_tile_mip1 = size_tile_mip0 >> 1; uint32_t const size_tile_mip2 = size_tile_mip1 >> 1; uint32_t const size_tile_mip3 = size_tile_mip2 >> 1; - uint32_t const size_tile_mip4 = size_tile_mip3 >> 1; - GFX_ASSERT(size_tile_mip4 == 0); + GFX_ASSERT((size_tile_mip3 >> 1) == 0); uint32_t const num_cells_per_tile_mip0 = size_tile_mip0 * size_tile_mip0; uint32_t const num_cells_per_tile_mip1 = size_tile_mip1 * size_tile_mip1; uint32_t const num_cells_per_tile_mip2 = size_tile_mip2 * size_tile_mip2; @@ -405,8 +439,15 @@ void GI10::HashGridCache::ensureMemoryIsAllocated( uint32_t const first_cell_offset_tile_mip2 = first_cell_offset_tile_mip1 + num_cells_per_tile_mip1; uint32_t const first_cell_offset_tile_mip3 = first_cell_offset_tile_mip2 + num_cells_per_tile_mip2; GFX_ASSERT(first_cell_offset_tile_mip3 + 
num_cells_per_tile_mip3 == num_cells_per_tile); - uint32_t const num_tiles = num_tiles_per_bucket * num_buckets; - uint32_t const num_cells = num_cells_per_tile * num_tiles; + uint32_t const num_tiles = num_tiles_per_bucket * num_buckets; + uint32_t const num_cells = num_cells_per_tile * num_tiles; + uint32_t const debug_bucket_occupancy_histogram_size = num_tiles_per_bucket + 1; + uint32_t const debug_bucket_overflow_histogram_size = + options.gi10_hash_grid_cache_debug_max_bucket_overflow + 1; + uint32_t const debug_stats_size = + 2 + debug_bucket_occupancy_histogram_size + debug_bucket_overflow_histogram_size; + + uint64_t debug_total_memory_size_in_bytes = 0; if (!radiance_cache_hash_buffer_ || num_tiles != num_tiles_) { @@ -422,6 +463,9 @@ void GI10::HashGridCache::ensureMemoryIsAllocated( gfxCommandClearBuffer(gfx_, radiance_cache_hash_buffer_); // clear the radiance cache } + debug_total_memory_size_in_bytes += radiance_cache_hash_buffer_.getSize(); + debug_total_memory_size_in_bytes += radiance_cache_decay_tile_buffer_.getSize(); + if (!radiance_cache_value_buffer_ || num_cells != num_cells_) { gfxDestroyBuffer(gfx_, radiance_cache_value_buffer_); @@ -430,12 +474,16 @@ void GI10::HashGridCache::ensureMemoryIsAllocated( radiance_cache_value_buffer_.setName("Capsaicin_RadianceCache_ValueBuffer"); } + debug_total_memory_size_in_bytes += radiance_cache_value_buffer_.getSize(); + if (!radiance_cache_update_tile_count_buffer_) { radiance_cache_update_tile_count_buffer_ = gfxCreateBuffer(gfx_, 1); radiance_cache_update_tile_count_buffer_.setName("Capsaicin_RadianceCache_UpdateTileCountBuffer"); } + debug_total_memory_size_in_bytes += radiance_cache_update_tile_count_buffer_.getSize(); + if (!radiance_cache_update_cell_value_buffer_ || num_cells != num_cells_) { gfxDestroyBuffer(gfx_, radiance_cache_update_cell_value_buffer_); @@ -446,8 +494,15 @@ void GI10::HashGridCache::ensureMemoryIsAllocated( gfxCommandClearBuffer(gfx_, 
radiance_cache_update_cell_value_buffer_); } + debug_total_memory_size_in_bytes += radiance_cache_update_cell_value_buffer_.getSize(); + if (!radiance_cache_visibility_count_buffer_) { + gfxDestroyBuffer(gfx_, radiance_cache_visibility_count_buffer_); + gfxDestroyBuffer(gfx_, radiance_cache_visibility_ray_count_buffer_); + gfxDestroyBuffer(gfx_, radiance_cache_packed_tile_count_buffer0_); + gfxDestroyBuffer(gfx_, radiance_cache_packed_tile_count_buffer1_); + radiance_cache_visibility_count_buffer_ = gfxCreateBuffer(gfx_, 1); radiance_cache_visibility_count_buffer_.setName("Capsaicin_RadianceCache_VisibilityCountBuffer"); @@ -462,6 +517,11 @@ void GI10::HashGridCache::ensureMemoryIsAllocated( radiance_cache_packed_tile_count_buffer1_.setName("Capsaicin_RadianceCache_PackedTileCountBuffer1"); } + debug_total_memory_size_in_bytes += radiance_cache_visibility_count_buffer_.getSize(); + debug_total_memory_size_in_bytes += radiance_cache_visibility_ray_count_buffer_.getSize(); + debug_total_memory_size_in_bytes += radiance_cache_packed_tile_count_buffer0_.getSize(); + debug_total_memory_size_in_bytes += radiance_cache_packed_tile_count_buffer1_.getSize(); + if (!radiance_cache_packed_tile_index_buffer0_ || num_tiles != num_tiles_) { gfxDestroyBuffer(gfx_, radiance_cache_packed_tile_index_buffer0_); @@ -477,6 +537,9 @@ void GI10::HashGridCache::ensureMemoryIsAllocated( gfxCommandClearBuffer(gfx_, radiance_cache_packed_tile_index_buffer1_); } + debug_total_memory_size_in_bytes += radiance_cache_packed_tile_index_buffer0_.getSize(); + debug_total_memory_size_in_bytes += radiance_cache_packed_tile_index_buffer1_.getSize(); + // The `packedCell' buffer is not necessary for drawing, but rather used // when debugging our hash cells. 
// So, we only allocate the memory when debugging the hash grid radiance @@ -506,6 +569,9 @@ void GI10::HashGridCache::ensureMemoryIsAllocated( radiance_cache_decay_cell_buffer_ = {}; } + debug_total_memory_size_in_bytes += radiance_cache_debug_cell_buffer_.getSize(); + debug_total_memory_size_in_bytes += radiance_cache_decay_cell_buffer_.getSize(); + if (!radiance_cache_update_tile_buffer_ || max_ray_count != max_ray_count_ || num_cells != num_cells_) { gfxDestroyBuffer(gfx_, radiance_cache_update_tile_buffer_); @@ -531,24 +597,116 @@ void GI10::HashGridCache::ensureMemoryIsAllocated( radiance_cache_visibility_ray_buffer_.setName("Capsaicin_RadianceCache_VisibilityRayBuffer"); } - max_ray_count_ = max_ray_count; - num_buckets_ = num_buckets; - num_tiles_ = num_tiles; - num_cells_ = num_cells; - num_tiles_per_bucket_ = num_tiles_per_bucket; - size_tile_mip0_ = size_tile_mip0; - size_tile_mip1_ = size_tile_mip1; - size_tile_mip2_ = size_tile_mip2; - size_tile_mip3_ = size_tile_mip3; - num_cells_per_tile_mip0_ = num_cells_per_tile_mip0; - num_cells_per_tile_mip1_ = num_cells_per_tile_mip1; - num_cells_per_tile_mip2_ = num_cells_per_tile_mip2; - num_cells_per_tile_mip3_ = num_cells_per_tile_mip3; - num_cells_per_tile_ = num_cells_per_tile; // all mips - first_cell_offset_tile_mip0_ = first_cell_offset_tile_mip0; - first_cell_offset_tile_mip1_ = first_cell_offset_tile_mip1; - first_cell_offset_tile_mip2_ = first_cell_offset_tile_mip2; - first_cell_offset_tile_mip3_ = first_cell_offset_tile_mip3; + debug_total_memory_size_in_bytes += radiance_cache_update_tile_buffer_.getSize(); + debug_total_memory_size_in_bytes += radiance_cache_visibility_buffer_.getSize(); + debug_total_memory_size_in_bytes += radiance_cache_visibility_cell_buffer_.getSize(); + debug_total_memory_size_in_bytes += radiance_cache_visibility_query_buffer_.getSize(); + debug_total_memory_size_in_bytes += radiance_cache_visibility_ray_buffer_.getSize(); + + if (!radiance_cache_debug_free_bucket_buffer_) 
+    {
+        gfxDestroyBuffer(gfx_, radiance_cache_debug_free_bucket_buffer_);
+        gfxDestroyBuffer(gfx_, radiance_cache_debug_used_bucket_buffer_);
+
+        // Single-element counters filled by the bucket statistics kernels
+        radiance_cache_debug_free_bucket_buffer_ = gfxCreateBuffer(gfx_, 1);
+        radiance_cache_debug_free_bucket_buffer_.setName("Capsaicin_RadianceCache_FreeBucketBuffer");
+
+        radiance_cache_debug_used_bucket_buffer_ = gfxCreateBuffer(gfx_, 1);
+        radiance_cache_debug_used_bucket_buffer_.setName("Capsaicin_RadianceCache_UsedBucketBuffer");
+    }
+
+    debug_total_memory_size_in_bytes += radiance_cache_debug_free_bucket_buffer_.getSize();
+    debug_total_memory_size_in_bytes += radiance_cache_debug_used_bucket_buffer_.getSize();
+
+    // One overflow counter per bucket; reallocated whenever the bucket count changes
+    if (!radiance_cache_debug_bucket_overflow_count_buffer_ || num_buckets != num_buckets_)
+    {
+        gfxDestroyBuffer(gfx_, radiance_cache_debug_bucket_overflow_count_buffer_);
+
+        radiance_cache_debug_bucket_overflow_count_buffer_ = gfxCreateBuffer(gfx_, num_buckets);
+        // Distinct debug name: the overflow histogram buffer created further down is the one
+        // called "..._BucketOverflowBuffer", and the two must be distinguishable in captures.
+        radiance_cache_debug_bucket_overflow_count_buffer_.setName(
+            "Capsaicin_RadianceCache_BucketOverflowCountBuffer");
+    }
+
+    debug_total_memory_size_in_bytes += radiance_cache_debug_bucket_overflow_count_buffer_.getSize();
+
+    // Occupancy histogram; reallocated whenever its bin count changes
+    if (!radiance_cache_debug_bucket_occupancy_buffer_
+        || debug_bucket_occupancy_histogram_size != debug_bucket_occupancy_histogram_size_)
+    {
+        // NOTE(review): this assert does not involve the occupancy buffer; presumably it guards
+        // the size of the stats readback buffer array - confirm and consider moving it there.
+        static_assert(kGfxConstant_BackBufferCount == 3);
+        gfxDestroyBuffer(gfx_, radiance_cache_debug_bucket_occupancy_buffer_);
+
+        // NOTE(review): allocates one element more than the histogram size - confirm this is intended.
+        radiance_cache_debug_bucket_occupancy_buffer_ =
+            gfxCreateBuffer(gfx_, debug_bucket_occupancy_histogram_size + 1);
+        radiance_cache_debug_bucket_occupancy_buffer_.setName(
+            "Capsaicin_RadianceCache_BucketOccupancyBuffer");
+    }
+
+    debug_total_memory_size_in_bytes += radiance_cache_debug_bucket_occupancy_buffer_.getSize();
+
+    // Overflow histogram; reallocated whenever its bin count changes
+    if (!radiance_cache_debug_bucket_overflow_buffer_
+        || debug_bucket_overflow_histogram_size != debug_bucket_overflow_histogram_size_)
+    {
+        gfxDestroyBuffer(gfx_, radiance_cache_debug_bucket_overflow_buffer_);
+
+
radiance_cache_debug_bucket_overflow_buffer_ = + gfxCreateBuffer(gfx_, debug_bucket_overflow_histogram_size); + radiance_cache_debug_bucket_overflow_buffer_.setName("Capsaicin_RadianceCache_BucketOverflowBuffer"); + } + + debug_total_memory_size_in_bytes += radiance_cache_debug_bucket_overflow_buffer_.getSize(); + + if (!radiance_cache_debug_stats_buffer_ || debug_stats_size != debug_stats_size_) + { + gfxDestroyBuffer(gfx_, radiance_cache_debug_stats_buffer_); + for (GfxBuffer &buffer : radiance_cache_debug_stats_readback_buffers_) + { + gfxDestroyBuffer(gfx_, buffer); + } + + radiance_cache_debug_stats_buffer_ = gfxCreateBuffer(gfx_, debug_stats_size); + radiance_cache_debug_stats_buffer_.setName("Capsaicin_RadianceCache_StatsBuffer"); + + for (uint32_t i = 0; i < ARRAYSIZE(radiance_cache_debug_stats_readback_buffers_); ++i) + { + char buffer[64]; + GFX_SNPRINTF(buffer, sizeof(buffer), "Capsaicin_RadianceCache_StatsReadbackBuffer%u", i); + + radiance_cache_debug_stats_readback_buffers_[i] = + gfxCreateBuffer(gfx_, debug_stats_size, nullptr, kGfxCpuAccess_Read); + radiance_cache_debug_stats_readback_buffers_[i].setName(buffer); + + radiance_cache_debug_stats_readback_is_pending_[i] = false; // Don't readback unfilled buffers + } + } + + debug_total_memory_size_in_bytes += radiance_cache_debug_stats_buffer_.getSize(); + for (GfxBuffer &buffer : radiance_cache_debug_stats_readback_buffers_) + { + debug_total_memory_size_in_bytes += buffer.getSize(); + } + + max_ray_count_ = max_ray_count; + num_buckets_ = num_buckets; + num_tiles_ = num_tiles; + num_cells_ = num_cells; + num_tiles_per_bucket_ = num_tiles_per_bucket; + size_tile_mip0_ = size_tile_mip0; + size_tile_mip1_ = size_tile_mip1; + size_tile_mip2_ = size_tile_mip2; + size_tile_mip3_ = size_tile_mip3; + num_cells_per_tile_mip0_ = num_cells_per_tile_mip0; + num_cells_per_tile_mip1_ = num_cells_per_tile_mip1; + num_cells_per_tile_mip2_ = num_cells_per_tile_mip2; + num_cells_per_tile_mip3_ = 
num_cells_per_tile_mip3; + num_cells_per_tile_ = num_cells_per_tile; // all mips + first_cell_offset_tile_mip0_ = first_cell_offset_tile_mip0; + first_cell_offset_tile_mip1_ = first_cell_offset_tile_mip1; + first_cell_offset_tile_mip2_ = first_cell_offset_tile_mip2; + first_cell_offset_tile_mip3_ = first_cell_offset_tile_mip3; + debug_bucket_occupancy_histogram_size_ = debug_bucket_occupancy_histogram_size; + debug_bucket_overflow_histogram_size_ = debug_bucket_overflow_histogram_size; + debug_stats_size_ = debug_stats_size; + debug_total_memory_size_in_bytes_ = debug_total_memory_size_in_bytes; } GI10::WorldSpaceReSTIR::WorldSpaceReSTIR(GI10 &gi10) @@ -558,41 +716,38 @@ GI10::WorldSpaceReSTIR::WorldSpaceReSTIR(GI10 &gi10) GI10::WorldSpaceReSTIR::~WorldSpaceReSTIR() { - for (GfxBuffer reservoir_hash_buffer : reservoir_hash_buffers_) + for (GfxBuffer &reservoir_hash_buffer : reservoir_hash_buffers_) gfxDestroyBuffer(gfx_, reservoir_hash_buffer); - for (GfxBuffer reservoir_hash_count_buffer : reservoir_hash_count_buffers_) + for (GfxBuffer &reservoir_hash_count_buffer : reservoir_hash_count_buffers_) gfxDestroyBuffer(gfx_, reservoir_hash_count_buffer); - for (GfxBuffer reservoir_hash_index_buffer : reservoir_hash_index_buffers_) + for (GfxBuffer &reservoir_hash_index_buffer : reservoir_hash_index_buffers_) gfxDestroyBuffer(gfx_, reservoir_hash_index_buffer); - for (GfxBuffer reservoir_hash_value_buffer : reservoir_hash_value_buffers_) + for (GfxBuffer &reservoir_hash_value_buffer : reservoir_hash_value_buffers_) gfxDestroyBuffer(gfx_, reservoir_hash_value_buffer); gfxDestroyBuffer(gfx_, reservoir_hash_list_buffer_); gfxDestroyBuffer(gfx_, reservoir_hash_list_count_buffer_); gfxDestroyBuffer(gfx_, reservoir_indirect_sample_buffer_); - for (GfxBuffer reservoir_indirect_sample_normal_buffer : reservoir_indirect_sample_normal_buffers_) + for (GfxBuffer &reservoir_indirect_sample_normal_buffer : reservoir_indirect_sample_normal_buffers_) gfxDestroyBuffer(gfx_, 
reservoir_indirect_sample_normal_buffer); gfxDestroyBuffer(gfx_, reservoir_indirect_sample_material_buffer_); - for (GfxBuffer reservoir_indirect_sample_reservoir_buffer : reservoir_indirect_sample_reservoir_buffers_) + for (GfxBuffer &reservoir_indirect_sample_reservoir_buffer : reservoir_indirect_sample_reservoir_buffers_) gfxDestroyBuffer(gfx_, reservoir_indirect_sample_reservoir_buffer); } -void GI10::WorldSpaceReSTIR::ensureMemoryIsAllocated(CapsaicinInternal const &capsaicin) +void GI10::WorldSpaceReSTIR::ensureMemoryIsAllocated([[maybe_unused]] CapsaicinInternal const &capsaicin) { - uint32_t const buffer_width = capsaicin.getWidth(); - uint32_t const buffer_height = capsaicin.getHeight(); - uint32_t const max_ray_count = self.screen_probes_.max_ray_count; if (reservoir_hash_buffers_->getCount() != kConstant_NumEntries) { - for (GfxBuffer reservoir_hash_buffer : reservoir_hash_buffers_) + for (GfxBuffer &reservoir_hash_buffer : reservoir_hash_buffers_) gfxDestroyBuffer(gfx_, reservoir_hash_buffer); - for (GfxBuffer reservoir_hash_count_buffer : reservoir_hash_count_buffers_) + for (GfxBuffer &reservoir_hash_count_buffer : reservoir_hash_count_buffers_) gfxDestroyBuffer(gfx_, reservoir_hash_count_buffer); - for (GfxBuffer reservoir_hash_index_buffer : reservoir_hash_index_buffers_) + for (GfxBuffer &reservoir_hash_index_buffer : reservoir_hash_index_buffers_) gfxDestroyBuffer(gfx_, reservoir_hash_index_buffer); - for (GfxBuffer reservoir_hash_value_buffer : reservoir_hash_value_buffers_) + for (GfxBuffer &reservoir_hash_value_buffer : reservoir_hash_value_buffers_) gfxDestroyBuffer(gfx_, reservoir_hash_value_buffer); for (uint32_t i = 0; i < ARRAYSIZE(reservoir_hash_buffers_); ++i) @@ -651,10 +806,10 @@ void GI10::WorldSpaceReSTIR::ensureMemoryIsAllocated(CapsaicinInternal const &ca if (reservoir_indirect_sample_buffer_.getCount() < max_ray_count) { gfxDestroyBuffer(gfx_, reservoir_indirect_sample_buffer_); - for (GfxBuffer 
reservoir_indirect_sample_normal_buffer : reservoir_indirect_sample_normal_buffers_) gfxDestroyBuffer(gfx_, reservoir_indirect_sample_normal_buffer); gfxDestroyBuffer(gfx_, reservoir_indirect_sample_material_buffer_); - for (GfxBuffer reservoir_indirect_sample_reservoir_buffer : + for (GfxBuffer &reservoir_indirect_sample_reservoir_buffer : reservoir_indirect_sample_reservoir_buffers_) gfxDestroyBuffer(gfx_, reservoir_indirect_sample_reservoir_buffer); @@ -685,6 +840,180 @@ void GI10::WorldSpaceReSTIR::ensureMemoryIsAllocated(CapsaicinInternal const &ca } }
+// Initializes each named texture member from its slot in texture_float_ / texture_float4_.
+// NOTE(review): presumably the named members are references aliasing the array slots, so the
+// arrays can be bound as a whole while the code uses readable names - confirm in the header.
+GI10::GlossyReflections::GlossyReflections(GI10 &gi10)
+    : Base(gi10)
+    , fireflies_buffer_(texture_float_[GLOSSY_REFLECTION_FIREFLIES_BUFFER])
+    , specular_buffer_(texture_float4_[GLOSSY_REFLECTION_SPECULAR_BUFFER])
+    , direction_buffer_(texture_float4_[GLOSSY_REFLECTION_DIRECTION_BUFFER])
+    , reflections_buffer_(texture_float4_[GLOSSY_REFLECTION_REFLECTION_BUFFER])
+    , standard_dev_buffer_(texture_float4_[GLOSSY_REFLECTION_STANDARD_DEV_BUFFER])
+    , reflections_buffer0_(texture_float4_[GLOSSY_REFLECTION_REFLECTIONS_BUFFER_0])
+    , average_squared_buffer0_(texture_float4_[GLOSSY_REFLECTION_AVERAGE_SQUARED_BUFFER_0])
+    , reflections_buffer1_(texture_float4_[GLOSSY_REFLECTION_REFLECTIONS_BUFFER_1])
+    , average_squared_buffer1_(texture_float4_[GLOSSY_REFLECTION_AVERAGE_SQUARED_BUFFER_1])
+{}
+
+// Destroys every texture held in the two texture arrays, then the two ray tracing sample buffers.
+GI10::GlossyReflections::~GlossyReflections()
+{
+    for (auto &texture : texture_float_)
+        gfxDestroyTexture(gfx_, texture);
+    for (auto &texture : texture_float4_)
+        gfxDestroyTexture(gfx_, texture);
+    gfxDestroyBuffer(gfx_, rt_sample_buffer_);
+    gfxDestroyBuffer(gfx_, rt_sample_count_buffer_);
+}
+
+void GI10::GlossyReflections::ensureMemoryIsAllocated(CapsaicinInternal const &capsaicin)
+{
+    RenderOptions options = convertOptions(capsaicin.getOptions());
+
+    uint const full_buffer_width = capsaicin.getWidth();
+    uint
const full_buffer_height = capsaicin.getHeight(); + + uint32_t half_buffer_width = + options.gi10_glossy_reflections_halfres ? (full_buffer_width + 1) / 2 : full_buffer_width; + uint32_t half_buffer_height = + options.gi10_glossy_reflections_halfres ? (full_buffer_height + 1) / 2 : full_buffer_height; + + // Half or full resolution + if (!specular_buffer_ || specular_buffer_.getWidth() != half_buffer_width + || specular_buffer_.getHeight() != half_buffer_height) + { + gfxDestroyTexture(gfx_, specular_buffer_); + + specular_buffer_ = + gfxCreateTexture2D(gfx_, half_buffer_width, half_buffer_height, DXGI_FORMAT_R16G16B16A16_FLOAT); + specular_buffer_.setName("SpecularBuffer"); + gfxCommandClearTexture(gfx_, specular_buffer_); + } + + if (!direction_buffer_ || direction_buffer_.getWidth() != half_buffer_width + || direction_buffer_.getHeight() != half_buffer_height) + { + gfxDestroyTexture(gfx_, direction_buffer_); + + direction_buffer_ = + gfxCreateTexture2D(gfx_, half_buffer_width, half_buffer_height, DXGI_FORMAT_R16G16B16A16_FLOAT); + direction_buffer_.setName("DirectionBuffer"); + } + + if (!rt_sample_buffer_ || rt_sample_buffer_.getCount() != half_buffer_width * half_buffer_height) + { + gfxDestroyBuffer(gfx_, rt_sample_buffer_); + + rt_sample_buffer_ = gfxCreateBuffer(gfx_, half_buffer_width * half_buffer_height); + rt_sample_buffer_.setName("RtSampleBuffer"); + } + + if (!rt_sample_count_buffer_) + { + rt_sample_count_buffer_ = gfxCreateBuffer(gfx_, 1); + rt_sample_count_buffer_.setName("RtSampleCountBuffer"); + } + + if (!fireflies_buffer_ || fireflies_buffer_.getWidth() != half_buffer_width + || fireflies_buffer_.getHeight() != half_buffer_height) + { + gfxDestroyTexture(gfx_, fireflies_buffer_); + + fireflies_buffer_ = + gfxCreateTexture2D(gfx_, half_buffer_width, half_buffer_height, DXGI_FORMAT_R8_SNORM); + fireflies_buffer_.setName("FirefliesBuffer"); + } + + // Full resolution + if (!reflections_buffer_ || reflections_buffer_.getWidth() != 
full_buffer_width + || reflections_buffer_.getHeight() != full_buffer_height) + { + gfxDestroyTexture(gfx_, reflections_buffer_); + + reflections_buffer_ = + gfxCreateTexture2D(gfx_, full_buffer_width, full_buffer_height, DXGI_FORMAT_R16G16B16A16_FLOAT); + reflections_buffer_.setName("ReflectionsBuffer"); + } + + if (!standard_dev_buffer_ || standard_dev_buffer_.getWidth() != full_buffer_width + || standard_dev_buffer_.getHeight() != full_buffer_height) + { + gfxDestroyTexture(gfx_, standard_dev_buffer_); + + standard_dev_buffer_ = + gfxCreateTexture2D(gfx_, full_buffer_width, full_buffer_height, DXGI_FORMAT_R16G16B16A16_FLOAT); + standard_dev_buffer_.setName("StandardDevBuffer"); + } + + // Half or full resolution on width, half or full resolution on height + uint temp_buffer_width = 0; + uint temp_buffer_height = 0; + int temp_buffer_count = 0; + switch (options.gi10_glossy_reflections_denoiser_mode) + { + case 0: // Split Ratio Estimator + { + temp_buffer_width = full_buffer_width; + temp_buffer_height = half_buffer_height; + temp_buffer_count = 1; + break; + } + case 1: // Atrous Ratio Estimator + { + temp_buffer_width = half_buffer_width; + temp_buffer_height = half_buffer_height; + temp_buffer_count = 2; + break; + } + case 2: // Passthrough + break; + default: + GFX_ASSERTMSG(false, "Unexpected denoiser mode %d...", options.gi10_glossy_reflections_denoiser_mode); + } + + // BE CAREFUL: we need to call gfxDestroyTexture if temp_buffer_count == 0 + GfxTexture *temp_reflections_buffers[] = {&reflections_buffer0_, &reflections_buffer1_}; + for (int buffer_index = 0; buffer_index < 2; ++buffer_index) + { + auto &temp_reflections_buffer = *temp_reflections_buffers[buffer_index]; + if (!temp_reflections_buffer || temp_reflections_buffer.getWidth() != temp_buffer_width + || temp_reflections_buffer.getHeight() != temp_buffer_height) + { + gfxDestroyTexture(gfx_, temp_reflections_buffer); + temp_reflections_buffer = {}; + + if (temp_buffer_width > 0 && buffer_index < 
temp_buffer_count) + { + char buffer[64]; + GFX_SNPRINTF(buffer, sizeof(buffer), "ReflectionsBuffer%d", buffer_index); + + temp_reflections_buffer = gfxCreateTexture2D( + gfx_, temp_buffer_width, temp_buffer_height, DXGI_FORMAT_R16G16B16A16_FLOAT); + temp_reflections_buffer.setName(buffer); + } + } + } + + GfxTexture *temp_average_squared_buffers[] = {&average_squared_buffer0_, &average_squared_buffer1_}; + for (int buffer_index = 0; buffer_index < ARRAYSIZE(temp_average_squared_buffers); ++buffer_index) + { + auto &temp_average_squared_buffer = *temp_average_squared_buffers[buffer_index]; + if (!temp_average_squared_buffer || temp_average_squared_buffer.getWidth() != temp_buffer_width + || temp_average_squared_buffer.getHeight() != temp_buffer_height) + { + gfxDestroyTexture(gfx_, temp_average_squared_buffer); + temp_average_squared_buffer = {}; + + if (temp_buffer_width > 0 && buffer_index < temp_buffer_count) + { + char buffer[64]; + GFX_SNPRINTF(buffer, sizeof(buffer), "AverageSquaredBuffer%d", buffer_index); + + temp_average_squared_buffer = gfxCreateTexture2D( + gfx_, temp_buffer_width, temp_buffer_height, DXGI_FORMAT_R16G16B16A16_FLOAT); + temp_average_squared_buffer.setName(buffer); + } + } + } +} + GI10::GIDenoiser::GIDenoiser(GI10 &gi10) : Base(gi10) , color_buffer_index_(0) @@ -692,11 +1021,11 @@ GI10::GIDenoiser::GIDenoiser(GI10 &gi10) GI10::GIDenoiser::~GIDenoiser() { - for (GfxTexture blur_mask : blur_masks_) + for (GfxTexture &blur_mask : blur_masks_) gfxDestroyTexture(gfx_, blur_mask); - for (GfxTexture color_buffer : color_buffers_) + for (GfxTexture &color_buffer : color_buffers_) gfxDestroyTexture(gfx_, color_buffer); - for (GfxTexture color_delta_buffer : color_delta_buffers_) + for (GfxTexture &color_delta_buffer : color_delta_buffers_) gfxDestroyTexture(gfx_, color_delta_buffer); gfxDestroyBuffer(gfx_, blur_sample_count_buffer_); } @@ -753,10 +1082,10 @@ void GI10::GIDenoiser::ensureMemoryIsAllocated(CapsaicinInternal const &capsaici 
GI10::GI10() : RenderTechnique("GI-1.0") - , has_delta_lights_(false) , screen_probes_(*this) , hash_grid_cache_(*this) , world_space_restir_(*this) + , glossy_reflections_(*this) , gi_denoiser_(*this) {} @@ -768,11 +1097,14 @@ GI10::~GI10() RenderOptionList GI10::getRenderOptions() noexcept { RenderOptionList newOptions; + newOptions.emplace(RENDER_OPTION_MAKE(gi10_use_dxr10, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_use_resampling, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_use_alpha_testing, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_use_direct_lighting, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_disable_albedo_textures, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_disable_specular_materials, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_hash_grid_cache_cell_size, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_hash_grid_cache_min_cell_size, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_hash_grid_cache_tile_cell_ratio, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_hash_grid_cache_num_buckets, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_hash_grid_cache_num_tiles_per_bucket, options_)); @@ -780,35 +1112,79 @@ RenderOptionList GI10::getRenderOptions() noexcept newOptions.emplace(RENDER_OPTION_MAKE(gi10_hash_grid_cache_debug_mip_level, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_hash_grid_cache_debug_propagate, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_hash_grid_cache_debug_max_cell_decay, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_hash_grid_cache_debug_stats, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_hash_grid_cache_debug_max_bucket_overflow, options_)); newOptions.emplace(RENDER_OPTION_MAKE(gi10_reservoir_cache_cell_size, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_halfres, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_denoiser_mode, options_)); 
+ newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_low_roughness_threshold, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_high_roughness_threshold, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_cleanup_fireflies, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_atrous_pass_count, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_full_radius, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_half_radius, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_mark_fireflies_half_radius, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_mark_fireflies_full_radius, options_)); + newOptions.emplace( + RENDER_OPTION_MAKE(gi10_glossy_reflections_mark_fireflies_half_low_threshold, options_)); + newOptions.emplace( + RENDER_OPTION_MAKE(gi10_glossy_reflections_mark_fireflies_full_low_threshold, options_)); + newOptions.emplace( + RENDER_OPTION_MAKE(gi10_glossy_reflections_mark_fireflies_half_high_threshold, options_)); + newOptions.emplace( + RENDER_OPTION_MAKE(gi10_glossy_reflections_mark_fireflies_full_high_threshold, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_cleanup_fireflies_half_radius, options_)); + newOptions.emplace(RENDER_OPTION_MAKE(gi10_glossy_reflections_cleanup_fireflies_full_radius, options_)); return newOptions; } -GI10::RenderOptions GI10::convertOptions(RenderSettings const &settings) noexcept +GI10::RenderOptions GI10::convertOptions(RenderOptionList const &options) noexcept { RenderOptions newOptions; - RENDER_OPTION_GET(gi10_use_resampling, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_use_alpha_testing, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_use_direct_lighting, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_disable_albedo_textures, newOptions, settings.options_) - 
RENDER_OPTION_GET(gi10_hash_grid_cache_cell_size, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_hash_grid_cache_tile_cell_ratio, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_hash_grid_cache_num_buckets, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_hash_grid_cache_num_tiles_per_bucket, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_hash_grid_cache_max_sample_count, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_hash_grid_cache_debug_mip_level, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_hash_grid_cache_debug_propagate, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_hash_grid_cache_debug_max_cell_decay, newOptions, settings.options_) - RENDER_OPTION_GET(gi10_reservoir_cache_cell_size, newOptions, settings.options_) + RENDER_OPTION_GET(gi10_use_dxr10, newOptions, options) + RENDER_OPTION_GET(gi10_use_resampling, newOptions, options) + RENDER_OPTION_GET(gi10_use_alpha_testing, newOptions, options) + RENDER_OPTION_GET(gi10_use_direct_lighting, newOptions, options) + RENDER_OPTION_GET(gi10_disable_albedo_textures, newOptions, options) + RENDER_OPTION_GET(gi10_disable_specular_materials, newOptions, options) + RENDER_OPTION_GET(gi10_hash_grid_cache_cell_size, newOptions, options) + RENDER_OPTION_GET(gi10_hash_grid_cache_min_cell_size, newOptions, options) + RENDER_OPTION_GET(gi10_hash_grid_cache_tile_cell_ratio, newOptions, options) + RENDER_OPTION_GET(gi10_hash_grid_cache_num_buckets, newOptions, options) + RENDER_OPTION_GET(gi10_hash_grid_cache_num_tiles_per_bucket, newOptions, options) + RENDER_OPTION_GET(gi10_hash_grid_cache_max_sample_count, newOptions, options) + RENDER_OPTION_GET(gi10_hash_grid_cache_debug_mip_level, newOptions, options) + RENDER_OPTION_GET(gi10_hash_grid_cache_debug_propagate, newOptions, options) + RENDER_OPTION_GET(gi10_hash_grid_cache_debug_max_cell_decay, newOptions, options) + RENDER_OPTION_GET(gi10_hash_grid_cache_debug_stats, newOptions, options) + 
RENDER_OPTION_GET(gi10_hash_grid_cache_debug_max_bucket_overflow, newOptions, options) + RENDER_OPTION_GET(gi10_reservoir_cache_cell_size, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_halfres, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_denoiser_mode, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_low_roughness_threshold, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_high_roughness_threshold, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_cleanup_fireflies, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_atrous_pass_count, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_full_radius, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_half_radius, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_mark_fireflies_half_radius, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_mark_fireflies_full_radius, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_mark_fireflies_half_low_threshold, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_mark_fireflies_full_low_threshold, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_mark_fireflies_half_high_threshold, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_mark_fireflies_full_high_threshold, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_cleanup_fireflies_half_radius, newOptions, options) + RENDER_OPTION_GET(gi10_glossy_reflections_cleanup_fireflies_full_radius, newOptions, options) return newOptions; } ComponentList GI10::getComponents() const noexcept { ComponentList components; - components.emplace_back(COMPONENT_MAKE(LightSamplerBounds)); + components.emplace_back(COMPONENT_MAKE(BrdfLut)); + components.emplace_back(COMPONENT_MAKE(PrefilterIBL)); + components.emplace_back(COMPONENT_MAKE(LightSamplerGridStream)); components.emplace_back(COMPONENT_MAKE(BlueNoiseSampler)); - 
components.emplace_back(COMPONENT_MAKE(StratifiedSampler)); return components; } @@ -817,10 +1193,12 @@ AOVList GI10::getAOVs() const noexcept AOVList aovs; aovs.push_back({"Debug", AOV::Write}); aovs.push_back({"GlobalIllumination", AOV::Write, AOV::None, DXGI_FORMAT_R16G16B16A16_FLOAT}); + aovs.push_back({"Reflection", AOV::Write, AOV::None, DXGI_FORMAT_R16G16B16A16_FLOAT, "PrevReflection"}); aovs.push_back({.name = "VisibilityDepth", .backup_name = "PrevVisibilityDepth"}); - aovs.push_back({.name = "Normal", .backup_name = "PrevNormal"}); - aovs.push_back({.name = "Details", .backup_name = "PrevDetails"}); + aovs.push_back({.name = "GeometryNormal", .backup_name = "PrevGeometryNormal"}); + aovs.push_back({.name = "ShadingNormal", .backup_name = "PrevShadingNormal"}); aovs.push_back({"Velocity"}); + aovs.push_back({.name = "Roughness", .backup_name = "PrevRoughness"}); aovs.push_back({"OcclusionAndBentNormal"}); aovs.push_back({"NearFieldGlobalIllumination"}); aovs.push_back({"Visibility"}); @@ -841,6 +1219,7 @@ DebugViewList GI10::getDebugViews() const noexcept views.emplace_back("HashGridCache_FilteredSampleCount"); views.emplace_back("HashGridCache_FilteredMipLevel"); views.emplace_back("HashGridCache_Occupancy"); + views.emplace_back("Reflection"); return views; } @@ -852,21 +1231,21 @@ bool GI10::init(CapsaicinInternal const &capsaicin) noexcept draw_command_buffer_ = gfxCreateBuffer(gfx_, 1); draw_command_buffer_.setName("Capsaicin_DrawCommandBuffer"); - dispatch_command_buffer_ = gfxCreateBuffer(gfx_, 1); + dispatch_command_buffer_ = + gfxCreateBuffer(gfx_, GFX_MAX(sizeof(DispatchCommand), sizeof(DispatchRaysCommand))); dispatch_command_buffer_.setName("Capsaicin_DispatchCommandBuffer"); // Set up the base defines based on available features - auto light_sampler = capsaicin.getComponent(); + auto light_sampler = capsaicin.getComponent(); std::vector defines(std::move(light_sampler->getShaderDefines(capsaicin))); std::vector base_defines; for (auto &i : 
defines) { base_defines.push_back(i.c_str()); } - base_defines.push_back("DISABLE_SPECULAR_LIGHTING"); // TODO: glossy reflections aren't supported yet - if (!has_delta_lights_) base_defines.push_back("HAS_FEEDBACK"); - if (options_.gi10_use_alpha_testing) base_defines.push_back("USE_ALPHA_TESTING"); + if (!options_.gi10_use_alpha_testing) base_defines.push_back("DISABLE_ALPHA_TESTING"); if (capsaicin.hasAOVBuffer("OcclusionAndBentNormal")) base_defines.push_back("HAS_OCCLUSION"); + if (options_.gi10_disable_specular_materials) base_defines.push_back("DISABLE_SPECULAR_MATERIALS"); uint32_t const base_define_count = (uint32_t)base_defines.size(); std::vector resampling_defines = base_defines; @@ -878,6 +1257,10 @@ bool GI10::init(CapsaicinInternal const &capsaicin) noexcept { debug_hash_cells_defines.push_back("DEBUG_HASH_CELLS"); } + if (options_.gi10_hash_grid_cache_debug_stats) + { + debug_hash_cells_defines.push_back("DEBUG_HASH_STATS"); + } uint32_t const debug_hash_cells_define_count = (uint32_t)debug_hash_cells_defines.size(); // We need to clear the radiance cache when toggling the hash cells debug mode; @@ -903,8 +1286,8 @@ bool GI10::init(CapsaicinInternal const &capsaicin) noexcept gfxDrawStateSetDepthStencilTarget(debug_hash_grid_cells_draw_state, depth_buffer_); gfxDrawStateSetCullMode(debug_hash_grid_cells_draw_state, D3D12_CULL_MODE_NONE); - GfxDrawState debug_material_draw_state; - gfxDrawStateSetColorTarget(debug_material_draw_state, 0, capsaicin.getAOVBuffer("Debug")); + GfxDrawState debug_reflection_draw_state; + gfxDrawStateSetColorTarget(debug_reflection_draw_state, 0, capsaicin.getAOVBuffer("Debug")); gi10_program_ = gfxCreateProgram(gfx_, "render_techniques/gi10/gi10", capsaicin.getShaderPath()); resolve_gi10_kernel_ = gfxCreateGraphicsKernel(gfx_, gi10_program_, resolve_lighting_draw_state, @@ -918,19 +1301,79 @@ bool GI10::init(CapsaicinInternal const &capsaicin) noexcept gfxCreateGraphicsKernel(gfx_, gi10_program_, 
debug_screen_probes_draw_state, "DebugScreenProbes"); debug_hash_grid_cells_kernel_ = gfxCreateGraphicsKernel(gfx_, gi10_program_, debug_hash_grid_cells_draw_state, "DebugHashGridCells"); - - clear_probe_mask_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ClearProbeMask"); - filter_probe_mask_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "FilterProbeMask"); - init_cached_tile_lru_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "InitCachedTileLRU"); - reproject_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ReprojectScreenProbes"); - count_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "CountScreenProbes"); - scatter_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ScatterScreenProbes"); - spawn_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "SpawnScreenProbes"); - compact_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "CompactScreenProbes"); - patch_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "PatchScreenProbes"); - sample_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "SampleScreenProbes"); - populate_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "PopulateScreenProbes", - debug_hash_cells_defines.data(), debug_hash_cells_define_count); + debug_reflection_kernel_ = + gfxCreateGraphicsKernel(gfx_, gi10_program_, debug_reflection_draw_state, "DebugReflection"); + + clear_probe_mask_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ClearProbeMask"); + filter_probe_mask_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "FilterProbeMask"); + init_cached_tile_lru_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "InitCachedTileLRU"); + reproject_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ReprojectScreenProbes"); + count_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "CountScreenProbes"); + scatter_screen_probes_kernel_ = 
gfxCreateComputeKernel(gfx_, gi10_program_, "ScatterScreenProbes"); + spawn_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "SpawnScreenProbes"); + compact_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "CompactScreenProbes"); + patch_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "PatchScreenProbes"); + sample_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "SampleScreenProbes"); + if (options_.gi10_use_dxr10) + { + std::vector base_subobjects; + base_subobjects.push_back("MyShaderConfig"); + base_subobjects.push_back("MyPipelineConfig"); + + std::vector populate_screen_probes_exports; + populate_screen_probes_exports.push_back(kPopulateScreenProbesRaygenShaderName); + populate_screen_probes_exports.push_back(kPopulateScreenProbesMissShaderName); + populate_screen_probes_exports.push_back(kPopulateScreenProbesAnyHitShaderName); + populate_screen_probes_exports.push_back(kPopulateScreenProbesClosestHitShaderName); + std::vector populate_screen_probes_subobjects = base_subobjects; + populate_screen_probes_subobjects.push_back(kPopulateScreenProbesHitGroupName); + populate_screen_probes_kernel_ = gfxCreateRaytracingKernel(gfx_, gi10_program_, nullptr, 0, + populate_screen_probes_exports.data(), (uint32_t)populate_screen_probes_exports.size(), + populate_screen_probes_subobjects.data(), (uint32_t)populate_screen_probes_subobjects.size(), + debug_hash_cells_defines.data(), debug_hash_cells_define_count); + + std::vector populate_cells_kernel_exports; + populate_cells_kernel_exports.push_back(kPopulateCellsRaygenShaderName); + populate_cells_kernel_exports.push_back(kPopulateCellsMissShaderName); + populate_cells_kernel_exports.push_back(kPopulateCellsAnyHitShaderName); + populate_cells_kernel_exports.push_back(kPopulateCellsClosestHitShaderName); + std::vector populate_cells_kernel_subobjects = base_subobjects; + populate_cells_kernel_subobjects.push_back(kPopulateCellsHitGroupName); + 
populate_cells_kernel_ = gfxCreateRaytracingKernel(gfx_, gi10_program_, nullptr, 0, + populate_cells_kernel_exports.data(), (uint32_t)populate_cells_kernel_exports.size(), + populate_cells_kernel_subobjects.data(), (uint32_t)populate_cells_kernel_subobjects.size(), + resampling_defines.data(), resampling_define_count); + + std::vector trace_reflections_kernel_exports; + trace_reflections_kernel_exports.push_back(kTraceReflectionsRaygenShaderName); + trace_reflections_kernel_exports.push_back(kTraceReflectionsMissShaderName); + trace_reflections_kernel_exports.push_back(kTraceReflectionsAnyHitShaderName); + trace_reflections_kernel_exports.push_back(kTraceReflectionsClosestHitShaderName); + std::vector trace_reflections_kernel_subobjects = base_subobjects; + trace_reflections_kernel_subobjects.push_back(kTraceReflectionsHitGroupName); + trace_reflections_kernel_ = gfxCreateRaytracingKernel(gfx_, gi10_program_, nullptr, 0, + trace_reflections_kernel_exports.data(), (uint32_t)trace_reflections_kernel_exports.size(), + trace_reflections_kernel_subobjects.data(), (uint32_t)trace_reflections_kernel_subobjects.size()); + generate_dispatch_rays_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "GenerateDispatchRays"); + + uint32_t entry_count[kGfxShaderGroupType_Count] { + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Raygen), + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Miss), + gfxSceneGetInstanceCount(capsaicin.getScene()) + * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit), + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Callable)}; + GfxKernel sbt_kernels[] { + populate_screen_probes_kernel_, populate_cells_kernel_, trace_reflections_kernel_}; + sbt_ = gfxCreateSbt(gfx_, sbt_kernels, ARRAYSIZE(sbt_kernels), entry_count); + } + else + { + populate_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, + "PopulateScreenProbesMain", debug_hash_cells_defines.data(), debug_hash_cells_define_count); + populate_cells_kernel_ = 
gfxCreateComputeKernel( + gfx_, gi10_program_, "PopulateCellsMain", resampling_defines.data(), resampling_define_count); + trace_reflections_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "TraceReflectionsMain"); + } blend_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "BlendScreenProbes"); reorder_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ReorderScreenProbes"); filter_screen_probes_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "FilterScreenProbes"); @@ -940,11 +1383,21 @@ bool GI10::init(CapsaicinInternal const &capsaicin) noexcept purge_tiles_kernel_ = gfxCreateComputeKernel( gfx_, gi10_program_, "PurgeTiles", debug_hash_cells_defines.data(), debug_hash_cells_define_count); - populate_cells_kernel_ = gfxCreateComputeKernel( - gfx_, gi10_program_, "PopulateCells", resampling_defines.data(), resampling_define_count); update_tiles_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "UpdateTiles"); resolve_cells_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ResolveCells"); + if (options_.gi10_hash_grid_cache_debug_stats) + { + clear_bucket_overflow_count_kernel_ = + gfxCreateComputeKernel(gfx_, gi10_program_, "ClearBucketOverflowCount"); + clear_bucket_occupancy_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ClearBucketOccupancy"); + clear_bucket_overflow_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ClearBucketOverflow"); + build_bucket_stats_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "BuildBucketStatistics"); + format_bucket_occupancy_kernel_ = + gfxCreateComputeKernel(gfx_, gi10_program_, "FormatBucketOccupancy"); + format_bucket_overflow_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "FormatBucketOverflow"); + } + clear_reservoirs_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ClearReservoirs"); generate_reservoirs_kernel_ = gfxCreateComputeKernel( gfx_, gi10_program_, "GenerateReservoirs", resampling_defines.data(), resampling_define_count); @@ 
-952,6 +1405,21 @@ bool GI10::init(CapsaicinInternal const &capsaicin) noexcept resample_reservoirs_kernel_ = gfxCreateComputeKernel( gfx_, gi10_program_, "ResampleReservoirs", base_defines.data(), base_define_count); + mark_fireflies_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "MarkFireflies"); + cleanup_fireflies_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "CleanupFireflies"); + resolve_reflections_kernels_[0] = + gfxCreateComputeKernel(gfx_, gi10_program_, "ResolveReflections_SplitRatioEstimatorX"); + resolve_reflections_kernels_[1] = + gfxCreateComputeKernel(gfx_, gi10_program_, "ResolveReflections_SplitRatioEstimatorY"); + resolve_reflections_kernels_[2] = + gfxCreateComputeKernel(gfx_, gi10_program_, "ResolveReflections_AtrousRatioEstimator_First"); + resolve_reflections_kernels_[3] = + gfxCreateComputeKernel(gfx_, gi10_program_, "ResolveReflections_AtrousRatioEstimator_Iter"); + resolve_reflections_kernels_[4] = + gfxCreateComputeKernel(gfx_, gi10_program_, "ResolveReflections_AtrousRatioEstimator_Last"); + reproject_reflections_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ReprojectReflections"); + no_denoiser_reflections_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "NoDenoiserReflections"); + reproject_gi_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "ReprojectGI"); filter_blur_mask_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "FilterBlurMask"); filter_gi_kernel_ = gfxCreateComputeKernel(gfx_, gi10_program_, "FilterGI"); @@ -960,6 +1428,7 @@ bool GI10::init(CapsaicinInternal const &capsaicin) noexcept screen_probes_.ensureMemoryIsAllocated(capsaicin); hash_grid_cache_.ensureMemoryIsAllocated(capsaicin, options_, debug_view_); world_space_restir_.ensureMemoryIsAllocated(capsaicin); + glossy_reflections_.ensureMemoryIsAllocated(capsaicin); gi_denoiser_.ensureMemoryIsAllocated(capsaicin); // Ensure our fullscreen render target is allocated @@ -967,39 +1436,43 @@ bool GI10::init(CapsaicinInternal const 
&capsaicin) noexcept gfxCreateTexture2D(gfx_, capsaicin.getWidth(), capsaicin.getHeight(), DXGI_FORMAT_R16G16B16A16_FLOAT); irradiance_buffer_.setName("Capsaicin_IrradianceBuffer"); + // Reserve position values with light bounds sampler + light_sampler->reserveBoundsValues(screen_probes_.max_ray_count, this); + return !!filter_gi_kernel_; } void GI10::render(CapsaicinInternal &capsaicin) noexcept { - auto const &render_settings = capsaicin.getRenderSettings(); - RenderOptions options = convertOptions(render_settings); - auto light_sampler = capsaicin.getComponent(); + RenderOptions options = convertOptions(capsaicin.getOptions()); + auto light_sampler = capsaicin.getComponent(); + auto brdf_lut = capsaicin.getComponent(); + auto prefilter_ibl = capsaicin.getComponent(); auto blue_noise_sampler = capsaicin.getComponent(); - auto stratified_sampler = capsaicin.getComponent(); - bool const needs_debug_view = render_settings.debug_view_ != debug_view_ - && ((debug_view_.starts_with("HashGridCache_") - && !render_settings.debug_view_.starts_with("HashGridCache_")) - || (!debug_view_.starts_with("HashGridCache_") - && render_settings.debug_view_.starts_with("HashGridCache_"))); - - bool const has_delta_lights = (GetDeltaLightCount() != 0); + auto const debug_view = capsaicin.getCurrentDebugView(); + bool const needs_debug_view = + debug_view != debug_view_ + && ((debug_view_.starts_with("HashGridCache_") && !debug_view.starts_with("HashGridCache_")) + || (!debug_view_.starts_with("HashGridCache_") && debug_view.starts_with("HashGridCache_"))); bool const needs_recompile = (options.gi10_use_resampling != options_.gi10_use_resampling || options.gi10_use_alpha_testing != options_.gi10_use_alpha_testing - || light_sampler->needsRecompile(capsaicin) || needs_debug_view - || has_delta_lights != has_delta_lights_); + || options.gi10_disable_specular_materials != options_.gi10_disable_specular_materials + || light_sampler->needsRecompile(capsaicin) || needs_debug_view) + || 
options_.gi10_use_dxr10 != options.gi10_use_dxr10 + || options_.gi10_hash_grid_cache_debug_stats != options.gi10_hash_grid_cache_debug_stats; bool const needs_hash_grid_clear = options_.gi10_hash_grid_cache_cell_size != options.gi10_hash_grid_cache_cell_size + || options_.gi10_hash_grid_cache_min_cell_size != options.gi10_hash_grid_cache_min_cell_size || options_.gi10_hash_grid_cache_debug_mip_level != options.gi10_hash_grid_cache_debug_mip_level - || options_.gi10_hash_grid_cache_debug_propagate != options.gi10_hash_grid_cache_debug_propagate; + || options_.gi10_hash_grid_cache_debug_propagate != options.gi10_hash_grid_cache_debug_propagate + || capsaicin.getFrameIndex() == 0; - options_ = options; - has_delta_lights_ = has_delta_lights; - debug_view_ = render_settings.debug_view_; + options_ = options; + debug_view_ = debug_view; if (needs_recompile) { @@ -1011,6 +1484,7 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept { gfxDestroyKernel(gfx_, debug_screen_probes_kernel_); gfxDestroyKernel(gfx_, debug_hash_grid_cells_kernel_); + gfxDestroyKernel(gfx_, debug_reflection_kernel_); GfxDrawState debug_screen_probes_draw_state; gfxDrawStateSetColorTarget(debug_screen_probes_draw_state, 0, capsaicin.getAOVBuffer("Debug")); @@ -1020,13 +1494,15 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept gfxDrawStateSetDepthStencilTarget(debug_hash_grid_cells_draw_state, depth_buffer_); gfxDrawStateSetCullMode(debug_hash_grid_cells_draw_state, D3D12_CULL_MODE_NONE); - GfxDrawState debug_material_draw_state; - gfxDrawStateSetColorTarget(debug_material_draw_state, 0, capsaicin.getAOVBuffer("Debug")); + GfxDrawState debug_reflection_draw_state; + gfxDrawStateSetColorTarget(debug_reflection_draw_state, 0, capsaicin.getAOVBuffer("Debug")); debug_screen_probes_kernel_ = gfxCreateGraphicsKernel(gfx_, gi10_program_, debug_screen_probes_draw_state, "DebugScreenProbes"); debug_hash_grid_cells_kernel_ = gfxCreateGraphicsKernel( gfx_, gi10_program_, 
debug_hash_grid_cells_draw_state, "DebugHashGridCells"); + debug_reflection_kernel_ = + gfxCreateGraphicsKernel(gfx_, gi10_program_, debug_reflection_draw_state, "DebugReflection"); } // Clear the hash-grid cache if user's changed the cell size @@ -1039,6 +1515,7 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept screen_probes_.ensureMemoryIsAllocated(capsaicin); hash_grid_cache_.ensureMemoryIsAllocated(capsaicin, options_, debug_view_); world_space_restir_.ensureMemoryIsAllocated(capsaicin); + glossy_reflections_.ensureMemoryIsAllocated(capsaicin); gi_denoiser_.ensureMemoryIsAllocated(capsaicin); // Reallocate fullscreen render target if required @@ -1052,24 +1529,29 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept irradiance_buffer_.setName("Capsaicin_IrradianceBuffer"); } - // Reserve position values with light bounds sampler - light_sampler->reserveBoundsValues(screen_probes_.max_ray_count, this); - // Allocate and populate our constant data GfxBuffer gi10_constants = capsaicin.allocateConstantBuffer(1); GfxBuffer screen_probes_constants = capsaicin.allocateConstantBuffer(1); GfxBuffer hash_grid_cache_constants = capsaicin.allocateConstantBuffer(1); GfxBuffer world_space_restir_constants = capsaicin.allocateConstantBuffer(1); + GfxBuffer glossy_reflections_constants = capsaicin.allocateConstantBuffer(1); + + GI10Constants gi_constant_data = {}; + auto const &camera = capsaicin.getCameraMatrices(capsaicin.getOption("taa_enable")); + gi_constant_data.view_proj = camera.view_projection; + gi_constant_data.view_proj_prev = camera.view_projection_prev; + gi_constant_data.view_proj_inv = camera.inv_view_projection; + gi_constant_data.view_proj_inv_prev = camera.inv_view_projection_prev; + gi_constant_data.reprojection = camera.reprojection; + if (options.gi10_use_dxr10) + { + gfxSbtGetGpuVirtualAddressRangeAndStride(gfx_, sbt_, + (D3D12_GPU_VIRTUAL_ADDRESS_RANGE *)&gi_constant_data.ray_generation_shader_record, + 
(D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE *)&gi_constant_data.miss_shader_table, + (D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE *)&gi_constant_data.hit_group_table, + (D3D12_GPU_VIRTUAL_ADDRESS_RANGE_AND_STRIDE *)&gi_constant_data.callable_shader_table); + } - GI10Constants gi_constant_data = {}; - auto const &camera = capsaicin.getCameraMatrices(render_settings.getOption("taa_enable")); - gi_constant_data.view_proj = camera.view_projection; - gi_constant_data.view_proj_prev = camera.view_projection_prev; - gi_constant_data.view_proj_inv = glm::inverse(glm::dmat4(camera.view_projection)); - gi_constant_data.view_proj_inv_prev = glm::inverse(glm::dmat4(camera.view_projection_prev)); - gi_constant_data.reprojection = - glm::dmat4(camera.view_projection_prev) * glm::inverse(glm::dmat4(camera.view_projection)); - gi_constant_data.view_inv = camera.inv_view; gfxBufferGetData(gfx_, gi10_constants)[0] = gi_constant_data; ScreenProbesConstants screen_probes_constant_data = {}; @@ -1088,11 +1570,13 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept } gfxBufferGetData(gfx_, screen_probes_constants)[0] = screen_probes_constant_data; - float cell_size = tanf(capsaicin.getCamera().fovY * options_.gi10_hash_grid_cache_cell_size - * GFX_MAX(1.0f / capsaicin.getHeight(), - (float)capsaicin.getHeight() / (capsaicin.getWidth() * capsaicin.getWidth()))); + uint32_t const frame_index = capsaicin.getFrameIndex(); + float cell_size = tanf(capsaicin.getCamera().fovY * options_.gi10_hash_grid_cache_cell_size + * GFX_MAX(1.0f / capsaicin.getHeight(), + (float)capsaicin.getHeight() / (capsaicin.getWidth() * capsaicin.getWidth()))); HashGridCacheConstants hash_grid_cache_constant_data = {}; hash_grid_cache_constant_data.cell_size = cell_size; + hash_grid_cache_constant_data.min_cell_size = options_.gi10_hash_grid_cache_min_cell_size; hash_grid_cache_constant_data.tile_size = cell_size * options_.gi10_hash_grid_cache_tile_cell_ratio; hash_grid_cache_constant_data.tile_cell_ratio = 
(float)options_.gi10_hash_grid_cache_tile_cell_ratio; hash_grid_cache_constant_data.num_buckets = hash_grid_cache_.num_buckets_; @@ -1117,7 +1601,12 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept hash_grid_cache_constant_data.debug_mip_level = options_.gi10_hash_grid_cache_debug_mip_level; hash_grid_cache_constant_data.debug_propagate = (uint)options_.gi10_hash_grid_cache_debug_propagate; hash_grid_cache_constant_data.debug_max_cell_decay = options_.gi10_hash_grid_cache_debug_max_cell_decay; - hash_grid_cache_constant_data.debug_mode = HASHGRIDCACHE_DEBUG_RADIANCE; + hash_grid_cache_constant_data.debug_bucket_occupancy_histogram_size = + hash_grid_cache_.debug_bucket_occupancy_histogram_size_; + hash_grid_cache_constant_data.debug_bucket_overflow_histogram_size = + hash_grid_cache_.debug_bucket_overflow_histogram_size_; + options_.gi10_hash_grid_cache_debug_max_bucket_overflow; + hash_grid_cache_constant_data.debug_mode = HASHGRIDCACHE_DEBUG_RADIANCE; if (debug_view_ == "HashGridCache_RadianceSampleCount") { hash_grid_cache_constant_data.debug_mode = HASHGRIDCACHE_DEBUG_RADIANCE_SAMPLE_COUNT; @@ -1146,12 +1635,11 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept { options_.gi10_hash_grid_cache_debug_mip_level = 0; // Filtering is adaptive... 
} - if (debug_view_.starts_with("HashGridCache_Filtered") - || render_settings.debug_view_ == "HashGridCache_Occupancy") + if (debug_view_.starts_with("HashGridCache_Filtered") || debug_view_ == "HashGridCache_Occupancy") { options_.gi10_hash_grid_cache_debug_propagate = false; } - else if (render_settings.getOption("gi10_hash_grid_cache_debug_mip_level") > 0) + else if (capsaicin.getOption("gi10_hash_grid_cache_debug_mip_level") > 0) { options_.gi10_hash_grid_cache_debug_propagate = true; } @@ -1169,32 +1657,44 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept gfxBufferGetData(gfx_, world_space_restir_constants)[0] = world_space_restir_constant_data; + GlossyReflectionsConstants glossy_reflections_constant_data = {}; + glossy_reflections_constant_data.full_res = glm::uvec2(capsaicin.getWidth(), capsaicin.getHeight()); + glossy_reflections_constant_data.full_radius = options.gi10_glossy_reflections_full_radius; + glossy_reflections_constant_data.half_radius = options.gi10_glossy_reflections_half_radius; + glossy_reflections_constant_data.mark_fireflies_half_radius = + options.gi10_glossy_reflections_mark_fireflies_half_radius; + glossy_reflections_constant_data.mark_fireflies_full_radius = + options.gi10_glossy_reflections_mark_fireflies_full_radius; + glossy_reflections_constant_data.mark_fireflies_half_low_threshold = + options.gi10_glossy_reflections_mark_fireflies_half_low_threshold; + glossy_reflections_constant_data.mark_fireflies_full_low_threshold = + options.gi10_glossy_reflections_mark_fireflies_full_low_threshold; + glossy_reflections_constant_data.mark_fireflies_half_high_threshold = + options.gi10_glossy_reflections_mark_fireflies_half_high_threshold; + glossy_reflections_constant_data.mark_fireflies_full_high_threshold = + options.gi10_glossy_reflections_mark_fireflies_full_high_threshold; + glossy_reflections_constant_data.cleanup_fireflies_half_radius = + options.gi10_glossy_reflections_cleanup_fireflies_half_radius; + 
glossy_reflections_constant_data.cleanup_fireflies_full_radius = + options.gi10_glossy_reflections_cleanup_fireflies_full_radius; + glossy_reflections_constant_data.low_roughness_threshold = + options.gi10_glossy_reflections_low_roughness_threshold; + glossy_reflections_constant_data.high_roughness_threshold = + options.gi10_glossy_reflections_high_roughness_threshold; + glossy_reflections_constant_data.half_res = options.gi10_glossy_reflections_halfres; + gfxBufferGetData(gfx_, glossy_reflections_constants)[0] = + glossy_reflections_constant_data; + // Bind the shader parameters uint32_t const buffer_dimensions[] = {capsaicin.getWidth(), capsaicin.getHeight()}; float const near_far[] = {capsaicin.getCamera().nearZ, capsaicin.getCamera().farZ}; - GfxBuffer transform_buffer = capsaicin.getTransformBuffer(); - - transform_buffer.setStride(sizeof(float4)); // this is to align with UE5 where transforms are stored as - // 4x3 matrices and fetched using 3x float4 reads - - // Some NVIDIA-specific fix - vertex_buffers_.resize(capsaicin.getVertexBufferCount()); - - for (uint32_t i = 0; i < capsaicin.getVertexBufferCount(); ++i) - { - vertex_buffers_[i] = capsaicin.getVertexBuffers()[i]; - - if (gfx_.getVendorId() == 0x10DEu) // NVIDIA - { - vertex_buffers_[i].setStride(4); - } - } - + gfxProgramSetParameter(gfx_, gi10_program_, "g_Exposure", + capsaicin.hasOption("tonemap_exposure") ? 
capsaicin.getOption("tonemap_exposure") : 0.0f); gfxProgramSetParameter(gfx_, gi10_program_, "g_Eye", capsaicin.getCamera().eye); gfxProgramSetParameter(gfx_, gi10_program_, "g_NearFar", near_far); - gfxProgramSetParameter(gfx_, gi10_program_, "g_FrameIndex", capsaicin.getFrameIndex()); + gfxProgramSetParameter(gfx_, gi10_program_, "g_FrameIndex", frame_index); gfxProgramSetParameter(gfx_, gi10_program_, "g_InvDeviceZ", capsaicin.getInvDeviceZ()); gfxProgramSetParameter(gfx_, gi10_program_, "g_PreviousEye", previous_camera_.eye); gfxProgramSetParameter(gfx_, gi10_program_, "g_BufferDimensions", buffer_dimensions); @@ -1205,9 +1705,12 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept gfx_, gi10_program_, "g_DisableAlbedoTextures", options_.gi10_disable_albedo_textures ? 1 : 0); gfxProgramSetParameter(gfx_, gi10_program_, "g_DepthBuffer", capsaicin.getAOVBuffer("VisibilityDepth")); - gfxProgramSetParameter(gfx_, gi10_program_, "g_NormalBuffer", capsaicin.getAOVBuffer("Normal")); - gfxProgramSetParameter(gfx_, gi10_program_, "g_DetailsBuffer", capsaicin.getAOVBuffer("Details")); + gfxProgramSetParameter( + gfx_, gi10_program_, "g_GeometryNormalBuffer", capsaicin.getAOVBuffer("GeometryNormal")); + gfxProgramSetParameter( + gfx_, gi10_program_, "g_ShadingNormalBuffer", capsaicin.getAOVBuffer("ShadingNormal")); gfxProgramSetParameter(gfx_, gi10_program_, "g_VelocityBuffer", capsaicin.getAOVBuffer("Velocity")); + gfxProgramSetParameter(gfx_, gi10_program_, "g_RoughnessBuffer", capsaicin.getAOVBuffer("Roughness")); gfxProgramSetParameter(gfx_, gi10_program_, "g_OcclusionAndBentNormalBuffer", capsaicin.getAOVBuffer("OcclusionAndBentNormal")); gfxProgramSetParameter(gfx_, gi10_program_, "g_NearFieldGlobalIlluminationBuffer", @@ -1216,27 +1719,33 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept gfxProgramSetParameter( gfx_, gi10_program_, "g_PreviousDepthBuffer", capsaicin.getAOVBuffer("PrevVisibilityDepth")); gfxProgramSetParameter( - gfx_, 
gi10_program_, "g_PreviousNormalBuffer", capsaicin.getAOVBuffer("PrevNormal")); + gfx_, gi10_program_, "g_PreviousNormalBuffer", capsaicin.getAOVBuffer("PrevGeometryNormal")); gfxProgramSetParameter( - gfx_, gi10_program_, "g_PreviousDetailsBuffer", capsaicin.getAOVBuffer("PrevDetails")); - - stratified_sampler->addProgramParameters(capsaicin, gi10_program_); + gfx_, gi10_program_, "g_PreviousDetailsBuffer", capsaicin.getAOVBuffer("PrevShadingNormal")); + gfxProgramSetParameter( + gfx_, gi10_program_, "g_PreviousRoughnessBuffer", capsaicin.getAOVBuffer("PrevRoughness")); blue_noise_sampler->addProgramParameters(capsaicin, gi10_program_); - gfxProgramSetParameter( - gfx_, gi10_program_, "g_IndexBuffers", capsaicin.getIndexBuffers(), capsaicin.getIndexBufferCount()); - gfxProgramSetParameter( - gfx_, gi10_program_, "g_VertexBuffers", vertex_buffers_.data(), capsaicin.getVertexBufferCount()); + brdf_lut->addProgramParameters(capsaicin, gi10_program_); + + gfxProgramSetParameter(gfx_, gi10_program_, "g_IndexBuffer", capsaicin.getIndexBuffer()); + gfxProgramSetParameter(gfx_, gi10_program_, "g_VertexBuffer", capsaicin.getVertexBuffer()); gfxProgramSetParameter(gfx_, gi10_program_, "g_MeshBuffer", capsaicin.getMeshBuffer()); gfxProgramSetParameter(gfx_, gi10_program_, "g_InstanceBuffer", capsaicin.getInstanceBuffer()); gfxProgramSetParameter(gfx_, gi10_program_, "g_MaterialBuffer", capsaicin.getMaterialBuffer()); - gfxProgramSetParameter(gfx_, gi10_program_, "g_TransformBuffer", transform_buffer); + gfxProgramSetParameter(gfx_, gi10_program_, "g_TransformBuffer", capsaicin.getTransformBuffer()); gfxProgramSetParameter(gfx_, gi10_program_, "g_IrradianceBuffer", irradiance_buffer_); + gfxProgramSetParameter(gfx_, gi10_program_, "g_ReflectionBuffer", capsaicin.getAOVBuffer("Reflection")); + gfxProgramSetParameter( + gfx_, gi10_program_, "g_PreviousReflectionBuffer", capsaicin.getAOVBuffer("PrevReflection")); + gfxProgramSetParameter(gfx_, gi10_program_, 
"g_DrawCommandBuffer", draw_command_buffer_); gfxProgramSetParameter(gfx_, gi10_program_, "g_DispatchCommandBuffer", dispatch_command_buffer_); + if (options_.gi10_use_dxr10) + gfxProgramSetParameter(gfx_, gi10_program_, "g_DispatchRaysCommandBuffer", dispatch_command_buffer_); gfxProgramSetParameter( gfx_, gi10_program_, "g_GlobalIlluminationBuffer", capsaicin.getAOVBuffer("GlobalIllumination")); gfxProgramSetParameter(gfx_, gi10_program_, "g_PrevCombinedIlluminationBuffer", @@ -1250,6 +1759,8 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept gfxProgramSetParameter(gfx_, gi10_program_, "g_EnvironmentBuffer", capsaicin.getEnvironmentBuffer()); + prefilter_ibl->addProgramParameters(capsaicin, gi10_program_); + gfxProgramSetParameter( gfx_, gi10_program_, "g_TextureMaps", capsaicin.getTextures(), capsaicin.getTextureCount()); @@ -1261,6 +1772,7 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept gfxProgramSetParameter(gfx_, gi10_program_, "g_ScreenProbesConstants", screen_probes_constants); gfxProgramSetParameter(gfx_, gi10_program_, "g_HashGridCacheConstants", hash_grid_cache_constants); gfxProgramSetParameter(gfx_, gi10_program_, "g_WorldSpaceReSTIRConstants", world_space_restir_constants); + gfxProgramSetParameter(gfx_, gi10_program_, "g_GlossyReflectionsConstants", glossy_reflections_constants); // Bind the screen probes shader parameters gfxProgramSetParameter(gfx_, gi10_program_, "g_ScreenProbes_ProbeMask", @@ -1334,6 +1846,9 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept screen_probes_.probe_cached_tile_lru_buffers_[1 - screen_probes_.probe_buffer_index_]); // Bind the hash-grid cache shader parameters + gfxProgramSetParameter(gfx_, gi10_program_, "g_HashGridCache_BuffersFloat", + hash_grid_cache_.radiance_cache_hash_buffer_float_, + ARRAYSIZE(hash_grid_cache_.radiance_cache_hash_buffer_float_)); gfxProgramSetParameter(gfx_, gi10_program_, "g_HashGridCache_BuffersUint", hash_grid_cache_.radiance_cache_hash_buffer_uint_, 
ARRAYSIZE(hash_grid_cache_.radiance_cache_hash_buffer_uint_)); @@ -1391,6 +1906,17 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept world_space_restir_.reservoir_indirect_sample_reservoir_buffers_ [1 - world_space_restir_.reservoir_indirect_sample_buffer_index_]); + // Bind the glossy reflections shader parameters + gfxProgramSetParameter(gfx_, gi10_program_, "g_GlossyReflections_TextureFloat", + glossy_reflections_.texture_float_, ARRAYSIZE(glossy_reflections_.texture_float_)); + gfxProgramSetParameter(gfx_, gi10_program_, "g_GlossyReflections_TextureFloat4", + glossy_reflections_.texture_float4_, ARRAYSIZE(glossy_reflections_.texture_float4_)); + + gfxProgramSetParameter( + gfx_, gi10_program_, "g_GlossyReflections_RtSampleBuffer", glossy_reflections_.rt_sample_buffer_); + gfxProgramSetParameter(gfx_, gi10_program_, "g_GlossyReflections_RtSampleCountBuffer", + glossy_reflections_.rt_sample_count_buffer_); + // Bind the GI denoiser shader parameters gfxProgramSetParameter(gfx_, gi10_program_, "g_GIDenoiser_BlurMask", gi_denoiser_.blur_masks_[0]); gfxProgramSetParameter(gfx_, gi10_program_, "g_GIDenoiser_BlurredBlurMask", gi_denoiser_.blur_masks_[1]); @@ -1406,6 +1932,18 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept gfxProgramSetParameter( gfx_, gi10_program_, "g_GIDenoiser_BlurSampleCountBuffer", gi_denoiser_.blur_sample_count_buffer_); + // Clear bucket overflow count + if (options_.gi10_hash_grid_cache_debug_stats) + { + TimedSection const timed_section(*this, "ClearBucketOverflowCount"); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, clear_bucket_overflow_count_kernel_); + uint32_t const num_groups_x = (hash_grid_cache_.num_buckets_ + num_threads[0] - 1) / num_threads[0]; + + gfxCommandBindKernel(gfx_, clear_bucket_overflow_count_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, 1, 1); + } + // Purge the unused tiles (square or cube of cells) within our hash-grid cache { TimedSection const timed_section(*this, 
"PurgeRadianceCache"); @@ -1502,6 +2040,25 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept } // Now we go and populate the newly spawned probes + if (options.gi10_use_dxr10) + { + TimedSection const timed_section(*this, "PopulateScreenProbes"); + + gfxSbtSetShaderGroup( + gfx_, sbt_, kGfxShaderGroupType_Raygen, 0, kPopulateScreenProbesRaygenShaderName); + gfxSbtSetShaderGroup(gfx_, sbt_, kGfxShaderGroupType_Miss, 0, kPopulateScreenProbesMissShaderName); + for (uint32_t i = 0; i < gfxAccelerationStructureGetRaytracingPrimitiveCount( + gfx_, capsaicin.getAccelerationStructure()); + i++) + { + gfxSbtSetShaderGroup(gfx_, sbt_, kGfxShaderGroupType_Hit, + i * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit), + kPopulateScreenProbesHitGroupName); + } + gfxCommandBindKernel(gfx_, populate_screen_probes_kernel_); + gfxCommandDispatchRays(gfx_, sbt_, screen_probes_.max_ray_count, 1, 1); + } + else { TimedSection const timed_section(*this, "PopulateScreenProbes"); @@ -1514,7 +2071,7 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept // Update light sampling data structure { - light_sampler->update(capsaicin, *this); + light_sampler->update(capsaicin, this); } // Clear our cache prior to generating new reservoirs @@ -1571,6 +2128,26 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept } // Populate the cells of our world-space hash-grid radiance cache + if (options.gi10_use_dxr10) + { + TimedSection const timed_section(*this, "PopulateRadianceCache"); + + gfxSbtSetShaderGroup(gfx_, sbt_, kGfxShaderGroupType_Raygen, 0, kPopulateCellsRaygenShaderName); + gfxSbtSetShaderGroup(gfx_, sbt_, kGfxShaderGroupType_Miss, 0, kPopulateCellsMissShaderName); + for (uint32_t i = 0; i < gfxAccelerationStructureGetRaytracingPrimitiveCount( + gfx_, capsaicin.getAccelerationStructure()); + i++) + { + gfxSbtSetShaderGroup(gfx_, sbt_, kGfxShaderGroupType_Hit, + i * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit), kPopulateCellsHitGroupName); + } + + 
generateDispatchRays(hash_grid_cache_.radiance_cache_visibility_ray_count_buffer_); + + gfxCommandBindKernel(gfx_, populate_cells_kernel_); + gfxCommandDispatchRaysIndirect(gfx_, sbt_, dispatch_command_buffer_); + } + else { TimedSection const timed_section(*this, "PopulateRadianceCache"); @@ -1711,6 +2288,247 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); } + // Ray traced reflections for surface with roughness under gi10_glossy_reflections_low_roughness_threshold + if (options_.gi10_disable_specular_materials) + { + gfxCommandClearTexture(gfx_, capsaicin.getAOVBuffer("Reflection")); + } + else + { + TimedSection const timed_section(*this, "TraceReflections"); + + gfxProgramSetParameter(gfx_, gi10_program_, "g_TextureSampler", capsaicin.getAnisotropicSampler()); + + if (options.gi10_use_dxr10) + { + gfxSbtSetShaderGroup( + gfx_, sbt_, kGfxShaderGroupType_Raygen, 0, kTraceReflectionsRaygenShaderName); + gfxSbtSetShaderGroup(gfx_, sbt_, kGfxShaderGroupType_Miss, 0, kTraceReflectionsMissShaderName); + for (uint32_t i = 0; i < gfxAccelerationStructureGetRaytracingPrimitiveCount( + gfx_, capsaicin.getAccelerationStructure()); + i++) + { + gfxSbtSetShaderGroup(gfx_, sbt_, kGfxShaderGroupType_Hit, + i * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit), + kTraceReflectionsHitGroupName); + } + + generateDispatchRays(glossy_reflections_.rt_sample_count_buffer_); + + gfxCommandBindKernel(gfx_, trace_reflections_kernel_); + gfxCommandDispatchRaysIndirect(gfx_, sbt_, dispatch_command_buffer_); + } + else + { + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, trace_reflections_kernel_); + generateDispatch(glossy_reflections_.rt_sample_count_buffer_, num_threads[0]); + + gfxCommandBindKernel(gfx_, trace_reflections_kernel_); + gfxCommandDispatchIndirect(gfx_, dispatch_command_buffer_); + } + + gfxProgramSetParameter(gfx_, gi10_program_, "g_TextureSampler", capsaicin.getLinearSampler()); + } + 
+ // Mark fireflies + if (!options_.gi10_disable_specular_materials && options_.gi10_glossy_reflections_cleanup_fireflies) + { + TimedSection const timed_section(*this, "MarkFireflies"); + + uint32_t const spec_buffer_dimensions[] = {glossy_reflections_.specular_buffer_.getWidth(), + glossy_reflections_.specular_buffer_.getHeight()}; + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, mark_fireflies_kernel_); + uint32_t const num_groups_x = (spec_buffer_dimensions[0] + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = (spec_buffer_dimensions[1] + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, mark_fireflies_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + } + + // Cleanup fireflies from reflections + if (!options_.gi10_disable_specular_materials && options_.gi10_glossy_reflections_cleanup_fireflies) + { + TimedSection const timed_section(*this, "CleanupFireflies"); + + uint32_t const gloss_buffer_dimensions[] = {glossy_reflections_.specular_buffer_.getWidth(), + glossy_reflections_.specular_buffer_.getHeight()}; + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, cleanup_fireflies_kernel_); + uint32_t const num_groups_x = (gloss_buffer_dimensions[0] + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = (gloss_buffer_dimensions[1] + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, cleanup_fireflies_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + } + + // Resolve the reflections using the BRDF-based ratio estimator + if (!options_.gi10_disable_specular_materials && options.gi10_glossy_reflections_denoiser_mode == 0) + { + // X + { + TimedSection const timed_section(*this, "ResolveReflections Split X"); + + GfxKernel resolve_reflections_kernel_x = resolve_reflections_kernels_[0]; + + uint32_t const refl_buffer_dimensions[] = {glossy_reflections_.reflections_buffer0_.getWidth(), + 
glossy_reflections_.reflections_buffer0_.getHeight()}; + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, resolve_reflections_kernel_x); + uint32_t const num_groups_x = (refl_buffer_dimensions[0] + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = (refl_buffer_dimensions[1] + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, resolve_reflections_kernel_x); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + } + + // Y + { + TimedSection const timed_section(*this, "ResolveReflections Split Y"); + + GfxKernel resolve_reflections_kernel_y = resolve_reflections_kernels_[1]; + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, resolve_reflections_kernel_y); + uint32_t const num_groups_x = (buffer_dimensions[0] + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = (buffer_dimensions[1] + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, resolve_reflections_kernel_y); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + } + } + else if (!options_.gi10_disable_specular_materials && options.gi10_glossy_reflections_denoiser_mode == 1) + { + std::string_view section_names[] = {"ResolveReflections Atrous 1", + "ResolveReflections Atrous 2", "ResolveReflections Atrous 3", "ResolveReflections Atrous 4", + "ResolveReflections Atrous 5", "ResolveReflections Atrous 6", "ResolveReflections Atrous 7"}; + GFX_ASSERT(options.gi10_glossy_reflections_atrous_pass_count - 1 <= ARRAYSIZE(section_names)); + + GfxKernel atrous_kernels[] = {resolve_reflections_kernels_[2], resolve_reflections_kernels_[3], + resolve_reflections_kernels_[4]}; + + // First + { + TimedSection const timed_section(*this, "ResolveReflections Atrous First"); + + GfxBuffer glossy_reflections_atrous_constants = + capsaicin.allocateConstantBuffer(1); + GlossyReflectionsAtrousConstants glossy_reflections_atrous_constant_data; + glossy_reflections_atrous_constant_data.ping_pong = 0; + 
glossy_reflections_atrous_constant_data.full_step = 1; + glossy_reflections_atrous_constant_data.pass_index = 0; + gfxBufferGetData(gfx_, glossy_reflections_atrous_constants)[0] = + glossy_reflections_atrous_constant_data; + gfxProgramSetParameter(gfx_, gi10_program_, "g_GlossyReflectionsAtrousConstants", + glossy_reflections_atrous_constants); + + uint32_t const gloss_buffer_dimensions[] = {glossy_reflections_.reflections_buffer0_.getWidth(), + glossy_reflections_.reflections_buffer0_.getHeight()}; + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, atrous_kernels[0]); + uint32_t const num_groups_x = (gloss_buffer_dimensions[0] + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = (gloss_buffer_dimensions[1] + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, atrous_kernels[0]); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + + gfxDestroyBuffer(gfx_, glossy_reflections_atrous_constants); + } + + for (int pass_index = 1; pass_index < options.gi10_glossy_reflections_atrous_pass_count - 1; + ++pass_index) + { + // Iter + { + TimedSection const timed_section(*this, section_names[pass_index - 1]); + + GfxBuffer glossy_reflections_atrous_constants = + capsaicin.allocateConstantBuffer(1); + GlossyReflectionsAtrousConstants glossy_reflections_atrous_constant_data; + glossy_reflections_atrous_constant_data.ping_pong = (pass_index + 1) % 2; + glossy_reflections_atrous_constant_data.full_step = 1 << pass_index; + glossy_reflections_atrous_constant_data.pass_index = pass_index; + gfxBufferGetData( + gfx_, glossy_reflections_atrous_constants)[0] = glossy_reflections_atrous_constant_data; + gfxProgramSetParameter(gfx_, gi10_program_, "g_GlossyReflectionsAtrousConstants", + glossy_reflections_atrous_constants); + + uint32_t const refl_buffer_dimensions[] = { + glossy_reflections_.reflections_buffer0_.getWidth(), + glossy_reflections_.reflections_buffer0_.getHeight()}; + + uint32_t const *num_threads = 
gfxKernelGetNumThreads(gfx_, atrous_kernels[1]); + uint32_t const num_groups_x = + (refl_buffer_dimensions[0] + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = + (refl_buffer_dimensions[1] + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, atrous_kernels[1]); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + + gfxDestroyBuffer(gfx_, glossy_reflections_atrous_constants); + } + } + + // Last + { + TimedSection const timed_section(*this, "ResolveReflections Atrous Last"); + + GfxBuffer glossy_reflections_atrous_constants = + capsaicin.allocateConstantBuffer(1); + GlossyReflectionsAtrousConstants glossy_reflections_atrous_constant_data; + int pass_index = options.gi10_glossy_reflections_atrous_pass_count - 1; + glossy_reflections_atrous_constant_data.ping_pong = (pass_index + 0) % 2; + glossy_reflections_atrous_constant_data.full_step = 1 << pass_index; + glossy_reflections_atrous_constant_data.pass_index = pass_index; + gfxBufferGetData(gfx_, glossy_reflections_atrous_constants)[0] = + glossy_reflections_atrous_constant_data; + gfxProgramSetParameter(gfx_, gi10_program_, "g_GlossyReflectionsAtrousConstants", + glossy_reflections_atrous_constants); + + uint32_t const gloss_buffer_dimensions[] = {glossy_reflections_.reflections_buffer_.getWidth(), + glossy_reflections_.reflections_buffer_.getHeight()}; + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, atrous_kernels[2]); + uint32_t const num_groups_x = (gloss_buffer_dimensions[0] + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = (gloss_buffer_dimensions[1] + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, atrous_kernels[2]); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + + gfxDestroyBuffer(gfx_, glossy_reflections_atrous_constants); + } + + // Unset deleted buffer + GfxBuffer invalid_buffer {}; + gfxProgramSetParameter(gfx_, gi10_program_, "g_GlossyReflectionsAtrousConstants", invalid_buffer); + } + 
+ // Reproject the previous frame's reflections + if (!options_.gi10_disable_specular_materials && options.gi10_glossy_reflections_denoiser_mode < 2) + { + TimedSection const timed_section(*this, "ReprojectReflections"); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, reproject_reflections_kernel_); + uint32_t const num_groups_x = (buffer_dimensions[0] + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = (buffer_dimensions[1] + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, reproject_reflections_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + } + + // No denoiser + if (!options_.gi10_disable_specular_materials && options.gi10_glossy_reflections_denoiser_mode == 2) + { + TimedSection const timed_section(*this, "NoDenoiserReflections"); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, no_denoiser_reflections_kernel_); + uint32_t const num_groups_x = (buffer_dimensions[0] + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = (buffer_dimensions[1] + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, no_denoiser_reflections_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + } + // Reproject the previous frame's global illumination { TimedSection const timed_section(*this, "ReprojectGI"); @@ -1782,6 +2600,124 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept gfxCommandDraw(gfx_, 3); } + // Debug hash grid cache bucket occupancy (an histogram) + + if (options_.gi10_hash_grid_cache_debug_stats) + { + auto &free_bucket_count = hash_grid_cache_.debug_free_bucket_count_; + auto &used_bucket_count = hash_grid_cache_.debug_used_bucket_count_; + auto &bucket_occupancy_histogram = hash_grid_cache_.debug_bucket_occupancy_histogram_; + auto &bucket_overflow_histogram = hash_grid_cache_.debug_bucket_overflow_histogram_; + auto total_memory_size_in_bytes = hash_grid_cache_.debug_total_memory_size_in_bytes_; + + // Build 
histogram + { + TimedSection const timed_section(*this, "ClearBucketOccupancy"); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, clear_bucket_occupancy_kernel_); + uint32_t const num_groups_x = + (hash_grid_cache_.debug_bucket_occupancy_histogram_size_ + num_threads[0] - 1) + / num_threads[0]; + + gfxCommandBindKernel(gfx_, clear_bucket_occupancy_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, 1, 1); + } + { + TimedSection const timed_section(*this, "ClearBucketOverflow"); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, clear_bucket_overflow_kernel_); + uint32_t const num_groups_x = + (hash_grid_cache_.debug_bucket_overflow_histogram_size_ + num_threads[0] - 1) + / num_threads[0]; + + gfxCommandBindKernel(gfx_, clear_bucket_overflow_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, 1, 1); + } + { + TimedSection const timed_section(*this, "BuildBucketStats"); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, build_bucket_stats_kernel_); + uint32_t const num_groups_x = + (hash_grid_cache_.num_buckets_ + num_threads[0] - 1) / num_threads[0]; + + gfxCommandBindKernel(gfx_, build_bucket_stats_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, 1, 1); + } + { + TimedSection const timed_section(*this, "FormatBucketOccupancy"); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, format_bucket_occupancy_kernel_); + uint32_t const num_groups_x = + (hash_grid_cache_.debug_bucket_occupancy_histogram_size_ + num_threads[0] - 1) + / num_threads[0]; + + gfxCommandBindKernel(gfx_, format_bucket_occupancy_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, 1, 1); + } + { + TimedSection const timed_section(*this, "FormatBucketOverflow"); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, format_bucket_overflow_kernel_); + uint32_t const num_groups_x = + (hash_grid_cache_.debug_bucket_overflow_histogram_size_ + num_threads[0] - 1) + / num_threads[0]; + + gfxCommandBindKernel(gfx_, format_bucket_overflow_kernel_); + 
gfxCommandDispatch(gfx_, num_groups_x, 1, 1); + } + + // Copy stats buffer for delayed readback + { + TimedSection const timed_section(*this, "CopyBucketStats"); + + uint32_t const copy_index = (frame_index + 0) % kGfxConstant_BackBufferCount; + + GfxBuffer destination_buffer = + hash_grid_cache_.radiance_cache_debug_stats_readback_buffers_[copy_index]; + gfxCommandCopyBuffer( + gfx_, destination_buffer, hash_grid_cache_.radiance_cache_debug_stats_buffer_); + + GFX_ASSERT(!hash_grid_cache_.radiance_cache_debug_stats_readback_is_pending_[copy_index]); + hash_grid_cache_.radiance_cache_debug_stats_readback_is_pending_[copy_index] = true; + } + + // Readback stats + uint32_t readback_index = (frame_index + 1) % kGfxConstant_BackBufferCount; + if (hash_grid_cache_.radiance_cache_debug_stats_readback_is_pending_[readback_index]) + { + GfxBuffer readback_buffer = + hash_grid_cache_.radiance_cache_debug_stats_readback_buffers_[readback_index]; + + bucket_occupancy_histogram.resize(hash_grid_cache_.debug_bucket_occupancy_histogram_size_); + bucket_overflow_histogram.resize(hash_grid_cache_.debug_bucket_overflow_histogram_size_); + + float const *formatted_data = (float *)gfxBufferGetData(gfx_, readback_buffer); + float const *formatted_data_cursor = formatted_data; + + free_bucket_count = formatted_data_cursor[0]; + used_bucket_count = formatted_data_cursor[1]; + formatted_data_cursor += 2; + + std::copy(formatted_data_cursor, formatted_data_cursor + bucket_occupancy_histogram.size(), + bucket_occupancy_histogram.begin()); + formatted_data_cursor += bucket_occupancy_histogram.size(); + + std::copy(formatted_data_cursor, formatted_data_cursor + bucket_overflow_histogram.size(), + bucket_overflow_histogram.begin()); + formatted_data_cursor += bucket_overflow_histogram.size(); + + hash_grid_cache_.radiance_cache_debug_stats_readback_is_pending_[readback_index] = false; + } + else + { + free_bucket_count = 0; + used_bucket_count = 0; + bucket_occupancy_histogram.clear(); + 
bucket_overflow_histogram.clear(); + total_memory_size_in_bytes = 0; + } + } + // Debug the screen-space radiance cache if asked to do so if (debug_view_.starts_with("RadianceCache")) { @@ -1803,11 +2739,21 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept gfxCommandMultiDrawIndirect(gfx_, draw_command_buffer_, 1); } + // Debug reflections + if (debug_view_ == "Reflection") + { + TimedSection const timed_section(*this, "DebugReflection"); + + gfxCommandBindKernel(gfx_, debug_reflection_kernel_); + gfxCommandDraw(gfx_, 3); + } + // Release our constant buffers gfxDestroyBuffer(gfx_, gi10_constants); gfxDestroyBuffer(gfx_, screen_probes_constants); gfxDestroyBuffer(gfx_, hash_grid_cache_constants); gfxDestroyBuffer(gfx_, world_space_restir_constants); + gfxDestroyBuffer(gfx_, glossy_reflections_constants); // Flip the buffers screen_probes_.probe_buffer_index_ = (1 - screen_probes_.probe_buffer_index_); @@ -1821,7 +2767,7 @@ void GI10::render(CapsaicinInternal &capsaicin) noexcept previous_camera_ = capsaicin.getCamera(); } -void GI10::terminate() +void GI10::terminate() noexcept { gfxDestroyTexture(gfx_, depth_buffer_); gfxDestroyTexture(gfx_, irradiance_buffer_); @@ -1833,9 +2779,11 @@ void GI10::terminate() gfxDestroyKernel(gfx_, clear_counters_kernel_); gfxDestroyKernel(gfx_, generate_draw_kernel_); gfxDestroyKernel(gfx_, generate_dispatch_kernel_); + gfxDestroyKernel(gfx_, generate_dispatch_rays_kernel_); gfxDestroyKernel(gfx_, generate_update_tiles_dispatch_kernel_); gfxDestroyKernel(gfx_, debug_screen_probes_kernel_); gfxDestroyKernel(gfx_, debug_hash_grid_cells_kernel_); + gfxDestroyKernel(gfx_, debug_reflection_kernel_); gfxDestroyKernel(gfx_, clear_probe_mask_kernel_); gfxDestroyKernel(gfx_, filter_probe_mask_kernel_); @@ -1858,17 +2806,92 @@ void GI10::terminate() gfxDestroyKernel(gfx_, populate_cells_kernel_); gfxDestroyKernel(gfx_, update_tiles_kernel_); gfxDestroyKernel(gfx_, resolve_cells_kernel_); + gfxDestroyKernel(gfx_, 
clear_bucket_overflow_count_kernel_); + gfxDestroyKernel(gfx_, clear_bucket_occupancy_kernel_); + gfxDestroyKernel(gfx_, clear_bucket_overflow_kernel_); + gfxDestroyKernel(gfx_, build_bucket_stats_kernel_); + gfxDestroyKernel(gfx_, format_bucket_occupancy_kernel_); + gfxDestroyKernel(gfx_, format_bucket_overflow_kernel_); gfxDestroyKernel(gfx_, clear_reservoirs_kernel_); gfxDestroyKernel(gfx_, generate_reservoirs_kernel_); gfxDestroyKernel(gfx_, compact_reservoirs_kernel_); gfxDestroyKernel(gfx_, resample_reservoirs_kernel_); + gfxDestroyKernel(gfx_, trace_reflections_kernel_); + for (auto &resolve_reflections_kernel : resolve_reflections_kernels_) + gfxDestroyKernel(gfx_, resolve_reflections_kernel); + gfxDestroyKernel(gfx_, reproject_reflections_kernel_); + gfxDestroyKernel(gfx_, mark_fireflies_kernel_); + gfxDestroyKernel(gfx_, cleanup_fireflies_kernel_); + gfxDestroyKernel(gfx_, no_denoiser_reflections_kernel_); + gfxDestroyKernel(gfx_, reproject_gi_kernel_); gfxDestroyKernel(gfx_, filter_blur_mask_kernel_); gfxDestroyKernel(gfx_, filter_gi_kernel_); - irradiance_buffer_ = {}; + gfxDestroySbt(gfx_, sbt_); + + irradiance_buffer_ = {}; + clear_bucket_overflow_count_kernel_ = {}; + clear_bucket_occupancy_kernel_ = {}; + clear_bucket_overflow_kernel_ = {}; + build_bucket_stats_kernel_ = {}; + format_bucket_occupancy_kernel_ = {}; + format_bucket_overflow_kernel_ = {}; +} + +void GI10::renderGUI(CapsaicinInternal &capsaicin) const noexcept +{ + ImGui::Checkbox("Use Resampling", &capsaicin.getOption("gi10_use_resampling")); + ImGui::Checkbox("Use Alpha Testing", &capsaicin.getOption("gi10_use_alpha_testing")); + ImGui::Checkbox("Use Direct Lighting", &capsaicin.getOption("gi10_use_direct_lighting")); + ImGui::Checkbox("Disable Albedo Textures", &capsaicin.getOption("gi10_disable_albedo_textures")); + ImGui::Checkbox( + "Disable Specular Materials", &capsaicin.getOption("gi10_disable_specular_materials")); + + if (ImGui::CollapsingHeader("Hash Grid Cache", 
ImGuiTreeNodeFlags_None)) + { + auto &num_buckets = capsaicin.getOption("gi10_hash_grid_cache_num_buckets"); + if (ImGui::SliderInt("Number of Buckets (1<<)", &num_buckets, 8, 16)) + { + num_buckets = glm::clamp(num_buckets, 8, 16); + } + + auto &num_tiles_per_bucket = capsaicin.getOption("gi10_hash_grid_cache_num_tiles_per_bucket"); + if (ImGui::SliderInt("Number of Tiles per Bucket (1<<)", &num_tiles_per_bucket, 1, 8)) + { + num_tiles_per_bucket = glm::clamp(num_tiles_per_bucket, 1, 8); + } + + auto &debug_stats = capsaicin.getOption("gi10_hash_grid_cache_debug_stats"); + ImGui::Checkbox("Debug Statistics", &debug_stats); + if (debug_stats && ImGui::CollapsingHeader("Hash Grid Cache##DebugStats", ImGuiTreeNodeFlags_DefaultOpen)) + { // The hidden "##DebugStats" suffix gives this header its own ID so it no longer shares open/closed state with the enclosing "Hash Grid Cache" header + auto const &free_bucket_count = hash_grid_cache_.debug_free_bucket_count_; + auto const &used_bucket_count = hash_grid_cache_.debug_used_bucket_count_; + auto const &bucket_occupancy_histogram = hash_grid_cache_.debug_bucket_occupancy_histogram_; + auto const &bucket_overflow_histogram = hash_grid_cache_.debug_bucket_overflow_histogram_; + auto total_memory_size_in_bytes = hash_grid_cache_.debug_total_memory_size_in_bytes_; + + ImGui::PlotHistogram("Bucket Occupancy Histogram", bucket_occupancy_histogram.data(), + (int)bucket_occupancy_histogram.size(), 0, nullptr, FLT_MAX, FLT_MAX, ImVec2(0.f, 64.f)); + + auto &debug_max_bucket_overflow = + capsaicin.getOption("gi10_hash_grid_cache_debug_max_bucket_overflow"); + if (ImGui::SliderInt("Max overflow", &debug_max_bucket_overflow, 8, 256)) + { + debug_max_bucket_overflow = glm::clamp(debug_max_bucket_overflow, 1, 1024); // NOTE(review): clamp range is wider than the 8..256 slider range above, unlike the sibling sliders - confirm this is intended + } + + ImGui::PlotHistogram("Bucket Overflow Histogram", bucket_overflow_histogram.data(), + (int)bucket_overflow_histogram.size(), 0, nullptr, FLT_MAX, FLT_MAX, ImVec2(0.f, 64.f)); + + ImGui::Text("Free Bucket Count : %u", (uint32_t)free_bucket_count); + ImGui::Text("Used Bucket Count : %u", (uint32_t)used_bucket_count); + ImGui::Text("Total Memory Size : %u MB", 
(uint32_t)(total_memory_size_in_bytes >> 20)); + } + } } void GI10::generateDispatch(GfxBuffer count_buffer, uint32_t group_size) @@ -1880,6 +2903,14 @@ void GI10::generateDispatch(GfxBuffer count_buffer, uint32_t group_size) gfxCommandDispatch(gfx_, 1, 1, 1); } +void GI10::generateDispatchRays(GfxBuffer count_buffer) +{ + gfxProgramSetParameter(gfx_, gi10_program_, "g_CountBuffer", count_buffer); + + gfxCommandBindKernel(gfx_, generate_dispatch_rays_kernel_); + gfxCommandDispatch(gfx_, 1, 1, 1); +} + void GI10::clearHashGridCache() { if (hash_grid_cache_.radiance_cache_hash_buffer_) diff --git a/src/core/src/render_techniques/gi10/gi10.frag b/src/core/src/render_techniques/gi10/gi10.frag index 4d3e414..dacfb39 100644 --- a/src/core/src/render_techniques/gi10/gi10.frag +++ b/src/core/src/render_techniques/gi10/gi10.frag @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -30,19 +30,22 @@ uint2 g_BufferDimensions; uint g_DisableAlbedoTextures; Texture2D g_DepthBuffer; -Texture2D g_NormalBuffer; -Texture2D g_DetailsBuffer; +Texture2D g_ShadingNormalBuffer; +Texture2D g_RoughnessBuffer; Texture2D g_VisibilityBuffer; Texture2D g_OcclusionAndBentNormalBuffer; -StructuredBuffer g_IndexBuffers[] : register(space1); -StructuredBuffer g_VertexBuffers[] : register(space2); +Texture2D g_LutBuffer; +uint g_LutSize; +StructuredBuffer g_IndexBuffer; +StructuredBuffer g_VertexBuffer; StructuredBuffer g_MeshBuffer; StructuredBuffer g_InstanceBuffer; StructuredBuffer g_MaterialBuffer; RWTexture2D g_IrradianceBuffer; +RWTexture2D g_ReflectionBuffer; Texture2D g_TextureMaps[] : register(space99); SamplerState g_NearestSampler; @@ -51,12 +54,16 @@ SamplerState g_TextureSampler; ConstantBuffer g_GI10Constants; ConstantBuffer g_ScreenProbesConstants; +ConstantBuffer g_GlossyReflectionsConstants; +ConstantBuffer g_GlossyReflectionsAtrousConstants; +#include "../../geometry/geometry.hlsl" +#include "../../geometry/mesh.hlsl" #include "../../materials/material_evaluation.hlsl" #include "../../materials/material_sampling.hlsl" -#include "../../math/geometry.hlsl" #include "gi10.hlsl" +#include "glossy_reflections.hlsl" #include "screen_probes.hlsl" struct PS_OUTPUT @@ -69,7 +76,7 @@ PS_OUTPUT ResolveGI10(in float4 pos : SV_Position) uint2 did = uint2(pos.xy); float depth = g_DepthBuffer.Load(int3(did, 0)).x; - float3 normal = normalize(2.0f * g_DetailsBuffer.Load(int3(did, 0)).xyz - 1.0f); + float3 normal = normalize(2.0f * g_ShadingNormalBuffer.Load(int3(did, 0)).xyz - 1.0f); if (depth >= 1.0f) { @@ -85,45 +92,77 @@ PS_OUTPUT ResolveGI10(in float4 pos : SV_Position) Instance instance = g_InstanceBuffer[instanceID]; Mesh mesh = g_MeshBuffer[instance.mesh_index]; - uint i0 = 
g_IndexBuffers[0][mesh.index_offset / mesh.index_stride + 3 * primitiveID + 0] + mesh.vertex_offset / mesh.vertex_stride; - uint i1 = g_IndexBuffers[0][mesh.index_offset / mesh.index_stride + 3 * primitiveID + 1] + mesh.vertex_offset / mesh.vertex_stride; - uint i2 = g_IndexBuffers[0][mesh.index_offset / mesh.index_stride + 3 * primitiveID + 2] + mesh.vertex_offset / mesh.vertex_stride; - - float2 uv0 = g_VertexBuffers[0][i0].uv; - float2 uv1 = g_VertexBuffers[0][i1].uv; - float2 uv2 = g_VertexBuffers[0][i2].uv; + // Get UV values from buffers + UVs uvs = fetchUVs(mesh, primitiveID); float2 uv = (did + 0.5f) / g_BufferDimensions; float3 world = InverseProject(g_GI10Constants.view_proj_inv, uv, depth); float3 view_direction = normalize(g_Eye - world); - float2 mesh_uv = interpolate(uv0, uv1, uv2, visibility.xy); + float2 mesh_uv = interpolate(uvs.uv0, uvs.uv1, uvs.uv2, visibility.xy); float dotNV = saturate(dot(normal, view_direction)); - Material material = g_MaterialBuffer[mesh.material_index]; + Material material = g_MaterialBuffer[instance.material_index]; MaterialEvaluated material_evaluated = MakeMaterialEvaluated(material, mesh_uv); MaterialEmissive emissiveMaterial = MakeMaterialEmissive(material, mesh_uv); -#ifndef DISABLE_SPECULAR_LIGHTING - material_evaluated.metallicity = 0.0f; -#endif MaterialBRDF materialBRDF = MakeMaterialBRDF(material_evaluated); if (g_DisableAlbedoTextures) { materialBRDF.albedo = 0.3f.xxx; -#ifndef DISABLE_SPECULAR_LIGHTING +#ifndef DISABLE_SPECULAR_MATERIALS materialBRDF.F0 = 0.0f.xxx; -#endif +#endif // DISABLE_SPECULAR_MATERIALS } PS_OUTPUT output; +#ifdef DISABLE_SPECULAR_MATERIALS + float3 irradiance = g_IrradianceBuffer[did].xyz; float3 diffuse = evaluateLambert(materialBRDF.albedo) * irradiance; output.lighting = float4(emissiveMaterial.emissive + diffuse, 1.0f); +#else // DISABLE_SPECULAR_MATERIALS + + // compute diffuse compensation term with specular dominant half-vector + float3 specular_dominant_direction = 
calculateGGXSpecularDirection(normal, view_direction, sqrt(materialBRDF.roughnessAlpha)); + float3 specular_dominant_half_vector = normalize(view_direction + specular_dominant_direction); + float dotHV = saturate(dot(view_direction, specular_dominant_half_vector)); + float3 diffuse_compensation = diffuseCompensation(materialBRDF.F0, dotHV); + + // diffuse term + float3 irradiance = g_IrradianceBuffer[did].xyz; + float3 diffuse = evaluateLambert(materialBRDF.albedo) * diffuse_compensation * irradiance; + + // compute specular term with split-sum approximation + float4 radiance_sum = (material_evaluated.roughness > g_GlossyReflectionsConstants.high_roughness_threshold + ? float4(irradiance, PI) : g_ReflectionBuffer[did]); // fall back to filtered irradiance past threshold + float2 lut = g_LutBuffer.SampleLevel(g_LinearSampler, float2(dotNV, material_evaluated.roughness), 0.0f).xy; + float3 directional_albedo = saturate(materialBRDF.F0 * lut.x + (1.0f - materialBRDF.F0) * lut.y); + float3 specular = directional_albedo * (radiance_sum.xyz / max(radiance_sum.w, 1.0f)); + output.lighting = float4(emissiveMaterial.emissive + diffuse + specular, 1.0f); + +#endif // DISABLE_SPECULAR_MATERIALS + return output; } +float4 DebugReflection(in float4 pos : SV_Position) : SV_Target +{ + int2 full_pos = int2(pos.xy); + float3 normal = g_ShadingNormalBuffer.Load(int3(full_pos, 0)).xyz; + float roughness = g_RoughnessBuffer.Load(int3(full_pos, 0)).x; + bool is_sky_pixel = (dot(normal, normal) == 0.0f ? 
true : false); + if (is_sky_pixel || roughness > g_GlossyReflectionsConstants.high_roughness_threshold) + { + // When debugging, output black rather than the noisy diffuse fallback + return float4(0.f, 0.f, 0.f, 1.f); + } + + float4 lighting = g_ReflectionBuffer[full_pos]; + return float4(lighting.xyz / max(lighting.w, 1.0f), 1.0f); +} + float4 DebugScreenProbes(in float4 pos : SV_Position) : SV_Target { uint2 did = uint2(pos.xy); diff --git a/src/core/src/render_techniques/gi10/gi10.h b/src/core/src/render_techniques/gi10/gi10.h index f07f0ad..7f8dd33 100644 --- a/src/core/src/render_techniques/gi10/gi10.h +++ b/src/core/src/render_techniques/gi10/gi10.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -24,6 +24,8 @@ THE SOFTWARE. 
#include "gi10_shared.h" #include "render_technique.h" +#include + namespace Capsaicin { class GI10 : public RenderTechnique @@ -42,11 +44,14 @@ class GI10 : public RenderTechnique struct RenderOptions { + bool gi10_use_dxr10 = false; bool gi10_use_resampling = false; bool gi10_use_alpha_testing = true; bool gi10_use_direct_lighting = true; bool gi10_disable_albedo_textures = false; + bool gi10_disable_specular_materials = false; float gi10_hash_grid_cache_cell_size = 32.0f; + float gi10_hash_grid_cache_min_cell_size = 1e-1f; int gi10_hash_grid_cache_tile_cell_ratio = 8; // 8x8 = 64 int gi10_hash_grid_cache_num_buckets = 12; // 1 << 12 = 4096 int gi10_hash_grid_cache_num_tiles_per_bucket = 4; // 1 << 4 = 16 total : 4194304 @@ -54,15 +59,34 @@ class GI10 : public RenderTechnique int gi10_hash_grid_cache_debug_mip_level = 0; bool gi10_hash_grid_cache_debug_propagate = false; int gi10_hash_grid_cache_debug_max_cell_decay = 0; // Debug cells touched this frame - float gi10_reservoir_cache_cell_size = 16.0f; + bool gi10_hash_grid_cache_debug_stats = false; + int gi10_hash_grid_cache_debug_max_bucket_overflow = 64; + float gi10_reservoir_cache_cell_size = 16.0f; + + bool gi10_glossy_reflections_halfres = true; + int gi10_glossy_reflections_denoiser_mode = 1; // Atrous Ratio Estimator + bool gi10_glossy_reflections_cleanup_fireflies = true; + float gi10_glossy_reflections_low_roughness_threshold = 0.2f; + float gi10_glossy_reflections_high_roughness_threshold = 0.6f; + int gi10_glossy_reflections_atrous_pass_count = 4; + int gi10_glossy_reflections_full_radius = 11; + int gi10_glossy_reflections_half_radius = 11; + int gi10_glossy_reflections_mark_fireflies_half_radius = 3; + int gi10_glossy_reflections_mark_fireflies_full_radius = 2; + float gi10_glossy_reflections_mark_fireflies_half_low_threshold = 0.0f; + float gi10_glossy_reflections_mark_fireflies_full_low_threshold = 0.0f; + float gi10_glossy_reflections_mark_fireflies_half_high_threshold = 1.0f; + float 
gi10_glossy_reflections_mark_fireflies_full_high_threshold = 1.0f; + int gi10_glossy_reflections_cleanup_fireflies_half_radius = 2; + int gi10_glossy_reflections_cleanup_fireflies_full_radius = 1; }; /** - * Convert render settings to internal options format. - * @param settings Current render settings. + * Convert render options to internal options format. + * @param options Current render options. * @returns The options converted. */ - static RenderOptions convertOptions(RenderSettings const &settings) noexcept; + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; /** * Gets a list of any shared components used by the current render technique. @@ -97,10 +121,20 @@ class GI10 : public RenderTechnique */ void render(CapsaicinInternal &capsaicin) noexcept override; -protected: - void terminate(); + /** + * Render GUI options. + * @param [in,out] capsaicin The current capsaicin context. + */ + void renderGUI(CapsaicinInternal &capsaicin) const noexcept override; + + /** + * Destroy any used internal resources and shutdown. 
+ */ + void terminate() noexcept override; +protected: void generateDispatch(GfxBuffer count_buffer, uint32_t group_size); + void generateDispatchRays(GfxBuffer count_buffer); void clearHashGridCache(); class Base @@ -136,36 +170,39 @@ class GI10 : public RenderTechnique static constexpr SamplingMode sampling_mode_ = kSamplingMode_QuarterSpp; uint2 probe_count_; - const uint32_t probe_spawn_tile_size_; - uint32_t probe_buffer_index_; - uint32_t max_probe_spawn_count; - uint32_t max_ray_count; - GfxTexture probe_buffers_[2]; - GfxTexture probe_mask_buffers_[2]; - GfxBuffer probe_sh_buffers_[2]; - GfxBuffer probe_spawn_buffers_[2]; - GfxBuffer probe_spawn_scan_buffer_; - GfxBuffer probe_spawn_index_buffer_; - GfxBuffer probe_spawn_probe_buffer_; - GfxBuffer probe_spawn_sample_buffer_; - GfxBuffer probe_spawn_radiance_buffer_; - GfxBuffer probe_empty_tile_buffer_; - GfxBuffer probe_empty_tile_count_buffer_; - GfxBuffer probe_override_tile_buffer_; - GfxBuffer probe_override_tile_count_buffer_; - GfxTexture probe_cached_tile_buffer_; - GfxTexture probe_cached_tile_index_buffer_; - GfxBuffer probe_cached_tile_lru_buffers_[2]; - GfxBuffer probe_cached_tile_lru_flag_buffer_; - GfxBuffer probe_cached_tile_lru_count_buffer_; - GfxBuffer probe_cached_tile_lru_index_buffer_; - GfxBuffer probe_cached_tile_mru_buffer_; - GfxBuffer probe_cached_tile_mru_count_buffer_; - GfxBuffer probe_cached_tile_list_buffer_; - GfxBuffer probe_cached_tile_list_count_buffer_; - GfxBuffer probe_cached_tile_list_index_buffer_; - GfxBuffer probe_cached_tile_list_element_buffer_; - GfxBuffer probe_cached_tile_list_element_count_buffer_; + static constexpr uint32_t probe_spawn_tile_size_ = + (sampling_mode_ == kSamplingMode_QuarterSpp ? (probe_size_ << 1) + : sampling_mode_ == kSamplingMode_SixteenthSpp ? 
(probe_size_ << 2) + : probe_size_); + uint32_t probe_buffer_index_; + uint32_t max_probe_spawn_count; + uint32_t max_ray_count; + GfxTexture probe_buffers_[2]; + GfxTexture probe_mask_buffers_[2]; + GfxBuffer probe_sh_buffers_[2]; + GfxBuffer probe_spawn_buffers_[2]; + GfxBuffer probe_spawn_scan_buffer_; + GfxBuffer probe_spawn_index_buffer_; + GfxBuffer probe_spawn_probe_buffer_; + GfxBuffer probe_spawn_sample_buffer_; + GfxBuffer probe_spawn_radiance_buffer_; + GfxBuffer probe_empty_tile_buffer_; + GfxBuffer probe_empty_tile_count_buffer_; + GfxBuffer probe_override_tile_buffer_; + GfxBuffer probe_override_tile_count_buffer_; + GfxTexture probe_cached_tile_buffer_; + GfxTexture probe_cached_tile_index_buffer_; + GfxBuffer probe_cached_tile_lru_buffers_[2]; + GfxBuffer probe_cached_tile_lru_flag_buffer_; + GfxBuffer probe_cached_tile_lru_count_buffer_; + GfxBuffer probe_cached_tile_lru_index_buffer_; + GfxBuffer probe_cached_tile_mru_buffer_; + GfxBuffer probe_cached_tile_mru_count_buffer_; + GfxBuffer probe_cached_tile_list_buffer_; + GfxBuffer probe_cached_tile_list_count_buffer_; + GfxBuffer probe_cached_tile_list_index_buffer_; + GfxBuffer probe_cached_tile_list_element_buffer_; + GfxBuffer probe_cached_tile_list_element_count_buffer_; }; // Used for caching in world space the lighting calculated at primary (same as screen probes) and @@ -196,7 +233,12 @@ class GI10 : public RenderTechnique uint32_t first_cell_offset_tile_mip1_; uint32_t first_cell_offset_tile_mip2_; uint32_t first_cell_offset_tile_mip3_; + uint32_t debug_bucket_occupancy_histogram_size_; + uint32_t debug_bucket_overflow_histogram_size_; + uint32_t debug_stats_size_; + uint64_t debug_total_memory_size_in_bytes_; + GfxBuffer radiance_cache_hash_buffer_float_[HASHGRID_FLOAT_BUFFER_COUNT]; GfxBuffer radiance_cache_hash_buffer_uint_[HASHGRID_UINT_BUFFER_COUNT]; GfxBuffer radiance_cache_hash_buffer_uint2_[HASHGRID_UINT2_BUFFER_COUNT]; GfxBuffer 
radiance_cache_hash_buffer_float4_[HASHGRID_FLOAT4_BUFFER_COUNT]; @@ -219,6 +261,19 @@ class GI10 : public RenderTechnique GfxBuffer &radiance_cache_packed_tile_index_buffer0_; GfxBuffer &radiance_cache_packed_tile_index_buffer1_; GfxBuffer &radiance_cache_debug_cell_buffer_; + GfxBuffer &radiance_cache_debug_bucket_occupancy_buffer_; + GfxBuffer &radiance_cache_debug_bucket_overflow_count_buffer_; + GfxBuffer &radiance_cache_debug_bucket_overflow_buffer_; + GfxBuffer &radiance_cache_debug_free_bucket_buffer_; + GfxBuffer &radiance_cache_debug_used_bucket_buffer_; + GfxBuffer &radiance_cache_debug_stats_buffer_; + GfxBuffer radiance_cache_debug_stats_readback_buffers_[kGfxConstant_BackBufferCount]; + bool radiance_cache_debug_stats_readback_is_pending_[kGfxConstant_BackBufferCount]; + + std::vector debug_bucket_occupancy_histogram_; + std::vector debug_bucket_overflow_histogram_; + float debug_free_bucket_count_; + float debug_used_bucket_count_; }; // Used for sampling the direct lighting at primary (i.e., direct lighting; disabled by default) and @@ -250,6 +305,30 @@ class GI10 : public RenderTechnique uint32_t reservoir_indirect_sample_buffer_index_; }; + // Used for tracing and denoising glossy reflections. 
+ struct GlossyReflections : public Base + { + GlossyReflections(GI10 &gi10); + ~GlossyReflections(); + + void ensureMemoryIsAllocated(CapsaicinInternal const &capsaicin); + + GfxTexture texture_float_[GLOSSY_REFLECTION_TEXTURE_FLOAT_COUNT]; + GfxTexture texture_float4_[GLOSSY_REFLECTION_TEXTURE_FLOAT4_COUNT]; + GfxTexture &fireflies_buffer_; + GfxTexture &specular_buffer_; + GfxTexture &direction_buffer_; + GfxTexture &reflections_buffer_; + GfxTexture &standard_dev_buffer_; + GfxTexture &reflections_buffer0_; + GfxTexture &average_squared_buffer0_; + GfxTexture &reflections_buffer1_; + GfxTexture &average_squared_buffer1_; + + GfxBuffer rt_sample_buffer_; + GfxBuffer rt_sample_count_buffer_; + }; + // Used for image-space spatiotemporal denoising of the probes' interpolation results. struct GIDenoiser : public Base { @@ -265,20 +344,19 @@ class GI10 : public RenderTechnique GfxBuffer blur_sample_count_buffer_; }; - GfxCamera previous_camera_; - RenderOptions options_; - std::string_view debug_view_; - GfxTexture depth_buffer_; - std::vector vertex_buffers_; - bool has_delta_lights_; - GfxTexture irradiance_buffer_; - GfxBuffer draw_command_buffer_; - GfxBuffer dispatch_command_buffer_; + GfxCamera previous_camera_; + RenderOptions options_; + std::string_view debug_view_; + GfxTexture depth_buffer_; + GfxTexture irradiance_buffer_; + GfxBuffer draw_command_buffer_; + GfxBuffer dispatch_command_buffer_; // GI-1.0 building blocks: ScreenProbes screen_probes_; HashGridCache hash_grid_cache_; WorldSpaceReSTIR world_space_restir_; + GlossyReflections glossy_reflections_; GIDenoiser gi_denoiser_; // GI-1.0 kernels: @@ -287,9 +365,11 @@ class GI10 : public RenderTechnique GfxKernel clear_counters_kernel_; GfxKernel generate_draw_kernel_; GfxKernel generate_dispatch_kernel_; + GfxKernel generate_dispatch_rays_kernel_; GfxKernel generate_update_tiles_dispatch_kernel_; GfxKernel debug_screen_probes_kernel_; GfxKernel debug_hash_grid_cells_kernel_; + GfxKernel 
debug_reflection_kernel_; // Screen probes kernels: GfxKernel clear_probe_mask_kernel_; @@ -308,12 +388,19 @@ class GI10 : public RenderTechnique GfxKernel filter_screen_probes_kernel_; GfxKernel project_screen_probes_kernel_; GfxKernel interpolate_screen_probes_kernel_; + GfxSbt sbt_; // Hash grid cache kernels: GfxKernel purge_tiles_kernel_; GfxKernel populate_cells_kernel_; GfxKernel update_tiles_kernel_; GfxKernel resolve_cells_kernel_; + GfxKernel clear_bucket_overflow_count_kernel_; + GfxKernel clear_bucket_occupancy_kernel_; + GfxKernel clear_bucket_overflow_kernel_; + GfxKernel build_bucket_stats_kernel_; + GfxKernel format_bucket_occupancy_kernel_; + GfxKernel format_bucket_overflow_kernel_; // World-space ReSTIR kernels: GfxKernel clear_reservoirs_kernel_; @@ -321,6 +408,14 @@ class GI10 : public RenderTechnique GfxKernel compact_reservoirs_kernel_; GfxKernel resample_reservoirs_kernel_; + // Reflection kernels: + GfxKernel trace_reflections_kernel_; + GfxKernel resolve_reflections_kernels_[5]; + GfxKernel reproject_reflections_kernel_; + GfxKernel mark_fireflies_kernel_; + GfxKernel cleanup_fireflies_kernel_; + GfxKernel no_denoiser_reflections_kernel_; + // GI denoiser kernels: GfxKernel reproject_gi_kernel_; GfxKernel filter_blur_mask_kernel_; diff --git a/src/core/src/render_techniques/gi10/gi10.hlsl b/src/core/src/render_techniques/gi10/gi10.hlsl index 26752b2..46573b1 100644 --- a/src/core/src/render_techniques/gi10/gi10.hlsl +++ b/src/core/src/render_techniques/gi10/gi10.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,6 +23,7 @@ THE SOFTWARE. 
#ifndef GI10_HLSL #define GI10_HLSL +#include "../../math/transform.hlsl" #include "../../math/math_constants.hlsl" // A define for marking invalid identifiers: @@ -54,25 +55,7 @@ float4 GetLinearDepth(in float4 depth) float3 InverseProject(in float4x4 transform, in float2 uv, in float depth) { - float4 homogeneous = mul(transform, float4(2.0f * float2(uv.x, 1.0f - uv.y) - 1.0f, depth, 1.0f)); - return homogeneous.xyz / homogeneous.w; // perspective divide -} - -// Assume input on [-1, 1]. Output is normalized on +Z hemisphere. -float3 hemioct_to_float32x3(in float2 e) -{ - float2 temp = float2(e.x + e.y, e.x - e.y) * 0.5f; - float3 v = float3(temp, 1.0f - abs(temp.x) - abs(temp.y)); - return normalize(v); -} - -// Assume normalized input on +Z hemisphere. Output is on [-1, 1]. -float2 float32x3_to_hemioct(in float3 v) -{ - // Project the hemisphere onto the hemi-octahedron, and then into the xy plane - float2 p = v.xy * (1.0f / (abs(v.x) + abs(v.y) + v.z)); - // Rotate and scale the center diamond to the unit square - return float2(p.x + p.y, p.x - p.y); + return transformPointProjection(float3(2.0f * float2(uv.x, 1.0f - uv.y) - 1.0f, depth), transform); } #define origin() (1.0f / 32.0f) diff --git a/src/core/src/render_techniques/gi10/gi10.rt b/src/core/src/render_techniques/gi10/gi10.rt new file mode 100644 index 0000000..5fb1b03 --- /dev/null +++ b/src/core/src/render_techniques/gi10/gi10.rt @@ -0,0 +1,138 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#define USE_INLINE_RT 0 + +#include "gi10_shared.h" +#include "gi10.comp" + +TriangleHitGroup PopulateScreenProbesHitGroup = +{ + "PopulateScreenProbesAnyHit", // AnyHit + "PopulateScreenProbesClosestHit", // ClosestHit +}; + +TriangleHitGroup PopulateCellsHitGroup = +{ + "PopulateCellsAnyHit", // AnyHit + "PopulateCellsClosestHit", // ClosestHit +}; + +TriangleHitGroup TraceReflectionsHitGroup = +{ + "TraceReflectionsAnyHit", // AnyHit + "TraceReflectionsClosestHit", // ClosestHit +}; + +RaytracingShaderConfig MyShaderConfig = +{ + 64, // max payload size + 8 // max attribute size +}; + +RaytracingPipelineConfig MyPipelineConfig = +{ + 1 // max trace recursion depth +}; + +[shader("raygeneration")] +void PopulateScreenProbesRaygen() +{ + PopulateScreenProbes(DispatchRaysIndex().x); +} + +[shader("anyhit")] +void PopulateScreenProbesAnyHit(inout PopulateScreenProbesPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ + if (!AlphaTest(GetHitInfoRt(attr))) + { + IgnoreHit(); + } +} + +[shader("closesthit")] +void PopulateScreenProbesClosestHit(inout PopulateScreenProbesPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ + payload.hit_dist = RayTCurrent(); + PopulateScreenProbesHandleHit(DispatchRaysIndex().x, payload, GetRayDescRt(), GetHitInfoRt(attr)); +} + +[shader("miss")] +void PopulateScreenProbesMiss(inout PopulateScreenProbesPayload payload) +{ + payload.hit_dist = 1e9f; + PopulateScreenProbesHandleMiss(payload, GetRayDescRt()); +} + +[shader("raygeneration")] +void PopulateCellsRaygen() +{ + PopulateCells(DispatchRaysIndex().x); +} + +[shader("anyhit")] +void PopulateCellsAnyHit(inout PopulateCellsPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ + if (!AlphaTest(GetHitInfoRt(attr))) + { + IgnoreHit(); + } +} + +[shader("closesthit")] +void PopulateCellsClosestHit(inout PopulateCellsPayload payload, in BuiltInTriangleIntersectionAttributes 
attr) +{ + PopulateCellsHandleHit(DispatchRaysIndex().x, payload, GetRayDescRt()); +} + +[shader("miss")] +void PopulateCellsMiss(inout PopulateCellsPayload payload) +{ + PopulateCellsHandleMiss(DispatchRaysIndex().x, payload, GetRayDescRt()); +} + +[shader("raygeneration")] +void TraceReflectionsRaygen() +{ + TraceReflections(DispatchRaysIndex().x); +} + +[shader("anyhit")] +void TraceReflectionsAnyHit(inout TraceReflectionsPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ + if (!AlphaTest(GetHitInfoRt(attr))) + { + IgnoreHit(); + } +} + +[shader("closesthit")] +void TraceReflectionsClosestHit(inout TraceReflectionsPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ + TraceReflectionsHandleHit(DispatchRaysIndex().x, payload, GetRayDescRt(), GetHitInfoRt(attr), RayTCurrent()); +} + +[shader("miss")] +void TraceReflectionsMiss(inout TraceReflectionsPayload payload) +{ + TraceReflectionsHandleMiss(DispatchRaysIndex().x, payload, GetRayDescRt()); +} diff --git a/src/core/src/render_techniques/gi10/gi10.vert b/src/core/src/render_techniques/gi10/gi10.vert index 9fae873..c646655 100644 --- a/src/core/src/render_techniques/gi10/gi10.vert +++ b/src/core/src/render_techniques/gi10/gi10.vert @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -42,6 +42,11 @@ float4 ResolveGI10(in uint idx : SV_VertexID) : SV_POSITION return 1.0f - float4(4.0f * (idx & 1), 4.0f * (idx >> 1), 1.0f, 0.0f); } +float4 DebugReflection(in uint idx : SV_VertexID) : SV_POSITION +{ + return 1.0f - float4(4.0f * (idx & 1), 4.0f * (idx >> 1), 1.0f, 0.0f); +} + float4 DebugScreenProbes(in uint idx : SV_VertexID) : SV_POSITION { return 1.0f - float4(4.0f * (idx & 1), 4.0f * (idx >> 1), 1.0f, 0.0f); diff --git a/src/core/src/render_techniques/gi10/gi10_shared.h b/src/core/src/render_techniques/gi10/gi10_shared.h index bae0dad..4ad1516 100644 --- a/src/core/src/render_techniques/gi10/gi10_shared.h +++ b/src/core/src/render_techniques/gi10/gi10_shared.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -24,6 +24,10 @@ THE SOFTWARE. 
#include "../../gpu_shared.h" +#ifdef __cplusplus +using namespace Capsaicin; +#endif + enum ScreenProbesDebugModes { SCREENPROBES_DEBUG_RADIANCE = 0, @@ -54,6 +58,7 @@ enum HashGridCacheDebugMode struct HashGridCacheConstants { float cell_size; + float min_cell_size; float tile_size; float tile_cell_ratio; // tile_size / cell_size uint num_buckets; @@ -78,9 +83,17 @@ struct HashGridCacheConstants uint debug_mip_level; uint debug_propagate; uint debug_max_cell_decay; + uint debug_bucket_occupancy_histogram_size; + uint debug_bucket_overflow_histogram_size; HashGridCacheDebugMode debug_mode; }; +enum HashGridBufferNamesFloat +{ + HASHGRIDCACHE_STATSBUFFER, + HASHGRID_FLOAT_BUFFER_COUNT +}; + enum HashGridBufferNamesUint { HASHGRIDCACHE_HASHBUFFER = 0, @@ -98,6 +111,11 @@ enum HashGridBufferNamesUint HASHGRIDCACHE_PACKEDTILECOUNTBUFFER1, HASHGRIDCACHE_PACKEDTILEINDEXBUFFER0, HASHGRIDCACHE_PACKEDTILEINDEXBUFFER1, + HASHGRIDCACHE_BUCKETOCCUPANCYBUFFER, + HASHGRIDCACHE_BUCKETOVERFLOWCOUNTBUFFER, + HASHGRIDCACHE_BUCKETOVERFLOWBUFFER, + HASHGRIDCACHE_FREEBUCKETBUFFER, + HASHGRIDCACHE_USEDBUCKETBUFFER, HASHGRID_UINT_BUFFER_COUNT }; @@ -114,14 +132,27 @@ enum HashGridBufferNamesFloat4 HASHGRID_FLOAT4_BUFFER_COUNT }; +enum GI10DebugMode +{ + GI10_DEBUG_MATERIAL_ALBEDO = 0, + GI10_DEBUG_MATERIAL_METALLICITY, + GI10_DEBUG_MATERIAL_ROUGHNESS +}; + struct GI10Constants { - float4x4 view_proj; - float4x4 view_proj_prev; - float4x4 view_proj_inv; - float4x4 view_proj_inv_prev; - float4x4 reprojection; - float4x4 view_inv; + float4x4 view_proj; + float4x4 view_proj_prev; + float4x4 view_proj_inv; + float4x4 view_proj_inv_prev; + float4x4 reprojection; + GpuVirtualAddressRange ray_generation_shader_record; + GpuVirtualAddressRangeAndStride miss_shader_table; + uint2 padding0; + GpuVirtualAddressRangeAndStride hit_group_table; + uint2 padding1; + GpuVirtualAddressRangeAndStride callable_shader_table; + uint padding2; }; struct WorldSpaceReSTIRConstants @@ -132,4 +163,49 @@ struct 
WorldSpaceReSTIRConstants uint unused_padding; }; +struct GlossyReflectionsConstants +{ + int2 full_res; + int full_radius; + int half_radius; + int mark_fireflies_half_radius; + int mark_fireflies_full_radius; + float mark_fireflies_half_low_threshold; + float mark_fireflies_full_low_threshold; + float mark_fireflies_half_high_threshold; + float mark_fireflies_full_high_threshold; + int cleanup_fireflies_half_radius; + int cleanup_fireflies_full_radius; + float low_roughness_threshold; + float high_roughness_threshold; + uint half_res; + uint padding; +}; + +struct GlossyReflectionsAtrousConstants +{ + int ping_pong; + int full_step; + int pass_index; +}; + +enum GlossyReflectionsNamesFloat +{ + GLOSSY_REFLECTION_FIREFLIES_BUFFER = 0, + GLOSSY_REFLECTION_TEXTURE_FLOAT_COUNT +}; + +enum GlossyReflectionsNamesFloat4 +{ + GLOSSY_REFLECTION_SPECULAR_BUFFER = 0, + GLOSSY_REFLECTION_DIRECTION_BUFFER, + GLOSSY_REFLECTION_REFLECTION_BUFFER, + GLOSSY_REFLECTION_STANDARD_DEV_BUFFER, + GLOSSY_REFLECTION_REFLECTIONS_BUFFER_0, // + GLOSSY_REFLECTION_REFLECTIONS_BUFFER_1, // BE CAREFUL: keep 0 and 1 contiguous in memory for ping pong + GLOSSY_REFLECTION_AVERAGE_SQUARED_BUFFER_0, // + GLOSSY_REFLECTION_AVERAGE_SQUARED_BUFFER_1, // + GLOSSY_REFLECTION_TEXTURE_FLOAT4_COUNT +}; + #endif diff --git a/src/core/src/render_techniques/gi10/gi_denoiser.hlsl b/src/core/src/render_techniques/gi10/gi_denoiser.hlsl index c015df5..692694b 100644 --- a/src/core/src/render_techniques/gi10/gi_denoiser.hlsl +++ b/src/core/src/render_techniques/gi10/gi_denoiser.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/render_techniques/gi10/glossy_reflections.hlsl b/src/core/src/render_techniques/gi10/glossy_reflections.hlsl new file mode 100644 index 0000000..7017087 --- /dev/null +++ b/src/core/src/render_techniques/gi10/glossy_reflections.hlsl @@ -0,0 +1,170 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#ifndef GLOSSY_REFLECTIONS_HLSL +#define GLOSSY_REFLECTIONS_HLSL + +//! +//! Glossy reflections shader bindings. +//! 
+ +RWTexture2D g_GlossyReflections_TextureFloat[] : register(space94); +RWTexture2D g_GlossyReflections_TextureFloat4[] : register(space95); + +#define g_GlossyReflections_FirefliesBuffer g_GlossyReflections_TextureFloat [GLOSSY_REFLECTION_FIREFLIES_BUFFER] +#define g_GlossyReflections_SpecularBuffer g_GlossyReflections_TextureFloat4[GLOSSY_REFLECTION_SPECULAR_BUFFER] +#define g_GlossyReflections_DirectionBuffer g_GlossyReflections_TextureFloat4[GLOSSY_REFLECTION_DIRECTION_BUFFER] +#define g_GlossyReflections_ReflectionsBuffer g_GlossyReflections_TextureFloat4[GLOSSY_REFLECTION_REFLECTION_BUFFER] +#define g_GlossyReflections_StandardDevBuffer g_GlossyReflections_TextureFloat4[GLOSSY_REFLECTION_STANDARD_DEV_BUFFER] +#define g_GlossyReflections_ReflectionsBuffer0 g_GlossyReflections_TextureFloat4[GLOSSY_REFLECTION_REFLECTIONS_BUFFER_0 + g_GlossyReflectionsAtrousConstants.ping_pong] +#define g_GlossyReflections_AverageSquaredBuffer0 g_GlossyReflections_TextureFloat4[GLOSSY_REFLECTION_AVERAGE_SQUARED_BUFFER_0 + g_GlossyReflectionsAtrousConstants.ping_pong] +#define g_GlossyReflections_ReflectionsBuffer1 g_GlossyReflections_TextureFloat4[GLOSSY_REFLECTION_REFLECTIONS_BUFFER_1 - g_GlossyReflectionsAtrousConstants.ping_pong] +#define g_GlossyReflections_AverageSquaredBuffer1 g_GlossyReflections_TextureFloat4[GLOSSY_REFLECTION_AVERAGE_SQUARED_BUFFER_1 - g_GlossyReflectionsAtrousConstants.ping_pong] +#define g_GlossyReflections_ReflectionsBufferX g_GlossyReflections_ReflectionsBuffer0 // BE CAREFUL: aliases... +#define g_GlossyReflections_AverageSquaredBufferX g_GlossyReflections_AverageSquaredBuffer0 // + +RWStructuredBuffer g_GlossyReflections_RtSampleBuffer; +RWStructuredBuffer g_GlossyReflections_RtSampleCountBuffer; + +//! +//! Glossy reflections common functions. +//! + +// +int GlossyReflections_FullRadius() +{ + return g_GlossyReflectionsConstants.half_res + ? 
g_GlossyReflectionsConstants.half_radius // BE CAREFUL: Full res pixel count for half res + : g_GlossyReflectionsConstants.full_radius; +} + +// +int GlossyReflections_MarkFireflies_FullRadius() +{ + return g_GlossyReflectionsConstants.half_res + ? g_GlossyReflectionsConstants.mark_fireflies_half_radius // BE CAREFUL: Full res pixel count for half res + : g_GlossyReflectionsConstants.mark_fireflies_full_radius; +} + +// +float GlossyReflections_MarkFireflies_LowThreshold() +{ + return g_GlossyReflectionsConstants.half_res + ? g_GlossyReflectionsConstants.mark_fireflies_half_low_threshold + : g_GlossyReflectionsConstants.mark_fireflies_full_low_threshold; +} + +// +float GlossyReflections_MarkFireflies_HighThreshold() +{ + return g_GlossyReflectionsConstants.half_res + ? g_GlossyReflectionsConstants.mark_fireflies_half_high_threshold + : g_GlossyReflectionsConstants.mark_fireflies_full_high_threshold; +} + +// +int GlossyReflections_CleanupFireflies_FullRadius() +{ + return g_GlossyReflectionsConstants.half_res + ? g_GlossyReflectionsConstants.cleanup_fireflies_half_radius // BE CAREFUL: Full res pixel count for half res + : g_GlossyReflectionsConstants.cleanup_fireflies_full_radius; +} + +// +int2 GlossyReflections_HalfRes() +{ + return g_GlossyReflectionsConstants.half_res + ? (g_GlossyReflectionsConstants.full_res + 1) >> 1 + : (g_GlossyReflectionsConstants.full_res); +} + +// +int2 GlossyReflections_FullRes() +{ + return g_GlossyReflectionsConstants.full_res; +} + +// +int2 GlossyReflections_SplitRes() +{ + return g_GlossyReflectionsConstants.half_res + ? (g_GlossyReflectionsConstants.full_res + int2(0, 1)) >> int2(0, 1) + : (g_GlossyReflectionsConstants.full_res); +} + +// Maps the input 2D coordinate from half resolution to the full resolution sample. +int2 GlossyReflections_HalfToFullRes(in int2 half_pos) +{ + return g_GlossyReflectionsConstants.half_res + ?
(half_pos << int2(1, 1)) + int2((g_FrameIndex >> 0) & 1, + (g_FrameIndex >> 1) & 1) + : (half_pos); +} + +// +int2 GlossyReflections_SplitToFullRes(in int2 split_pos) +{ + return g_GlossyReflectionsConstants.half_res + ? (split_pos << int2(0, 1)) + : (split_pos); +} + +// +int2 GlossyReflections_FullToHalfRes(in int2 full_pos) +{ + return g_GlossyReflectionsConstants.half_res ? full_pos >> 1 : full_pos; +} + +// +int GlossyReflections_FullToHalfRadius(in int full_radius) +{ + return g_GlossyReflectionsConstants.half_res ? (full_radius + 1) >> 1 : full_radius; +} + +// +bool GlossyReflections_QueueSample(in int2 full_pos) +{ + return g_GlossyReflectionsConstants.half_res ? all(GlossyReflections_HalfToFullRes(full_pos >> 1) == full_pos) : true; +} + +// +uint GlossyReflections_PackSample(in uint2 seed) +{ + return ((seed.y & 0xFFFF) << 0) | + ((seed.x & 0xFFFF) << 16); +} + +// +uint2 GlossyReflections_UnpackSample(in uint packed_sample) +{ + return uint2((packed_sample >> 16) & 0xFFFF, + (packed_sample >> 0) & 0xFFFF); +} + +// +float GlossyReflections_NeighborhoodFilter(in float i, in float radius) +{ + const float k = 3.0f; + return exp(-k * (i * i) / pow(radius + 1.0f, 2.0f)); +} + +#endif // GLOSSY_REFLECTIONS_HLSL diff --git a/src/core/src/render_techniques/gi10/hash_grid_cache.hlsl b/src/core/src/render_techniques/gi10/hash_grid_cache.hlsl index 849d97f..d86cd35 100644 --- a/src/core/src/render_techniques/gi10/hash_grid_cache.hlsl +++ b/src/core/src/render_techniques/gi10/hash_grid_cache.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -47,6 +47,7 @@ struct HashGridCache_Visibility //! 
Hash-grid radiance caching shader bindings. //! +RWStructuredBuffer g_HashGridCache_BuffersFloat[] : register(space93); RWStructuredBuffer g_HashGridCache_BuffersUint[] : register(space96); RWStructuredBuffer g_HashGridCache_BuffersUint2[] : register(space97); RWStructuredBuffer g_HashGridCache_BuffersFloat4[] : register(space98); @@ -69,6 +70,12 @@ RWStructuredBuffer g_HashGridCache_BuffersFloat4[] : register(space98); #define g_HashGridCache_PackedTileIndexBuffer g_HashGridCache_BuffersUint [HASHGRIDCACHE_PACKEDTILEINDEXBUFFER0 + g_HashGridCacheConstants.buffer_ping_pong] #define g_HashGridCache_PreviousPackedTileIndexBuffer g_HashGridCache_BuffersUint [HASHGRIDCACHE_PACKEDTILEINDEXBUFFER1 - g_HashGridCacheConstants.buffer_ping_pong] #define g_HashGridCache_DebugCellBuffer g_HashGridCache_BuffersFloat4[HASHGRIDCACHE_DEBUGCELLBUFFER] +#define g_HashGridCache_BucketOccupancyBuffer g_HashGridCache_BuffersUint [HASHGRIDCACHE_BUCKETOCCUPANCYBUFFER] +#define g_HashGridCache_BucketOverflowCountBuffer g_HashGridCache_BuffersUint [HASHGRIDCACHE_BUCKETOVERFLOWCOUNTBUFFER] +#define g_HashGridCache_BucketOverflowBuffer g_HashGridCache_BuffersUint [HASHGRIDCACHE_BUCKETOVERFLOWBUFFER] +#define g_HashGridCache_FreeBucketCountBuffer g_HashGridCache_BuffersUint [HASHGRIDCACHE_FREEBUCKETBUFFER] +#define g_HashGridCache_UsedBucketCountBuffer g_HashGridCache_BuffersUint [HASHGRIDCACHE_USEDBUCKETBUFFER] +#define g_HashGridCache_StatsBuffer g_HashGridCache_BuffersFloat [HASHGRIDCACHE_STATSBUFFER] //! //! Hash-grid radiance caching common functions. @@ -80,7 +87,7 @@ RWStructuredBuffer g_HashGridCache_BuffersFloat4[] : register(space98); // Gets the size of the hash cell for the given world-space position. 
float HashGridCache_GetCellSize(in float3 position) { - float cell_size_step = distance(g_Eye, position) * g_HashGridCacheConstants.cell_size; + float cell_size_step = max(distance(g_Eye, position) * g_HashGridCacheConstants.cell_size, g_HashGridCacheConstants.min_cell_size); uint log_step_multiplier = uint(log2(HASHGRIDCACHE_STEP_FACTOR * cell_size_step)); float hit_cell_size = HASHGRIDCACHE_SIZE_FACTOR * exp2(log_step_multiplier); return hit_cell_size; @@ -122,7 +129,7 @@ HashGridCache_Desc HashGridCache_GetDesc(in HashGridCache_Data data) float3 direction = data.direction; float hit_distance = data.hit_distance; - float cell_size_step = distance(eye_position, hit_position) * g_HashGridCacheConstants.cell_size; + float cell_size_step = max(distance(eye_position, hit_position) * g_HashGridCacheConstants.cell_size, g_HashGridCacheConstants.min_cell_size); float log_step_multiplier = floor(log2(HASHGRIDCACHE_STEP_FACTOR * cell_size_step)); float hit_cell_size = HASHGRIDCACHE_SIZE_FACTOR * exp2(log_step_multiplier); float hit_tile_size = hit_cell_size * float(g_HashGridCacheConstants.tile_cell_ratio); @@ -135,16 +142,16 @@ HashGridCache_Desc HashGridCache_GetDesc(in HashGridCache_Data data) uint3 d = asuint(int3(signed_d)); // uint1 t = uint(hit_distance < hit_tile_size); - uint bucket_index = pcg(l + - pcg(c.x + pcg(c.y + pcg(c.z + - pcg(d.x + pcg(d.y + pcg(d.z + - pcg(t)))))))) % g_HashGridCacheConstants.num_buckets; + uint bucket_index = pcgHash(l + + pcgHash(c.x + pcgHash(c.y + pcgHash(c.z + + pcgHash(d.x + pcgHash(d.y + pcgHash(d.z + + pcgHash(t)))))))) % g_HashGridCacheConstants.num_buckets; uint tile_hash = max(1, - xxhash32(l + - xxhash32(c.x + xxhash32(c.y + xxhash32(c.z + - xxhash32(d.x + xxhash32(d.y + xxhash32(d.z + - xxhash32(t))))))))); + xxHash(l + + xxHash(c.x + xxHash(c.y + xxHash(c.z + + xxHash(d.x + xxHash(d.y + xxHash(d.z + + xxHash(t))))))))); float3 e = floor(hit_position / hit_cell_size) - floor(hit_position / hit_tile_size) * 
g_HashGridCacheConstants.tile_cell_ratio; uint2 cell_offset; @@ -251,7 +258,14 @@ uint HashGridCache_InsertCell(in HashGridCache_Data data, out uint tile_index, o break; // found existing tile and cell } if (bucket_offset >= g_HashGridCacheConstants.num_tiles_per_bucket) + { + #ifdef DEBUG_HASH_STATS + uint previous_value; + InterlockedAdd(g_HashGridCache_BucketOverflowCountBuffer[desc.bucket_index], 1, previous_value); + #endif return kGI10_InvalidId; // too much collisions, out of tiles :( + } + return HashGridCache_CellIndex(desc.cell_offset, tile_index); } @@ -356,13 +370,12 @@ float3 HashGridCache_UnpackDirection(in uint packed_direction) return normalize(2.0f * direction / 255.0f - 1.0f); } -// Packs the visibility information. -#define HashGridCache_PackVisibility(RAY_QUERY) \ - float4(asfloat(RAY_QUERY.CommittedInstanceIndex() | (RAY_QUERY.CommittedTriangleFrontFace() ? 0 : 0x80000000u)), \ - asfloat(RAY_QUERY.CommittedGeometryIndex()), \ - asfloat(RAY_QUERY.CommittedPrimitiveIndex()), \ - asfloat((f32tof16(ray_query.CommittedTriangleBarycentrics().x) << 16) | \ - (f32tof16(ray_query.CommittedTriangleBarycentrics().y) << 0))) +float4 HashGridCache_PackVisibility(HashGridCache_Visibility visibility) +{ + return float4(asfloat(visibility.instance_index | (visibility.is_front_face ? 0 : 0x80000000u)), + asfloat(visibility.geometry_index), asfloat(visibility.primitive_index), + asfloat((f32tof16(visibility.barycentrics.x) << 16) | (f32tof16(visibility.barycentrics.y) << 0))); +} // Unpacks the visibility information. 
HashGridCache_Visibility HashGridCache_UnpackVisibility(in float4 packed_visibility) diff --git a/src/core/src/render_techniques/gi10/screen_probes.hlsl b/src/core/src/render_techniques/gi10/screen_probes.hlsl index 99eda6a..cdef6fa 100644 --- a/src/core/src/render_techniques/gi10/screen_probes.hlsl +++ b/src/core/src/render_techniques/gi10/screen_probes.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/render_techniques/gi10/world_space_restir.hlsl b/src/core/src/render_techniques/gi10/world_space_restir.hlsl index e922bf6..ad613b2 100644 --- a/src/core/src/render_techniques/gi10/world_space_restir.hlsl +++ b/src/core/src/render_techniques/gi10/world_space_restir.hlsl @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -70,10 +70,10 @@ uint2 Reservoir_GetIndexAndHash(in float3 position) uint log_step_multiplier = uint(log2(1e3f * cell_size_step)); float cell_size = 1e-3f * exp2(log_step_multiplier); int3 c = int3(floor(position / cell_size)); - index_and_hash.x = pcg(log_step_multiplier + - pcg(c.x + pcg(c.y + pcg(c.z)))); - index_and_hash.y = xxhash32(log_step_multiplier + - xxhash32(c.x + xxhash32(c.y + xxhash32(c.z)))); + index_and_hash.x = pcgHash(log_step_multiplier + + pcgHash(c.x + pcgHash(c.y + pcgHash(c.z)))); + index_and_hash.y = xxHash(log_step_multiplier + + xxHash(c.x + xxHash(c.y + xxHash(c.z)))); index_and_hash.x = (index_and_hash.x % g_WorldSpaceReSTIRConstants.num_cells); index_and_hash.y = max(index_and_hash.y, 1); return index_and_hash; @@ -88,10 +88,10 @@ uint2 Reservoir_GetPreviousIndexAndHash(in float3 position) uint log_step_multiplier = uint(log2(1e3f * cell_size_step)); float cell_size = 1e-3f * exp2(log_step_multiplier); int3 c = int3(floor(position / cell_size)); - index_and_hash.x = pcg(log_step_multiplier + - pcg(c.x + pcg(c.y + pcg(c.z)))); - index_and_hash.y = xxhash32(log_step_multiplier + - xxhash32(c.x + xxhash32(c.y + xxhash32(c.z)))); + index_and_hash.x = pcgHash(log_step_multiplier + + pcgHash(c.x + pcgHash(c.y + pcgHash(c.z)))); + index_and_hash.y = xxHash(log_step_multiplier + + xxHash(c.x + xxHash(c.y + xxHash(c.z)))); index_and_hash.x = (index_and_hash.x % g_WorldSpaceReSTIRConstants.num_cells); index_and_hash.y = max(index_and_hash.y, 1); return index_and_hash; @@ -168,58 +168,4 @@ void Reservoir_UnpackIndirectSample(in float4 packed_indirect_sample, out float3 hit_position = f16tof32(packed_hit_position).xyz; } -uint packMaterial(MaterialEvaluated material) -{ -#ifdef DISABLE_SPECULAR_LIGHTING - // Pack albedo color onto 10-10-10 format, i.e. 
30 bits - uint packed_albedo = (uint(pow(saturate(material.albedo.x), 1.0f / 2.2f) * 1023.0f) << 20) - | (uint(pow(saturate(material.albedo.y), 1.0f / 2.2f) * 1023.0f) << 10) - | (uint(pow(saturate(material.albedo.z), 1.0f / 2.2f) * 1023.0f)); - return packed_albedo; -#else - // Pack albedo color onto 5-6-5 format, i.e. 16 bits - uint packed_albedo = (uint(pow(saturate(material.albedo.x), 1.0f / 2.2f) * 31.0f) << 11) - | (uint(pow(saturate(material.albedo.y), 1.0f / 2.2f) * 63.0f) << 5) - | (uint(pow(saturate(material.albedo.z), 1.0f / 2.2f) * 31.0f) << 0); - - // Pack metallicity and roughness onto 8 bits each - uint packed_metallicity_roughness = (uint(saturate(material.metallicity) * 255.0f) << 8) - | (uint(saturate(material.roughness) * 255.0f) << 0); - - return (packed_albedo << 16) | packed_metallicity_roughness; -#endif -} - -MaterialBRDF unpackMaterial(in uint packed_material) -{ - MaterialBRDF material; - -#ifdef DISABLE_SPECULAR_LIGHTING - // Unpack the albedo - material.albedo = float3( - pow(((packed_material >> 20) & 0x3FFu) / 1023.0f, 2.2f), - pow(((packed_material >> 10) & 0x3FFu) / 1023.0f, 2.2f), - pow(((packed_material) & 0x3FFu) / 1023.0f, 2.2f) - ); -#else - MaterialEvaluated material2; - // Unpack the albedo - uint packed_albedo = (packed_material >> 16); - material2.albedo = float3( - pow(((packed_albedo >> 11) & 0x1Fu) / 31.0f, 2.2f), - pow(((packed_albedo >> 5) & 0x3Fu) / 63.0f, 2.2f), - pow(((packed_albedo >> 0) & 0x1Fu) / 31.0f, 2.2f) - ); - - // Unpack the metallicity and roughness - uint packed_metallicity_roughness = (packed_material & 0xFFFFu); - - material2.metallicity = ((packed_metallicity_roughness >> 8) & 0xFFu) / 255.0f; - material2.roughness = ((packed_metallicity_roughness >> 0) & 0xFFu) / 255.0f; - - material = MakeMaterialBRDF(material2); -#endif - return material; -} - #endif // WORLD_SPACE_RESTIR_HLSL diff --git a/src/core/src/render_techniques/path_tracer/reference_pt.comp 
b/src/core/src/render_techniques/path_tracer/reference_pt.comp deleted file mode 100644 index f4d3754..0000000 --- a/src/core/src/render_techniques/path_tracer/reference_pt.comp +++ /dev/null @@ -1,634 +0,0 @@ -/********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ - -#include "reference_pt_shared.h" - -uint2 g_BufferDimensions; -RayCamera g_RayCamera; -uint g_BounceCount; -uint g_BounceRRCount; -uint g_SampleCount; -uint g_Accumulate; - -uint g_FrameIndex; - -StructuredBuffer g_InstanceBuffer; -StructuredBuffer g_MeshBuffer; -StructuredBuffer g_TransformBuffer; -StructuredBuffer g_IndexBuffer; -StructuredBuffer g_VertexBuffer; -StructuredBuffer g_MaterialBuffer; - -RWTexture2D g_AccumulationBuffer; -RWTexture2D g_OutputBuffer; - -RaytracingAccelerationStructure g_Scene; - -TextureCube g_EnvironmentBuffer; -Texture2D g_TextureMaps[] : register(space99); - -SamplerState g_TextureSampler; // Should be a linear sampler - -#include "../../components/light_sampler_bounds/light_sampler_bounds.hlsl" -#include "../../components/stratified_sampler/stratified_sampler.hlsl" -#include "../../materials/material_sampling.hlsl" -#include "../../math/random.hlsl" - -typedef RayQuery< RAY_FLAG_NONE /*| RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES*/> //RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES seems to cause the driver to crash -ClosestRayQuery; - -typedef RayQuery< RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES| RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH > -ShadowRayQuery; - -/** Data representing a surface intersection */ -struct IntersectData -{ - Material material; /**< The material associated with intersected surface */ - float2 uv; /**< The texture UV values at intersected position */ - float3 vertex0; /**< The surface triangles first vertex */ - float3 vertex1; /**< The surface triangles second vertex */ - float3 vertex2; /**< The surface triangles third vertex */ - float2 uv0; /**< The uv coordinate at the first vertex */ - float2 uv1; /**< The uv coordinate at the second vertex */ - float2 uv2; /**< The uv coordinate at the third vertex */ - float3 position; /**< The ray intersection location */ - float3 normal; /**< The shading normal at position */ - float3 geometryNormal; /**< The normal 
of actual intersected triangle at position */ - float2 barycentrics; /**< The barycentric coordinates within the intersected primitive */ -}; - -/** - * Determine a transformation matrix to correctly transform normal vectors. - * @param transform The original transform matrix. - * @return The new transform matrix. - */ -float3x3 getNormalTransform(float4x4 transform) -{ - // The transform for a normal is transpose(inverse(M)) - // The inverse is calculated as [1/det(A)]*transpose(C) where C is the cofactor matrix - // This simplifies down to [1/det(A)]*C - float3x3 input = (float3x3)transform; - float3x3 result; - result._m00 = determinant(float2x2(input._m11_m12, input._m21_m22)); - result._m01 = -determinant(float2x2(input._m10_m12, input._m20_m22)); - result._m02 = determinant(float2x2(input._m10_m11, input._m20_m21)); - result._m10 = -determinant(float2x2(input._m01_m02, input._m21_m22)); - result._m11 = determinant(float2x2(input._m00_m02, input._m20_m22)); - result._m12 = -determinant(float2x2(input._m00_m01, input._m20_m21)); - result._m20 = determinant(float2x2(input._m01_m02, input._m11_m12)); - result._m21 = -determinant(float2x2(input._m00_m02, input._m10_m12)); - result._m22 = determinant(float2x2(input._m00_m01, input._m10_m11)); -#undef minor - float3 det3 = input._m00_m01_m02 * result._m00_m01_m02; - float det = det3.x + det3.y + det3.z; - det = 1.0f / det; - return (result * det); -} - -/** - * Determine the intersection data for a ray hit. - * @param instanceIndex The index of the object instance that was intersected (instances into g_InstanceBuffer). - * @param primitiveIndex The index of the intersected primitive within the instance. - * @param barycentrics The barycentric coordinates within the intersected primitive. - * @param frontFace True if the intersection was on the front face of the primitive. - * @return The data associated with the intersection. 
- */ -IntersectData MakeIntersectData(uint instanceIndex, uint primitiveIndex, float2 barycentrics, bool frontFace) -{ - // Get instance information for current object - Instance instance = g_InstanceBuffer[instanceIndex]; - Mesh mesh = g_MeshBuffer[instance.mesh_index]; - float4x4 transform = g_TransformBuffer[instance.transform_index]; - float3x3 normalTransform = getNormalTransform(transform); - - // Get index buffer values - uint i0 = g_IndexBuffer[mesh.index_offset / mesh.index_stride + 3 * primitiveIndex + 0] + mesh.vertex_offset / mesh.vertex_stride; - uint i1 = g_IndexBuffer[mesh.index_offset / mesh.index_stride + 3 * primitiveIndex + 1] + mesh.vertex_offset / mesh.vertex_stride; - uint i2 = g_IndexBuffer[mesh.index_offset / mesh.index_stride + 3 * primitiveIndex + 2] + mesh.vertex_offset / mesh.vertex_stride; - - // Get vertex values from buffers - float3 v0 = g_VertexBuffer[i0].position.xyz; - float3 v1 = g_VertexBuffer[i1].position.xyz; - float3 v2 = g_VertexBuffer[i2].position.xyz; - - // Get normal values from buffers - float3 n0 = g_VertexBuffer[i0].normal.xyz; - float3 n1 = g_VertexBuffer[i1].normal.xyz; - float3 n2 = g_VertexBuffer[i2].normal.xyz; - float3 normal = interpolate(n0, n1, n2, barycentrics); - - // Get UV values from buffers - float2 uv0 = g_VertexBuffer[i0].uv; - float2 uv1 = g_VertexBuffer[i1].uv; - float2 uv2 = g_VertexBuffer[i2].uv; - - IntersectData iData; - // Set material - iData.material = g_MaterialBuffer[mesh.material_index]; - // Calculate UV coordinates - iData.uv = interpolate(uv0, uv1, uv2, barycentrics); - // Add vertex information needed for lights - iData.vertex0 = mul(transform, float4(v0, 1.0f)).xyz; - iData.vertex1 = mul(transform, float4(v1, 1.0f)).xyz; - iData.vertex2 = mul(transform, float4(v2, 1.0f)).xyz; - iData.uv0 = uv0; - iData.uv1 = uv1; - iData.uv2 = uv2; - // Calculate intersection position - iData.position = interpolate(iData.vertex0, iData.vertex1, iData.vertex2, barycentrics); - // Calculate shading 
normal - iData.normal = normalize(mul(normalTransform, normal * (frontFace ? 1.0f : -1.0f))); - // Check for normal mapping - uint normalTex = asuint(iData.material.normal_ao.x); - if (normalTex != uint(-1)) - { - // Get normal from texture map - float3 normalTan = 2.0f * g_TextureMaps[NonUniformResourceIndex(normalTex)].SampleLevel(g_TextureSampler, iData.uv, 0.0f).xyz - 1.0f; - - // Calculate tangent and bi-tangent basis vectors - float3 edge1 = v1 - v0; - float3 edge2 = v2 - v0; - normal = normalize(normal) * (frontFace ? 1.0f : -1.0f); - float2 edgeUV1 = uv1 - uv0; - float2 edgeUV2 = uv2 - uv0; - float r = 1.0f / (edgeUV1.x * edgeUV2.y - edgeUV1.y * edgeUV2.x); - float3 tangent = (edge1 * edgeUV2.yyy - edge2 * edgeUV1.yyy) * r; - float3 bitangent = (edge2 * edgeUV1.xxx - edge1 * edgeUV2.xxx) * r; - - // Calculate handedness - float handedness = dot(cross(normal, tangent), bitangent) < 0.0f ? 1.0f : -1.0f; // Inverted due to left handed system - - // Gram-Schmidt orthogonalise tangent - tangent = normalize(tangent - normal * dot(normal, tangent)); - bitangent = normalize(cross(normal, tangent) * handedness); - - // Convert from tangent space - float3x3 tbn = transpose(float3x3(tangent, bitangent, normal)); //HLSL matrices are row-major so need to be transposed - iData.normal = normalize(mul(normalTransform, mul(tbn, normalTan))); - } - // Calculate geometry normal (assume CCW winding) - float3 edge10 = v1 - v0; - float3 edge20 = v2 - v0; - iData.geometryNormal = normalize(mul(normalTransform, cross(edge10, edge20) * (frontFace ? 1.0f : -1.0f))); - iData.barycentrics = barycentrics; - return iData; -} - -/** Data representing only valid material data for surface intersection. */ -struct IntersectDataMaterial -{ - Material material; /**< The material associated with intersected surface */ - float2 uv; /**< The texture UV values at intersected position */ -}; - -/** - * Determine the material specific intersection data for a ray hit. 
- * @param instanceIndex The index of the object instance that was intersected (instances into g_InstanceBuffer). - * @param primitiveIndex The index of the intersected primitive within the instance. - * @param barycentrics The barycentric coordinates within the intersected primitive. - * @return The data associated with the intersection. - */ -IntersectDataMaterial MakeIntersectDataMaterial(uint instanceIndex, uint primitiveIndex, float2 barycentrics) -{ - // Get instance information for current object - Instance instance = g_InstanceBuffer[instanceIndex]; - Mesh mesh = g_MeshBuffer[instance.mesh_index]; - - // Get index buffer values - uint i0 = g_IndexBuffer[mesh.index_offset / mesh.index_stride + 3 * primitiveIndex + 0] + mesh.vertex_offset / mesh.vertex_stride; - uint i1 = g_IndexBuffer[mesh.index_offset / mesh.index_stride + 3 * primitiveIndex + 1] + mesh.vertex_offset / mesh.vertex_stride; - uint i2 = g_IndexBuffer[mesh.index_offset / mesh.index_stride + 3 * primitiveIndex + 2] + mesh.vertex_offset / mesh.vertex_stride; - - // Get UV values from buffers - float2 uv0 = g_VertexBuffer[i0].uv; - float2 uv1 = g_VertexBuffer[i1].uv; - float2 uv2 = g_VertexBuffer[i2].uv; - - IntersectDataMaterial iData; - // Set material - iData.material = g_MaterialBuffer[mesh.material_index]; - // Calculate UV coordinates - iData.uv = interpolate(uv0, uv1, uv2, barycentrics); - return iData; -} - -/** - * Balanced heuristic used in MIS weight calculation. - * @param fPDF The PDF of the sampled value. - * @param gPDF The PDF of the MIS weighting value. - * @return The calculated weight. - */ -float balanceHeuristic(float fPDF, float gPDF) -{ - return fPDF / (fPDF + gPDF); -} - -/** - * Power heuristic used in MIS weight calculation. - * @param fPDF The PDF of the sampled value. - * @param gPDF The PDF of the MIS weighting value. - * @return The calculated weight. 
- */ -float powerHeuristic(float fPDF, float gPDF) -{ - return (fPDF * fPDF) / (fPDF * fPDF + gPDF * gPDF); -} - -/** - * Heuristic used in MIS weight calculation. - * @param fPDF The PDF of the sampled value. - * @param gPDF The PDF of the MIS weighting value. - * @return The calculated weight. - */ -float heuristicMIS(float fPDF, float gPDF) -{ - return balanceHeuristic(fPDF, gPDF); - //return powerHeuristic(fPDF, gPDF); -} - -/** - * Calculates radiance from a new light ray direction from a surface by sampling the scenes lighting. - * @tparam RNG The type of random number sampler to be used. - * @param material Material data describing BRDF of surface. - * @param randomStratified Random number sampler used to sample light. - * @param randomNG Random number sampler used to sample alpha. - * @param position Current position on surface. - * @param normal Shading normal vector at current position. - * @param viewDirection Outgoing ray view direction. - * @param rayShadowQuery Existing object used to test visibility of shadow rays. - * @return The radiance returned from sampled light direction. 
- */ -float3 sampleLightsNEE(MaterialBRDF material, inout StratifiedSampler randomStratified, inout Random randomNG, float3 position, float3 normal, - float3 viewDirection, ShadowRayQuery rayShadowQuery) -{ - float lightPDF; - uint lightIndex = sampleLights(randomNG, position, normal, lightPDF); - randomNG.rngState = randomNG.rand() * 4294967295.0f; //Scrambling to compensate for dxc crash with inout params above - - if (lightPDF == 0.0f) - { - return 0.0f.xxx; - } - - // Initialise returned radiance - float3 lightPosition; - float3 lightDirection; - float sampledLightPDF; - float2 unused; - Light selectedLight = getLight(lightIndex); - float3 radianceLi = sampleLight(selectedLight, randomStratified, position, normal, lightDirection, sampledLightPDF, lightPosition, unused); - - // Combine PDFs - lightPDF *= sampledLightPDF; - - // Early discard lights behind surface - if (dot(lightDirection, normal) < 0.0f || lightPDF == 0.0f) - { - return 0.0f.xxx; - } - - // Check if light is not occluded - RayDesc ray; - ray.Origin = position; - ray.Direction = lightDirection; - ray.TMin = 0.0f; - ray.TMax = hasLightPosition(selectedLight) ? 
length(lightPosition - position) : FLT_MAX; - rayShadowQuery.TraceRayInline(g_Scene, RAY_FLAG_NONE, 0xFFu, ray); - - // Check for non-opaque geometry - while (rayShadowQuery.Proceed()) - { - if (rayShadowQuery.CandidateType() == CANDIDATE_NON_OPAQUE_TRIANGLE) - { - // Get the intersection data - uint instanceIndex = rayShadowQuery.CandidateInstanceIndex(); - uint primitiveIndex = rayShadowQuery.CandidatePrimitiveIndex(); - float2 uv = rayShadowQuery.CandidateTriangleBarycentrics(); - IntersectDataMaterial iData = MakeIntersectDataMaterial(instanceIndex, primitiveIndex, uv); - - // Check the alpha mask/blend - MaterialAlpha materialMask = MakeMaterialAlpha(iData.material, iData.uv); - // Perform alpha stochastic check - if (materialMask.alpha >= 0.5f) - { - rayShadowQuery.CommitNonOpaqueTriangleHit(); - } - } - else - { - // Should never get here as we don't support non-triangle geometry - // However if this conditional is removed the driver crashes - rayShadowQuery.Abort(); - } - } - - // If nothing was hit then we have hit the light - if (rayShadowQuery.CommittedStatus() == COMMITTED_NOTHING) - { - // Evaluate BRDF for new light direction and calculate combined PDF for current sample - float3 sampleReflectance; - float samplePDF = sampleBRDFPDFAndEvalute(material, normal, viewDirection, lightDirection, sampleReflectance); - - // Add lighting contribution - bool deltaLight = isDeltaLight(selectedLight); - float weight = (!deltaLight) ? heuristicMIS(lightPDF, samplePDF) : 1.0f; - return sampleReflectance * radianceLi * (weight / lightPDF).xxx; - } - - return 0.0f.xxx; -} - -/** - * Generate a primary ray originating from the camera for a given pixel. - * @param pixel Requested pixel (pixel center is at 0.5 +-0.5) - * @return The generated ray. 
- */ -RayDesc generateCameraRay(float2 pixel) -{ - // Setup the ray - RayDesc ray; - - // Get direction from origin to current pixel in screen plane - float3 direction = - (pixel.x * g_RayCamera.directionX) + - (pixel.y * g_RayCamera.directionY) + - g_RayCamera.directionTL; - - // Set the ray origin - ray.Origin = g_RayCamera.origin; - - // Compute the ray direction for this pixel - ray.Direction = normalize(direction); - - // Get adjusted range values - ray.TMin = g_RayCamera.range.x; - ray.TMax = g_RayCamera.range.y; - - return ray; -} - -/** - * Calculate illumination information for a specific pixel - * @note This writes a single sample per pixel directly to the output buffer - * @param pixel The current pixel. - * @param dimensions The maximum pixel dimensions. - */ -void pathTracer(in uint2 pixel, in uint2 dimensions) -{ - // Setup configurable constants - const uint minBounces = g_BounceRRCount; //Minimum bounces before early terminations are allowed - const uint maxBounces = g_BounceCount; - const uint maxSamples = g_SampleCount; - - //Check if valid pixel - if (any(pixel >= dimensions)) - { - return; - } - - // Intialise random number sampler - const uint id = pixel.x + pixel.y * dimensions.x; - Random randomNG = MakeRandom(id, g_FrameIndex); - - // Offset pixel to pixel center - float2 pixelRay = pixel; - pixelRay += 0.5f; - - // Initialise per-pixel path tracing values - float3 radiance = 0.0f.xxx; - - // Initialise shader ray local ray query - ClosestRayQuery rayQuery; - ShadowRayQuery rayShadowQuery; - - // The PDF of the last sampled BRDF - float samplePDF = 1.0f; - - // Loop over requested number of samples per pixel - for (uint sample = 0; sample < maxSamples; ++sample) - { - // Calculate jittered pixel position - StratifiedSampler randomStratified = MakeStratifiedSampler(id, g_FrameIndex * maxSamples + sample); - float2 newPixelRay = pixelRay + lerp(-0.5.xx, 0.5.xx, randomStratified.rand2()); - - // Calculate primary ray - RayDesc ray = 
generateCameraRay(newPixelRay); - - // Initialise per-sample path tracing values - float3 throughput = 1.0f.xxx; - - for (uint bounce = 0; bounce <= maxBounces; ++bounce) - { - // Trace the ray through the scene - rayQuery.TraceRayInline(g_Scene, RAY_FLAG_NONE, 0xFFu, ray); - - while (rayQuery.Proceed()) - { - if (rayQuery.CandidateType() == CANDIDATE_NON_OPAQUE_TRIANGLE) - { - // Get the intersection data - uint instanceIndex = rayQuery.CandidateInstanceIndex(); - uint primitiveIndex = rayQuery.CandidatePrimitiveIndex(); - float2 uv = rayQuery.CandidateTriangleBarycentrics(); - IntersectDataMaterial iData = MakeIntersectDataMaterial(instanceIndex, primitiveIndex, uv); - - // Check for alpha masking - MaterialAlpha materialMask = MakeMaterialAlpha(iData.material, iData.uv); - // Perform alpha stochastic check - if (materialMask.alpha >= 0.5f) - { - rayQuery.CommitNonOpaqueTriangleHit(); - } - } - else - { - // Should never get here as we don't support non-triangle geometry - // However if this conditional is removed the driver crashes - rayQuery.Abort(); - } - } - - // Check for valid intersection - if (rayQuery.CommittedStatus() == COMMITTED_NOTHING) - { -#ifdef DISABLE_DIRECT_LIGHTING - if (bounce == 1) break; -#endif - if (hasEnvironmentLight()) - { - // If nothing was hit then load the environment map - LightEnvironment light = getEnvironmentLight(); - float3 lightRadiance = evaluateEnvironmentLight(light, ray.Direction); - if (bounce != 0) - { - // Account for light contribution along sampled direction - float lightPDF = sampleEnvironmentLightPDF(light, ray.Direction, float3(0.0f.xxx)); - lightPDF *= sampleLightPDF(ray.Origin); - - // Add lighting contribution - float weight = heuristicMIS(samplePDF, lightPDF); - radiance += throughput * lightRadiance * weight.xxx; - } - else - { - radiance += throughput * lightRadiance; - } - } - break; - } - - // Get the intersection data - uint instanceIndex = rayQuery.CommittedInstanceIndex(); - uint primitiveIndex = 
rayQuery.CommittedPrimitiveIndex(); - float2 barycentrics = rayQuery.CommittedTriangleBarycentrics(); - bool frontFace = rayQuery.CommittedTriangleFrontFace(); - IntersectData iData = MakeIntersectData(instanceIndex, primitiveIndex, barycentrics, frontFace); - - // Get material BSDF values - MaterialBSDF materialBSDF = MakeMaterialBSDF(iData.material, iData.uv); - -#ifdef DISABLE_DIRECT_LIGHTING - if (bounce == 1) {/*ignore emissive hit*/} else -#endif - if (frontFace && any(materialBSDF.emissive > 0.0f)) - { - // Get light contribution - float3 lightRadiance = materialBSDF.emissive; -#ifndef DISABLE_AREA_LIGHTS - if (bounce != 0) - { - // Get material properties at intersection - LightArea emissiveLight = MakeLightArea(iData.vertex0, iData.vertex1, iData.vertex2, - // The following parameters are irrelevant for calculating PDF - 0.0f.xxxx, 0.0f, 0.0f, 0.0f); - - // Account for light contribution along sampled direction - float lightPDF = sampleAreaLightPDF(emissiveLight, ray.Origin, iData.position); - lightPDF *= sampleLightPDF(ray.Origin); - - // Add lighting contribution - float weight = heuristicMIS(samplePDF, lightPDF); - lightRadiance *= weight.xxx; - } -#endif - radiance += throughput * lightRadiance; - } - - // Terminate early if no more bounces - if (bounce == maxBounces) - { - break; - } - - float3 viewDirection = -ray.Direction; - - // Offset the intersection position to prevent self intersection on generated rays - float3 offsetOrigin = offsetPosition(iData.position, iData.geometryNormal); - -#ifdef DISABLE_ALBEDO_MATERIAL - // Disable material albedo if requested - if (bounce == 0) - { - materialBSDF.albedo = 0.3f.xxx; -#ifndef DISABLE_SPECULAR_LIGHTING - materialBSDF.F0 = 0.0f.xxx; -#endif - } -#endif - -#ifdef DISABLE_DIRECT_LIGHTING - // Disable direct lighting if requested - if (bounce > 0) -#endif - { - // Sample a single light - radiance += throughput * sampleLightsNEE(MakeMaterialBRDF(materialBSDF), randomStratified, randomNG, offsetOrigin, 
iData.normal, viewDirection, rayShadowQuery); - } - - // Sample BRDF to get next ray direction - float3 sampleReflectance; - float3 rayDirection = sampleBRDF(MakeMaterialBRDF(materialBSDF), randomStratified, iData.normal, viewDirection, sampleReflectance, samplePDF); - - // Prevent tracing directions below the surface - if (dot(iData.geometryNormal, rayDirection) <= 0.0f) - { - break; - } - - // Add sampling weight to current weight - throughput *= sampleReflectance / samplePDF.xxx; - - // Stop if no further contribution - if (luminance(throughput) <= (1.0f / 1024.0f)) - { - break; - } - - // Russian Roulette early termination - if (bounce > minBounces) - { - float rrSample = hmax(throughput); - if (rrSample <= randomNG.rand()) - { - break; - } - throughput /= rrSample.xxx; - } - - // Create new ray - ray.Origin = offsetOrigin; - ray.Direction = rayDirection; - ray.TMin = 0.0f; - ray.TMax = FLT_MAX; - } - } - - // Accumulate previous samples - if (g_Accumulate != 0) - { - // Get previous values - float4 accumulator = g_AccumulationBuffer[pixel]; - // Calculate running average - float3 runningAverage = accumulator.xyz * accumulator.www + radiance; - // Increment sample count - float sampleCount = accumulator.w + maxSamples; - radiance = runningAverage / sampleCount.xxx; - // Write out new radiance and sample count to accumulation buffer - g_AccumulationBuffer[pixel] = float4(radiance, sampleCount); - } - else - { - // Write out current value so its ready for next frame - g_AccumulationBuffer[pixel] = float4(radiance, maxSamples); - // Average out radiance so its ready for final output - radiance /= maxSamples; - } - - // Output average accumulation - g_OutputBuffer[pixel] = float4(radiance, 1.0f); -} - -[numthreads(4, 8, 1)] -void ReferencePT(in uint2 did : SV_DispatchThreadID) -{ - pathTracer(did, g_BufferDimensions); -} diff --git a/src/core/src/render_techniques/path_tracer/reference_pt.cpp b/src/core/src/render_techniques/path_tracer/reference_pt.cpp deleted 
file mode 100644 index 9782707..0000000 --- a/src/core/src/render_techniques/path_tracer/reference_pt.cpp +++ /dev/null @@ -1,282 +0,0 @@ -/********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ -#include "reference_pt.h" - -#include "capsaicin_internal.h" -#include "components/light_sampler_bounds/light_sampler_bounds.h" -#include "components/stratified_sampler/stratified_sampler.h" - -namespace Capsaicin -{ -RayCamera caclulateRayCamera(CapsaicinInternal const &capsaicin) -{ - float3 origin = capsaicin.getCamera().eye; - float2 range = float2(capsaicin.getCamera().nearZ, capsaicin.getCamera().farZ); - - // Get the size of the screen in the X and Y screen direction - float size = tan(capsaicin.getCamera().fovY / 2.0f); - size *= range.x; - float sizeHalfX = size * capsaicin.getCamera().aspect; - float sizeHalfY = size; - - // Generate view direction - float3 forward(capsaicin.getCamera().center - origin); - forward = normalize(forward); - // Generate proper horizontal direction - float3 right(cross(forward, capsaicin.getCamera().up)); - right = normalize(right); - // Generate proper up direction - float3 down(cross(forward, right)); - // Normalize vectors - down = normalize(down); - - // Set each of the camera vectors to an orthonormal basis - float3 directionX = right; - float3 directionY = down; - float3 directionZ = forward; - - // Get weighted distance vector - directionZ = directionZ * range.x; - - // Get the Scaled Horizontal and up vectors - directionX *= sizeHalfX; - directionY *= sizeHalfY; - - // Offset the direction vector - float3 directionTL = directionZ - directionX - directionY; - - // Scale the direction X and Y vectors from half size - directionX += directionX; - directionY += directionY; - - // Scale the X and Y vectors to be pixel length - directionX /= (float)capsaicin.getWidth(); - directionY /= (float)capsaicin.getHeight(); - - return {origin, directionTL, directionX, directionY, range}; -} - -ReferencePT::ReferencePT() - : RenderTechnique("Reference PT") -{} - -ReferencePT::~ReferencePT() -{ - terminate(); -} - -RenderOptionList ReferencePT::getRenderOptions() 
noexcept -{ - RenderOptionList newOptions; - newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_bounce_count, options)); - newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_min_rr_bounces, options)); - newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_sample_count, options)); - newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_disable_albedo_materials, options)); - newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_disable_direct_lighting, options)); - newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_disable_specular_lighting, options)); - return newOptions; -} - -ReferencePT::RenderOptions ReferencePT::convertOptions(RenderSettings const &settings) noexcept -{ - RenderOptions newOptions; - RENDER_OPTION_GET(reference_pt_bounce_count, newOptions, settings.options_) - RENDER_OPTION_GET(reference_pt_min_rr_bounces, newOptions, settings.options_) - RENDER_OPTION_GET(reference_pt_sample_count, newOptions, settings.options_) - RENDER_OPTION_GET(reference_pt_disable_albedo_materials, newOptions, settings.options_) - RENDER_OPTION_GET(reference_pt_disable_direct_lighting, newOptions, settings.options_) - RENDER_OPTION_GET(reference_pt_disable_specular_lighting, newOptions, settings.options_) - return newOptions; -} - -ComponentList ReferencePT::getComponents() const noexcept -{ - ComponentList components; - components.emplace_back(COMPONENT_MAKE(LightSamplerBounds)); - components.emplace_back(COMPONENT_MAKE(StratifiedSampler)); - return components; -} - -AOVList ReferencePT::getAOVs() const noexcept -{ - AOVList aovs; - aovs.push_back({"Color", AOV::Write}); - return aovs; -} - -bool ReferencePT::init(CapsaicinInternal const &capsaicin) noexcept -{ - rayCameraData = gfxCreateBuffer(gfx_, 1, nullptr, kGfxCpuAccess_Write); - rayCameraData.setName("Capsaicin_PT_RayCamera"); - accumulationBuffer = gfxCreateTexture2D(gfx_, DXGI_FORMAT_R32G32B32A32_FLOAT); - accumulationBuffer.setName("Capsaicin_PT_AccumulationBuffer"); - - textureSampler = gfxCreateSamplerState(gfx_, 
D3D12_FILTER_MIN_MAG_MIP_LINEAR, - D3D12_TEXTURE_ADDRESS_MODE_WRAP, D3D12_TEXTURE_ADDRESS_MODE_WRAP); - - reference_pt_program_ = - gfxCreateProgram(gfx_, "render_techniques/path_tracer/reference_pt", capsaicin.getShaderPath()); - return initKernels(capsaicin); -} - -void ReferencePT::render(CapsaicinInternal &capsaicin) noexcept -{ - RenderOptions newOptions = convertOptions(capsaicin.getRenderSettings()); - RenderSettings const &renderSettings = capsaicin.getRenderSettings(); - auto lightSampler = capsaicin.getComponent(); - auto stratified_sampler = capsaicin.getComponent(); - - // Check if options change requires kernel recompile - bool recompile = - lightSampler->needsRecompile(capsaicin) - || options.reference_pt_disable_albedo_materials != newOptions.reference_pt_disable_albedo_materials - || options.reference_pt_disable_direct_lighting != newOptions.reference_pt_disable_direct_lighting - || options.reference_pt_disable_specular_lighting - != newOptions.reference_pt_disable_specular_lighting; - - // Check if we can continue to accumulate samples - bool const accumulate = !recompile && bufferDimensions.x == capsaicin.getWidth() - && bufferDimensions.y == capsaicin.getHeight() - && checkCameraUpdated(capsaicin.getCamera()) - && options.reference_pt_bounce_count == newOptions.reference_pt_bounce_count - && options.reference_pt_min_rr_bounces == newOptions.reference_pt_min_rr_bounces - && !capsaicin.getMeshesUpdated() && !capsaicin.getTransformsUpdated() - && !lightSampler->getLightsUpdated(); - - // Update light sampling data structure - if (capsaicin.getMeshesUpdated() || capsaicin.getTransformsUpdated() - || bufferDimensions == uint2(0) /*i.e. 
un-initialised*/) - { - // Update the light sampler using scene bounds - auto sceneBounds = capsaicin.getSceneBounds(); - lightSampler->setBounds(sceneBounds, this); - } - - lightSampler->update(capsaicin, *this); - - // Update the history - bufferDimensions = uint2(capsaicin.getWidth(), capsaicin.getHeight()); - camera = capsaicin.getCamera(); - options = newOptions; - - if (!accumulate) - { - cameraData = caclulateRayCamera(capsaicin); - } - - if (recompile) - { - gfxDestroyKernel(gfx_, reference_pt_kernel_); - initKernels(capsaicin); - } - - // Bind the shader parameters - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_BufferDimensions", bufferDimensions); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_FrameIndex", capsaicin.getFrameIndex()); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_RayCamera", cameraData); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_BounceCount", options.reference_pt_bounce_count); - gfxProgramSetParameter( - gfx_, reference_pt_program_, "g_BounceRRCount", options.reference_pt_min_rr_bounces); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_SampleCount", options.reference_pt_sample_count); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_Accumulate", accumulate ? 
1 : 0); - - stratified_sampler->addProgramParameters(capsaicin, reference_pt_program_); - - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_InstanceBuffer", capsaicin.getInstanceBuffer()); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_MeshBuffer", capsaicin.getMeshBuffer()); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_TransformBuffer", capsaicin.getTransformBuffer()); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_IndexBuffer", capsaicin.getIndexBuffer()); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_VertexBuffer", capsaicin.getVertexBuffer()); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_MaterialBuffer", capsaicin.getMaterialBuffer()); - - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_AccumulationBuffer", accumulationBuffer); - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_OutputBuffer", capsaicin.getAOVBuffer("Color")); - - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_Scene", capsaicin.getAccelerationStructure()); - - gfxProgramSetParameter( - gfx_, reference_pt_program_, "g_EnvironmentBuffer", capsaicin.getEnvironmentBuffer()); - gfxProgramSetParameter( - gfx_, reference_pt_program_, "g_TextureMaps", capsaicin.getTextures(), capsaicin.getTextureCount()); - - gfxProgramSetParameter(gfx_, reference_pt_program_, "g_TextureSampler", textureSampler); - - lightSampler->addProgramParameters(capsaicin, reference_pt_program_); - - // Render a reference for the current scene - { - TimedSection const timed_section(*this, "ReferencePT"); - - uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, reference_pt_kernel_); - uint32_t const num_groups_x = (bufferDimensions.x + num_threads[0] - 1) / num_threads[0]; - uint32_t const num_groups_y = (bufferDimensions.y + num_threads[1] - 1) / num_threads[1]; - - gfxCommandBindKernel(gfx_, reference_pt_kernel_); - gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); - } -} - -bool ReferencePT::initKernels(CapsaicinInternal const 
&capsaicin) noexcept -{ - // Set up the base defines based on available features - auto lightSampler = capsaicin.getComponent(); - std::vector baseDefines(std::move(lightSampler->getShaderDefines(capsaicin))); - std::vector defines; - for (auto &i : baseDefines) - { - defines.push_back(i.c_str()); - } - if (options.reference_pt_disable_albedo_materials) - { - defines.push_back("DISABLE_ALBEDO_MATERIAL"); - } - if (options.reference_pt_disable_direct_lighting) - { - defines.push_back("DISABLE_DIRECT_LIGHTING"); - } - if (options.reference_pt_disable_specular_lighting) - { - defines.push_back("DISABLE_SPECULAR_LIGHTING"); - } - reference_pt_kernel_ = gfxCreateComputeKernel( - gfx_, reference_pt_program_, "ReferencePT", defines.data(), (uint32_t)defines.size()); - return !!reference_pt_program_; -} - -void ReferencePT::terminate() noexcept -{ - gfxDestroyBuffer(gfx_, rayCameraData); - gfxDestroyTexture(gfx_, accumulationBuffer); - - gfxDestroySamplerState(gfx_, textureSampler); - gfxDestroyProgram(gfx_, reference_pt_program_); - gfxDestroyKernel(gfx_, reference_pt_kernel_); -} - -bool ReferencePT::checkCameraUpdated(GfxCamera const ¤tCamera) noexcept -{ - return camera.aspect == currentCamera.aspect && camera.center == currentCamera.center - && camera.eye == currentCamera.eye && camera.farZ == currentCamera.farZ - && camera.fovY == currentCamera.fovY && camera.nearZ == currentCamera.nearZ - && camera.type == currentCamera.type && camera.up == currentCamera.up; -} -} // namespace Capsaicin diff --git a/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.comp b/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.comp new file mode 100644 index 0000000..2b95d8b --- /dev/null +++ b/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.comp @@ -0,0 +1,139 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#ifndef USE_INLINE_RT +#define USE_INLINE_RT 1 +#endif + +#include "../../geometry/path_tracing_shared.h" + +uint2 g_BufferDimensions; +RayCamera g_RayCamera; +uint g_BounceCount; +uint g_BounceRRCount; +uint g_SampleCount; +uint g_Accumulate; + +uint g_FrameIndex; + +StructuredBuffer g_InstanceBuffer; +StructuredBuffer g_MeshBuffer; +StructuredBuffer g_TransformBuffer; +StructuredBuffer g_IndexBuffer; +StructuredBuffer g_VertexBuffer; +StructuredBuffer g_MaterialBuffer; + +RWTexture2D g_AccumulationBuffer; +RWTexture2D g_OutputBuffer; + +RaytracingAccelerationStructure g_Scene; + +TextureCube g_EnvironmentBuffer; +Texture2D g_TextureMaps[] : register(space99); + +SamplerState g_TextureSampler; // Should be a linear sampler + +#include "../../components/light_sampler/light_sampler.hlsl" +#include "../../geometry/path_tracing.hlsl" + +/** + * Calculate illumination information for a specific pixel + * @note This writes a single sample per pixel directly to the output buffer + * @param pixel The current pixel. + * @param dimensions The maximum pixel dimensions. 
+ */ +void pathTracer(in uint2 pixel, in uint2 dimensions) +{ + // Setup configurable constants + const uint minBounces = g_BounceRRCount; //Minimum bounces before early terminations are allowed + const uint maxBounces = g_BounceCount; + const uint maxSamples = g_SampleCount; + + //Check if valid pixel + if (any(pixel >= dimensions)) + { + return; + } + const uint id = pixel.x + pixel.y * dimensions.x; + + // Offset pixel to pixel center + float2 pixelRay = pixel; + pixelRay += 0.5f; + + // Initialise per-pixel path tracing values + float3 radiance = 0.0f; + + // Loop over requested number of samples per pixel + for (uint sample = 0; sample < maxSamples; ++sample) + { + // Intialise random number sampler + const uint id = pixel.x + pixel.y * dimensions.x; + const uint frameID = g_FrameIndex * maxSamples + sample; + LightSampler lightSampler = MakeLightSampler(MakeRandom(id, frameID)); + + // Calculate jittered pixel position + StratifiedSampler randomStratified = MakeStratifiedSampler(id, frameID); + float2 newPixelRay = pixelRay + lerp(-0.5.xx, 0.5.xx, randomStratified.rand2()); + + // Calculate primary ray + RayDesc ray = generateCameraRay(newPixelRay, g_RayCamera); + + traceFullPath(ray, randomStratified, lightSampler, minBounces, maxBounces, radiance); + } + + // Check for incorrect calculations + //if (any(isnan(radiance)) || any(isinf(radiance)) || any(radiance < 0.0f)) + //{ + // radiance = 0.0f.xxx; + //} + + // Accumulate previous samples + if (g_Accumulate != 0) + { + // Get previous values + float4 accumulator = g_AccumulationBuffer[pixel]; + uint previousCount = asuint(accumulator.w); + // Increment sample count + uint sampleCount = previousCount + maxSamples; + radiance += accumulator.xyz; + // Write out new radiance and sample count to accumulation buffer + g_AccumulationBuffer[pixel] = float4(radiance, asfloat(sampleCount)); + // Average out radiance so its ready for final output + radiance /= (float)sampleCount; + } + else + { + // Write out current 
value so its ready for next frame + g_AccumulationBuffer[pixel] = float4(radiance, asfloat(maxSamples)); + // Average out radiance so its ready for final output + radiance /= (float)maxSamples; + } + + // Output average accumulation + g_OutputBuffer[pixel] = float4(radiance, 1.0f); +} + +[numthreads(4, 8, 1)] +void ReferencePT(in uint2 did : SV_DispatchThreadID) +{ + pathTracer(did, g_BufferDimensions); +} diff --git a/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.cpp b/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.cpp new file mode 100644 index 0000000..a7bb8ca --- /dev/null +++ b/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.cpp @@ -0,0 +1,380 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#include "reference_path_tracer.h" + +#include "capsaicin_internal.h" +#include "components/light_sampler/light_sampler_switcher.h" +#include "components/stratified_sampler/stratified_sampler.h" + +char const *kReferencePTRaygenShaderName = "ReferencePTRaygen"; +char const *kReferencePTMissShaderName = "ReferencePTMiss"; +char const *kReferencePTShadowMissShaderName = "ReferencePTShadowMiss"; +char const *kReferencePTAnyHitShaderName = "ReferencePTAnyHit"; +char const *kReferencePTShadowAnyHitShaderName = "ReferencePTShadowAnyHit"; +char const *kReferencePTClosestHitShaderName = "ReferencePTClosestHit"; +char const *kReferencePTHitGroupName = "ReferencePTHitGroup"; +char const *kReferencePTShadowHitGroupName = "ReferencePTShadowHitGroup"; + +namespace Capsaicin +{ +ReferencePT::ReferencePT() + : RenderTechnique("Reference Path Tracer") +{} + +ReferencePT::~ReferencePT() +{ + terminate(); +} + +RenderOptionList ReferencePT::getRenderOptions() noexcept +{ + RenderOptionList newOptions; + newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_bounce_count, options)); + newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_min_rr_bounces, options)); + newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_sample_count, options)); + newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_disable_albedo_materials, options)); + newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_disable_direct_lighting, options)); + newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_disable_specular_materials, options)); + newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_nee_only, options)); + newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_disable_nee, options)); + newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_nee_reservoir_resampling, options)); + newOptions.emplace(RENDER_OPTION_MAKE(reference_pt_use_dxr10, options)); + return newOptions; +} + +ReferencePT::RenderOptions ReferencePT::convertOptions(RenderOptionList 
const &options) noexcept +{ + RenderOptions newOptions; + RENDER_OPTION_GET(reference_pt_bounce_count, newOptions, options) + RENDER_OPTION_GET(reference_pt_min_rr_bounces, newOptions, options) + RENDER_OPTION_GET(reference_pt_sample_count, newOptions, options) + RENDER_OPTION_GET(reference_pt_disable_albedo_materials, newOptions, options) + RENDER_OPTION_GET(reference_pt_disable_direct_lighting, newOptions, options) + RENDER_OPTION_GET(reference_pt_disable_specular_materials, newOptions, options) + RENDER_OPTION_GET(reference_pt_nee_only, newOptions, options) + RENDER_OPTION_GET(reference_pt_disable_nee, newOptions, options) + RENDER_OPTION_GET(reference_pt_nee_reservoir_resampling, newOptions, options) + RENDER_OPTION_GET(reference_pt_use_dxr10, newOptions, options) + return newOptions; +} + +ComponentList ReferencePT::getComponents() const noexcept +{ + ComponentList components; + components.emplace_back(COMPONENT_MAKE(LightSamplerSwitcher)); + components.emplace_back(COMPONENT_MAKE(StratifiedSampler)); + return components; +} + +AOVList ReferencePT::getAOVs() const noexcept +{ + AOVList aovs; + aovs.push_back({"Color", AOV::Write}); + return aovs; +} + +bool ReferencePT::init(CapsaicinInternal const &capsaicin) noexcept +{ + rayCameraData = gfxCreateBuffer(gfx_, 1, nullptr, kGfxCpuAccess_Write); + rayCameraData.setName("Capsaicin_PT_RayCamera"); + accumulationBuffer = gfxCreateTexture2D(gfx_, DXGI_FORMAT_R32G32B32A32_FLOAT); + accumulationBuffer.setName("Capsaicin_PT_AccumulationBuffer"); + + reference_pt_program_ = gfxCreateProgram(gfx_, getProgramName(), capsaicin.getShaderPath()); + return initKernels(capsaicin); +} + +void ReferencePT::render(CapsaicinInternal &capsaicin) noexcept +{ + RenderOptions newOptions = convertOptions(capsaicin.getOptions()); + auto lightSampler = capsaicin.getComponent(); + auto stratified_sampler = capsaicin.getComponent(); + + // Check if options change requires kernel recompile + bool recompile = needsRecompile(capsaicin, 
newOptions); + + // Check if we can continue to accumulate samples + bool const accumulate = !recompile && bufferDimensions.x == capsaicin.getWidth() + && bufferDimensions.y == capsaicin.getHeight() + && checkCameraUpdated(capsaicin.getCamera()) + && options.reference_pt_bounce_count == newOptions.reference_pt_bounce_count + && options.reference_pt_min_rr_bounces == newOptions.reference_pt_min_rr_bounces + && !capsaicin.getMeshesUpdated() && !capsaicin.getTransformsUpdated() + && !lightSampler->getLightsUpdated(capsaicin) + && !capsaicin.getEnvironmentMapUpdated() && capsaicin.getFrameIndex() > 0; + + // Update the history + bufferDimensions = uint2(capsaicin.getWidth(), capsaicin.getHeight()); + camera = capsaicin.getCamera(); + options = newOptions; + + if (!accumulate) + { + cameraData = caclulateRayCamera( + {camera.eye, camera.center, camera.up, camera.aspect, camera.fovY, camera.nearZ, camera.farZ}, + capsaicin.getWidth(), capsaicin.getHeight()); + } + + if (recompile) + { + gfxDestroyKernel(gfx_, reference_pt_kernel_); + gfxDestroySbt(gfx_, reference_pt_sbt_); + initKernels(capsaicin); + } + + // Bind the shader parameters + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_BufferDimensions", bufferDimensions); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_FrameIndex", capsaicin.getFrameIndex()); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_RayCamera", cameraData); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_BounceCount", options.reference_pt_bounce_count); + gfxProgramSetParameter( + gfx_, reference_pt_program_, "g_BounceRRCount", options.reference_pt_min_rr_bounces); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_SampleCount", options.reference_pt_sample_count); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_Accumulate", accumulate ? 
1 : 0); + + stratified_sampler->addProgramParameters(capsaicin, reference_pt_program_); + + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_InstanceBuffer", capsaicin.getInstanceBuffer()); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_MeshBuffer", capsaicin.getMeshBuffer()); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_TransformBuffer", capsaicin.getTransformBuffer()); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_IndexBuffer", capsaicin.getIndexBuffer()); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_VertexBuffer", capsaicin.getVertexBuffer()); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_MaterialBuffer", capsaicin.getMaterialBuffer()); + + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_AccumulationBuffer", accumulationBuffer); + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_OutputBuffer", capsaicin.getAOVBuffer("Color")); + + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_Scene", capsaicin.getAccelerationStructure()); + + gfxProgramSetParameter( + gfx_, reference_pt_program_, "g_EnvironmentBuffer", capsaicin.getEnvironmentBuffer()); + gfxProgramSetParameter( + gfx_, reference_pt_program_, "g_TextureMaps", capsaicin.getTextures(), capsaicin.getTextureCount()); + + gfxProgramSetParameter(gfx_, reference_pt_program_, "g_TextureSampler", capsaicin.getLinearWrapSampler()); + + lightSampler->addProgramParameters(capsaicin, reference_pt_program_); + + // Render a reference for the current scene + if (options.reference_pt_use_dxr10) + { + setupSbt(capsaicin); + gfxCommandBindKernel(gfx_, reference_pt_kernel_); + gfxCommandDispatchRays(gfx_, reference_pt_sbt_, bufferDimensions.x, bufferDimensions.y, 1); + } + else + { + TimedSection const timed_section(*this, "ReferencePT"); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, reference_pt_kernel_); + uint32_t const num_groups_x = (bufferDimensions.x + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = 
(bufferDimensions.y + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, reference_pt_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + } +} + +void ReferencePT::terminate() noexcept +{ + gfxDestroyBuffer(gfx_, rayCameraData); + rayCameraData = {}; + gfxDestroyTexture(gfx_, accumulationBuffer); + accumulationBuffer = {}; + + gfxDestroyProgram(gfx_, reference_pt_program_); + reference_pt_program_ = {}; + gfxDestroyKernel(gfx_, reference_pt_kernel_); + reference_pt_kernel_ = {}; + gfxDestroySbt(gfx_, reference_pt_sbt_); + reference_pt_sbt_ = {}; +} + +void ReferencePT::renderGUI(CapsaicinInternal &capsaicin) const noexcept +{ + ImGui::DragInt("Samples Per Pixel", + (int32_t *)&capsaicin.getOption("reference_pt_sample_count"), 1, 1, 30); + auto &bounces = capsaicin.getOption("reference_pt_bounce_count"); + ImGui::DragInt("Bounces", (int32_t *)&bounces, 1, 0, 30); + auto &minBounces = capsaicin.getOption("reference_pt_min_rr_bounces"); + ImGui::DragInt("Min Bounces", (int32_t *)&minBounces, 1, 0, bounces); + minBounces = glm::min(minBounces, bounces); + ImGui::Checkbox( + "Disable Albedo Textures", &capsaicin.getOption("reference_pt_disable_albedo_materials")); + ImGui::Checkbox( + "Disable Direct Lighting", &capsaicin.getOption("reference_pt_disable_direct_lighting")); + ImGui::Checkbox("NEE Only", &capsaicin.getOption("reference_pt_nee_only")); + ImGui::Checkbox("Disable NEE", &capsaicin.getOption("reference_pt_disable_nee")); + ImGui::Checkbox( + "Disable Specular Materials", &capsaicin.getOption("reference_pt_disable_specular_materials")); +} + +bool ReferencePT::initKernels(CapsaicinInternal const &capsaicin) noexcept +{ + // Set up the base defines based on available features + auto lightSampler = capsaicin.getComponent(); + std::vector baseDefines(std::move(lightSampler->getShaderDefines(capsaicin))); + std::vector defines; + for (auto &i : baseDefines) + { + defines.push_back(i.c_str()); + } + if 
(options.reference_pt_disable_albedo_materials) + { + defines.push_back("DISABLE_ALBEDO_MATERIAL"); + } + if (options.reference_pt_disable_direct_lighting) + { + defines.push_back("DISABLE_DIRECT_LIGHTING"); + } + if (options.reference_pt_disable_specular_materials) + { + defines.push_back("DISABLE_SPECULAR_MATERIALS"); + } + if (options.reference_pt_nee_only) + { + defines.push_back("DISABLE_NON_NEE"); + } + if (options.reference_pt_disable_nee) + { + defines.push_back("DISABLE_NEE"); + } + if (options.reference_pt_nee_reservoir_resampling) + { + defines.push_back("ENABLE_NEE_RESERVOIR_SAMPLING"); + } + if (options.reference_pt_use_dxr10) + { + std::vector exports; + std::vector subobjects; + std::vector defines_str; + std::vector exports_str; + std::vector subobjects_str; + std::vector local_root_signature_associations; + setupPTKernel(capsaicin, local_root_signature_associations, defines_str, exports_str, subobjects_str); + for (auto &i : defines_str) + { + defines.push_back(i.c_str()); + } + for (auto &i : exports_str) + { + exports.push_back(i.c_str()); + } + for (auto &i : subobjects_str) + { + subobjects.push_back(i.c_str()); + } + + reference_pt_kernel_ = + gfxCreateRaytracingKernel(gfx_, reference_pt_program_, local_root_signature_associations.data(), + (uint32_t)local_root_signature_associations.size(), exports.data(), (uint32_t)exports.size(), + subobjects.data(), (uint32_t)subobjects.size(), defines.data(), (uint32_t)defines.size()); + + uint32_t entry_count[kGfxShaderGroupType_Count] { + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Raygen), + capsaicin.getSbtStrideInEntries( + kGfxShaderGroupType_Miss), // two miss shaders for scattered and shadow ray + gfxSceneGetInstanceCount(capsaicin.getScene()) + * capsaicin.getSbtStrideInEntries( + kGfxShaderGroupType_Hit), // two sets of hit groups for scattered and shadow ray + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Callable)}; + GfxKernel sbt_kernels[] {reference_pt_kernel_}; + 
reference_pt_sbt_ = gfxCreateSbt(gfx_, sbt_kernels, ARRAYSIZE(sbt_kernels), entry_count); + } + else + { + reference_pt_kernel_ = gfxCreateComputeKernel( + gfx_, reference_pt_program_, "ReferencePT", defines.data(), (uint32_t)defines.size()); + reference_pt_sbt_ = {}; + } + return !!reference_pt_program_; +} + +bool ReferencePT::checkCameraUpdated(GfxCamera const ¤tCamera) noexcept +{ + return camera.aspect == currentCamera.aspect && camera.center == currentCamera.center + && camera.eye == currentCamera.eye && camera.farZ == currentCamera.farZ + && camera.fovY == currentCamera.fovY && camera.nearZ == currentCamera.nearZ + && camera.type == currentCamera.type && camera.up == currentCamera.up; +} + +bool ReferencePT::needsRecompile(CapsaicinInternal &capsaicin, RenderOptions const &newOptions) noexcept +{ + auto lightSampler = capsaicin.getComponent(); + + // Check if options change requires kernel recompile + bool recompile = + lightSampler->needsRecompile(capsaicin) + || options.reference_pt_disable_albedo_materials != newOptions.reference_pt_disable_albedo_materials + || options.reference_pt_disable_direct_lighting != newOptions.reference_pt_disable_direct_lighting + || options.reference_pt_disable_specular_materials + != newOptions.reference_pt_disable_specular_materials + || options.reference_pt_nee_only != newOptions.reference_pt_nee_only + || options.reference_pt_disable_nee != newOptions.reference_pt_disable_nee + || options.reference_pt_nee_reservoir_resampling != newOptions.reference_pt_nee_reservoir_resampling + || options.reference_pt_use_dxr10 != newOptions.reference_pt_use_dxr10; + return recompile; +} + +void ReferencePT::setupSbt(CapsaicinInternal &capsaicin) noexcept +{ + // Populate shader binding table + gfxSbtSetShaderGroup( + gfx_, reference_pt_sbt_, kGfxShaderGroupType_Raygen, 0, kReferencePTRaygenShaderName); + gfxSbtSetShaderGroup(gfx_, reference_pt_sbt_, kGfxShaderGroupType_Miss, 0, kReferencePTMissShaderName); + gfxSbtSetShaderGroup( + gfx_, 
reference_pt_sbt_, kGfxShaderGroupType_Miss, 1, kReferencePTShadowMissShaderName); + for (uint32_t i = 0; + i < gfxAccelerationStructureGetRaytracingPrimitiveCount(gfx_, capsaicin.getAccelerationStructure()); + i++) + { + gfxSbtSetShaderGroup(gfx_, reference_pt_sbt_, kGfxShaderGroupType_Hit, + i * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit) + 0, kReferencePTHitGroupName); + gfxSbtSetShaderGroup(gfx_, reference_pt_sbt_, kGfxShaderGroupType_Hit, + i * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit) + 1, kReferencePTShadowHitGroupName); + } +} + +void ReferencePT::setupPTKernel([[maybe_unused]] CapsaicinInternal const &capsaicin, + [[maybe_unused]] std::vector &local_root_signature_associations, + [[maybe_unused]] std::vector &defines, std::vector &exports, + std::vector &subobjects) noexcept +{ + exports.push_back(kReferencePTRaygenShaderName); + exports.push_back(kReferencePTMissShaderName); + exports.push_back(kReferencePTShadowMissShaderName); + exports.push_back(kReferencePTAnyHitShaderName); + exports.push_back(kReferencePTShadowAnyHitShaderName); + exports.push_back(kReferencePTClosestHitShaderName); + + subobjects.push_back("MyShaderConfig"); + subobjects.push_back("MyPipelineConfig"); + subobjects.push_back(kReferencePTHitGroupName); + subobjects.push_back(kReferencePTShadowHitGroupName); +} + +char const *ReferencePT::getProgramName() noexcept +{ + return "render_techniques/reference_path_tracer/reference_path_tracer"; +} +} // namespace Capsaicin diff --git a/src/core/src/render_techniques/path_tracer/reference_pt.h b/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.h similarity index 59% rename from src/core/src/render_techniques/path_tracer/reference_pt.h rename to src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.h index c8b96e7..ff8b61b 100644 --- a/src/core/src/render_techniques/path_tracer/reference_pt.h +++ 
b/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,9 +21,11 @@ THE SOFTWARE. ********************************************************************/ #pragma once -#include "reference_pt_shared.h" +#include "../../geometry/path_tracing_shared.h" #include "render_technique.h" +#include + namespace Capsaicin { class ReferencePT : public RenderTechnique @@ -40,7 +42,7 @@ class ReferencePT : public RenderTechnique struct RenderOptions { - uint32_t reference_pt_bounce_count = 3; /**< Maximum number of bounces each path can take */ + uint32_t reference_pt_bounce_count = 30; /**< Maximum number of bounces each path can take */ uint32_t reference_pt_min_rr_bounces = 2; /**< Number of bounces a path takes before Russian roulette can be used */ uint32_t reference_pt_sample_count = 1; /**< Number of paths to trace per pixel per frame */ @@ -48,17 +50,21 @@ class ReferencePT : public RenderTechnique false; /**< Sets material to fixed diffuse gray on first intersected surface */ bool reference_pt_disable_direct_lighting = false; /**< Disable sampling direct lighting on first intersection */ - bool reference_pt_disable_specular_lighting = - true; /**< Disable specular sampling/evaluation and therefore setting materials to diffuse only - */ + bool reference_pt_disable_specular_materials = + false; /**< Disable specular sampling/evaluation essentially setting materials to diffuse only */ + bool reference_pt_nee_only = false; /**< Disable light contributions from source other than NEE */ + bool reference_pt_disable_nee = false; /**< Disable light contributions from Next 
Event Estimation */ + bool reference_pt_nee_reservoir_resampling = + false; /**< Use reservoir resampling for selecting NEE light samples */ + bool reference_pt_use_dxr10 = false; /**< Use dxr 1.0 ray-tracing pipelines instead of inline rt */ }; /** - * Convert render settings to internal options format. - * @param settings Current render settings. + * Convert render options to internal options format. + * @param options Current render options. * @returns The options converted. */ - static RenderOptions convertOptions(RenderSettings const &settings) noexcept; + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; /** * Gets a list of any shared components used by the current render technique. @@ -87,6 +93,17 @@ class ReferencePT : public RenderTechnique */ void render(CapsaicinInternal &capsaicin) noexcept override; + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + + /** + * Render GUI options. + * @param [in,out] capsaicin The current capsaicin context. + */ + void renderGUI(CapsaicinInternal &capsaicin) const noexcept override; + protected: /** * Initialise internal computer kernels. @@ -95,7 +112,6 @@ class ReferencePT : public RenderTechnique */ bool initKernels(CapsaicinInternal const &capsaicin) noexcept; - void terminate() noexcept; /** * Check if camera has changed. * @param currentCamera The current camera. @@ -103,15 +119,32 @@ class ReferencePT : public RenderTechnique */ bool checkCameraUpdated(GfxCamera const ¤tCamera) noexcept; - GfxBuffer rayCameraData; - GfxTexture accumulationBuffer; /**< Buffer used to store pixel running average, .w= number of samples */ - RayCamera cameraData; - uint2 bufferDimensions = uint2(0); - GfxCamera camera = {}; + /** + * Check if kernels needs to be recompiled. + * @param capsaicin The current capsaicin context. + * @param newOptions New render options. + * @return True if kernels needs to be recompiled, False otherwise. 
+ */ + virtual bool needsRecompile(CapsaicinInternal &capsaicin, RenderOptions const &newOptions) noexcept; + + virtual void setupSbt(CapsaicinInternal &capsaicin) noexcept; + + virtual void setupPTKernel(CapsaicinInternal const &capsaicin, + std::vector &local_root_signature_associations, + std::vector &defines, std::vector &exports, + std::vector &subobjects) noexcept; + + virtual char const *getProgramName() noexcept; + + GfxBuffer rayCameraData; + GfxTexture accumulationBuffer; /**< Buffer used to store pixel running average, .w= number of samples */ + RayCamera cameraData; + uint2 bufferDimensions = uint2(0); + GfxCamera camera = {}; RenderOptions options; - GfxSamplerState textureSampler; - GfxProgram reference_pt_program_; - GfxKernel reference_pt_kernel_; + GfxProgram reference_pt_program_; + GfxKernel reference_pt_kernel_; + GfxSbt reference_pt_sbt_; }; } // namespace Capsaicin diff --git a/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.rt b/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.rt new file mode 100644 index 0000000..2b70a6b --- /dev/null +++ b/src/core/src/render_techniques/reference_path_tracer/reference_path_tracer.rt @@ -0,0 +1,113 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+#define USE_INLINE_RT 0 // NOTE(review): presumably selects the ray tracing pipeline (non-inline) path in the included file - confirm in reference_path_tracer.comp
+
+#include "reference_path_tracer.comp"
+
+TriangleHitGroup ReferencePTHitGroup = // Hit group with both an any-hit and a closest-hit shader
+{
+    "ReferencePTAnyHit", // AnyHit
+    "ReferencePTClosestHit", // ClosestHit
+};
+
+TriangleHitGroup ReferencePTShadowHitGroup = // Shadow hit group: any-hit only, the closest-hit slot is left empty
+{
+    "ReferencePTShadowAnyHit", // AnyHit
+    "", // ClosestHit
+};
+
+RaytracingShaderConfig MyShaderConfig = // Payload limit depends on whether ENABLE_NEE_RESERVOIR_SAMPLING is defined
+{
+#ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+    100, // max payload size (NOTE(review): must cover the largest payload struct used below - verify against PathData)
+#else
+    88, // max payload size (NOTE(review): must cover the largest payload struct used below - verify against PathData)
+#endif
+    8 // max attribute size
+};
+
+RaytracingPipelineConfig MyPipelineConfig =
+{
+    2 // max trace recursion depth
+};
+
+[shader("raygeneration")] // Runs pathTracer() for the pixel at the current dispatch index
+void ReferencePTRaygen()
+{
+    uint2 did = DispatchRaysIndex().xy;
+    pathTracer(did, g_BufferDimensions);
+}
+
+[shader("anyhit")] // Ignores the candidate hit when AlphaTest() fails for it
+void ReferencePTAnyHit(inout PathData path, in BuiltInTriangleIntersectionAttributes attr)
+{
+    if (!AlphaTest(GetHitInfoRt(attr)))
+    {
+        IgnoreHit();
+    }
+}
+
+[shader("anyhit")] // Same alpha test as ReferencePTAnyHit, but for the shadow ray payload type
+void ReferencePTShadowAnyHit(inout ShadowRayPayload payload, in BuiltInTriangleIntersectionAttributes attr)
+{
+    if (!AlphaTest(GetHitInfoRt(attr)))
+    {
+        IgnoreHit();
+    }
+}
+
+[shader("closesthit")] // Shades the hit through pathHit() and writes the resulting path state back into the payload
+void ReferencePTClosestHit(inout PathData path, in BuiltInTriangleIntersectionAttributes attr)
+{
+    // Setup configurable constants
+    const uint minBounces = g_BounceRRCount; // Minimum bounces before early terminations are allowed
+    const uint maxBounces = g_BounceCount;
+    RayDesc ray = GetRayDescRt();
+    HitInfo hitData = GetHitInfoRt(attr);
+    IntersectData iData = MakeIntersectData(hitData);
+    path.terminated = !pathHit(ray, hitData, iData, path.randomStratified, path.lightSampler, // path ends when pathHit() returns false
+        path.bounce, minBounces, maxBounces, path.normal, path.samplePDF, path.throughput,
+#ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+        path.sampleReflectance,
+#endif
+        path.radiance);
+    path.origin = ray.Origin; // NOTE(review): presumably pathHit() updated 'ray' to the next path segment - confirm
+    path.direction = ray.Direction;
+}
+
+[shader("miss")] // Path ray left the scene: run shadePathMiss() and mark the path as terminated
+void ReferencePTMiss(inout PathData pathData)
+{
+    shadePathMiss(GetRayDescRt(), pathData.bounce, pathData.lightSampler, pathData.normal, pathData.samplePDF, pathData.throughput,
+#ifdef ENABLE_NEE_RESERVOIR_SAMPLING
+        pathData.sampleReflectance,
+#endif
+        pathData.radiance);
+    pathData.terminated = true;
+}
+
+[shader("miss")] // Shadow ray hit nothing, so flag the payload as visible
+void ReferencePTShadowMiss(inout ShadowRayPayload payload)
+{
+    payload.visible = true;
+}
diff --git a/src/core/src/render_techniques/render_technique.cpp b/src/core/src/render_techniques/render_technique.cpp
index a430076..855b40d 100644
--- a/src/core/src/render_techniques/render_technique.cpp
+++ b/src/core/src/render_techniques/render_technique.cpp
@@ -1,5 +1,5 @@
 /**********************************************************************
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -51,4 +51,9 @@ DebugViewList RenderTechnique::getDebugViews() const noexcept { return {}; } + +void RenderTechnique::renderGUI(CapsaicinInternal &capsaicin) const noexcept +{ + (void)&capsaicin; +} } // namespace Capsaicin diff --git a/src/core/src/render_techniques/render_technique.h b/src/core/src/render_techniques/render_technique.h index d300b61..76eeefa 100644 --- a/src/core/src/render_techniques/render_technique.h +++ b/src/core/src/render_techniques/render_technique.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,6 @@ THE SOFTWARE. ********************************************************************/ #pragma once -#include "capsaicin.h" #include "capsaicin_internal_types.h" #include "components/component.h" @@ -89,6 +88,17 @@ class RenderTechnique : public Timeable */ virtual void render(CapsaicinInternal &capsaicin) noexcept = 0; + /** + * Destroy any used internal resources and shutdown. + */ + virtual void terminate() noexcept = 0; + + /** + * Render GUI options. + * @param [in,out] capsaicin The current capsaicin context. 
+ */ + virtual void renderGUI(CapsaicinInternal &capsaicin) const noexcept; + protected: }; } // namespace Capsaicin diff --git a/src/core/src/render_techniques/skybox/skybox.cpp b/src/core/src/render_techniques/skybox/skybox.cpp index dab9afd..25b556c 100644 --- a/src/core/src/render_techniques/skybox/skybox.cpp +++ b/src/core/src/render_techniques/skybox/skybox.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -71,7 +71,7 @@ void Skybox::render(CapsaicinInternal &capsaicin) noexcept gfxCommandDraw(gfx_, 3); } -void Skybox::terminate() +void Skybox::terminate() noexcept { gfxDestroyProgram(gfx_, skybox_program_); gfxDestroyKernel(gfx_, skybox_kernel_); diff --git a/src/core/src/render_techniques/skybox/skybox.frag b/src/core/src/render_techniques/skybox/skybox.frag index 9633d06..4890c72 100644 --- a/src/core/src/render_techniques/skybox/skybox.frag +++ b/src/core/src/render_techniques/skybox/skybox.frag @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -28,15 +28,16 @@ TextureCube g_EnvironmentBuffer; SamplerState g_LinearSampler; +#include "../../math/transform.hlsl" + float4 main(in float4 pos : SV_Position) : SV_Target { float2 uv = pos.xy / g_BufferDimensions; float2 ndc = 2.0f * float2(uv.x, 1.0f - uv.y) - 1.0f; - float4 world = mul(g_ViewProjectionInverse, float4(ndc, 1.0f, 1.0f)); - world /= world.w; // perspective divide + float3 world = transformPointProjection(float3(ndc, 1.0f), g_ViewProjectionInverse); - float3 sky_sample = g_EnvironmentBuffer.Sample(g_LinearSampler, world.xyz - g_Eye).xyz; + float3 sky_sample = g_EnvironmentBuffer.Sample(g_LinearSampler, world - g_Eye).xyz; return float4(sky_sample, 1.0f); } diff --git a/src/core/src/render_techniques/skybox/skybox.h b/src/core/src/render_techniques/skybox/skybox.h index d44480e..9548fc0 100644 --- a/src/core/src/render_techniques/skybox/skybox.h +++ b/src/core/src/render_techniques/skybox/skybox.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -52,9 +52,12 @@ class Skybox : public RenderTechnique */ void render(CapsaicinInternal &capsaicin) noexcept override; -protected: - void terminate(); + /** + * Destroy any used internal resources and shutdown. 
+ */ + void terminate() noexcept override; +protected: GfxProgram skybox_program_; GfxKernel skybox_kernel_; }; diff --git a/src/core/src/render_techniques/skybox/skybox.vert b/src/core/src/render_techniques/skybox/skybox.vert index 2a3338f..32413eb 100644 --- a/src/core/src/render_techniques/skybox/skybox.vert +++ b/src/core/src/render_techniques/skybox/skybox.vert @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/render_techniques/ssgi/ssgi.comp b/src/core/src/render_techniques/ssgi/ssgi.comp index 5d58329..57981f8 100644 --- a/src/core/src/render_techniques/ssgi/ssgi.comp +++ b/src/core/src/render_techniques/ssgi/ssgi.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,7 @@ float4 g_InvDeviceZ; // ConstantBuffer g_SSGIConstants; Texture2D g_DepthBuffer; -Texture2D g_NormalBuffer; +Texture2D g_ShadingNormalBuffer; Texture2D g_LightingBuffer; RWTexture2D g_OcclusionAndBentNormalBuffer; RWTexture2D g_NearFieldGlobalIlluminationBuffer; @@ -138,7 +138,7 @@ void Main(int2 did : SV_DispatchThreadID) } float2 uv = (did + 0.5f) / (g_SSGIConstants.buffer_dimensions); - float3 normal = g_NormalBuffer.SampleLevel(g_PointSampler, uv, 0); + float3 normal = g_ShadingNormalBuffer.SampleLevel(g_PointSampler, uv, 0); if (dot(normal, normal) == 0.0f) { g_OcclusionAndBentNormalBuffer[did] = float4(0.f, 0.f, 0.f, 0.f); @@ -208,7 +208,7 @@ void Main(int2 did : SV_DispatchThreadID) // From h3r2tic's demo // https://github.com/h3r2tic/rtoy-samples/blob/main/assets/shaders/ssgi/ssgi.glsl float3 sample_lighting = g_LightingBuffer.SampleLevel(g_PointSampler, sample_uv, 0); - float3 sample_world_normal = normalize(2.0f * g_NormalBuffer.SampleLevel(g_PointSampler, sample_uv, 0) - 1.f); + float3 sample_world_normal = normalize(2.0f * g_ShadingNormalBuffer.SampleLevel(g_PointSampler, sample_uv, 0) - 1.f); if (step_index > 0) { diff --git a/src/core/src/render_techniques/ssgi/ssgi.cpp b/src/core/src/render_techniques/ssgi/ssgi.cpp index 29e2908..37577c6 100644 --- a/src/core/src/render_techniques/ssgi/ssgi.cpp +++ b/src/core/src/render_techniques/ssgi/ssgi.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -34,9 +34,7 @@ SSGI::SSGI() SSGI::~SSGI() { - destroyStaticResources(); - destroyBuffers(); - destroyKernels(); + terminate(); } RenderOptionList SSGI::getRenderOptions() noexcept @@ -50,14 +48,14 @@ RenderOptionList SSGI::getRenderOptions() noexcept return newOptions; } -SSGI::RenderOptions SSGI::convertOptions(RenderSettings const &settings) noexcept +SSGI::RenderOptions SSGI::convertOptions(RenderOptionList const &options) noexcept { RenderOptions newOptions; - RENDER_OPTION_GET(ssgi_slice_count_, newOptions, settings.options_) - RENDER_OPTION_GET(ssgi_step_count_, newOptions, settings.options_) - RENDER_OPTION_GET(ssgi_view_radius_, newOptions, settings.options_) - RENDER_OPTION_GET(ssgi_falloff_range_, newOptions, settings.options_) - RENDER_OPTION_GET(ssgi_unroll_kernel_, newOptions, settings.options_) + RENDER_OPTION_GET(ssgi_slice_count_, newOptions, options) + RENDER_OPTION_GET(ssgi_step_count_, newOptions, options) + RENDER_OPTION_GET(ssgi_view_radius_, newOptions, options) + RENDER_OPTION_GET(ssgi_falloff_range_, newOptions, options) + RENDER_OPTION_GET(ssgi_unroll_kernel_, newOptions, options) return newOptions; } @@ -77,7 +75,7 @@ AOVList SSGI::getAOVs() const noexcept aovs.push_back({"NearFieldGlobalIllumination", AOV::Write, AOV::None, DXGI_FORMAT_R16G16B16A16_FLOAT}); aovs.push_back({"VisibilityDepth"}); - aovs.push_back({"Normal"}); + aovs.push_back({"ShadingNormal"}); aovs.push_back({"PrevCombinedIllumination"}); return aovs; } @@ -101,10 +99,9 @@ bool SSGI::init(CapsaicinInternal const &capsaicin) noexcept void SSGI::render(CapsaicinInternal &capsaicin) noexcept { // BE CAREFUL: Used for rendering current frame and initializing next frame - auto const &render_settings = capsaicin.getRenderSettings(); - auto const options = convertOptions(render_settings); - auto blue_noise_sampler = 
capsaicin.getComponent(); - auto stratified_sampler = capsaicin.getComponent(); + auto const options = convertOptions(capsaicin.getOptions()); + auto blue_noise_sampler = capsaicin.getComponent(); + auto stratified_sampler = capsaicin.getComponent(); options_ = options; @@ -154,8 +151,9 @@ void SSGI::render(CapsaicinInternal &capsaicin) noexcept stratified_sampler->addProgramParameters(capsaicin, ssgi_program_); gfxProgramSetParameter(gfx_, ssgi_program_, "g_SSGIConstants", ssgi_constant_buffer); - gfxProgramSetParameter(gfx_, ssgi_program_, "g_DepthBuffer", capsaicin.getAOVBuffer("Depth")); - gfxProgramSetParameter(gfx_, ssgi_program_, "g_NormalBuffer", capsaicin.getAOVBuffer("Details")); + gfxProgramSetParameter(gfx_, ssgi_program_, "g_DepthBuffer", capsaicin.getAOVBuffer("VisibilityDepth")); + gfxProgramSetParameter( + gfx_, ssgi_program_, "g_ShadingNormalBuffer", capsaicin.getAOVBuffer("ShadingNormal")); gfxProgramSetParameter( gfx_, ssgi_program_, "g_LightingBuffer", capsaicin.getAOVBuffer("PrevCombinedIllumination")); gfxProgramSetParameter(gfx_, ssgi_program_, "g_OcclusionAndBentNormalBuffer", @@ -178,7 +176,7 @@ void SSGI::render(CapsaicinInternal &capsaicin) noexcept } // Debug modes - if (render_settings.debug_view_ == "Occlusion") + if (capsaicin.getCurrentDebugView() == "Occlusion") { GfxCommandEvent const command_event(gfx_, "Debug Occlusion"); @@ -196,7 +194,7 @@ void SSGI::render(CapsaicinInternal &capsaicin) noexcept gfxCommandBindKernel(gfx_, debug_occlusion_kernel_); gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); } - else if (render_settings.debug_view_ == "BentNormal") + else if (capsaicin.getCurrentDebugView() == "BentNormal") { GfxCommandEvent const command_event(gfx_, "Debug Bent Normal"); @@ -219,7 +217,14 @@ void SSGI::render(CapsaicinInternal &capsaicin) noexcept gfxDestroyBuffer(gfx_, ssgi_constant_buffer); } -void SSGI::initializeStaticResources(CapsaicinInternal const &capsaicin) +void SSGI::terminate() noexcept +{ + 
destroyStaticResources(); + destroyBuffers(); + destroyKernels(); +} + +void SSGI::initializeStaticResources([[maybe_unused]] CapsaicinInternal const &capsaicin) { point_sampler_ = gfxCreateSamplerState(gfx_, D3D12_FILTER_MIN_MAG_MIP_POINT, D3D12_TEXTURE_ADDRESS_MODE_CLAMP, D3D12_TEXTURE_ADDRESS_MODE_CLAMP); @@ -271,9 +276,7 @@ void SSGI::destroyStaticResources() gfxDestroySamplerState(gfx_, point_sampler_); } -void SSGI::destroyBuffers() -{ -} +void SSGI::destroyBuffers() {} void SSGI::destroyKernels() { diff --git a/src/core/src/render_techniques/ssgi/ssgi.h b/src/core/src/render_techniques/ssgi/ssgi.h index 413dd89..6816b01 100644 --- a/src/core/src/render_techniques/ssgi/ssgi.h +++ b/src/core/src/render_techniques/ssgi/ssgi.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -47,11 +47,11 @@ class SSGI : public RenderTechnique }; /** - * Convert render settings to internal options format. - * @param settings Current render settings. + * Convert render options to internal options format. + * @param options Current render options. * @returns The options converted. */ - static RenderOptions convertOptions(RenderSettings const &settings) noexcept; + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; /** * Gets a list of any shared components used by the current render technique. @@ -86,6 +86,11 @@ class SSGI : public RenderTechnique */ void render(CapsaicinInternal &capsaicin) noexcept override; + /** + * Destroy any used internal resources and shutdown. 
+ */ + void terminate() noexcept override; + protected: void initializeStaticResources(CapsaicinInternal const &capsaicin); void initializeBuffers(CapsaicinInternal const &capsaicin); diff --git a/src/core/src/render_techniques/ssgi/ssgi_debug.comp b/src/core/src/render_techniques/ssgi/ssgi_debug.comp index 68fae30..504d35c 100644 --- a/src/core/src/render_techniques/ssgi/ssgi_debug.comp +++ b/src/core/src/render_techniques/ssgi/ssgi_debug.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/render_techniques/ssgi/ssgi_shared.h b/src/core/src/render_techniques/ssgi/ssgi_shared.h index 6c3ce34..a76630b 100644 --- a/src/core/src/render_techniques/ssgi/ssgi_shared.h +++ b/src/core/src/render_techniques/ssgi/ssgi_shared.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/core/src/render_techniques/switcher/switcher.h b/src/core/src/render_techniques/switcher/switcher.h index 690860b..f48c92a 100644 --- a/src/core/src/render_techniques/switcher/switcher.h +++ b/src/core/src/render_techniques/switcher/switcher.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -57,7 +57,7 @@ class Switcher : public RenderTechnique {} /** Defaulted destructor */ - ~Switcher() = default; + ~Switcher() { terminate(); } /* * Gets configuration options for current technique. @@ -72,11 +72,11 @@ class Switcher : public RenderTechnique } /** - * Convert render settings to internal options format. - * @param settings Current render settings. + * Convert render options to internal options format. + * @param options Current render options. * @returns The options converted. */ - static RenderOptions convertOptions(RenderSettings const &settings) noexcept + static RenderOptions convertOptions(RenderOptionList const &options) noexcept { RenderOptions newOptions; newOptions.mixer_use_second_technique = *std::get_if( @@ -137,6 +137,7 @@ class Switcher : public RenderTechnique */ bool init(CapsaicinInternal const &capsaicin) noexcept override { + options = convertOptions(capsaicin.getOptions()); if (!options.mixer_use_second_technique) { return technique1.init(capsaicin); @@ -153,7 +154,21 @@ class Switcher : public RenderTechnique */ void render(CapsaicinInternal &capsaicin) noexcept override { - options = convertOptions(capsaicin.getRenderSettings()); + auto const optionsNew = convertOptions(capsaicin.getOptions()); + if (optionsNew.light_sampler_type != options.light_sampler_type) + { + if (!options.mixer_use_second_technique) + { + technique1.terminate(); + technique2.init(capsaicin); + } + else + { + technique2.terminate(); + technique1.init(capsaicin); + } + } + options = optionsNew; if (!options.mixer_use_second_technique) { technique1.render(capsaicin); @@ -164,6 +179,21 @@ class Switcher : public RenderTechnique } } + /** + * Destroy any used internal resources and shutdown. 
+ */ + void terminate() noexcept override + { + if (!options.mixer_use_second_technique) + { + return technique1.terminate(); + } + else + { + return technique2.terminate(); + } + } + /** * Gets number of timestamp queries. * @returns The timestamp query count. diff --git a/src/core/src/render_techniques/taa/taa.comp b/src/core/src/render_techniques/taa/taa.comp index c2cf014..67ef6bf 100644 --- a/src/core/src/render_techniques/taa/taa.comp +++ b/src/core/src/render_techniques/taa/taa.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -25,7 +25,7 @@ THE SOFTWARE. #define RADIUS 1 -#define GROUP_SIZE 16 +#define GROUP_SIZE 8 #define TILE_DIM (2 * RADIUS + GROUP_SIZE) @@ -38,8 +38,12 @@ Texture2D g_VelocityBuffer; RWTexture2D g_ColorBuffer; RWTexture2D g_OutputBuffer; +#ifdef HAS_DIRECT_LIGHTING_BUFFER Texture2D g_DirectLightingBuffer; +#endif +#ifdef HAS_GLOBAL_ILLUMINATION_BUFFER Texture2D g_GlobalIlluminationBuffer; +#endif SamplerState g_LinearSampler; SamplerState g_NearestSampler; @@ -189,15 +193,24 @@ void ResolveTemporal(in uint2 globalID : SV_DispatchThreadID, in uint2 localID : float2 uv3 = (coord3 + 0.5f) * texel_size; float2 uv4 = (coord4 + 0.5f) * texel_size; - float3 color0 = g_DirectLightingBuffer.SampleLevel(g_NearestSampler, uv1, 0.0f).xyz; - float3 color1 = g_DirectLightingBuffer.SampleLevel(g_NearestSampler, uv2, 0.0f).xyz; - float3 color2 = g_DirectLightingBuffer.SampleLevel(g_NearestSampler, uv3, 0.0f).xyz; - float3 color3 = g_DirectLightingBuffer.SampleLevel(g_NearestSampler, uv4, 0.0f).xyz; + float3 color0 = 0.0f.xxx; + float3 color1 = 0.0f.xxx; + float3 color2 = 0.0f.xxx; + float3 color3 = 0.0f.xxx; +#ifdef 
HAS_DIRECT_LIGHTING_BUFFER + color0 += g_DirectLightingBuffer.SampleLevel(g_NearestSampler, uv1, 0.0f).xyz; + color1 += g_DirectLightingBuffer.SampleLevel(g_NearestSampler, uv2, 0.0f).xyz; + color2 += g_DirectLightingBuffer.SampleLevel(g_NearestSampler, uv3, 0.0f).xyz; + color3 += g_DirectLightingBuffer.SampleLevel(g_NearestSampler, uv4, 0.0f).xyz; +#endif + +#ifdef HAS_GLOBAL_ILLUMINATION_BUFFER color0 += g_GlobalIlluminationBuffer.SampleLevel(g_NearestSampler, uv1, 0.0f).xyz; color1 += g_GlobalIlluminationBuffer.SampleLevel(g_NearestSampler, uv2, 0.0f).xyz; color2 += g_GlobalIlluminationBuffer.SampleLevel(g_NearestSampler, uv3, 0.0f).xyz; color3 += g_GlobalIlluminationBuffer.SampleLevel(g_NearestSampler, uv4, 0.0f).xyz; +#endif Tile[localIndex] = saturate(color0); Tile[localIndex + TILE_DIM * TILE_DIM / 4] = saturate(color1); @@ -253,10 +266,15 @@ void ResolveTemporal(in uint2 globalID : SV_DispatchThreadID, in uint2 localID : [numthreads(8, 8, 1)] void ResolvePassthru(in uint2 did : SV_DispatchThreadID) { - float3 color = g_DirectLightingBuffer.Load(int3(did, 0)).xyz; - color += g_GlobalIlluminationBuffer.Load(int3(did, 0)).xyz; - - g_ColorBuffer[did] = float4(color, 1.0f); + float3 colour = 0.0f.xxx; +#ifdef HAS_DIRECT_LIGHTING_BUFFER + colour += g_DirectLightingBuffer.Load(int3(did, 0)).xyz; +#endif +#ifdef HAS_GLOBAL_ILLUMINATION_BUFFER + colour += g_GlobalIlluminationBuffer.Load(int3(did, 0)).xyz; +#endif + + g_ColorBuffer[did] = float4(colour, 1.0f); } [numthreads(8, 8, 1)] diff --git a/src/core/src/render_techniques/taa/taa.cpp b/src/core/src/render_techniques/taa/taa.cpp index 7b6efdb..ad5eb96 100644 --- a/src/core/src/render_techniques/taa/taa.cpp +++ b/src/core/src/render_techniques/taa/taa.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -41,10 +41,10 @@ RenderOptionList TAA::getRenderOptions() noexcept return newOptions; } -TAA::RenderOptions TAA::convertOptions(RenderSettings const &settings) noexcept +TAA::RenderOptions TAA::convertOptions(RenderOptionList const &options) noexcept { RenderOptions newOptions; - RENDER_OPTION_GET(taa_enable, newOptions, settings.options_) + RENDER_OPTION_GET(taa_enable, newOptions, options) return newOptions; } @@ -55,17 +55,28 @@ AOVList TAA::getAOVs() const noexcept aovs.push_back({"VisibilityDepth"}); aovs.push_back({"Velocity"}); - aovs.push_back({"DirectLighting"}); - aovs.push_back({"GlobalIllumination"}); + aovs.push_back({.name = "DirectLighting", .flags = AOV::Optional}); + aovs.push_back({.name = "GlobalIllumination", .flags = AOV::Optional}); return aovs; } bool TAA::init(CapsaicinInternal const &capsaicin) noexcept { + std::vector defines; + if (capsaicin.hasAOVBuffer("DirectLighting")) + { + defines.push_back("HAS_DIRECT_LIGHTING_BUFFER"); + } + if (capsaicin.hasAOVBuffer("GlobalIllumination")) + { + defines.push_back("HAS_GLOBAL_ILLUMINATION_BUFFER"); + } taa_program_ = gfxCreateProgram(gfx_, "render_techniques/taa/taa", capsaicin.getShaderPath()); - resolve_temporal_kernel_ = gfxCreateComputeKernel(gfx_, taa_program_, "ResolveTemporal"); - resolve_passthru_kernel_ = gfxCreateComputeKernel(gfx_, taa_program_, "ResolvePassthru"); - update_history_kernel_ = gfxCreateComputeKernel(gfx_, taa_program_, "UpdateHistory"); + resolve_temporal_kernel_ = gfxCreateComputeKernel( + gfx_, taa_program_, "ResolveTemporal", defines.data(), (uint32_t)defines.size()); + resolve_passthru_kernel_ = gfxCreateComputeKernel( + gfx_, taa_program_, "ResolvePassthru", defines.data(), (uint32_t)defines.size()); + update_history_kernel_ = gfxCreateComputeKernel(gfx_, taa_program_, "UpdateHistory"); return !!taa_program_; } 
@@ -78,7 +89,7 @@ void TAA::render(CapsaicinInternal &capsaicin) noexcept bool not_cleared_history = true; if (buffer_width != color_buffers_->getWidth() || buffer_height != color_buffers_->getHeight()) { - for (GfxTexture color_buffer : color_buffers_) + for (GfxTexture &color_buffer : color_buffers_) gfxDestroyTexture(gfx_, color_buffer); for (uint32_t i = 0; i < ARRAYSIZE(color_buffers_); ++i) @@ -99,7 +110,7 @@ void TAA::render(CapsaicinInternal &capsaicin) noexcept // Bind the shader parameters uint32_t const buffer_dimensions[] = {buffer_width, buffer_height}; - options = convertOptions(capsaicin.getRenderSettings()); + options = convertOptions(capsaicin.getOptions()); gfxProgramSetParameter( gfx_, taa_program_, "g_HaveHistory", not_cleared_history && capsaicin.getFrameIndex() > 0); @@ -109,10 +120,16 @@ void TAA::render(CapsaicinInternal &capsaicin) noexcept gfxProgramSetParameter(gfx_, taa_program_, "g_VelocityBuffer", capsaicin.getAOVBuffer("Velocity")); gfxProgramSetParameter(gfx_, taa_program_, "g_ColorBuffer", capsaicin.getAOVBuffer("Color")); - gfxProgramSetParameter( - gfx_, taa_program_, "g_DirectLightingBuffer", capsaicin.getAOVBuffer("DirectLighting")); - gfxProgramSetParameter( - gfx_, taa_program_, "g_GlobalIlluminationBuffer", capsaicin.getAOVBuffer("GlobalIllumination")); + if (capsaicin.hasAOVBuffer("DirectLighting")) + { + gfxProgramSetParameter( + gfx_, taa_program_, "g_DirectLightingBuffer", capsaicin.getAOVBuffer("DirectLighting")); + } + if (capsaicin.hasAOVBuffer("GlobalIllumination")) + { + gfxProgramSetParameter( + gfx_, taa_program_, "g_GlobalIlluminationBuffer", capsaicin.getAOVBuffer("GlobalIllumination")); + } gfxProgramSetParameter(gfx_, taa_program_, "g_LinearSampler", capsaicin.getLinearSampler()); gfxProgramSetParameter(gfx_, taa_program_, "g_NearestSampler", capsaicin.getNearestSampler()); @@ -161,9 +178,9 @@ void TAA::render(CapsaicinInternal &capsaicin) noexcept } } -void TAA::terminate() +void TAA::terminate() noexcept { - 
for (GfxTexture color_buffer : color_buffers_) + for (GfxTexture &color_buffer : color_buffers_) gfxDestroyTexture(gfx_, color_buffer); gfxDestroyProgram(gfx_, taa_program_); @@ -173,4 +190,9 @@ void TAA::terminate() memset(color_buffers_, 0, sizeof(color_buffers_)); } + +void TAA::renderGUI(CapsaicinInternal &capsaicin) const noexcept +{ + ImGui::Checkbox("Use TAA", &capsaicin.getOption("taa_enable")); +} } // namespace Capsaicin diff --git a/src/core/src/render_techniques/taa/taa.h b/src/core/src/render_techniques/taa/taa.h index a8b54ed..96f0ff1 100644 --- a/src/core/src/render_techniques/taa/taa.h +++ b/src/core/src/render_techniques/taa/taa.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -43,11 +43,11 @@ class TAA : public RenderTechnique }; /** - * Convert render settings to internal options format. - * @param settings Current render settings. + * Convert render options to internal options format. + * @param options Current render options. * @returns The options converted. */ - static RenderOptions convertOptions(RenderSettings const &settings) noexcept; + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; /** * Gets the required list of AOVs needed for the current render technique. @@ -70,9 +70,18 @@ class TAA : public RenderTechnique */ void render(CapsaicinInternal &capsaicin) noexcept override; -protected: - void terminate(); + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + + /** + * Render GUI options. + * @param [in,out] capsaicin The current capsaicin context. 
+ */ + void renderGUI(CapsaicinInternal &capsaicin) const noexcept override; +protected: RenderOptions options; GfxTexture color_buffers_[2]; diff --git a/src/core/src/render_techniques/taa/update_history.comp b/src/core/src/render_techniques/taa/update_history.comp index 2e850ad..6105e62 100644 --- a/src/core/src/render_techniques/taa/update_history.comp +++ b/src/core/src/render_techniques/taa/update_history.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,16 +21,25 @@ THE SOFTWARE. ********************************************************************/ #include "../../gpu_shared.h" +#ifdef HAS_DIRECT_LIGHTING_BUFFER Texture2D g_DirectLightingBuffer; +#endif +#ifdef HAS_GLOBAL_ILLUMINATION_BUFFER Texture2D g_GlobalIlluminationBuffer; +#endif RWTexture2D g_PrevCombinedIlluminationBuffer; [numthreads(8, 8, 1)] void UpdateHistory(in uint2 did : SV_DispatchThreadID) { - float3 direct_lighting = g_DirectLightingBuffer.Load(int3(did, 0)).xyz; - float3 global_illumination = g_GlobalIlluminationBuffer.Load(int3(did, 0)).xyz; + float3 colour = 0.0f.xxx; +#ifdef HAS_DIRECT_LIGHTING_BUFFER + colour += g_DirectLightingBuffer.Load(int3(did, 0)).xyz; +#endif +#ifdef HAS_GLOBAL_ILLUMINATION_BUFFER + colour += g_GlobalIlluminationBuffer.Load(int3(did, 0)).xyz; +#endif - g_PrevCombinedIlluminationBuffer[did] = float4(direct_lighting + global_illumination, 1.0f); + g_PrevCombinedIlluminationBuffer[did] = float4(colour, 1.0f); } diff --git a/src/core/src/render_techniques/taa/update_history.cpp b/src/core/src/render_techniques/taa/update_history.cpp index f2d2b66..6e9ee31 100644 --- a/src/core/src/render_techniques/taa/update_history.cpp +++ 
b/src/core/src/render_techniques/taa/update_history.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -38,25 +38,41 @@ AOVList UpdateHistory::getAOVs() const noexcept { AOVList aovs; aovs.push_back({"PrevCombinedIllumination", AOV::Write, AOV::Accumulate, DXGI_FORMAT_R16G16B16A16_FLOAT}); - aovs.push_back({"DirectLighting"}); - aovs.push_back({"GlobalIllumination"}); + aovs.push_back({.name = "DirectLighting", .flags = AOV::Optional}); + aovs.push_back({.name = "GlobalIllumination", .flags = AOV::Optional}); return aovs; } bool UpdateHistory::init(CapsaicinInternal const &capsaicin) noexcept { + std::vector defines; + if (capsaicin.hasAOVBuffer("DirectLighting")) + { + defines.push_back("HAS_DIRECT_LIGHTING_BUFFER"); + } + if (capsaicin.hasAOVBuffer("GlobalIllumination")) + { + defines.push_back("HAS_GLOBAL_ILLUMINATION_BUFFER"); + } update_history_program_ = gfxCreateProgram(gfx_, "render_techniques/taa/update_history", capsaicin.getShaderPath()); - update_history_kernel_ = gfxCreateComputeKernel(gfx_, update_history_program_, "UpdateHistory"); + update_history_kernel_ = gfxCreateComputeKernel( + gfx_, update_history_program_, "UpdateHistory", defines.data(), (uint32_t)defines.size()); return !!update_history_program_; } void UpdateHistory::render(CapsaicinInternal &capsaicin) noexcept { - gfxProgramSetParameter( - gfx_, update_history_program_, "g_DirectLightingBuffer", capsaicin.getAOVBuffer("DirectLighting")); - gfxProgramSetParameter(gfx_, update_history_program_, "g_GlobalIlluminationBuffer", - capsaicin.getAOVBuffer("GlobalIllumination")); + if (capsaicin.hasAOVBuffer("DirectLighting")) + { + gfxProgramSetParameter(gfx_, 
update_history_program_, "g_DirectLightingBuffer", + capsaicin.getAOVBuffer("DirectLighting")); + } + if (capsaicin.hasAOVBuffer("GlobalIllumination")) + { + gfxProgramSetParameter(gfx_, update_history_program_, "g_GlobalIlluminationBuffer", + capsaicin.getAOVBuffer("GlobalIllumination")); + } gfxProgramSetParameter(gfx_, update_history_program_, "g_PrevCombinedIlluminationBuffer", capsaicin.getAOVBuffer("PrevCombinedIllumination")); @@ -69,7 +85,7 @@ void UpdateHistory::render(CapsaicinInternal &capsaicin) noexcept gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); } -void UpdateHistory::terminate() +void UpdateHistory::terminate() noexcept { gfxDestroyProgram(gfx_, update_history_program_); gfxDestroyKernel(gfx_, update_history_kernel_); diff --git a/src/core/src/render_techniques/taa/update_history.h b/src/core/src/render_techniques/taa/update_history.h index 6c1638b..91278c8 100644 --- a/src/core/src/render_techniques/taa/update_history.h +++ b/src/core/src/render_techniques/taa/update_history.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -52,9 +52,12 @@ class UpdateHistory : public RenderTechnique */ void render(CapsaicinInternal &capsaicin) noexcept override; -protected: - void terminate(); + /** + * Destroy any used internal resources and shutdown. 
+ */ + void terminate() noexcept override; +protected: GfxProgram update_history_program_; GfxKernel update_history_kernel_; }; diff --git a/src/core/src/render_techniques/tone_mapping/tone_mapping.comp b/src/core/src/render_techniques/tone_mapping/tone_mapping.comp index 4e8ddcc..90ac8f2 100644 --- a/src/core/src/render_techniques/tone_mapping/tone_mapping.comp +++ b/src/core/src/render_techniques/tone_mapping/tone_mapping.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,11 +22,14 @@ THE SOFTWARE. #include "../../math/color.hlsl" uint2 g_BufferDimensions; +uint g_FrameIndex; float g_Exposure; RWTexture2D g_InputBuffer; RWTexture2D g_OutputBuffer; +#include "../../components/blue_noise_sampler/blue_noise_sampler.hlsl" + float3 EvalLogContrastFunc(in float3 color, in float epsilon, in float logMidpoint, in float contrast) { float3 logColor = log2(color + epsilon); @@ -44,6 +47,14 @@ float3 tonemapSimple(in float3 color) return lerp((peak / (peak + 1.0f)) * ratio, color, blend_amount); } +float3 ditherColor(in uint2 pixel, in float3 color) +{ + float v = BlueNoise_Sample1D(pixel, g_FrameIndex); + float o = 2.0f * v - 1.0f; // to (-1, 1) range + v = max(o / sqrt(abs(o)), -1.0f); + return color + v / 255.0f; +} + [numthreads(8, 8, 1)] void Tonemap(in uint2 did : SV_DispatchThreadID) { @@ -54,6 +65,7 @@ void Tonemap(in uint2 did : SV_DispatchThreadID) color = saturate(tonemapSimple(color)); color = EvalLogContrastFunc(1.2f * color, 1e-5f, 0.18f, 1.2f); color = convertToSRGB(color); + color = ditherColor(did, color); g_OutputBuffer[did] = float4(color, 1.0f); } diff --git 
a/src/core/src/render_techniques/tone_mapping/tone_mapping.cpp b/src/core/src/render_techniques/tone_mapping/tone_mapping.cpp index 8568999..4b19130 100644 --- a/src/core/src/render_techniques/tone_mapping/tone_mapping.cpp +++ b/src/core/src/render_techniques/tone_mapping/tone_mapping.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,6 +22,7 @@ THE SOFTWARE. #include "tone_mapping.h" #include "capsaicin_internal.h" +#include "../../components/blue_noise_sampler/blue_noise_sampler.h" namespace Capsaicin { @@ -42,14 +43,21 @@ RenderOptionList ToneMapping::getRenderOptions() noexcept return newOptions; } -ToneMapping::RenderOptions ToneMapping::convertOptions(RenderSettings const &settings) noexcept +ToneMapping::RenderOptions ToneMapping::convertOptions(RenderOptionList const &options) noexcept { RenderOptions newOptions; - RENDER_OPTION_GET(tonemap_enable, newOptions, settings.options_) - RENDER_OPTION_GET(tonemap_exposure, newOptions, settings.options_) + RENDER_OPTION_GET(tonemap_enable, newOptions, options) + RENDER_OPTION_GET(tonemap_exposure, newOptions, options) return newOptions; } +ComponentList ToneMapping::getComponents() const noexcept +{ + ComponentList components; + components.emplace_back(COMPONENT_MAKE(BlueNoiseSampler)); + return components; +} + AOVList ToneMapping::getAOVs() const noexcept { AOVList aovs; @@ -68,23 +76,31 @@ bool ToneMapping::init(CapsaicinInternal const &capsaicin) noexcept void ToneMapping::render(CapsaicinInternal &capsaicin) noexcept { - auto const &settings = capsaicin.getRenderSettings(); - options = convertOptions(capsaicin.getRenderSettings()); + options = 
convertOptions(capsaicin.getOptions()); if (!options.tonemap_enable) return; - uint32_t const buffer_dimensions[] = {capsaicin.getWidth(), capsaicin.getHeight()}; + uint32_t const buffer_dimensions[] = + { + capsaicin.getWidth(), + capsaicin.getHeight() + }; + gfxProgramSetParameter(gfx_, tone_mapping_program_, "g_BufferDimensions", buffer_dimensions); + gfxProgramSetParameter(gfx_, tone_mapping_program_, "g_FrameIndex", capsaicin.getFrameIndex()); gfxProgramSetParameter(gfx_, tone_mapping_program_, "g_Exposure", options.tonemap_exposure); - GfxTexture input = capsaicin.getAOVBuffer("Color"); - GfxTexture output = input; - if (!settings.debug_view_.empty() && settings.debug_view_ != "None") + + GfxTexture input = capsaicin.getAOVBuffer("Color"); + GfxTexture output = input; + auto debug_view = capsaicin.getCurrentDebugView(); + + if (!debug_view.empty() && debug_view != "None") { // Tone map the debug buffer if we are using a debug view - if (capsaicin.checkDebugViewAOV(settings.debug_view_)) + if (capsaicin.checkDebugViewAOV(debug_view)) { // If the debug view is actually an AOV then only tonemap if its a floating point format - auto const debugAOV = capsaicin.getAOVBuffer(settings.debug_view_); + auto const debugAOV = capsaicin.getAOVBuffer(debug_view); auto const format = debugAOV.getFormat(); if (format == DXGI_FORMAT_R32G32B32A32_FLOAT || format == DXGI_FORMAT_R32G32B32_FLOAT || format == DXGI_FORMAT_R16G16B16A16_FLOAT || format == DXGI_FORMAT_R11G11B10_FLOAT) @@ -99,6 +115,10 @@ void ToneMapping::render(CapsaicinInternal &capsaicin) noexcept output = input; } } + + auto blue_noise_sampler = capsaicin.getComponent(); + blue_noise_sampler->addProgramParameters(capsaicin, tone_mapping_program_); + gfxProgramSetParameter(gfx_, tone_mapping_program_, "g_InputBuffer", input); gfxProgramSetParameter(gfx_, tone_mapping_program_, "g_OutputBuffer", output); @@ -110,9 +130,18 @@ void ToneMapping::render(CapsaicinInternal &capsaicin) noexcept gfxCommandDispatch(gfx_, 
num_groups_x, num_groups_y, 1); } -void ToneMapping::terminate() +void ToneMapping::terminate() noexcept { gfxDestroyKernel(gfx_, tone_mapping_kernel_); gfxDestroyProgram(gfx_, tone_mapping_program_); } + +void ToneMapping::renderGUI(CapsaicinInternal &capsaicin) const noexcept +{ + bool &enabled = capsaicin.getOption("tonemap_enable"); + if (!enabled) ImGui::BeginDisabled(true); + ImGui::DragFloat("Exposure", &capsaicin.getOption("tonemap_exposure"), 5e-3f); + if (!enabled) ImGui::EndDisabled(); + ImGui::Checkbox("Enable Tone Mapping", &enabled); +} } // namespace Capsaicin diff --git a/src/core/src/render_techniques/tone_mapping/tone_mapping.h b/src/core/src/render_techniques/tone_mapping/tone_mapping.h index 8c93ab7..93b55ea 100644 --- a/src/core/src/render_techniques/tone_mapping/tone_mapping.h +++ b/src/core/src/render_techniques/tone_mapping/tone_mapping.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -44,11 +44,17 @@ class ToneMapping : public RenderTechnique }; /** - * Convert render settings to internal options format. - * @param settings Current render settings. + * Convert render options to internal options format. + * @param options Current render options. * @returns The options converted. */ - static RenderOptions convertOptions(RenderSettings const &settings) noexcept; + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; + + /** + * Gets a list of any shared components used by the current render technique. + * @return A list of all supported components. + */ + ComponentList getComponents() const noexcept override; /** * Gets the required list of AOVs needed for the current render technique. 
@@ -71,9 +77,18 @@ class ToneMapping : public RenderTechnique */ void render(CapsaicinInternal &capsaicin) noexcept override; -private: - void terminate(); + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + /** + * Render GUI options. + * @param [in,out] capsaicin The current capsaicin context. + */ + void renderGUI(CapsaicinInternal &capsaicin) const noexcept override; + +private: RenderOptions options; GfxKernel tone_mapping_kernel_; diff --git a/src/core/src/render_techniques/variance_estimate/variance_estimate.comp b/src/core/src/render_techniques/variance_estimate/variance_estimate.comp new file mode 100644 index 0000000..6d83a9b --- /dev/null +++ b/src/core/src/render_techniques/variance_estimate/variance_estimate.comp @@ -0,0 +1,122 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+#define GROUP_SIZE 16
+
+#include "../../math/math.hlsl"
+#include "../../math/color.hlsl"
+
+uint2 g_BufferDimensions;
+
+Texture2D g_ColorBuffer;
+
+RWStructuredBuffer<float> g_MeanBuffer;   // one luminance sum per thread group; element 0 is later divided by the pixel count
+RWStructuredBuffer<float> g_SquareBuffer; // one sum of squared distances per thread group; element 0 feeds ComputeDeviation
+RWStructuredBuffer<float> g_ResultBuffer; // element 0 receives the coefficient of variation
+
+groupshared float lds_BlockBuffer[GROUP_SIZE * GROUP_SIZE];
+groupshared uint lds_writes;
+
+float BlockReduceSum(in float value, in uint local_id, in uint group_size) // Sums `value` over the thread group; callers store the result from local_id 0 only. `group_size` is not read.
+{
+    value = WaveActiveSum(value);
+
+    // Combine values across the group
+    const uint groupSize = GROUP_SIZE * GROUP_SIZE;
+    for (uint j = WaveGetLaneCount(); j < groupSize; j *= WaveGetLaneCount())
+    {
+        // Since we work on square tiles it's possible that some waves don't write to lds as they have no valid pixels
+        // To ensure that invalid values aren't read from lds we use an atomic to count the actual lds writes
+        if (local_id == 0)
+        {
+            // Clear atomic
+            InterlockedAnd(lds_writes, 0);
+        }
+        GroupMemoryBarrierWithGroupSync();
+
+        // Use local data share to combine across waves
+        if (WaveIsFirstLane())
+        {
+            uint waveID;
+            InterlockedAdd(lds_writes, 1, waveID); // waveID receives the counter value before the add, i.e. a unique slot
+            lds_BlockBuffer[waveID] = value;
+
+        }
+        GroupMemoryBarrierWithGroupSync();
+
+        uint numWaves = lds_writes;
+        if (local_id >= numWaves) // NOTE(review): threads leaving here skip the barriers of any later iteration; a second iteration only happens when WaveGetLaneCount() < 16 - confirm the target wave sizes
+        {
+            break;
+        }
+
+        // Use the current wave to combine across group
+        value = lds_BlockBuffer[local_id];
+        value = WaveActiveSum(value);
+    }
+
+    return value;
+}
+
+[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
+void ComputeMean(in uint2 did : SV_DispatchThreadID, in uint local_id : SV_GroupIndex, in uint2 group_id : SV_GroupID) // Writes each group's luminance sum to g_MeanBuffer
+{
+    float3 color = (all(did < g_BufferDimensions) ? g_ColorBuffer[did].xyz : (float3)0); // threads outside the image contribute zero
+    float luma = (any(isnan(color)) ? 0.0f : luminance(color)); // NaN pixels contribute zero
+
+    luma = BlockReduceSum(luma, local_id, GROUP_SIZE * GROUP_SIZE);
+
+    if (local_id == 0)
+    {
+        uint block_count = (g_BufferDimensions.x + GROUP_SIZE - 1) / GROUP_SIZE; // number of groups per row
+        uint block_index = group_id.x + group_id.y * block_count;
+
+        g_MeanBuffer[block_index] = luma;
+    }
+}
+
+[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
+void ComputeDistance(in uint2 did : SV_DispatchThreadID, in uint local_id : SV_GroupIndex, in uint2 group_id : SV_GroupID) // Writes each group's sum of squared distances to the mean to g_SquareBuffer
+{
+    uint pixel_count = g_BufferDimensions.x * g_BufferDimensions.y;
+    float3 color = (all(did < g_BufferDimensions) ? g_ColorBuffer[did].xyz : (float3)0);
+    float dist2 = squared((any(isnan(color)) ? 0.0f : luminance(color)) - g_MeanBuffer[0] / pixel_count); // expects g_MeanBuffer[0] to already hold the total over all groups
+
+    dist2 = BlockReduceSum(dist2, local_id, GROUP_SIZE * GROUP_SIZE);
+
+    if (local_id == 0)
+    {
+        uint block_count = (g_BufferDimensions.x + GROUP_SIZE - 1) / GROUP_SIZE;
+        uint block_index = group_id.x + group_id.y * block_count;
+
+        g_SquareBuffer[block_index] = dist2;
+    }
+}
+
+[numthreads(1, 1, 1)]
+void ComputeDeviation() // Single-thread pass combining the two totals into g_ResultBuffer[0]
+{
+    uint pixel_count = g_BufferDimensions.x * g_BufferDimensions.y;
+    float std_dev = sqrt(g_SquareBuffer[0] / pixel_count);
+    float mean = g_MeanBuffer[0] / pixel_count;
+
+    g_ResultBuffer[0] = (mean != 0.0f ? std_dev / mean : 0.0f); // coefficient of variation
+}
diff --git a/src/core/src/render_techniques/variance_estimate/variance_estimate.cpp b/src/core/src/render_techniques/variance_estimate/variance_estimate.cpp
new file mode 100644
index 0000000..0d08ea2
--- /dev/null
+++ b/src/core/src/render_techniques/variance_estimate/variance_estimate.cpp
@@ -0,0 +1,183 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+#include "variance_estimate.h"
+
+#include "capsaicin_internal.h"
+
+namespace Capsaicin
+{
+VarianceEstimate::VarianceEstimate()
+    : RenderTechnique("Variance Estimate")
+    , cv_(0.0f)
+    , readback_buffer_index_(0)
+{}
+
+VarianceEstimate::~VarianceEstimate()
+{
+    terminate();
+}
+
+AOVList VarianceEstimate::getAOVs() const noexcept // Requests only the "Color" AOV, for reading
+{
+    AOVList aovs;
+    aovs.push_back({"Color", AOV::Read});
+    return aovs;
+}
+
+bool VarianceEstimate::init(CapsaicinInternal const &capsaicin) noexcept // Creates the result/readback buffers, the program and its three kernels
+{
+    result_buffer_ = gfxCreateBuffer<float>(gfx_, 1); // a single float, written by ComputeDeviation
+    result_buffer_.setName("Capsaicin_ResultBuffer");
+
+    for (uint32_t i = 0; i < ARRAYSIZE(readback_buffers_); ++i)
+    {
+        char buffer[64];
+        GFX_SNPRINTF(buffer, sizeof(buffer), "Capsaicin_ReadbackBuffer%u", i);
+
+        readback_buffers_[i] = gfxCreateBuffer<float>(gfx_, 1, nullptr, kGfxCpuAccess_Read); // CPU-readable copy target for result_buffer_
+        readback_buffers_[i].setName(buffer);
+    }
+
+    variance_estimate_program_ = gfxCreateProgram(
+        gfx_, "render_techniques/variance_estimate/variance_estimate", capsaicin.getShaderPath());
+    compute_mean_kernel_      = gfxCreateComputeKernel(gfx_, variance_estimate_program_, "ComputeMean");
+    compute_distance_kernel_  = gfxCreateComputeKernel(gfx_, variance_estimate_program_, "ComputeDistance");
+    compute_deviation_kernel_ = gfxCreateComputeKernel(gfx_, variance_estimate_program_, "ComputeDeviation");
+
+    return !!variance_estimate_program_; // only the program handle is checked, not the kernels
+}
+
+void VarianceEstimate::render(CapsaicinInternal &capsaicin) noexcept // Dispatches the three passes and streams the previous result back into cv_
+{
+    uint32_t const buffer_dimensions[] = {capsaicin.getWidth(), capsaicin.getHeight()};
+
+    // The group layout comes from the mean kernel; the distance kernel is asserted to match below.
+    uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, compute_mean_kernel_);
+    uint32_t const  num_groups_x = (buffer_dimensions[0] + num_threads[0] - 1) / num_threads[0];
+    uint32_t const  num_groups_y = (buffer_dimensions[1] + num_threads[1] - 1) / num_threads[1];
+    // One element per dispatched group: the shaders index with group_id.x + group_id.y * num_groups_x, which ceil(pixels / group threads) does not cover when a dimension is not a multiple of the group size.
+    uint32_t const  elem_count = num_groups_x * num_groups_y;
+
+    GFX_ASSERT(num_threads[0] == gfxKernelGetNumThreads(gfx_, compute_distance_kernel_)[0]
+               && num_threads[1] == gfxKernelGetNumThreads(gfx_, compute_distance_kernel_)[1]
+               && num_threads[2] == gfxKernelGetNumThreads(gfx_, compute_distance_kernel_)[2]);
+
+    if (mean_buffer_.getCount() != elem_count) // (re)allocate when the group count changes, e.g. first frame or resize
+    {
+        gfxDestroyBuffer(gfx_, mean_buffer_);
+
+        mean_buffer_ = gfxCreateBuffer<float>(gfx_, elem_count);
+        mean_buffer_.setName("Capsaicin_MeanBuffer");
+    }
+
+    if (square_buffer_.getCount() != elem_count)
+    {
+        gfxDestroyBuffer(gfx_, square_buffer_);
+
+        square_buffer_ = gfxCreateBuffer<float>(gfx_, elem_count);
+        square_buffer_.setName("Capsaicin_SquareBuffer");
+    }
+
+    gfxProgramSetParameter(gfx_, variance_estimate_program_, "g_BufferDimensions", buffer_dimensions);
+
+    gfxProgramSetParameter(
+        gfx_, variance_estimate_program_, "g_ColorBuffer", capsaicin.getAOVBuffer("Color"));
+
+    gfxProgramSetParameter(gfx_, variance_estimate_program_, "g_MeanBuffer", mean_buffer_);
+    gfxProgramSetParameter(gfx_, variance_estimate_program_, "g_SquareBuffer", square_buffer_);
+    gfxProgramSetParameter(gfx_, variance_estimate_program_, "g_ResultBuffer", result_buffer_);
+
+    // Compute the mean
+    {
+        TimedSection const timed_section(*this, "ComputeMean");
+
+        gfxCommandBindKernel(gfx_, compute_mean_kernel_);
+        gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1);
+        gfxCommandReduceSum(gfx_, kGfxDataType_Float, mean_buffer_, mean_buffer_); // in-place reduction; the shaders then read the total from element 0
+    }
+
+    // Compute the squared distance to the mean
+    {
+        TimedSection const timed_section(*this, "ComputeDistance");
+
+        gfxCommandBindKernel(gfx_, compute_distance_kernel_);
+        gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1);
+        gfxCommandReduceSum(gfx_, kGfxDataType_Float, square_buffer_, square_buffer_);
+    }
+
+    // Compute the standard deviation
+    {
+        TimedSection const timed_section(*this, "ComputeDeviation");
+
+        gfxCommandBindKernel(gfx_, compute_deviation_kernel_);
+        gfxCommandDispatch(gfx_, 1, 1, 1);
+    }
+
+    // Stream the result back to the CPU
+    {
+        GfxBuffer const &readback_buffer =
+            readback_buffers_[readback_buffer_index_ % ARRAYSIZE(readback_buffers_)];
+
+        if (readback_buffer_index_ >= ARRAYSIZE(readback_buffers_)) // skip the read until this slot has been copied into at least once
+        {
+            cv_ = *gfxBufferGetData<float>(gfx_, readback_buffer);
+        }
+
+        gfxCommandCopyBuffer(gfx_, readback_buffer, result_buffer_);
+
+        if (++readback_buffer_index_ >= 2 * ARRAYSIZE(readback_buffers_)) // wrap the counter but keep it >= ARRAYSIZE so reads stay enabled
+        {
+            readback_buffer_index_ -= ARRAYSIZE(readback_buffers_);
+        }
+    }
+}
+
+void VarianceEstimate::terminate() noexcept // Destroys every GPU object and resets the handles, so a second call sees empty handles
+{
+    gfxDestroyBuffer(gfx_, mean_buffer_);
+    mean_buffer_ = {};
+    gfxDestroyBuffer(gfx_, square_buffer_);
+    square_buffer_ = {};
+    gfxDestroyBuffer(gfx_, result_buffer_);
+    result_buffer_ = {};
+
+    for (GfxBuffer &readback_buffer : readback_buffers_)
+    {
+        gfxDestroyBuffer(gfx_, readback_buffer);
+        readback_buffer = {};
+    }
+
+    gfxDestroyProgram(gfx_, variance_estimate_program_);
+    variance_estimate_program_ = {};
+    gfxDestroyKernel(gfx_, compute_mean_kernel_);
+    compute_mean_kernel_ = {};
+    gfxDestroyKernel(gfx_, compute_distance_kernel_);
+    compute_distance_kernel_ = {};
+    gfxDestroyKernel(gfx_, compute_deviation_kernel_);
+    compute_deviation_kernel_ = {};
+}
+
+void VarianceEstimate::renderGUI([[maybe_unused]] CapsaicinInternal &capsaicin) const noexcept // Shows the last value read back in render()
+{
+    ImGui::Text("Noise amount : %f", cv_);
+}
+} // namespace Capsaicin
diff --git a/src/core/src/render_techniques/variance_estimate/variance_estimate.h b/src/core/src/render_techniques/variance_estimate/variance_estimate.h
new file mode 100644
index 0000000..6cd6895
--- /dev/null
+++ b/src/core/src/render_techniques/variance_estimate/variance_estimate.h
@@ -0,0 +1,81 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+#pragma once
+
+#include "render_technique.h"
+
+namespace Capsaicin
+{
+class VarianceEstimate : public RenderTechnique
+{
+public:
+    VarianceEstimate();
+    ~VarianceEstimate(); // the definition calls terminate()
+
+    /**
+     * Gets the required list of AOVs needed for the current render technique.
+     * @return A list of all required AOV buffers.
+     */
+    AOVList getAOVs() const noexcept override;
+
+    /**
+     * Initialise any internal data or state.
+     * @note This is automatically called by the framework after construction and should be used to create
+     * any required CPU|GPU resources.
+     * @param capsaicin Current framework context.
+     * @return True if initialisation succeeded, False otherwise.
+     */
+    bool init(CapsaicinInternal const &capsaicin) noexcept override;
+
+    /**
+     * Perform render operations.
+     * @param [in,out] capsaicin The current capsaicin context.
+     */
+    void render(CapsaicinInternal &capsaicin) noexcept override;
+
+    /**
+     * Destroy any used internal resources and shutdown.
+     */
+    void terminate() noexcept override;
+
+    /**
+     * Render GUI options.
+     * @param [in,out] capsaicin The current capsaicin context.
+     */
+    void renderGUI(CapsaicinInternal &capsaicin) const noexcept override;
+
+protected:
+    float cv_; // Last value read back from result_buffer_ (coefficient of variation); printed as "Noise amount" in the GUI
+
+    GfxBuffer mean_buffer_;   // Bound as g_MeanBuffer: per-group luminance sums, reduced in place after the ComputeMean dispatch
+    GfxBuffer square_buffer_; // Bound as g_SquareBuffer: per-group squared-distance sums, reduced in place after ComputeDistance
+    GfxBuffer result_buffer_; // Bound as g_ResultBuffer: single element written by the ComputeDeviation dispatch
+
+    GfxBuffer readback_buffers_[kGfxConstant_BackBufferCount]; // CPU-readable buffers that result_buffer_ is copied into, used in rotation
+    uint32_t  readback_buffer_index_;                          // Running counter selecting the readback buffer (taken modulo the array size)
+
+    GfxProgram variance_estimate_program_; // Program created from "render_techniques/variance_estimate/variance_estimate"
+    GfxKernel  compute_mean_kernel_;       // Compute kernel for the "ComputeMean" entry point
+    GfxKernel  compute_distance_kernel_;   // Compute kernel for the "ComputeDistance" entry point
+    GfxKernel  compute_deviation_kernel_;  // Compute kernel for the "ComputeDeviation" entry point
+};
+} // namespace Capsaicin
diff --git a/src/core/src/render_techniques/visibility_buffer/debug_dxr10.rt b/src/core/src/render_techniques/visibility_buffer/debug_dxr10.rt
new file mode 100644
index 0000000..1d5a9df
--- /dev/null
+++ b/src/core/src/render_techniques/visibility_buffer/debug_dxr10.rt
@@ -0,0 +1,109 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+float3 g_Eye;
+float4x4 g_ViewProjectionInverse;
+
+struct MyConstantBuffer
+{
+    float4 color;
+};
+
+RaytracingAccelerationStructure g_Scene : register(t0);
+RWTexture2D<float4> g_RenderTarget : register(u0); // receives payload.color (float4) per dispatched ray
+ConstantBuffer<MyConstantBuffer> g_MyCB : register(b0, space1); // supplies the color returned by MyClosestHitShader
+
+
+TriangleHitGroup MyHitGroup =
+{
+    "",                   // AnyHit
+    "MyClosestHitShader", // ClosestHit
+};
+
+TriangleHitGroup MyHitGroup2 =
+{
+    "",                    // AnyHit
+    "MyClosestHitShader2", // ClosestHit
+};
+
+RaytracingShaderConfig MyShaderConfig =
+{
+    16, // max payload size
+    8   // max attribute size
+};
+
+RaytracingPipelineConfig MyPipelineConfig =
+{
+    1 // max trace recursion depth
+};
+
+struct CubeConstantBuffer // not referenced anywhere in this file
+{
+    float4 color;
+};
+
+struct RayPayload // one float4, matching the 16-byte payload size configured above
+{
+    float4 color;
+};
+
+[shader("raygeneration")]
+void MyRaygenShader() // Shoots one ray per dispatch index from g_Eye and stores the returned color
+{
+    float2 xy = DispatchRaysIndex().xy + 0.5f; // pixel centre
+    float2 screen_pos = xy / DispatchRaysDimensions().xy * 2.0 - 1.0; // to [-1, 1]
+    screen_pos.y = -screen_pos.y;
+
+    // Unproject the pixel coordinate into a ray.
+    float4 world = mul(g_ViewProjectionInverse, float4(screen_pos, 0, 1));
+    world.xyz /= world.w;
+
+    RayDesc ray;
+    ray.Origin = g_Eye;
+    ray.Direction = normalize(world.xyz - g_Eye);
+    ray.TMin = 0.001;
+    ray.TMax = 10000.0;
+    RayPayload payload = { float4(0, 0, 0, 0) };
+    TraceRay(g_Scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, ~0, 0, 1, 0, ray, payload);
+
+    g_RenderTarget[DispatchRaysIndex().xy] = payload.color;
+}
+
+[shader("closesthit")]
+void MyClosestHitShader(inout RayPayload payload, in BuiltInTriangleIntersectionAttributes attr) // Returns the constant-buffer color
+{
+    payload.color = g_MyCB.color;
+}
+
+
+[shader("closesthit")]
+void MyClosestHitShader2(inout RayPayload payload, in BuiltInTriangleIntersectionAttributes attr) // Returns a fixed red with zero alpha
+{
+    payload.color = float4(1, 0, 0, 0);
+}
+
+[shader("miss")]
+void MyMissShader(inout RayPayload payload) // Returns the fixed background color
+{
+    float4 background = float4(0.0f, 0.2f, 0.4f, 1.0f);
+    payload.color = background;
+}
diff --git a/src/core/src/render_techniques/visibility_buffer/debug_material.frag b/src/core/src/render_techniques/visibility_buffer/debug_material.frag
new file mode 100644
index 0000000..0f75e65
--- /dev/null
+++ b/src/core/src/render_techniques/visibility_buffer/debug_material.frag
@@ -0,0 +1,76 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+#include "../../gpu_shared.h"
+
+Texture2D g_VisibilityBuffer;
+Texture2D g_DepthBuffer;
+
+StructuredBuffer<uint> g_IndexBuffer;        // NOTE(review): element type inferred, not used directly in this file - confirm against mesh.hlsl
+StructuredBuffer<Vertex> g_VertexBuffer;     // NOTE(review): element type inferred, not used directly in this file - confirm against gpu_shared.h
+StructuredBuffer<Mesh> g_MeshBuffer;         // indexed by instance.mesh_index below
+StructuredBuffer<Instance> g_InstanceBuffer; // indexed by the instance id stored in the visibility buffer
+StructuredBuffer<Material> g_MaterialBuffer; // indexed by instance.material_index below
+
+Texture2D g_TextureMaps[] : register(space99);
+SamplerState g_TextureSampler;
+
+uint g_MaterialMode;
+
+#include "../../materials/material_evaluation.hlsl"
+#include "../../geometry/geometry.hlsl"
+#include "../../geometry/mesh.hlsl"
+
+float4 DebugMaterial(in float4 pos : SV_Position) : SV_Target // Shows one evaluated material channel, chosen by g_MaterialMode
+{
+    uint2 did = uint2(pos.xy);
+
+    if (g_DepthBuffer[did].x >= 1.0f) // depth of 1 or more is output as opaque black
+    {
+        return float4(0.0f, 0.0f, 0.0f, 1.0f);
+    }
+
+    float4 visibility = g_VisibilityBuffer.Load(int3(did, 0));
+    uint instanceID = asuint(visibility.z);  // ids are stored as raw bits in the float channels
+    uint primitiveID = asuint(visibility.w);
+
+    Instance instance = g_InstanceBuffer[instanceID];
+    Mesh mesh = g_MeshBuffer[instance.mesh_index];
+
+    // Get UV values from buffers
+    UVs uvs = fetchUVs(mesh, primitiveID);
+    float2 mesh_uv = interpolate(uvs.uv0, uvs.uv1, uvs.uv2, visibility.xy); // visibility.xy is passed as the interpolation weights
+
+    Material material = g_MaterialBuffer[instance.material_index];
+    MaterialEvaluated materialEvaluated = MakeMaterialEvaluated(material, mesh_uv);
+    switch (g_MaterialMode)
+    {
+    case 0:
+        return float4(materialEvaluated.albedo, 1.0f);
+    case 1:
+        return float4(materialEvaluated.metallicity.xxx, 1.0f);
+    case 2:
+        return float4(materialEvaluated.roughness.xxx, 1.0f);
+    default:
+        return float4(0.0f, 0.0f, 0.0f, 1.0f);
+    }
+}
diff --git a/src/core/src/render_techniques/visibility_buffer/debug_material.vert b/src/core/src/render_techniques/visibility_buffer/debug_material.vert
new file mode 100644
index 0000000..954cf62
--- /dev/null
+++ b/src/core/src/render_techniques/visibility_buffer/debug_material.vert
@@ -0,0 +1,26 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+float4 DebugMaterial(in uint idx : SV_VertexID) : SV_POSITION // Position from the vertex id alone; no vertex buffer is read
+{
+    return 1.0f - float4(4.0f * (idx & 1), 4.0f * (idx >> 1), 1.0f, 0.0f); // idx 0,1,2 -> (1,1), (-3,1), (1,-3) with z = 0, w = 1: one triangle enclosing the [-1,1] square
+}
diff --git a/src/core/src/render_techniques/visibility_buffer/debug_velocity.frag b/src/core/src/render_techniques/visibility_buffer/debug_velocity.frag
new file mode 100644
index 0000000..7eb6280
--- /dev/null
+++ b/src/core/src/render_techniques/visibility_buffer/debug_velocity.frag
@@ -0,0 +1,32 @@
+/**********************************************************************
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+Texture2D VelocityBuffer;
+
+float4 main(in float4 pos : SV_Position) : SV_Target // Turns the 2D velocity at this pixel into a non-negative debug color
+{
+    float2 velocity = VelocityBuffer.Load(int3(pos.xy, 0)).xy;
+    float3 velocity_vec = float3((velocity.y >= 0.0f ? velocity.y : 0.0f) + (velocity.y < 0.0f ? -velocity.y : 0.0f), // red: |velocity.y|
+        (velocity.x < 0.0f ? -velocity.x : 0.0f) + (velocity.y < 0.0f ? -velocity.y : 0.0f), // green: negative x part plus negative y part
+        (velocity.x >= 0.0f ? velocity.x : 0.0f)); // blue: positive x part
+    return float4(pow(velocity_vec, 0.4f), 1.0f); // every component is >= 0 here, so pow has a non-negative base
+}
diff --git a/src/core/src/render_techniques/path_tracer/reference_pt_shared.h b/src/core/src/render_techniques/visibility_buffer/debug_velocity.vert
similarity index 65%
rename from src/core/src/render_techniques/path_tracer/reference_pt_shared.h
rename to src/core/src/render_techniques/visibility_buffer/debug_velocity.vert
index 5ff9f56..2757814 100644
--- a/src/core/src/render_techniques/path_tracer/reference_pt_shared.h
+++ b/src/core/src/render_techniques/visibility_buffer/debug_velocity.vert
@@ -1,5 +1,5 @@
 /**********************************************************************
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -20,18 +20,17 @@
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
********************************************************************/ -#ifndef REFERENCE_PT_SHARED_H -#define REFERENCE_PT_SHARED_H - -#include "../../gpu_shared.h" - -struct RayCamera +struct VS_OUTPUT { - float3 origin; /**< The ray starting position */ - float3 directionTL; /**< The direction to the top left of the virtual screen */ - float3 directionX; /**< The virtual screens horizontal direction (length of 1 pixel - left->right)*/ - float3 directionY; /**< The virtual screens vertical direction (length of 1 pixel - top->bottom)*/ - float2 range; /**< The rays near and far distances */ + float4 pos : SV_POSITION; + float2 texcoord : TEXCOORD; }; -#endif // REFERENCE_PT_SHARED_H +VS_OUTPUT main(in uint idx : SV_VertexID) +{ + VS_OUTPUT output; + output.texcoord = float2(1.0f - 2.0f * (idx & 1), 2.0f * (idx >> 1)); + + output.pos = 1.0f - float4(4.0f * (idx & 1), 4.0f * (idx >> 1), 1.0f, 0.0f); + return output; +} diff --git a/src/core/src/render_techniques/visibility_buffer/disocclusion_mask.comp b/src/core/src/render_techniques/visibility_buffer/disocclusion_mask.comp index cb6ee6e..8281227 100644 --- a/src/core/src/render_techniques/visibility_buffer/disocclusion_mask.comp +++ b/src/core/src/render_techniques/visibility_buffer/disocclusion_mask.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -19,7 +19,9 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
********************************************************************/ + #include "../../gpu_shared.h" +#include "../../math/transform.hlsl" float3 g_Eye; float2 g_NearFar; @@ -28,7 +30,7 @@ float4x4 g_Reprojection; float4x4 g_ViewProjectionInverse; Texture2D g_DepthBuffer; -Texture2D g_NormalBuffer; +Texture2D g_GeometryNormalBuffer; Texture2D g_VelocityBuffer; Texture2D g_PreviousDepthBuffer; @@ -75,13 +77,11 @@ void main(in uint2 did : SV_DispatchThreadID) if (depth < 1.0f && all(previous_uv > 0.0f) && all(previous_uv < 1.0f)) { - float4 clip_space = mul(g_Reprojection, float4(2.0f * float2(uv.x, 1.0f - uv.y) - 1.0f, depth, 1.0f)); - clip_space = clip_space / clip_space.w; // perspective divide - float3 normal = normalize(2.0f * g_NormalBuffer.SampleLevel(g_NearestSampler, uv, 0.0f).xyz - 1.0f); + float3 clip_space = transformPointProjection(float3(2.0f * float2(uv.x, 1.0f - uv.y) - 1.0f, depth), g_Reprojection); + float3 normal = normalize(2.0f * g_GeometryNormalBuffer.SampleLevel(g_NearestSampler, uv, 0.0f).xyz - 1.0f); - float4 world_pos = mul(g_ViewProjectionInverse, float4(2.0f * float2(uv.x, 1.0f - uv.y) - 1.0f, depth, 1.0f)); - world_pos = world_pos / world_pos.w; // perspective divide - float3 view_dir = normalize(g_Eye - world_pos.xyz); + float3 world_pos = transformPointProjection(float3(2.0f * float2(uv.x, 1.0f - uv.y) - 1.0f, depth), g_ViewProjectionInverse); + float3 view_dir = normalize(g_Eye - world_pos); float z_alignment = pow(1.0f - max(dot(view_dir, normal), 0.0f), 6.0f); depth = GetLinearDepth(clip_space.z); // get linear depth diff --git a/src/core/src/render_techniques/visibility_buffer/visibility_buffer.cpp b/src/core/src/render_techniques/visibility_buffer/visibility_buffer.cpp index 4dc9be4..69e3e07 100644 --- a/src/core/src/render_techniques/visibility_buffer/visibility_buffer.cpp +++ b/src/core/src/render_techniques/visibility_buffer/visibility_buffer.cpp @@ -1,5 +1,5 @@ 
/********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -19,9 +19,13 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ********************************************************************/ + #include "visibility_buffer.h" +#include "../../geometry/path_tracing_shared.h" #include "capsaicin_internal.h" +#include "components/blue_noise_sampler/blue_noise_sampler.h" +#include "thread_pool.h" namespace Capsaicin { @@ -34,16 +38,45 @@ VisibilityBuffer::~VisibilityBuffer() terminate(); } +RenderOptionList VisibilityBuffer::getRenderOptions() noexcept +{ + RenderOptionList newOptions; + newOptions.emplace(RENDER_OPTION_MAKE(visibility_buffer_use_rt, options)); + newOptions.emplace(RENDER_OPTION_MAKE(visibility_buffer_use_rt_dxr10, options)); + return newOptions; +} + +VisibilityBuffer::RenderOptions VisibilityBuffer::convertOptions(RenderOptionList const &options) noexcept +{ + RenderOptions newOptions; + RENDER_OPTION_GET(visibility_buffer_use_rt, newOptions, options) + RENDER_OPTION_GET(visibility_buffer_use_rt_dxr10, newOptions, options) + return newOptions; +} + +ComponentList VisibilityBuffer::getComponents() const noexcept +{ + ComponentList components; + components.emplace_back(COMPONENT_MAKE(BlueNoiseSampler)); + return components; +} + AOVList VisibilityBuffer::getAOVs() const noexcept { AOVList aovs; - aovs.push_back({"VisibilityDepth", AOV::Write, AOV::Clear, DXGI_FORMAT_D32_FLOAT, "PrevVisibilityDepth"}); - aovs.push_back({"Normal", AOV::Write, AOV::Clear, DXGI_FORMAT_R8G8B8A8_UNORM}); - aovs.push_back({"Details", AOV::Write, 
AOV::Clear, DXGI_FORMAT_R8G8B8A8_UNORM}); - aovs.push_back({"Velocity", AOV::Write, AOV::Clear, DXGI_FORMAT_R16G16_FLOAT}); + aovs.push_back({"Debug", AOV::Write}); aovs.push_back({"Visibility", AOV::Write, AOV::Clear, DXGI_FORMAT_R32G32B32A32_FLOAT}); - aovs.push_back({"DisocclusionMask", AOV::Write, AOV::None, DXGI_FORMAT_R8_UNORM}); aovs.push_back({"Depth", AOV::ReadWrite}); + aovs.push_back({"VisibilityDepth", AOV::Write, AOV::Clear, DXGI_FORMAT_R32_FLOAT, "PrevVisibilityDepth"}); + aovs.push_back({"GeometryNormal", AOV::Write, AOV::Clear, DXGI_FORMAT_R8G8B8A8_UNORM}); + aovs.push_back({"Velocity", AOV::Write, AOV::Clear, DXGI_FORMAT_R16G16_FLOAT}); + aovs.push_back( + {"ShadingNormal", AOV::Write, AOV::Flags(AOV::Clear | AOV::Optional), DXGI_FORMAT_R8G8B8A8_UNORM}); + aovs.push_back( + {"VertexNormal", AOV::Write, AOV::Flags(AOV::Clear | AOV::Optional), DXGI_FORMAT_R8G8B8A8_UNORM}); + aovs.push_back({"Roughness", AOV::Write, AOV::Flags(AOV::Clear | AOV::Optional), DXGI_FORMAT_R16_FLOAT}); + aovs.push_back( + {"DisocclusionMask", AOV::Write, AOV::Flags(AOV::None | AOV::Optional), DXGI_FORMAT_R8_UNORM}); return aovs; } @@ -51,161 +84,473 @@ DebugViewList VisibilityBuffer::getDebugViews() const noexcept { DebugViewList views; views.emplace_back("Velocity"); + views.emplace_back("DXR1.0"); + views.emplace_back("MaterialAlbedo"); + views.emplace_back("MaterialMetallicity"); + views.emplace_back("MaterialRoughness"); return views; } bool VisibilityBuffer::init(CapsaicinInternal const &capsaicin) noexcept { - disocclusion_mask_program_ = gfxCreateProgram( - gfx_, "render_techniques/visibility_buffer/disocclusion_mask", capsaicin.getShaderPath()); - disocclusion_mask_kernel_ = gfxCreateComputeKernel(gfx_, disocclusion_mask_program_); - - gfxProgramSetParameter( - gfx_, disocclusion_mask_program_, "g_DepthBuffer", capsaicin.getAOVBuffer("VisibilityDepth")); - gfxProgramSetParameter( - gfx_, disocclusion_mask_program_, "g_NormalBuffer", 
capsaicin.getAOVBuffer("Normal")); - gfxProgramSetParameter( - gfx_, disocclusion_mask_program_, "g_VelocityBuffer", capsaicin.getAOVBuffer("Velocity")); - gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_PreviousDepthBuffer", - capsaicin.getAOVBuffer("PrevVisibilityDepth")); - - gfxProgramSetParameter( - gfx_, disocclusion_mask_program_, "g_DisocclusionMask", capsaicin.getAOVBuffer("DisocclusionMask")); - - gfxProgramSetParameter( - gfx_, disocclusion_mask_program_, "g_NearestSampler", capsaicin.getNearestSampler()); - - GfxDrawState visibility_buffer_draw_state = {}; - gfxDrawStateSetCullMode(visibility_buffer_draw_state, D3D12_CULL_MODE_NONE); - - gfxDrawStateSetColorTarget(visibility_buffer_draw_state, 0, capsaicin.getAOVBuffer("Visibility")); - gfxDrawStateSetColorTarget(visibility_buffer_draw_state, 1, capsaicin.getAOVBuffer("Normal")); - gfxDrawStateSetColorTarget(visibility_buffer_draw_state, 2, capsaicin.getAOVBuffer("Details")); - gfxDrawStateSetColorTarget(visibility_buffer_draw_state, 3, capsaicin.getAOVBuffer("Velocity")); - gfxDrawStateSetDepthStencilTarget(visibility_buffer_draw_state, capsaicin.getAOVBuffer("Depth")); - - visibility_buffer_program_ = gfxCreateProgram( - gfx_, "render_techniques/visibility_buffer/visibility_buffer", capsaicin.getShaderPath()); - visibility_buffer_kernel_ = - gfxCreateGraphicsKernel(gfx_, visibility_buffer_program_, visibility_buffer_draw_state); + if (capsaicin.hasAOVBuffer("DisocclusionMask")) + { + // Initialise disocclusion program + disocclusion_mask_program_ = gfxCreateProgram( + gfx_, "render_techniques/visibility_buffer/disocclusion_mask", capsaicin.getShaderPath()); + disocclusion_mask_kernel_ = gfxCreateComputeKernel(gfx_, disocclusion_mask_program_); - return !!visibility_buffer_program_; + gfxProgramSetParameter( + gfx_, disocclusion_mask_program_, "g_DepthBuffer", capsaicin.getAOVBuffer("VisibilityDepth")); + gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_GeometryNormalBuffer", + 
capsaicin.getAOVBuffer("GeometryNormal")); + gfxProgramSetParameter( + gfx_, disocclusion_mask_program_, "g_VelocityBuffer", capsaicin.getAOVBuffer("Velocity")); + gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_PreviousDepthBuffer", + capsaicin.getAOVBuffer("PrevVisibilityDepth")); + + gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_DisocclusionMask", + capsaicin.getAOVBuffer("DisocclusionMask")); + + gfxProgramSetParameter( + gfx_, disocclusion_mask_program_, "g_NearestSampler", capsaicin.getNearestSampler()); + } + + return initKernel(capsaicin); } void VisibilityBuffer::render(CapsaicinInternal &capsaicin) noexcept { - uint32_t const instance_count = gfxSceneGetObjectCount(capsaicin.getScene()); - GfxBuffer draw_command_buffer = - capsaicin.allocateConstantBuffer(instance_count); - D3D12_DRAW_INDEXED_ARGUMENTS *draw_commands = - (D3D12_DRAW_INDEXED_ARGUMENTS *)gfxBufferGetData(gfx_, draw_command_buffer); + // Check for option change + RenderOptions newOptions = convertOptions(capsaicin.getOptions()); + bool recompile = options.visibility_buffer_use_rt != newOptions.visibility_buffer_use_rt + || (options.visibility_buffer_use_rt + && options.visibility_buffer_use_rt_dxr10 != newOptions.visibility_buffer_use_rt_dxr10); + + options = newOptions; + if (recompile) + { + gfxDestroyProgram(gfx_, visibility_buffer_program_); + gfxDestroyKernel(gfx_, visibility_buffer_kernel_); + gfxDestroySbt(gfx_, visibility_buffer_sbt_); + visibility_buffer_sbt_ = {}; + + initKernel(capsaicin); + } + + auto blue_noise_sampler = capsaicin.getComponent(); + auto const &camera = capsaicin.getCameraMatrices( + capsaicin.hasOption("taa_enable") && capsaicin.getOption("taa_enable")); + + if (!options.visibility_buffer_use_rt) + { + // Render using raster pass + uint32_t const instance_count = gfxSceneGetObjectCount(capsaicin.getScene()); + GfxBuffer draw_command_buffer = + capsaicin.allocateConstantBuffer(instance_count); + D3D12_DRAW_INDEXED_ARGUMENTS 
*draw_commands = + (D3D12_DRAW_INDEXED_ARGUMENTS *)gfxBufferGetData(gfx_, draw_command_buffer); + + for (uint32_t i = 0; i < instance_count; ++i) + { + uint32_t const instance_index = capsaicin.getInstanceIdData()[i]; + Instance const &instance = capsaicin.getInstanceData()[instance_index]; + Mesh const &mesh = capsaicin.getMeshData()[instance.mesh_index]; + + draw_commands[i].IndexCountPerInstance = mesh.index_count; + draw_commands[i].InstanceCount = 1; + draw_commands[i].StartIndexLocation = mesh.index_offset_idx; + draw_commands[i].BaseVertexLocation = mesh.vertex_offset_idx; + draw_commands[i].StartInstanceLocation = i; // <- drawID + } + + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_Eye", capsaicin.getCamera().eye); + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_FrameIndex", capsaicin.getFrameIndex()); + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_ViewProjection", camera.view_projection); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_PrevViewProjection", camera.view_projection_prev); + + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_MeshBuffer", capsaicin.getMeshBuffer()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_MaterialBuffer", capsaicin.getMaterialBuffer()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_InstanceBuffer", capsaicin.getInstanceBuffer()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_TransformBuffer", capsaicin.getTransformBuffer()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_InstanceIDBuffer", capsaicin.getInstanceIdBuffer()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_PrevTransformBuffer", capsaicin.getPrevTransformBuffer()); + + blue_noise_sampler->addProgramParameters(capsaicin, visibility_buffer_program_); + + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_TextureMaps", capsaicin.getTextures(), + capsaicin.getTextureCount()); + 
gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_TextureSampler", capsaicin.getAnisotropicSampler()); + + gfxCommandBindKernel(gfx_, visibility_buffer_kernel_); + gfxCommandMultiDrawIndexedIndirect(gfx_, draw_command_buffer, instance_count); + + gfxDestroyBuffer(gfx_, draw_command_buffer); + gfxCommandCopyTexture( + gfx_, capsaicin.getAOVBuffer("VisibilityDepth"), capsaicin.getAOVBuffer("Depth")); + } + else + { + // Render using ray tracing pass + auto &cam = capsaicin.getCamera(); + auto cameraData = + caclulateRayCamera({cam.eye, cam.center, cam.up, cam.aspect, cam.fovY, cam.nearZ, cam.farZ}, + capsaicin.getWidth(), capsaicin.getHeight()); + auto bufferDimensions = uint2(capsaicin.getWidth(), capsaicin.getHeight()); + + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_BufferDimensions", bufferDimensions); + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_RayCamera", cameraData); + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_FrameIndex", capsaicin.getFrameIndex()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_InstanceBuffer", capsaicin.getInstanceBuffer()); + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_MeshBuffer", capsaicin.getMeshBuffer()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_TransformBuffer", capsaicin.getTransformBuffer()); + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_IndexBuffer", capsaicin.getIndexBuffer()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_VertexBuffer", capsaicin.getVertexBuffer()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_MaterialBuffer", capsaicin.getMaterialBuffer()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_PrevTransformBuffer", capsaicin.getPrevTransformBuffer()); + + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_Scene", capsaicin.getAccelerationStructure()); + + gfxProgramSetParameter(gfx_, visibility_buffer_program_, 
"g_TextureMaps", capsaicin.getTextures(), + capsaicin.getTextureCount()); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_TextureSampler", capsaicin.getLinearWrapSampler()); + + GfxBuffer cameraMatrixBuffer = capsaicin.allocateConstantBuffer(1); + gfxBufferGetData(gfx_, cameraMatrixBuffer)[0] = camera.view_projection; + GfxBuffer cameraPrevMatrixBuffer = capsaicin.allocateConstantBuffer(1); + gfxBufferGetData(gfx_, cameraPrevMatrixBuffer)[0] = camera.view_projection_prev; + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_ViewProjection", camera.view_projection); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_PrevViewProjection", camera.view_projection_prev); + // Need to correctly jitter ray camera equivalent to raster camera + float2 jitter = float2(camera.projection[2][0] * capsaicin.getWidth(), + camera.projection[2][1] * capsaicin.getHeight()) + * 0.5f; + gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_Jitter", jitter); + + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_Visibility", capsaicin.getAOVBuffer("Visibility")); + // Write to VisibilityDepth as its not possible to write directly to a depth buffer from a compute + // shader + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_Depth", capsaicin.getAOVBuffer("VisibilityDepth")); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_GeometryNormal", capsaicin.getAOVBuffer("GeometryNormal")); + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_Velocity", capsaicin.getAOVBuffer("Velocity")); + if (capsaicin.hasAOVBuffer("ShadingNormal")) + { + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_ShadingNormal", capsaicin.getAOVBuffer("ShadingNormal")); + } + if (capsaicin.hasAOVBuffer("VertexNormal")) + { + gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_VertexNormal", capsaicin.getAOVBuffer("VertexNormal")); + } + if (capsaicin.hasAOVBuffer("Roughness")) + { + 
gfxProgramSetParameter( + gfx_, visibility_buffer_program_, "g_Roughness", capsaicin.getAOVBuffer("Roughness")); + } + + if (options.visibility_buffer_use_rt_dxr10) + { + TimedSection const timed_section(*this, "VisibilityBufferRT1.0"); + + // Populate shader binding table + gfxSbtSetShaderGroup( + gfx_, visibility_buffer_sbt_, kGfxShaderGroupType_Raygen, 0, "VisibilityRTRaygen"); + gfxSbtSetShaderGroup( + gfx_, visibility_buffer_sbt_, kGfxShaderGroupType_Miss, 0, "VisibilityRTMiss"); + for (uint32_t i = 0; i < gfxAccelerationStructureGetRaytracingPrimitiveCount( + gfx_, capsaicin.getAccelerationStructure()); + i++) + { + gfxSbtSetShaderGroup(gfx_, visibility_buffer_sbt_, kGfxShaderGroupType_Hit, + i * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit) + 0, "VisibilityRTHitGroup"); + } + + gfxCommandBindKernel(gfx_, visibility_buffer_kernel_); + gfxCommandDispatchRays(gfx_, visibility_buffer_sbt_, bufferDimensions.x, bufferDimensions.y, 1); + } + else + { + TimedSection const timed_section(*this, "VisibilityBufferRT"); - for (uint32_t i = 0; i < instance_count; ++i) + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, visibility_buffer_kernel_); + uint32_t const num_groups_x = (bufferDimensions.x + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = (bufferDimensions.y + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, visibility_buffer_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); + } + gfxDestroyBuffer(gfx_, cameraMatrixBuffer); + gfxDestroyBuffer(gfx_, cameraPrevMatrixBuffer); + // Copy The F32 VisibilityDepth into D32 Depth buffer for later passes + gfxCommandCopyTexture( + gfx_, capsaicin.getAOVBuffer("Depth"), capsaicin.getAOVBuffer("VisibilityDepth")); + } + + if (capsaicin.hasAOVBuffer("DisocclusionMask")) { - uint32_t const instance_index = capsaicin.getInstanceIdData()[i]; - Instance const &instance = capsaicin.getInstanceData()[instance_index]; - Mesh const &mesh = 
capsaicin.getMeshData()[instance.mesh_index]; - - draw_commands[i].IndexCountPerInstance = mesh.index_count; - draw_commands[i].InstanceCount = 1; - draw_commands[i].StartIndexLocation = mesh.index_offset / mesh.index_stride; - draw_commands[i].BaseVertexLocation = mesh.vertex_offset / mesh.vertex_stride; - draw_commands[i].StartInstanceLocation = i; // <- drawID + gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_Eye", capsaicin.getCamera().eye); + gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_NearFar", + float2(capsaicin.getCamera().nearZ, capsaicin.getCamera().farZ)); + gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_TexelSize", + float2(1.0f / capsaicin.getWidth(), 1.0f / capsaicin.getHeight())); + gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_Reprojection", camera.reprojection); + gfxProgramSetParameter( + gfx_, disocclusion_mask_program_, "g_ViewProjectionInverse", camera.inv_view_projection); + + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, disocclusion_mask_kernel_); + uint32_t const num_groups_x = (capsaicin.getWidth() + num_threads[0] - 1) / num_threads[0]; + uint32_t const num_groups_y = (capsaicin.getHeight() + num_threads[1] - 1) / num_threads[1]; + + gfxCommandBindKernel(gfx_, disocclusion_mask_kernel_); + gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); } - gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_Eye", capsaicin.getCamera().eye); - gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_FrameIndex", capsaicin.getFrameIndex()); - auto const &camera = - capsaicin.getCameraMatrices(capsaicin.getRenderSettings().getOption("taa_enable")); - gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_ViewProjection", camera.view_projection); - gfxProgramSetParameter( - gfx_, visibility_buffer_program_, "g_PrevViewProjection", camera.view_projection_prev); - - gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_MeshBuffer", capsaicin.getMeshBuffer()); - 
gfxProgramSetParameter( - gfx_, visibility_buffer_program_, "g_MaterialBuffer", capsaicin.getMaterialBuffer()); - gfxProgramSetParameter( - gfx_, visibility_buffer_program_, "g_InstanceBuffer", capsaicin.getInstanceBuffer()); - gfxProgramSetParameter( - gfx_, visibility_buffer_program_, "g_TransformBuffer", capsaicin.getTransformBuffer()); - gfxProgramSetParameter( - gfx_, visibility_buffer_program_, "g_InstanceIDBuffer", capsaicin.getInstanceIdBuffer()); - gfxProgramSetParameter( - gfx_, visibility_buffer_program_, "g_PrevTransformBuffer", capsaicin.getPrevTransformBuffer()); - - gfxProgramSetParameter(gfx_, visibility_buffer_program_, "g_TextureMaps", capsaicin.getTextures(), - capsaicin.getTextureCount()); - gfxProgramSetParameter( - gfx_, visibility_buffer_program_, "g_TextureSampler", capsaicin.getAnisotropicSampler()); - - gfxCommandBindKernel(gfx_, visibility_buffer_kernel_); - gfxCommandMultiDrawIndexedIndirect(gfx_, draw_command_buffer, instance_count); - gfxCommandCopyTexture(gfx_, capsaicin.getAOVBuffer("VisibilityDepth"), capsaicin.getAOVBuffer("Depth")); - - gfxDestroyBuffer(gfx_, draw_command_buffer); - - glm::mat4 const view_projection_inverse = glm::inverse(glm::dmat4(camera.view_projection)); - glm::mat4 const reprojection_matrix = - glm::dmat4(camera.view_projection_prev) * glm::dmat4(view_projection_inverse); - - gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_Eye", capsaicin.getCamera().eye); - gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_NearFar", - float2(capsaicin.getCamera().nearZ, capsaicin.getCamera().farZ)); - gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_TexelSize", - float2(1.0f / capsaicin.getWidth(), 1.0f / capsaicin.getHeight())); - gfxProgramSetParameter(gfx_, disocclusion_mask_program_, "g_Reprojection", reprojection_matrix); - gfxProgramSetParameter( - gfx_, disocclusion_mask_program_, "g_ViewProjectionInverse", view_projection_inverse); - - uint32_t const *num_threads = 
gfxKernelGetNumThreads(gfx_, disocclusion_mask_kernel_); - uint32_t const num_groups_x = (capsaicin.getWidth() + num_threads[0] - 1) / num_threads[0]; - uint32_t const num_groups_y = (capsaicin.getHeight() + num_threads[1] - 1) / num_threads[1]; - - gfxCommandBindKernel(gfx_, disocclusion_mask_kernel_); - gfxCommandDispatch(gfx_, num_groups_x, num_groups_y, 1); - - if (capsaicin.getRenderSettings().debug_view_ == "Velocity") + auto const debugView = capsaicin.getCurrentDebugView(); + if (debugView == "Velocity") { if (!debug_velocities_program_) { - char const *screen_triangle_vs = - "struct VS_OUTPUT { float4 pos : SV_POSITION; float2 texcoord : TEXCOORD; };" - "VS_OUTPUT main(in uint idx : SV_VertexID) { VS_OUTPUT output; output.texcoord = float2(1.0f - 2.0f * (idx & 1), 2.0f * (idx >> 1));" - "output.pos = 1.0f - float4(4.0f * (idx & 1), 4.0f * (idx >> 1), 1.0f, 0.0f); return output; }"; - - GfxProgramDesc debug_velocities_program_desc = {}; - debug_velocities_program_desc.vs = screen_triangle_vs; - debug_velocities_program_desc.ps = - "Texture2D VelocityBuffer; float4 main(in float4 pos : SV_Position) : SV_Target {" - "float2 velocity = VelocityBuffer.Load(int3(pos.xy, 0)).xy;" - "return float4(pow(float3((velocity.y >= 0.0f ? velocity.y : 0.0f) + (velocity.y < 0.0f ? -velocity.y : 0.0f)," - " (velocity.x < 0.0f ? -velocity.x : 0.0f) + (velocity.y < 0.0f ? -velocity.y : 0.0f)," - " (velocity.x >= 0.0f ? 
velocity.x : 0.0f)), 0.4f), 1.0f); }"; - debug_velocities_program_ = - gfxCreateProgram(gfx_, debug_velocities_program_desc, "Capsaicin_DebugVelocitiesProgram"); + debug_velocities_program_ = gfxCreateProgram( + gfx_, "render_techniques/visibility_buffer/debug_velocity", capsaicin.getShaderPath()); GfxDrawState debug_state; gfxDrawStateSetColorTarget(debug_state, 0, capsaicin.getAOVBuffer("Debug")); debug_velocities_kernel_ = gfxCreateGraphicsKernel(gfx_, debug_velocities_program_, debug_state); } - const GfxCommandEvent command_event(gfx_, "DrawDebugView"); + const GfxCommandEvent command_event(gfx_, "DrawDebugVelocities"); gfxProgramSetParameter( gfx_, debug_velocities_program_, "VelocityBuffer", capsaicin.getAOVBuffer("Velocity")); gfxCommandBindKernel(gfx_, debug_velocities_kernel_); gfxCommandDraw(gfx_, 3); } + else if (debugView.starts_with("Material")) + { + if (!debug_material_program_) + { + debug_material_program_ = gfxCreateProgram( + gfx_, "render_techniques/visibility_buffer/debug_material", capsaicin.getShaderPath()); + + GfxDrawState debug_material_draw_state; + gfxDrawStateSetColorTarget(debug_material_draw_state, 0, capsaicin.getAOVBuffer("Debug")); + debug_material_kernel_ = gfxCreateGraphicsKernel( + gfx_, debug_material_program_, debug_material_draw_state, "DebugMaterial"); + } + + enum class MaterialMode : uint32_t + { + ALBEDO = 0, + METALLICITY, + ROUGHNESS, + }; + MaterialMode materialMode = MaterialMode::ALBEDO; + if (debugView == "MaterialMetallicity") + { + materialMode = MaterialMode::METALLICITY; + } + else if (debugView == "MaterialRoughness") + { + materialMode = MaterialMode::ROUGHNESS; + } + + const GfxCommandEvent command_event(gfx_, "DebugMaterial"); + gfxProgramSetParameter(gfx_, debug_material_program_, "g_MaterialMode", materialMode); + + gfxProgramSetParameter( + gfx_, debug_material_program_, "g_VisibilityBuffer", capsaicin.getAOVBuffer("Visibility")); + gfxProgramSetParameter( + gfx_, debug_material_program_, 
"g_DepthBuffer", capsaicin.getAOVBuffer("VisibilityDepth")); + + gfxProgramSetParameter( + gfx_, debug_material_program_, "g_InstanceBuffer", capsaicin.getInstanceBuffer()); + gfxProgramSetParameter(gfx_, debug_material_program_, "g_MeshBuffer", capsaicin.getMeshBuffer()); + gfxProgramSetParameter(gfx_, debug_material_program_, "g_IndexBuffer", capsaicin.getIndexBuffer()); + gfxProgramSetParameter(gfx_, debug_material_program_, "g_VertexBuffer", capsaicin.getVertexBuffer()); + gfxProgramSetParameter( + gfx_, debug_material_program_, "g_MaterialBuffer", capsaicin.getMaterialBuffer()); + + gfxProgramSetParameter(gfx_, debug_material_program_, "g_TextureMaps", capsaicin.getTextures(), + capsaicin.getTextureCount()); + gfxProgramSetParameter( + gfx_, debug_material_program_, "g_TextureSampler", capsaicin.getAnisotropicSampler()); + gfxCommandBindKernel(gfx_, debug_material_kernel_); + gfxCommandDraw(gfx_, 3); + } + else if (debugView == "DXR1.0") + { + if (!debug_dxr10_program_) + { + debug_dxr10_program_ = gfxCreateProgram( + gfx_, "render_techniques/visibility_buffer/debug_dxr10", capsaicin.getShaderPath()); + // Associate space1 with local root signature for MyHitGroup + GfxLocalRootSignatureAssociation local_root_signature_associations[] = { + {1, kGfxShaderGroupType_Hit, "MyHitGroup"} + }; + debug_dxr10_kernel_ = + gfxCreateRaytracingKernel(gfx_, debug_dxr10_program_, local_root_signature_associations, 1); + uint32_t entry_count[kGfxShaderGroupType_Count] { + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Raygen), + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Miss), + gfxSceneGetInstanceCount(capsaicin.getScene()) + * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit), + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Callable)}; + debug_dxr10_sbt_ = gfxCreateSbt(gfx_, &debug_dxr10_kernel_, 1, entry_count); + } + + const GfxCommandEvent command_event(gfx_, "DrawDebugDXR1.0"); + gfxProgramSetParameter(gfx_, debug_dxr10_program_, "g_Eye", 
capsaicin.getCamera().eye); + gfxProgramSetParameter( + gfx_, debug_dxr10_program_, "g_ViewProjectionInverse", camera.inv_view_projection); + gfxProgramSetParameter(gfx_, debug_dxr10_program_, "g_Scene", capsaicin.getAccelerationStructure()); + gfxProgramSetParameter(gfx_, debug_dxr10_program_, "g_RenderTarget", capsaicin.getAOVBuffer("Debug")); + // Populate shader binding table + gfxSbtSetShaderGroup(gfx_, debug_dxr10_sbt_, kGfxShaderGroupType_Raygen, 0, "MyRaygenShader"); + gfxSbtSetShaderGroup(gfx_, debug_dxr10_sbt_, kGfxShaderGroupType_Miss, 0, "MyMissShader"); + for (uint32_t i = 0; i < gfxAccelerationStructureGetRaytracingPrimitiveCount( + gfx_, capsaicin.getAccelerationStructure()); + i++) + { + gfxSbtSetShaderGroup(gfx_, debug_dxr10_sbt_, kGfxShaderGroupType_Hit, + i * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit), + i % 2 == 0 ? "MyHitGroup" : "MyHitGroup2"); + // Populate local root signature parameters + glm::vec4 test_data = i % 4 == 0 ? glm::vec4(0, 0, 1, 0) : glm::vec4(0, 1, 0, 0); + gfxSbtSetConstants(gfx_, debug_dxr10_sbt_, kGfxShaderGroupType_Hit, + i * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit), "g_MyCB", + (void const *)&test_data, sizeof(test_data)); + } + gfxCommandBindKernel(gfx_, debug_dxr10_kernel_); + gfxCommandDispatchRays(gfx_, debug_dxr10_sbt_, capsaicin.getWidth(), capsaicin.getHeight(), 1); + } } -void VisibilityBuffer::terminate() +void VisibilityBuffer::terminate() noexcept { gfxDestroyKernel(gfx_, disocclusion_mask_kernel_); gfxDestroyProgram(gfx_, disocclusion_mask_program_); gfxDestroyKernel(gfx_, visibility_buffer_kernel_); gfxDestroyProgram(gfx_, visibility_buffer_program_); + gfxDestroySbt(gfx_, visibility_buffer_sbt_); + visibility_buffer_sbt_ = {}; gfxDestroyKernel(gfx_, debug_velocities_kernel_); gfxDestroyProgram(gfx_, debug_velocities_program_); + debug_velocities_program_ = {}; + + gfxDestroyKernel(gfx_, debug_material_kernel_); + gfxDestroyProgram(gfx_, debug_material_program_); + 
debug_material_kernel_ = {}; + + gfxDestroyProgram(gfx_, debug_dxr10_program_); + gfxDestroyKernel(gfx_, debug_dxr10_kernel_); + debug_dxr10_program_ = {}; + gfxDestroySbt(gfx_, debug_dxr10_sbt_); +} + +bool VisibilityBuffer::initKernel(CapsaicinInternal const &capsaicin) noexcept +{ + if (!options.visibility_buffer_use_rt) + { + // Initialise the raster variant of visibility buffer kernel + GfxDrawState visibility_buffer_draw_state = {}; + gfxDrawStateSetCullMode(visibility_buffer_draw_state, D3D12_CULL_MODE_NONE); + + gfxDrawStateSetColorTarget(visibility_buffer_draw_state, 0, capsaicin.getAOVBuffer("Visibility")); + gfxDrawStateSetColorTarget(visibility_buffer_draw_state, 1, capsaicin.getAOVBuffer("GeometryNormal")); + gfxDrawStateSetColorTarget(visibility_buffer_draw_state, 2, capsaicin.getAOVBuffer("Velocity")); + std::vector defines; + if (capsaicin.hasAOVBuffer("ShadingNormal")) + { + gfxDrawStateSetColorTarget( + visibility_buffer_draw_state, 3, capsaicin.getAOVBuffer("ShadingNormal")); + defines.push_back("HAS_SHADING_NORMAL"); + } + if (capsaicin.hasAOVBuffer("VertexNormal")) + { + gfxDrawStateSetColorTarget( + visibility_buffer_draw_state, 4, capsaicin.getAOVBuffer("VertexNormal")); + defines.push_back("HAS_VERTEX_NORMAL"); + } + if (capsaicin.hasAOVBuffer("Roughness")) + { + gfxDrawStateSetColorTarget(visibility_buffer_draw_state, 5, capsaicin.getAOVBuffer("Roughness")); + defines.push_back("HAS_ROUGHNESS"); + } + gfxDrawStateSetDepthStencilTarget(visibility_buffer_draw_state, capsaicin.getAOVBuffer("Depth")); + + visibility_buffer_program_ = gfxCreateProgram( + gfx_, "render_techniques/visibility_buffer/visibility_buffer", capsaicin.getShaderPath()); + visibility_buffer_kernel_ = gfxCreateGraphicsKernel(gfx_, visibility_buffer_program_, + visibility_buffer_draw_state, nullptr, defines.data(), (uint32_t)defines.size()); + } + else + { + // Initialise the ray tracing variant of visibility buffer kernel + std::vector defines; + 
defines.push_back("HAS_RT"); + if (capsaicin.hasAOVBuffer("ShadingNormal")) + { + defines.push_back("HAS_SHADING_NORMAL"); + } + if (capsaicin.hasAOVBuffer("VertexNormal")) + { + defines.push_back("HAS_VERTEX_NORMAL"); + } + if (capsaicin.hasAOVBuffer("Roughness")) + { + defines.push_back("HAS_ROUGHNESS"); + } + visibility_buffer_program_ = gfxCreateProgram( + gfx_, "render_techniques/visibility_buffer/visibility_buffer_rt", capsaicin.getShaderPath()); + if (options.visibility_buffer_use_rt_dxr10) + { + std::vector exports = { + "VisibilityRTRaygen", "VisibilityRTMiss", "VisibilityRTAnyHit", "VisibilityRTClosestHit"}; + std::vector subobjects = { + "VisibilityShaderConfig", "VisibilityPipelineConfig", "VisibilityRTHitGroup"}; + visibility_buffer_kernel_ = gfxCreateRaytracingKernel(gfx_, visibility_buffer_program_, nullptr, + 0, exports.data(), (uint32_t)exports.size(), subobjects.data(), (uint32_t)subobjects.size(), + defines.data(), (uint32_t)defines.size()); + + uint32_t entry_count[kGfxShaderGroupType_Count] { + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Raygen), + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Miss), + gfxSceneGetInstanceCount(capsaicin.getScene()) + * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit), + capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Callable)}; + std::vector sbt_kernels = {visibility_buffer_kernel_}; + visibility_buffer_sbt_ = + gfxCreateSbt(gfx_, sbt_kernels.data(), (uint32_t)sbt_kernels.size(), entry_count); + } + else + { + defines.push_back("USE_INLINE_RT"); + visibility_buffer_kernel_ = gfxCreateComputeKernel(gfx_, visibility_buffer_program_, + "VisibilityBufferRT", defines.data(), (uint32_t)defines.size()); + } + } + + return !!visibility_buffer_program_; } } // namespace Capsaicin diff --git a/src/core/src/render_techniques/visibility_buffer/visibility_buffer.frag b/src/core/src/render_techniques/visibility_buffer/visibility_buffer.frag index 77b0dd5..0d99918 100644 --- 
a/src/core/src/render_techniques/visibility_buffer/visibility_buffer.frag +++ b/src/core/src/render_techniques/visibility_buffer/visibility_buffer.frag @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -26,16 +26,21 @@ uint g_FrameIndex; StructuredBuffer g_MaterialBuffer; +#include "../../components/blue_noise_sampler/blue_noise_sampler.hlsl" + Texture2D g_TextureMaps[]; SamplerState g_TextureSampler; +#include "../../geometry/geometry.hlsl" #include "../../math/math.hlsl" #include "../../materials/materials.hlsl" struct Params { - float4 position : SV_Position; + float4 position : SV_Position; +#if defined(HAS_SHADING_NORMAL) || defined(HAS_VERTEX_NORMAL) float3 normal : NORMAL; +#endif float2 uv : TEXCOORD; float3 world : POSITION0; float4 current : POSITION1; @@ -46,71 +51,92 @@ struct Params struct Pixel { - float4 visibility : SV_Target0; - float4 normal : SV_Target1; - float4 details : SV_Target2; - float2 velocity : SV_Target3; + float4 visibility : SV_Target0; + float4 geom_normal : SV_Target1; + float2 velocity : SV_Target2; +#ifdef HAS_SHADING_NORMAL + float4 shad_normal : SV_Target3; +#endif +#ifdef HAS_VERTEX_NORMAL + float4 vert_normal : SV_Target4; +#endif +#ifdef HAS_ROUGHNESS + float roughness : SV_Target5; +#endif }; -float2 CalculateMotionVector(in float4 current_pos, in float4 previous_pos) -{ - float2 current_uv = 0.5f * current_pos.xy / current_pos.w; - float2 previous_uv = 0.5f * previous_pos.xy / previous_pos.w; - - return (current_uv - previous_uv) * float2(1.0f, -1.0f); -} - -Pixel main(in Params params, in float3 barycentrics : SV_Barycentrics, in uint primitiveID : SV_PrimitiveID, in bool is_front_face : 
SV_IsFrontFace) +Pixel main(in Params params, in float3 barycentrics : SV_Barycentrics, in uint primitiveID : SV_PrimitiveID) { - Pixel pixel; - Material material = g_MaterialBuffer[params.materialID]; + Pixel pixel; + Material material = g_MaterialBuffer[params.materialID]; + float alpha = material.normal_alpha_side.y; uint alphaMap = asuint(material.albedo.w); - uint normalMap = asuint(material.normal_ao.x); - if (alphaMap != uint(-1)) { - float alpha = g_TextureMaps[alphaMap].SampleLevel(g_TextureSampler, params.uv, 0).w; + alpha *= g_TextureMaps[alphaMap].SampleLevel(g_TextureSampler, params.uv, 0).w; + } + if (alpha != 1.0f) + { if (alpha < 0.5f) { discard; } } - float3 normal = normalize(params.normal); - float3 details = normal; + float3 dFdxPos = ddx_fine(params.world); + float3 dFdyPos = ddy_fine(params.world); + float3 face_normal = normalize(cross(dFdyPos, dFdxPos)); +#if defined(HAS_SHADING_NORMAL) || defined(HAS_VERTEX_NORMAL) + float3 vertex_normal = normalize(params.normal); + // SV_IsFrontFace incorrectly flips normals when the geometry has non uniform negative(mirrored) scaling + vertex_normal = dot(vertex_normal, face_normal) >= 0.0f ? 
vertex_normal : -vertex_normal; + +# ifdef HAS_SHADING_NORMAL + float3 details = vertex_normal; + uint normalMap = asuint(material.normal_alpha_side.x); if (normalMap != uint(-1)) { - float3 view_direction = normalize(g_Eye - params.world); - - float3 dp1 = ddx(-view_direction); - float3 dp2 = ddy(-view_direction); - float2 duv1 = ddx(params.uv); - float2 duv2 = ddy(params.uv); + float2 dFdxUV = ddx(params.uv); + float2 dFdyUV = ddy(params.uv); - float3 dp2perp = normalize(cross(dp2, normal)); - float3 dp1perp = normalize(cross(normal, dp1)); - float3 tangent = dp2perp * duv1.x + dp1perp * duv2.x; - float3 bitangent = dp2perp * duv1.y + dp1perp * duv2.y; + float determinate = dFdxUV.x * dFdyUV.y - dFdyUV.x * dFdxUV.y; + float3 normalTan = 2.0f * g_TextureMaps[normalMap].Sample(g_TextureSampler, params.uv).xyz - 1.0f; + // If the determinate is zero then the matrix is non invertable + if (determinate != 0.0f && dot(normalTan, normalTan) > 0.0f) + { + determinate = rcp(determinate); + float3 tangentBasis = (dFdxPos * dFdyUV.yyy - dFdyPos * dFdxUV.yyy) * determinate; + float3 bitangentBasis = (dFdyPos * dFdxUV.xxx - dFdxPos * dFdyUV.xxx) * determinate; - float invmax = rsqrt(max(dot(tangent, tangent), dot(bitangent, bitangent))); - float3x3 tbn = transpose(float3x3(tangent * invmax, bitangent * invmax, normal)); - float3 disturb = 2.0f * g_TextureMaps[normalMap].Sample(g_TextureSampler, params.uv).xyz - 1.0f; + // Gram-Schmidt orthogonalise tangent + float3 tangent = normalize(tangentBasis - vertex_normal * dot(vertex_normal, tangentBasis)); + float3 bitangent = cross(vertex_normal, tangent); - details = normalize(mul(tbn, disturb)); - } + // Correct handedness + bitangent = dot(bitangent, bitangentBasis) >= 0.0f ? 
-bitangent : bitangent; - if (!is_front_face) - { - normal = -normal; - details = -details; + float3x3 tbn = float3x3(tangent, bitangent, vertex_normal); + details = normalize(mul(normalTan, tbn)); + } } - - pixel.visibility = float4(barycentrics.yz, asfloat(params.instanceID), asfloat(primitiveID)); - pixel.normal = float4(0.5f * normal + 0.5f, 1.0f); - pixel.details = float4(0.5f * details + 0.5f, 1.0f); - pixel.velocity = CalculateMotionVector(params.current, params.previous); +# endif // HAS_SHADING_NORMAL +#endif // HAS_SHADING_NORMAL || HAS_VERTEX_NORMAL + + pixel.visibility = float4(barycentrics.yz, asfloat(params.instanceID), asfloat(primitiveID)); + pixel.geom_normal = float4(0.5f * face_normal + 0.5f, 1.0f); + pixel.velocity = CalculateMotionVector(params.current, params.previous); +#ifdef HAS_SHADING_NORMAL + pixel.shad_normal = float4(0.5f * details + 0.5f, 1.0f); +#endif +#ifdef HAS_VERTEX_NORMAL + pixel.vert_normal = float4(0.5f * vertex_normal + 0.5f, 1.0f); +#endif +#ifdef HAS_ROUGHNESS + MaterialEvaluated materialEvaluated = MakeMaterialEvaluated(material, params.uv); + pixel.roughness = materialEvaluated.roughness; +#endif return pixel; } diff --git a/src/core/src/render_techniques/visibility_buffer/visibility_buffer.h b/src/core/src/render_techniques/visibility_buffer/visibility_buffer.h index 8479a8a..7f0c489 100644 --- a/src/core/src/render_techniques/visibility_buffer/visibility_buffer.h +++ b/src/core/src/render_techniques/visibility_buffer/visibility_buffer.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -31,6 +31,33 @@ class VisibilityBuffer : public RenderTechnique VisibilityBuffer(); ~VisibilityBuffer(); + /* + * Gets configuration options for current technique. + * @return A list of all valid configuration options. + */ + RenderOptionList getRenderOptions() noexcept override; + + struct RenderOptions + { + bool visibility_buffer_use_rt = false; /**< Use Ray Tracing instead of rasterisation */ + bool visibility_buffer_use_rt_dxr10 = + false; /**< Use dxr 1.0 ray-tracing pipelines instead of inline rt + (only effects if visibility_buffer_use_rt is enabled) */ + }; + + /** + * Convert render options to internal options format. + * @param options Current render options. + * @returns The options converted. + */ + static RenderOptions convertOptions(RenderOptionList const &options) noexcept; + + /** + * Gets a list of any shared components used by the current render technique. + * @return A list of all supported components. + */ + ComponentList getComponents() const noexcept override; + /** * Gets the required list of AOVs needed for the current render technique. * @return A list of all required AOV buffers. @@ -58,14 +85,31 @@ class VisibilityBuffer : public RenderTechnique */ void render(CapsaicinInternal &capsaicin) noexcept override; + /** + * Destroy any used internal resources and shutdown. + */ + void terminate() noexcept override; + private: - void terminate(); - - GfxKernel disocclusion_mask_kernel_; - GfxProgram disocclusion_mask_program_; - GfxKernel visibility_buffer_kernel_; - GfxProgram visibility_buffer_program_; - GfxKernel debug_velocities_kernel_; - GfxProgram debug_velocities_program_; + /** + * Initialise internal visibility buffer kernel. + * @param capsaicin The current capsaicin context. + * @return True if initialisation succeeded, False otherwise. 
+ */ + bool initKernel(CapsaicinInternal const &capsaicin) noexcept; + + RenderOptions options; + GfxKernel disocclusion_mask_kernel_; + GfxProgram disocclusion_mask_program_; + GfxKernel visibility_buffer_kernel_; + GfxProgram visibility_buffer_program_; + GfxSbt visibility_buffer_sbt_; + GfxProgram debug_velocities_program_; + GfxKernel debug_velocities_kernel_; + GfxProgram debug_material_program_; + GfxKernel debug_material_kernel_; + GfxProgram debug_dxr10_program_; + GfxKernel debug_dxr10_kernel_; + GfxSbt debug_dxr10_sbt_; }; } // namespace Capsaicin diff --git a/src/core/src/render_techniques/visibility_buffer/visibility_buffer.vert b/src/core/src/render_techniques/visibility_buffer/visibility_buffer.vert index f55c3b2..a4fed6b 100644 --- a/src/core/src/render_techniques/visibility_buffer/visibility_buffer.vert +++ b/src/core/src/render_techniques/visibility_buffer/visibility_buffer.vert @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,7 @@ THE SOFTWARE. 
********************************************************************/ #include "../../gpu_shared.h" -#include "../../math/geometry.hlsl" +#include "../../math/transform.hlsl" float4x4 g_ViewProjection; float4x4 g_PrevViewProjection; @@ -29,14 +29,16 @@ float4x4 g_PrevViewProjection; StructuredBuffer g_MeshBuffer; StructuredBuffer g_MaterialBuffer; StructuredBuffer g_InstanceBuffer; -StructuredBuffer g_TransformBuffer; +StructuredBuffer g_TransformBuffer; StructuredBuffer g_InstanceIDBuffer; -StructuredBuffer g_PrevTransformBuffer; +StructuredBuffer g_PrevTransformBuffer; struct Params { - float4 position : SV_Position; + float4 position : SV_Position; +#if defined(HAS_SHADING_NORMAL) || defined(HAS_VERTEX_NORMAL) float3 normal : NORMAL; +#endif float2 uv : TEXCOORD; float3 world : POSITION0; float4 current : POSITION1; @@ -51,21 +53,23 @@ Params main(in Vertex vertex, in uint drawID : gfx_DrawID) Instance instance = g_InstanceBuffer[instanceID]; Mesh mesh = g_MeshBuffer[instance.mesh_index]; - float4x4 transform = g_TransformBuffer[instance.transform_index]; - float4x4 prev_transform = g_PrevTransformBuffer[instance.transform_index]; + float3x4 transform = g_TransformBuffer[instance.transform_index]; + float3x4 prev_transform = g_PrevTransformBuffer[instance.transform_index]; - float3 position = mul(transform, float4(vertex.position.xyz, 1.0f)).xyz; - float3 prev_position = mul(prev_transform, float4(vertex.position.xyz, 1.0f)).xyz; + float3 position = transformPoint(vertex.position.xyz, transform); + float3 prev_position = transformPoint(vertex.position.xyz, prev_transform); Params params; - params.position = mul(g_ViewProjection, float4(position, 1.0f)); - params.normal = transformDirection(vertex.normal.xyz, transform); + params.position = mul(g_ViewProjection, float4(position, 1.0f)); +#if defined(HAS_SHADING_NORMAL) || defined(HAS_VERTEX_NORMAL) + params.normal = transformNormal(vertex.normal.xyz, transform); +#endif params.uv = vertex.uv; params.world = 
position; params.current = params.position; params.previous = mul(g_PrevViewProjection, float4(prev_position, 1.0f)); params.instanceID = instanceID; - params.materialID = mesh.material_index; + params.materialID = instance.material_index; return params; } diff --git a/src/core/src/render_techniques/visibility_buffer/visibility_buffer_rt.comp b/src/core/src/render_techniques/visibility_buffer/visibility_buffer_rt.comp new file mode 100644 index 0000000..8963f25 --- /dev/null +++ b/src/core/src/render_techniques/visibility_buffer/visibility_buffer_rt.comp @@ -0,0 +1,206 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#include "../../geometry/path_tracing_shared.h" + +uint2 g_BufferDimensions; +RayCamera g_RayCamera; +uint g_FrameIndex; + +StructuredBuffer g_InstanceBuffer; +StructuredBuffer g_MeshBuffer; +StructuredBuffer g_TransformBuffer; +StructuredBuffer g_IndexBuffer; +StructuredBuffer g_VertexBuffer; +StructuredBuffer g_MaterialBuffer; +StructuredBuffer g_PrevTransformBuffer; + +RaytracingAccelerationStructure g_Scene; + +Texture2D g_TextureMaps[]; +SamplerState g_TextureSampler; // Should be a linear sampler + +struct CameraMatrix +{ + float4x4 data; +}; +ConstantBuffer g_ViewProjection; +ConstantBuffer g_PrevViewProjection; +float2 g_Jitter; + +RWTexture2D g_Visibility; +RWTexture2D g_Depth; +RWTexture2D g_GeometryNormal; +RWTexture2D g_Velocity; +#ifdef HAS_SHADING_NORMAL +RWTexture2D g_ShadingNormal; +#endif +#ifdef HAS_VERTEX_NORMAL +RWTexture2D g_VertexNormal; +#endif +#ifdef HAS_ROUGHNESS +RWTexture2D g_Roughness; +#endif + +#include "../../geometry/geometry.hlsl" +#include "../../geometry/intersection.hlsl" +#include "../../geometry/ray_intersection.hlsl" +#include "../../geometry/ray_tracing.hlsl" +#include "../../math/math.hlsl" +#include "../../math/transform.hlsl" +#include "../../materials/materials.hlsl" + +struct PathData +{ + uint2 pixel; +}; + +void pathHit(PathData pathData, RayDesc ray, HitInfo hitData) +{ + // Get instance information for current object + Instance instance = g_InstanceBuffer[hitData.instanceIndex]; + Mesh mesh = g_MeshBuffer[instance.mesh_index]; + float3x4 transform = g_TransformBuffer[instance.transform_index]; + + // Fetch vertex data +#if defined(HAS_SHADING_NORMAL) || defined(HAS_VERTEX_NORMAL) + TriangleNormUV triData = fetchVerticesNormUV(mesh, hitData.primitiveIndex); +#else + TriangleUV triData = fetchVerticesUV(mesh, hitData.primitiveIndex); +#endif + + // Set material + Material material = g_MaterialBuffer[instance.material_index]; + // Calculate UV 
coordinates + float2 uv = interpolate(triData.uv0, triData.uv1, triData.uv2, hitData.barycentrics); + // Calculate intersection position + float3 position = transformPoint(interpolate(triData.v0, triData.v1, triData.v2, hitData.barycentrics), transform); + + // Calculate geometry normal (assume CCW winding) + float3 edge10 = triData.v1 - triData.v0; + float3 edge20 = triData.v2 - triData.v0; + float3x3 normalTransform = getNormalTransform((float3x3)transform); + float3 localGeometryNormal = cross(edge10, edge20) * (hitData.frontFace ? 1.0f : -1.0f); + float3 geometryNormal = normalize(mul(normalTransform, localGeometryNormal)); + +#if defined(HAS_SHADING_NORMAL) || defined(HAS_VERTEX_NORMAL) + float3 normal = interpolate(triData.n0, triData.n1, triData.n2, hitData.barycentrics) * (hitData.frontFace ? 1.0f : -1.0f); + +# ifdef HAS_VERTEX_NORMAL + // Calculate shading normal + float3 vertexNormal = normalize(mul(normalTransform, normal)); +# endif +#endif // HAS_SHADING_NORMAL || HAS_VERTEX_NORMAL +#ifdef HAS_SHADING_NORMAL + // Check for normal mapping + float3 shadingNormal = normal; + uint normalTex = asuint(material.normal_alpha_side.x); + if (normalTex != uint(-1)) + { + // Get normal from texture map + float3 normalTan = 2.0f * g_TextureMaps[NonUniformResourceIndex(normalTex)].SampleLevel(g_TextureSampler, uv, 0.0f).xyz - 1.0f; + normal = normalize(normal); + // Ensure normal is in same hemisphere as geometry normal (This is required when non-uniform negative(mirrored) scaling is applied to a backface surface) + normal = dot(normal, normalize(localGeometryNormal)) >= 0.0f ? 
normal : -normal; + + // Calculate tangent and bi-tangent basis vectors + float2 edgeUV1 = triData.uv1 - triData.uv0; + float2 edgeUV2 = triData.uv2 - triData.uv0; + float determinate = edgeUV1.x * edgeUV2.y - edgeUV1.y * edgeUV2.x; + // If the determinate is zero then the matrix is non invertable + if (determinate != 0.0f && dot(normalTan, normalTan) > 0.0f) + { + determinate = rcp(determinate); + float3 tangentBasis = (edge10 * edgeUV2.yyy - edge20 * edgeUV1.yyy) * determinate; + float3 bitangentBasis = (edge20 * edgeUV1.xxx - edge10 * edgeUV2.xxx) * determinate; + + // Gram-Schmidt orthogonalise tangent + float3 tangent = normalize(tangentBasis - normal * dot(normal, tangentBasis)); + float3 bitangent = cross(normal, tangent); + + // Correct handedness + bitangent = dot(bitangent, bitangentBasis) >= 0.0f ? -bitangent : bitangent; + + // Convert from tangent space + float3x3 tbn = float3x3(tangent, bitangent, normal); + shadingNormal = mul(normalTan, tbn); + } + } + shadingNormal = normalize(mul(normalTransform, shadingNormal)); +#endif // HAS_SHADING_NORMAL + // Write out buffers + g_Visibility[pathData.pixel] = float4(hitData.barycentrics, asfloat(hitData.instanceIndex), asfloat(hitData.primitiveIndex)); + g_Depth[pathData.pixel] = transformPointProjection(position, g_ViewProjection.data).z; + g_GeometryNormal[pathData.pixel] = float4(0.5f * geometryNormal + 0.5f, 1.0f); + + float3 prevPosition = transformPoint(interpolate(triData.v0, triData.v1, triData.v2, hitData.barycentrics), g_PrevTransformBuffer[instance.transform_index]); + float4 positionVP = mul(g_ViewProjection.data, float4(position, 1.0f)); + float4 prevPositionVP = mul(g_PrevViewProjection.data, float4(prevPosition, 1.0f)); + g_Velocity[pathData.pixel] = CalculateMotionVector(positionVP, prevPositionVP); +#ifdef HAS_SHADING_NORMAL + g_ShadingNormal[pathData.pixel] = float4(0.5f * shadingNormal + 0.5f, 1.0f); +#endif +#ifdef HAS_VERTEX_NORMAL + g_VertexNormal[pathData.pixel] = float4(0.5f * 
vertexNormal + 0.5f, 1.0f); +#endif +#ifdef HAS_ROUGHNESS + MaterialEvaluated materialEvaluated = MakeMaterialEvaluated(material, uv); + g_Roughness[pathData.pixel] = materialEvaluated.roughness; +#endif +} + +void visibilityRT(uint2 did, uint2 dimensions) +{ + //Check if valid pixel + uint2 pixel = did; + if (any(pixel >= dimensions)) + { + return; + } + + // Offset pixel to pixel center + float2 pixelRay = (float2)pixel + 0.5f.xx + g_Jitter; + + // Calculate primary ray + RayDesc ray = generateCameraRay(pixelRay, g_RayCamera); + + PathData pathData; + pathData.pixel = pixel; + +#ifdef USE_INLINE_RT + ClosestRayQuery rayQuery = TraceRay(ray); + + // Check for valid intersection + if (rayQuery.CommittedStatus() != COMMITTED_NOTHING) + { + pathHit(pathData, ray, GetHitInfoRtInlineCommitted(rayQuery)); + } +#else + TraceRay(g_Scene, RAY_FLAG_NONE, 0xFFu, 0, 0, 0, ray, pathData); +#endif +} + +[numthreads(4, 8, 1)] +void VisibilityBufferRT(in uint2 did : SV_DispatchThreadID) +{ + visibilityRT(did, g_BufferDimensions); +} diff --git a/src/core/src/render_techniques/visibility_buffer/visibility_buffer_rt.rt b/src/core/src/render_techniques/visibility_buffer/visibility_buffer_rt.rt new file mode 100644 index 0000000..98ef2c3 --- /dev/null +++ b/src/core/src/render_techniques/visibility_buffer/visibility_buffer_rt.rt @@ -0,0 +1,68 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#include "visibility_buffer_rt.comp" + +TriangleHitGroup VisibilityRTHitGroup = +{ + "VisibilityRTAnyHit", // AnyHit + "VisibilityRTClosestHit", // ClosestHit +}; + +RaytracingShaderConfig VisibilityShaderConfig = +{ + 8, // max payload size + 8 // max attribute size +}; + +RaytracingPipelineConfig VisibilityPipelineConfig = +{ + 1 // max trace recursion depth +}; + +[shader("raygeneration")] +void VisibilityRTRaygen() +{ + uint2 did = DispatchRaysIndex().xy; + visibilityRT(did, g_BufferDimensions); +} + +[shader("anyhit")] +void VisibilityRTAnyHit(inout PathData path, in BuiltInTriangleIntersectionAttributes attr) +{ + if (!AlphaTest(GetHitInfoRt(attr))) + { + IgnoreHit(); + } +} + +[shader("closesthit")] +void VisibilityRTClosestHit(inout PathData path, in BuiltInTriangleIntersectionAttributes attr) +{ + pathHit(path, GetRayDescRt(), GetHitInfoRt(attr)); +} + +[shader("miss")] +void VisibilityRTMiss(inout PathData path) +{ + // Do nothing +} diff --git a/src/core/src/renderers/gi10/gi10_renderer.cpp b/src/core/src/renderers/gi10/gi10_renderer.cpp index 36ca2bb..e8ba50b 100644 --- a/src/core/src/renderers/gi10/gi10_renderer.cpp +++ b/src/core/src/renderers/gi10/gi10_renderer.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -32,23 +32,25 @@ THE SOFTWARE. namespace Capsaicin { -/** The GI1.0 renderer. */ -class GI10Renderer : public Renderer::Registrar +/** The GI-1.0 renderer. 
*/ +class GI10Renderer + : public Renderer + , public RendererFactory::Registrar { public: - static constexpr std::string_view Name = "GI-1.0"; + static constexpr std::string_view Name = "GI-1.1"; /** Default constructor. */ - GI10Renderer() noexcept {} + GI10Renderer() noexcept {}; /** * Sets up the required render techniques. - * @param render_settings The current global render settings. + * @param renderOptions The current global render options. * @return A list of all required render techniques in the order that they are required. The calling * function takes all ownership of the returned list. */ std::vector> setupRenderTechniques( - RenderSettings const &render_settings) noexcept override + [[maybe_unused]] RenderOptionList const &renderOptions) noexcept override { std::vector> render_techniques; render_techniques.emplace_back(std::make_unique()); @@ -61,7 +63,5 @@ class GI10Renderer : public Renderer::Registrar render_techniques.emplace_back(std::make_unique()); return render_techniques; } - -private: }; } // namespace Capsaicin diff --git a/src/core/src/renderers/path_tracer/path_tracer.cpp b/src/core/src/renderers/reference_path_tracer/reference_path_tracer.cpp similarity index 75% rename from src/core/src/renderers/path_tracer/path_tracer.cpp rename to src/core/src/renderers/reference_path_tracer/reference_path_tracer.cpp index f1c0b30..cb3c3ff 100644 --- a/src/core/src/renderers/path_tracer/path_tracer.cpp +++ b/src/core/src/renderers/reference_path_tracer/reference_path_tracer.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,33 +21,37 @@ THE SOFTWARE. 
********************************************************************/ #pragma once -#include "path_tracer/reference_pt.h" +#include "reference_path_tracer/reference_path_tracer.h" #include "renderer.h" #include "tone_mapping/tone_mapping.h" +#include "variance_estimate/variance_estimate.h" namespace Capsaicin { /** The path tracer renderer. */ -class PathTracer : public Renderer::Registrar +class ReferencePathTracer + : public Renderer + , public RendererFactory::Registrar { public: - static constexpr std::string_view Name = "Path Tracer"; + static constexpr std::string_view Name = "Reference Path Tracer"; - /** Default constructor. */ - PathTracer() noexcept {} + /** Constructor. */ + ReferencePathTracer() noexcept {}; /** * Sets up the required render techniques. - * @param render_settings The current global render settings. + * @param renderOptions The current global render options. * @return A list of all required render techniques in the order that they are required. The calling * function takes all ownership of the returned list. */ std::vector> setupRenderTechniques( - RenderSettings const &render_settings) noexcept override + [[maybe_unused]] RenderOptionList const &renderOptions) noexcept override { std::vector> render_techniques; render_techniques.emplace_back(std::make_unique()); render_techniques.emplace_back(std::make_unique()); + render_techniques.emplace_back(std::make_unique()); return render_techniques; } diff --git a/src/core/src/renderers/renderer.h b/src/core/src/renderers/renderer.h index 2b06d39..61e694e 100644 --- a/src/core/src/renderers/renderer.h +++ b/src/core/src/renderers/renderer.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,6 @@ THE SOFTWARE. ********************************************************************/ #pragma once -#include "capsaicin.h" #include "factory.h" #include "render_technique.h" @@ -30,23 +29,24 @@ THE SOFTWARE. namespace Capsaicin { /** A abstract renderer class used to encapsulate all required operations to setup a rendering work flow. */ -class Renderer : public Factory +class Renderer { Renderer(Renderer const &) = delete; Renderer &operator=(Renderer const &) = delete; public: - Renderer(Key) noexcept {} + Renderer() noexcept = default; /** * Sets up the required render techniques. - * @param render_settings The current global render settings. + * @param renderOptions The current global render options. * @return A list of all required render techniques in the order that they are required. The calling * function takes all ownership of the returned list. */ virtual std::vector> setupRenderTechniques( - RenderSettings const &render_settings) noexcept = 0; - -private: + RenderOptionList const &renderOptions) noexcept = 0; }; + +class RendererFactory : public Factory +{}; } // namespace Capsaicin diff --git a/src/core/src/utilities/buffer_view.h b/src/core/src/utilities/buffer_view.h index 1db10f4..e832bbc 100644 --- a/src/core/src/utilities/buffer_view.h +++ b/src/core/src/utilities/buffer_view.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,7 @@ THE SOFTWARE. 
********************************************************************/ #pragma once -#include "gfx.h" +#include namespace Capsaicin { diff --git a/src/core/src/utilities/gpu_image_metrics.comp b/src/core/src/utilities/gpu_image_metrics.comp new file mode 100644 index 0000000..7009a46 --- /dev/null +++ b/src/core/src/utilities/gpu_image_metrics.comp @@ -0,0 +1,255 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#include "../math/math.hlsl" +#include "../math/color.hlsl" + +uint2 g_ImageDimensions; + +#if defined(INPUT_MULTICHANNEL) +Texture2D g_SourceImage; +Texture2D g_ReferenceImage; +#else +Texture2D g_SourceImage; +Texture2D g_ReferenceImage; +#endif + +RWStructuredBuffer g_MetricBuffer; + +#define GROUP_SIZE 16 + +groupshared float lds[(GROUP_SIZE * GROUP_SIZE) / 16]; //Assume 16 as smallest possible wave size +groupshared uint ldsWrites; + +// Reduce sum template function +void BlockReduceSum(float value, uint2 did, uint gtid, uint2 gid) +{ + // Combine values across the wave + value = WaveActiveSum(value); + + // Combine values across the group + const uint groupSize = GROUP_SIZE * GROUP_SIZE; + for (uint j = WaveGetLaneCount(); j < groupSize; j *= WaveGetLaneCount()) + { + // Since we work on square tiles its possible that some waves don't write to lds as they have no valid pixels + // To ensure that invalid values arent read from lds we use an atomic to count the actual lds writes + if (gtid == 0) + { + // Clear atomic + InterlockedAnd(ldsWrites, 0); + } + GroupMemoryBarrierWithGroupSync(); + + // Use local data share to combine across waves + if (WaveIsFirstLane()) + { + uint waveID; + InterlockedAdd(ldsWrites, 1, waveID); + lds[waveID] = value; + + } + GroupMemoryBarrierWithGroupSync(); + + uint numWaves = ldsWrites; + if (gtid >= numWaves) + { + break; + } + + // Use the current wave to combine across group + value = lds[gtid]; + value = WaveActiveSum(value); + } + + // Write out final result + if (gtid == 0) + { + const uint blockCount = (g_ImageDimensions.x + GROUP_SIZE - 1) / GROUP_SIZE; + const uint blockIndex = gid.x + gid.y * blockCount; + g_MetricBuffer[blockIndex] = value; + } +} + +float2 GetImageValues(uint2 pixel) +{ +#if defined(INPUT_MULTICHANNEL) + float3 inputSource = g_SourceImage[pixel].xyz; + float3 referenceSource = g_ReferenceImage[pixel].xyz; +# if 
!defined(INPUT_HDR) + // Most image comparison uses luma instead of luminance which requires converting to gamma corrected values + // Textures containing sRGB wil get auto linearized when reading so must manually convert back + inputSource = convertToSRGB(inputSource); + referenceSource = convertToSRGB(referenceSource); +# endif + // Although MSE etc. can operate on RGB values by just extending the sum to include all 3 channels, + // this has been shown to be inaccurate when compared to measured perceptual results. As such we + // instead operate on luma + float input = luminance(inputSource); + float reference = luminance(referenceSource); +#else + float input = g_SourceImage[pixel].x; + float reference = g_ReferenceImage[pixel].x; +# if !defined(INPUT_HDR) && defined(INPUT_LINEAR) + input = select(input < 0.0031308f, 12.92f * input, 1.055f * pow(abs(input), 1.0f / 2.4f) - 0.055f); + reference = select(reference < 0.0031308f, 12.92f * reference, 1.055f * pow(abs(reference), 1.0f / 2.4f) - 0.055f); +# endif +#endif +#if defined(INPUT_HDR) + // Standard MSE metrics dont work well with HDR data + // As such the input values need to be converted to non-linear perceptual values using a conversion metric + // Here we use the ITU Rec2100 Perceptual Quantizer (PQ) transfer function + input = decodePQEOTF(input); + reference = decodePQEOTF(reference); +#endif + return float2(input, reference); +} + +[numthreads(GROUP_SIZE, GROUP_SIZE, 1)] +void ComputeMetric(uint2 did : SV_DispatchThreadID, uint gtid : SV_GroupIndex, uint2 gid : SV_GroupID) +{ + if (any(did >= g_ImageDimensions)) + { + return; + } + + +#ifdef CALCULATE_SSIM + // Each pixel samples from a 11x11 window centered around the pixel. 
Each sample is weighted by + // a Gaussian with sigma=1.5 + // Note: The Gaussian is symmetric so most of these weight are duplicates + const float gaussianWeights[11][11]={ + {1.057565598e-06, 7.814411533e-06, 3.702247708e-05, 0.0001124643551, 0.0002190506529, 0.0002735611601, 0.0002190506529, 0.0001124643551, 3.702247708e-05, 7.814411533e-06, 1.057565598e-06}, + {7.814411533e-06, 5.77411252e-05, 0.0002735611601, 0.0008310054291, 0.001618577563, 0.002021358758, 0.001618577563, 0.0008310054291, 0.0002735611601, 5.77411252e-05, 7.814411533e-06}, + {3.702247708e-05, 0.0002735611601, 0.001296055594, 0.003937069263, 0.007668363825, 0.00957662749, 0.007668363825, 0.003937069263, 0.001296055594, 0.0002735611601, 3.702247708e-05}, + {0.0001124643551, 0.0008310054291, 0.003937069263, 0.01195976041, 0.02329443247, 0.02909122565, 0.02329443247, 0.01195976041, 0.003937069263, 0.0008310054291, 0.0001124643551}, + {0.0002190506529, 0.001618577563, 0.007668363825, 0.02329443247, 0.0453713591, 0.05666197049, 0.0453713591, 0.02329443247, 0.007668363825, 0.001618577563, 0.0002190506529}, + {0.0002735611601, 0.002021358758, 0.00957662749, 0.02909122565, 0.05666197049, 0.07076223776, 0.05666197049, 0.02909122565, 0.00957662749, 0.002021358758, 0.0002735611601}, + {0.0002190506529, 0.001618577563, 0.007668363825, 0.02329443247, 0.0453713591, 0.05666197049, 0.0453713591, 0.02329443247, 0.007668363825, 0.001618577563, 0.0002190506529}, + {0.0001124643551, 0.0008310054291, 0.003937069263, 0.01195976041, 0.02329443247, 0.02909122565, 0.02329443247, 0.01195976041, 0.003937069263, 0.0008310054291, 0.0001124643551}, + {3.702247708e-05, 0.0002735611601, 0.001296055594, 0.003937069263, 0.007668363825, 0.00957662749, 0.007668363825, 0.003937069263, 0.001296055594, 0.0002735611601, 3.702247708e-05}, + {7.814411533e-06, 5.77411252e-05, 0.0002735611601, 0.0008310054291, 0.001618577563, 0.002021358758, 0.001618577563, 0.0008310054291, 0.0002735611601, 5.77411252e-05, 7.814411533e-06}, + 
{1.057565598e-06, 7.814411533e-06, 3.702247708e-05, 0.0001124643551, 0.0002190506529, 0.0002735611601, 0.0002190506529, 0.0001124643551, 3.702247708e-05, 7.814411533e-06, 1.057565598e-06}}; + const float sumSquaredWeights = 0.0353944717; // The sum of the squared weights needed for weighted variance + + const int offset=5; + float width, height; + g_SourceImage.GetDimensions(width, height); + + // Note: The current approach is much slower than it could be as each window recalculates pixel values that + // are shared with neighboring windows. Splitting this into separate passes will improve performance but we + // instead go for simplicity as we are not concerned with performance here as image metrics are expected to be + // performed outside of benchmark runs. + + // Calculate the weighted pixel mean over the window as the sum of each weighted pixel + // MeanImg = Sum(Img.x.y * Weight.x.y) / Sum(Weight.x.y) + // The Gaussian weights are normalised so the total weight is 1 + float sampleMeanInput = 0.0f; + float sampleMeanReference = 0.0f; + // Clamp window to avoid going over image edges + uint minX = (uint)max(offset - (int)did.x, 0); + uint maxX = (uint)min(offset + (int)width - (int)did.x, 11); + uint minY = (uint)max(offset - (int)did.y, 0); + uint maxY = (uint)min(offset + (int)height - (int)did.y, 11); + for(int x = minX; x < maxX; ++x) + { + for(int y = minY; y < maxY; ++y) + { + uint2 pixels = uint2(did.x + x - offset, did.y + y - offset); + float2 values = GetImageValues(pixels); + float input = values.x; + float reference = values.y; + float pixelWeight = gaussianWeights[x][y]; + sampleMeanInput += input * pixelWeight; + sampleMeanReference += reference * pixelWeight; + } + } + + // Calculate the weighted unbiased variance and cross-correlation + // VarianceImg = Sum([Img.x.y - MeanImg]^2 * Weight.x.y) / (1 - Sum(Weight.x.y^2)) + // CrossCorrelation = Sum([Img.x.y - MeanImg] * [Img2.x.y - MeanImg2] * Weight.x.y) / (1 - Sum(Weight.x.y^2)) + // Note: We 
compute the mean first and then sum squared deviations from it (rather than a single-pass sum of squares) due to precision error considerations. + float varianceInput = 0.0f; + float varianceReference = 0.0f; + float crossCorrelation = 0.0f; + for(int x = minX; x < maxX; ++x) + { + for(int y = minY; y < maxY; ++y) + { + uint2 pixels = uint2(did.x + x - offset, did.y + y - offset); + float2 values = GetImageValues(pixels); + float input = values.x; + float reference = values.y; + float pixelWeight = gaussianWeights[x][y]; + float inputSq = input - sampleMeanInput; + float referenceSq = reference - sampleMeanReference; + varianceInput += inputSq * inputSq * pixelWeight; + varianceReference += referenceSq * referenceSq * pixelWeight; + crossCorrelation += inputSq * referenceSq * pixelWeight; + } + } + varianceInput /= 1.0f - sumSquaredWeights; + varianceReference /= 1.0f - sumSquaredWeights; + crossCorrelation /= 1.0f - sumSquaredWeights; + // Clamp negative variance + varianceInput = max(varianceInput, 0); + varianceReference = max(varianceReference, 0); + + // Calculate final SSIM + // SSIM = (2 * MeanImg * MeanImg2 + c1) * (2 * CrossCorrelation + c2) + // / + // (MeanImg^2 + MeanImg2^2 + c1) * (VarianceImg + VarianceImg2 + c2) + // where + // c1 = (k1 * L)^2, c2 = (k2 * L)^2, k1 = 0.01, k2 = 0.03 + // Input range is [0,1] due to PQ-luma so L=1 + const float k1 = 0.01f; + const float k2 = 0.03f; + const float c1 = k1 * k1; + const float c2 = k2 * k2; + float value1 = (2.0f * sampleMeanInput * sampleMeanReference) + c1; + float value2 = (2.0f * crossCorrelation) + c2; + float divisor1 = (sampleMeanInput * sampleMeanInput) + (sampleMeanReference * sampleMeanReference) + c1; + float divisor2 = varianceInput + varianceReference + c2; + float value = value1 * value2; + value /= divisor1 * divisor2; +#else + float2 values = GetImageValues(did); + float input = values.x; + float reference = values.y; + // MSE = [1/(width*height)]Sum([Ref.x.y - Src.x.y]^2) + // RMSE = sqrt(MSE) + // PSNR = 20log10(MaxValue) - 10log10(MSE) + // RMAE = 
[1/(width*height)]Sum(Abs(Src.x.y - Ref.x.y)/Ref.x.y) + // SMAPE = [100/(width*height)]Sum(Abs(Ref.x.y - Src.x.y)/([abs(Ref.x.y)+Abs(Src.x.y)]/2) + float value = reference - input; +# ifdef CALCULATE_RMAE + value = abs(value) / reference; + value = (reference != 0) ? value : 0.0f; +# elif defined(CALCULATE_SMAPE) + float divisor = (abs(reference) + abs(input)) / 2.0f; + value = abs(value) / divisor; + value = (divisor != 0) ? value : 0.0f; +# else + value *= value; +# endif +#endif + BlockReduceSum(value, did, gtid, gid); +} diff --git a/src/core/src/utilities/gpu_image_metrics.cpp b/src/core/src/utilities/gpu_image_metrics.cpp new file mode 100644 index 0000000..aac0b3c --- /dev/null +++ b/src/core/src/utilities/gpu_image_metrics.cpp @@ -0,0 +1,302 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#include "gpu_image_metrics.h" + +#include "capsaicin_internal.h" + +namespace Capsaicin +{ +GPUImageMetrics::~GPUImageMetrics() noexcept +{ + terminate(); +} + +bool GPUImageMetrics::initialise( + GfxContext gfxIn, std::string_view const &shaderPath, Type type, Operation operation) noexcept +{ + gfx = gfxIn; + + if (type != currentType || operation != currentOperation) + { + // If configuration has changed then need to recompile kernels + gfxDestroyProgram(gfx, metricsProgram); + metricsProgram = {}; + gfxDestroyKernel(gfx, metricsKernel); + metricsKernel = {}; + + if (!metricBufferTemp.empty()) + { + for (auto &i : metricBufferTemp) + { + i.first = 0.0f; + } + } + } + currentType = type; + currentOperation = operation; + + static const std::array typeName = {"MSE", "RMSE", "PSNR", "RMAE", "SMAPE", "SSIM"}; + + if (metricBufferTemp.empty()) + { + const uint32_t backBufferCount = getAsyncDelay(); + metricBufferTemp.reserve(backBufferCount); + for (uint32_t i = 0; i < backBufferCount; ++i) + { + GfxBuffer buffer = gfxCreateBuffer(gfx, 1, nullptr, kGfxCpuAccess_Read); + std::string name = "GPUImageMetrics_"; + name += typeName[static_cast(operation)].data(); + name += "Buffer"; + name += std::to_string(i); + buffer.setName(name.c_str()); + metricBufferTemp.emplace_back(0.0f, buffer); + } + } + else + { + // Invalidate current values + for (auto &i : metricBufferTemp) + { + i.first = 0.0f; + } + } + + if (!metricBuffer) + { + metricBuffer = gfxCreateBuffer(gfx, 8160 /*required @ 1080p*/); + std::string name = "GPUImageMetrics_Metrics"; + name += typeName[static_cast(operation)].data(); + name += "Buffer"; + metricBuffer.setName(name.c_str()); + } + + if (!metricsProgram) + { + gfxDestroyProgram(gfx, metricsProgram); + gfxDestroyKernel(gfx, metricsKernel); + metricsProgram = gfxCreateProgram(gfx, "utilities/gpu_image_metrics", shaderPath.data()); + std::vector baseDefines; + if (currentType == 
Type::HDR_RGB || currentType == Type::SDR_RGB || currentType == Type::SDR_SRGB) + { + baseDefines.push_back("INPUT_MULTICHANNEL"); + } + if (currentType == Type::HDR_RGB || currentType == Type::HDR) + { + baseDefines.push_back("INPUT_HDR"); + } + if (currentType != Type::SDR_NONLINEAR && currentType != Type::SDR_SRGB) + { + baseDefines.push_back("INPUT_LINEAR"); + } + if (currentOperation == Operation::PSNR) + { + baseDefines.push_back("CALCULATE_PSNR"); + } + else if (currentOperation == Operation::MSE) + { + baseDefines.push_back("CALCULATE_MSE"); + } + else if (currentOperation == Operation::RMSE) + { + baseDefines.push_back("CALCULATE_RMSE"); + } + else if (currentOperation == Operation::RMAE) + { + baseDefines.push_back("CALCULATE_RMAE"); + } + else if (currentOperation == Operation::SMAPE) + { + baseDefines.push_back("CALCULATE_SMAPE"); + } + else if (currentOperation == Operation::SSIM) + { + baseDefines.push_back("CALCULATE_SSIM"); + } + metricsKernel = gfxCreateComputeKernel(gfx, metricsProgram, "ComputeMetric", baseDefines.data(), + static_cast(baseDefines.size())); + } + if (!reducer.initialise(gfx, shaderPath, GPUReduce::Type::Float, GPUReduce::Operation::Sum)) + { + return false; + } + return !!metricsKernel; +} + +bool GPUImageMetrics::initialise(CapsaicinInternal const &capsaicin, Type type, Operation operation) noexcept +{ + return initialise(capsaicin.getGfx(), capsaicin.getShaderPath(), type, operation); +} + +bool GPUImageMetrics::compare(GfxTexture const &sourceImage, GfxTexture const &referenceImage) noexcept +{ + if (!compareInternal(sourceImage, referenceImage)) + { + return false; + } + + gfxCommandCopyBuffer(gfx, metricBufferTemp[0].second, 0, metricBuffer, 0, sizeof(float)); + // Force the operation to complete and then read back to CPU + gfxFinish(gfx); + auto const newValue = *gfxBufferGetData(gfx, metricBufferTemp[0].second); + currentValue = convertMetric(newValue, referenceImage.getWidth() * referenceImage.getHeight()); + return true; 
+} + +bool GPUImageMetrics::compareAsync(GfxTexture const &sourceImage, GfxTexture const &referenceImage) noexcept +{ + if (!compareInternal(sourceImage, referenceImage)) + { + return false; + } + + // Stream the result back to the CPU (a non-zero 'first' marks a slot whose copy was issued on an earlier frame) + const uint32_t bufferIndex = gfxGetBackBufferIndex(gfx); + if (metricBufferTemp[bufferIndex].first != 0.0f) + { + auto const newValue = *gfxBufferGetData(gfx, metricBufferTemp[bufferIndex].second); + currentValue = convertMetric(newValue, referenceImage.getWidth() * referenceImage.getHeight()); + } + + // Begin copy of new value (will take 'getAsyncDelay()' number of frames to become valid) + gfxCommandCopyBuffer(gfx, metricBufferTemp[bufferIndex].second, 0, metricBuffer, 0, sizeof(float)); + metricBufferTemp[bufferIndex].first = 1.0f; // Non-zero sentinel: storing the metric itself would mark the slot invalid forever once the metric is exactly 0 + return true; +} + +float GPUImageMetrics::getMetricValue() const noexcept +{ + return currentValue; +} + +uint32_t GPUImageMetrics::getAsyncDelay() const noexcept +{ + return gfxGetBackBufferCount(gfx); +} + +void GPUImageMetrics::terminate() noexcept +{ + gfxDestroyBuffer(gfx, metricBuffer); + metricBuffer = {}; + for (auto &i : metricBufferTemp) + { + gfxDestroyBuffer(gfx, i.second); + i.second = {}; + } + metricBufferTemp.clear(); + + gfxDestroyProgram(gfx, metricsProgram); + metricsProgram = {}; + gfxDestroyKernel(gfx, metricsKernel); + metricsKernel = {}; +} + +bool GPUImageMetrics::compareInternal( + GfxTexture const &sourceImage, GfxTexture const &referenceImage) noexcept +{ + if ((sourceImage.getWidth() != referenceImage.getWidth() && sourceImage.getWidth() != 0) + || (sourceImage.getHeight() != referenceImage.getHeight() && sourceImage.getHeight() != 0)) + { + return false; + } + + uint32_t const dimensions[] = {referenceImage.getWidth(), referenceImage.getHeight()}; + uint32_t const *numThreads = gfxKernelGetNumThreads(gfx, metricsKernel); + uint32_t const numGroupsX = (dimensions[0] + numThreads[0] - 1) / numThreads[0]; + uint32_t const numGroupsY = (dimensions[1] + numThreads[1] - 
1) / numThreads[1]; + uint32_t const numOutputValues = numGroupsX * numGroupsY; + + if (metricBuffer.getCount() < numOutputValues) + { + std::string bufferName = metricBuffer.getName(); + gfxDestroyBuffer(gfx, metricBuffer); + metricBuffer = gfxCreateBuffer(gfx, numOutputValues); + metricBuffer.setName(bufferName.c_str()); + } + + gfxProgramSetParameter(gfx, metricsProgram, "g_ImageDimensions", dimensions); + + gfxProgramSetParameter(gfx, metricsProgram, "g_SourceImage", sourceImage); + gfxProgramSetParameter(gfx, metricsProgram, "g_ReferenceImage", referenceImage); + + gfxProgramSetParameter(gfx, metricsProgram, "g_MetricBuffer", metricBuffer); + + // Compute the metric (one partial sum is written per thread group) + { + gfxCommandBindKernel(gfx, metricsKernel); + gfxCommandDispatch(gfx, numGroupsX, numGroupsY, 1); + } + + // Reduce to single value (skipped when a single group already produced the final sum) + if (numOutputValues > 1) + { + if (!reducer.reduce(metricBuffer, numOutputValues)) + { + return false; + } + } + + return true; +} + +float GPUImageMetrics::convertMetric(float value, uint32_t totalSamples) const noexcept +{ + double const totalPixels = static_cast(totalSamples); + double ret = (double)value / totalPixels; + switch (currentOperation) + { + case Operation::MSE: + // MSE = [1/(width*height)]Sum([Ref.x.y - Src.x.y]^2) + break; + case Operation::RMSE: + // RMSE = sqrt(MSE) + ret = sqrt(ret); + break; + case Operation::PSNR: + // PSNR = 20log10(MaxValue) - 10log10(MSE) + if (currentType == Type::HDR || currentType == Type::HDR_RGB) + { + // MaxValue is set as 1.0f as we assume always using normalised float values + ret = -10.0 * log10(ret); + } + else + { + // MaxValue is set as 255 for 8bit values (48.13080361 = 20log10(255)). NOTE(review): the SDR shader path appears to compare values in [0,1] rather than [0,255] - confirm the peak matches the data range + ret = 48.13080361 - 10.0 * log10(ret); + } + break; + case Operation::RMAE: + // RMAE = [1/(width*height)]Sum(Abs(Src.x.y - Ref.x.y)/Ref.x.y) + break; + case Operation::SMAPE: + // SMAPE = [100/(width*height)]Sum(Abs(Ref.x.y - Src.x.y)/([abs(Ref.x.y)+Abs(Src.x.y)]/2) + ret *= 100.0; + break; + case Operation::SSIM: + // SSIM 
=[1/(width*height)]Sum(SSIM(x,y)) + break; + default: break; + } + return static_cast(ret); +} + +} // namespace Capsaicin diff --git a/src/core/src/utilities/gpu_image_metrics.h b/src/core/src/utilities/gpu_image_metrics.h new file mode 100644 index 0000000..631deaa --- /dev/null +++ b/src/core/src/utilities/gpu_image_metrics.h @@ -0,0 +1,136 @@ +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#pragma once + +#include "gpu_reduce.h" +#include "gpu_shared.h" + +namespace Capsaicin +{ +class CapsaicinInternal; + +class GPUImageMetrics +{ +public: + GPUImageMetrics() noexcept = default; + + ~GPUImageMetrics() noexcept; + + /** Type of image values. 
*/ + enum class Type : uint32_t + { + HDR = 0, /**< HDR linear float grayscale/luminance values */ + HDR_RGB, /**< HDR linear float RGB values */ + SDR, /**< SDR linear float values */ + SDR_RGB, /**< SDR linear float RGB values */ + SDR_NONLINEAR, /**< SDR gamma corrected float values */ + SDR_SRGB, /**< SDR gamma corrected sRGB float values */ + }; + + /** Type of comparison operation to perform. */ + enum class Operation + { + MSE, /**< Mean Squared Error */ + RMSE, /**< Root Mean Squared Error */ + PSNR, /**< Peak Signal to noise ratio */ + RMAE, /**< Relative Mean Absolute Error */ + SMAPE, /**< Symmetric Mean Absolute Percentage Error */ + SSIM, /**< Structural Similarity */ + }; + + /** + * Initialise the internal data based on current configuration. + * @param gfx Active gfx context. + * @param shaderPath Path to shader files based on current working directory. + * @param type The type of data in the images. + * @param operation The type of operation to perform. + * @return True, if any initialisation/changes succeeded. + */ + bool initialise( + GfxContext gfx, std::string_view const &shaderPath, Type type, Operation operation) noexcept; + + /** + * Initialise the internal data based on current configuration. + * @param capsaicin Current framework context. + * @param type The type of data in the images. + * @param operation The type of operation to perform. + * @return True, if any initialisation/changes succeeded. + */ + bool initialise(CapsaicinInternal const &capsaicin, Type type, Operation operation) noexcept; + + /** + * Generate comparison metrics for 2 different images. + * @note This will flush the current GPU pipeline so only call this function if no other work is to be + * performed. Otherwise use 'compareAsync'. + * @param sourceImage The input image to compare. + * @param referenceImage The reference image to compare to. + * @returns True, if operation succeeded.
+ */ + bool compare(GfxTexture const &sourceImage, GfxTexture const &referenceImage) noexcept; + + /** + * Asynchronously generate comparison metrics for 2 different images. + * Used if calculating metrics for multiple frames as it allows many frames in flight at a time. + * @param sourceImage The input image to compare. + * @param referenceImage The reference image to compare to. + * @returns True, if operation succeeded. + */ + bool compareAsync(GfxTexture const &sourceImage, GfxTexture const &referenceImage) noexcept; + + /** + * Read back the value of the most recently calculated metric. + * @note When using 'compareAsync' there will be a delay before the final value is available. This delay + * can be retrieved using 'getAsyncDelay'. + * @returns The calculated metric value, or the initial value of 1.0 if no result has been read back yet. + */ + float getMetricValue() const noexcept; + + /** + * Get the number of frames of delay there is when using 'compareAsync'. + * @returns The number of frames worth of delay. + */ + uint32_t getAsyncDelay() const noexcept; + +private: + /** Terminates and cleans up this object. 
*/ + void terminate() noexcept; + + bool compareInternal(GfxTexture const &sourceImage, GfxTexture const &referenceImage) noexcept; + + float convertMetric(float value, uint32_t totalSamples) const noexcept; + + GfxContext gfx; + + Type currentType = Type::HDR_RGB; + Operation currentOperation = Operation::RMSE; + + GfxBuffer metricBuffer; /**< Buffer used to hold calculated metric */ + std::vector> + metricBufferTemp; /**< Buffer used to copy back calculated metric into CPU memory */ + float currentValue = 1.0f; /**< Most recent calculated metric value */ + + GfxProgram metricsProgram; + GfxKernel metricsKernel; + + GPUReduce reducer; +}; +} // namespace Capsaicin diff --git a/src/core/src/utilities/gpu_reduce.comp b/src/core/src/utilities/gpu_reduce.comp index 88bb5fc..bbc0be8 100644 --- a/src/core/src/utilities/gpu_reduce.comp +++ b/src/core/src/utilities/gpu_reduce.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -76,68 +76,78 @@ TYPE CombineWave(TYPE a) } #endif -groupshared TYPE lds[GROUP_SIZE]; +groupshared TYPE lds[GROUP_SIZE / 16]; //Assume 16 as smallest possible wave size // Reduce sum template function -void BlockReduceType(uint gidx, uint lidx, uint bidx, uint count) +void BlockReduceType(uint gtid, uint gid, uint count) { - const uint ThreadGroupSize = GROUP_SIZE; - - // Loop through each input key and combine - uint index = gidx + (bidx * ((KEYS_PER_THREAD - 1) * GROUP_SIZE)); - TYPE result = g_InputBuffer[index]; - index += GROUP_SIZE; + // Loop through each input key and combine + const uint prefixGroupKeys = gid * KEYS_PER_THREAD * GROUP_SIZE; + uint index = gtid + prefixGroupKeys; + if (index >= count) + { + return; + } + TYPE result = g_InputBuffer[index];; for (uint i = 1; i < KEYS_PER_THREAD; i++) { + index += GROUP_SIZE; if (index >= count) { break; } - Combine(result, g_InputBuffer[index]); + TYPE value = g_InputBuffer[index]; + result = Combine(result, value); } - // Combine values across the wave + // Combine values across the wave result = CombineWave(result); - // Combine values across the group - for (uint j = WaveGetLaneCount(); j < ThreadGroupSize; j *= WaveGetLaneCount()) + // Combine values across the group + const uint keysInGroup = count - prefixGroupKeys; + const uint validThreadsInGroup = min(keysInGroup, GROUP_SIZE); + for (uint j = validThreadsInGroup; j > WaveGetLaneCount();) { - // Use local data share to combine across waves - lds[lidx] = result; + // Use local data share to combine across waves + if (WaveIsFirstLane()) + { + const uint waveID = gtid / WaveGetLaneCount(); + lds[waveID] = result; + } GroupMemoryBarrierWithGroupSync(); - uint numWaves = (ThreadGroupSize + j - 1) / j; - if (lidx >= numWaves) + j = (j + WaveGetLaneCount() - 1) / WaveGetLaneCount(); + if (gtid >= j) { 
break; } - // Use the current wave to combine across group - result = lds[lidx * WaveGetLaneCount()]; + // Use the current wave to combine across group + result = lds[gtid]; result = CombineWave(result); } - // Write out final result - if (lidx == 0) + // Write out final result + if (gtid == 0) { - g_OutputBuffer[bidx] = result; + g_OutputBuffer[gid] = result; } } uint g_Count; [numthreads(GROUP_SIZE, 1, 1)] -void BlockReduce(in uint gidx : SV_DispatchThreadID, in uint lidx : SV_GroupThreadID, in uint bidx : SV_GroupID) +void BlockReduce(uint gtid : SV_GroupThreadID, uint gid : SV_GroupID) { - BlockReduceType(gidx, lidx, bidx, g_Count); + BlockReduceType(gtid, gid, g_Count); } StructuredBuffer g_InputLength; [numthreads(GROUP_SIZE, 1, 1)] -void BlockReduceIndirect(in uint gidx : SV_DispatchThreadID, in uint lidx : SV_GroupThreadID, in uint bidx : SV_GroupID) +void BlockReduceIndirect(uint gtid : SV_GroupThreadID, uint gid : SV_GroupID) { - BlockReduceType(gidx, lidx, bidx, g_InputLength[0]); + BlockReduceType(gtid, gid, g_InputLength[0]); } RWStructuredBuffer g_Dispatch1; diff --git a/src/core/src/utilities/gpu_reduce.cpp b/src/core/src/utilities/gpu_reduce.cpp index 69f7446..5dcd6cd 100644 --- a/src/core/src/utilities/gpu_reduce.cpp +++ b/src/core/src/utilities/gpu_reduce.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -31,11 +31,11 @@ GPUReduce::~GPUReduce() noexcept terminate(); } -bool GPUReduce::initialise(CapsaicinInternal const &capsaicin, Type type, Operation operation) noexcept +bool GPUReduce::initialise( + GfxContext gfxIn, std::string_view const &shaderPath, Type type, Operation operation) noexcept { - gfx = capsaicin.getGfx(); + gfx = gfxIn; - bool ret = false; if (!indirectBuffer) { // Free just in case @@ -52,7 +52,6 @@ bool GPUReduce::initialise(CapsaicinInternal const &capsaicin, Type type, Operat indirectCountBuffer.setName("Capsaicin_Reduce_IndirectCountBuffer"); indirectCountBuffer2 = gfxCreateBuffer(gfx, 1); indirectCountBuffer2.setName("Capsaicin_Reduce_IndirectCountBuffer2"); - ret = true; } if (type != currentType || operation != currentOperation) @@ -71,7 +70,7 @@ bool GPUReduce::initialise(CapsaicinInternal const &capsaicin, Type type, Operat gfxDestroyKernel(gfx, reduceKernel); gfxDestroyKernel(gfx, reduceIndirectKernel); gfxDestroyKernel(gfx, dispatchIndirectKernel); - reduceProgram = gfxCreateProgram(gfx, "utilities/gpu_reduce", capsaicin.getShaderPath()); + reduceProgram = gfxCreateProgram(gfx, "utilities/gpu_reduce", shaderPath.data()); std::vector baseDefines; switch (currentType) { @@ -87,6 +86,10 @@ bool GPUReduce::initialise(CapsaicinInternal const &capsaicin, Type type, Operat case Type::Int2: baseDefines.push_back("TYPE=int2"); break; case Type::Int3: baseDefines.push_back("TYPE=int3"); break; case Type::Int4: baseDefines.push_back("TYPE=int4"); break; + case Type::Double: baseDefines.push_back("TYPE=double"); break; + case Type::Double2: baseDefines.push_back("TYPE=double2"); break; + case Type::Double3: baseDefines.push_back("TYPE=double3"); break; + case Type::Double4: baseDefines.push_back("TYPE=double4"); break; default: break; } switch (currentOperation) @@ -102,10 +105,14 @@ bool 
GPUReduce::initialise(CapsaicinInternal const &capsaicin, Type type, Operat baseDefines.data(), static_cast(baseDefines.size())); dispatchIndirectKernel = gfxCreateComputeKernel(gfx, reduceProgram, "GenerateDispatches", baseDefines.data(), static_cast(baseDefines.size())); - ret = true; } - return ret; + return !!dispatchIndirectKernel; +} + +bool GPUReduce::initialise(CapsaicinInternal const &capsaicin, Type type, Operation operation) noexcept +{ + return initialise(capsaicin.getGfx(), capsaicin.getShaderPath(), type, operation); } void GPUReduce::terminate() noexcept @@ -149,9 +156,12 @@ bool GPUReduce::reduceInternal( bool indirect = (numKeys != nullptr); // Calculate number of loops - uint32_t const *numThreads = gfxKernelGetNumThreads(gfx, reduceKernel); - const uint32_t numGroups1 = (maxNumKeys + numThreads[0] - 1) / numThreads[0]; - const uint32_t numGroups2 = (numGroups1 + numThreads[0] - 1) / numThreads[0]; + uint32_t const *numThreads = gfxKernelGetNumThreads(gfx, reduceKernel); + const uint groupSize = numThreads[0]; + const uint keysPerThread = 4; // Must match KEYS_PER_THREAD in shader + const uint keysPerGroup = groupSize * keysPerThread; + const uint32_t numGroups1 = (maxNumKeys + keysPerGroup - 1) / keysPerGroup; + const uint32_t numGroups2 = (numGroups1 + keysPerGroup - 1) / keysPerGroup; if (numGroups2 > numThreads[0]) { // To many keys as we only support 2 loops @@ -161,6 +171,7 @@ bool GPUReduce::reduceInternal( if (indirect) { // Call indirect setup kernel + gfxProgramSetParameter(gfx, reduceProgram, "g_InputLength", *numKeys); gfxProgramSetParameter(gfx, reduceProgram, "g_Dispatch1", indirectBuffer); gfxProgramSetParameter(gfx, reduceProgram, "g_Dispatch2", indirectBuffer2); gfxProgramSetParameter(gfx, reduceProgram, "g_InputLength1", indirectCountBuffer); @@ -183,11 +194,11 @@ bool GPUReduce::reduceInternal( { gfxProgramSetParameter(gfx, reduceProgram, "g_Count", maxNumKeys); } - if (numGroups1 > 1 && numGroups2 > 1) + if (numGroups1 > 1) { 
// Create scratch buffer needed for loops - const uint64_t typeSize = ((uint32_t)currentType % 4) * sizeof(float); - const uint64_t scratchBufferSize = maxNumKeys * typeSize; + const uint64_t typeSize = (((uint64_t)currentType % 4) + 1) * (currentType >= Type::Double ? sizeof(double) : sizeof(float)); + const uint64_t scratchBufferSize = numGroups1 * typeSize; if (!scratchBuffer || (scratchBuffer.getSize() < scratchBufferSize)) { gfxDestroyBuffer(gfx, scratchBuffer); diff --git a/src/core/src/utilities/gpu_reduce.h b/src/core/src/utilities/gpu_reduce.h index f487b43..515de92 100644 --- a/src/core/src/utilities/gpu_reduce.h +++ b/src/core/src/utilities/gpu_reduce.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,6 +23,8 @@ THE SOFTWARE. #include "gpu_shared.h" +#include + namespace Capsaicin { class CapsaicinInternal; @@ -52,6 +54,10 @@ class GPUReduce Int2, Int3, Int4, + Double, + Double2, + Double3, + Double4, }; /** Type of reduce operation to perform. */ @@ -63,12 +69,23 @@ class GPUReduce Product, }; + /** + * Initialise the internal data based on current configuration. + * @param gfx Active gfx context. + * @param shaderPath Path to shader files based on current working directory. + * @param type The object type to reduce. + * @param operation The type of operation to perform. + * @return True, if any initialisation/changes succeeded. + */ + bool initialise( + GfxContext gfx, std::string_view const &shaderPath, Type type, Operation operation) noexcept; + /** * Initialise the internal data based on current configuration. * @param capsaicin Current framework context. * @param type The object type to reduce. 
* @param operation The type of operation to perform. - * @return True, if any initialisation/changes actually where needed. + * @return True, if any initialisation/changes succeeded. */ bool initialise(CapsaicinInternal const &capsaicin, Type type, Operation operation) noexcept; diff --git a/src/core/src/utilities/gpu_sort.comp b/src/core/src/utilities/gpu_sort.comp index 5b2b2d3..b21bcf9 100644 --- a/src/core/src/utilities/gpu_sort.comp +++ b/src/core/src/utilities/gpu_sort.comp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -72,14 +72,14 @@ void setupIndirectParameters(uint localID : SV_GroupThreadID) [numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)] void count(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) { - // Call the uint version of the count part of the algorithm + // Call the uint version of the count part of the algorithm FFX_ParallelSort_Count_uint(localID, groupID, CBuffer[0], CShiftBit, SrcBuffer, SumTable); } [numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)] void countReduce(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) { - // Call the reduce part of the algorithm + // Call the reduce part of the algorithm FFX_ParallelSort_ReduceCount(localID, groupID, CBuffer[0], SumTable, ReduceTable); } @@ -88,24 +88,24 @@ void scan(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) { uint BaseIndex = FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE * groupID; FFX_ParallelSort_ScanPrefix(CBuffer[0].NumScanValues, localID, groupID, 0, BaseIndex, false, - CBuffer[0], ScanSrc, ScanDst, ScanScratch); + CBuffer[0], ScanSrc, ScanDst, ScanScratch); } 
[numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)] void scanAdd(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) { - // When doing adds, we need to access data differently because reduce - // has a more specialized access pattern to match optimized count - // Access needs to be done similarly to reduce - // Figure out what bin data we are reducing + // When doing adds, we need to access data differently because reduce + // has a more specialized access pattern to match optimized count + // Access needs to be done similarly to reduce + // Figure out what bin data we are reducing uint BinID = groupID / CBuffer[0].NumReduceThreadgroupPerBin; uint BinOffset = BinID * CBuffer[0].NumThreadGroups; - // Get the base index for this thread group + // Get the base index for this thread group uint BaseIndex = (groupID % CBuffer[0].NumReduceThreadgroupPerBin) * FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE; FFX_ParallelSort_ScanPrefix(CBuffer[0].NumThreadGroups, localID, groupID, BinOffset, BaseIndex, true, - CBuffer[0], ScanSrc, ScanDst, ScanScratch); + CBuffer[0], ScanSrc, ScanDst, ScanScratch); } [numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)] @@ -117,5 +117,12 @@ void scatter(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) [numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)] void scatterPayload(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) { - Payload::FFX_ParallelSort_Scatter_uint(localID, groupID, CBuffer[0], CShiftBit, SrcBuffer, DstBuffer, SumTable, SrcPayload, DstPayload); + Payload::FFX_ParallelSortCB PayloadCBuffer; + PayloadCBuffer.NumKeys = CBuffer[0].NumKeys; + PayloadCBuffer.NumBlocksPerThreadGroup = CBuffer[0].NumBlocksPerThreadGroup; + PayloadCBuffer.NumThreadGroups = CBuffer[0].NumThreadGroups; + PayloadCBuffer.NumThreadGroupsWithAdditionalBlocks = CBuffer[0].NumThreadGroupsWithAdditionalBlocks; + PayloadCBuffer.NumReduceThreadgroupPerBin = CBuffer[0].NumReduceThreadgroupPerBin; + 
PayloadCBuffer.NumScanValues = CBuffer[0].NumScanValues; + Payload::FFX_ParallelSort_Scatter_uint(localID, groupID, PayloadCBuffer, CShiftBit, SrcBuffer, DstBuffer, SumTable, SrcPayload, DstPayload); } diff --git a/src/core/src/utilities/gpu_sort.cpp b/src/core/src/utilities/gpu_sort.cpp index f219f2a..97a3331 100644 --- a/src/core/src/utilities/gpu_sort.cpp +++ b/src/core/src/utilities/gpu_sort.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -34,11 +34,11 @@ GPUSort::~GPUSort() noexcept terminate(); } -bool GPUSort::initialise(CapsaicinInternal const &capsaicin, Type type, Operation operation) noexcept +bool GPUSort::initialise( + GfxContext gfxIn, std::string_view const &shaderPath, Type type, Operation operation) noexcept { - gfx = capsaicin.getGfx(); + gfx = gfxIn; - bool ret = false; if (!parallelSortCBBuffer) { // Currently we just allocate enough for a max number of 16 segments @@ -48,7 +48,6 @@ bool GPUSort::initialise(CapsaicinInternal const &capsaicin, Type type, Operatio countScatterArgsBuffer.setName("Capsaicin_CountScatterArgsBuffer"); reduceScanArgsBuffer = gfxCreateBuffer(gfx, 3 * 16); reduceScanArgsBuffer.setName("Capsaicin_ReduceScanArgsBuffer"); - ret = true; } if (type != currentType || operation != currentOperation) @@ -83,7 +82,7 @@ bool GPUSort::initialise(CapsaicinInternal const &capsaicin, Type type, Operatio gfxDestroyKernel(gfx, scanAdd); gfxDestroyKernel(gfx, scatter); gfxDestroyKernel(gfx, scatterPayload); - sortProgram = gfxCreateProgram(gfx, "utilities/gpu_sort", capsaicin.getShaderPath()); + sortProgram = gfxCreateProgram(gfx, "utilities/gpu_sort", shaderPath.data()); std::vector baseDefines; switch 
(currentType) { @@ -110,10 +109,14 @@ bool GPUSort::initialise(CapsaicinInternal const &capsaicin, Type type, Operatio gfx, sortProgram, "scatter", baseDefines.data(), static_cast(baseDefines.size())); scatterPayload = gfxCreateComputeKernel(gfx, sortProgram, "scatterPayload", baseDefines.data(), static_cast(baseDefines.size())); - ret = true; } - return ret; + return !!scatterPayload; +} + +bool GPUSort::initialise(CapsaicinInternal const &capsaicin, Type type, Operation operation) noexcept +{ + return initialise(capsaicin.getGfx(), capsaicin.getShaderPath(), type, operation); } void GPUSort::terminate() noexcept @@ -130,6 +133,11 @@ void GPUSort::terminate() noexcept gfxDestroyBuffer(gfx, reducedScratchBuffer); reducedScratchBuffer = {}; + gfxDestroyBuffer(gfx, sourcePongBuffer); + sourcePongBuffer = {}; + gfxDestroyBuffer(gfx, payloadPongBuffer); + payloadPongBuffer = {}; + gfxDestroyProgram(gfx, sortProgram); sortProgram = {}; gfxDestroyKernel(gfx, setupIndirect); @@ -192,7 +200,7 @@ void GPUSort::sortSegmented( void GPUSort::sortPayloadSegmented(GfxBuffer const &sourceBuffer, std::vector const &numKeys, const uint maxNumKeys, GfxBuffer const &sourcePayload) noexcept { - sortInternalSegmented(sourceBuffer, numKeys, maxNumKeys, -1, nullptr, &sourcePayload); + sortInternalSegmented(sourceBuffer, numKeys, maxNumKeys, UINT_MAX, nullptr, &sourcePayload); } void GPUSort::sortInternal(GfxBuffer const &sourceBuffer, const uint maxNumKeys, GfxBuffer const *numKeys, diff --git a/src/core/src/utilities/gpu_sort.h b/src/core/src/utilities/gpu_sort.h index f72e999..d10cad6 100644 --- a/src/core/src/utilities/gpu_sort.h +++ b/src/core/src/utilities/gpu_sort.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,6 +23,8 @@ THE SOFTWARE. #include "gpu_shared.h" +#include + namespace Capsaicin { class CapsaicinInternal; @@ -48,12 +50,23 @@ class GPUSort Descending, }; + /** + * Initialise the internal data based on current configuration. + * @param gfx Active gfx context. + * @param shaderPath Path to shader files based on current working directory. + * @param type The object type to reduce. + * @param operation The type of operation to perform. + * @return True, if any initialisation/changes succeeded. + */ + bool initialise( + GfxContext gfx, std::string_view const &shaderPath, Type type, Operation operation) noexcept; + /** * Initialise the internal data based on current configuration. * @param capsaicin Current framework context. * @param type The object type to sort. * @param operation The type of operation to perform. - * @return True, if any initialisation/changes actually where needed. + * @return True, if any initialisation/changes succeeded. */ bool initialise(CapsaicinInternal const &capsaicin, Type type, Operation operation) noexcept; @@ -179,7 +192,7 @@ class GPUSort * key are supported). 
*/ void sortInternalSegmented(GfxBuffer const &sourceBuffer, std::vector const &numKeysList, - uint maxNumKeys, uint numSegments = -1, GfxBuffer const *numKeys = nullptr, + uint maxNumKeys, uint numSegments = UINT_MAX, GfxBuffer const *numKeys = nullptr, GfxBuffer const *sourcePayload = nullptr) noexcept; GfxContext gfx; diff --git a/src/scene_viewer/CMakeLists.txt b/src/scene_viewer/CMakeLists.txt index 25383a4..6e9bda0 100644 --- a/src/scene_viewer/CMakeLists.txt +++ b/src/scene_viewer/CMakeLists.txt @@ -1,17 +1,21 @@ -add_executable(scene_viewer ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp +add_executable(scene_viewer WIN32 ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ${CMAKE_CURRENT_SOURCE_DIR}/main_shared.h ${CMAKE_CURRENT_SOURCE_DIR}/main_shared.cpp ) target_compile_options(scene_viewer PRIVATE - /W3 /WX + /W4 /WX /external:anglebrackets /external:W0 /analyze:external- -D_CRT_SECURE_NO_WARNINGS + -DNOMINMAX ) -target_link_libraries(scene_viewer PRIVATE core CLI11) +target_compile_definitions(scene_viewer PRIVATE "$<$:SHADER_DEBUG>") + +target_link_libraries(scene_viewer PRIVATE capsaicin CLI11) + +target_link_options(scene_viewer PRIVATE "/SUBSYSTEM:WINDOWS") set_target_properties(scene_viewer PROPERTIES - DEBUG_POSTFIX D VS_DEBUGGER_WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) set_target_properties(scene_viewer PROPERTIES @@ -20,8 +24,21 @@ set_target_properties(scene_viewer PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CAPSAICIN_ARCHIVE_OUTPUT_DIRECTORY} ) +# Install the executable +include(GNUInstallDirs) +install(TARGETS scene_viewer + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR} +) + # Install dlls -install(FILES $ DESTINATION .) +install(FILES $ DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}) -# Install the executable -install(TARGETS scene_viewer RUNTIME DESTINATION .) 
+# Install assets +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../assets + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR} + FILES_MATCHING + PATTERN "*.gltf" + PATTERN "*.bin" + PATTERN "*.png" + PATTERN "*.ktx2" +) diff --git a/src/scene_viewer/main.cpp b/src/scene_viewer/main.cpp index c374b71..dbe2bd1 100644 --- a/src/scene_viewer/main.cpp +++ b/src/scene_viewer/main.cpp @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,7 +22,7 @@ THE SOFTWARE. #include "main_shared.h" -int32_t WinMain(HINSTANCE, HINSTANCE, LPSTR, int32_t) +int32_t WinMain(_In_ HINSTANCE, _In_opt_ HINSTANCE, _In_ LPSTR, _In_ int32_t) { CapsaicinMain main("Capsaicin - Scene Viewer"); if (!main.run()) diff --git a/src/scene_viewer/main_shared.cpp b/src/scene_viewer/main_shared.cpp index ecfeddc..79bcb4c 100644 --- a/src/scene_viewer/main_shared.cpp +++ b/src/scene_viewer/main_shared.cpp @@ -1,5 +1,5 @@ -/********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +/********************************************************************** +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,6 +27,7 @@ THE SOFTWARE. 
#include #include #include +#include #include #include #include @@ -57,46 +58,46 @@ struct KeyboardMapping uint32_t downX; uint32_t upY; uint32_t downY; - uint32_t exit; - uint32_t pause; }; KeyboardMapping keyboardMappings[] = { - {0x57 /*W*/, 0x53 /*S*/, 0x44 /*D*/, 0x41 /*A*/, 0x45 /*E*/, 0x51 /*Q*/, 0x1B /*Esc*/,0x20 /*Space*/ }, - {0x5A /*Z*/, 0x53 /*S*/, 0x44 /*D*/, 0x51 /*Q*/, 0x21 /*Page Up*/, 0x22 /*Page Down*/, 0x1B /*Esc*/, - 0x20 /*Space*/} + {0x57 /*W*/, 0x53 /*S*/, 0x44 /*D*/, 0x41 /*A*/, 0x45 /*E*/, 0x51 /*Q*/}, + {0x5A /*Z*/, 0x53 /*S*/, 0x44 /*D*/, 0x51 /*Q*/, 0x21 /*Page Up*/, 0x22 /*Page Down*/} }; /** Data required to represent each supported scene file */ struct SceneData { - string_view name; - string_view fileName; - bool useEnvironmentMap; - float renderExposure; + std::string name; + std::vector fileNames; + bool useEnvironmentMap; + float renderExposure; }; /** List of supported scene files and associated data */ static vector const scenes = { - { "Flying World", - "assets/CapsaicinTestMedia/flying_world_battle_of_the_trash_god/FlyingWorld-BattleOfTheTrashGod.gltf", true, 2.5f}, - { "Gas Station", "assets/CapsaicinTestMedia/gas_station/GasStation.gltf", true, 1.5f}, - { "Tropical Bedroom", "assets/CapsaicinTestMedia/tropical_bedroom/TropicalBedroom.gltf", true, 2.0f}, + {"Flying World", {"assets/CapsaicinTestMedia/flying_world_battle_of_the_trash_god/FlyingWorld-BattleOfTheTrashGod.gltf"}, true, 2.5f }, + {"Gas Station", {"assets/CapsaicinTestMedia/gas_station/GasStation.gltf"}, true, 1.0f}, + {"Tropical Bedroom", {"assets/CapsaicinTestMedia/tropical_bedroom/TropicalBedroom.gltf"}, true, 1.0f}, + {"Sponza", {"assets/CapsaicinTestMedia/sponza/Sponza.gltf"}, true, 5.0f}, + {"Breakfast Room", {"assets/CapsaicinTestMedia/breakfast_room/BreakfastRoom.gltf"}, true, 3.0f}, }; /** List of supported environment maps */ static vector> const sceneEnvironmentMaps = { - { "None", "" }, - {"Photo Studio London Hall", - 
"assets/CapsaicinTestMedia/environment_maps/photo_studio_london_hall_4k.hdr" }, - { "Kiara Dawn", "assets/CapsaicinTestMedia/environment_maps/kiara_1_dawn_4k.hdr"}, - { "Nagoya Wall Path", "assets/CapsaicinTestMedia/environment_maps/nagoya_wall_path_4k.hdr"}, - { "Spaichingen Hill", "assets/CapsaicinTestMedia/environment_maps/spaichingen_hill_4k.hdr"}, - { "Studio Small", "assets/CapsaicinTestMedia/environment_maps/studio_small_08_4k.hdr"}, - { "White", "assets/CapsaicinTestMedia/environment_maps/white.hdr"}, - { "Atmosphere", ""}, + { "None", ""}, + {"Photo Studio London Hall", "assets/CapsaicinTestMedia/environment_maps/PhotoStudioLondonHall.hdr"}, + { "Kiara Dawn", "assets/CapsaicinTestMedia/environment_maps/KiaraDawn.hdr"}, + { "Nagoya Wall Path", "assets/CapsaicinTestMedia/environment_maps/NagoyaWallPath.hdr"}, + { "Spaichingen Hill", "assets/CapsaicinTestMedia/environment_maps/SpaichingenHill.hdr"}, + { "Studio Small", "assets/CapsaicinTestMedia/environment_maps/StudioSmall.hdr"}, + { "White", "assets/CapsaicinTestMedia/environment_maps/White.hdr"}, + { "Atmosphere", ""}, }; +/** List of executable relative file paths to search for scene files */ +static vector const sceneDirectories = {"", "../../../", "../../", "../"}; + CapsaicinMain::CapsaicinMain(string_view &&programNameIn) noexcept : programName(forward(programNameIn)) {} @@ -106,7 +107,6 @@ CapsaicinMain::~CapsaicinMain() noexcept // Destroy Capsaicin context gfxImGuiTerminate(); Capsaicin::Terminate(); - gfxDestroyScene(sceneData); gfxDestroyContext(contextGFX); gfxDestroyWindow(window); @@ -127,63 +127,74 @@ bool CapsaicinMain::run() noexcept } // Render frames continuously - while (renderFrame()) + while (true) { - // Check for change - if (updateRenderer) - { - setRenderer(); - updateRenderer = false; - } - if (updateScene) + // Check benchmark mode run + if (benchmarkMode) { - if (!loadScene()) + // If current frame has reached our benchmark value then dump frame + if (Capsaicin::GetFrameIndex() 
>= benchmarkModeFrameCount) { - return false; + // Needed to wait a single render pass for the frame saving to complete before closing + break; } - updateScene = false; - } - if (updateEnvironmentMap) - { - if (!setEnvironmentMap()) + else if (Capsaicin::GetFrameIndex() >= benchmarkModeStartFrame) { - return false; + saveFrame(); } - updateEnvironmentMap = false; } - if (updateCamera) + if (!renderFrame()) { - setCamera(); - updateCamera = false; + return true; } + } - // Check benchmark mode run - if (benchmarkMode) + if (benchmarkMode && !benchmarkModeSuffix.empty() && Capsaicin::hasOption("image_metrics_enable") + && Capsaicin::getOption("image_metrics_enable") + && Capsaicin::getOption("image_metrics_save_to_file")) + { + // Flush remaining stats + for (uint32_t i = 0; i <= gfxGetBackBufferCount(contextGFX); ++i) { - // If current frame has reached our benchmark value then dump frame - if (Capsaicin::GetFrameIndex() == benchmarkModeFrameCount) - { - saveFrame(); - } - else if (Capsaicin::GetFrameIndex() > benchmarkModeFrameCount) - { - // Need to wait a single render pass for the frame saving to complete before closing - return true; - } + Capsaicin::Render(); + gfxFrame(contextGFX); + } + // Force finalising metrics file + Capsaicin::setOption("image_metrics_enable", false); + Capsaicin::Render(); + // Rename metrics file to also contain suffix + auto savePath = getSaveName(); + std::string newMetricsFile = savePath + '_' + benchmarkModeSuffix + ".csv"; + std::remove(newMetricsFile.c_str()); + std::string metricsFile = savePath + ".csv"; + if (std::rename(metricsFile.c_str(), newMetricsFile.c_str()) != 0) + { + printString("Failed to rename image metrics file: "s + metricsFile, MessageLevel::Warning); } } return true; } -void CapsaicinMain::printString(std::string const &text) noexcept +void CapsaicinMain::printString(std::string const &text, MessageLevel level) noexcept { + std::string outputText; + switch (level) + { + case MessageLevel::Debug: break; + case 
MessageLevel::Info: break; + case MessageLevel::Warning: outputText = "Warning: "; break; + case MessageLevel::Error: outputText = "Error: "; [[fallthrough]]; + default: break; + } + outputText += text; + // Check if a debugger is attached and use it instead of a console // If no debugger is attached then we need to attach to a console process in order to be able to // output text if (IsDebuggerPresent()) { - OutputDebugStringA(text.c_str()); + OutputDebugStringA(outputText.c_str()); } else { @@ -196,73 +207,88 @@ void CapsaicinMain::printString(std::string const &text) noexcept // Set the screen buffer big enough to hold at least help text CONSOLE_SCREEN_BUFFER_INFO scInfo; GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &scInfo); - { - // Force the console buffer to be resized no matter what as this forces the console to - // update the end line to match the number of printed lines from this app - constexpr int16_t minLength = 4096; - scInfo.dwSize.Y = std::max(minLength, (short)(scInfo.dwSize.Y + 1)); - SetConsoleScreenBufferSize(GetStdHandle(STD_OUTPUT_HANDLE), scInfo.dwSize); - } + // Force the console buffer to be resized no matter what as this forces the console to + // update the end line to match the number of printed lines from this app + constexpr int16_t minLength = 4096; + scInfo.dwSize.Y = std::max(minLength, (short)(scInfo.dwSize.Y + 100)); + SetConsoleScreenBufferSize(GetStdHandle(STD_OUTPUT_HANDLE), scInfo.dwSize); hasConsole = true; } - else - { - return; - } } - // The parent console has already printed a new user prompt before this program has even run so - // need to insert any printed lines before the existing user prompt - - // Save current cursor position - CONSOLE_SCREEN_BUFFER_INFO scInfo; - GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &scInfo); - auto cursorPosY = scInfo.dwCursorPosition.Y; - auto cursorPosX = scInfo.dwCursorPosition.X; - - // Move to start of current line - 
SetConsoleCursorPosition(GetStdHandle(STD_OUTPUT_HANDLE), {0, cursorPosY}); - - // Insert new line into console buffer - std::vector buffer; - buffer.resize(cursorPosX * sizeof(CHAR_INFO)); - COORD coordinates = {0}; - SMALL_RECT textRegion = { - .Left = 0, - .Top = cursorPosY, - .Right = (short)(cursorPosX - 1), - .Bottom = cursorPosY, - }; - COORD bufferSize = { - .X = cursorPosX, - .Y = 1, - }; - ReadConsoleOutputA( - GetStdHandle(STD_OUTPUT_HANDLE), buffer.data(), bufferSize, coordinates, &textRegion); - DWORD dnc; - FillConsoleOutputCharacter(GetStdHandle(STD_OUTPUT_HANDLE), ' ', cursorPosX, {0, cursorPosY}, &dnc); - SetConsoleCursorPosition(GetStdHandle(STD_OUTPUT_HANDLE), {0, cursorPosY}); - - // Write out each new line from the input text - uint32_t lines = 0; - for (auto const i : std::views::split(text, '\n')) + if (hasConsole) { + // The parent console has already printed a new user prompt before this program has even run so + // need to insert any printed lines before the existing user prompt + + // Save current cursor position + CONSOLE_SCREEN_BUFFER_INFO scInfo; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &scInfo); + auto cursorPosY = scInfo.dwCursorPosition.Y; + auto cursorPosX = scInfo.dwCursorPosition.X; + + // Move to start of current line + SetConsoleCursorPosition(GetStdHandle(STD_OUTPUT_HANDLE), {0, cursorPosY}); + + // Insert new line into console buffer + std::vector buffer; + buffer.resize(cursorPosX); + COORD coordinates = {0}; + SMALL_RECT textRegion = { + .Left = 0, + .Top = cursorPosY, + .Right = (short)(cursorPosX - 1), + .Bottom = cursorPosY, + }; + COORD bufferSize = { + .X = cursorPosX, + .Y = 1, + }; + + ReadConsoleOutputA( + GetStdHandle(STD_OUTPUT_HANDLE), buffer.data(), bufferSize, coordinates, &textRegion); DWORD dnc; - WriteConsoleA(GetStdHandle(STD_OUTPUT_HANDLE), &*i.begin(), (DWORD)i.size(), &dnc, 0); - WriteConsoleA(GetStdHandle(STD_OUTPUT_HANDLE), "\n", 1, &dnc, 0); - ++lines; - } + 
FillConsoleOutputCharacter( + GetStdHandle(STD_OUTPUT_HANDLE), ' ', cursorPosX, {0, cursorPosY}, &dnc); + + // Set the screen buffer big enough to hold new lines + std::vector textLines; + for (auto const i : std::views::split(outputText, '\n')) + { + textLines.emplace_back(i.begin(), i.end()); + } - // Restore cursor position to previously saved state and increment by number of new lines - textRegion = { - .Left = 0, - .Top = (short)(cursorPosY + lines), - .Right = (short)(cursorPosX - 1), - .Bottom = (short)(cursorPosY + lines), - }; - WriteConsoleOutput( - GetStdHandle(STD_OUTPUT_HANDLE), buffer.data(), bufferSize, coordinates, &textRegion); - SetConsoleCursorPosition(GetStdHandle(STD_OUTPUT_HANDLE), {cursorPosX, short(cursorPosY + lines)}); + // Write out each new line from the input text + SetConsoleCursorPosition(GetStdHandle(STD_OUTPUT_HANDLE), {0, cursorPosY}); + uint32_t lines = 0; + for (auto const &i : textLines) + { + auto lineWidth = i.size(); + if (lineWidth > 0) + { + WriteConsoleOutputCharacterA(GetStdHandle(STD_OUTPUT_HANDLE), &*i.begin(), + (DWORD)lineWidth, {0, short(cursorPosY + lines)}, &dnc); + ++lines; + SetConsoleCursorPosition(GetStdHandle(STD_OUTPUT_HANDLE), {0, short(cursorPosY + lines)}); + } + } + + // Restore cursor position to previously saved state and increment by number of new lines + textRegion = { + .Left = 0, + .Top = (short)(cursorPosY + lines), + .Right = (short)(cursorPosX - 1), + .Bottom = (short)(cursorPosY + lines), + }; + WriteConsoleOutputA( + GetStdHandle(STD_OUTPUT_HANDLE), buffer.data(), bufferSize, coordinates, &textRegion); + SetConsoleCursorPosition( + GetStdHandle(STD_OUTPUT_HANDLE), {cursorPosX, short(cursorPosY + lines)}); + } + } + if (level == MessageLevel::Error) + { + MessageBoxA(nullptr, text.c_str(), "Error", MB_OK | MB_ICONEXCLAMATION | MB_TASKMODAL); } } @@ -278,31 +304,56 @@ bool CapsaicinMain::initialise() noexcept app.allow_config_extras(CLI::config_extras_mode::error); app.add_option("--width", 
windowWidth, "Window width")->capture_default_str(); app.add_option("--height", windowHeight, "Window height")->capture_default_str(); - uint32_t sceneSelect = static_cast(scene); + uint32_t sceneSelect = static_cast(defaultScene); app.add_option("--start-scene-index", sceneSelect, "Start scene index") ->capture_default_str() ->check(CLI::Range(0u, (uint32_t)scenes.size() - 1)); - uint32_t envMapSelect = static_cast(environmentMap); + uint32_t envMapSelect = static_cast(defaultEnvironmentMap); app.add_option("--start-environment-map-index", envMapSelect, "Start environment map index") ->capture_default_str() ->check(CLI::Range(0u, (uint32_t)sceneEnvironmentMaps.size() - 1)); auto renderers = Capsaicin::GetRenderers(); - auto rendererSelectIt = find(renderers.begin(), renderers.end(), renderSettings.renderer_); + auto rendererSelectIt = find(renderers.begin(), renderers.end(), Capsaicin::GetCurrentRenderer()); uint32_t rendererSelect = 0; if (rendererSelectIt != renderers.end()) { rendererSelect = static_cast(rendererSelectIt - renderers.begin()); } + else + { + rendererSelectIt = find(renderers.begin(), renderers.end(), defaultRenderer); + if (rendererSelectIt != renderers.end()) + { + rendererSelect = static_cast(rendererSelectIt - renderers.begin()); + } + } app.add_option("--start-renderer-index", rendererSelect, "Start renderer index") ->capture_default_str() ->check(CLI::Range(0u, (uint32_t)renderers.size() - 1)); - uint32_t cameraSelect = cameraIndex; + uint32_t cameraSelect = uint32_t(-1); app.add_option("--start-camera-index", cameraSelect, "Start camera index"); + std::vector cameraPosition; + app.add_option("--user-camera-position", cameraPosition, "Set the initial position of the user camera"); + std::vector cameraLookAt; + app.add_option( + "--user-camera-lookat", cameraLookAt, "Set the initial look at position of the user camera"); + bool startPlaying = false; + app.add_flag("--start-playing", startPlaying, "Start with any animations running"); auto 
bench = app.add_flag("--benchmark-mode", benchmarkMode, "Enable benchmarking mode"); app.add_option( "--benchmark-frames", benchmarkModeFrameCount, "Number of frames to render during benchmark mode") ->needs(bench) ->capture_default_str(); + app.add_option("--benchmark-first-frame", benchmarkModeStartFrame, + "The first frame to start saving images from (Default just the last frame)") + ->needs(bench) + ->capture_default_str(); + app.add_option("--benchmark-suffix", benchmarkModeSuffix, "Suffix to add to any saved filenames") + ->needs(bench) + ->capture_default_str(); + + std::vector renderOptions; + app.add_option("--render-options", renderOptions, "Additional render options"); bool listScene = false; app.add_flag("--list-scenes", listScene, "List all available scenes and corresponding indexes"); @@ -335,7 +386,7 @@ bool CapsaicinMain::initialise() noexcept return false; } - printString("Command Line Error: "s + ((exception)e).what()); + printString("Command Line Error: "s + ((exception)e).what(), MessageLevel::Error); return false; } @@ -368,332 +419,346 @@ bool CapsaicinMain::initialise() noexcept return false; } - // Setup passed in values - scene = static_cast(sceneSelect); - environmentMap = static_cast(envMapSelect); - renderSettings.renderer_ = renderers[rendererSelect]; - // Create the internal gfx window and context window = gfxCreateWindow(windowWidth, windowHeight, programName.data()); if (!window) { return false; } - if (!reset()) + + contextGFX = gfxCreateContext( + window, 0 +#if _DEBUG || defined(SHADER_DEBUG) + | kGfxCreateContextFlag_EnableStablePowerState | kGfxCreateContextFlag_EnableDebugLayer + | kGfxCreateContextFlag_EnableShaderDebugging +#endif + ); + if (!contextGFX) { return false; } - // Initialise render settings - setRenderer(); - - // Load the requested start scene - if (!loadScene()) + // Create ImGui context using additional needed fonts + char const *fonts[] = {"C:\\Windows\\Fonts\\seguisym.ttf"}; + ImFontConfig fontConfigs[1]; + 
fontConfigs[0].MergeMode = true; + static const ImWchar glyphRanges[] = { + 0x2310, + 0x23FF, // Media player icons + 0x1F500, + 0x1F505, // Restart icon + 0, + }; + fontConfigs[0].GlyphRanges = &glyphRanges[0]; + fontConfigs[0].SizePixels = 30.0f; + fontConfigs[0].GlyphOffset.y += 5.0f; // Need to offset glyphs downward to properly center them + if (auto err = gfxImGuiInitialize(contextGFX, fonts, 1, fontConfigs); err != kGfxResult_NoError) { return false; } - // Check the passed in camera index - if (cameraIndex != cameraSelect) + // Create Capsaicin render context + Capsaicin::Initialize(contextGFX, ImGui::GetCurrentContext()); + + // Initialise render settings + if (!setRenderer(renderers[rendererSelect])) { - if (cameraSelect >= gfxSceneGetCameraCount(sceneData)) - { - printString("Invalid value passed in for '--start-camera-index'"); - return false; - } - cameraIndex = cameraSelect; - setCamera(); + return false; } - return true; -} - -bool CapsaicinMain::reset() noexcept -{ - // Restart capsaicin to prevent resource issues on change - // - this prevents freezes due to capsaicin not releasing resources properly - if (contextGFX) + // Pass any command line render options + if (!renderOptions.empty()) { - gfxImGuiTerminate(); - Capsaicin::Terminate(); - gfxDestroyContext(contextGFX); + auto &validOpts = Capsaicin::GetOptions(); + for (auto const &opt : renderOptions) + { + auto const splitLoc = opt.find('='); + if (splitLoc == std::string::npos) + { + printString("Invalid command line format of '--render-options'", MessageLevel::Error); + return false; + } + std::string option = opt.substr(0, splitLoc); + std::string value = opt.substr(splitLoc + 1); + if (auto found = validOpts.find(option); found != validOpts.end()) + { + if (std::holds_alternative(found->second)) + { + if (value == "true" || value == "1") + { + Capsaicin::setOption(option, true); + } + else if (value == "false" || value == "0") + { + Capsaicin::setOption(option, false); + } + else + { + 
printString("Invalid command line value passed for render option '" + option + + "' expected bool", + MessageLevel::Error); + return false; + } + } + else if (std::holds_alternative(found->second)) + { + try + { + const int32_t newValue = std::stoi(value); + Capsaicin::setOption(option, newValue); + } + catch (...) + { + printString("Invalid command line value passed for render option '" + option + + "' expected integer", + MessageLevel::Error); + return false; + } + } + else if (std::holds_alternative(found->second)) + { + try + { + const uint32_t newValue = std::stoul(value); + Capsaicin::setOption(option, newValue); + } + catch (...) + { + printString("Invalid command line value passed for render option '" + option + + "' expected unsigned integer", + MessageLevel::Error); + return false; + } + } + else if (std::holds_alternative(found->second)) + { + try + { + float const newValue = std::stof(value); + Capsaicin::setOption(option, newValue); + } + catch (...) + { + printString("Invalid command line value passed for render option '" + option + + "' expected float", + MessageLevel::Error); + return false; + } + } + } + else + { + printString( + "Invalid command line value passed for '--render-options': " + opt, MessageLevel::Error); + return false; + } + } } - contextGFX = gfxCreateContext( - window, kGfxCreateContextFlag_EnableStablePowerState -#if _DEBUG - | kGfxCreateContextFlag_EnableDebugLayer | kGfxCreateContextFlag_EnableShaderDebugging -#endif - ); - if (!contextGFX) + // Load the requested start scene + if (!loadScene(static_cast(sceneSelect))) { return false; } - // Create Capsaicin render context - Capsaicin::Initialize(contextGFX); - if (auto err = gfxImGuiInitialize(contextGFX); err != kGfxResult_NoError) + // Set environment map (must be done after scene load as environment maps are attached to scenes) + auto const environmentMap = scenes[static_cast(sceneSelect)].useEnvironmentMap + ? 
static_cast(envMapSelect) + : EnvironmentMap::None; + if (!setEnvironmentMap(environmentMap)) { return false; } - // Reset render settings animation state - Capsaicin::SetSequenceTime(0.0); - restartAnimation(); - setAnimation(false); - - // Reset frame graph - frameGraph.reset(); - - // Reset time - auto wallClock = - chrono::duration_cast(chrono::high_resolution_clock::now().time_since_epoch()); - previousTime = wallClock.count() / 1000000.0; - currentTime = previousTime; - frameTime = 0.0f; - - // Reset camera movement - cameraTranslation = vec3(0.0f); - cameraRotation = vec2(0.0f); - - return true; -} - -bool CapsaicinMain::loadScene() noexcept -{ - // Clear any pre-existing scene data - if (sceneData) + // Check the passed in camera index + if (cameraSelect != -1) { - gfxDestroyScene(sceneData); - if (!reset()) + auto const cameras = Capsaicin::GetSceneCameras(); + if (cameraSelect >= cameras.size()) { + printString( + "Invalid command line value passed in for '--start-camera-index'", MessageLevel::Error); return false; } + if (cameraSelect == 0) + { + // Copy scene settings from any existing scene camera + auto oldCamera = Capsaicin::GetSceneCamera(); + setCamera(cameras[cameraSelect]); + auto camera = Capsaicin::GetSceneCamera(); + *camera = *oldCamera; + } + else + { + setCamera(cameras[cameraSelect]); + } } - renderSettings.environment_map_ = {}; - // Create new blank scene - sceneData = gfxCreateScene(); - if (!sceneData) - { - return false; - } - - // Create default user camera - auto userCamera = gfxSceneCreateCamera(sceneData); - userCamera->eye = {0.0f, 0.0f, -1.0f}; - userCamera->center = {0.0f, 0.0f, 0.0f}; - userCamera->up = {0.0f, 1.0f, 0.0f}; - // Load in environment map based on current settings - if (scenes[static_cast(scene)].useEnvironmentMap) + // Check any initial user camera values + if (!cameraPosition.empty() || !cameraLookAt.empty()) { - if (!setEnvironmentMap()) + if (cameraSelect == 0) { - return false; + auto camera = 
&*Capsaicin::GetSceneCamera(); + camera->up = glm::vec3(0.0f, 1.0f, 0.0f); + if (cameraPosition.size() == 3) + { + camera->eye = glm::vec3(cameraPosition[0], cameraPosition[1], cameraPosition[2]); + } + else if (!cameraPosition.empty()) + { + printString( + "Invalid command line value passed in for '--user-camera-position' must be in the form '0.0 0.0 0.0'", + MessageLevel::Error); + return false; + } + if (cameraLookAt.size() == 3) + { + camera->center = glm::vec3(cameraLookAt[0], cameraLookAt[1], cameraLookAt[2]); + } + else if (!cameraLookAt.empty()) + { + printString( + "Invalid command line value passed in for '--user-camera-lookat' must be in the form '0.0 0.0 0.0'", + MessageLevel::Error); + return false; + } + } + else + { + printString( + "Command line values for '--user-camera-position' and '--user-camera-lookat' only take effect if start camera index is set to user camera '0'", + MessageLevel::Warning); } } - // Load in scene based on current requested scene index - if (gfxSceneImport(sceneData, scenes[static_cast(scene)].fileName.data()) != kGfxResult_NoError) + + if (benchmarkMode) { - return false; + benchmarkModeStartFrame = std::min(benchmarkModeStartFrame, benchmarkModeFrameCount - 1); + // Benchmark mode uses a fixed frame rate playback mode + Capsaicin::SetFixedFrameRate(true); } - if (!scenes[static_cast(scene)].useEnvironmentMap) + if (startPlaying) { - // Load a null image - renderSettings.environment_map_ = GfxConstRef(); + Capsaicin::SetPaused(false); } - setSceneRenderOptions(true); + return true; +} + +bool CapsaicinMain::loadScene(Scene scene) noexcept +{ + // Check that scene file is locatable + error_code ec; + + auto const &sceneData = scenes[static_cast(scene)]; + auto const &sceneNames = sceneData.fileNames; + std::vector scenePaths; - // Set up camera based on internal scene data - cameraIndex = 0; - if (gfxSceneGetCameraCount(sceneData) > 1) + for (auto const &sceneDirectory : sceneDirectories) { - cameraIndex = 1; // Use first 
scene camera - // Try and find 'Main' camera - for (uint32_t i = 1; i < gfxSceneGetCameraCount(sceneData); ++i) + scenePaths.clear(); + for (auto const &sceneName : sceneNames) { - auto cameraHandle = gfxSceneGetCameraHandle(sceneData, i); - string_view cameraName = gfxSceneGetCameraMetadata(sceneData, cameraHandle).getObjectName(); - if (cameraName.find("Main"sv) != string::npos) + string scenePath = sceneDirectory + sceneName; + if (!std::filesystem::exists(scenePath, ec)) { - cameraIndex = i; + break; } + + scenePaths.push_back(scenePath); } - // Set user camera equal to first camera - auto defaultCamera = gfxSceneGetCameraHandle(sceneData, cameraIndex); - userCamera->eye = defaultCamera->eye; - userCamera->center = defaultCamera->center; - userCamera->up = defaultCamera->up; + + // All scenes exist, load them + if (scenePaths.size() == sceneNames.size()) break; } - setCamera(); - // Calculate some scene stats - triangleCount = 0; - for (uint32_t i = 0; i < gfxSceneGetObjectCount(sceneData); ++i) + if (scenePaths.size() != sceneNames.size()) { - if (gfxSceneGetObjects(sceneData)[i].mesh) - { - GfxMesh const &mesh = *gfxSceneGetObjects(sceneData)[i].mesh; - triangleCount += (uint32_t)(mesh.indices.size() / 3); - } + printString("Failed to find all requested files for scene: "s + sceneData.name, MessageLevel::Error); + return false; + } + else if (!Capsaicin::SetScenes(scenePaths)) + { + return false; } + // Set render settings based on current scene + Capsaicin::setOption("tonemap_exposure", sceneData.renderExposure); + currentScene = scene; return true; } -void CapsaicinMain::setCamera() noexcept +void CapsaicinMain::setCamera(std::string_view camera) noexcept { // Set the camera to the currently requested camera index - GFX_ASSERT(cameraIndex < gfxSceneGetCameraCount(sceneData)); - camera = gfxSceneGetCameraHandle(sceneData, cameraIndex); - camera->aspect = static_cast(gfxGetBackBufferWidth(contextGFX)) - / static_cast(gfxGetBackBufferHeight(contextGFX)); - 
gfxSceneSetActiveCamera(sceneData, camera); + Capsaicin::SetSceneCamera(camera); // Reset camera movement data cameraTranslation = glm::vec3(0.0f); cameraRotation = glm::vec2(0.0f); } -bool CapsaicinMain::setEnvironmentMap() noexcept +bool CapsaicinMain::setEnvironmentMap(EnvironmentMap environmentMap) noexcept { - if (sceneEnvironmentMaps[static_cast(environmentMap)].first == "Atmosphere") - { - // The atmosphere technique overrides current environment map - renderSettings.setOption("atmosphere_enable", true); - return true; - } - else if (renderSettings.hasOption("atmosphere_enable")) - { - renderSettings.setOption("atmosphere_enable", false); - } - - // Remove the old environment map - if (renderSettings.environment_map_) + if (environmentMap == EnvironmentMap::None) { - auto handle = gfxSceneGetImageHandle(sceneData, renderSettings.environment_map_.getIndex()); - gfxSceneDestroyImage(sceneData, handle); + // Load a null image + currentEnvironmentMap = environmentMap; + return Capsaicin::SetEnvironmentMap(""); } - - if (sceneEnvironmentMaps[static_cast(environmentMap)].first == "None") + else if (sceneEnvironmentMaps[static_cast(environmentMap)].first == "Atmosphere") { - // Don't load new map - renderSettings.environment_map_ = GfxConstRef(); + // The atmosphere technique overrides current environment map + Capsaicin::setOption("atmosphere_enable", true); + currentEnvironmentMap = environmentMap; return true; } - - // Load in the new environment map - if (gfxSceneImport(sceneData, sceneEnvironmentMaps[static_cast(environmentMap)].second.data()) - != kGfxResult_NoError) + else if (Capsaicin::hasOption("atmosphere_enable")) { - return false; + Capsaicin::setOption("atmosphere_enable", false); } - // Update render settings - renderSettings.environment_map_ = gfxSceneFindObjectByAssetFile( - sceneData, sceneEnvironmentMaps[static_cast(environmentMap)].second.data()); - return true; -} - -void CapsaicinMain::setRenderer() noexcept -{ - // Change render settings 
based on currently selected renderer - renderSettings.debug_view_ = "None"; - - if (sceneData) + error_code ec; + for (auto &i : sceneDirectories) { - // If we are already loaded and the renderer is changed then destroy capsaicin and reload - reset(); - } - - setSceneRenderOptions(); -} - -void CapsaicinMain::setSceneRenderOptions(bool force) noexcept -{ - if (!force && renderSettings.hasOption("tonemap_exposure")) - { - return; - } - // Set render settings based on current scene - renderSettings.setOption("tonemap_exposure", scenes[static_cast(scene)].renderExposure); -} - -void CapsaicinMain::setPlayMode(Capsaicin::PlayMode playMode) noexcept -{ - if (renderSettings.play_mode_ != playMode) - { - renderSettings.play_mode_ = playMode; - if (renderSettings.play_mode_ == Capsaicin::kPlayMode_FrameByFrame) + string evFile = i; + evFile += sceneEnvironmentMaps[static_cast(environmentMap)].second.data(); + if (std::filesystem::exists(evFile, ec)) { - renderSettings.play_to_frame_index_ = Capsaicin::GetFrameIndex(); - setAnimation(true); - } - else if (renderSettings.play_mode_ == Capsaicin::kPlayMode_None) - { - // Check if state was previously paused before it was changed to frame-by-frame - if (renderSettings.delta_time_ > 0.0f) - { - setAnimation(false); - } + currentEnvironmentMap = environmentMap; + return Capsaicin::SetEnvironmentMap(evFile); } } + printString("Failed to find requested environment map file: "s + + string(sceneEnvironmentMaps[static_cast(environmentMap)].second), + MessageLevel::Error); + return false; } -void CapsaicinMain::setAnimation(bool animate) noexcept +bool CapsaicinMain::setRenderer(std::string_view renderer) noexcept { - renderSettings.play_from_start_ = !animate; - if (renderSettings.play_mode_ == Capsaicin::kPlayMode_None) + // Change render settings based on currently selected renderer + if (!Capsaicin::SetRenderer(renderer)) { - if (animate) - { - Capsaicin::SetSequenceTime(renderSettings.delta_time_); - renderSettings.delta_time_ = 
0.0f; - } - else - { - renderSettings.delta_time_ = (float)Capsaicin::GetSequenceTime() + FLT_EPSILON; - } + return false; } - Capsaicin::SetAnimate(animate); -} -void CapsaicinMain::toggleAnimation() noexcept -{ - bool const paused = !getAnimation(); - setAnimation(paused); -} - -void CapsaicinMain::restartAnimation() noexcept -{ - // Reset animations to start - renderSettings.play_from_start_ = true; - renderSettings.delta_time_ = renderSettings.delta_time_ == 0.0f ? 0.0f : FLT_EPSILON; - renderSettings.play_to_frame_index_ = 1; - Capsaicin::SetSequenceTime(0.0); -} - -bool CapsaicinMain::getAnimation() noexcept -{ - return Capsaicin::GetAnimate(); -} + // Set render settings based on current scene + auto const currentScenes = Capsaicin::GetCurrentScenes(); + auto const selectedScene = std::find_if(scenes.cbegin(), scenes.cend(), + [¤tScenes](auto const &value) { return value.fileNames == currentScenes; }); -void CapsaicinMain::tickAnimation() noexcept -{ - if (renderSettings.play_mode_ == Capsaicin::kPlayMode_FrameByFrame) + if (selectedScene != scenes.cend()) { - renderSettings.play_from_start_ = false; - return; + Capsaicin::setOption("tonemap_exposure", selectedScene->renderExposure); } - bool const paused = !getAnimation(); - renderSettings.play_from_start_ = paused; -} -uint32_t CapsaicinMain::getCurrentAnimationFrame() noexcept -{ - return (uint32_t)((float)Capsaicin::GetSequenceTime() / renderSettings.frame_by_frame_delta_time_); + // Reset camera movement + cameraTranslation = vec3(0.0f); + cameraRotation = vec2(0.0f); + return true; } bool CapsaicinMain::renderFrame() noexcept @@ -706,8 +771,7 @@ bool CapsaicinMain::renderFrame() noexcept } // Check if window should close - if (gfxWindowIsCloseRequested(window) - || gfxWindowIsKeyReleased(window, keyboardMappings[static_cast(kbMap)].exit)) + if (gfxWindowIsCloseRequested(window) || gfxWindowIsKeyReleased(window, VK_ESCAPE)) { return false; } @@ -718,8 +782,9 @@ bool CapsaicinMain::renderFrame() noexcept 
if (!benchmarkMode) { // Update the camera - if (renderSettings.play_mode_ != Capsaicin::kPlayMode_FrameByFrame) + if (!Capsaicin::GetFixedFrameRate()) { + auto camera = Capsaicin::GetSceneCamera(); vec3 const forward = normalize(camera->center - camera->eye); vec3 const right = cross(forward, camera->up); vec3 const up = cross(right, forward); @@ -727,7 +792,7 @@ bool CapsaicinMain::renderFrame() noexcept float const force = cameraSpeed * 10000.0f; // Clamp frametime to prevent errors at low frame rates - frameTime = glm::min(frameTime, 0.05f); + auto frameTime = glm::min(static_cast(Capsaicin::GetFrameTime()), 0.05f); // Get keyboard input if (!ImGui::GetIO().WantCaptureKeyboard) @@ -759,6 +824,23 @@ bool CapsaicinMain::renderFrame() noexcept } cameraTranslation += acceleration * 0.5f * frameTime; cameraTranslation = glm::clamp(cameraTranslation, -cameraSpeed, cameraSpeed); + // Clamp tiny values to zero to improve convergence to resting state + auto const clampMin = glm::lessThan(glm::abs(cameraTranslation), vec3(0.0000001f)); + if (glm::any(clampMin)) + { + if (clampMin.x) + { + cameraTranslation.x = 0.0f; + } + if (clampMin.y) + { + cameraTranslation.y = 0.0f; + } + if (clampMin.z) + { + cameraTranslation.z = 0.0f; + } + } // Get mouse input vec2 acceleration2 = cameraRotation * -45.0f; @@ -770,18 +852,31 @@ bool CapsaicinMain::renderFrame() noexcept } cameraRotation += acceleration2 * 0.5f * frameTime; cameraRotation = glm::clamp(cameraRotation, -4e-2f, 4e-2f); + // Clamp tiny values to zero to improve convergence to resting state + auto const clampRotationMin = glm::lessThan(glm::abs(cameraRotation), vec2(0.00000001f)); + if (glm::any(clampRotationMin)) + { + if (clampRotationMin.x) + { + cameraRotation.x = 0.0f; + } + if (clampRotationMin.y) + { + cameraRotation.y = 0.0f; + } + } ImGui::ResetMouseDragDelta(0); if (!glm::all(glm::equal(cameraTranslation, vec3(0.0f))) || !glm::all(glm::equal(cameraRotation, vec2(0.0f)))) { - if (cameraIndex != 0) + if 
(Capsaicin::GetSceneCurrentCamera() != "User") { // Change to the user camera - auto userCamera = gfxSceneGetCameraHandle(sceneData, 0); - *userCamera = *camera; - cameraIndex = 0; - setCamera(); + auto oldCamera = camera; + Capsaicin::SetSceneCamera("User"); + camera = Capsaicin::GetSceneCamera(); + *camera = *oldCamera; } // Update translation @@ -826,55 +921,102 @@ bool CapsaicinMain::renderFrame() noexcept camera->fovY -= mouseWheelH; camera->fovY = glm::clamp(camera->fovY, 10.0f * (float)M_PI / 180.0f, 140.0f * (float)M_PI / 180.0f); - } - // Hot-reload the shaders if requested - if (gfxWindowIsKeyReleased(window, VK_F5)) - { - gfxKernelReloadAll(contextGFX); - } + // Handle playback animation keys + if (Capsaicin::HasAnimation()) + { + if (gfxWindowIsKeyReleased(window, VK_UP)) + { + if (!Capsaicin::GetPaused()) + { + Capsaicin::IncreasePlaybackSpeed(); + } + } + if (gfxWindowIsKeyReleased(window, VK_DOWN)) + { + if (!Capsaicin::GetPaused()) + { + Capsaicin::DecreasePlaybackSpeed(); + } + } + if (gfxWindowIsKeyReleased(window, VK_LEFT)) + { + Capsaicin::StepPlaybackBackward(1); + } + if (gfxWindowIsKeyReleased(window, VK_RIGHT)) + { + Capsaicin::StepPlaybackForward(1); + } + } - // Save image to disk if requested - if (gfxWindowIsKeyReleased(window, VK_F6)) - { - saveFrame(); - } + if (Capsaicin::HasAnimation() || Capsaicin::GetRenderPaused()) + { + // Pause/Resume animations if requested + if (gfxWindowIsKeyReleased(window, VK_SPACE)) + { + if (Capsaicin::GetPaused()) + { + if (!Capsaicin::GetRenderPaused()) + { + Capsaicin::ResetPlaybackSpeed(); + Capsaicin::SetPaused(false); + } + else + { + // Render 1 more frame + Capsaicin::SetRenderPaused(false); + reDisableRender = true; + } + } + else + { + Capsaicin::SetPaused(true); + } + } + } - // Pause/Resume animations if requested - if (gfxWindowIsKeyReleased(window, keyboardMappings[static_cast(kbMap)].pause)) - { - toggleAnimation(); + // Hot-reload the shaders if requested + if 
(gfxWindowIsKeyReleased(window, VK_F5)) + { + Capsaicin::ReloadShaders(); + } + + // Save image to disk if requested + if (gfxWindowIsKeyReleased(window, VK_F6)) + { + saveFrame(); + } } } // Render the scene - Capsaicin::Render(sceneData, renderSettings); + Capsaicin::Render(); if (!benchmarkMode) { // Re-enable Tonemap after save to disk if (reenableToneMap) { - renderSettings.setOption("tonemap_enable", true); + Capsaicin::setOption("tonemap_enable", true); reenableToneMap = false; } } - // Progress any animation state - tickAnimation(); + if (reDisableRender) + { + Capsaicin::SetRenderPaused(true); + reDisableRender = false; + } // Render the UI renderGUI(); // Complete the frame +#if _DEBUG || defined(SHADER_DEBUG) gfxFrame(contextGFX); - - // Update frame time - auto wallTime = - chrono::duration_cast(chrono::high_resolution_clock::now().time_since_epoch()); - currentTime = wallTime.count() / 1000000.0; - frameTime = static_cast(currentTime - previousTime); - previousTime = currentTime; +#else + gfxFrame(contextGFX, false); +#endif return true; } @@ -886,13 +1028,10 @@ bool CapsaicinMain::renderGUI() noexcept ImGui::Begin( programName.data(), nullptr, ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoSavedSettings); { - ImGui::Text("Selected device : %s", contextGFX.getName()); - ImGui::Separator(); - if (!benchmarkMode) { // Select which scene to display - int32_t selectedScene = static_cast(scene); + int32_t selectedScene = static_cast(currentScene); string sceneList; for (auto &i : scenes) { @@ -901,56 +1040,61 @@ bool CapsaicinMain::renderGUI() noexcept } if (ImGui::Combo("Scene", &selectedScene, sceneList.c_str(), static_cast(scenes.size()))) { - if (static_cast(scene) != selectedScene) + if (currentScene != static_cast(selectedScene)) { // Change the selected scene - scene = static_cast(selectedScene); - updateScene = true; + if (!loadScene(static_cast(selectedScene))) + { + ImGui::End(); + return false; + } + // Reset environment map + auto const 
environmentMap = + scenes[static_cast(selectedScene)].useEnvironmentMap + ? (currentEnvironmentMap != EnvironmentMap::None ? currentEnvironmentMap + : defaultEnvironmentMap) + : EnvironmentMap::None; + if (!setEnvironmentMap(environmentMap)) + { + ImGui::End(); + return false; + } } } // Optionally select which environment map is used - if (scenes[static_cast(scene)].useEnvironmentMap) + if (scenes[static_cast(selectedScene)].useEnvironmentMap) { - int32_t selectedEM = static_cast(environmentMap); string emList; + int32_t selectedEM = static_cast(currentEnvironmentMap); for (auto &i : sceneEnvironmentMaps) { - if (i.first == "Atmosphere" && !renderSettings.hasOption("atmosphere_enable")) - continue; + if (i.first == "Atmosphere" && !Capsaicin::hasOption("atmosphere_enable")) continue; emList += i.first; emList += '\0'; } if (ImGui::Combo( "Environment Map", &selectedEM, emList.c_str(), static_cast(emList.size()))) { - if (static_cast(environmentMap) != selectedEM) + if (currentEnvironmentMap != static_cast(selectedEM)) { // Change the selected environment map - environmentMap = static_cast(selectedEM); - updateEnvironmentMap = true; + if (!setEnvironmentMap(static_cast(selectedEM))) + { + ImGui::End(); + return false; + } } } } - ImGui::Text("Triangle Count : %u", triangleCount); - const uint32 deltaLightCount = Capsaicin::GetDeltaLightCount(); - const uint32 areaLightCount = Capsaicin::GetAreaLightCount(); - const uint32 envLightCount = Capsaicin::GetEnvironmentLightCount(); - ImGui::Text("Light Count : %u", areaLightCount + deltaLightCount + envLightCount); - ImGui::Text(" Area Light Count : %u", areaLightCount); - ImGui::Text(" Delta Light Count : %u", deltaLightCount); - ImGui::Text(" Environment Light Count : %u", envLightCount); - ImGui::Text("Render Resolution : %ux%u", gfxGetBackBufferWidth(contextGFX), - gfxGetBackBufferHeight(contextGFX)); - // Call the class specific GUI function renderGUIDetails(); } else { // Display profiling options - 
renderProfiling(); + Capsaicin::RenderGUI(true); } } ImGui::End(); @@ -964,29 +1108,19 @@ bool CapsaicinMain::renderGUI() noexcept return true; } -bool CapsaicinMain::renderCameraDetails() noexcept +void CapsaicinMain::renderCameraDetails() noexcept { if (ImGui::CollapsingHeader("Camera settings", ImGuiTreeNodeFlags_DefaultOpen)) { // Select which preset camera to use - int32_t selectedCamera = cameraIndex; - string cameraList; - for (uint32_t i = 0; i < gfxSceneGetCameraCount(sceneData); ++i) + string cameraList; + auto const cameras = Capsaicin::GetSceneCameras(); + int32_t selectedCamera = static_cast( + std::find(cameras.begin(), cameras.end(), Capsaicin::GetSceneCurrentCamera()) - cameras.begin()); + auto const cameraIndex = selectedCamera; + for (auto const &i : cameras) { - if (i > 0) - { - auto cameraHandle = gfxSceneGetCameraHandle(sceneData, i); - string_view cameraName = gfxSceneGetCameraMetadata(sceneData, cameraHandle).getObjectName(); - if (cameraName.find("Camera"sv) == 0 && cameraName.length() > 6) - { - cameraName = cameraName.substr(6); - } - cameraList += cameraName; - } - else - { - cameraList += "User"sv; - } + cameraList += i; cameraList += '\0'; } if (ImGui::Combo( @@ -995,14 +1129,14 @@ bool CapsaicinMain::renderCameraDetails() noexcept if (cameraIndex != selectedCamera) { // Change the selected camera - cameraIndex = selectedCamera; - updateCamera = true; + setCamera(cameras[selectedCamera]); } } - float fovf = glm::degrees(camera->fovY); - int32_t fov = static_cast(fovf); - float remainder = fovf - static_cast(fov); + auto const camera = Capsaicin::GetSceneCamera(); + float fovf = glm::degrees(camera->fovY); + int32_t fov = static_cast(fovf); + float remainder = fovf - static_cast(fov); ImGui::DragInt("FOV", &fov, 1, 10, 140); camera->fovY = glm::radians(static_cast(fov) + remainder); ImGui::DragFloat("Speed", &cameraSpeed, 0.01f); @@ -1054,134 +1188,205 @@ bool CapsaicinMain::renderCameraDetails() noexcept ImGui::TreePop(); } } - return 
true; } -void CapsaicinMain::renderGUIDetails() noexcept +bool CapsaicinMain::renderGUIDetails() noexcept { // Display camera options renderCameraDetails(); - if (ImGui::CollapsingHeader("Render settings", ImGuiTreeNodeFlags_DefaultOpen)) + // Display animation options + if (ImGui::CollapsingHeader("Animation Settings", ImGuiTreeNodeFlags_DefaultOpen)) { - // Select which renderer to use - string rendererString; - auto rendererList = Capsaicin::GetRenderers(); - int32_t selectedRenderer = - static_cast(find(rendererList.cbegin(), rendererList.cend(), renderSettings.renderer_) - - rendererList.cbegin()); - int32_t currentRenderer = selectedRenderer; - for (auto &i : rendererList) + if (!Capsaicin::HasAnimation()) { - rendererString += i; - rendererString += '\0'; + ImGui::BeginDisabled(); } - auto renderer = renderSettings.renderer_; - if (ImGui::Combo("Renderer", &selectedRenderer, rendererString.c_str(), 8)) + constexpr char const *playModes[2] = {"Real-time", "Fixed Frame Rate"}; + + int32_t playMode = static_cast(Capsaicin::GetFixedFrameRate()); + if (ImGui::Combo("Play mode", &playMode, playModes, 2)) { - if (currentRenderer != selectedRenderer) - { - // Change the selected renderer - renderSettings.renderer_ = rendererList[selectedRenderer]; - updateRenderer = true; - } + Capsaicin::SetFixedFrameRate(playMode > 0); + } + if (!Capsaicin::HasAnimation()) + { + ImGui::EndDisabled(); + } + ImVec2 buttonHeight(0.0f, 30.0f); + char const restartGlyph[] = {static_cast(0xF0), static_cast(0x9F), + static_cast(0x94), static_cast(0x83), + static_cast(0x0)}; // Workaround compiler not handling u8"\u1F503" properly + if (ImGui::Button(restartGlyph, buttonHeight)) // Restart + { + Capsaicin::RestartPlayback(); } - // Light sampling settings - if (renderSettings.hasOption("delta_light_enable") && - ImGui::CollapsingHeader("Light Sampler Settings", ImGuiTreeNodeFlags_DefaultOpen)) + if (!Capsaicin::HasAnimation()) { - ImGui::Checkbox("Enable Delta Lights", 
&renderSettings.getOption("delta_light_enable")); - ImGui::Checkbox("Enable Area Lights", &renderSettings.getOption("area_light_enable")); - ImGui::Checkbox( - "Enable Environment Lights", &renderSettings.getOption("environment_light_enable")); + ImGui::BeginDisabled(); } - // Display renderer specific options - if (ImGui::CollapsingHeader("Renderer Settings", ImGuiTreeNodeFlags_DefaultOpen)) + ImGui::SameLine(); + if (ImGui::Button(reinterpret_cast(u8"\u23EE"), buttonHeight)) // Step backward { - if (renderSettings.hasOption("tonemap_enable")) + Capsaicin::StepPlaybackBackward(30); + } + ImGui::SameLine(); + if (ImGui::Button(reinterpret_cast(u8"\u23EA"), buttonHeight)) // Rewind + { + // If paused then just step back 1 frame, otherwise rewind + if (Capsaicin::GetPaused()) + { + Capsaicin::StepPlaybackBackward(1); + } + else if (!Capsaicin::GetPlayRewind()) { - // Tone mapping settings - bool &enabled = renderSettings.getOption("tonemap_enable"); - if (!enabled) ImGui::BeginDisabled(true); - ImGui::DragFloat("Exposure", &renderSettings.getOption("tonemap_exposure"), 5e-3f); - if (!enabled) ImGui::EndDisabled(); - ImGui::Checkbox("Enable Tone Mapping", &enabled); + if (Capsaicin::GetPlaybackSpeed() > 1.5) + { + // If currently fast forwarding then slow down speed + Capsaicin::DecreasePlaybackSpeed(); + } + else + { + Capsaicin::ResetPlaybackSpeed(); + Capsaicin::SetPlayRewind(true); + } } - if (renderer == "Path Tracer") + else { - ImGui::DragInt("Samples Per Pixel", - (int32_t *)&renderSettings.getOption("reference_pt_sample_count"), 1, 0, 30); - auto &bounces = renderSettings.getOption("reference_pt_bounce_count"); - ImGui::DragInt("Bounces", (int32_t *)&bounces, 1, 0, 30); - auto &minBounces = renderSettings.getOption("reference_pt_min_rr_bounces"); - ImGui::DragInt("Min Bounces", (int32_t *)&minBounces, 1, 0, bounces); - minBounces = glm::min(minBounces, bounces); - ImGui::Checkbox("Disable Albedo Textures", - 
&renderSettings.getOption("reference_pt_disable_albedo_materials")); - ImGui::Checkbox("Disable Direct Lighting", - &renderSettings.getOption("reference_pt_disable_direct_lighting")); - ImGui::Checkbox("Disable Specular Lighting", - &renderSettings.getOption("reference_pt_disable_specular_lighting")); + // If already rewinding then increase rewind speed + Capsaicin::IncreasePlaybackSpeed(); } - else if (renderer == "GI-1.0") + } + ImGui::SameLine(); + if (!Capsaicin::HasAnimation() && Capsaicin::GetRenderPaused()) + { + ImGui::EndDisabled(); + } + if (Capsaicin::GetPaused()) + { + // Display play button + if (ImGui::Button(reinterpret_cast(u8"\u23F5"), buttonHeight)) { - ImGui::Checkbox("Use TAA", &renderSettings.getOption("taa_enable")); - ImGui::Checkbox("Use Resampling", &renderSettings.getOption("gi10_use_resampling")); - ImGui::Checkbox( - "Use Direct Lighting", &renderSettings.getOption("gi10_use_direct_lighting")); - ImGui::Checkbox("Disable Albedo Textures", - &renderSettings.getOption("gi10_disable_albedo_textures")); + if (!Capsaicin::GetRenderPaused()) + { + Capsaicin::ResetPlaybackSpeed(); + Capsaicin::SetPaused(false); + } + else + { + // Render 1 more frame + Capsaicin::SetRenderPaused(false); + reDisableRender = true; + } } } - ImGui::Separator(); - } - - // Display animation options - if (gfxSceneGetAnimationCount(sceneData) > 0) - { - if (ImGui::CollapsingHeader("Animation Settings", ImGuiTreeNodeFlags_DefaultOpen)) + else { - int32_t playMode = (int32_t)renderSettings.play_mode_; - if (ImGui::Combo( - "Play mode", &playMode, Capsaicin::g_play_modes, (int32_t)Capsaicin::kPlayMode_Count)) + // Display pause button + if (ImGui::Button(reinterpret_cast(u8"\u23F8"), buttonHeight)) { - setPlayMode((Capsaicin::PlayMode)playMode); + Capsaicin::SetPaused(true); } - else if (ImGui::Button("Restart")) + } + if (!Capsaicin::HasAnimation() && (Capsaicin::GetRenderPaused() || reDisableRender)) + { + ImGui::BeginDisabled(); + } + ImGui::SameLine(); + if 
(ImGui::Button(reinterpret_cast(u8"\u23E9"), buttonHeight)) // Fast forward + { + // If paused then just step forward 1 frame, otherwise fast-forward + if (Capsaicin::GetPaused()) { - restartAnimation(); + Capsaicin::StepPlaybackForward(1); } - else if (renderSettings.play_mode_ == Capsaicin::kPlayMode_None) + else if (Capsaicin::GetPlayRewind()) { - string_view buttonLabel = getAnimation() ? "Pause"sv : "Play"sv; - if (ImGui::Button(buttonLabel.data())) + if (Capsaicin::GetPlaybackSpeed() > 1.5) { - toggleAnimation(); + // If currently fast rewinding then slow down speed + Capsaicin::DecreasePlaybackSpeed(); } + else + { + Capsaicin::ResetPlaybackSpeed(); + Capsaicin::SetPlayRewind(false); + } + } + else + { + // If already fast-forwarding then increase speed + Capsaicin::IncreasePlaybackSpeed(); } - else if (renderSettings.play_mode_ == Capsaicin::kPlayMode_FrameByFrame) + } + ImGui::SameLine(); + if (ImGui::Button(reinterpret_cast(u8"\u23ED"), buttonHeight)) // Step forward + { + Capsaicin::StepPlaybackForward(30); + } + ImGui::SameLine(); + if (!Capsaicin::HasAnimation()) + { + ImGui::EndDisabled(); + } + if (!Capsaicin::GetRenderPaused()) + { + if (ImGui::Button(reinterpret_cast(u8"\u23F3"), buttonHeight)) // Pause renderer + { + // Ensure animation is also paused + Capsaicin::SetPaused(true); + Capsaicin::SetRenderPaused(true); + reDisableRender = false; + } + } + else + { + if (ImGui::Button(reinterpret_cast(u8"\u231B"), buttonHeight)) // Unpause renderer + { + Capsaicin::SetRenderPaused(false); + reDisableRender = false; + } + } + } + + if (ImGui::CollapsingHeader("Render settings", ImGuiTreeNodeFlags_DefaultOpen)) + { + // Select which renderer to use + string rendererString; + auto rendererList = Capsaicin::GetRenderers(); + int32_t selectedRenderer = static_cast( + find(rendererList.cbegin(), rendererList.cend(), Capsaicin::GetCurrentRenderer()) + - rendererList.cbegin()); + int32_t currentRenderer = selectedRenderer; + for (auto &i : rendererList) + { + 
rendererString += i; + rendererString += '\0'; + } + if (ImGui::Combo("Renderer", &selectedRenderer, rendererString.c_str(), 8)) + { + if (currentRenderer != selectedRenderer) { - uint32_t playToFrame = getCurrentAnimationFrame(); - string playFrameLabel = "Play Next Frame ("s + to_string(playToFrame) + ')'; - if (ImGui::Button(playFrameLabel.data())) + // Change the selected renderer + if (!setRenderer(rendererList[selectedRenderer])) { - renderSettings.play_to_frame_index_ = Capsaicin::GetFrameIndex() + 1; + return false; } } } + Capsaicin::RenderGUI(false); + ImGui::Separator(); } - // Display profiling options - renderProfiling(); - // Display debugging options if (ImGui::CollapsingHeader("Debugging", ImGuiTreeNodeFlags_DefaultOpen)) { // Select which debug view to use string debugString; - auto debugList = Capsaicin::GetDebugViews(); - int32_t selectedDebug = static_cast( - find(debugList.cbegin(), debugList.cend(), renderSettings.debug_view_) - debugList.cbegin()); + auto debugList = Capsaicin::GetDebugViews(); + int32_t selectedDebug = + static_cast(find(debugList.cbegin(), debugList.cend(), Capsaicin::GetCurrentDebugView()) + - debugList.cbegin()); selectedDebug = std::max(selectedDebug, 0); // Reset to 0 if unfound int32_t currentDebug = selectedDebug; for (auto &i : debugList) @@ -1194,140 +1399,27 @@ void CapsaicinMain::renderGUIDetails() noexcept if (currentDebug != selectedDebug) { // Change the selected view - renderSettings.debug_view_ = debugList[selectedDebug]; + Capsaicin::SetDebugView(debugList[selectedDebug]); } } if (ImGui::Button("Reload Shaders (F5)")) { - gfxKernelReloadAll(contextGFX); + Capsaicin::ReloadShaders(); } if (ImGui::Button("Dump Frame (F6)")) { saveFrame(); } - renderOptions(); - } -} - -void CapsaicinMain::renderProfiling() noexcept -{ - if (ImGui::CollapsingHeader("Profiling", ImGuiTreeNodeFlags_DefaultOpen)) - { - auto [totalFrameTime, timestamps] = Capsaicin::GetProfiling(); - - bool children = false; - size_t maxStringSize 
= 0; - for (auto &i : timestamps) - { - bool hasChildren = i.children_.size() > 1; - const ImGuiTreeNodeFlags flags = - (hasChildren ? ImGuiTreeNodeFlags_None : ImGuiTreeNodeFlags_Leaf); - - children = children || hasChildren; - if (ImGui::TreeNodeEx(i.name_.data(), flags, "%-20s: %.3f ms", i.children_[0].name_.data(), - i.children_[0].time_)) - { - ImGui::PushStyleColor(ImGuiCol_Header, ImVec4(0.8f, 0.4f, 0.0f, 1.0f)); - maxStringSize = std::max(maxStringSize, i.children_[0].name_.length()); - for (uint32_t j = 1; j < i.children_.size(); ++j) - { - const ImGuiTreeNodeFlags selectedFlag = - (selectedProfile.first == i.name_ && selectedProfile.second == i.children_[j].name_ - ? ImGuiTreeNodeFlags_Selected - : ImGuiTreeNodeFlags_None); - ImGui::TreeNodeEx(std::to_string(j).c_str(), - ImGuiTreeNodeFlags_Leaf | ImGuiTreeNodeFlags_NoTreePushOnOpen | selectedFlag, - "%-17s: %.3f ms", i.children_[j].name_.data(), i.children_[j].time_); - - maxStringSize = std::max(maxStringSize, i.children_[j].name_.length()); - if (ImGui::IsItemClicked()) - { - selectedProfile = - std::make_pair(!selectedFlag ? i.name_ : nullptr, i.children_[j].name_); - } - } - - ImGui::PopStyleColor(); - ImGui::TreePop(); - } - } - - ImGui::Separator(); - - frameGraph.addValue(totalFrameTime); - const std::string graphName = std::format("{:.2f}", totalFrameTime) + " ms (" - + std::format("{:.2f}", 1000.0f / totalFrameTime) + " fps)"; - - ImGui::PushID("Total frame time"); - std::string text = "Total frame time"; - size_t additionalSpace = maxStringSize > text.size() ? 
maxStringSize - text.size() : 0; - if (children) - { - text.insert(0, " "); - } - for (size_t i = 0; i < additionalSpace + 1; ++i) - { - text.append(" "); - } - text.append(":"); - ImGui::Text(text.data()); - ImGui::SameLine(); - ImGui::PlotLines("", CapsaicinMain::Graph::GetValueAtIndex, &frameGraph, frameGraph.getValueCount(), - 0, graphName.c_str(), 0.0f, FLT_MAX, ImVec2(150, 20)); - ImGui::PopID(); - - ImGui::PushID("Frame"); - text = "Frame"; - additionalSpace = maxStringSize > text.size() ? maxStringSize - text.size() : 0; - if (children) - { - text.insert(0, " "); - } - for (size_t i = 0; i < additionalSpace + 1; ++i) - { - text.append(" "); - } - text.append(":"); - ImGui::Text(text.data()); ImGui::SameLine(); - ImGui::Text(to_string(Capsaicin::GetFrameIndex()).c_str()); - ImGui::PopID(); - } -} - -void CapsaicinMain::renderOptions() noexcept -{ - if (ImGui::CollapsingHeader("Render Options", ImGuiTreeNodeFlags_OpenOnArrow)) - { - for (auto &i : renderSettings.options_) - { - if (std::holds_alternative(i.second)) - { - ImGui::Checkbox(i.first.data(), std::get_if(&(i.second))); - } - else if (std::holds_alternative(i.second)) - { - uint32_t *option = std::get_if(&(i.second)); - ImGui::DragInt(i.first.data(), reinterpret_cast(option), 1, 0); - } - else if (std::holds_alternative(i.second)) - { - ImGui::DragInt(i.first.data(), std::get_if(&(i.second)), 1); - } - else if (std::holds_alternative(i.second)) - { - ImGui::DragFloat(i.first.data(), std::get_if(&(i.second)), 5e-3f); - } - } + ImGui::Checkbox("Save as JPEG", &saveAsJPEG); } + return true; } void CapsaicinMain::saveFrame() noexcept { - // Save the current frame buffer to disk - uint32_t frameIndex = Capsaicin::GetFrameIndex(); - string savePath = "./dump/"s; // Ensure output directory exists + std::string savePath = "./dump/"s; { std::filesystem::path outPath = savePath; std::error_code ec; @@ -1336,92 +1428,73 @@ void CapsaicinMain::saveFrame() noexcept create_directory(outPath, ec); } } - savePath += 
scenes[static_cast(scene)].name; - savePath += "_C"; - if (cameraIndex > 0) + savePath = getSaveName(); + if (!benchmarkModeSuffix.empty()) { - auto cameraHandle = gfxSceneGetCameraHandle(sceneData, cameraIndex); - string_view cameraName = gfxSceneGetCameraMetadata(sceneData, cameraHandle).getObjectName(); - if (cameraName.find("Camera"sv) == 0 && cameraName.length() > 6) - { - cameraName = cameraName.substr(6); - } - savePath += cameraName; + savePath += '_'; + savePath += benchmarkModeSuffix; + } + savePath += '_'; + uint32_t frameIndex = Capsaicin::GetFrameIndex() + 1; //+1 to correct for 0 indexed + savePath += to_string(frameIndex); + savePath += '_'; + savePath += to_string(Capsaicin::GetAverageFrameTime()); + if (saveAsJPEG) + { + savePath += ".jpeg"sv; } else { - savePath += "User"sv; + savePath += ".exr"sv; } - savePath += "_R"sv; - savePath += renderSettings.renderer_; - savePath += "_F"sv; - savePath += to_string(frameIndex); - savePath += "_T"sv; - double frameTime = frameGraph.getAverageValue(); - savePath += to_string(frameTime); - // savePath += "AdditionalDescription"sv; - savePath += ".exr"sv; - savePath.erase(std::remove_if(savePath.begin(), savePath.end(), - [](unsigned char const c) { return std::isspace(c); }), - savePath.end()); + // Save the current frame buffer to disk Capsaicin::DumpAOVBuffer(savePath.c_str(), "Color"); // Disable performing tone mapping as we output in HDR - if (renderSettings.hasOption("tonemap_enable")) + if (!saveAsJPEG && Capsaicin::hasOption("tonemap_enable")) { - reenableToneMap = renderSettings.getOption("tonemap_enable"); - renderSettings.setOption("tonemap_enable", false); + reenableToneMap = Capsaicin::getOption("tonemap_enable"); + Capsaicin::setOption("tonemap_enable", false); } } -uint32_t CapsaicinMain::Graph::getValueCount() const noexcept +std::string CapsaicinMain::getSaveName() const noexcept { - return static_cast(values.size()); -} - -void CapsaicinMain::Graph::addValue(float value) noexcept -{ - 
values[current] = value; - current = (current + 1) % values.size(); -} - -float CapsaicinMain::Graph::getLastAddedValue() const noexcept -{ - if (current == 0) return getValueAtIndex(static_cast(values.size() - 1)); - return getValueAtIndex(current - 1); -} - -float CapsaicinMain::Graph::getValueAtIndex(uint32_t index) const noexcept -{ - return values[index]; -} - -float CapsaicinMain::Graph::getAverageValue() noexcept -{ - double runningCount = 0.0; - uint32_t validFrames = 0; - for (uint32_t i = 0; i < getValueCount(); ++i) + std::string savePath = "./dump/"s; + + auto currentScenes = Capsaicin::GetCurrentScenes(); + GFX_ASSERT(!currentScenes.empty()); + auto currentSceneName = currentScenes[0]; + currentSceneName.erase(currentSceneName.length() - 5); // Remove the '.gltf' extension + auto const sceneFolders = currentSceneName.find_last_of("/\\"); + if (sceneFolders != std::string::npos) + { + currentSceneName.erase(0, sceneFolders + 1); + } + auto currentEM = Capsaicin::GetCurrentEnvironmentMap(); + if (!currentEM.empty()) { - runningCount += (double)getValueAtIndex(i); - if (getValueAtIndex(i) != 0.0f) + currentEM.erase(currentEM.length() - 4); // Remove the '.hdr' extension + auto const emFolders = currentEM.find_last_of("/\\"); + if (emFolders != std::string::npos) { - ++validFrames; + currentEM.erase(0, emFolders + 1); } } - return static_cast(runningCount / (double)validFrames); -} - -void CapsaicinMain::Graph::reset() noexcept -{ - current = 0; - values.fill(0.0f); -} + else + { + currentEM = "None"; + } -float CapsaicinMain::Graph::GetValueAtIndex(void *object, int32_t index) noexcept -{ - Graph const &graph = *static_cast(object); - const int32_t offset = (int32_t)(graph.values.size()) - index; - const int32_t newIndex = (int32_t)(graph.current) - offset; - const int32_t fixedIndex = (newIndex < 0 ? 
(int32_t)(graph.values.size()) + newIndex : newIndex); - return graph.values[fixedIndex]; + savePath += currentSceneName; + savePath += '_'; + savePath += currentEM; + savePath += '_'; + savePath += Capsaicin::GetSceneCurrentCamera(); + savePath += '_'; + savePath += Capsaicin::GetCurrentRenderer(); + savePath.erase(std::remove_if(savePath.begin(), savePath.end(), + [](unsigned char const c) { return std::isspace(c); }), + savePath.end()); + return savePath; } diff --git a/src/scene_viewer/main_shared.h b/src/scene_viewer/main_shared.h index c30140a..5296843 100644 --- a/src/scene_viewer/main_shared.h +++ b/src/scene_viewer/main_shared.h @@ -1,5 +1,5 @@ /********************************************************************** -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -52,99 +52,81 @@ class CapsaicinMain bool run() noexcept; protected: + enum class Scene : uint32_t + { + FlyingWorld = 0, + GasStation, + TropicalBedroom, + }; + + enum class EnvironmentMap : uint32_t + { + None = 0, + PhotoStudioLondonHall, + KiaraDawn, + NagoyaWallPath, + SpaichingenHill, + StudioSmall, + White, + Atmosphere, + }; + + enum class MessageLevel : uint32_t + { + Debug, + Info, + Warning, + Error, + }; + /** * Print a string to an output console or debugger window if one is available. * @note If a debugger is attached then the string will be output to the debug console, else if the * program was launched from a terminal window then it will use that. If neither of these then the text - * will not be displayed. + * will not be displayed. When message level is error then a message box will be displayed in addition + * to attempting to print to debugger/console. * @param text The string to be printed. 
+ * @param level Logging level of current message. */ - void printString(std::string const &text) noexcept; + void printString(std::string const &text, MessageLevel level = MessageLevel::Info) noexcept; /** * Initialise internal capsaicin data. * @return Boolean signaling if no error occurred. */ - bool initialise() noexcept; - - /** - * Resets internal Capsaicin state. - * @return Boolean signaling if no error occurred. - */ - bool reset() noexcept; + [[nodiscard]] bool initialise() noexcept; /** * Load the scene file corresponding to the currently set scene. + * @param scene The scene to load. * @return Boolean signaling if no error occurred. */ - bool loadScene() noexcept; + [[nodiscard]] bool loadScene(Scene scene) noexcept; /** * Set the current camera data to the currently set cameraIndex. + * @param camera The camera to load. */ - void setCamera() noexcept; + void setCamera(std::string_view camera) noexcept; /** * Update scene and render settings based on the currently requested environment map. + * @param environmentMap The environment map to load. * @return Boolean signaling if no error occurred. */ - bool setEnvironmentMap() noexcept; + [[nodiscard]] bool setEnvironmentMap(EnvironmentMap environmentMap) noexcept; /** * Update render settings based on the currently set renderer. + * @param renderer The renderer to load. */ - void setRenderer() noexcept; - - /** - * Set scene specific render options. - * @param force (Optional) True to force overwrite any existing values. - */ - void setSceneRenderOptions(bool force = false) noexcept; - - /** - * Set the current animation play mode. - * @param playMode The type of animation mode to use. - */ - void setPlayMode(Capsaicin::PlayMode playMode) noexcept; - - /** - * Set current animation state. - * @param animate True to enable animations, False otherwise. - */ - void setAnimation(bool animate) noexcept; - - /** - * Toggle the current animation state. 
- */ - void toggleAnimation() noexcept; - - /** - * Reset current animation state to beginning. - */ - void restartAnimation() noexcept; - - /** - * Get the current animation state. - * @return Boolean signaling if animation is currently enabled. - */ - bool getAnimation() noexcept; - - /** - * Update animation state for current frame. - */ - void tickAnimation() noexcept; - - /** - * Get the current frame within the current animation sequence. - * @return The current animation frame index. - */ - uint32_t getCurrentAnimationFrame() noexcept; + [[nodiscard]] bool setRenderer(std::string_view renderer) noexcept; /** * Update render settings based on the currently set renderer. * @return Boolean signaling if no error occurred. */ - bool renderFrame() noexcept; + [[nodiscard]] bool renderFrame() noexcept; /** * Perform operations to display the default GUI. @@ -154,28 +136,16 @@ class CapsaicinMain /** * Perform operations to display additional UI elements for camera control. - * @return Boolean signaling if no error occurred. */ - bool renderCameraDetails() noexcept; + void renderCameraDetails() noexcept; /** * Perform operations to display additional GUI elements. * @note Called from within @renderGUI. This can be overridden to display * alternate UI information. + * @return Boolean signaling if no error occurred. */ - void renderGUIDetails() noexcept; - - /** - * Perform operations to display profiling GUI elements. - * @note Called from within CapsaicinMain::renderGUI. - */ - void renderProfiling() noexcept; - - /** - * Perform operations to display all internal render options. - * @note Called from within CapsaicinMain::renderGUIDetails. - */ - void renderOptions() noexcept; + bool renderGUIDetails() noexcept; /** * Save the currently displayed frame to disk. 
@@ -183,74 +153,33 @@ class CapsaicinMain */ void saveFrame() noexcept; - enum class Scene : uint32_t - { - FlyingWorld = 0, - GasStation, - TropicalBedroom, - }; + /** + * Get the common base file name based on current capsaicin settings. + * @return String containing base name. + */ + std::string getSaveName() const noexcept; - enum class EnvironmentMap : uint32_t - { - None = 0, - PhotoStudioLondonHall, - KiaraDawn, - NagoyaWallPath, - SpaichingenHill, - StudioSmall, - White, - Atmosphere, - }; + static constexpr auto defaultScene = Scene::FlyingWorld; + static constexpr auto defaultEnvironmentMap = EnvironmentMap::KiaraDawn; + static constexpr auto defaultRenderer = "GI-1.1"; - GfxWindow window; /**< Gfx window class */ - GfxContext contextGFX; /**< Gfx context */ - Capsaicin::RenderSettings renderSettings; /**< Render settings used to control rendering */ - GfxScene sceneData; /**< Current scene data */ - Scene scene = Scene::TropicalBedroom; /**< Index of currently selected scene (indexes into internal list) */ - EnvironmentMap environmentMap = EnvironmentMap::KiaraDawn; /**< Currently selected environment map */ - uint32_t cameraIndex = 1; /**< Index of currently used camera (indexes into scenes camera list) */ - GfxRef camera; /**< Handle to internal gfx camera for currently used camera */ - float cameraSpeed = 1.2f; /**< Camera speed (m/s) used for camera movement */ - glm::vec3 cameraTranslation = glm::vec3(0.0f); /**< Camera translation velocity (m/s) */ - glm::vec2 cameraRotation = glm::vec2(0.0f); /**< Camera rotation velocity (m/s) */ - double previousTime = 0.0; /**< Previous wall clock time used for timing (seconds) */ - double currentTime = 0.0; /**< Current wall clock time used for timing (seconds) */ - float frameTime = 0.0; /**< Elapsed frame time for most recent frame (seconds) */ - std::string_view programName; /**< Stored name for the current program */ - bool benchmarkMode = false; /**< If enabled this prevents user inputs and runs a 
predefined benchmark */ + GfxWindow window; /**< Gfx window class */ + GfxContext contextGFX; /**< Gfx context */ + float cameraSpeed = 1.2f; /**< Camera speed (m/s) used for camera movement */ + glm::vec3 cameraTranslation = glm::vec3(0.0f); /**< Camera translation velocity (m/s) */ + glm::vec2 cameraRotation = glm::vec2(0.0f); /**< Camera rotation velocity (m/s) */ + Scene currentScene = defaultScene; /**< Currently loaded scene */ + EnvironmentMap currentEnvironmentMap = defaultEnvironmentMap; /**< Currently loaded environment map */ + std::string_view programName; /**< Stored name for the current program */ + bool benchmarkMode = false; /**< If enabled this prevents user inputs and runs a predefined benchmark */ uint32_t benchmarkModeFrameCount = 512; /**< The number of frames to be rendered during benchmarking mode */ - bool updateScene = false; - bool updateEnvironmentMap = false; - bool updateCamera = false; - bool updateRenderer = false; - bool reenableToneMap = false; /**< Used to re-enable Tonemapping after a frame has been saved to disk */ - - // Scene statistics for currently loaded scene - uint32_t triangleCount = 0; - - class Graph - { - public: - Graph() noexcept = default; - - uint32_t getValueCount() const noexcept; - void addValue(float value) noexcept; - float getLastAddedValue() const noexcept; - float getValueAtIndex(uint32_t index) const noexcept; - float getAverageValue() noexcept; - void reset() noexcept; - - static float GetValueAtIndex(void *object, int32_t index) noexcept; - - private: - uint32_t current = 0; /**< The current cursor into values circular buffer */ - std::array values = {0.0f}; /**< The stored list of values */ - }; - - Graph frameGraph; /**< The stored frame history graph */ - std::pair - selectedProfile; /**< Currently selected technique used in renderProfiling */ + uint32_t benchmarkModeStartFrame = uint32_t(-1); /**< The first frame to start saving images at in + benchmark mode (default is just the last frame) */ + 
std::string benchmarkModeSuffix; /**< String appended to any saved files */ + bool saveAsJPEG = false; /**< File type selector for dump frame */ + bool reenableToneMap = false; /**< Used to re-enable Tonemapping after a frame has been saved to disk */ + bool reDisableRender = false; /**< Use to render only a single frame at a time */ bool hasConsole = false; /**< Set if a console output terminal is attached */ }; diff --git a/third_party/CLI11 b/third_party/CLI11 index a6c4826..c2ea58c 160000 --- a/third_party/CLI11 +++ b/third_party/CLI11 @@ -1 +1 @@ -Subproject commit a6c48261d4fb62b232c46277acbcc3d14d5b7e14 +Subproject commit c2ea58c7f9bb2a1da2d3d7f5b462121ac6a07f16 diff --git a/third_party/agility_sdk/dummy.lib b/third_party/agility_sdk/dummy.lib new file mode 100644 index 0000000..e68e2ed Binary files /dev/null and b/third_party/agility_sdk/dummy.lib differ diff --git a/third_party/ffx-parallelsort/FFX_ParallelSort.h b/third_party/ffx-parallelsort/FFX_ParallelSort.h index 240eee5..afdad3a 100644 --- a/third_party/ffx-parallelsort/FFX_ParallelSort.h +++ b/third_party/ffx-parallelsort/FFX_ParallelSort.h @@ -1,6 +1,6 @@ // FFX_ParallelSort.h // -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
// Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights diff --git a/third_party/gfx b/third_party/gfx index 771a135..3453ef9 160000 --- a/third_party/gfx +++ b/third_party/gfx @@ -1 +1 @@ -Subproject commit 771a13524307f21f7f299553ade31e10343d342e +Subproject commit 3453ef9549fbed898b36d6dc3a60e5b08d87e3a7 diff --git a/third_party/miniz b/third_party/miniz deleted file mode 160000 index 9ae305f..0000000 --- a/third_party/miniz +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9ae305f6e109f8f1fbd2130458f1ee6197269b3b diff --git a/third_party/tinyexr b/third_party/tinyexr deleted file mode 160000 index 41cc140..0000000 --- a/third_party/tinyexr +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 41cc1405bbc7ab05e99bd0d581f72aa6d2c190c7