diff --git a/.gitignore b/.gitignore index 877d2a3c0..0cb3b7be6 100644 --- a/.gitignore +++ b/.gitignore @@ -4,11 +4,11 @@ .baseq2/ rogue/ xatrix/ -q2pro -q2proded +q2rtx +q2rtxded game*.so *.swp -build/ +build*/ shader_vkpt ./vkpt ./vkptded diff --git a/CMakeLists.txt b/CMakeLists.txt index 20d69d315..c1c5eca20 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ OPTION(CONFIG_VKPT_ENABLE_DEVICE_GROUPS "Enable device groups (multi-gpu) suppor OPTION(CONFIG_VKPT_ENABLE_IMAGE_DUMPS "Enable image dumping functionality" OFF) OPTION(CONFIG_USE_CURL "Use CURL for HTTP support" ON) OPTION(CONFIG_LINUX_PACKAGING_SUPPORT "Enable Linux Packaging support" OFF) +OPTION(CONFIG_LINUX_PACKAGING_SKIP_PKZ "Skip zipping the game contents into .pkz when packaging (for quicker iteration)" OFF) OPTION(CONFIG_LINUX_STEAM_RUNTIME_SUPPORT "Enable Linux Steam Runtime support" OFF) IF(WIN32) SET(DEFAULT_BUILD_GLSLANG OFF) @@ -95,9 +96,15 @@ IF(CONFIG_LINUX_PACKAGING_SUPPORT) set(CPACK_PACKAGE_VERSION_MAJOR ${Q2RTX_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${Q2RTX_VERSION_MINOR}) set(CPACK_PACKAGE_VERSION_PATCH ${Q2RTX_VERSION_POINT}) - set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") - set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64") set(CPACK_DEBIAN_PACKAGE_SECTION "games") + + IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "arm64") + ELSE() + set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64") + ENDIF() + + set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}-${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}") set(CPACK_PACKAGE_CONTACT "http://nvidia.com/object/support.html") diff --git a/cmake/compileShaders.cmake b/cmake/compileShaders.cmake index 5239e6b56..183ecdd91 100644 --- a/cmake/compileShaders.cmake +++ b/cmake/compileShaders.cmake @@ -8,6 +8,8 @@ 
set(SHADER_SOURCE_DEPENDENCIES ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/light_lists.h ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/path_tracer_rgen.h ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/path_tracer.h + ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/path_tracer_hit_shaders.h + ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/path_tracer_transparency.glsl ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/precomputed_sky.glsl ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/precomputed_sky_params.h ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/projection.glsl diff --git a/inc/client/video.h b/inc/client/video.h index 6c9bd3e95..7e238a2de 100644 --- a/inc/client/video.h +++ b/inc/client/video.h @@ -44,9 +44,6 @@ void VID_UpdateGamma(const byte *table); void *VID_GetCoreAddr(const char *sym); void *VID_GetProcAddr(const char *sym); -qboolean VID_VideoSync(void); -void VID_VideoWait(void); - void VID_BeginFrame(void); void VID_EndFrame(void); diff --git a/inc/common/bsp.h b/inc/common/bsp.h index 9ea64bc81..83de60aa4 100644 --- a/inc/common/bsp.h +++ b/inc/common/bsp.h @@ -283,8 +283,8 @@ typedef struct bsp_s { msurfedge_t *surfedges; #endif - char *pvs_matrix; - char *pvs2_matrix; + byte *pvs_matrix; + byte *pvs2_matrix; qboolean pvs_patched; // WARNING: the 'name' string is actually longer than this, and the bsp_t structure is allocated larger than sizeof(bsp_t) in BSP_Load @@ -312,8 +312,8 @@ byte *BSP_ClusterVis(bsp_t *bsp, byte *mask, int cluster, int vis); mleaf_t *BSP_PointLeaf(mnode_t *node, vec3_t p); mmodel_t *BSP_InlineModel(bsp_t *bsp, const char *name); -char* BSP_GetPvs(bsp_t *bsp, int cluster); -char* BSP_GetPvs2(bsp_t *bsp, int cluster); +byte* BSP_GetPvs(bsp_t *bsp, int cluster); +byte* BSP_GetPvs2(bsp_t *bsp, int cluster); qboolean BSP_SavePatchedPVS(bsp_t *bsp); diff --git a/inc/format/iqm.h b/inc/format/iqm.h new file mode 100644 index 000000000..015342d88 --- /dev/null +++ b/inc/format/iqm.h @@ -0,0 +1,131 @@ +/* 
+=========================================================================== +This file is part of Quake III Arena source code. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Quake III Arena source code; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== +*/ + +#ifndef __IQM_H__ +#define __IQM_H__ + +#define IQM_MAGIC "INTERQUAKEMODEL" +#define IQM_VERSION 2 + +#define IQM_IDENT (('E'<<24)+('T'<<16)+('N'<<8)+'I') + +#define IQM_MAX_JOINTS 256 + +typedef struct iqmheader +{ + char magic[16]; + unsigned int version; + unsigned int filesize; + unsigned int flags; + unsigned int num_text, ofs_text; + unsigned int num_meshes, ofs_meshes; + unsigned int num_vertexarrays, num_vertexes, ofs_vertexarrays; + unsigned int num_triangles, ofs_triangles, ofs_adjacency; + unsigned int num_joints, ofs_joints; + unsigned int num_poses, ofs_poses; + unsigned int num_anims, ofs_anims; + unsigned int num_frames, num_framechannels, ofs_frames, ofs_bounds; + unsigned int num_comment, ofs_comment; + unsigned int num_extensions, ofs_extensions; +} iqmHeader_t; + +typedef struct iqmmesh +{ + unsigned int name; + unsigned int material; + unsigned int first_vertex, num_vertexes; + unsigned int first_triangle, num_triangles; +} iqmMesh_t; + +enum +{ + IQM_POSITION = 0, + IQM_TEXCOORD = 1, + IQM_NORMAL = 2, + IQM_TANGENT = 
3, + IQM_BLENDINDEXES = 4, + IQM_BLENDWEIGHTS = 5, + IQM_COLOR = 6, + IQM_CUSTOM = 0x10 +}; + +enum +{ + IQM_BYTE = 0, + IQM_UBYTE = 1, + IQM_SHORT = 2, + IQM_USHORT = 3, + IQM_INT = 4, + IQM_UINT = 5, + IQM_HALF = 6, + IQM_FLOAT = 7, + IQM_DOUBLE = 8, +}; + +typedef struct iqmtriangle +{ + unsigned int vertex[3]; +} iqmTriangle_t; + +typedef struct iqmjoint +{ + unsigned int name; + int parent; + float translate[3], rotate[4], scale[3]; +} iqmJoint_t; + +typedef struct iqmpose +{ + int parent; + unsigned int mask; + float channeloffset[10]; + float channelscale[10]; +} iqmPose_t; + +typedef struct iqmanim +{ + unsigned int name; + unsigned int first_frame, num_frames; + float framerate; + unsigned int flags; +} iqmAnim_t; + +enum +{ + IQM_LOOP = 1<<0 +}; + +typedef struct iqmvertexarray +{ + unsigned int type; + unsigned int flags; + unsigned int format; + unsigned int size; + unsigned int offset; +} iqmVertexArray_t; + +typedef struct iqmbounds +{ + float bbmin[3], bbmax[3]; + float xyradius, radius; +} iqmBounds_t; + +#endif + diff --git a/inc/refresh/models.h b/inc/refresh/models.h index f1c62f449..54ef18d09 100644 --- a/inc/refresh/models.h +++ b/inc/refresh/models.h @@ -48,6 +48,65 @@ typedef enum MCLASS_FLARE } model_class_t; +typedef struct +{ + vec3_t translate; + quat_t rotate; + vec3_t scale; +} iqm_transform_t; + +typedef struct +{ + char name[MAX_QPATH]; + uint32_t first_frame; + uint32_t num_frames; + qboolean loop; +} iqm_anim_t; + +// inter-quake-model +typedef struct +{ + uint32_t num_vertexes; + uint32_t num_triangles; + uint32_t num_frames; + uint32_t num_meshes; + uint32_t num_joints; + uint32_t num_poses; + uint32_t num_animations; + struct iqm_mesh_s* meshes; + + uint32_t* indices; + + // vertex arrays + float* positions; + float* texcoords; + float* normals; + float* tangents; + byte* colors; + byte* blend_indices; // byte4 per vertex + float* blend_weights; // float4 per vertex + + char* jointNames; + int* jointParents; + float* bindJoints; 
// [num_joints * 12] + float* invBindJoints; // [num_joints * 12] + iqm_transform_t* poses; // [num_frames * num_poses] + float* bounds; + + iqm_anim_t* animations; +} iqm_model_t; + +// inter-quake-model mesh +typedef struct iqm_mesh_s +{ + char name[MAX_QPATH]; + char material[MAX_QPATH]; + iqm_model_t* data; + uint32_t first_vertex, num_vertexes; + uint32_t first_triangle, num_triangles; + uint32_t first_influence, num_influences; +} iqm_mesh_t; + typedef struct model_s { enum { MOD_FREE, @@ -81,6 +140,8 @@ typedef struct model_s { // sprite models struct mspriteframe_s *spriteframes; qboolean sprite_vertical; + + iqm_model_t* iqmData; } model_t; extern model_t r_models[]; @@ -88,6 +149,8 @@ extern int r_numModels; extern int registration_sequence; +typedef struct entity_s entity_t; + // these are implemented in r_models.c void MOD_FreeUnused(void); void MOD_FreeAll(void); @@ -100,12 +163,16 @@ qhandle_t R_RegisterModel(const char *name); struct dmd2header_s; qerror_t MOD_ValidateMD2(struct dmd2header_s *header, size_t length); +qerror_t MOD_LoadIQM_Base(model_t* mod, const void* rawdata, size_t length, const char* mod_name); +qboolean R_ComputeIQMTransforms(const iqm_model_t* model, const entity_t* entity, float* pose_matrices); + // these are implemented in [gl,sw]_models.c -typedef qerror_t (*mod_load_t)(model_t *, const void *, size_t); -extern qerror_t (*MOD_LoadMD2)(model_t *model, const void *rawdata, size_t length); +typedef qerror_t (*mod_load_t)(model_t *, const void *, size_t, const char*); +extern qerror_t (*MOD_LoadMD2)(model_t *model, const void *rawdata, size_t length, const char* mod_name); #if USE_MD3 -extern qerror_t (*MOD_LoadMD3)(model_t *model, const void *rawdata, size_t length); +extern qerror_t (*MOD_LoadMD3)(model_t *model, const void *rawdata, size_t length, const char* mod_name); #endif +extern qerror_t(*MOD_LoadIQM)(model_t* model, const void* rawdata, size_t length, const char* mod_name); extern void (*MOD_Reference)(model_t *model); 
#endif // MODELS_H diff --git a/inc/refresh/refresh.h b/inc/refresh/refresh.h index 744de5ea0..28f7e5dca 100644 --- a/inc/refresh/refresh.h +++ b/inc/refresh/refresh.h @@ -175,8 +175,7 @@ typedef struct refdef_s { typedef enum { QVF_ACCELERATED = (1 << 0), QVF_GAMMARAMP = (1 << 1), - QVF_FULLSCREEN = (1 << 2), - QVF_VIDEOSYNC = (1 << 3) + QVF_FULLSCREEN = (1 << 2) } vidFlags_t; typedef struct { diff --git a/inc/shared/config.h b/inc/shared/config.h index 241cf0d03..a0f6198f2 100644 --- a/inc/shared/config.h +++ b/inc/shared/config.h @@ -14,6 +14,9 @@ #elif _WIN32 #define CPUSTRING "x86" #define BUILDSTRING "Win32" +#elif __aarch64__ +#define CPUSTRING "aarch64" +#define BUILDSTRING "Linux" #elif __x86_64__ #define CPUSTRING "x86_64" #define BUILDSTRING "Linux" diff --git a/inc/shared/shared.h b/inc/shared/shared.h index a6eee253b..251dff42d 100644 --- a/inc/shared/shared.h +++ b/inc/shared/shared.h @@ -137,6 +137,8 @@ typedef vec_t vec3_t[3]; typedef vec_t vec4_t[4]; typedef vec_t vec5_t[5]; +typedef vec_t quat_t[4]; + typedef float mat4_t[16]; typedef union { @@ -261,6 +263,8 @@ static inline float Q_fabs(float f) #define Vector4Negate(a,b) ((b)[0]=-(a)[0],(b)[1]=-(a)[1],(b)[2]=-(a)[2],(b)[3]=-(a)[3]) #define Vector4Set(v, a, b, c, d) ((v)[0]=(a),(v)[1]=(b),(v)[2]=(c),(v)[3]=(d)) +#define QuatCopy(a,b) ((b)[0]=(a)[0],(b)[1]=(a)[1],(b)[2]=(a)[2],(b)[3]=(a)[3]) + void AngleVectors(vec3_t angles, vec3_t forward, vec3_t right, vec3_t up); vec_t VectorNormalize(vec3_t v); // returns vector length vec_t VectorNormalize2(vec3_t v, vec3_t out); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c7b7761fc..ca79254b3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,13 @@ cmake_minimum_required (VERSION 3.9) cmake_policy(SET CMP0069 NEW) +if (UNIX) + FIND_PACKAGE(OpenAL) + if (NOT OPENAL_INCLUDE_DIR) + message(FATAL_ERROR "Can't find OpenAL. 
Please install the libopenal-dev package.") + endif() +endif() + SET(SRC_BASEQ2 baseq2/g_ai.c baseq2/g_chase.c @@ -185,6 +192,7 @@ SET(HEADERS_COMMON SET(SRC_REFRESH refresh/images.c refresh/models.c + refresh/model_iqm.c refresh/stb/stb.c ) @@ -279,6 +287,7 @@ SET(SRC_VKPT refresh/vkpt/buddy_allocator.c refresh/vkpt/device_memory_allocator.c refresh/vkpt/god_rays.c + refresh/vkpt/conversion.c ) SET(HEADERS_VKPT @@ -289,6 +298,7 @@ SET(HEADERS_VKPT refresh/vkpt/material.h refresh/vkpt/physical_sky.h refresh/vkpt/precomputed_sky.h + refresh/vkpt/conversion.h ) set(SRC_SHADERS @@ -330,6 +340,7 @@ set(SRC_RT_SHADERS refresh/vkpt/shader/path_tracer_particle.rahit refresh/vkpt/shader/path_tracer_sprite.rahit refresh/vkpt/shader/path_tracer_beam.rahit + refresh/vkpt/shader/path_tracer_beam.rint refresh/vkpt/shader/path_tracer_explosion.rahit refresh/vkpt/shader/path_tracer_shadow.rmiss refresh/vkpt/shader/reflect_refract.rgen @@ -370,7 +381,7 @@ IF(WIN32) ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS) ENDIF() -ADD_LIBRARY(gamex86 SHARED ${SRC_BASEQ2} ${HEADERS_BASEQ2} ${SRC_SHARED}) +ADD_LIBRARY(baseq2 SHARED ${SRC_BASEQ2} ${HEADERS_BASEQ2} ${SRC_SHARED}) IF(WIN32) ADD_EXECUTABLE(client WIN32 ${SRC_CLIENT} ${HEADERS_CLIENT} @@ -464,8 +475,8 @@ SOURCE_GROUP("windows\\headers" FILES ${HEADERS_WINDOWS}) IF (WIN32) TARGET_INCLUDE_DIRECTORIES(client PRIVATE ../VC/inc) TARGET_INCLUDE_DIRECTORIES(server PRIVATE ../VC/inc) - TARGET_INCLUDE_DIRECTORIES(gamex86 PRIVATE ../VC/inc) - TARGET_SOURCES(gamex86 PRIVATE windows/res/baseq2.rc) + TARGET_INCLUDE_DIRECTORIES(baseq2 PRIVATE ../VC/inc) + TARGET_SOURCES(baseq2 PRIVATE windows/res/baseq2.rc) TARGET_LINK_LIBRARIES(client winmm ws2_32) TARGET_LINK_LIBRARIES(server winmm ws2_32) @@ -476,10 +487,10 @@ IF (WIN32) # macro redefinition, deprecation target_compile_options(client PRIVATE /wd4005 /wd4996) target_compile_options(server PRIVATE /wd4005 /wd4996) - target_compile_options(gamex86 PRIVATE /wd4005 /wd4996) + 
target_compile_options(baseq2 PRIVATE /wd4005 /wd4996) ENDIF() -TARGET_INCLUDE_DIRECTORIES(gamex86 PRIVATE ../inc) +TARGET_INCLUDE_DIRECTORIES(baseq2 PRIVATE ../inc) TARGET_INCLUDE_DIRECTORIES(client PRIVATE ../inc) TARGET_INCLUDE_DIRECTORIES(client PRIVATE "${ZLIB_INCLUDE_DIRS}") @@ -519,7 +530,7 @@ SET_TARGET_PROPERTIES(server # specify both LIBRARY and RUNTIME because one works only on Windows and another works only on Linux -SET_TARGET_PROPERTIES(gamex86 +SET_TARGET_PROPERTIES(baseq2 PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/baseq2" LIBRARY_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/baseq2" @@ -536,11 +547,19 @@ SET_TARGET_PROPERTIES(gamex86 ) IF(IS_64_BIT) - SET_TARGET_PROPERTIES(gamex86 - PROPERTIES - LIBRARY_OUTPUT_NAME "gamex86_64" - RUNTIME_OUTPUT_NAME "gamex86_64" - ) + IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + SET_TARGET_PROPERTIES(baseq2 + PROPERTIES + LIBRARY_OUTPUT_NAME "gameaarch64" + RUNTIME_OUTPUT_NAME "gameaarch64" + ) + ELSE() + SET_TARGET_PROPERTIES(baseq2 + PROPERTIES + LIBRARY_OUTPUT_NAME "gamex86_64" + RUNTIME_OUTPUT_NAME "gamex86_64" + ) + ENDIF() ENDIF() include(ProcessorCount) @@ -574,12 +593,14 @@ IF(CONFIG_LINUX_PACKAGING_SUPPORT) INSTALL(TARGETS client DESTINATION share/quake2rtx/bin COMPONENT shareware) INSTALL(TARGETS server DESTINATION games COMPONENT shareware) - # Compress NVIDIA custom content - INSTALL(CODE "set(SOURCE \"${CMAKE_SOURCE_DIR}\")" - SCRIPT "${CMAKE_SOURCE_DIR}/setup/package_media.cmake" - SCRIPT "${CMAKE_SOURCE_DIR}/setup/package_shaders.cmake" - COMPONENT shareware) - INSTALL(TARGETS gamex86 DESTINATION share/quake2rtx/baseq2 COMPONENT shareware) + IF(NOT CONFIG_LINUX_PACKAGING_SKIP_PKZ) + # Compress NVIDIA custom content + INSTALL(CODE "set(SOURCE \"${CMAKE_SOURCE_DIR}\")" + SCRIPT "${CMAKE_SOURCE_DIR}/setup/package_media.cmake" + SCRIPT "${CMAKE_SOURCE_DIR}/setup/package_shaders.cmake" + COMPONENT shareware) + ENDIF() + INSTALL(TARGETS baseq2 DESTINATION share/quake2rtx/baseq2 COMPONENT 
shareware) # Package data files, including the shareware pak0 for demo levels set (SHAREWARE_DATA_FILES_LIST diff --git a/src/client/cin.c b/src/client/cin.c index 197aae272..c1708502f 100644 --- a/src/client/cin.c +++ b/src/client/cin.c @@ -320,7 +320,7 @@ cblock_t Huff1Decompress(cblock_t in) if (input - in.data != in.count && input - in.data != in.count + 1) { - Com_Printf("Decompression overread by %i", (input - in.data) - in.count); + Com_Printf("Decompression overread by %li", (long)((input - in.data) - in.count)); /* the pointer difference is ptrdiff_t, which is wider than long on 64-bit Windows: cast so the argument always matches %li */ } out.count = out_p - out.data; diff --git a/src/client/main.c b/src/client/main.c index 1637bb9b3..81d36b419 100644 --- a/src/client/main.c +++ b/src/client/main.c @@ -28,6 +28,7 @@ cvar_t *cl_timeout; cvar_t *cl_predict; cvar_t *cl_gun; cvar_t *cl_gunalpha; +cvar_t *cl_warn_on_fps_rounding; /* values <= 0 silence warn_on_fps_rounding() */ cvar_t *cl_maxfps; cvar_t *cl_async; cvar_t *r_maxfps; @@ -2625,11 +2626,51 @@ static void cl_updaterate_changed(cvar_t *self) } #endif +static inline int fps_to_msec(int fps) /* whole-millisecond frame period for a rate in Hz (truncating division); fps must be non-zero */ +{ +#if 0 + return (1000 + fps / 2) / fps; +#else + return 1000 / fps; +#endif +} + +static void warn_on_fps_rounding(cvar_t *cvar) /* prints a warning when the integer msec period derived from cvar->integer converts back to a different rate; does nothing for non-positive values or when cl_warn_on_fps_rounding is <= 0 */ +{ + static qboolean warned = qfalse; /* the "how to disable" hint below is printed only the first time */ + int msec, real_maxfps; + + if (cvar->integer <= 0 || cl_warn_on_fps_rounding->integer <= 0) + return; + + msec = fps_to_msec(cvar->integer); + if (!msec) /* rates above 1000 truncate to a 0 ms period, which cannot be converted back */ + return; + + real_maxfps = 1000 / msec; + if (cvar->integer == real_maxfps) + return; + + Com_WPrintf("%s value `%d' is inexact, using `%d' instead.\n", + cvar->name, cvar->integer, real_maxfps); + if (!warned) { + Com_Printf("(Set `%s' to `0' to disable this warning.)\n", + cl_warn_on_fps_rounding->name); + warned = qtrue; + } +} + static void cl_sync_changed(cvar_t *self) { CL_UpdateFrameTimes(); } +static void cl_maxfps_changed(cvar_t *self) /* cvar change hook: recompute the frame timings, then warn if the new value is inexact */ +{ + CL_UpdateFrameTimes(); + warn_on_fps_rounding(self); +} + // allow downloads to be permanently disabled as a // protection measure from malicious (or just stupid) servers // that force downloads by stuffing commands @@ -2742,12 
+2783,13 @@ static void CL_InitLocal(void) cl_predict = Cvar_Get("cl_predict", "1", 0); cl_predict->changed = cl_predict_changed; cl_kickangles = Cvar_Get("cl_kickangles", "1", CVAR_CHEAT); - cl_maxfps = Cvar_Get("cl_maxfps", "60", 0); - cl_maxfps->changed = cl_sync_changed; + cl_warn_on_fps_rounding = Cvar_Get("cl_warn_on_fps_rounding", "1", 0); + cl_maxfps = Cvar_Get("cl_maxfps", "62", 0); + cl_maxfps->changed = cl_maxfps_changed; cl_async = Cvar_Get("cl_async", "1", 0); cl_async->changed = cl_sync_changed; r_maxfps = Cvar_Get("r_maxfps", "0", 0); - r_maxfps->changed = cl_sync_changed; + r_maxfps->changed = cl_maxfps_changed; cl_autopause = Cvar_Get("cl_autopause", "1", 0); cl_rollhack = Cvar_Get("cl_rollhack", "1", 0); cl_noglow = Cvar_Get("cl_noglow", "0", 0); @@ -2757,6 +2799,8 @@ static void CL_InitLocal(void) com_timedemo->changed = cl_sync_changed; CL_UpdateFrameTimes(); + warn_on_fps_rounding(cl_maxfps); + warn_on_fps_rounding(r_maxfps); #ifdef _DEBUG cl_shownet = Cvar_Get("cl_shownet", "0", 0); @@ -3090,25 +3134,19 @@ void CL_CheckForPause(void) } typedef enum { - SYNC_FULL, + SYNC_TIMEDEMO, SYNC_MAXFPS, SYNC_SLEEP_10, SYNC_SLEEP_60, - SYNC_SLEEP_VIDEO, - ASYNC_VIDEO, - ASYNC_MAXFPS, ASYNC_FULL } sync_mode_t; #ifdef _DEBUG static const char *const sync_names[] = { - "SYNC_FULL", + "SYNC_TIMEDEMO", "SYNC_MAXFPS", "SYNC_SLEEP_10", "SYNC_SLEEP_60", - "SYNC_SLEEP_VIDEO", - "ASYNC_VIDEO", - "ASYNC_MAXFPS", "ASYNC_FULL" }; #endif @@ -3117,13 +3155,17 @@ static int ref_msec, phys_msec, main_msec; static int ref_extra, phys_extra, main_extra; static sync_mode_t sync_mode; -static inline int fps_to_msec(int fps) +#define MIN_PHYS_HZ 10 /* clamp range, in frames per second, applied to cl_maxfps */ +#define MAX_PHYS_HZ 125 +#define MIN_REF_HZ MIN_PHYS_HZ /* clamp range, in frames per second, applied to r_maxfps */ +#define MAX_REF_HZ 1000 + +static int fps_to_clamped_msec(cvar_t *cvar, int min, int max) /* frame period in msec for an fps cvar: a value of 0 maps to the period of max, any other value goes through Cvar_ClampInteger(cvar, min, max) first */ { -#if 0 - return (1000 + fps / 2) / fps; -#else - return 1000 / fps; -#endif + if (cvar->integer == 0) + return fps_to_msec(max); + else + return fps_to_msec(Cvar_ClampInteger(cvar, 
min, max)); } /* @@ -3139,60 +3181,33 @@ void CL_UpdateFrameTimes(void) return; // not yet fully initialized } - // check if video driver supports syncing to vertical retrace - if (cl_async->integer > 1 && !(r_config.flags & QVF_VIDEOSYNC)) { - Cvar_Reset(cl_async); - } + phys_msec = ref_msec = main_msec = 0; + ref_extra = phys_extra = main_extra = 0; if (com_timedemo->integer) { // timedemo just runs at full speed - ref_msec = phys_msec = main_msec = 0; - sync_mode = SYNC_FULL; + sync_mode = SYNC_TIMEDEMO; } else if (cls.active == ACT_MINIMIZED) { // run at 10 fps if minimized - ref_msec = phys_msec = 0; main_msec = fps_to_msec(10); sync_mode = SYNC_SLEEP_10; } else if (cls.active == ACT_RESTORED || cls.state != ca_active) { // run at 60 fps if not active - ref_msec = phys_msec = 0; - if (cl_async->integer > 1) { - main_msec = 0; - sync_mode = SYNC_SLEEP_VIDEO; - } else { - main_msec = fps_to_msec(60); - sync_mode = SYNC_SLEEP_60; - } + main_msec = fps_to_msec(60); + sync_mode = SYNC_SLEEP_60; } else if (cl_async->integer > 0) { // run physics and refresh separately - phys_msec = fps_to_msec(Cvar_ClampInteger(cl_maxfps, 10, 120)); - if (cl_async->integer > 1) { - ref_msec = 0; - sync_mode = ASYNC_VIDEO; - } else if (r_maxfps->integer) { - ref_msec = fps_to_msec(Cvar_ClampInteger(r_maxfps, 10, 1000)); - sync_mode = ASYNC_MAXFPS; - } else { - ref_msec = 1; - sync_mode = ASYNC_FULL; - } - main_msec = 0; + phys_msec = fps_to_clamped_msec(cl_maxfps, MIN_PHYS_HZ, MAX_PHYS_HZ); + ref_msec = fps_to_clamped_msec(r_maxfps, MIN_REF_HZ, MAX_REF_HZ); + sync_mode = ASYNC_FULL; } else { // everything ticks in sync with refresh - phys_msec = ref_msec = 0; - if (cl_maxfps->integer) { - main_msec = fps_to_msec(Cvar_ClampInteger(cl_maxfps, 10, 1000)); - sync_mode = SYNC_MAXFPS; - } else { - main_msec = 1; - sync_mode = SYNC_FULL; - } + main_msec = fps_to_clamped_msec(cl_maxfps, MIN_PHYS_HZ, MAX_PHYS_HZ); + sync_mode = SYNC_MAXFPS; } - Com_DDDPrintf("%s: mode=%s main_msec=%d 
ref_msec=%d, phys_msec=%d\n", - __func__, sync_names[sync_mode], main_msec, ref_msec, phys_msec); - - ref_extra = phys_extra = main_extra = 0; + Com_DDPrintf("%s: mode=%s main_msec=%d ref_msec=%d, phys_msec=%d\n", + __func__, sync_names[sync_mode], main_msec, ref_msec, phys_msec); } /* @@ -3203,7 +3218,7 @@ CL_Frame */ unsigned CL_Frame(unsigned msec) { - qboolean phys_frame, ref_frame; + qboolean phys_frame = qtrue, ref_frame = qtrue; time_after_ref = time_before_ref = 0; @@ -3216,9 +3231,8 @@ unsigned CL_Frame(unsigned msec) CL_ProcessEvents(); - ref_frame = phys_frame = qtrue; switch (sync_mode) { - case SYNC_FULL: + case SYNC_TIMEDEMO: // timedemo just runs at full speed break; case SYNC_SLEEP_10: @@ -3231,31 +3245,25 @@ unsigned CL_Frame(unsigned msec) return main_msec - main_extra; } break; - case SYNC_SLEEP_VIDEO: - // wait for vertical retrace if not active - VID_VideoWait(); - break; - case ASYNC_VIDEO: - case ASYNC_MAXFPS: case ASYNC_FULL: // run physics and refresh separately - phys_extra += main_extra; + phys_extra += msec; + ref_extra += msec; + if (phys_extra < phys_msec) { phys_frame = qfalse; } else if (phys_extra > phys_msec * 4) { phys_extra = phys_msec; } - if (sync_mode == ASYNC_VIDEO) { - // sync refresh to vertical retrace - ref_frame = VID_VideoSync(); - } else { - ref_extra += main_extra; - if (ref_extra < ref_msec) { - ref_frame = qfalse; - } else if (ref_extra > ref_msec * 4) { - ref_extra = ref_msec; - } + if (ref_extra < ref_msec) { + ref_frame = qfalse; + } else if (ref_extra > ref_msec * 4) { + ref_extra = ref_msec; + } + // Return immediately if neither physics or refresh are scheduled + if(!phys_frame && !ref_frame) { + return min(phys_msec - phys_extra, ref_msec - ref_extra); } break; case SYNC_MAXFPS: diff --git a/src/client/refresh.c b/src/client/refresh.c index b64699fa8..676f2ed21 100644 --- a/src/client/refresh.c +++ b/src/client/refresh.c @@ -434,10 +434,11 @@ void(*IMG_Unload)(image_t *image) = NULL; void(*IMG_Load)(image_t 
*image, byte *pic) = NULL; byte* (*IMG_ReadPixels)(int *width, int *height, int *rowbytes) = NULL; -qerror_t(*MOD_LoadMD2)(model_t *model, const void *rawdata, size_t length) = NULL; +qerror_t(*MOD_LoadMD2)(model_t *model, const void *rawdata, size_t length, const char* mod_name) = NULL; #if USE_MD3 -qerror_t(*MOD_LoadMD3)(model_t *model, const void *rawdata, size_t length) = NULL; +qerror_t(*MOD_LoadMD3)(model_t *model, const void *rawdata, size_t length, const char* mod_name) = NULL; #endif +qerror_t(*MOD_LoadIQM)(model_t* model, const void* rawdata, size_t length, const char* mod_name) = NULL; void(*MOD_Reference)(model_t *model) = NULL; float R_ClampScale(cvar_t *var) diff --git a/src/client/screen.c b/src/client/screen.c index 99c2103ff..6d9983086 100644 --- a/src/client/screen.c +++ b/src/client/screen.c @@ -858,7 +858,7 @@ static void SCR_DrawFPS(void) if (scr_fps->integer == 0) return; - int fps = CL_GetFps(); + int fps = R_FPS; int scale = CL_GetResolutionScale(); char buffer[MAX_QPATH]; diff --git a/src/common/bsp.c b/src/common/bsp.c index 6b52798a8..d031c47bd 100644 --- a/src/common/bsp.c +++ b/src/common/bsp.c @@ -961,7 +961,7 @@ static void BSP_BuildPvsMatrix(bsp_t *bsp) // allocate the matrix but don't set it in the BSP structure yet: // we want BSP_CluterVis to use the old PVS data here, and not the new empty matrix - char* pvs_matrix = Z_Mallocz(matrix_size); + byte* pvs_matrix = Z_Mallocz(matrix_size); for (int cluster = 0; cluster < bsp->vis->numclusters; cluster++) { @@ -971,7 +971,7 @@ static void BSP_BuildPvsMatrix(bsp_t *bsp) bsp->pvs_matrix = pvs_matrix; } -char* BSP_GetPvs(bsp_t *bsp, int cluster) +byte* BSP_GetPvs(bsp_t *bsp, int cluster) { if (!bsp->vis || !bsp->pvs_matrix) return NULL; @@ -982,7 +982,7 @@ char* BSP_GetPvs(bsp_t *bsp, int cluster) return bsp->pvs_matrix + bsp->visrowsize * cluster; } -char* BSP_GetPvs2(bsp_t *bsp, int cluster) +byte* BSP_GetPvs2(bsp_t *bsp, int cluster) { if (!bsp->vis || !bsp->pvs2_matrix) return NULL; 
@@ -1025,7 +1025,7 @@ static qboolean BSP_LoadPatchedPVS(bsp_t *bsp) unsigned char* filebuf = 0; ssize_t filelen = 0; - filelen = FS_LoadFile(pvs_path, &filebuf); + filelen = FS_LoadFile(pvs_path, (void**)&filebuf); if (filebuf == 0) return qfalse; @@ -1351,7 +1351,7 @@ byte *BSP_ClusterVis(bsp_t *bsp, byte *mask, int cluster, int vis) { if (bsp->pvs2_matrix) { - char* row = BSP_GetPvs2(bsp, cluster); + byte* row = BSP_GetPvs2(bsp, cluster); memcpy(mask, row, bsp->visrowsize); return mask; } @@ -1362,7 +1362,7 @@ byte *BSP_ClusterVis(bsp_t *bsp, byte *mask, int cluster, int vis) if (vis == DVIS_PVS && bsp->pvs_matrix) { - char* row = BSP_GetPvs(bsp, cluster); + byte* row = BSP_GetPvs(bsp, cluster); memcpy(mask, row, bsp->visrowsize); return mask; } diff --git a/src/common/cmd.c b/src/common/cmd.c index 2d226eae4..2c75cd400 100644 --- a/src/common/cmd.c +++ b/src/common/cmd.c @@ -709,8 +709,8 @@ static void Cmd_OpenURL_f(void) #ifdef __linux__ pid_t pid = fork(); if (pid == 0) { - char * args[] = { "xdg-open", url, NULL}; - execv("/usr/bin/xdg-open", args); + const char* args[] = { "xdg-open", url, NULL}; + execv("/usr/bin/xdg-open", (char* const*)args); exit(0); } #elif _WINDOWS diff --git a/src/refresh/gl/gl.h b/src/refresh/gl/gl.h index 9e9392d41..e56da5c43 100644 --- a/src/refresh/gl/gl.h +++ b/src/refresh/gl/gl.h @@ -520,6 +520,6 @@ void HQ2x_Init(void); /* models.c */ -qerror_t MOD_LoadMD2_GL(model_t *model, const void *rawdata, size_t length); -qerror_t MOD_LoadMD3_GL(model_t *model, const void *rawdata, size_t length); +qerror_t MOD_LoadMD2_GL(model_t *model, const void *rawdata, size_t length, const char* mod_name); +qerror_t MOD_LoadMD3_GL(model_t *model, const void *rawdata, size_t length, const char* mod_name); void MOD_Reference_GL(model_t *model); diff --git a/src/refresh/gl/main.c b/src/refresh/gl/main.c index 21d0b7a3a..591eaa2f9 100644 --- a/src/refresh/gl/main.c +++ b/src/refresh/gl/main.c @@ -1174,5 +1174,6 @@ void R_RegisterFunctionsGL() 
IMG_ReadPixels = IMG_ReadPixels_GL; MOD_LoadMD2 = MOD_LoadMD2_GL; MOD_LoadMD3 = MOD_LoadMD3_GL; + MOD_LoadIQM = NULL; MOD_Reference = MOD_Reference_GL; } diff --git a/src/refresh/gl/models.c b/src/refresh/gl/models.c index 17883c588..a8287fb17 100644 --- a/src/refresh/gl/models.c +++ b/src/refresh/gl/models.c @@ -29,7 +29,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #error TESS_MAX_INDICES #endif -qerror_t MOD_LoadMD2_GL(model_t *model, const void *rawdata, size_t length) +qerror_t MOD_LoadMD2_GL(model_t *model, const void *rawdata, size_t length, const char* mod_name) { dmd2header_t header; dmd2frame_t *src_frame; @@ -345,7 +345,7 @@ static qerror_t MOD_LoadMD3Mesh(model_t *model, maliasmesh_t *mesh, return Q_ERR_SUCCESS; } -qerror_t MOD_LoadMD3_GL(model_t *model, const void *rawdata, size_t length) +qerror_t MOD_LoadMD3_GL(model_t *model, const void *rawdata, size_t length, const char* mod_name) { dmd3header_t header; size_t end, offset, remaining; diff --git a/src/refresh/model_iqm.c b/src/refresh/model_iqm.c new file mode 100644 index 000000000..451ba488d --- /dev/null +++ b/src/refresh/model_iqm.c @@ -0,0 +1,810 @@ +/* +=========================================================================== +Copyright (C) 2011 Thilo Schulz +Copyright (C) 2011 Matthias Bentrup +Copyright (C) 2011-2019 Zack Middleton + +This file is part of Quake III Arena source code. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with Quake III Arena source code; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== +*/ + +#include +#include +#include +#include + +static qboolean IQM_CheckRange(const iqmHeader_t* header, uint32_t offset, uint32_t count, size_t size) +{ + /* return true if the range specified by offset, count and size is empty or doesn't fit into the file */ + /* the end offset is computed in 64 bits so it cannot wrap around when size_t is only 32 bits wide */ + return (count == 0 || + offset > header->filesize || + (uint64_t)offset + (uint64_t)count * size > header->filesize); +} + +// "multiply" 3x4 matrices, these are assumed to be the top 3 rows +// of a 4x4 matrix with the last row = (0 0 0 1) +static void Matrix34Multiply(const float* a, const float* b, float* out) /* out = a * b; out must not alias a or b because elements of out are written while a and b are still being read */ +{ + out[0] = a[0] * b[0] + a[1] * b[4] + a[2] * b[8]; + out[1] = a[0] * b[1] + a[1] * b[5] + a[2] * b[9]; + out[2] = a[0] * b[2] + a[1] * b[6] + a[2] * b[10]; + out[3] = a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3]; + out[4] = a[4] * b[0] + a[5] * b[4] + a[6] * b[8]; + out[5] = a[4] * b[1] + a[5] * b[5] + a[6] * b[9]; + out[6] = a[4] * b[2] + a[5] * b[6] + a[6] * b[10]; + out[7] = a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7]; + out[8] = a[8] * b[0] + a[9] * b[4] + a[10] * b[8]; + out[9] = a[8] * b[1] + a[9] * b[5] + a[10] * b[9]; + out[10] = a[8] * b[2] + a[9] * b[6] + a[10] * b[10]; + out[11] = a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11]; +} + +static void JointToMatrix(const quat_t rot, const vec3_t scale, const vec3_t trans, float* mat) /* fills the 12-float matrix mat from a quaternion (rot[3] is the w component), a per-row scale and a translation */ +{ + float xx = 2.0f * rot[0] * rot[0]; + float yy = 2.0f * rot[1] * rot[1]; + float zz = 2.0f * rot[2] * rot[2]; + float xy = 2.0f * rot[0] * rot[1]; + float xz = 2.0f * rot[0] * rot[2]; + float yz = 2.0f * rot[1] * rot[2]; + float wx = 2.0f * rot[3] * rot[0]; + float wy = 2.0f * rot[3] * rot[1]; + float wz = 2.0f * rot[3] * rot[2]; + + mat[0] = scale[0] * (1.0f - (yy + zz)); + mat[1] = 
scale[0] * (xy - wz); + mat[2] = scale[0] * (xz + wy); + mat[3] = trans[0]; /* the translation goes into the fourth column (indices 3, 7 and 11) */ + mat[4] = scale[1] * (xy + wz); + mat[5] = scale[1] * (1.0f - (xx + zz)); + mat[6] = scale[1] * (yz - wx); + mat[7] = trans[1]; + mat[8] = scale[2] * (xz - wy); + mat[9] = scale[2] * (yz + wx); + mat[10] = scale[2] * (1.0f - (xx + yy)); + mat[11] = trans[2]; +} + +static void Matrix34Invert(const float* inMat, float* outMat) /* inverts a 3x4 matrix by transposing its 3x3 part, dividing each transposed row by its squared length and re-projecting the translation; outMat must not alias inMat */ +{ + outMat[0] = inMat[0]; outMat[1] = inMat[4]; outMat[2] = inMat[8]; /* the 3x3 part is copied transposed */ + outMat[4] = inMat[1]; outMat[5] = inMat[5]; outMat[6] = inMat[9]; + outMat[8] = inMat[2]; outMat[9] = inMat[6]; outMat[10] = inMat[10]; + + float invSqrLen, *v; /* NOTE(review): a zero-length row makes the divisions below divide by zero - confirm callers never pass a zero scale */ + v = outMat + 0; invSqrLen = 1.0f / DotProduct(v, v); VectorScale(v, invSqrLen, v); + v = outMat + 4; invSqrLen = 1.0f / DotProduct(v, v); VectorScale(v, invSqrLen, v); + v = outMat + 8; invSqrLen = 1.0f / DotProduct(v, v); VectorScale(v, invSqrLen, v); + + vec3_t trans; + trans[0] = inMat[3]; + trans[1] = inMat[7]; + trans[2] = inMat[11]; + + outMat[3] = -DotProduct(outMat + 0, trans); /* inverse translation = -(inverted 3x3 part) * original translation */ + outMat[7] = -DotProduct(outMat + 4, trans); + outMat[11] = -DotProduct(outMat + 8, trans); +} + +static void QuatSlerp(const quat_t from, const quat_t _to, float fraction, quat_t out) /* blends from towards _to by fraction (0 yields from, 1 yields _to or its negation); the result is not re-normalized here */ +{ + // cos() of angle + float cosAngle = from[0] * _to[0] + from[1] * _to[1] + from[2] * _to[2] + from[3] * _to[3]; + + // negative handling is needed for taking shortest path (required for model joints) + quat_t to; + if (cosAngle < 0.0f) + { + cosAngle = -cosAngle; + to[0] = -_to[0]; + to[1] = -_to[1]; + to[2] = -_to[2]; + to[3] = -_to[3]; + } + else + { + QuatCopy(_to, to); + } + + float backlerp, lerp; + if (cosAngle < 0.999999f) /* above this threshold sinAngle would be close to zero, so the linear branch is used instead */ + { + // spherical lerp (slerp) + const float angle = acosf(cosAngle); + const float sinAngle = sinf(angle); + backlerp = sinf((1.0f - fraction) * angle) / sinAngle; + lerp = sinf(fraction * angle) / sinAngle; + } + else + { + // linear lerp + backlerp = 1.0f - fraction; + lerp = fraction; + } + + out[0] = from[0] * backlerp + to[0] * lerp; + 
out[1] = from[1] * backlerp + to[1] * lerp; + out[2] = from[2] * backlerp + to[2] * lerp; + out[3] = from[3] * backlerp + to[3] * lerp; +} + +static vec_t QuatNormalize2(const quat_t v, quat_t out) +{ + float length = v[0] * v[0] + v[1] * v[1] + v[2] * v[2] + v[3] * v[3]; + + if (length > 0.f) + { + /* writing it this way allows gcc to recognize that rsqrt can be used */ + float ilength = 1 / sqrtf(length); + /* sqrt(length) = length * (1 / sqrt(length)) */ + length *= ilength; + out[0] = v[0] * ilength; + out[1] = v[1] * ilength; + out[2] = v[2] * ilength; + out[3] = v[3] * ilength; + } + else + { + out[0] = out[1] = out[2] = 0; + out[3] = -1; + } + + return length; +} + +// ReSharper disable CppClangTidyClangDiagnosticCastAlign + +/* +================= +MOD_LoadIQM_Base + +Load an IQM model and compute the joint poses for every frame. +================= +*/ +qerror_t MOD_LoadIQM_Base(model_t* model, const void* rawdata, size_t length, const char* mod_name) +{ + iqm_transform_t* transform; + float* mat, * matInv; + size_t joint_names; + iqm_model_t* iqmData; + char meshName[MAX_QPATH]; + int vertexArrayFormat[IQM_COLOR + 1]; + + if (length < sizeof(iqmHeader_t)) + { + return Q_ERR_FILE_TOO_SMALL; + } + + const iqmHeader_t* header = rawdata; + if (strncmp(header->magic, IQM_MAGIC, sizeof(header->magic)) != 0) + { + return Q_ERR_INVALID_FORMAT; + } + + if (header->version != IQM_VERSION) + { + Com_WPrintf("R_LoadIQM: %s is a unsupported IQM version (%d), only version %d is supported.\n", + mod_name, header->version, IQM_VERSION); + return Q_ERR_UNKNOWN_FORMAT; + } + + if (header->filesize > length || header->filesize > 16 << 20) + { + return Q_ERR_FILE_TOO_SMALL; + } + + // check ioq3 joint limit + if (header->num_joints > IQM_MAX_JOINTS) + { + Com_WPrintf("R_LoadIQM: %s has more than %d joints (%d).\n", + mod_name, IQM_MAX_JOINTS, header->num_joints); + return Q_ERR_INVALID_FORMAT; + } + + for (uint32_t vertexarray_idx = 0; vertexarray_idx < 
q_countof(vertexArrayFormat); vertexarray_idx++) + { + vertexArrayFormat[vertexarray_idx] = -1; + } + + if (header->num_meshes) + { + // check vertex arrays + if (IQM_CheckRange(header, header->ofs_vertexarrays, header->num_vertexarrays, sizeof(iqmVertexArray_t))) + { + return Q_ERR_BAD_EXTENT; + } + const iqmVertexArray_t* vertexarray = (const iqmVertexArray_t*)((const byte*)header + header->ofs_vertexarrays); + for (uint32_t vertexarray_idx = 0; vertexarray_idx < header->num_vertexarrays; vertexarray_idx++, vertexarray++) + { + if (vertexarray->size <= 0 || vertexarray->size > 4) + { + return Q_ERR_INVALID_FORMAT; + } + + uint32_t num_values = header->num_vertexes * vertexarray->size; + + switch (vertexarray->format) { + case IQM_BYTE: + case IQM_UBYTE: + // 1-byte + if (IQM_CheckRange(header, vertexarray->offset, num_values, sizeof(byte))) + { + return Q_ERR_BAD_EXTENT; + } + break; + case IQM_INT: + case IQM_UINT: + case IQM_FLOAT: + // 4-byte + if (IQM_CheckRange(header, vertexarray->offset, num_values, sizeof(float))) + { + return Q_ERR_BAD_EXTENT; + } + break; + default: + // not supported + return Q_ERR_INVALID_FORMAT; + } + + if (vertexarray->type < q_countof(vertexArrayFormat)) + { + vertexArrayFormat[vertexarray->type] = (int)vertexarray->format; + } + + switch (vertexarray->type) + { + case IQM_POSITION: + case IQM_NORMAL: + if (vertexarray->format != IQM_FLOAT || + vertexarray->size != 3) + { + return Q_ERR_INVALID_FORMAT; + } + break; + case IQM_TANGENT: + if (vertexarray->format != IQM_FLOAT || + vertexarray->size != 4) + { + return Q_ERR_INVALID_FORMAT; + } + break; + case IQM_TEXCOORD: + if (vertexarray->format != IQM_FLOAT || + vertexarray->size != 2) + { + return Q_ERR_INVALID_FORMAT; + } + break; + case IQM_BLENDINDEXES: + if ((vertexarray->format != IQM_INT && + vertexarray->format != IQM_UBYTE) || + vertexarray->size != 4) + { + return Q_ERR_INVALID_FORMAT; + } + break; + case IQM_BLENDWEIGHTS: + if ((vertexarray->format != IQM_FLOAT && + 
vertexarray->format != IQM_UBYTE) || + vertexarray->size != 4) + { + return Q_ERR_INVALID_FORMAT; + } + break; + case IQM_COLOR: + if (vertexarray->format != IQM_UBYTE || + vertexarray->size != 4) + { + return Q_ERR_INVALID_FORMAT; + } + break; + default: + break; + } + } + + // check for required vertex arrays + if (vertexArrayFormat[IQM_POSITION] == -1 || vertexArrayFormat[IQM_NORMAL] == -1 || vertexArrayFormat[IQM_TEXCOORD] == -1) + { + Com_WPrintf("R_LoadIQM: %s is missing IQM_POSITION, IQM_NORMAL, and/or IQM_TEXCOORD array.\n", mod_name); + return Q_ERR_INVALID_FORMAT; + } + + if (header->num_joints) + { + if (vertexArrayFormat[IQM_BLENDINDEXES] == -1 || vertexArrayFormat[IQM_BLENDWEIGHTS] == -1) + { + Com_WPrintf("R_LoadIQM: %s is missing IQM_BLENDINDEXES and/or IQM_BLENDWEIGHTS array.\n", mod_name); + return Q_ERR_INVALID_FORMAT; + } + } + else + { + // ignore blend arrays if present + vertexArrayFormat[IQM_BLENDINDEXES] = -1; + vertexArrayFormat[IQM_BLENDWEIGHTS] = -1; + } + + // check triangles + if (IQM_CheckRange(header, header->ofs_triangles, header->num_triangles, sizeof(iqmTriangle_t))) + { + return Q_ERR_BAD_EXTENT; + } + const iqmTriangle_t* triangle = (const iqmTriangle_t*)((const byte*)header + header->ofs_triangles); + for (uint32_t triangle_idx = 0; triangle_idx < header->num_triangles; triangle_idx++, triangle++) + { + if (triangle->vertex[0] > header->num_vertexes || + triangle->vertex[1] > header->num_vertexes || + triangle->vertex[2] > header->num_vertexes) { + return Q_ERR_INVALID_FORMAT; + } + } + + // check meshes + if (IQM_CheckRange(header, header->ofs_meshes, header->num_meshes, sizeof(iqmMesh_t))) + { + return Q_ERR_BAD_EXTENT; + } + + const iqmMesh_t* mesh = (const iqmMesh_t*)((const byte*)header + header->ofs_meshes); + for (uint32_t mesh_idx = 0; mesh_idx < header->num_meshes; mesh_idx++, mesh++) + { + if (mesh->name < header->num_text) + { + strncpy(meshName, (const char*)header + header->ofs_text + mesh->name, sizeof(meshName) - 
1); + } + else + { + meshName[0] = '\0'; + } + + if (mesh->first_vertex >= header->num_vertexes || + mesh->first_vertex + mesh->num_vertexes > header->num_vertexes || + mesh->first_triangle >= header->num_triangles || + mesh->first_triangle + mesh->num_triangles > header->num_triangles || + mesh->name >= header->num_text || + mesh->material >= header->num_text) { + return Q_ERR_INVALID_FORMAT; + } + } + } + + if (header->num_poses != header->num_joints && header->num_poses != 0) + { + Com_WPrintf("R_LoadIQM: %s has %d poses and %d joints, must have the same number or 0 poses\n", + mod_name, header->num_poses, header->num_joints); + return Q_ERR_INVALID_FORMAT; + } + + joint_names = 0; + + if (header->num_joints) + { + // check joints + if (IQM_CheckRange(header, header->ofs_joints, header->num_joints, sizeof(iqmJoint_t))) + { + return Q_ERR_BAD_EXTENT; + } + + const iqmJoint_t* joint = (const iqmJoint_t*)((const byte*)header + header->ofs_joints); + for (uint32_t joint_idx = 0; joint_idx < header->num_joints; joint_idx++, joint++) + { + if (joint->parent < -1 || + joint->parent >= (int)header->num_joints || + joint->name >= header->num_text) { + return Q_ERR_INVALID_FORMAT; + } + joint_names += strlen((const char*)header + header->ofs_text + + joint->name) + 1; + } + } + + if (header->num_poses) + { + // check poses + if (IQM_CheckRange(header, header->ofs_poses, header->num_poses, sizeof(iqmPose_t))) + { + return Q_ERR_BAD_EXTENT; + } + } + + if (header->ofs_bounds) + { + // check model bounds + if (IQM_CheckRange(header, header->ofs_bounds, header->num_frames, sizeof(iqmBounds_t))) + { + return Q_ERR_BAD_EXTENT; + } + } + + if (header->num_anims) + { + // check animations + const iqmAnim_t* anim = (const iqmAnim_t*)((const byte*)header + header->ofs_anims); + for (uint32_t anim_idx = 0; anim_idx < header->num_anims; anim_idx++, anim++) + { + if (anim->first_frame + anim->num_frames > header->num_frames) + { + return Q_ERR_INVALID_FORMAT; + } + } + } + + iqmData = 
(iqm_model_t*)MOD_Malloc(sizeof(iqm_model_t)); + model->iqmData = iqmData; + + // fill header + iqmData->num_vertexes = (header->num_meshes > 0) ? header->num_vertexes : 0; + iqmData->num_triangles = (header->num_meshes > 0) ? header->num_triangles : 0; + iqmData->num_frames = header->num_frames; + iqmData->num_meshes = header->num_meshes; + iqmData->num_joints = header->num_joints; + iqmData->num_poses = header->num_poses; + + if (header->num_meshes) + { + iqmData->meshes = (iqm_mesh_t*)MOD_Malloc(header->num_meshes * sizeof(iqm_mesh_t)); + iqmData->indices = (uint32_t*)MOD_Malloc(header->num_triangles * 3 * sizeof(int)); + iqmData->positions = (float*)MOD_Malloc(header->num_vertexes * 3 * sizeof(float)); + iqmData->texcoords = (float*)MOD_Malloc(header->num_vertexes * 2 * sizeof(float)); + iqmData->normals = (float*)MOD_Malloc(header->num_vertexes * 3 * sizeof(float)); + + if (vertexArrayFormat[IQM_TANGENT] != -1) + { + iqmData->tangents = (float*)MOD_Malloc(header->num_vertexes * 4 * sizeof(float)); + } + + if (vertexArrayFormat[IQM_COLOR] != -1) + { + iqmData->colors = (byte*)MOD_Malloc(header->num_vertexes * 4 * sizeof(byte)); + } + + if (vertexArrayFormat[IQM_BLENDINDEXES] != -1) + { + iqmData->blend_indices = MOD_Malloc(header->num_vertexes * 4 * sizeof(byte)); + } + + if (vertexArrayFormat[IQM_BLENDWEIGHTS] != -1) + { + iqmData->blend_weights = (float*)MOD_Malloc(header->num_vertexes * 4 * sizeof(float)); + } + } + + if (header->num_joints) + { + iqmData->jointNames = (char*)MOD_Malloc(joint_names); + iqmData->jointParents = (int*)MOD_Malloc(header->num_joints * sizeof(int)); + iqmData->bindJoints = (float*)MOD_Malloc(header->num_joints * 12 * sizeof(float)); // bind joint matricies + iqmData->invBindJoints = (float*)MOD_Malloc(header->num_joints * 12 * sizeof(float)); // inverse bind joint matricies + } + + if (header->num_poses) + { + iqmData->poses = (iqm_transform_t*)MOD_Malloc(header->num_poses * header->num_frames * sizeof(iqm_transform_t)); // pose 
transforms + } + + if (header->ofs_bounds) + { + iqmData->bounds = (float*)MOD_Malloc(header->num_frames * 6 * sizeof(float)); // model bounds + } + else if (header->num_meshes && header->num_frames == 0) + { + iqmData->bounds = (float*)MOD_Malloc(6 * sizeof(float)); // model bounds + } + + if (header->num_meshes) + { + const iqmMesh_t* mesh = (const iqmMesh_t*)((const byte*)header + header->ofs_meshes); + iqm_mesh_t* surface = iqmData->meshes; + const char* str = (const char*)header + header->ofs_text; + for (uint32_t mesh_idx = 0; mesh_idx < header->num_meshes; mesh_idx++, mesh++, surface++) + { + strncpy(surface->name, str + mesh->name, sizeof(surface->name) - 1); + Q_strlwr(surface->name); // lowercase the surface name so skin compares are faster + strncpy(surface->material, str + mesh->material, sizeof(surface->material) - 1); + Q_strlwr(surface->material); + surface->data = iqmData; + surface->first_vertex = mesh->first_vertex; + surface->num_vertexes = mesh->num_vertexes; + surface->first_triangle = mesh->first_triangle; + surface->num_triangles = mesh->num_triangles; + } + + // copy triangles + const iqmTriangle_t* triangle = (const iqmTriangle_t*)((const byte*)header + header->ofs_triangles); + for (uint32_t i = 0; i < header->num_triangles; i++, triangle++) + { + iqmData->indices[3 * i + 0] = triangle->vertex[0]; + iqmData->indices[3 * i + 1] = triangle->vertex[1]; + iqmData->indices[3 * i + 2] = triangle->vertex[2]; + } + + // copy vertexarrays and indexes + const iqmVertexArray_t* vertexarray = (const iqmVertexArray_t*)((const byte*)header + header->ofs_vertexarrays); + for (uint32_t vertexarray_idx = 0; vertexarray_idx < header->num_vertexarrays; vertexarray_idx++, vertexarray++) + { + // skip disabled arrays + if (vertexarray->type < q_countof(vertexArrayFormat) + && vertexArrayFormat[vertexarray->type] == -1) + continue; + + // total number of values + uint32_t n = header->num_vertexes * vertexarray->size; + + switch (vertexarray->type) + { + case 
IQM_POSITION: + memcpy(iqmData->positions, + (const byte*)header + vertexarray->offset, + n * sizeof(float)); + break; + case IQM_NORMAL: + memcpy(iqmData->normals, + (const byte*)header + vertexarray->offset, + n * sizeof(float)); + break; + case IQM_TANGENT: + memcpy(iqmData->tangents, + (const byte*)header + vertexarray->offset, + n * sizeof(float)); + break; + case IQM_TEXCOORD: + memcpy(iqmData->texcoords, + (const byte*)header + vertexarray->offset, + n * sizeof(float)); + break; + case IQM_BLENDINDEXES: + memcpy(iqmData->blend_indices, + (const byte*)header + vertexarray->offset, + n * sizeof(float)); + break; + case IQM_BLENDWEIGHTS: + if (vertexArrayFormat[IQM_BLENDWEIGHTS] == IQM_FLOAT) + { + memcpy(iqmData->blend_weights, + (const byte*)header + vertexarray->offset, + n * sizeof(float)); + } + else + { + // convert blend weights from byte to float + for (uint32_t vertex_idx = 0; vertex_idx < 4 * header->num_vertexes; vertex_idx++) + { + iqmData->blend_weights[vertex_idx] = (float)((const byte*)header + vertexarray->offset)[vertex_idx] / 255.f; + } + } + break; + case IQM_COLOR: + memcpy(iqmData->colors, + (const byte*)header + vertexarray->offset, + n * sizeof(byte)); + break; + default: + break; + } + } + } + + if (header->num_joints) + { + // copy joint names + char* str = iqmData->jointNames; + const iqmJoint_t* joint = (const iqmJoint_t*)((const byte*)header + header->ofs_joints); + for (uint32_t joint_idx = 0; joint_idx < header->num_joints; joint_idx++, joint++) + { + const char* name = (const char*)header + header->ofs_text + joint->name; + size_t len = strlen(name) + 1; + memcpy(str, name, len); + str += len; + } + + // copy joint parents + joint = (const iqmJoint_t*)((const byte*)header + header->ofs_joints); + for (uint32_t joint_idx = 0; joint_idx < header->num_joints; joint_idx++, joint++) + { + iqmData->jointParents[joint_idx] = joint->parent; + } + + // calculate bind joint matrices and their inverses + mat = iqmData->bindJoints; + matInv = 
iqmData->invBindJoints; + joint = (const iqmJoint_t*)((const byte*)header + header->ofs_joints); + for (uint32_t joint_idx = 0; joint_idx < header->num_joints; joint_idx++, joint++) + { + float baseFrame[12], invBaseFrame[12]; + + quat_t rotate; + QuatNormalize2(joint->rotate, rotate); + + JointToMatrix(rotate, joint->scale, joint->translate, baseFrame); + Matrix34Invert(baseFrame, invBaseFrame); + + if (joint->parent >= 0) + { + Matrix34Multiply(iqmData->bindJoints + 12 * joint->parent, baseFrame, mat); + mat += 12; + Matrix34Multiply(invBaseFrame, iqmData->invBindJoints + 12 * joint->parent, matInv); + matInv += 12; + } + else + { + memcpy(mat, baseFrame, sizeof(baseFrame)); + mat += 12; + memcpy(matInv, invBaseFrame, sizeof(invBaseFrame)); + matInv += 12; + } + } + } + + if (header->num_poses) + { + // calculate pose transforms + transform = iqmData->poses; + const uint16_t* framedata = (const uint16_t*)((const byte*)header + header->ofs_frames); + for (uint32_t frame_idx = 0; frame_idx < header->num_frames; frame_idx++) + { + const iqmPose_t* pose = (const iqmPose_t*)((const byte*)header + header->ofs_poses); + for (uint32_t pose_idx = 0; pose_idx < header->num_poses; pose_idx++, pose++, transform++) + { + vec3_t translate; + quat_t rotate; + vec3_t scale; + + translate[0] = pose->channeloffset[0]; if (pose->mask & 0x001) translate[0] += (float)*framedata++ * pose->channelscale[0]; + translate[1] = pose->channeloffset[1]; if (pose->mask & 0x002) translate[1] += (float)*framedata++ * pose->channelscale[1]; + translate[2] = pose->channeloffset[2]; if (pose->mask & 0x004) translate[2] += (float)*framedata++ * pose->channelscale[2]; + + rotate[0] = pose->channeloffset[3]; if (pose->mask & 0x008) rotate[0] += (float)*framedata++ * pose->channelscale[3]; + rotate[1] = pose->channeloffset[4]; if (pose->mask & 0x010) rotate[1] += (float)*framedata++ * pose->channelscale[4]; + rotate[2] = pose->channeloffset[5]; if (pose->mask & 0x020) rotate[2] += (float)*framedata++ * 
pose->channelscale[5]; + rotate[3] = pose->channeloffset[6]; if (pose->mask & 0x040) rotate[3] += (float)*framedata++ * pose->channelscale[6]; + + scale[0] = pose->channeloffset[7]; if (pose->mask & 0x080) scale[0] += (float)*framedata++ * pose->channelscale[7]; + scale[1] = pose->channeloffset[8]; if (pose->mask & 0x100) scale[1] += (float)*framedata++ * pose->channelscale[8]; + scale[2] = pose->channeloffset[9]; if (pose->mask & 0x200) scale[2] += (float)*framedata++ * pose->channelscale[9]; + + VectorCopy(translate, transform->translate); + QuatNormalize2(rotate, transform->rotate); + VectorCopy(scale, transform->scale); + } + } + } + + // copy model bounds + if (header->ofs_bounds) + { + mat = iqmData->bounds; + const iqmBounds_t* bounds = (const iqmBounds_t*)((const byte*)header + header->ofs_bounds); + for (uint32_t frame_idx = 0; frame_idx < header->num_frames; frame_idx++) + { + mat[0] = bounds->bbmin[0]; + mat[1] = bounds->bbmin[1]; + mat[2] = bounds->bbmin[2]; + mat[3] = bounds->bbmax[0]; + mat[4] = bounds->bbmax[1]; + mat[5] = bounds->bbmax[2]; + + mat += 6; + bounds++; + } + } + else if (header->num_meshes && header->num_frames == 0) + { + mat = iqmData->bounds; + + ClearBounds(&iqmData->bounds[0], &iqmData->bounds[3]); + for (uint32_t vertex_idx = 0; vertex_idx < header->num_vertexes; vertex_idx++) + { + AddPointToBounds(&iqmData->positions[vertex_idx * 3], &iqmData->bounds[0], &iqmData->bounds[3]); + } + } + + if (header->num_anims) + { + iqmData->num_animations = header->num_anims; + iqmData->animations = (iqm_anim_t*)MOD_Malloc(header->num_anims * sizeof(iqm_anim_t)); + + const iqmAnim_t* src = (const iqmAnim_t*)((const byte*)header + header->ofs_anims); + iqm_anim_t* dst = iqmData->animations; + for (uint32_t anim_idx = 0; anim_idx < header->num_anims; anim_idx++, src++, dst++) + { + const char* name = (const char*)header + header->ofs_text + src->name; + strncpy(dst->name, name, sizeof(dst->name)); + dst->name[sizeof(dst->name) - 1] = 0; + + 
dst->first_frame = src->first_frame; + dst->num_frames = src->num_frames; + dst->loop = (src->flags & IQM_LOOP) != 0; + } + } + + return Q_ERR_SUCCESS; +} + +/* +================= +R_ComputeIQMTransforms + +Compute matrices for this model, returns [model->num_poses] 3x4 matrices in the (pose_matrices) array +================= +*/ +qboolean R_ComputeIQMTransforms(const iqm_model_t* model, const entity_t* entity, float* pose_matrices) +{ + iqm_transform_t relativeJoints[IQM_MAX_JOINTS]; + + iqm_transform_t* relativeJoint = relativeJoints; + + const int frame = model->num_frames ? entity->frame % (int)model->num_frames : 0; + const int oldframe = model->num_frames ? entity->oldframe % (int)model->num_frames : 0; + const float backlerp = entity->backlerp; + + // copy or lerp animation frame pose + if (oldframe == frame) + { + const iqm_transform_t* pose = &model->poses[frame * model->num_poses]; + for (uint32_t pose_idx = 0; pose_idx < model->num_poses; pose_idx++, pose++, relativeJoint++) + { + VectorCopy(pose->translate, relativeJoint->translate); + QuatCopy(pose->rotate, relativeJoint->rotate); + VectorCopy(pose->scale, relativeJoint->scale); + } + } + else + { + const float lerp = 1.0f - backlerp; + const iqm_transform_t* pose = &model->poses[frame * model->num_poses]; + const iqm_transform_t* oldpose = &model->poses[oldframe * model->num_poses]; + for (uint32_t pose_idx = 0; pose_idx < model->num_poses; pose_idx++, oldpose++, pose++, relativeJoint++) + { + relativeJoint->translate[0] = oldpose->translate[0] * backlerp + pose->translate[0] * lerp; + relativeJoint->translate[1] = oldpose->translate[1] * backlerp + pose->translate[1] * lerp; + relativeJoint->translate[2] = oldpose->translate[2] * backlerp + pose->translate[2] * lerp; + + relativeJoint->scale[0] = oldpose->scale[0] * backlerp + pose->scale[0] * lerp; + relativeJoint->scale[1] = oldpose->scale[1] * backlerp + pose->scale[1] * lerp; + relativeJoint->scale[2] = oldpose->scale[2] * backlerp + 
pose->scale[2] * lerp; + + QuatSlerp(oldpose->rotate, pose->rotate, lerp, relativeJoint->rotate); + } + } + + // multiply by inverse of bind pose and parent 'pose mat' (bind pose transform matrix) + relativeJoint = relativeJoints; + const int* jointParent = model->jointParents; + const float* invBindMat = model->invBindJoints; + float* poseMat = pose_matrices; + for (uint32_t pose_idx = 0; pose_idx < model->num_poses; pose_idx++, relativeJoint++, jointParent++, invBindMat += 12, poseMat += 12) + { + float mat1[12], mat2[12]; + + JointToMatrix(relativeJoint->rotate, relativeJoint->scale, relativeJoint->translate, mat1); + + if (*jointParent >= 0) + { + Matrix34Multiply(&model->bindJoints[(*jointParent) * 12], mat1, mat2); + Matrix34Multiply(mat2, invBindMat, mat1); + Matrix34Multiply(&pose_matrices[(*jointParent) * 12], mat1, poseMat); + } + else + { + Matrix34Multiply(mat1, invBindMat, poseMat); + } + } + + return qtrue; +} diff --git a/src/refresh/models.c b/src/refresh/models.c index 6ff7d429b..ea5afe281 100644 --- a/src/refresh/models.c +++ b/src/refresh/models.c @@ -28,6 +28,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "format/md3.h" #endif #include "format/sp2.h" +#include "format/iqm.h" #include "refresh/images.h" #include "refresh/models.h" @@ -224,7 +225,7 @@ get_model_class(const char *name) return MCLASS_REGULAR; } -static qerror_t MOD_LoadSP2(model_t *model, const void *rawdata, size_t length) +static qerror_t MOD_LoadSP2(model_t *model, const void *rawdata, size_t length, const char* mod_name) { dsp2header_t header; dsp2frame_t *src_frame; @@ -406,11 +407,20 @@ qhandle_t R_RegisterModel(const char *name) case SP2_IDENT: load = MOD_LoadSP2; break; + case IQM_IDENT: + load = MOD_LoadIQM; + break; default: ret = Q_ERR_UNKNOWN_FORMAT; goto fail2; } + if (!load) + { + ret = Q_ERR_UNKNOWN_FORMAT; + goto fail2; + } + model = MOD_Alloc(); if (!model) { ret = Q_ERR_OUT_OF_SLOTS; @@ -420,7 +430,7 @@ qhandle_t 
R_RegisterModel(const char *name) memcpy(model->name, normalized, namelen + 1); model->registration_sequence = registration_sequence; - ret = load(model, rawdata, filelen); + ret = load(model, rawdata, filelen, name); FS_FreeFile(rawdata); diff --git a/src/refresh/vkpt/bsp_mesh.c b/src/refresh/vkpt/bsp_mesh.c index 722d1d69e..de2fcf91c 100644 --- a/src/refresh/vkpt/bsp_mesh.c +++ b/src/refresh/vkpt/bsp_mesh.c @@ -381,7 +381,7 @@ is_sky_or_lava_cluster(bsp_mesh_t* wm, mface_t* surf, int cluster, int material_ return qfalse; } -static void merge_pvs_rows(bsp_t* bsp, char* src, char* dst) +static void merge_pvs_rows(bsp_t* bsp, byte* src, byte* dst) { for (int i = 0; i < bsp->visrowsize; i++) { @@ -390,7 +390,7 @@ static void merge_pvs_rows(bsp_t* bsp, char* src, char* dst) } #define FOREACH_BIT_BEGIN(SET,ROWSIZE,VAR) \ - for (int _byte_idx = 0; _byte_idx < ROWSIZE; _byte_idx++) { \ + for (int _byte_idx = 0; _byte_idx < (ROWSIZE); _byte_idx++) { \ if (SET[_byte_idx]) { \ for (int _bit_idx = 0; _bit_idx < 8; _bit_idx++) { \ if (SET[_byte_idx] & (1 << _bit_idx)) { \ @@ -398,7 +398,7 @@ static void merge_pvs_rows(bsp_t* bsp, char* src, char* dst) #define FOREACH_BIT_END } } } } -static void connect_pvs(bsp_t* bsp, int cluster_a, char* pvs_a, int cluster_b, char* pvs_b) +static void connect_pvs(bsp_t* bsp, int cluster_a, byte* pvs_a, int cluster_b, byte* pvs_b) { FOREACH_BIT_BEGIN(pvs_a, bsp->visrowsize, vis_cluster_a) if (vis_cluster_a != cluster_a && vis_cluster_a != cluster_b) @@ -422,12 +422,12 @@ static void make_pvs_symmetric(bsp_t* bsp) { for (int cluster = 0; cluster < bsp->vis->numclusters; cluster++) { - char* pvs = BSP_GetPvs(bsp, cluster); + byte* pvs = BSP_GetPvs(bsp, cluster); FOREACH_BIT_BEGIN(pvs, bsp->visrowsize, vis_cluster) if (vis_cluster != cluster) { - char* vis_pvs = BSP_GetPvs(bsp, vis_cluster); + byte* vis_pvs = BSP_GetPvs(bsp, vis_cluster); Q_SetBit(vis_pvs, cluster); } FOREACH_BIT_END @@ -442,12 +442,12 @@ static void build_pvs2(bsp_t* bsp) for 
(int cluster = 0; cluster < bsp->vis->numclusters; cluster++) { - char* pvs = BSP_GetPvs(bsp, cluster); - char* dest_pvs = BSP_GetPvs2(bsp, cluster); + byte* pvs = BSP_GetPvs(bsp, cluster); + byte* dest_pvs = BSP_GetPvs2(bsp, cluster); memcpy(dest_pvs, pvs, bsp->visrowsize); FOREACH_BIT_BEGIN(pvs, bsp->visrowsize, vis_cluster) - char* pvs2 = BSP_GetPvs(bsp, vis_cluster); + byte* pvs2 = BSP_GetPvs(bsp, vis_cluster); merge_pvs_rows(bsp, pvs2, dest_pvs); FOREACH_BIT_END } @@ -553,8 +553,8 @@ collect_surfaces(int *idx_ctr, bsp_mesh_t *wm, bsp_t *bsp, int model_idx, int (* if (cluster >= 0 && anti_cluster >= 0 && cluster != anti_cluster) { - char* pvs_cluster = BSP_GetPvs(bsp, cluster); - char* pvs_anti_cluster = BSP_GetPvs(bsp, anti_cluster); + byte* pvs_cluster = BSP_GetPvs(bsp, cluster); + byte* pvs_anti_cluster = BSP_GetPvs(bsp, anti_cluster); if (!Q_IsBitSet(pvs_cluster, anti_cluster) || !Q_IsBitSet(pvs_anti_cluster, cluster)) { @@ -1095,8 +1095,8 @@ encode_normal(vec3_t normal) pp[0] = pp[0] * 0.5f + 0.5f; pp[1] = pp[1] * 0.5f + 0.5f; - pp[0] = clamp(pp[0], 0.f, 1.f); - pp[1] = clamp(pp[1], 0.f, 1.f); + clamp(pp[0], 0.f, 1.f); + clamp(pp[1], 0.f, 1.f); uint32_t ux = (uint32_t)(pp[0] * 0xffffu); uint32_t uy = (uint32_t)(pp[1] * 0xffffu); @@ -1228,7 +1228,7 @@ load_sky_and_lava_clusters(bsp_mesh_t* wm, const char* map_name) qboolean found_map = qfalse; char* filebuf = NULL; - FS_LoadFile(filename, &filebuf); + FS_LoadFile(filename, (void**)&filebuf); if (filebuf) { @@ -1238,7 +1238,7 @@ load_sky_and_lava_clusters(bsp_mesh_t* wm, const char* map_name) else { // try to load the global file - FS_LoadFile("sky_clusters.txt", &filebuf); + FS_LoadFile("sky_clusters.txt", (void**)&filebuf); if (!filebuf) { Com_WPrintf("Couldn't read sky_clusters.txt\n"); @@ -1299,7 +1299,7 @@ load_cameras(bsp_mesh_t* wm, const char* map_name) wm->num_cameras = 0; char* filebuf = NULL; - FS_LoadFile("cameras.txt", &filebuf); + FS_LoadFile("cameras.txt", (void**)&filebuf); if (!filebuf) { 
Com_WPrintf("Couldn't read cameras.txt\n"); @@ -1373,7 +1373,7 @@ compute_sky_visibility(bsp_mesh_t *wm, bsp_t *bsp) { if (clusters_with_sky[cluster >> 3] & (1 << (cluster & 7))) { - char* mask = BSP_GetPvs(bsp, cluster); + byte* mask = BSP_GetPvs(bsp, cluster); for (int i = 0; i < bsp->visrowsize; i++) wm->sky_visibility[i] |= mask[i]; @@ -1481,7 +1481,7 @@ collect_cluster_lights(bsp_mesh_t *wm, bsp_t *bsp) if(light->cluster < 0) continue; - const byte* pvs = BSP_GetPvs(bsp, light->cluster); + const byte* pvs = (const byte*)BSP_GetPvs(bsp, light->cluster); FOREACH_BIT_BEGIN(pvs, bsp->visrowsize, other_cluster) aabb_t* cluster_aabb = wm->cluster_aabbs + other_cluster; diff --git a/src/refresh/vkpt/conversion.c b/src/refresh/vkpt/conversion.c new file mode 100644 index 000000000..a10e87184 --- /dev/null +++ b/src/refresh/vkpt/conversion.c @@ -0,0 +1,25 @@ +/* +Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
/*
 * Converts four floats to 16-bit half floats and packs them into two 32-bit
 * words: half[0] holds vec4[0] in its low 16 bits and vec4[1] in its high
 * 16 bits, half[1] holds vec4[2] and vec4[3] the same way.
 *
 * half  output, two 32-bit words
 * vec4  input, four floats (only read)
 */
void packHalf4x16(uint32_t* half, float* vec4)
{
	// floatToHalf() returns uint16_t, which promotes to signed int; shifting a
	// value with bit 15 set (any negative input) left by 16 would overflow
	// that int, so widen to uint32_t before shifting.
	const uint32_t x = floatToHalf(vec4[0]);
	const uint32_t y = floatToHalf(vec4[1]);
	const uint32_t z = floatToHalf(vec4[2]);
	const uint32_t w = floatToHalf(vec4[3]);

	half[0] = x | (y << 16);
	half[1] = z | (w << 16);
}
#ifndef CONVERSION_H_
#define CONVERSION_H_

#include <stdint.h>

/*
	Float -> Half converter function, adapted from
	https://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
*/

/* Views the same 32 bits as a float, a signed or an unsigned integer. */
typedef union
{
	float f;
	int32_t si;
	uint32_t ui;
} f2hBits;

/*
 * Converts a 32-bit float to the bit pattern of a 16-bit half float.
 *
 * Finite values above the largest half become infinity, values below the
 * smallest normal half are converted to half subnormals (truncating), and
 * NaN stays NaN.  The sign is carried over unchanged.
 */
static inline uint16_t floatToHalf(float value)
{
	static int const shift = 13;     // mantissa bits dropped going from flt32 to flt16
	static int const shiftSign = 16; // distance between the two sign bit positions

	static int32_t const infN = 0x7F800000; // flt32 infinity
	static int32_t const maxN = 0x477FE000; // max flt16 normal as a flt32
	static int32_t const minN = 0x38800000; // min flt16 normal as a flt32
	static uint32_t const signN = 0x80000000u; // flt32 sign bit

	static int32_t const nanN = 0x7F802000; // minimum flt16 nan as a flt32
	static int32_t const maxC = 0x23BFF;    // maxN >> shift

	static int32_t const mulN = 0x52000000; // (1 << 23) / minN

	static int32_t const subC = 0x003FF; // max flt32 subnormal down shifted

	static int32_t const maxD = 0x1C000; // rebias applied to inf/nan
	static int32_t const minD = 0x1C000; // rebias applied to normal values

	f2hBits v, s;
	v.f = value;

	// split off the sign and continue with the magnitude
	uint32_t sign = v.ui & signN;
	v.ui ^= sign;
	sign >>= shiftSign; // logical shift

	// Values below the smallest normal half are rescaled so that the integer
	// part of the product is the subnormal mantissa.  This is done only for
	// those values: for larger magnitudes the product does not fit into an
	// int32_t and the float -> int conversion would be undefined.
	if (v.si < minN)
	{
		s.si = mulN;
		v.si = (int32_t)(s.f * v.f); // correct subnormals
	}

	// branchless selects: finite but too large -> infinity, low-mantissa NaN -> nanN
	v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
	v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));
	v.ui >>= shift; // logical shift
	// rebias the exponent: inf/nan first, then everything that is not subnormal
	v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
	v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
	return (uint16_t)(v.ui | sign);
}

void packHalf4x16(uint32_t* half, float* vec4);

#endif // CONVERSION_H_
+++ b/src/refresh/vkpt/draw.c @@ -52,7 +52,7 @@ static VkPipelineLayout pipeline_layout_final_blit; static VkRenderPass render_pass_stretch_pic; static VkPipeline pipeline_stretch_pic; static VkPipeline pipeline_final_blit; -static VkFramebuffer framebuffer_stretch_pic[MAX_SWAPCHAIN_IMAGES]; +static VkFramebuffer* framebuffer_stretch_pic = NULL; static BufferResource_t buf_stretch_pic_queue[MAX_FRAMES_IN_FLIGHT]; static VkDescriptorSetLayout desc_set_layout_sbo; static VkDescriptorPool desc_pool_sbo; @@ -316,6 +316,9 @@ vkpt_draw_destroy_pipelines() for(int i = 0; i < qvk.num_swap_chain_images; i++) { vkDestroyFramebuffer(qvk.device, framebuffer_stretch_pic[i], NULL); } + free(framebuffer_stretch_pic); + framebuffer_stretch_pic = NULL; + return VK_SUCCESS; } @@ -465,7 +468,7 @@ vkpt_draw_create_pipelines() _VK(vkCreateGraphicsPipelines(qvk.device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, &pipeline_final_blit)); ATTACH_LABEL_VARIABLE(pipeline_final_blit, PIPELINE); - + framebuffer_stretch_pic = malloc(qvk.num_swap_chain_images * sizeof(*framebuffer_stretch_pic)); for(int i = 0; i < qvk.num_swap_chain_images; i++) { VkImageView attachments[] = { qvk.swap_chain_image_views[i] diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 498fa0003..b7b9ea59a 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -86,7 +86,7 @@ cvar_t *cvar_sli = NULL; cvar_t *cvar_dump_image = NULL; #endif -char cluster_debug_mask[VIS_MAX_BYTES]; +byte cluster_debug_mask[VIS_MAX_BYTES]; int cluster_debug_index; #define UBO_CVAR_DO(name, default_value) cvar_t *cvar_##name; @@ -355,7 +355,7 @@ vkpt_set_material() pbr_material_t * mat = MAT_FindPBRMaterial(vkpt_refdef.fd->feedback.view_material); if (!mat) { - Com_EPrintf("Cannot find material '%s' in table\n"); + Com_EPrintf("Cannot find material '%s' in table\n", vkpt_refdef.fd->feedback.view_material); return; } @@ -371,7 +371,7 @@ vkpt_print_material() pbr_material_t * mat = 
MAT_FindPBRMaterial(vkpt_refdef.fd->feedback.view_material); if (!mat) { - Com_EPrintf("Cannot find material '%s' in table\n"); + Com_EPrintf("Cannot find material '%s' in table\n", vkpt_refdef.fd->feedback.view_material); return; } MAT_PrintMaterialProperties(mat); @@ -642,11 +642,11 @@ out:; } vkGetSwapchainImagesKHR(qvk.device, qvk.swap_chain, &qvk.num_swap_chain_images, NULL); - //qvk.swap_chain_images = malloc(qvk.num_swap_chain_images * sizeof(*qvk.swap_chain_images)); - assert(qvk.num_swap_chain_images < MAX_SWAPCHAIN_IMAGES); + assert(qvk.num_swap_chain_images); + qvk.swap_chain_images = malloc(qvk.num_swap_chain_images * sizeof(*qvk.swap_chain_images)); vkGetSwapchainImagesKHR(qvk.device, qvk.swap_chain, &qvk.num_swap_chain_images, qvk.swap_chain_images); - //qvk.swap_chain_image_views = malloc(qvk.num_swap_chain_images * sizeof(*qvk.swap_chain_image_views)); + qvk.swap_chain_image_views = malloc(qvk.num_swap_chain_images * sizeof(*qvk.swap_chain_image_views)); for(int i = 0; i < qvk.num_swap_chain_images; i++) { VkImageViewCreateInfo img_create_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -672,6 +672,14 @@ out:; if(vkCreateImageView(qvk.device, &img_create_info, NULL, qvk.swap_chain_image_views + i) != VK_SUCCESS) { Com_EPrintf("error creating image view!"); + + free(qvk.swap_chain_image_views); + qvk.swap_chain_image_views = NULL; + + free(qvk.swap_chain_images); + qvk.swap_chain_images = NULL; + + qvk.num_swap_chain_images = 0; return 1; } } @@ -713,10 +721,7 @@ create_command_pool_and_fences() /* command pool and buffers */ _VK(vkCreateCommandPool(qvk.device, &cmd_pool_create_info, NULL, &qvk.cmd_buffers_graphics.command_pool)); - - cmd_pool_create_info.queueFamilyIndex = qvk.queue_idx_compute; - _VK(vkCreateCommandPool(qvk.device, &cmd_pool_create_info, NULL, &qvk.cmd_buffers_compute.command_pool)); - + cmd_pool_create_info.queueFamilyIndex = qvk.queue_idx_transfer; _VK(vkCreateCommandPool(qvk.device, &cmd_pool_create_info, NULL, 
&qvk.cmd_buffers_transfer.command_pool)); @@ -1097,7 +1102,7 @@ init_vulkan() { Com_Error(ERR_FATAL, "Running Quake II RTX with KHR ray tracing extensions requires NVIDIA Graphics Driver version " "to be at least %u.%02u, while the installed version is %u.%02u. Please update the NVIDIA Graphics Driver, or " - "switch to the legacy mode by adding \"+set nv_ray_tracing 1\" to the command line.", + "switch to the legacy mode by adding \"+set ray_tracing_api nv\" to the command line.", required_major, required_minor, driver_major, driver_minor); } } @@ -1154,7 +1159,6 @@ init_vulkan() // Com_Printf("num queue families: %d\n", num_queue_families); qvk.queue_idx_graphics = -1; - qvk.queue_idx_compute = -1; qvk.queue_idx_transfer = -1; for(int i = 0; i < num_queue_families; i++) { @@ -1172,15 +1176,12 @@ init_vulkan() continue; qvk.queue_idx_graphics = i; } - else if(supports_compute && qvk.queue_idx_compute < 0) { - qvk.queue_idx_compute = i; - } else if(supports_transfer && qvk.queue_idx_transfer < 0) { qvk.queue_idx_transfer = i; } } - if(qvk.queue_idx_graphics < 0 || qvk.queue_idx_compute < 0 || qvk.queue_idx_transfer < 0) { + if(qvk.queue_idx_graphics < 0 || qvk.queue_idx_transfer < 0) { Com_Error(ERR_FATAL, "Could not find a suitable Vulkan queue family!\n"); return qfalse; } @@ -1199,16 +1200,7 @@ init_vulkan() queue_create_info[num_create_queues++] = q; }; - if(qvk.queue_idx_compute != qvk.queue_idx_graphics) { - VkDeviceQueueCreateInfo q = { - .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .queueCount = 1, - .pQueuePriorities = &queue_priorities, - .queueFamilyIndex = qvk.queue_idx_compute, - }; - queue_create_info[num_create_queues++] = q; - }; - if(qvk.queue_idx_transfer != qvk.queue_idx_graphics && qvk.queue_idx_transfer != qvk.queue_idx_compute) { + if(qvk.queue_idx_transfer != qvk.queue_idx_graphics) { VkDeviceQueueCreateInfo q = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .queueCount = 1, @@ -1242,6 +1234,12 @@ init_vulkan() 
.bufferDeviceAddress = VK_TRUE }; +#ifdef VKPT_DEVICE_GROUPS + if (qvk.device_count > 1) { + physical_device_address_features.bufferDeviceAddressMultiDevice = VK_TRUE; + } +#endif + VkPhysicalDeviceRayTracingPipelineFeaturesKHR physical_device_rt_pipeline_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR, .pNext = &physical_device_address_features, @@ -1373,7 +1371,6 @@ init_vulkan() } vkGetDeviceQueue(qvk.device, qvk.queue_idx_graphics, 0, &qvk.queue_graphics); - vkGetDeviceQueue(qvk.device, qvk.queue_idx_compute, 0, &qvk.queue_compute); vkGetDeviceQueue(qvk.device, qvk.queue_idx_transfer, 0, &qvk.queue_transfer); #define VK_EXTENSION_DO(a) \ @@ -1439,7 +1436,7 @@ create_shader_module_from_file(const char *name, const char *enum_name, qboolean char *data; size_t size; - size = FS_LoadFile(path, &data); + size = FS_LoadFile(path, (void**)&data); if(!data) { Com_EPrintf("Couldn't find shader module %s!\n", path); return VK_NULL_HANDLE; @@ -1474,6 +1471,7 @@ vkpt_load_shader_modules() #define IS_RT_SHADER qfalse LIST_SHADER_MODULES; +#undef IS_RT_SHADER #define IS_RT_SHADER qtrue LIST_RT_RGEN_SHADER_MODULES if(!qvk.use_ray_query) @@ -1505,6 +1503,12 @@ destroy_swapchain() vkDestroyImageView (qvk.device, qvk.swap_chain_image_views[i], NULL); qvk.swap_chain_image_views[i] = VK_NULL_HANDLE; } + free(qvk.swap_chain_image_views); + qvk.swap_chain_image_views = NULL; + + free(qvk.swap_chain_images); + qvk.swap_chain_images = NULL; + qvk.num_swap_chain_images = 0; vkDestroySwapchainKHR(qvk.device, qvk.swap_chain, NULL); @@ -1540,11 +1544,9 @@ destroy_vulkan() vkDestroyFence(qvk.device, qvk.fence_vertex_sync, NULL); vkpt_free_command_buffers(&qvk.cmd_buffers_graphics); - vkpt_free_command_buffers(&qvk.cmd_buffers_compute); vkpt_free_command_buffers(&qvk.cmd_buffers_transfer); vkDestroyCommandPool(qvk.device, qvk.cmd_buffers_graphics.command_pool, NULL); - vkDestroyCommandPool(qvk.device, qvk.cmd_buffers_compute.command_pool, NULL); 
vkDestroyCommandPool(qvk.device, qvk.cmd_buffers_transfer.command_pool, NULL); vkDestroyDevice(qvk.device, NULL); @@ -1582,6 +1584,7 @@ static int model_entity_ids[2][MAX_ENTITIES]; static int world_entity_ids[2][MAX_ENTITIES]; static int model_entity_id_count[2]; static int world_entity_id_count[2]; +static int iqm_matrix_count[2]; #define MAX_MODEL_LIGHTS 1024 static int num_model_lights = 0; @@ -1602,7 +1605,7 @@ static pbr_material_t const * get_mesh_material(const entity_t* entity, const ma } static inline uint32_t fill_model_instance(const entity_t* entity, const model_t* model, const maliasmesh_t* mesh, - const float* transform, int model_instance_index, qboolean is_viewer_weapon, qboolean is_double_sided) + const float* transform, int model_instance_index, qboolean is_viewer_weapon, qboolean is_double_sided, int iqm_matrix_index) { pbr_material_t const * material = get_mesh_material(entity, mesh); @@ -1657,6 +1660,9 @@ static inline uint32_t fill_model_instance(const entity_t* entity, const model_t instance->backlerp = entity->backlerp; instance->material = material_id; instance->alpha = (entity->flags & RF_TRANSLUCENT) ? entity->alpha : 1.0f; + instance->is_iqm = (model->iqmData) ? 
1 : 0; + if (instance->is_iqm) + instance->offset_prev = iqm_matrix_index; return material_id; } @@ -1820,7 +1826,9 @@ static void process_regular_entity( int* instance_idx, int* num_instanced_vert, int mesh_filter, - qboolean* contains_transparent) + qboolean* contains_transparent, + int* iqm_matrix_offset, + float* iqm_matrix_data) { QVKInstanceBuffer_t* uniform_instance_buffer = &vkpt_refdef.uniform_instance_buffer; uint32_t* ubo_instance_buf_offset = (uint32_t*)uniform_instance_buffer->model_instance_buf_offset; @@ -1838,6 +1846,22 @@ static void process_regular_entity( if (contains_transparent) *contains_transparent = qfalse; + int iqm_matrix_index = -1; + if (model->iqmData && model->iqmData->num_poses) + { + iqm_matrix_index = *iqm_matrix_offset; + + if (iqm_matrix_index + model->iqmData->num_poses > MAX_IQM_MATRICES) + { + assert(!"IQM matrix buffer overflow"); + return; + } + + R_ComputeIQMTransforms(model->iqmData, entity, iqm_matrix_data + (iqm_matrix_index * 12)); + + *iqm_matrix_offset += (int)model->iqmData->num_poses; + } + for (int i = 0; i < model->nummeshes; i++) { const maliasmesh_t* mesh = model->meshes + i; @@ -1860,7 +1884,9 @@ static void process_regular_entity( continue; } - uint32_t material_id = fill_model_instance(entity, model, mesh, transform, current_model_instance_index, is_viewer_weapon, is_double_sided); + uint32_t material_id = fill_model_instance(entity, model, mesh, transform, + current_model_instance_index,is_viewer_weapon, is_double_sided, iqm_matrix_index); + if (!material_id) continue; @@ -1960,6 +1986,7 @@ prepare_entities(EntityUploadInfo* upload_info) int bsp_mesh_idx = 0; int num_instanced_vert = 0; /* need to track this here to find lights */ int instance_idx = 0; + int iqm_matrix_offset = 0; const qboolean first_person_model = (cl_player_model->integer == CL_PLAYER_MODEL_FIRST_PERSON) && cl.baseclientinfo.model; @@ -1990,14 +2017,15 @@ prepare_entities(EntityUploadInfo* upload_info) else { qboolean contains_transparent 
= qfalse; - process_regular_entity(entity, model, qfalse, qfalse, &model_instance_idx, &instance_idx, &num_instanced_vert, MESH_FILTER_OPAQUE, &contains_transparent); + process_regular_entity(entity, model, qfalse, qfalse, &model_instance_idx, &instance_idx, &num_instanced_vert, + MESH_FILTER_OPAQUE, &contains_transparent, &iqm_matrix_offset, qvk.iqm_matrices_shadow); if(contains_transparent) transparent_model_indices[transparent_model_num++] = i; } } } - + upload_info->dynamic_vertex_num = num_instanced_vert; const uint32_t transparent_model_base_vertex_num = num_instanced_vert; @@ -2012,7 +2040,8 @@ prepare_entities(EntityUploadInfo* upload_info) else { const model_t* model = MOD_ForHandle(entity->model); - process_regular_entity(entity, model, qfalse, qfalse, &model_instance_idx, &instance_idx, &num_instanced_vert, MESH_FILTER_TRANSPARENT, NULL); + process_regular_entity(entity, model, qfalse, qfalse, &model_instance_idx, &instance_idx, &num_instanced_vert, + MESH_FILTER_TRANSPARENT, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); } } @@ -2026,7 +2055,8 @@ prepare_entities(EntityUploadInfo* upload_info) { const entity_t* entity = vkpt_refdef.fd->entities + viewer_model_indices[i]; const model_t* model = MOD_ForHandle(entity->model); - process_regular_entity(entity, model, qfalse, qtrue, &model_instance_idx, &instance_idx, &num_instanced_vert, MESH_FILTER_ALL, NULL); + process_regular_entity(entity, model, qfalse, qtrue, &model_instance_idx, &instance_idx, &num_instanced_vert, + MESH_FILTER_ALL, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); } } @@ -2040,7 +2070,8 @@ prepare_entities(EntityUploadInfo* upload_info) { const entity_t* entity = vkpt_refdef.fd->entities + viewer_weapon_indices[i]; const model_t* model = MOD_ForHandle(entity->model); - process_regular_entity(entity, model, qtrue, qfalse, &model_instance_idx, &instance_idx, &num_instanced_vert, MESH_FILTER_ALL, NULL); + process_regular_entity(entity, model, qtrue, qfalse, &model_instance_idx, 
&instance_idx, &num_instanced_vert, + MESH_FILTER_ALL, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); if (entity->flags & RF_LEFTHAND) upload_info->weapon_left_handed = qtrue; @@ -2054,7 +2085,8 @@ prepare_entities(EntityUploadInfo* upload_info) { const entity_t* entity = vkpt_refdef.fd->entities + explosion_indices[i]; const model_t* model = MOD_ForHandle(entity->model); - process_regular_entity(entity, model, qfalse, qfalse, &model_instance_idx, &instance_idx, &num_instanced_vert, MESH_FILTER_ALL, NULL); + process_regular_entity(entity, model, qfalse, qfalse, &model_instance_idx, &instance_idx, &num_instanced_vert, + MESH_FILTER_ALL, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); } upload_info->explosions_vertex_offset = explosion_base_vertex_num; @@ -2089,6 +2121,41 @@ prepare_entities(EntityUploadInfo* upload_info) } } } + + // Store the number of IQM matrices for the next frame + iqm_matrix_count[entity_frame_num] = iqm_matrix_offset; + + if (iqm_matrix_count[entity_frame_num] > 0) + { + // If we had some matrices previously... 
+ if (iqm_matrix_count[!entity_frame_num] > 0) + { + // Copy over the previous frame IQM matrices into an offset location in the current frame buffer + memcpy(qvk.iqm_matrices_shadow + (iqm_matrix_count[entity_frame_num] * 12), + qvk.iqm_matrices_prev, iqm_matrix_count[!entity_frame_num] * 12 * sizeof(float)); + + // Patch the previous model instances to point at the offset matrices + for (int i = 0; i < model_entity_id_count[!entity_frame_num]; i++) + { + ModelInstance* instance = &instance_buffer->model_instances_prev[i]; + if (instance->is_iqm) { + // Offset = current matrix count + instance->offset_prev += iqm_matrix_count[entity_frame_num]; + } + } + } + + // Store the current matrices for the next frame + memcpy(qvk.iqm_matrices_prev, qvk.iqm_matrices_shadow, iqm_matrix_count[entity_frame_num] * 12 * sizeof(float)); + + // Upload the current matrices to the staging buffer + IqmMatrixBuffer* iqm_matrix_staging = buffer_map(&qvk.buf_iqm_matrices_staging[qvk.current_frame_index]); + + int total_matrix_count = (iqm_matrix_count[entity_frame_num] + iqm_matrix_count[!entity_frame_num]); + memcpy(iqm_matrix_staging, qvk.iqm_matrices_shadow, total_matrix_count * 12 * sizeof(float)); + + buffer_unmap(&qvk.buf_iqm_matrices_staging[qvk.current_frame_index]); + } } #ifdef VKPT_IMAGE_DUMPS @@ -2512,7 +2579,7 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c // adjust texture LOD bias to the resolution scale, i.e. use negative bias if scale is < 100 float resolution_scale = (drs_effective_scale != 0) ? 
(float)drs_effective_scale : (float)scr_viewsize->integer; resolution_scale *= 0.01f; - resolution_scale = clamp(resolution_scale, 0.1f, 1.f); + clamp(resolution_scale, 0.1f, 1.f); ubo->pt_texture_lod_bias = cvar_pt_texture_lod_bias->value + log2f(resolution_scale); } @@ -2732,6 +2799,7 @@ R_RenderFrame_RTX(refdef_t *fd) VkCommandBuffer transfer_cmd_buf = vkpt_begin_command_buffer(&qvk.cmd_buffers_transfer); vkpt_light_buffer_upload_staging(transfer_cmd_buf); + vkpt_iqm_matrix_buffer_upload_staging(transfer_cmd_buf); for (int gpu = 0; gpu < qvk.device_count; gpu++) { @@ -3113,7 +3181,7 @@ retry:; goto retry; } else if(res_swapchain != VK_SUCCESS) { - _VK(res_swapchain); + Com_EPrintf("Error %d in vkAcquireNextImageKHR\n", res_swapchain); } if (qvk.wait_for_idle_frames) { @@ -3124,7 +3192,6 @@ retry:; vkResetFences(qvk.device, 1, qvk.fences_frame_sync + qvk.current_frame_index); vkpt_reset_command_buffers(&qvk.cmd_buffers_graphics); - vkpt_reset_command_buffers(&qvk.cmd_buffers_compute); vkpt_reset_command_buffers(&qvk.cmd_buffers_transfer); // Process the profiler queries - always enabled to support DRS @@ -3925,7 +3992,7 @@ void vkpt_submit_command_buffer( _VK(vkQueueSubmit(queue, 1, &submit_info, fence)); #ifdef _DEBUG - cmd_buf_group_t* groups[] = { &qvk.cmd_buffers_graphics, &qvk.cmd_buffers_compute, &qvk.cmd_buffers_transfer }; + cmd_buf_group_t* groups[] = { &qvk.cmd_buffers_graphics, &qvk.cmd_buffers_transfer }; for (int ngroup = 0; ngroup < LENGTH(groups); ngroup++) { cmd_buf_group_t* group = groups[ngroup]; @@ -4003,6 +4070,7 @@ void R_RegisterFunctionsRTX() IMG_ReadPixels = IMG_ReadPixels_RTX; MOD_LoadMD2 = MOD_LoadMD2_RTX; MOD_LoadMD3 = MOD_LoadMD3_RTX; + MOD_LoadIQM = MOD_LoadIQM_RTX; MOD_Reference = MOD_Reference_RTX; } diff --git a/src/refresh/vkpt/material.c b/src/refresh/vkpt/material.c index 84d162e9c..32bd5712d 100644 --- a/src/refresh/vkpt/material.c +++ b/src/refresh/vkpt/material.c @@ -436,7 +436,7 @@ qerror_t MAT_ResetUnused() for (int i = 0; 
i < table->num_materials + table->num_custom_materials; ++i) { - pbr_material_t * mat = table->materials; + pbr_material_t * mat = table->materials + i; if (mat->registration_sequence == registration_sequence) continue; diff --git a/src/refresh/vkpt/matrix.c b/src/refresh/vkpt/matrix.c index 51bfa9fcc..d5c094d62 100644 --- a/src/refresh/vkpt/matrix.c +++ b/src/refresh/vkpt/matrix.c @@ -20,7 +20,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "vkpt.h" void -create_entity_matrix(float matrix[16], entity_t *e, qboolean enable_left_hand) +create_entity_matrix(mat4_t matrix, entity_t *e, qboolean enable_left_hand) { vec3_t axis[3]; vec3_t origin; @@ -60,7 +60,7 @@ create_entity_matrix(float matrix[16], entity_t *e, qboolean enable_left_hand) } void -create_projection_matrix(float matrix[16], float znear, float zfar, float fov_x, float fov_y) +create_projection_matrix(mat4_t matrix, float znear, float zfar, float fov_x, float fov_y) { float xmin, xmax, ymin, ymax; float width, height, depth; @@ -97,7 +97,7 @@ create_projection_matrix(float matrix[16], float znear, float zfar, float fov_x, } void -create_orthographic_matrix(float matrix[16], float xmin, float xmax, +create_orthographic_matrix(mat4_t matrix, float xmin, float xmax, float ymin, float ymax, float znear, float zfar) { float width, height, depth; @@ -128,7 +128,7 @@ create_orthographic_matrix(float matrix[16], float xmin, float xmax, } void -create_view_matrix(float matrix[16], refdef_t *fd) +create_view_matrix(mat4_t matrix, refdef_t *fd) { vec3_t viewaxis[3]; AnglesToAxis(fd->viewangles, viewaxis); @@ -155,7 +155,7 @@ create_view_matrix(float matrix[16], refdef_t *fd) } void -inverse(const float *m, float *inv) +inverse(const mat4_t m, mat4_t inv) { inv[0] = m[5] * m[10] * m[15] - m[5] * m[11] * m[14] - @@ -278,7 +278,7 @@ inverse(const float *m, float *inv) } void -mult_matrix_matrix(float *p, const float *a, const float *b) +mult_matrix_matrix(mat4_t p, const mat4_t a, 
const mat4_t b) { for(int i = 0; i < 4; i++) { for(int j = 0; j < 4; j++) { @@ -292,7 +292,7 @@ mult_matrix_matrix(float *p, const float *a, const float *b) } void -mult_matrix_vector(float *p, const float *a, const float *b) +mult_matrix_vector(mat4_t p, const mat4_t a, const vec4_t b) { int j; for (j = 0; j < 4; j++) { diff --git a/src/refresh/vkpt/models.c b/src/refresh/vkpt/models.c index aa06413bc..304f83d27 100644 --- a/src/refresh/vkpt/models.c +++ b/src/refresh/vkpt/models.c @@ -100,7 +100,7 @@ static void export_obj_frames(model_t* model, const char* path_pattern) } } -qerror_t MOD_LoadMD2_RTX(model_t *model, const void *rawdata, size_t length) +qerror_t MOD_LoadMD2_RTX(model_t *model, const void *rawdata, size_t length, const char* mod_name) { dmd2header_t header; dmd2frame_t *src_frame; @@ -508,7 +508,7 @@ static qerror_t MOD_LoadMD3Mesh(model_t *model, maliasmesh_t *mesh, return Q_ERR_SUCCESS; } -qerror_t MOD_LoadMD3_RTX(model_t *model, const void *rawdata, size_t length) +qerror_t MOD_LoadMD3_RTX(model_t *model, const void *rawdata, size_t length, const char* mod_name) { dmd3header_t header; size_t end, offset, remaining; @@ -588,6 +588,85 @@ qerror_t MOD_LoadMD3_RTX(model_t *model, const void *rawdata, size_t length) } #endif +qerror_t MOD_LoadIQM_RTX(model_t* model, const void* rawdata, size_t length, const char* mod_name) +{ + Hunk_Begin(&model->hunk, 0x4000000); + model->type = MOD_ALIAS; + + qerror_t res = MOD_LoadIQM_Base(model, rawdata, length, mod_name); + + if (res != Q_ERR_SUCCESS) + { + Hunk_Free(&model->hunk); + return res; + } + + char base_path[MAX_QPATH]; + COM_FilePath(mod_name, base_path, sizeof(base_path)); + + model->meshes = MOD_Malloc(sizeof(maliasmesh_t) * model->iqmData->num_meshes); + model->nummeshes = (int)model->iqmData->num_meshes; + model->numframes = 1; // these are baked frames, so that the VBO uploader will only make one copy of the vertices + + for (unsigned model_idx = 0; model_idx < model->iqmData->num_meshes; 
model_idx++) + { + iqm_mesh_t* iqm_mesh = &model->iqmData->meshes[model_idx]; + maliasmesh_t* mesh = &model->meshes[model_idx]; + + mesh->indices = (int*)iqm_mesh->data->indices + iqm_mesh->first_triangle * 3; + mesh->positions = (vec3_t*)(iqm_mesh->data->positions + iqm_mesh->first_vertex * 3); + mesh->normals = (vec3_t*)(iqm_mesh->data->normals + iqm_mesh->first_vertex * 3); + mesh->tex_coords = (vec2_t*)(iqm_mesh->data->texcoords + iqm_mesh->first_vertex * 2); + mesh->tangents = (vec3_t*)(iqm_mesh->data->tangents + iqm_mesh->first_vertex * 3); + mesh->blend_indices = (uint32_t*)(iqm_mesh->data->blend_indices + iqm_mesh->first_vertex * 4); + mesh->blend_weights = (vec4_t*)(iqm_mesh->data->blend_weights + iqm_mesh->first_vertex * 4); + + mesh->numindices = (int)(iqm_mesh->num_triangles * 3); + mesh->numverts = (int)iqm_mesh->num_vertexes; + mesh->numtris = (int)iqm_mesh->num_triangles; + + // convert the indices from IQM global space to mesh-local space; fix winding order. + for (unsigned triangle_idx = 0; triangle_idx < iqm_mesh->num_triangles; triangle_idx++) + { + int tri[3]; + tri[0] = mesh->indices[triangle_idx * 3 + 0]; + tri[1] = mesh->indices[triangle_idx * 3 + 1]; + tri[2] = mesh->indices[triangle_idx * 3 + 2]; + + mesh->indices[triangle_idx * 3 + 0] = tri[2] - (int)iqm_mesh->first_vertex; + mesh->indices[triangle_idx * 3 + 1] = tri[1] - (int)iqm_mesh->first_vertex; + mesh->indices[triangle_idx * 3 + 2] = tri[0] - (int)iqm_mesh->first_vertex; + } + + char filename[MAX_QPATH]; + + Q_concat(filename, sizeof(filename), base_path, "/", iqm_mesh->material, NULL); + pbr_material_t* mat = MAT_FindPBRMaterial(filename); + assert(mat); // it's either found or created + + Q_concat(filename, sizeof(filename), base_path, "/", iqm_mesh->material, ".tga", NULL); + image_t* image_diffuse = IMG_Find(filename, IT_SKIN, IF_SRGB); + if (image_diffuse == R_NOTEXTURE) image_diffuse = NULL; + + Q_concat(filename, sizeof(filename), base_path, "/", iqm_mesh->material, "_n.tga", 
NULL); + image_t* image_normals = IMG_Find(filename, IT_SKIN, IF_NONE); + if (image_normals == R_NOTEXTURE) image_normals = NULL; + + Q_concat(filename, sizeof(filename), base_path, "/", iqm_mesh->material, "_light.tga", NULL); + image_t* image_emissive = IMG_Find(filename, IT_SKIN, IF_NONE); + if (image_emissive == R_NOTEXTURE) image_emissive = NULL; + + MAT_RegisterPBRMaterial(mat, image_diffuse, image_normals, image_emissive); + + mesh->materials[0] = mat; + mesh->numskins = 1; // looks like IQM only supports one skin? + } + + Hunk_End(&model->hunk); + + return Q_ERR_SUCCESS; +} + void MOD_Reference_RTX(model_t *model) { int mesh_idx, skin_idx, frame_idx; diff --git a/src/refresh/vkpt/path_tracer.c b/src/refresh/vkpt/path_tracer.c index 12cdd3038..22843bd33 100644 --- a/src/refresh/vkpt/path_tracer.c +++ b/src/refresh/vkpt/path_tracer.c @@ -29,6 +29,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #define RAY_GEN_PARTICLE_COLOR_BUFFER_BINDING_IDX 1 #define RAY_GEN_BEAM_COLOR_BUFFER_BINDING_IDX 2 #define RAY_GEN_SPRITE_INFO_BUFFER_BINDING_IDX 3 +#define RAY_GEN_BEAM_INTERSECT_BUFFER_BINDING_IDX 4 #define SIZE_SCRATCH_BUFFER (1 << 25) @@ -42,6 +43,7 @@ typedef struct accel_bottom_match_info_s { int fast_build; uint32_t vertex_count; uint32_t index_count; + uint32_t aabb_count; } accel_bottom_match_info_t; typedef struct accel_top_match_info_s { @@ -221,6 +223,12 @@ vkpt_pt_init() .descriptorCount = 1, .stageFlags = qvk.use_ray_query ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ANY_HIT_BIT_KHR, }, + { + .binding = RAY_GEN_BEAM_INTERSECT_BUFFER_BINDING_IDX, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = qvk.use_ray_query ? 
VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_INTERSECTION_BIT_KHR, + }, }; VkDescriptorSetLayoutCreateInfo layout_info = { @@ -310,6 +318,7 @@ vkpt_pt_update_descripter_set_bindings(int idx) VkBufferView particle_color_buffer_view = get_transparency_particle_color_buffer_view(); VkBufferView beam_color_buffer_view = get_transparency_beam_color_buffer_view(); VkBufferView sprite_info_buffer_view = get_transparency_sprite_info_buffer_view(); + VkBufferView beam_intersect_buffer_view = get_transparency_beam_intersect_buffer_view(); VkWriteDescriptorSet writes[] = { { @@ -344,6 +353,14 @@ vkpt_pt_update_descripter_set_bindings(int idx) .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, .pTexelBufferView = &sprite_info_buffer_view }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = rt_descriptor_set[idx], + .dstBinding = RAY_GEN_BEAM_INTERSECT_BUFFER_BINDING_IDX, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .pTexelBufferView = &beam_intersect_buffer_view + }, }; vkUpdateDescriptorSets(qvk.device, LENGTH(writes), writes, 0, NULL); @@ -386,6 +403,7 @@ static void destroy_blas(blas_t* blas) blas->match.fast_build = 0; blas->match.index_count = 0; blas->match.vertex_count = 0; + blas->match.aabb_count = 0; } void vkpt_pt_destroy_static() @@ -417,6 +435,13 @@ static inline int accel_matches(accel_bottom_match_info_t *match, match->index_count >= index_count; } +static inline int accel_matches_aabb(accel_bottom_match_info_t *match, + int fast_build, + uint32_t aabb_count) { + return match->fast_build == fast_build && + match->aabb_count >= aabb_count; +} + // How much to bloat the dynamic geometry allocations // to try to avoid later allocations. 
#define DYNAMIC_GEOMETRY_BLOAT_FACTOR 2 @@ -535,6 +560,7 @@ vkpt_pt_create_accel_bottom( blas->match.fast_build = fast_build; blas->match.vertex_count = num_vertices_to_allocate; blas->match.index_count = num_indices_to_allocate; + blas->match.aabb_count = 0; } // set where the build lands @@ -551,7 +577,7 @@ vkpt_pt_create_accel_bottom( // build offset VkAccelerationStructureBuildRangeInfoKHR offset = { .primitiveCount = max(num_vertices, num_indices) / 3 }; - VkAccelerationStructureBuildRangeInfoKHR* offsets = &offset; + const VkAccelerationStructureBuildRangeInfoKHR* offsets = &offset; qvkCmdBuildAccelerationStructuresKHR(cmd_buf, 1, &buildInfo, &offsets); } @@ -637,6 +663,242 @@ vkpt_pt_create_accel_bottom( blas->match.fast_build = fast_build; blas->match.vertex_count = allocGeometry.geometry.triangles.vertexCount; blas->match.index_count = allocGeometry.geometry.triangles.indexCount; + blas->match.aabb_count = 0; + } + + size_t scratch_buf_size = get_scratch_buffer_size_nv(blas->accel_nv); + assert(scratch_buf_ptr + scratch_buf_size < SIZE_SCRATCH_BUFFER); + + VkAccelerationStructureInfoNV as_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, + .geometryCount = 1, + .pGeometries = &geometry, + .flags = fast_build ? 
VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NV : VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV + }; + + qvkCmdBuildAccelerationStructureNV(cmd_buf, &as_info, + VK_NULL_HANDLE, /* instance buffer */ + 0 /* instance offset */, + VK_FALSE, /* update */ + blas->accel_nv, + VK_NULL_HANDLE, + buf_accel_scratch.buffer, + scratch_buf_ptr); + + scratch_buf_ptr += scratch_buf_size; + } + + blas->present = qtrue; + + return VK_SUCCESS; +} + +static VkResult +vkpt_pt_create_accel_bottom_aabb( + VkCommandBuffer cmd_buf, + BufferResource_t* buffer_aabb, + VkDeviceAddress offset_aabb, + int num_aabbs, + blas_t* blas, + qboolean is_dynamic, + qboolean fast_build) +{ + assert(blas); + + if (num_aabbs == 0) + { + blas->present = qfalse; + return VK_SUCCESS; + } + + if (qvk.use_khr_ray_tracing) + { + assert(buffer_aabb->address); + + const VkAccelerationStructureGeometryAabbsDataKHR aabbs = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, + .data = {.deviceAddress = buffer_aabb->address + offset_aabb }, + .stride = sizeof(VkAabbPositionsKHR) + }; + + const VkAccelerationStructureGeometryDataKHR geometry_data = { + .aabbs = aabbs + }; + + const VkAccelerationStructureGeometryKHR geometry = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .geometryType = VK_GEOMETRY_TYPE_AABBS_KHR, + .geometry = geometry_data + }; + + const VkAccelerationStructureGeometryKHR* geometries = &geometry; + + VkAccelerationStructureBuildGeometryInfoKHR buildInfo; + + // Prepare build info now, acceleration is filled later + buildInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + buildInfo.pNext = NULL; + buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + buildInfo.flags = fast_build ? 
VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR : VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + buildInfo.srcAccelerationStructure = VK_NULL_HANDLE; + buildInfo.dstAccelerationStructure = VK_NULL_HANDLE; + buildInfo.geometryCount = 1; + buildInfo.pGeometries = geometries; + buildInfo.ppGeometries = NULL; + + int doFree = 0; + int doAlloc = 0; + + if (!is_dynamic || !accel_matches_aabb(&blas->match, fast_build, num_aabbs) || blas->accel_khr == VK_NULL_HANDLE) + { + doAlloc = 1; + doFree = (blas->accel_khr != VK_NULL_HANDLE); + } + + if (doFree) + { + destroy_blas(blas); + } + + // Find size to build on the device + uint32_t max_primitive_count = num_aabbs; + VkAccelerationStructureBuildSizesInfoKHR sizeInfo = { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR }; + qvkGetAccelerationStructureBuildSizesKHR(qvk.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &max_primitive_count, &sizeInfo); + + if (doAlloc) + { + int num_aabs_to_allocate = num_aabbs; + + // Allocate more memory / larger BLAS for dynamic objects + if (is_dynamic) + { + num_aabs_to_allocate *= DYNAMIC_GEOMETRY_BLOAT_FACTOR; + + max_primitive_count = num_aabs_to_allocate; + qvkGetAccelerationStructureBuildSizesKHR(qvk.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &max_primitive_count, &sizeInfo); + } + + // Create acceleration structure + VkAccelerationStructureCreateInfoKHR createInfo = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, + .size = sizeInfo.accelerationStructureSize + }; + + // Create the buffer for the acceleration structure + buffer_create(&blas->mem, sizeInfo.accelerationStructureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + createInfo.buffer = blas->mem.buffer; + + // Create the acceleration structure + qvkCreateAccelerationStructureKHR(qvk.device, &createInfo, NULL, &blas->accel_khr); + + blas->match.fast_build = fast_build; + blas->match.vertex_count = 0; + blas->match.index_count = 0; + blas->match.aabb_count = num_aabs_to_allocate; + } + + // set where the build lands + buildInfo.dstAccelerationStructure = blas->accel_khr; + + // Use shared scratch buffer for holding the temporary data of the acceleration structure builder + buildInfo.scratchData.deviceAddress = buf_accel_scratch.address + scratch_buf_ptr; + assert(buf_accel_scratch.address); + + // Update the scratch buffer ptr + scratch_buf_ptr += sizeInfo.buildScratchSize; + scratch_buf_ptr = align(scratch_buf_ptr, minAccelerationStructureScratchOffsetAlignment); + assert(scratch_buf_ptr < SIZE_SCRATCH_BUFFER); + + // build offset + VkAccelerationStructureBuildRangeInfoKHR offset = { .primitiveCount = num_aabbs }; + const VkAccelerationStructureBuildRangeInfoKHR* offsets = &offset; + + qvkCmdBuildAccelerationStructuresKHR(cmd_buf, 1, &buildInfo, &offsets); + } + else // (!qvk.use_khr_ray_tracing) + { + VkGeometryNV geometry = { + .sType = VK_STRUCTURE_TYPE_GEOMETRY_NV, + .geometryType = VK_GEOMETRY_TYPE_AABBS_NV, + .geometry = { + .triangles = { + .sType = VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV + }, + .aabbs = { + .sType = VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV, + .aabbData = buffer_aabb->buffer, + .offset = offset_aabb, + .numAABBs = num_aabbs, + .stride = sizeof(VkAabbPositionsKHR) + } + } + }; + + int doFree = 0; + int doAlloc = 0; + + if (!is_dynamic || !accel_matches_aabb(&blas->match, fast_build, num_aabbs) || blas->accel_nv == VK_NULL_HANDLE) { + doAlloc = 1; + doFree = (blas->accel_nv != VK_NULL_HANDLE); + } + + if (doFree) + { + destroy_blas(blas); + } + + if (doAlloc) + { + VkGeometryNV allocGeometry = geometry; + + // Allocate more memory / larger BLAS for dynamic objects + if 
(is_dynamic) + { + allocGeometry.geometry.aabbs.numAABBs *= DYNAMIC_GEOMETRY_BLOAT_FACTOR; + } + + VkAccelerationStructureCreateInfoNV accel_create_info = + { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV, + .info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, + .instanceCount = 0, + .geometryCount = 1, + .pGeometries = &allocGeometry, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, + .flags = fast_build ? VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NV : VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV + } + }; + + qvkCreateAccelerationStructureNV(qvk.device, &accel_create_info, NULL, &blas->accel_nv); + + VkAccelerationStructureMemoryRequirementsInfoNV mem_req_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV, + .accelerationStructure = blas->accel_nv, + .type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV + }; + + VkMemoryRequirements2 mem_req = { 0 }; + mem_req.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; + qvkGetAccelerationStructureMemoryRequirementsNV(qvk.device, &mem_req_info, &mem_req); + + _VK(buffer_create(&blas->mem, mem_req.memoryRequirements.size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)); + + VkBindAccelerationStructureMemoryInfoNV bind_info = { + .sType = VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV, + .accelerationStructure = blas->accel_nv, + .memory = blas->mem.memory + }; + + _VK(qvkBindAccelerationStructureMemoryNV(qvk.device, 1, &bind_info)); + + blas->match.fast_build = fast_build; + blas->match.vertex_count = 0; + blas->match.index_count = 0; + blas->match.aabb_count = allocGeometry.geometry.aabbs.numAABBs; } size_t scratch_buf_size = get_scratch_buffer_size_nv(blas->accel_nv); @@ -749,8 +1011,11 @@ vkpt_pt_create_all_dynamic( vkpt_get_transparency_buffers(VKPT_TRANSPARENCY_PARTICLES, &buffer_vertex, &offset_vertex, &buffer_index, &offset_index, &num_vertices, 
&num_indices); vkpt_pt_create_accel_bottom(cmd_buf, buffer_vertex, offset_vertex, buffer_index, offset_index, num_vertices, num_indices, blas_particles + idx, qtrue, qtrue); - vkpt_get_transparency_buffers(VKPT_TRANSPARENCY_BEAMS, &buffer_vertex, &offset_vertex, &buffer_index, &offset_index, &num_vertices, &num_indices); - vkpt_pt_create_accel_bottom(cmd_buf, buffer_vertex, offset_vertex, buffer_index, offset_index, num_vertices, num_indices, blas_beams + idx, qtrue, qtrue); + BufferResource_t *buffer_aabb = NULL; + uint64_t offset_aabb = 0; + uint32_t num_aabbs = 0; + vkpt_get_beam_aabb_buffer(&buffer_aabb, &offset_aabb, &num_aabbs); + vkpt_pt_create_accel_bottom_aabb(cmd_buf, buffer_aabb, offset_aabb, num_aabbs, blas_beams + idx, qtrue, qtrue); vkpt_get_transparency_buffers(VKPT_TRANSPARENCY_SPRITES, &buffer_vertex, &offset_vertex, &buffer_index, &offset_index, &num_vertices, &num_indices); vkpt_pt_create_accel_bottom(cmd_buf, buffer_vertex, offset_vertex, buffer_index, offset_index, num_vertices, num_indices, blas_sprites + idx, qtrue, qtrue); @@ -780,7 +1045,7 @@ vkpt_pt_destroy_toplevel(int idx) } static void -append_blas(QvkGeometryInstance_t *instances, int *num_instances, blas_t* blas, int instance_id, int mask, int flags, int sbt_offset) +append_blas(QvkGeometryInstance_t *instances, uint32_t *num_instances, blas_t* blas, int instance_id, int mask, int flags, int sbt_offset) { if (!blas->present) return; @@ -821,39 +1086,39 @@ VkResult vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, qboolean include_world, qboolean weapon_left_handed) { QvkGeometryInstance_t instances[INSTANCE_MAX_NUM]; - int num_instances = 0; + uint32_t num_instances = 0; if (include_world) { - append_blas(instances, &num_instances, &blas_static, 0, AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); - append_blas(instances, &num_instances, &blas_transparent, transparent_primitive_offset, AS_FLAG_TRANSPARENT, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); - 
append_blas(instances, &num_instances, &blas_sky, AS_INSTANCE_FLAG_SKY | sky_primitive_offset, AS_FLAG_SKY, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); - append_blas(instances, &num_instances, &blas_custom_sky, AS_INSTANCE_FLAG_SKY | custom_sky_primitive_offset, AS_FLAG_CUSTOM_SKY, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); + append_blas(instances, &num_instances, &blas_static, 0, AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); + append_blas(instances, &num_instances, &blas_transparent, transparent_primitive_offset, AS_FLAG_TRANSPARENT, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); + append_blas(instances, &num_instances, &blas_sky, AS_INSTANCE_FLAG_SKY | sky_primitive_offset, AS_FLAG_SKY, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); + append_blas(instances, &num_instances, &blas_custom_sky, AS_INSTANCE_FLAG_SKY | custom_sky_primitive_offset, AS_FLAG_CUSTOM_SKY, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); } - append_blas(instances, &num_instances, &blas_dynamic[idx], AS_INSTANCE_FLAG_DYNAMIC, AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); - append_blas(instances, &num_instances, &blas_transparent_models[idx], AS_INSTANCE_FLAG_DYNAMIC | transparent_model_primitive_offset, AS_FLAG_TRANSPARENT, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); - append_blas(instances, &num_instances, &blas_explosions[idx], AS_INSTANCE_FLAG_DYNAMIC | explosions_primitive_offset, AS_FLAG_EXPLOSIONS, 0, 3); - append_blas(instances, &num_instances, &blas_viewer_weapon[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_weapon_primitive_offset, AS_FLAG_VIEWER_WEAPON, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR | (weapon_left_handed ? 
VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR : 0), 0); + append_blas(instances, &num_instances, &blas_dynamic[idx], AS_INSTANCE_FLAG_DYNAMIC, AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); + append_blas(instances, &num_instances, &blas_transparent_models[idx], AS_INSTANCE_FLAG_DYNAMIC | transparent_model_primitive_offset, AS_FLAG_TRANSPARENT, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); + append_blas(instances, &num_instances, &blas_explosions[idx], AS_INSTANCE_FLAG_DYNAMIC | explosions_primitive_offset, AS_FLAG_EXPLOSIONS, 0, SBTO_EXPLOSION); + append_blas(instances, &num_instances, &blas_viewer_weapon[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_weapon_primitive_offset, AS_FLAG_VIEWER_WEAPON, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR | (weapon_left_handed ? VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR : 0), SBTO_OPAQUE); if (cl_player_model->integer == CL_PLAYER_MODEL_FIRST_PERSON) { - append_blas(instances, &num_instances, &blas_viewer_models[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_model_primitive_offset, AS_FLAG_VIEWER_MODELS, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, 0); + append_blas(instances, &num_instances, &blas_viewer_models[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_model_primitive_offset, AS_FLAG_VIEWER_MODELS, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_OPAQUE); } if (cvar_pt_enable_particles->integer != 0) { - append_blas(instances, &num_instances, &blas_particles[idx], 0, AS_FLAG_PARTICLES, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, 1); + append_blas(instances, &num_instances, &blas_particles[idx], 0, AS_FLAG_PARTICLES, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_PARTICLE); } if (cvar_pt_enable_beams->integer != 0) { - append_blas(instances, &num_instances, &blas_beams[idx], 0, AS_FLAG_PARTICLES, 
VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, 2); + append_blas(instances, &num_instances, &blas_beams[idx], 0, AS_FLAG_PARTICLES, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_BEAM); } if (cvar_pt_enable_sprites->integer != 0) { - append_blas(instances, &num_instances, &blas_sprites[idx], 0, AS_FLAG_EXPLOSIONS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, 4); + append_blas(instances, &num_instances, &blas_sprites[idx], 0, AS_FLAG_EXPLOSIONS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_SPRITE); } void *instance_data = buffer_map(buf_instances + idx); @@ -922,7 +1187,7 @@ vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, qboolean include_world VkAccelerationStructureBuildRangeInfoKHR offset = { .primitiveCount = num_instances }; - VkAccelerationStructureBuildRangeInfoKHR* offsets = &offset; + const VkAccelerationStructureBuildRangeInfoKHR* offsets = &offset; qvkCmdBuildAccelerationStructuresKHR( cmd_buf, @@ -1321,24 +1586,36 @@ vkpt_pt_create_pipelines() uint32_t num_shader_groups = SBT_ENTRIES_PER_PIPELINE * PIPELINE_COUNT; char* shader_handles = alloca(num_shader_groups * shaderGroupHandleSize); + memset(shader_handles, 0, num_shader_groups * shaderGroupHandleSize); VkPipelineShaderStageCreateInfo shader_stages[] = { + // Stages used by all pipelines. Count must match num_base_shader_stages below! 
{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_RAYGEN_BIT_KHR, .pName = "main" + // Shader module is set below }, SHADER_STAGE(QVK_MOD_PATH_TRACER_RMISS, VK_SHADER_STAGE_MISS_BIT_KHR), SHADER_STAGE(QVK_MOD_PATH_TRACER_SHADOW_RMISS, VK_SHADER_STAGE_MISS_BIT_KHR), SHADER_STAGE(QVK_MOD_PATH_TRACER_RCHIT, VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR), + // Stages used by all pipelines that consider transparency SHADER_STAGE(QVK_MOD_PATH_TRACER_PARTICLE_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_KHR), - SHADER_STAGE(QVK_MOD_PATH_TRACER_BEAM_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_KHR), SHADER_STAGE(QVK_MOD_PATH_TRACER_EXPLOSION_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_KHR), SHADER_STAGE(QVK_MOD_PATH_TRACER_SPRITE_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_KHR), + // Must be last + SHADER_STAGE(QVK_MOD_PATH_TRACER_BEAM_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_KHR), + SHADER_STAGE(QVK_MOD_PATH_TRACER_BEAM_RINT, VK_SHADER_STAGE_INTERSECTION_BIT_KHR), }; + const unsigned num_base_shader_stages = 4; + const unsigned num_transparent_no_beam_shader_stages = 7; for (pipeline_index_t index = 0; index < PIPELINE_COUNT; index++) { + qboolean needs_beams = index <= PIPELINE_REFLECT_REFRACT_2; + qboolean needs_transparency = needs_beams || index == PIPELINE_INDIRECT_LIGHTING_FIRST; + unsigned int num_shader_stages = needs_beams ? LENGTH(shader_stages) : (needs_transparency ? 
num_transparent_no_beam_shader_stages : num_base_shader_stages); + switch (index) { case PIPELINE_PRIMARY_RAYS: @@ -1431,20 +1708,20 @@ vkpt_pt_create_pipelines() .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR }, - [SBT_RAHIT_PARTICLE] = { + [SBT_RCHIT_EMPTY] = { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = 4, + .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR }, - [SBT_RAHIT_BEAM] = { + [SBT_RAHIT_PARTICLE] = { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = 5, + .anyHitShader = 4, .intersectionShader = VK_SHADER_UNUSED_KHR }, [SBT_RAHIT_EXPLOSION] = { @@ -1452,7 +1729,7 @@ vkpt_pt_create_pipelines() .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = 6, + .anyHitShader = 5, .intersectionShader = VK_SHADER_UNUSED_KHR }, [SBT_RAHIT_SPRITE] = { @@ -1460,27 +1737,29 @@ vkpt_pt_create_pipelines() .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = 7, + .anyHitShader = 6, .intersectionShader = VK_SHADER_UNUSED_KHR }, - [SBT_RCHIT_EMPTY] = { + [SBT_RINT_BEAM] = { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR, .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = 
VK_SHADER_UNUSED_KHR + .anyHitShader = 7, + .intersectionShader = 8 }, }; + unsigned int num_shader_groups = needs_beams ? LENGTH(rt_shader_group_info) : (needs_transparency ? SBT_RINT_BEAM : SBT_FIRST_TRANSPARENCY); + VkPipelineLibraryCreateInfoKHR library_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR }; VkRayTracingPipelineCreateInfoKHR rt_pipeline_info = { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR, .pNext = NULL, .flags = 0, - .stageCount = LENGTH(shader_stages), + .stageCount = num_shader_stages, .pStages = shader_stages, - .groupCount = LENGTH(rt_shader_group_info), + .groupCount = num_shader_groups, .pGroups = rt_shader_group_info, .maxPipelineRayRecursionDepth = 1, .pLibraryInfo = &library_info, @@ -1502,8 +1781,8 @@ vkpt_pt_create_pipelines() } _VK(qvkGetRayTracingShaderGroupHandlesKHR( - qvk.device, rt_pipelines[index], 0, SBT_ENTRIES_PER_PIPELINE, - /* dataSize = */ SBT_ENTRIES_PER_PIPELINE * shaderGroupHandleSize, + qvk.device, rt_pipelines[index], 0, num_shader_groups, + /* dataSize = */ num_shader_groups * shaderGroupHandleSize, /* pData = */ shader_handles + SBT_ENTRIES_PER_PIPELINE * shaderGroupHandleSize * index)); } else // (!qvk.use_khr_ray_tracing) @@ -1542,20 +1821,20 @@ vkpt_pt_create_pipelines() .anyHitShader = VK_SHADER_UNUSED_NV, .intersectionShader = VK_SHADER_UNUSED_NV }, - [SBT_RAHIT_PARTICLE] = { + [SBT_RCHIT_EMPTY] = { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, .generalShader = VK_SHADER_UNUSED_NV, .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = 4, + .anyHitShader = VK_SHADER_UNUSED_NV, .intersectionShader = VK_SHADER_UNUSED_NV }, - [SBT_RAHIT_BEAM] = { + [SBT_RAHIT_PARTICLE] = { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, .generalShader = VK_SHADER_UNUSED_NV, .closestHitShader = VK_SHADER_UNUSED_NV, - 
.anyHitShader = 5, + .anyHitShader = 4, .intersectionShader = VK_SHADER_UNUSED_NV }, [SBT_RAHIT_EXPLOSION] = { @@ -1563,7 +1842,7 @@ vkpt_pt_create_pipelines() .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, .generalShader = VK_SHADER_UNUSED_NV, .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = 6, + .anyHitShader = 5, .intersectionShader = VK_SHADER_UNUSED_NV }, [SBT_RAHIT_SPRITE] = { @@ -1571,24 +1850,26 @@ vkpt_pt_create_pipelines() .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, .generalShader = VK_SHADER_UNUSED_NV, .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = 7, + .anyHitShader = 6, .intersectionShader = VK_SHADER_UNUSED_NV }, - [SBT_RCHIT_EMPTY] = { + [SBT_RINT_BEAM] = { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_NV, .generalShader = VK_SHADER_UNUSED_NV, .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV + .anyHitShader = 7, + .intersectionShader = 8 }, }; + unsigned int num_shader_groups = needs_beams ? LENGTH(rt_shader_group_info) : (needs_transparency ? 
SBT_RINT_BEAM : SBT_FIRST_TRANSPARENCY); + VkRayTracingPipelineCreateInfoNV rt_pipeline_info = { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV, - .stageCount = LENGTH(shader_stages), + .stageCount = num_shader_stages, .pStages = shader_stages, - .groupCount = LENGTH(rt_shader_group_info), + .groupCount = num_shader_groups, .pGroups = rt_shader_group_info, .layout = rt_pipeline_layout, .maxRecursionDepth = 1, @@ -1605,8 +1886,8 @@ vkpt_pt_create_pipelines() } _VK(qvkGetRayTracingShaderGroupHandlesNV( - qvk.device, rt_pipelines[index], 0, SBT_ENTRIES_PER_PIPELINE, - /* dataSize = */ SBT_ENTRIES_PER_PIPELINE* shaderGroupHandleSize, + qvk.device, rt_pipelines[index], 0, num_shader_groups, + /* dataSize = */ num_shader_groups* shaderGroupHandleSize, /* pData = */ shader_handles + SBT_ENTRIES_PER_PIPELINE * shaderGroupHandleSize * index)); } } diff --git a/src/refresh/vkpt/precomputed_sky.c b/src/refresh/vkpt/precomputed_sky.c index 8eb973c05..2806dbecb 100644 --- a/src/refresh/vkpt/precomputed_sky.c +++ b/src/refresh/vkpt/precomputed_sky.c @@ -906,12 +906,13 @@ void CreateShadowMap(struct Shadowmap* InOutShadowmap) .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = InOutShadowmap->DepthFormat, - .subresourceRange = {0}, - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT, - .subresourceRange.baseMipLevel = 0, - .subresourceRange.levelCount = 1, - .subresourceRange.baseArrayLayer = 0, - .subresourceRange.layerCount = 1, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, .image = InOutShadowmap->TargetTexture, }; diff --git a/src/refresh/vkpt/profiler.c b/src/refresh/vkpt/profiler.c index 78b4367b1..e8a18d184 100644 --- a/src/refresh/vkpt/profiler.c +++ b/src/refresh/vkpt/profiler.c @@ -129,7 +129,7 @@ draw_query(int x, int y, qhandle_t font, const char *enum_name, int idx) char buf[256]; int i; for(i = 
0; i < LENGTH(buf) - 1 && enum_name[i]; i++) - buf[i] = enum_name[i] == '_' ? ' ' : tolower(enum_name[i]); + buf[i] = enum_name[i] == '_' ? ' ' : (char)tolower(enum_name[i]); buf[i] = 0; R_DrawString(x, y, 0, 128, buf, font); @@ -155,7 +155,7 @@ draw_profiler(int enable_asvgf) return; #define PROFILER_DO(name, indent) \ - draw_query(x, y, font, #name + 9, name); y += 10; + draw_query(x, y, font, &#name[9], name); y += 10; PROFILER_DO(PROFILER_FRAME_TIME, 0); PROFILER_DO(PROFILER_INSTANCE_GEOMETRY, 1); diff --git a/src/refresh/vkpt/shader/constants.h b/src/refresh/vkpt/shader/constants.h index 7b396695d..72d093e89 100644 --- a/src/refresh/vkpt/shader/constants.h +++ b/src/refresh/vkpt/shader/constants.h @@ -129,11 +129,20 @@ with this program; if not, write to the Free Software Foundation, Inc., #define SBT_RMISS_PATH_TRACER 1 #define SBT_RMISS_SHADOW 2 #define SBT_RCHIT_OPAQUE 3 -#define SBT_RAHIT_PARTICLE 4 -#define SBT_RAHIT_BEAM 5 +#define SBT_RCHIT_EMPTY 4 +#define SBT_RAHIT_PARTICLE 5 #define SBT_RAHIT_EXPLOSION 6 #define SBT_RAHIT_SPRITE 7 -#define SBT_RCHIT_EMPTY 8 +#define SBT_RINT_BEAM 8 #define SBT_ENTRIES_PER_PIPELINE 9 +// vkpt_pt_create_pipelines() relies on all 'transparency' SBT entries coming after SBT_FIRST_TRANSPARENCY +#define SBT_FIRST_TRANSPARENCY SBT_RAHIT_PARTICLE + +// SBT indices, for primary rays +#define SBTO_OPAQUE (SBT_RCHIT_OPAQUE - SBT_RCHIT_OPAQUE) +#define SBTO_PARTICLE (SBT_RAHIT_PARTICLE - SBT_RCHIT_OPAQUE) +#define SBTO_EXPLOSION (SBT_RAHIT_EXPLOSION - SBT_RCHIT_OPAQUE) +#define SBTO_SPRITE (SBT_RAHIT_SPRITE - SBT_RCHIT_OPAQUE) +#define SBTO_BEAM (SBT_RINT_BEAM - SBT_RCHIT_OPAQUE) #endif /*_CONSTANTS_H_*/ diff --git a/src/refresh/vkpt/shader/global_ubo.h b/src/refresh/vkpt/shader/global_ubo.h index 3728175b2..f00ae5df5 100644 --- a/src/refresh/vkpt/shader/global_ubo.h +++ b/src/refresh/vkpt/shader/global_ubo.h @@ -246,17 +246,17 @@ typedef int ivec4_t[4]; typedef uint32_t uint; typedef struct { - float M[16]; // 16 + float M[16]; 
// mat4 uint32_t material; int offset_curr; - int offset_prev; + int offset_prev; // matrix offset for IQM float backlerp; float alpha; int idx_offset; int model_index; - int pad; + int is_iqm; } ModelInstance; typedef struct { @@ -285,13 +285,13 @@ struct ModelInstance { uint material; int offset_curr; - int offset_prev; + int offset_prev; // matrix offset for IQM float backlerp; float alpha; int idx_offset; int model_index; - int pad; + int is_iqm; }; struct BspMeshInstance { diff --git a/src/refresh/vkpt/shader/indirect_lighting.rgen b/src/refresh/vkpt/shader/indirect_lighting.rgen index 2f23f72e7..c0ad7f41f 100644 --- a/src/refresh/vkpt/shader/indirect_lighting.rgen +++ b/src/refresh/vkpt/shader/indirect_lighting.rgen @@ -30,6 +30,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #pragma optionNV(unroll all) #include "path_tracer_rgen.h" +#include "path_tracer_transparency.glsl" layout(constant_id = 0) const uint spec_bounce_index = 0; @@ -214,7 +215,7 @@ indirect_lighting( shadow_cull_mask |= AS_FLAG_VIEWER_WEAPON; } - trace_ray(bounce_ray, true, bounce_cull_mask); + trace_ray(bounce_ray, true, bounce_cull_mask, true); Triangle triangle; @@ -222,7 +223,7 @@ indirect_lighting( if(is_specular_ray) { - vec4 transparency = unpackHalf4x16(ray_payload_brdf.transparency); + vec4 transparency = get_payload_transparency_simple(ray_payload_brdf); bounce_contrib += transparency.rgb * transparency.a * bounce_throughput * (1.0 - direct_specular_weight); } diff --git a/src/refresh/vkpt/shader/instance_geometry.comp b/src/refresh/vkpt/shader/instance_geometry.comp index 02978e209..42951947b 100644 --- a/src/refresh/vkpt/shader/instance_geometry.comp +++ b/src/refresh/vkpt/shader/instance_geometry.comp @@ -43,6 +43,11 @@ layout(set = 2, binding = 0) readonly buffer MODEL_VBO { uint data[]; } model_vbos[]; +uint get_model_uint(uint model_id, uint offset) +{ + return model_vbos[nonuniformEXT(model_id)].data[offset]; +} + vec2 get_model_float2(uint 
model_id, uint offset) { vec2 result; @@ -95,7 +100,9 @@ get_model_triangle(uint model_id, uint prim_id, uint idx_offset, uint vert_offse t.tex_coords[1] = get_model_float2(model_id, idx[1] + MODEL_VERTEX_TEXCOORD); t.tex_coords[2] = get_model_float2(model_id, idx[2] + MODEL_VERTEX_TEXCOORD); - t.tangent = vec3(0); // computed later + t.tangents[0] = vec3(0); // computed later + t.tangents[1] = vec3(0); + t.tangents[2] = vec3(0); t.material_id = 0; // needs to come from uniform buffer @@ -105,10 +112,6 @@ get_model_triangle(uint model_id, uint prim_id, uint idx_offset, uint vert_offse return t; } -#define LOCAL_SIZE_X 512 - -layout(local_size_x = LOCAL_SIZE_X, local_size_y = 1, local_size_z = 1) in; - vec3 compute_tangent(mat3x3 positions, mat3x2 tex_coords, mat3x3 normals, out bool handedness) { vec3 dp0 = positions[1] - positions[0]; @@ -137,6 +140,86 @@ vec3 compute_tangent(mat3x3 positions, mat3x2 tex_coords, mat3x3 normals, out bo return tangent; } +Triangle +get_iqm_triangle(uint model_id, uint prim_id, uint idx_offset, uint vert_offset, int matrix_offset) +{ + uint idx_base = idx_offset + prim_id * 3; + + uvec3 idx; + idx.x = model_vbos[nonuniformEXT(model_id)].data[idx_base + 0]; + idx.y = model_vbos[nonuniformEXT(model_id)].data[idx_base + 1]; + idx.z = model_vbos[nonuniformEXT(model_id)].data[idx_base + 2]; + + idx = idx * IQM_VERTEX_SIZE + vert_offset; + + Triangle t; + t.positions[0] = get_model_float3(model_id, idx[0] + IQM_VERTEX_POSITION); + t.positions[1] = get_model_float3(model_id, idx[1] + IQM_VERTEX_POSITION); + t.positions[2] = get_model_float3(model_id, idx[2] + IQM_VERTEX_POSITION); + + t.normals[0] = get_model_float3(model_id, idx[0] + IQM_VERTEX_NORMAL); + t.normals[1] = get_model_float3(model_id, idx[1] + IQM_VERTEX_NORMAL); + t.normals[2] = get_model_float3(model_id, idx[2] + IQM_VERTEX_NORMAL); + + t.tex_coords[0] = get_model_float2(model_id, idx[0] + IQM_VERTEX_TEXCOORD); + t.tex_coords[1] = get_model_float2(model_id, idx[1] +
IQM_VERTEX_TEXCOORD); + t.tex_coords[2] = get_model_float2(model_id, idx[2] + IQM_VERTEX_TEXCOORD); + + t.tangents[0] = get_model_float3(model_id, idx[0] + IQM_VERTEX_TANGENT); + t.tangents[1] = get_model_float3(model_id, idx[1] + IQM_VERTEX_TANGENT); + t.tangents[2] = get_model_float3(model_id, idx[2] + IQM_VERTEX_TANGENT); + + if (dot(t.tangents[0], t.tangents[0]) == 0) // maybe tangents are missing? + { + bool handedness = false; + t.tangents[0].xyz = compute_tangent(t.positions, t.tex_coords, t.normals, handedness); + t.tangents[1].xyz = t.tangents[0].xyz; + t.tangents[2].xyz = t.tangents[0].xyz; + } + + if (matrix_offset >= 0) // offset is signed so this guard can actually fail; NOTE(review): confirm the CPU side writes a negative offset when there are no skinning matrices + { + for (int i_vtx = 0; i_vtx < 3; i_vtx++) + { + mat3x4 transform = mat3x4(0); + uint bone_indices = get_model_uint(model_id, idx[i_vtx] + IQM_VERTEX_INDICES); + vec4 bone_weights = get_model_float4(model_id, idx[i_vtx] + IQM_VERTEX_WEIGHTS); + float weight_sum = 0; + + for (int i_bone = 0; i_bone < 4; i_bone++) + { + uint bone_index = (bone_indices >> (i_bone * 8)) & 0xff; + float bone_weight = bone_weights[i_bone]; + + if (bone_weight > 0) + { + mat3x4 m = get_iqm_matrix(matrix_offset + bone_index); + transform += m * bone_weight; + weight_sum += bone_weight; + } + } + + if (weight_sum > 0) // should always sum up to 1 by IQM definition, but maybe the data is missing altogether?
+ { + t.positions[i_vtx] = vec4(t.positions[i_vtx], 1.0) * transform; + t.normals[i_vtx] = normalize(vec4(t.normals[i_vtx], 0.0) * transform); + t.tangents[i_vtx] = normalize(vec4(t.tangents[i_vtx], 0.0) * transform); + } + } + } + + t.material_id = 0; // needs to come from uniform buffer + + t.alpha = 1.0; + t.texel_density = 0; + + return t; +} + +#define LOCAL_SIZE_X 512 + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = 1, local_size_z = 1) in; + void main() { @@ -175,7 +258,7 @@ main() t_i.positions = t.positions; t_i.positions_prev = t.positions; /* no vertex anim for bsp meshes */ - t_i.tangent = t.tangent; + t_i.tangents = t.tangents; t_i.normals = t.normals; t_i.tex_coords = t.tex_coords; t_i.texel_density = t.texel_density; @@ -205,42 +288,72 @@ main() } } else { /* model */ - { - /* read and interpolate triangles for model for _current_ frame */ - ModelInstance mi_curr = instance_buffer.model_instances[instance_id]; - Triangle t = get_model_triangle(mi_curr.model_index, idx, mi_curr.idx_offset, mi_curr.offset_curr); - Triangle t_prev = get_model_triangle(mi_curr.model_index, idx, mi_curr.idx_offset, mi_curr.offset_prev); - M_curr = mi_curr.M; - - float backlerp = mi_curr.backlerp; + ModelInstance mi_curr = instance_buffer.model_instances[instance_id]; + M_curr = mi_curr.M; - t_i.positions[0] = mix(t.positions[0], t_prev.positions[0], backlerp); - t_i.positions[1] = mix(t.positions[1], t_prev.positions[1], backlerp); - t_i.positions[2] = mix(t.positions[2], t_prev.positions[2], backlerp); + uint id_prev = instance_buffer.model_current_to_prev[instance_id]; + ModelInstance mi_prev = mi_curr; + if(id_prev != ~0u) + { + mi_prev = instance_buffer.model_instances_prev[id_prev]; + } - t_i.normals[0] = mix(t.normals[0], t_prev.normals[0], backlerp); - t_i.normals[1] = mix(t.normals[1], t_prev.normals[1], backlerp); - t_i.normals[2] = mix(t.normals[2], t_prev.normals[2], backlerp); - t_i.tex_coords = t.tex_coords; + if (mi_curr.is_iqm != 0) + { + t_i =
get_iqm_triangle(mi_curr.model_index, idx, mi_curr.idx_offset, mi_curr.offset_curr, mi_curr.offset_prev); - bool handedness = false; - t_i.tangent.xyz = compute_tangent(t_i.positions, t_i.tex_coords, t_i.normals, handedness); + if (id_prev != ~0u) + { + Triangle t_prev = get_iqm_triangle(mi_prev.model_index, idx, mi_prev.idx_offset, mi_prev.offset_curr, mi_prev.offset_prev); + t_i.positions_prev = t_prev.positions; + M_prev = mi_prev.M; + } + else + { + t_i.positions_prev = t_i.positions; + M_prev = M_curr; + } t_i.alpha = mi_curr.alpha.x; - t_i.texel_density = t.texel_density; - t_i.material_id = mi_curr.material; - t_i.material_id |= handedness ? MATERIAL_FLAG_HANDEDNESS : 0; + // t_i.material_id |= MATERIAL_FLAG_HANDEDNESS; // not sure t_i.cluster = instance_buffer.model_cluster_id[instance_id]; } - + else { - uint id_prev = instance_buffer.model_current_to_prev[instance_id]; + { + /* read and interpolate triangles for model for _current_ frame */ + Triangle t = get_model_triangle(mi_curr.model_index, idx, mi_curr.idx_offset, mi_curr.offset_curr); + Triangle t_prev = get_model_triangle(mi_curr.model_index, idx, mi_curr.idx_offset, mi_curr.offset_prev); + + float backlerp = mi_curr.backlerp; + + t_i.positions[0] = mix(t.positions[0], t_prev.positions[0], backlerp); + t_i.positions[1] = mix(t.positions[1], t_prev.positions[1], backlerp); + t_i.positions[2] = mix(t.positions[2], t_prev.positions[2], backlerp); + + t_i.normals[0] = mix(t.normals[0], t_prev.normals[0], backlerp); + t_i.normals[1] = mix(t.normals[1], t_prev.normals[1], backlerp); + t_i.normals[2] = mix(t.normals[2], t_prev.normals[2], backlerp); + + t_i.tex_coords = t.tex_coords; + bool handedness = false; + t_i.tangents[0].xyz = compute_tangent(t_i.positions, t_i.tex_coords, t_i.normals, handedness); + t_i.tangents[1].xyz = t_i.tangents[0].xyz; + t_i.tangents[2].xyz = t_i.tangents[0].xyz; + + t_i.alpha = mi_curr.alpha.x; + t_i.texel_density = t.texel_density; + + t_i.material_id = mi_curr.material; + 
t_i.material_id |= handedness ? MATERIAL_FLAG_HANDEDNESS : 0; + t_i.cluster = instance_buffer.model_cluster_id[instance_id]; + } + if(id_prev != ~0u) { - ModelInstance mi_prev = instance_buffer.model_instances_prev[id_prev]; /* read and interpolate triangles for model for _previous_ frame */ Triangle t = get_model_triangle(mi_prev.model_index, idx, mi_prev.idx_offset, mi_prev.offset_curr); Triangle t_prev = get_model_triangle(mi_prev.model_index, idx, mi_prev.idx_offset, mi_prev.offset_prev); @@ -275,7 +388,9 @@ main() t_i.normals[1] = vec3(M_curr * vec4(t_i.normals[1], 0.0)); t_i.normals[2] = vec3(M_curr * vec4(t_i.normals[2], 0.0)); - t_i.tangent = vec3(M_curr * vec4(t_i.tangent, 0.0)); + t_i.tangents[0] = vec3(M_curr * vec4(t_i.tangents[0], 0.0)); + t_i.tangents[1] = vec3(M_curr * vec4(t_i.tangents[1], 0.0)); + t_i.tangents[2] = vec3(M_curr * vec4(t_i.tangents[2], 0.0)); uint instance_triangle_id = visbuf_pack_instance(instance_id, idx, is_world); store_instanced_triangle(t_i, instance_triangle_id, idx + buf_offset); diff --git a/src/refresh/vkpt/shader/path_tracer.h b/src/refresh/vkpt/shader/path_tracer.h index 56d3df9c2..6dbad7647 100644 --- a/src/refresh/vkpt/shader/path_tracer.h +++ b/src/refresh/vkpt/shader/path_tracer.h @@ -117,18 +117,25 @@ Converting skyboxes to local lights provides two benefits: // ========================================================================== // */ +#ifndef PATH_TRACER_H_ +#define PATH_TRACER_H_ + #ifdef NV_RAY_TRACING #extension GL_NV_ray_tracing : require #define rt_accelerationStructure accelerationStructureNV #define rt_hitAttribute hitAttributeNV #define rt_HitT gl_HitTNV +#define rt_RayTmin gl_RayTminNV +#define rt_RayTmax gl_RayTmaxNV #define rt_ignoreIntersection ignoreIntersectionNV() #define rt_InstanceCustomIndex gl_InstanceCustomIndexNV #define rt_LaunchID gl_LaunchIDNV #define rt_rayPayload rayPayloadNV #define rt_rayPayloadIn rayPayloadInNV +#define rt_reportIntersection reportIntersectionNV #define 
rt_traceRay traceNV +#define rt_WorldRayOrigin gl_WorldRayOriginNV #define rt_WorldRayDirection gl_WorldRayDirectionNV #else @@ -144,11 +151,15 @@ Converting skyboxes to local lights provides two benefits: #define rt_accelerationStructure accelerationStructureEXT #define rt_hitAttribute hitAttributeEXT #define rt_HitT gl_HitTEXT +#define rt_RayTmin gl_RayTminEXT +#define rt_RayTmax gl_RayTmaxEXT #define rt_ignoreIntersection ignoreIntersectionEXT #define rt_InstanceCustomIndex gl_InstanceCustomIndexEXT #define rt_rayPayload rayPayloadEXT #define rt_rayPayloadIn rayPayloadInEXT +#define rt_reportIntersection reportIntersectionEXT #define rt_traceRay traceRayEXT +#define rt_WorldRayOrigin gl_WorldRayOriginEXT #define rt_WorldRayDirection gl_WorldRayDirectionEXT #endif @@ -173,12 +184,21 @@ struct RayPayload { vec2 barycentric; uint instance_prim; float hit_distance; - uvec2 transparency; // half4x16 - float max_transparent_distance; + uvec2 close_transparencies; // half4x16 + uvec2 farthest_transparency; // half4x16 + float closest_max_transparent_distance; + float farthest_transparent_distance; + float farthest_transparent_depth; }; struct RayPayloadShadow { int missed; }; +struct HitAttributeBeam { + uint fade_and_thickness; // half2x16 +}; + +#endif // PATH_TRACER_H_ + // vim: shiftwidth=4 noexpandtab tabstop=4 cindent diff --git a/src/refresh/vkpt/shader/path_tracer.rchit b/src/refresh/vkpt/shader/path_tracer.rchit index 755954b56..dc9fb72fc 100644 --- a/src/refresh/vkpt/shader/path_tracer.rchit +++ b/src/refresh/vkpt/shader/path_tracer.rchit @@ -30,6 +30,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #define VERTEX_READONLY 1 #include "vertex_buffer.h" +#include "path_tracer_transparency.glsl" #include "path_tracer_hit_shaders.h" rt_rayPayloadIn RayPayload ray_payload; diff --git a/src/refresh/vkpt/shader/path_tracer_beam.rahit b/src/refresh/vkpt/shader/path_tracer_beam.rahit index 349c37d69..584e9fb2e 100644 --- 
a/src/refresh/vkpt/shader/path_tracer_beam.rahit +++ b/src/refresh/vkpt/shader/path_tracer_beam.rahit @@ -29,14 +29,17 @@ with this program; if not, write to the Free Software Foundation, Inc., #define VERTEX_READONLY 1 #include "vertex_buffer.h" +#include "path_tracer_transparency.glsl" #include "path_tracer_hit_shaders.h" rt_rayPayloadIn RayPayload ray_payload; -rt_hitAttribute vec2 hit_attribs; +rt_hitAttribute HitAttributeBeam beam_hit_attrib; void main() { - pt_logic_beam(ray_payload, gl_PrimitiveID, rt_HitT, hit_attribs.xy); + const vec2 beam_fade_and_thickness = unpackHalf2x16(beam_hit_attrib.fade_and_thickness); + + pt_logic_beam(ray_payload, gl_PrimitiveID, beam_fade_and_thickness, rt_HitT); rt_ignoreIntersection; } diff --git a/src/refresh/vkpt/shader/path_tracer_beam.rint b/src/refresh/vkpt/shader/path_tracer_beam.rint new file mode 100644 index 000000000..937a18d08 --- /dev/null +++ b/src/refresh/vkpt/shader/path_tracer_beam.rint @@ -0,0 +1,53 @@ +/* +Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+*/ + +#version 460 +#extension GL_GOOGLE_include_directive : enable + +#include "path_tracer.h" +#include "utils.glsl" + +#define GLOBAL_TEXTURES_DESC_SET_IDX 2 +#include "global_textures.h" + +#define VERTEX_BUFFER_DESC_SET_IDX 3 +#define VERTEX_READONLY 1 +#include "vertex_buffer.h" + +#include "path_tracer_transparency.glsl" +#include "path_tracer_hit_shaders.h" + +rt_hitAttribute HitAttributeBeam beam_hit_attrib; + +void main() +{ + const int beam_index = gl_PrimitiveID; + + float tShapeHit; + vec2 fade_and_thickness; + + if (!pt_logic_beam_intersection(gl_PrimitiveID, rt_WorldRayOrigin, rt_WorldRayDirection, + rt_RayTmin, rt_RayTmax, fade_and_thickness, tShapeHit)) + { + return; + } + + beam_hit_attrib.fade_and_thickness = packHalf2x16(fade_and_thickness); + + rt_reportIntersection(tShapeHit, 0); +} diff --git a/src/refresh/vkpt/shader/path_tracer_explosion.rahit b/src/refresh/vkpt/shader/path_tracer_explosion.rahit index b0f5683dc..c2612e622 100644 --- a/src/refresh/vkpt/shader/path_tracer_explosion.rahit +++ b/src/refresh/vkpt/shader/path_tracer_explosion.rahit @@ -29,6 +29,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #define VERTEX_READONLY 1 #include "vertex_buffer.h" +#include "path_tracer_transparency.glsl" #include "path_tracer_hit_shaders.h" rt_rayPayloadIn RayPayload ray_payload; diff --git a/src/refresh/vkpt/shader/path_tracer_hit_shaders.h b/src/refresh/vkpt/shader/path_tracer_hit_shaders.h index 9c7502204..66b3ea1b4 100644 --- a/src/refresh/vkpt/shader/path_tracer_hit_shaders.h +++ b/src/refresh/vkpt/shader/path_tracer_hit_shaders.h @@ -1,5 +1,6 @@ /* -Copyright (C) 2020, NVIDIA CORPORATION. All rights reserved. +Copyright (C) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
+Copyright (C) 2021 Frank Richter This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +26,9 @@ uniform textureBuffer beam_color_buffer; layout(set = 0, binding = 3) uniform utextureBuffer sprite_texure_buffer; +layout(set = 0, binding = 4) +uniform utextureBuffer beam_info_buffer; + void pt_logic_rchit(inout RayPayload ray_payload, int primitiveID, uint instanceCustomIndex, float hitT, vec2 bary) { ray_payload.barycentric = bary.xy; @@ -56,25 +60,19 @@ void pt_logic_particle(inout RayPayload ray_payload, int primitiveID, float hitT color.rgb *= global_ubo.prev_adapted_luminance * 500; - if(ray_payload.max_transparent_distance < hitT) - { - ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(unpackHalf4x16(ray_payload.transparency), color)); - ray_payload.max_transparent_distance = hitT; - } - else - ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(color, unpackHalf4x16(ray_payload.transparency))); + update_payload_transparency(ray_payload, color, 0, hitT); } } -void pt_logic_beam(inout RayPayload ray_payload, int primitiveID, float hitT, vec2 bary) +void pt_logic_beam(inout RayPayload ray_payload, int primitiveID, vec2 beam_fade_and_thickness, float hitT) { - const float x = bary.x + bary.y; - const float factor = pow(clamp(1.0 - abs(0.5 - x) * 2.0, 0.0, 1.0), global_ubo.pt_beam_softness); + const float x = beam_fade_and_thickness.x; + const float factor = pow(clamp(x, 0.0, 1.0), global_ubo.pt_beam_softness); if (factor > 0.0) { - const int particle_index = primitiveID / 2; - vec4 color = texelFetch(beam_color_buffer, particle_index); + const int beam_index = primitiveID; + vec4 color = texelFetch(beam_color_buffer, beam_index); color.a *= factor; color.rgb *= color.a; @@ -86,14 +84,7 @@ void pt_logic_beam(inout RayPayload ray_payload, int primitiveID, float hitT, ve float noise = texelFetch(TEX_BLUE_NOISE, ivec3(texpos, texnum), 0).r; 
color.rgb *= noise * noise + 0.1; - if(ray_payload.max_transparent_distance < hitT) - { - ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(unpackHalf4x16(ray_payload.transparency), color)); - ray_payload.max_transparent_distance = hitT; - } - else - ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(color, unpackHalf4x16(ray_payload.transparency))); - + update_payload_transparency(ray_payload, color, beam_fade_and_thickness.y, hitT); } } @@ -125,12 +116,7 @@ void pt_logic_sprite(inout RayPayload ray_payload, int primitiveID, float hitT, color.rgb *= global_ubo.prev_adapted_luminance * 2000; } - if(ray_payload.max_transparent_distance < hitT) - ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(unpackHalf4x16(ray_payload.transparency), color)); - else - ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(color, unpackHalf4x16(ray_payload.transparency))); - - ray_payload.max_transparent_distance = hitT; + update_payload_transparency(ray_payload, color, 0, hitT); } void pt_logic_explosion(inout RayPayload ray_payload, int primitiveID, uint instanceCustomIndex, float hitT, vec3 worldRayDirection, vec2 bary) @@ -156,12 +142,136 @@ void pt_logic_explosion(inout RayPayload ray_payload, int primitiveID, uint inst emission.rgb *= global_ubo.prev_adapted_luminance * 500; - if(ray_payload.max_transparent_distance < hitT) + update_payload_transparency(ray_payload, emission, 0, hitT); +} + +// Adapted from: http://www.pbr-book.org/3ed-2018/Utilities/Mathematical_Routines.html#SolvingQuadraticEquations +bool solve_quadratic(in float a, in float b, in float c, out vec2 t) +{ + float discrim = b * b - 4 * a * c; + if (discrim < 0) return false; + float q; + if (b < 0) + q = -0.5 * (b - sqrt(discrim)); + else + q = -0.5 * (b + sqrt(discrim)); + float t0 = q / a; + float t1 = c / q; + t = vec2(min(t0, t1), max(t0, t1)); + return true; +} + +bool hit_cylinder(in vec3 o, in vec3 d, in float radius, out vec2 t) +{ + // 
Adapted from: http://www.pbr-book.org/3ed-2018/Shapes/Cylinders.html#IntersectionTests + float a = dot(d.xy, d.xy); + float b = 2 * dot(d.xy, o.xy); + float c = dot(o.xy, o.xy) - radius * radius; + + return solve_quadratic(a, b, c, t); +} + +bool hit_sphere(in vec3 o, in vec3 d, in float radius, out vec2 t) +{ + // Adapted from: http://www.pbr-book.org/3ed-2018/Shapes/Spheres.html#IntersectionTests + float a = dot(d, d); + float b = 2 * dot(d, o); + float c = dot(o, o) - radius * radius; + + return solve_quadratic(a, b, c, t); +} + +bool pt_logic_beam_intersection(int beam_index, + vec3 worldRayOrigin, vec3 worldRayDirection, + float rayTmin, float rayTmax, out vec2 beam_fade_and_thickness, out float tShapeHit) +{ + beam_fade_and_thickness = vec2(0); + tShapeHit = 0; + + const uvec4 beam_info[3] = { texelFetch(beam_info_buffer, beam_index * 3), + texelFetch(beam_info_buffer, beam_index * 3 + 1), + texelFetch(beam_info_buffer, beam_index * 3 + 2) }; + /* Transform from world space to "beam space" (really, object space), + where the beam starts at the origin and points towards +Z */ + const mat4 world_to_beam = mat4(unpackHalf4x16(beam_info[1].xy), + unpackHalf4x16(beam_info[1].zw), + unpackHalf4x16(beam_info[2].xy), + uintBitsToFloat(beam_info[0])); + const float beam_radius = uintBitsToFloat(beam_info[2].z); + const float beam_length = uintBitsToFloat(beam_info[2].w); + + // Ray origin, direction in "beam space" + const vec3 o = (world_to_beam * vec4(worldRayOrigin, 1)).xyz; + const vec3 d = (world_to_beam * vec4(worldRayDirection, 0)).xyz; + + vec2 t; + if(!hit_cylinder(o, d, beam_radius, t)) + return false; + + // The intersection Z values (ie "height on beam") + vec2 hit_z = vec2(o.z) + vec2(d.z) * t; + /* Check if we hit outside the cylinder bounds - + if so, see if we hit the "end spheres", + and update the hit location */ + bvec2 hit_below_0 = lessThan(hit_z, vec2(0)); + if(any(hit_below_0)) { - ray_payload.transparency = 
packHalf4x16(alpha_blend_premultiplied(unpackHalf4x16(ray_payload.transparency), emission)); - ray_payload.max_transparent_distance = hitT; + vec2 t_sphere; + if(!hit_sphere(o, d, beam_radius, t_sphere)) + return false; + + if(hit_below_0.x) t.x = max(t.x, t_sphere.x); + if(hit_below_0.y) t.y = min(t.y, t_sphere.y); } - else - ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(emission, unpackHalf4x16(ray_payload.transparency))); -} \ No newline at end of file + bvec2 hit_above_end = greaterThan(hit_z, vec2(beam_length)); + if(any(hit_above_end)) + { + vec2 t_sphere; + if(!hit_sphere(o - vec3(0, 0, beam_length), d, beam_radius, t_sphere)) + return false; + + if(hit_above_end.x) t.x = max(t.x, t_sphere.x); + if(hit_above_end.y) t.y = min(t.y, t_sphere.y); + } + + if((t.x >= rayTmax) || (t.y < rayTmin)) + return false; + + tShapeHit = t.x; + if (tShapeHit < rayTmin) + { + tShapeHit = t.y; + if (tShapeHit >= rayTmax) + return false; + } + + // Compute points on ray and beam center where they're closest to each other + const vec3 perp_norm = normalize(vec3(d.y, -d.x, 0)); + const vec3 n2 = vec3(-perp_norm.y, perp_norm.x, 0); + const float t1 = dot(-o, n2) / dot(d, n2); + const vec3 n1 = cross(d, perp_norm); + const float t2 = dot(o, n1) / n1.z; + + const vec3 c_ray = o + t1 * d; // Point on ray closest to beam center + const vec3 c_beam = vec3(0, 0, t2); // Point on beam closest to ray + + /* Compute "distance" to beam center used for beam intensity. + Using the closest distance between the ray and the beam line segment + looks best when the beam is seen from the side; + Using the closest distance between the ray and the beam infinitely + extended looks best when looking at the beam "head on" + (ray parallel to beam) - + so mix between those two, based on the ray/beam angle. 
+ */ + const float dist_side = distance(c_ray, vec3(0, 0, clamp(t2, 0, beam_length))); + const float dist_head = distance(c_ray, c_beam); + const float dist = mix(dist_side, dist_head, abs(d.z)); + + float fade = 1.0 - dist / beam_radius; + float thickness = t.y - t.x; + fade *= clamp(thickness / (2 * beam_radius), 0, 1); + + beam_fade_and_thickness = vec2(fade, thickness); + return true; +} diff --git a/src/refresh/vkpt/shader/path_tracer_particle.rahit b/src/refresh/vkpt/shader/path_tracer_particle.rahit index 1097da0b6..ce721d8b4 100644 --- a/src/refresh/vkpt/shader/path_tracer_particle.rahit +++ b/src/refresh/vkpt/shader/path_tracer_particle.rahit @@ -29,6 +29,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #define VERTEX_READONLY 1 #include "vertex_buffer.h" +#include "path_tracer_transparency.glsl" #include "path_tracer_hit_shaders.h" rt_rayPayloadIn RayPayload ray_payload; diff --git a/src/refresh/vkpt/shader/path_tracer_rgen.h b/src/refresh/vkpt/shader/path_tracer_rgen.h index edd9c5f2e..1f14d4905 100644 --- a/src/refresh/vkpt/shader/path_tracer_rgen.h +++ b/src/refresh/vkpt/shader/path_tracer_rgen.h @@ -19,6 +19,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "path_tracer.h" #include "utils.glsl" +#include "path_tracer_transparency.glsl" #define RAY_GEN_DESCRIPTOR_SET_IDX 0 layout(set = RAY_GEN_DESCRIPTOR_SET_IDX, binding = 0) @@ -260,17 +261,22 @@ correct_emissive(uint material_id, vec3 emissive) } void -trace_ray(Ray ray, bool cull_back_faces, int instance_mask) +trace_ray(Ray ray, bool cull_back_faces, int instance_mask, bool skip_procedural) { uint rayFlags = 0; - if(cull_back_faces) + if (cull_back_faces) rayFlags |= gl_RayFlagsCullBackFacingTrianglesEXT; + if (skip_procedural) + rayFlags |= 0x200; // RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES - the corresponding constant is not defined in GLSL ray_payload_brdf.barycentric = vec2(0); ray_payload_brdf.instance_prim = 0; - 
ray_payload_brdf.transparency = uvec2(0); ray_payload_brdf.hit_distance = 0; - ray_payload_brdf.max_transparent_distance = 0; + ray_payload_brdf.close_transparencies = uvec2(0); + ray_payload_brdf.farthest_transparency = uvec2(0); + ray_payload_brdf.closest_max_transparent_distance = 0; + ray_payload_brdf.farthest_transparent_distance = 0; + ray_payload_brdf.farthest_transparent_depth = 0; #ifdef KHR_RAY_QUERY @@ -286,24 +292,44 @@ trace_ray(Ray ray, bool cull_back_faces, int instance_mask) uint instanceCustomIndex = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, false); float hitT = rayQueryGetIntersectionTEXT(rayQuery, false); vec2 bary = rayQueryGetIntersectionBarycentricsEXT(rayQuery, false); + bool isProcedural = rayQueryGetIntersectionTypeEXT(rayQuery, false) == gl_RayQueryCandidateIntersectionAABBEXT; - switch(sbtOffset) + if (isProcedural) { - case 1: // particles - pt_logic_particle(ray_payload_brdf, primitiveID, hitT, bary); - break; - - case 2: // beams - pt_logic_beam(ray_payload_brdf, primitiveID, hitT, bary); - break; + if (!skip_procedural) // this should be a compile-time constant + { + // We only have one type of procedural primitives: beams. + + // Run the intersection shader first... + float tShapeHit; + vec2 beam_fade_and_thickness; + bool intersectsWithBeam = pt_logic_beam_intersection(primitiveID, + ray.origin, ray.direction, ray.t_min, ray.t_max, + beam_fade_and_thickness, tShapeHit); + + // Then the any-hit shader. 
+ if (intersectsWithBeam) + { + pt_logic_beam(ray_payload_brdf, primitiveID, beam_fade_and_thickness, tShapeHit); + } + } + } + else + { + switch(sbtOffset) + { + case SBTO_PARTICLE: // particles + pt_logic_particle(ray_payload_brdf, primitiveID, hitT, bary); + break; - case 3: // explosions - pt_logic_explosion(ray_payload_brdf, primitiveID, instanceCustomIndex, hitT, ray.direction, bary); - break; + case SBTO_EXPLOSION: // explosions + pt_logic_explosion(ray_payload_brdf, primitiveID, instanceCustomIndex, hitT, ray.direction, bary); + break; - case 4: // sprites - pt_logic_sprite(ray_payload_brdf, primitiveID, hitT, bary); - break; + case SBTO_SPRITE: // sprites + pt_logic_sprite(ray_payload_brdf, primitiveID, hitT, bary); + break; + } } } @@ -382,9 +408,13 @@ trace_caustic_ray(Ray ray, int surface_medium) { ray_payload_brdf.barycentric = vec2(0); ray_payload_brdf.instance_prim = 0; - ray_payload_brdf.transparency = uvec2(0); ray_payload_brdf.hit_distance = -1; - ray_payload_brdf.max_transparent_distance = 0; + ray_payload_brdf.close_transparencies = uvec2(0); + ray_payload_brdf.farthest_transparency = uvec2(0); + ray_payload_brdf.closest_max_transparent_distance = 0; + ray_payload_brdf.farthest_transparent_distance = 0; + ray_payload_brdf.farthest_transparent_depth = 0; + uint rayFlags = gl_RayFlagsCullBackFacingTrianglesEXT | gl_RayFlagsOpaqueEXT; uint instance_mask = AS_FLAG_TRANSPARENT; @@ -431,7 +461,7 @@ trace_caustic_ray(Ray ray, int surface_medium) if((is_water(triangle.material_id) || is_slime(triangle.material_id)) && !is_vertical) { vec3 position = ray.origin + ray.direction * ray_payload_brdf.hit_distance; - vec3 w = get_water_normal(triangle.material_id, geo_normal, triangle.tangent, position, true); + vec3 w = get_water_normal(triangle.material_id, geo_normal, triangle.tangents[0], position, true); float caustic = clamp((1 - pow(clamp(1 - length(w.xz), 0, 1), 2)) * 100, 0, 8); caustic = mix(1, caustic, clamp(ray_payload_brdf.hit_distance * 0.02, 0, 
1)); @@ -822,7 +852,7 @@ bool get_is_gradient(ivec2 ipos) void -get_material(Triangle triangle, vec2 tex_coord, vec2 tex_coord_x, vec2 tex_coord_y, float mip_level, vec3 geo_normal, +get_material(Triangle triangle, vec3 bary, vec2 tex_coord, vec2 tex_coord_x, vec2 tex_coord_y, float mip_level, vec3 geo_normal, out vec3 albedo, out vec3 normal, out float metallic, out float specular, out float roughness, out vec3 emissive) { if((triangle.material_id & MATERIAL_FLAG_FLOWING) != 0) @@ -855,7 +885,7 @@ get_material(Triangle triangle, vec2 tex_coord, vec2 tex_coord_x, vec2 tex_coord specular = 0; roughness = 1; - if (minfo.normals_texture != 0)// && dot(triangle.tangent, triangle.tangent) > 0) + if (minfo.normals_texture != 0) { vec4 image2; if (mip_level >= 0) @@ -866,10 +896,10 @@ get_material(Triangle triangle, vec2 tex_coord, vec2 tex_coord_x, vec2 tex_coord float normalMapLen; vec3 local_normal = rgbToNormal(image2.rgb, normalMapLen); - if(dot(triangle.tangent, triangle.tangent) > 0) + if(dot(triangle.tangents[0], triangle.tangents[0]) > 0) { - vec3 tangent = triangle.tangent, - bitangent = cross(geo_normal, tangent); + vec3 tangent = normalize(triangle.tangents * bary); + vec3 bitangent = cross(geo_normal, tangent); if((triangle.material_id & MATERIAL_FLAG_HANDEDNESS) != 0) bitangent = -bitangent; @@ -933,4 +963,4 @@ bool get_camera_uv(vec2 tex_coord, out vec2 cameraUV) //cameraUV = (floor(cameraUV * resolution) + vec2(0.5)) / resolution; return all(greaterThan(cameraUV, vec2(0))) && all(lessThan(cameraUV, vec2(1))); -} \ No newline at end of file +} diff --git a/src/refresh/vkpt/shader/path_tracer_sprite.rahit b/src/refresh/vkpt/shader/path_tracer_sprite.rahit index a1ed1e822..a0accb1c8 100644 --- a/src/refresh/vkpt/shader/path_tracer_sprite.rahit +++ b/src/refresh/vkpt/shader/path_tracer_sprite.rahit @@ -29,6 +29,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #define VERTEX_READONLY 1 #include "vertex_buffer.h" +#include 
"path_tracer_transparency.glsl" #include "path_tracer_hit_shaders.h" rt_rayPayloadIn RayPayload ray_payload; diff --git a/src/refresh/vkpt/shader/path_tracer_transparency.glsl b/src/refresh/vkpt/shader/path_tracer_transparency.glsl new file mode 100644 index 000000000..f555f79bf --- /dev/null +++ b/src/refresh/vkpt/shader/path_tracer_transparency.glsl @@ -0,0 +1,58 @@ +/* +Copyright (C) 2018 Christoph Schied +Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+*/ + +#ifndef PATH_TRACER_TRANSPARENCY_GLSL_ +#define PATH_TRACER_TRANSPARENCY_GLSL_ + +void update_payload_transparency(inout RayPayload rp, vec4 color, float depth, float hitT) +{ + if(hitT > rp.farthest_transparent_distance) + { + rp.close_transparencies = packHalf4x16(alpha_blend_premultiplied(unpackHalf4x16(rp.close_transparencies), unpackHalf4x16(rp.farthest_transparency))); + rp.closest_max_transparent_distance = rp.farthest_transparent_distance; + rp.farthest_transparency = packHalf4x16(color); + rp.farthest_transparent_distance = hitT; + rp.farthest_transparent_depth = depth; + } + else if(rp.closest_max_transparent_distance < hitT) + { + rp.close_transparencies = packHalf4x16(alpha_blend_premultiplied(unpackHalf4x16(rp.close_transparencies), color)); + rp.closest_max_transparent_distance = hitT; + } + else + rp.close_transparencies = packHalf4x16(alpha_blend_premultiplied(color, unpackHalf4x16(rp.close_transparencies))); +} + +vec4 get_payload_transparency(in RayPayload rp, float solidDist) +{ + float scale_far = 1; + if (rp.farthest_transparent_depth > 0) + { + scale_far = clamp((solidDist - rp.farthest_transparent_distance) / rp.farthest_transparent_depth, 0, 1); + } + + return alpha_blend_premultiplied(unpackHalf4x16(rp.close_transparencies), unpackHalf4x16(rp.farthest_transparency) * scale_far); +} + +vec4 get_payload_transparency_simple(in RayPayload rp) +{ + return alpha_blend_premultiplied(unpackHalf4x16(rp.close_transparencies), unpackHalf4x16(rp.farthest_transparency)); +} + +#endif // PATH_TRACER_TRANSPARENCY_GLSL_ diff --git a/src/refresh/vkpt/shader/primary_rays.rgen b/src/refresh/vkpt/shader/primary_rays.rgen index 72fd41707..62ccd817c 100644 --- a/src/refresh/vkpt/shader/primary_rays.rgen +++ b/src/refresh/vkpt/shader/primary_rays.rgen @@ -155,7 +155,7 @@ main() if(global_ubo.pt_show_sky != 0) cull_mask |= AS_FLAG_CUSTOM_SKY; - trace_ray(ray, true, cull_mask); + trace_ray(ray, true, cull_mask, false); } direction = ray.direction; @@ -167,7 
+167,8 @@ main() vec3 env = env_map(ray.direction, false); env *= global_ubo.pt_env_scale; - vec4 transparent = alpha_blend(unpackHalf4x16(ray_payload_brdf.transparency), vec4(env, 1)); + vec4 ray_transparency = get_payload_transparency_simple(ray_payload_brdf); + vec4 transparent = alpha_blend(ray_transparency, vec4(env, 1)); if(is_readback_pixel) { @@ -284,6 +285,7 @@ main() // Get the primary surface material parameters get_material( triangle, + bary, tex_coord, tex_coord_x, tex_coord_y, @@ -380,7 +382,7 @@ main() if(is_water(material_id) || is_slime(material_id)) { - normal = get_water_normal(material_id, geo_normal, triangle.tangent, position, false); + normal = get_water_normal(material_id, geo_normal, triangle.tangents[0], position, false); if(abs(geo_normal.z) < 0.1) // hack to detect actual water vs. vertical force fields material_id = (material_id & ~MATERIAL_KIND_MASK) | MATERIAL_KIND_GLASS; @@ -452,9 +454,9 @@ main() float hit_distance = ray_payload_brdf.hit_distance; - if(ray_payload_brdf.max_transparent_distance <= hit_distance) + if(ray_payload_brdf.farthest_transparent_distance <= hit_distance) { - vec4 transparent = unpackHalf4x16(ray_payload_brdf.transparency); + vec4 transparent = get_payload_transparency(ray_payload_brdf, hit_distance); // Blend in the emissive component from the primary surface, with zero alpha transparent = alpha_blend_premultiplied(transparent, vec4(primary_emissive * throughput, 0)); @@ -470,10 +472,10 @@ main() { int cull_mask = AS_FLAG_PARTICLES | AS_FLAG_EXPLOSIONS; - trace_ray(ray, true, cull_mask); + trace_ray(ray, true, cull_mask, false); } - vec4 transparent = unpackHalf4x16(ray_payload_brdf.transparency); + vec4 transparent = get_payload_transparency(ray_payload_brdf, hit_distance); // Blend in the emissive component from the primary surface, with zero alpha transparent = alpha_blend_premultiplied(transparent, vec4(primary_emissive * throughput, 0)); diff --git a/src/refresh/vkpt/shader/reflect_refract.rgen 
b/src/refresh/vkpt/shader/reflect_refract.rgen index fa36be4f5..0ff166871 100644 --- a/src/refresh/vkpt/shader/reflect_refract.rgen +++ b/src/refresh/vkpt/shader/reflect_refract.rgen @@ -31,6 +31,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #define ENABLE_SHADOW_CAUSTICS #define ENABLE_SUN_SHAPE #include "path_tracer_rgen.h" +#include "path_tracer_transparency.glsl" #include "projection.glsl" layout(constant_id = 0) const uint spec_bounce_index = 0; @@ -480,10 +481,11 @@ main() else reflection_ray.origin -= normal.xyz * 0.001; - trace_ray(reflection_ray, backface_culling, reflection_cull_mask); + trace_ray(reflection_ray, backface_culling, reflection_cull_mask, false); // Add the transparency encountered along the reflection ray - transparent = alpha_blend(transparent, unpackHalf4x16(ray_payload_brdf.transparency) * vec4(throughput, 1)); + vec4 payload_transparency = get_payload_transparency_simple(ray_payload_brdf); + transparent = alpha_blend(transparent, payload_transparency * vec4(throughput, 1)); if(is_sky(ray_payload_brdf)) { @@ -627,6 +629,7 @@ main() get_material( triangle, + bary, tex_coord, vec2(0), vec2(0), @@ -652,7 +655,7 @@ main() if(is_water(material_id) || is_slime(material_id)) { - normal = get_water_normal(material_id, geo_normal, triangle.tangent, position, false); + normal = get_water_normal(material_id, geo_normal, triangle.tangents[0], position, false); if(abs(geo_normal.z) < 0.1) // hack to detect actual water vs. 
vertical force fields material_id = (material_id & ~MATERIAL_KIND_MASK) | MATERIAL_KIND_GLASS; diff --git a/src/refresh/vkpt/shader/vertex_buffer.h b/src/refresh/vkpt/shader/vertex_buffer.h index 8f9625812..270e57138 100644 --- a/src/refresh/vkpt/shader/vertex_buffer.h +++ b/src/refresh/vkpt/shader/vertex_buffer.h @@ -29,6 +29,8 @@ with this program; if not, write to the Free Software Foundation, Inc., #define MAX_LIGHT_LISTS (1 << 14) #define MAX_LIGHT_LIST_NODES (1 << 19) +#define MAX_IQM_MATRICES 32768 + #define MAX_LIGHT_POLYS 4096 #define LIGHT_POLY_VEC4S 4 @@ -42,11 +44,12 @@ with this program; if not, write to the Free Software Foundation, Inc., #define BSP_VERTEX_BUFFER_BINDING_IDX 0 #define MODEL_DYNAMIC_VERTEX_BUFFER_BINDING_IDX 1 #define LIGHT_BUFFER_BINDING_IDX 2 -#define READBACK_BUFFER_BINDING_IDX 3 -#define TONE_MAPPING_BUFFER_BINDING_IDX 4 -#define SUN_COLOR_BUFFER_BINDING_IDX 5 -#define SUN_COLOR_UBO_BINDING_IDX 6 -#define LIGHT_STATS_BUFFER_BINDING_IDX 7 +#define IQM_MATRIX_BUFFER_BINDING_IDX 3 +#define READBACK_BUFFER_BINDING_IDX 4 +#define TONE_MAPPING_BUFFER_BINDING_IDX 5 +#define SUN_COLOR_BUFFER_BINDING_IDX 6 +#define SUN_COLOR_UBO_BINDING_IDX 7 +#define LIGHT_STATS_BUFFER_BINDING_IDX 8 #define SUN_COLOR_ACCUMULATOR_FIXED_POINT_SCALE 0x100000 #define SKY_COLOR_ACCUMULATOR_FIXED_POINT_SCALE 0x100 @@ -68,7 +71,7 @@ with this program; if not, write to the Free Software Foundation, Inc., VERTEX_BUFFER_LIST_DO(float, 3, positions_instanced, (MAX_VERT_MODEL )) \ VERTEX_BUFFER_LIST_DO(float, 3, pos_prev_instanced, (MAX_VERT_MODEL )) \ VERTEX_BUFFER_LIST_DO(uint32_t, 1, normals_instanced, (MAX_VERT_MODEL )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, tangents_instanced, (MAX_PRIM_MODEL )) \ + VERTEX_BUFFER_LIST_DO(uint32_t, 1, tangents_instanced, (MAX_VERT_MODEL )) \ VERTEX_BUFFER_LIST_DO(float, 2, tex_coords_instanced, (MAX_VERT_MODEL )) \ VERTEX_BUFFER_LIST_DO(float, 1, alpha_instanced, (MAX_PRIM_MODEL )) \ VERTEX_BUFFER_LIST_DO(uint32_t, 1, 
clusters_instanced, (MAX_PRIM_MODEL )) \ @@ -84,6 +87,9 @@ with this program; if not, write to the Free Software Foundation, Inc., VERTEX_BUFFER_LIST_DO(float, 1, light_styles, (MAX_LIGHT_STYLES )) \ VERTEX_BUFFER_LIST_DO(uint32_t, 1, cluster_debug_mask, (MAX_LIGHT_LISTS / 32)) \ +#define IQM_MATRIX_BUFFER_LIST \ + VERTEX_BUFFER_LIST_DO(float, 4, iqm_matrices, (MAX_IQM_MATRICES)) \ + #define VERTEX_BUFFER_LIST_DO(type, dim, name, size) \ type name[ALIGN_SIZE_4(size, dim)]; @@ -102,6 +108,11 @@ struct LightBuffer LIGHT_BUFFER_LIST }; +struct IqmMatrixBuffer +{ + IQM_MATRIX_BUFFER_LIST +}; + #undef VERTEX_BUFFER_LIST_DO @@ -151,6 +162,7 @@ struct SunColorBuffer typedef struct BspVertexBuffer BspVertexBuffer; typedef struct ModelDynamicVertexBuffer ModelDynamicVertexBuffer; typedef struct LightBuffer LightBuffer; +typedef struct IqmMatrixBuffer IqmMatrixBuffer; typedef struct ReadbackBuffer ReadbackBuffer; typedef struct ToneMappingBuffer ToneMappingBuffer; typedef struct SunColorBuffer SunColorBuffer; @@ -160,11 +172,29 @@ typedef struct { vec3_t normal; vec2_t texcoord; } model_vertex_t; + +typedef struct +{ + vec3_t position; + vec3_t normal; + vec2_t texcoord; + vec3_t tangent; + uint32_t blend_indices; + vec4_t blend_weights; +} iqm_vertex_t; #else #define MODEL_VERTEX_SIZE 8 #define MODEL_VERTEX_POSITION 0 #define MODEL_VERTEX_NORMAL 3 #define MODEL_VERTEX_TEXCOORD 6 + +#define IQM_VERTEX_SIZE 16 +#define IQM_VERTEX_POSITION 0 +#define IQM_VERTEX_NORMAL 3 +#define IQM_VERTEX_TEXCOORD 6 +#define IQM_VERTEX_TANGENT 8 +#define IQM_VERTEX_INDICES 11 +#define IQM_VERTEX_WEIGHTS 12 #endif #ifdef VKPT_SHADER @@ -215,6 +245,10 @@ layout(set = VERTEX_BUFFER_DESC_SET_IDX, binding = LIGHT_BUFFER_BINDING_IDX) rea LightBuffer lbo; }; +layout(set = VERTEX_BUFFER_DESC_SET_IDX, binding = IQM_MATRIX_BUFFER_BINDING_IDX) readonly buffer IQM_MATRIX_BUFFER { + IqmMatrixBuffer iqmbo; +}; + layout(set = VERTEX_BUFFER_DESC_SET_IDX, binding = READBACK_BUFFER_BINDING_IDX) buffer 
READBACK_BUFFER { ReadbackBuffer readback; }; @@ -368,13 +402,18 @@ MODEL_DYNAMIC_VERTEX_BUFFER_LIST LIGHT_BUFFER_LIST #undef VERTEX_BUFFER_LIST_DO +#define VERTEX_BUFFER_LIST_DO(type, dim, name, size) \ + GET_##type##_##dim(iqmbo,name) +IQM_MATRIX_BUFFER_LIST +#undef VERTEX_BUFFER_LIST_DO + struct Triangle { mat3x3 positions; mat3x3 positions_prev; mat3x3 normals; mat3x2 tex_coords; - vec3 tangent; + mat3x3 tangents; uint material_id; uint cluster; float alpha; @@ -403,7 +442,9 @@ get_bsp_triangle(uint prim_id) t.tex_coords[1] = get_tex_coords_bsp(prim_id * 3 + 1); t.tex_coords[2] = get_tex_coords_bsp(prim_id * 3 + 2); - t.tangent = decode_normal(get_tangents_bsp(prim_id)); + t.tangents[0] = decode_normal(get_tangents_bsp(prim_id)); + t.tangents[1] = t.tangents[0]; + t.tangents[2] = t.tangents[0]; t.material_id = get_materials_bsp(prim_id); @@ -432,7 +473,9 @@ get_instanced_triangle(uint prim_id) t.normals[1] = decode_normal(get_normals_instanced(prim_id * 3 + 1)); t.normals[2] = decode_normal(get_normals_instanced(prim_id * 3 + 2)); - t.tangent = decode_normal(get_tangents_instanced(prim_id)); + t.tangents[0] = decode_normal(get_tangents_instanced(prim_id * 3 + 0)); + t.tangents[1] = decode_normal(get_tangents_instanced(prim_id * 3 + 1)); + t.tangents[2] = decode_normal(get_tangents_instanced(prim_id * 3 + 2)); t.tex_coords[0] = get_tex_coords_instanced(prim_id * 3 + 0); t.tex_coords[1] = get_tex_coords_instanced(prim_id * 3 + 1); @@ -465,7 +508,9 @@ store_instanced_triangle(Triangle t, uint instance_id, uint prim_id) set_normals_instanced(prim_id * 3 + 1, encode_normal(t.normals[1])); set_normals_instanced(prim_id * 3 + 2, encode_normal(t.normals[2])); - set_tangents_instanced(prim_id, encode_normal(t.tangent)); + set_tangents_instanced(prim_id * 3 + 0, encode_normal(t.tangents[0])); + set_tangents_instanced(prim_id * 3 + 1, encode_normal(t.tangents[1])); + set_tangents_instanced(prim_id * 3 + 2, encode_normal(t.tangents[2])); set_tex_coords_instanced(prim_id * 
3 + 0, t.tex_coords[0]); set_tex_coords_instanced(prim_id * 3 + 1, t.tex_coords[1]); @@ -529,5 +574,15 @@ get_light_polygon(uint index) return light; } +mat3x4 +get_iqm_matrix(uint index) +{ + mat3x4 result; + result[0] = get_iqm_matrices(index * 3 + 0); + result[1] = get_iqm_matrices(index * 3 + 1); + result[2] = get_iqm_matrices(index * 3 + 2); + return result; +} + #endif #endif diff --git a/src/refresh/vkpt/textures.c b/src/refresh/vkpt/textures.c index 8269e28d4..95119b1b5 100644 --- a/src/refresh/vkpt/textures.c +++ b/src/refresh/vkpt/textures.c @@ -76,7 +76,7 @@ static const float megabyte = 1048576.0f; void vkpt_textures_prefetch() { - byte* buffer = NULL; + char * buffer = NULL; ssize_t buffer_size = 0; char const * filename = "prefetch.txt"; buffer_size = FS_LoadFile(filename, (void**)&buffer); @@ -312,10 +312,10 @@ load_blue_noise() byte* filedata = 0; uint16_t *data = 0; - ssize_t filelen = FS_LoadFile(buf, &filedata); + ssize_t filelen = FS_LoadFile(buf, (void**)&filedata); if (filedata) { - data = stbi_load_16_from_memory(filedata, filelen, &w, &h, &n, 4); + data = stbi_load_16_from_memory(filedata, (int)filelen, &w, &h, &n, 4); Z_Free(filedata); } diff --git a/src/refresh/vkpt/transparency.c b/src/refresh/vkpt/transparency.c index 82f20327c..c95e33417 100644 --- a/src/refresh/vkpt/transparency.c +++ b/src/refresh/vkpt/transparency.c @@ -20,25 +20,29 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "shared/shared.h" #include "vkpt.h" #include "vk_util.h" +#include "conversion.h" #define TR_PARTICLE_MAX_NUM MAX_PARTICLES #define TR_BEAM_MAX_NUM MAX_ENTITIES #define TR_SPRITE_MAX_NUM MAX_ENTITIES -#define TR_VERTEX_MAX_NUM (TR_PARTICLE_MAX_NUM + TR_BEAM_MAX_NUM + TR_SPRITE_MAX_NUM) * 4 -#define TR_INDEX_MAX_NUM (TR_PARTICLE_MAX_NUM + TR_BEAM_MAX_NUM + TR_SPRITE_MAX_NUM) * 6 -#define TR_POSITION_SIZE 3 * sizeof(float) -#define TR_COLOR_SIZE 4 * sizeof(float) -#define TR_SPRITE_INFO_SIZE 2 * sizeof(float) +#define 
TR_VERTEX_MAX_NUM ((TR_PARTICLE_MAX_NUM + TR_SPRITE_MAX_NUM) * 4) +#define TR_INDEX_MAX_NUM ((TR_PARTICLE_MAX_NUM + TR_SPRITE_MAX_NUM) * 6) +#define TR_BEAM_AABB_SIZE sizeof(VkAabbPositionsKHR) +#define TR_POSITION_SIZE (3 * sizeof(float)) +#define TR_COLOR_SIZE (4 * sizeof(float)) +#define TR_BEAM_INTERSECT_SIZE (12 * sizeof(float)) +#define TR_SPRITE_INFO_SIZE (2 * sizeof(float)) struct { size_t vertex_position_host_offset; + size_t beam_aabb_host_offset; size_t particle_color_host_offset; size_t beam_color_host_offset; size_t sprite_info_host_offset; size_t current_upload_size; - size_t beam_vertex_device_offset; + size_t beam_intersect_host_offset; size_t sprite_vertex_device_offset; @@ -53,15 +57,18 @@ struct char* host_buffer_shadow; BufferResource_t vertex_buffer; BufferResource_t index_buffer; + BufferResource_t beam_aabb_buffer; BufferResource_t particle_color_buffer; BufferResource_t beam_color_buffer; BufferResource_t sprite_info_buffer; + BufferResource_t beam_intersect_buffer; VkBufferView particle_color_buffer_view; VkBufferView beam_color_buffer_view; VkBufferView sprite_info_buffer_view; + VkBufferView beam_intersect_buffer_view; VkBuffer host_buffer; VkDeviceMemory host_buffer_memory; - VkBufferMemoryBarrier transfer_barriers[4]; + VkBufferMemoryBarrier transfer_barriers[6]; } transparency; // initialization @@ -72,7 +79,7 @@ static void fill_index_buffer(); // update static void write_particle_geometry(const float* view_matrix, const particle_t* particles, int particle_num); -static void write_beam_geometry(const float* view_matrix, const entity_t* entities, int entity_num); +static void write_beam_geometry(const entity_t* entities, int entity_num); static void write_sprite_geometry(const float* view_matrix, const entity_t* entities, int entity_num); static void upload_geometry(VkCommandBuffer command_buffer); @@ -107,9 +114,10 @@ qboolean initialize_transparency() const size_t particle_color_size = TR_PARTICLE_MAX_NUM * TR_COLOR_SIZE; const 
size_t particle_data_size = particle_vertex_position_max_size + particle_color_size; - const size_t beam_vertex_position_max_size = TR_BEAM_MAX_NUM * TR_POSITION_SIZE; + const size_t beam_aabb_max_size = TR_BEAM_MAX_NUM * TR_BEAM_AABB_SIZE; const size_t beam_color_size = TR_BEAM_MAX_NUM * TR_COLOR_SIZE; - const size_t beam_data_size = beam_vertex_position_max_size + beam_color_size; + const size_t beam_intersect_size = TR_BEAM_MAX_NUM * TR_BEAM_INTERSECT_SIZE; + const size_t beam_data_size = beam_aabb_max_size + beam_color_size + beam_intersect_size; const size_t sprite_vertex_position_max_size = TR_SPRITE_MAX_NUM * TR_POSITION_SIZE; const size_t sprite_info_size = TR_SPRITE_MAX_NUM * TR_SPRITE_INFO_SIZE; @@ -135,11 +143,14 @@ void destroy_transparency() vkDestroyBufferView(qvk.device, transparency.particle_color_buffer_view, NULL); vkDestroyBufferView(qvk.device, transparency.beam_color_buffer_view, NULL); vkDestroyBufferView(qvk.device, transparency.sprite_info_buffer_view, NULL); + vkDestroyBufferView(qvk.device, transparency.beam_intersect_buffer_view, NULL); buffer_destroy(&transparency.vertex_buffer); buffer_destroy(&transparency.index_buffer); + buffer_destroy(&transparency.beam_aabb_buffer); buffer_destroy(&transparency.particle_color_buffer); buffer_destroy(&transparency.beam_color_buffer); buffer_destroy(&transparency.sprite_info_buffer); + buffer_destroy(&transparency.beam_intersect_buffer); vkDestroyBuffer(qvk.device, transparency.host_buffer, NULL); vkFreeMemory(qvk.device, transparency.host_buffer_memory, NULL); @@ -162,7 +173,11 @@ void update_transparency(VkCommandBuffer command_buffer, const float* view_matri for (int i = 0; i < entity_num; i++) { if (entities[i].flags & RF_BEAM) - ++beam_num; + { + // write_beam_geometry skips zero-width beams as well + if(entities[i].frame > 0) + ++beam_num; + } else if ((entities[i].model & 0x80000000) == 0) { const model_t* model = MOD_ForHandle(entities[i].model); @@ -177,20 +192,21 @@ void 
update_transparency(VkCommandBuffer command_buffer, const float* view_matri transparency.particle_num = particle_num; transparency.sprite_num = sprite_num; - const size_t particle_vertices_size = particle_num * 4 * TR_POSITION_SIZE; - const size_t beam_vertices_size = beam_num * 4 * TR_POSITION_SIZE; - const size_t sprite_vertices_size = sprite_num * 4 * TR_POSITION_SIZE; + const size_t particle_vertices_size = particle_num * (4 * TR_POSITION_SIZE); + const size_t sprite_vertices_size = sprite_num * (4 * TR_POSITION_SIZE); transparency.vertex_position_host_offset = 0; - transparency.particle_color_host_offset = particle_vertices_size + beam_vertices_size + sprite_vertices_size; - transparency.beam_color_host_offset = transparency.particle_color_host_offset + particle_num * TR_COLOR_SIZE; - transparency.sprite_info_host_offset = transparency.beam_color_host_offset + beam_num * TR_COLOR_SIZE; - transparency.current_upload_size = transparency.sprite_info_host_offset + sprite_num * TR_SPRITE_INFO_SIZE; + transparency.particle_color_host_offset = transparency.vertex_position_host_offset + particle_vertices_size + sprite_vertices_size; + transparency.sprite_info_host_offset = transparency.particle_color_host_offset + particle_num * TR_COLOR_SIZE; + transparency.beam_aabb_host_offset = transparency.sprite_info_host_offset + sprite_num * TR_SPRITE_INFO_SIZE; + transparency.beam_color_host_offset = transparency.beam_aabb_host_offset + beam_num * TR_BEAM_AABB_SIZE; + transparency.beam_intersect_host_offset = transparency.beam_color_host_offset + beam_num * TR_COLOR_SIZE; + transparency.current_upload_size = transparency.beam_intersect_host_offset + beam_num * TR_BEAM_INTERSECT_SIZE; if (particle_num > 0 || beam_num > 0 || sprite_num > 0) { write_particle_geometry(view_matrix, particles, particle_num); - write_beam_geometry(view_matrix, entities, entity_num); + write_beam_geometry(entities, entity_num); write_sprite_geometry(view_matrix, entities, entity_num); 
upload_geometry(command_buffer); } @@ -217,12 +233,6 @@ void vkpt_get_transparency_buffers( *num_indices = transparency.particle_num * 6; return; - case VKPT_TRANSPARENCY_BEAMS: - *vertex_offset = transparency.beam_vertex_device_offset; - *num_vertices = transparency.beam_num * 4; - *num_indices = transparency.beam_num * 6; - return; - case VKPT_TRANSPARENCY_SPRITES: *vertex_offset = transparency.sprite_vertex_device_offset; *num_vertices = transparency.sprite_num * 4; @@ -237,6 +247,16 @@ void vkpt_get_transparency_buffers( } } +void vkpt_get_beam_aabb_buffer( + BufferResource_t** aabb_buffer, + uint64_t* aabb_offset, + uint32_t* num_aabbs) +{ + *aabb_buffer = &transparency.beam_aabb_buffer; + *aabb_offset = 0; + *num_aabbs = transparency.beam_num; +} + VkBufferView get_transparency_particle_color_buffer_view() { return transparency.particle_color_buffer_view; @@ -252,6 +272,11 @@ VkBufferView get_transparency_sprite_info_buffer_view() return transparency.sprite_info_buffer_view; } +VkBufferView get_transparency_beam_intersect_buffer_view() +{ + return transparency.beam_intersect_buffer_view; +} + void get_transparency_counts(int* particle_num, int* beam_num, int* sprite_num) { *particle_num = transparency.particle_num; @@ -321,24 +346,18 @@ static void write_particle_geometry(const float* view_matrix, const particle_t* } } -static void write_beam_geometry(const float* view_matrix, const entity_t* entities, int entity_num) +static void write_beam_geometry(const entity_t* entities, int entity_num) { - const float beam_width = cvar_pt_beam_width->value; const float hdr_factor = cvar_pt_particle_emissive->value; - const vec3_t view_y = { view_matrix[1], view_matrix[5], view_matrix[9] }; - if (transparency.beam_num == 0) return; - // TODO: remove vkpt_refdef.fd, it's better to calculate it from the view matrix - const vec3_t view_origin = { vkpt_refdef.fd->vieworg[0], vkpt_refdef.fd->vieworg[1], vkpt_refdef.fd->vieworg[2] }; - - const size_t particle_vertex_data_size 
= transparency.particle_num * 4 * TR_POSITION_SIZE; - const size_t beam_vertex_offset = transparency.vertex_position_host_offset + particle_vertex_data_size; + const size_t beam_aabb_offset = transparency.beam_aabb_host_offset; // TODO: use better alignment? - vec3_t* vertex_positions = (vec3_t*)(transparency.host_buffer_shadow + beam_vertex_offset); + VkAabbPositionsKHR* aabb_positions = (VkAabbPositionsKHR*)(transparency.host_buffer_shadow + beam_aabb_offset); + uint32_t* beam_infos = (uint32_t*)(transparency.host_buffer_shadow + transparency.beam_intersect_host_offset); float* beam_colors = (float*)(transparency.host_buffer_shadow + transparency.beam_color_host_offset); for (int i = 0; i < entity_num; i++) @@ -348,37 +367,82 @@ static void write_beam_geometry(const float* view_matrix, const entity_t* entiti const entity_t* beam = entities + i; + // Adjust beam width. Default "narrow" beams have a width of 4, "fat" beams have 16. + if (beam->frame == 0) + continue; + const float beam_radius = cvar_pt_beam_width->value * beam->frame * 0.5; + cast_u32_to_f32_color(beam->skinnum, &beam->rgba, beam_colors, hdr_factor); beam_colors[3] = beam->alpha; - beam_colors = beam_colors + 4; + beam_colors += 4; vec3_t begin; vec3_t end; VectorCopy(beam->oldorigin, begin); VectorCopy(beam->origin, end); + // Compute AABB for beam + vec3_t beam_aabb_min; + vec3_t beam_aabb_max; + for (int i = 0; i < 3; i++) { + if(begin[i] < end[i]) { + beam_aabb_min[i] = begin[i]; + beam_aabb_max[i] = end[i]; + } else { + beam_aabb_min[i] = end[i]; + beam_aabb_max[i] = begin[i]; + } + beam_aabb_min[i] -= beam_radius; // bit of an overestimation for 'tilted' beams + beam_aabb_max[i] += beam_radius; + } + aabb_positions->minX = beam_aabb_min[0]; + aabb_positions->minY = beam_aabb_min[1]; + aabb_positions->minZ = beam_aabb_min[2]; + aabb_positions->maxX = beam_aabb_max[0]; + aabb_positions->maxY = beam_aabb_max[1]; + aabb_positions->maxZ = beam_aabb_max[2]; + ++aabb_positions; + vec3_t to_end;
VectorSubtract(end, begin, to_end); vec3_t norm_dir; VectorCopy(to_end, norm_dir); VectorNormalize(norm_dir); - VectorMA(begin, -5.f, norm_dir, begin); - VectorMA(end, 5.f, norm_dir, end); - - vec3_t to_view; - VectorSubtract(view_origin, begin, to_view); - vec3_t x_axis; - CrossProduct(to_end, to_view, x_axis); - VectorNormalize(x_axis); - VectorScale(x_axis, beam_width, x_axis); - - VectorSubtract(end, x_axis, vertex_positions[0]); - VectorAdd(end, x_axis, vertex_positions[1]); - VectorAdd(begin, x_axis, vertex_positions[2]); - VectorSubtract(begin, x_axis, vertex_positions[3]); - vertex_positions += 4; + // Compute a transform from a beam at origin, pointing towards +Z, to actual beam origin/direction + vec3_t beam_coordsys_x, beam_coordsys_y; + MakeNormalVectors(norm_dir, beam_coordsys_x, beam_coordsys_y); + mat4_t world_to_beam; + + world_to_beam[0] = beam_coordsys_x[0]; + world_to_beam[1] = beam_coordsys_y[0]; + world_to_beam[2] = norm_dir[0]; + world_to_beam[3] = 0; + + world_to_beam[4] = beam_coordsys_x[1]; + world_to_beam[5] = beam_coordsys_y[1]; + world_to_beam[6] = norm_dir[1]; + world_to_beam[7] = 0; + + world_to_beam[8] = beam_coordsys_x[2]; + world_to_beam[9] = beam_coordsys_y[2]; + world_to_beam[10] = norm_dir[2]; + world_to_beam[11] = 0; + + world_to_beam[12] = -DotProduct(begin, beam_coordsys_x); + world_to_beam[13] = -DotProduct(begin, beam_coordsys_y); + world_to_beam[14] = -DotProduct(begin, norm_dir); + world_to_beam[15] = 1; + + Vector4Copy((world_to_beam + 12), (float *)(beam_infos)); + // First three columns are normals, so it's fine to pack them to half floats + packHalf4x16(beam_infos + 4, world_to_beam); + packHalf4x16(beam_infos + 6, world_to_beam + 4); + packHalf4x16(beam_infos + 8, world_to_beam + 8); + *(float *)(beam_infos + 10) = beam_radius; + *(float *)(beam_infos + 11) = VectorLength(to_end); + beam_infos += TR_BEAM_INTERSECT_SIZE / sizeof(uint32_t); } } @@ -490,7 +554,6 @@ qboolean vkpt_build_cylinder_light(light_poly_t* 
light_list, int* num_lights, in void vkpt_build_beam_lights(light_poly_t* light_list, int* num_lights, int max_lights, bsp_t *bsp, entity_t* entities, int num_entites, float adapted_luminance) { - const float beam_width = cvar_pt_beam_width->value; const float hdr_factor = cvar_pt_beam_lights->value * adapted_luminance * 20.f; if (hdr_factor <= 0.f) @@ -521,6 +584,11 @@ void vkpt_build_beam_lights(light_poly_t* light_list, int* num_lights, int max_l const entity_t* beam = beams[i]; + // Adjust beam width. Default "narrow" beams have a width of 4, "fat" beams have 16. + if (beam->frame == 0) + continue; + const float beam_radius = cvar_pt_beam_width->value * beam->frame * 0.5; + vec3_t begin; vec3_t end; VectorCopy(beam->oldorigin, begin); @@ -538,7 +606,7 @@ void vkpt_build_beam_lights(light_poly_t* light_list, int* num_lights, int max_l vec3_t color; cast_u32_to_f32_color(beam->skinnum, &beam->rgba, color, hdr_factor); - vkpt_build_cylinder_light(light_list, num_lights, max_lights, bsp, begin, end, color, beam_width); + vkpt_build_cylinder_light(light_list, num_lights, max_lights, bsp, begin, end, color, beam_radius); } } @@ -555,12 +623,11 @@ static void write_sprite_geometry(const float* view_matrix, const entity_t* enti const vec3_t view_origin = { vkpt_refdef.fd->vieworg[0], vkpt_refdef.fd->vieworg[1], vkpt_refdef.fd->vieworg[2] }; const size_t particle_vertex_data_size = transparency.particle_num * 4 * TR_POSITION_SIZE; - const size_t beam_vertex_data_size = transparency.beam_num * 4 * TR_POSITION_SIZE; - const size_t sprite_vertex_offset = transparency.vertex_position_host_offset + particle_vertex_data_size + beam_vertex_data_size; + const size_t sprite_vertex_offset = transparency.vertex_position_host_offset + particle_vertex_data_size; // TODO: use better alignment? 
vec3_t* vertex_positions = (vec3_t*)(transparency.host_buffer_shadow + sprite_vertex_offset); - uint32_t* sprite_info = (int*)(transparency.host_buffer_shadow + transparency.sprite_info_host_offset); + uint32_t* sprite_info = (uint32_t*)(transparency.host_buffer_shadow + transparency.sprite_info_host_offset); int sprite_count = 0; for (int i = 0; i < entity_num; i++) @@ -635,8 +702,7 @@ static void upload_geometry(VkCommandBuffer command_buffer) { const size_t frame_offset = transparency.host_frame_index * transparency.host_frame_size; - transparency.beam_vertex_device_offset = transparency.particle_num * 4 * TR_POSITION_SIZE; - transparency.sprite_vertex_device_offset = transparency.beam_vertex_device_offset + transparency.beam_num * 4 * TR_POSITION_SIZE; + transparency.sprite_vertex_device_offset = transparency.particle_num * 4 * TR_POSITION_SIZE; const size_t host_buffer_offset = transparency.host_frame_index * transparency.host_frame_size; @@ -647,7 +713,13 @@ static void upload_geometry(VkCommandBuffer command_buffer) const VkBufferCopy vertices = { .srcOffset = host_buffer_offset + transparency.vertex_position_host_offset, .dstOffset = 0, - .size = (transparency.particle_num + transparency.beam_num + transparency.sprite_num) * 4 * TR_POSITION_SIZE + .size = (transparency.particle_num + transparency.sprite_num) * 4 * TR_POSITION_SIZE + }; + + const VkBufferCopy beam_aabbs = { + .srcOffset = host_buffer_offset + transparency.beam_aabb_host_offset, + .dstOffset = 0, + .size = transparency.beam_num * TR_BEAM_AABB_SIZE }; const VkBufferCopy particle_colors = { @@ -668,10 +740,20 @@ static void upload_geometry(VkCommandBuffer command_buffer) .size = transparency.sprite_num * TR_SPRITE_INFO_SIZE }; + const VkBufferCopy beam_intersect = { + .srcOffset = host_buffer_offset + transparency.beam_intersect_host_offset, + .dstOffset = 0, + .size = transparency.beam_num * TR_BEAM_INTERSECT_SIZE + }; + if (vertices.size) vkCmdCopyBuffer(command_buffer, 
transparency.host_buffer, transparency.vertex_buffer.buffer, 1, &vertices); + if (beam_aabbs.size) + vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.beam_aabb_buffer.buffer, + 1, &beam_aabbs); + if (particle_colors.size) vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.particle_color_buffer.buffer, 1, &particle_colors); @@ -684,6 +766,10 @@ static void upload_geometry(VkCommandBuffer command_buffer) vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.sprite_info_buffer.buffer, 1, &sprite_infos); + if (beam_intersect.size) + vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.beam_intersect_buffer.buffer, + 1, &beam_intersect); + for (size_t i = 0; i < LENGTH(transparency.transfer_barriers); i++) { transparency.transfer_barriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -701,6 +787,10 @@ static void upload_geometry(VkCommandBuffer command_buffer) transparency.transfer_barriers[2].size = beam_colors.size; transparency.transfer_barriers[3].buffer = transparency.sprite_info_buffer.buffer; transparency.transfer_barriers[3].size = sprite_infos.size; + transparency.transfer_barriers[4].buffer = transparency.beam_aabb_buffer.buffer; + transparency.transfer_barriers[4].size = beam_aabbs.size; + transparency.transfer_barriers[5].buffer = transparency.beam_intersect_buffer.buffer; + transparency.transfer_barriers[5].size = beam_intersect.size; } @@ -716,7 +806,13 @@ static void create_buffers() buffer_create( &transparency.vertex_buffer, - transparency.host_frame_size, + TR_VERTEX_MAX_NUM * sizeof(vec3_t), + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + buffer_create( + &transparency.beam_aabb_buffer, + TR_BEAM_MAX_NUM * sizeof(VkAabbPositionsKHR), VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | 
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); @@ -743,6 +839,12 @@ static void create_buffers() TR_SPRITE_MAX_NUM * TR_SPRITE_INFO_SIZE, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + buffer_create( + &transparency.beam_intersect_buffer, + TR_BEAM_MAX_NUM * TR_BEAM_INTERSECT_SIZE, + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); } static qboolean allocate_and_bind_memory_to_buffers() @@ -774,7 +876,7 @@ static qboolean allocate_and_bind_memory_to_buffers() const size_t host_buffer_size = transparency.host_buffered_frame_num * transparency.host_frame_size; _VK(vkMapMemory(qvk.device, transparency.host_buffer_memory, 0, host_buffer_size, 0, - &transparency.mapped_host_buffer)); + (void**)&transparency.mapped_host_buffer)); transparency.host_buffer_shadow = Z_Mallocz(transparency.host_frame_size); @@ -804,6 +906,13 @@ static void create_buffer_views() .range = TR_SPRITE_MAX_NUM * TR_SPRITE_INFO_SIZE }; + const VkBufferViewCreateInfo beam_intersect_view_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = transparency.beam_intersect_buffer.buffer, + .format = VK_FORMAT_R32G32B32A32_UINT, + .range = TR_BEAM_MAX_NUM * TR_BEAM_INTERSECT_SIZE + }; + _VK(vkCreateBufferView(qvk.device, &particle_color_view_info, NULL, &transparency.particle_color_buffer_view)); @@ -812,6 +921,9 @@ static void create_buffer_views() _VK(vkCreateBufferView(qvk.device, &sprite_info_view_info, NULL, &transparency.sprite_info_buffer_view)); + + _VK(vkCreateBufferView(qvk.device, &beam_intersect_view_info, NULL, + &transparency.beam_intersect_buffer_view)); } static void fill_index_buffer() diff --git a/src/refresh/vkpt/vertex_buffer.c b/src/refresh/vkpt/vertex_buffer.c index b6386f849..ad1fb2c45 
100644 --- a/src/refresh/vkpt/vertex_buffer.c +++ b/src/refresh/vkpt/vertex_buffer.c @@ -24,6 +24,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include #include +#include "conversion.h" #include "precomputed_sky.h" @@ -95,6 +96,21 @@ vkpt_light_buffer_upload_staging(VkCommandBuffer cmd_buf) return VK_SUCCESS; } +VkResult +vkpt_iqm_matrix_buffer_upload_staging(VkCommandBuffer cmd_buf) +{ + BufferResource_t* staging = qvk.buf_iqm_matrices_staging + qvk.current_frame_index; + + assert(!staging->is_mapped); + + VkBufferCopy copyRegion = { + .size = sizeof(IqmMatrixBuffer), + }; + vkCmdCopyBuffer(cmd_buf, staging->buffer, qvk.buf_iqm_matrices.buffer, 1, &copyRegion); + + return VK_SUCCESS; +} + VkResult vkpt_vertex_buffer_upload_bsp_mesh_to_staging(bsp_mesh_t *bsp_mesh) { @@ -162,7 +178,7 @@ inject_model_lights(bsp_mesh_t* bsp_mesh, bsp_t* bsp, int num_model_lights, ligh { if (local_light_counts[c]) { - const char* mask = BSP_GetPvs(bsp, c); + const byte* mask = BSP_GetPvs(bsp, c); for (int j = 0; j < bsp->visrowsize; j++) { if (mask[j]) { @@ -204,7 +220,7 @@ inject_model_lights(bsp_mesh_t* bsp_mesh, bsp_t* bsp, int num_model_lights, ligh for (int nlight = 0; nlight < num_model_lights; nlight++) { - const char* mask = BSP_GetPvs(bsp, transformed_model_lights[nlight].cluster); + const byte* mask = BSP_GetPvs(bsp, transformed_model_lights[nlight].cluster); for (int j = 0; j < bsp->visrowsize; j++) { if (mask[j]) { @@ -259,59 +275,6 @@ copy_light(const light_poly_t* light, float* vblight, const float* sky_radiance) vblight[15] = 0.f; } -/* - Float -> Half converter function, adapted from - https://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion -*/ - -typedef union -{ - float f; - int32_t si; - uint32_t ui; -} Bits; - -static uint16_t floatToHalf(float value) -{ - static int const shift = 13; - static int const shiftSign = 16; - - static int32_t const infN = 0x7F800000; // flt32 infinity - static int32_t const maxN
= 0x477FE000; // max flt16 normal as a flt32 - static int32_t const minN = 0x38800000; // min flt16 normal as a flt32 - static int32_t const signN = 0x80000000; // flt32 sign bit - - static int32_t const infC = 0x3FC00; - static int32_t const nanN = 0x7F802000; // minimum flt16 nan as a flt32 - static int32_t const maxC = 0x23BFF; - static int32_t const minC = 0x1C400; - static int32_t const signC = 0x8000; // flt16 sign bit - - static int32_t const mulN = 0x52000000; // (1 << 23) / minN - static int32_t const mulC = 0x33800000; // minN / (1 << (23 - shift)) - - static int32_t const subC = 0x003FF; // max flt32 subnormal down shifted - static int32_t const norC = 0x00400; // min flt32 normal down shifted - - static int32_t const maxD = 0x1C000; - static int32_t const minD = 0x1C000; - - Bits v, s; - v.f = value; - uint32_t sign = v.si & signN; - v.si ^= sign; - sign >>= shiftSign; // logical shift - s.si = mulN; - s.si = s.f * v.f; // correct subnormals - v.si ^= (s.si ^ v.si) & -(minN > v.si); - v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN)); - v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN)); - v.ui >>= shift; // logical shift - v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC); - v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC); - return v.ui | sign; -} - extern vkpt_refdef_t vkpt_refdef; extern char cluster_debug_mask[VIS_MAX_BYTES]; @@ -447,8 +410,10 @@ vkpt_vertex_buffer_upload_models() for(int i = 0; i < MAX_MODELS; i++) { + const model_t* model = &r_models[i]; model_vbo_t* vbo = model_vertex_data + i; - if (!r_models[i].meshes && vbo->buffer.buffer) { + + if (!model->meshes && vbo->buffer.buffer) { // model unloaded, destroy the VBO write_model_vbo_descriptor(i, null_buffer.buffer, null_buffer.size); buffer_destroy(&vbo->buffer); @@ -457,32 +422,37 @@ vkpt_vertex_buffer_upload_models() continue; } - if(!r_models[i].meshes) { + if(!model->meshes) { // model does not exist continue; } - if (r_models[i].registration_sequence <= 
vbo->registration_sequence) { + if (model->registration_sequence <= vbo->registration_sequence) { // VBO is valid, nothing to do continue; } - //Com_Printf("Loading model[%d] %s\n", i, r_models[i].name); + //Com_Printf("Loading model[%d] %s\n", i, model->name); - assert(r_models[i].numframes > 0); + assert(model->numframes > 0); int model_vertices = 0; int model_indices = 0; - for (int nmesh = 0; nmesh < r_models[i].nummeshes; nmesh++) + for (int nmesh = 0; nmesh < model->nummeshes; nmesh++) { - maliasmesh_t *m = r_models[i].meshes + nmesh; - int num_verts = r_models[i].numframes * m->numverts; + maliasmesh_t *m = model->meshes + nmesh; + int num_verts = model->numframes * m->numverts; model_vertices += num_verts; model_indices += m->numindices; } - size_t vbo_size = model_vertices * sizeof(model_vertex_t) + model_indices * sizeof(uint32_t); + size_t vbo_size = model_indices * sizeof(uint32_t); + if (model->iqmData) + vbo_size += model_vertices * sizeof(iqm_vertex_t); + else + vbo_size += model_vertices * sizeof(model_vertex_t); + buffer_create(&vbo->buffer, vbo_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); @@ -494,25 +464,56 @@ vkpt_vertex_buffer_upload_models() uint32_t* staging_data = (uint32_t*)buffer_map(&vbo->staging_buffer); int write_ptr = 0; - for (int nmesh = 0; nmesh < r_models[i].nummeshes; nmesh++) + for (int nmesh = 0; nmesh < model->nummeshes; nmesh++) { - maliasmesh_t *m = r_models[i].meshes + nmesh; + maliasmesh_t *m = model->meshes + nmesh; assert(m->numverts > 0); m->vertex_offset = write_ptr; - int num_verts = r_models[i].numframes * m->numverts; + int num_verts = model->numframes * m->numverts; - for (int nvert = 0; nvert < num_verts; nvert++) + if (model->iqmData) { - model_vertex_t* vtx = (model_vertex_t*)(staging_data + write_ptr) + nvert; - memcpy(vtx->position, m->positions + nvert, sizeof(vec3_t)); - memcpy(vtx->normal, m->normals + nvert, sizeof(vec3_t)); - 
memcpy(vtx->texcoord, m->tex_coords + nvert, sizeof(vec2_t)); + for (int nvert = 0; nvert < num_verts; nvert++) + { + iqm_vertex_t* vtx = (iqm_vertex_t*)(staging_data + write_ptr) + nvert; + memcpy(vtx->position, m->positions + nvert, sizeof(vec3_t)); + memcpy(vtx->normal, m->normals + nvert, sizeof(vec3_t)); + memcpy(vtx->texcoord, m->tex_coords + nvert, sizeof(vec2_t)); + + if (m->tangents) + memcpy(vtx->tangent, m->tangents + nvert, sizeof(vec3_t)); + else + VectorSet(vtx->tangent, 0.f, 0.f, 0.f); + + if (m->blend_indices && m->blend_weights) + { + vtx->blend_indices = m->blend_indices[nvert]; + memcpy(vtx->blend_weights, m->blend_weights + nvert, sizeof(vec4_t)); + } + else + { + vtx->blend_indices = 0; + Vector4Set(vtx->blend_weights, 0.f, 0.f, 0.f, 0.f); + } + } + + write_ptr += num_verts * (int)(sizeof(iqm_vertex_t) / sizeof(uint32_t)); } + else + { + for (int nvert = 0; nvert < num_verts; nvert++) + { + model_vertex_t* vtx = (model_vertex_t*)(staging_data + write_ptr) + nvert; + memcpy(vtx->position, m->positions + nvert, sizeof(vec3_t)); + memcpy(vtx->normal, m->normals + nvert, sizeof(vec3_t)); + memcpy(vtx->texcoord, m->tex_coords + nvert, sizeof(vec2_t)); + } - write_ptr += num_verts * (sizeof(model_vertex_t) / sizeof(uint32_t)); + write_ptr += num_verts * (int)(sizeof(model_vertex_t) / sizeof(uint32_t)); + } m->idx_offset = write_ptr; @@ -557,7 +558,7 @@ vkpt_vertex_buffer_upload_models() buffer_unmap(&vbo->staging_buffer); - vbo->registration_sequence = r_models[i].registration_sequence; + vbo->registration_sequence = model->registration_sequence; any_models_to_upload = qtrue; } @@ -622,6 +623,12 @@ vkpt_vertex_buffer_create() .binding = LIGHT_BUFFER_BINDING_IDX, .stageFlags = VK_SHADER_STAGE_ALL, }, + { + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .binding = IQM_MATRIX_BUFFER_BINDING_IDX, + .stageFlags = VK_SHADER_STAGE_ALL, + }, { .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, @@ -689,6 
+696,20 @@ vkpt_vertex_buffer_create() VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + buffer_create(&qvk.buf_iqm_matrices, sizeof(IqmMatrixBuffer), + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + for (int frame = 0; frame < MAX_FRAMES_IN_FLIGHT; frame++) + { + buffer_create(qvk.buf_iqm_matrices_staging + frame, sizeof(IqmMatrixBuffer), + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + } + + qvk.iqm_matrices_shadow = Z_Mallocz(sizeof(IqmMatrixBuffer)); + qvk.iqm_matrices_prev = Z_Mallocz(sizeof(IqmMatrixBuffer)); + buffer_create(&qvk.buf_tonemap, sizeof(ToneMappingBuffer), VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); @@ -755,6 +776,11 @@ vkpt_vertex_buffer_create() buf_info.range = sizeof(LightBuffer); vkUpdateDescriptorSets(qvk.device, 1, &output_buf_write, 0, NULL); + output_buf_write.dstBinding = IQM_MATRIX_BUFFER_BINDING_IDX; + buf_info.buffer = qvk.buf_iqm_matrices.buffer; + buf_info.range = sizeof(IqmMatrixBuffer); + vkUpdateDescriptorSets(qvk.device, 1, &output_buf_write, 0, NULL); + output_buf_write.dstBinding = READBACK_BUFFER_BINDING_IDX; buf_info.buffer = qvk.buf_readback.buffer; buf_info.range = sizeof(ReadbackBuffer); @@ -852,16 +878,23 @@ vkpt_vertex_buffer_destroy() buffer_destroy(&qvk.buf_vertex_model_dynamic); buffer_destroy(&qvk.buf_light); + buffer_destroy(&qvk.buf_iqm_matrices); buffer_destroy(&qvk.buf_readback); for (int frame = 0; frame < MAX_FRAMES_IN_FLIGHT; frame++) { buffer_destroy(qvk.buf_light_staging + frame); + buffer_destroy(qvk.buf_iqm_matrices_staging + frame); buffer_destroy(qvk.buf_readback_staging + frame); } buffer_destroy(&qvk.buf_tonemap); buffer_destroy(&qvk.buf_sun_color); + Z_Free(qvk.iqm_matrices_shadow); + Z_Free(qvk.iqm_matrices_prev); + qvk.iqm_matrices_shadow = 
NULL; + qvk.iqm_matrices_prev = NULL; + return VK_SUCCESS; } diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index e7aa5958b..1458d4c1e 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -106,6 +106,7 @@ with this program; if not, write to the Free Software Foundation, Inc., SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_RCHIT) \ SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_PARTICLE_RAHIT) \ SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_BEAM_RAHIT) \ + SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_BEAM_RINT) \ SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_RMISS) \ SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_SHADOW_RMISS) \ SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_EXPLOSION_RAHIT) \ @@ -139,7 +140,6 @@ enum QVK_SHADER_MODULES { }; #define MAX_FRAMES_IN_FLIGHT 2 -#define MAX_SWAPCHAIN_IMAGES 4 typedef struct cmd_buf_group_s { uint32_t count_per_frame; @@ -172,10 +172,8 @@ typedef struct QVK_s { VkDevice device; VkQueue queue_graphics; - VkQueue queue_compute; VkQueue queue_transfer; int32_t queue_idx_graphics; - int32_t queue_idx_compute; int32_t queue_idx_transfer; VkSurfaceKHR surface; VkSwapchainKHR swap_chain; @@ -190,15 +188,14 @@ typedef struct QVK_s { uint32_t gpu_slice_width; uint32_t gpu_slice_width_prev; uint32_t num_swap_chain_images; - VkImage swap_chain_images[MAX_SWAPCHAIN_IMAGES]; - VkImageView swap_chain_image_views[MAX_SWAPCHAIN_IMAGES]; + VkImage* swap_chain_images; + VkImageView* swap_chain_image_views; qboolean use_khr_ray_tracing; qboolean use_ray_query; qboolean enable_validation; cmd_buf_group_t cmd_buffers_graphics; - cmd_buf_group_t cmd_buffers_compute; cmd_buf_group_t cmd_buffers_transfer; semaphore_group_t semaphores[MAX_FRAMES_IN_FLIGHT][VKPT_MAX_GPUS]; @@ -260,6 +257,11 @@ typedef struct QVK_s { BufferResource_t buf_light; BufferResource_t buf_light_staging[MAX_FRAMES_IN_FLIGHT]; BufferResource_t buf_light_stats[NUM_LIGHT_STATS_BUFFERS]; + + BufferResource_t buf_iqm_matrices; + BufferResource_t buf_iqm_matrices_staging[MAX_FRAMES_IN_FLIGHT]; + float* 
iqm_matrices_shadow; + float* iqm_matrices_prev; BufferResource_t buf_readback; BufferResource_t buf_readback_staging[MAX_FRAMES_IN_FLIGHT]; @@ -402,7 +404,7 @@ typedef struct bsp_mesh_s { struct { vec3_t pos; vec3_t dir; } cameras[MAX_CAMERAS]; int num_cameras; - char sky_visibility[VIS_MAX_BYTES]; + byte sky_visibility[VIS_MAX_BYTES]; aabb_t* cluster_aabbs; } bsp_mesh_t; @@ -441,13 +443,13 @@ typedef struct sun_light_s { qboolean visible; } sun_light_t; -void mult_matrix_matrix(float *p, const float *a, const float *b); -void mult_matrix_vector(float *p, const float *a, const float *b); -void create_entity_matrix(float matrix[16], entity_t *e, qboolean enable_left_hand); -void create_projection_matrix(float matrix[16], float znear, float zfar, float fov_x, float fov_y); -void create_view_matrix(float matrix[16], refdef_t *fd); -void inverse(const float *m, float *inv); -void create_orthographic_matrix(float matrix[16], float xmin, float xmax, +void mult_matrix_matrix(mat4_t p, const mat4_t a, const mat4_t b); +void mult_matrix_vector(mat4_t p, const mat4_t a, const vec4_t b); +void create_entity_matrix(mat4_t matrix, entity_t *e, qboolean enable_left_hand); +void create_projection_matrix(mat4_t matrix, float znear, float zfar, float fov_x, float fov_y); +void create_view_matrix(mat4_t matrix, refdef_t *fd); +void inverse(const mat4_t m, mat4_t inv); +void create_orthographic_matrix(mat4_t matrix, float xmin, float xmax, float ymin, float ymax, float znear, float zfar); #define PROFILER_LIST \ @@ -613,6 +615,8 @@ VkResult vkpt_light_buffer_upload_staging(VkCommandBuffer cmd_buf); VkResult vkpt_light_stats_create(bsp_mesh_t *bsp_mesh); VkResult vkpt_light_stats_destroy(); +VkResult vkpt_iqm_matrix_buffer_upload_staging(VkCommandBuffer cmd_buf); + VkResult vkpt_load_shader_modules(); VkResult vkpt_destroy_shader_modules(); VkResult vkpt_create_images(); @@ -677,7 +681,6 @@ void update_transparency(VkCommandBuffer command_buffer, const float* view_matri typedef enum 
{ VKPT_TRANSPARENCY_PARTICLES, - VKPT_TRANSPARENCY_BEAMS, VKPT_TRANSPARENCY_SPRITES, VKPT_TRANSPARENCY_COUNT @@ -691,10 +694,15 @@ void vkpt_get_transparency_buffers( uint64_t* index_offset, uint32_t* num_vertices, uint32_t* num_indices); +void vkpt_get_beam_aabb_buffer( + BufferResource_t** aabb_buffer, + uint64_t* aabb_offset, + uint32_t* num_aabbs); VkBufferView get_transparency_particle_color_buffer_view(); VkBufferView get_transparency_beam_color_buffer_view(); VkBufferView get_transparency_sprite_info_buffer_view(); +VkBufferView get_transparency_beam_intersect_buffer_view(); void get_transparency_counts(int* particle_num, int* beam_num, int* sprite_num); void vkpt_build_beam_lights(light_poly_t* light_list, int* num_lights, int max_lights, bsp_t *bsp, entity_t* entities, int num_entites, float adapted_luminance); qboolean vkpt_build_cylinder_light(light_poly_t* light_list, int* num_lights, int max_lights, bsp_t *bsp, vec3_t begin, vec3_t end, vec3_t color, float radius); @@ -738,6 +746,9 @@ typedef struct maliasmesh_s { vec3_t *positions; vec3_t *normals; vec2_t *tex_coords; + vec3_t *tangents; // iqm only + uint32_t *blend_indices; // iqm only + vec4_t *blend_weights; // iqm only struct pbr_material_s *materials[MAX_ALIAS_SKINS]; int numskins; } maliasmesh_t; @@ -798,8 +809,9 @@ void IMG_Load_RTX(image_t *image, byte *pic); void IMG_Unload_RTX(image_t *image); byte *IMG_ReadPixels_RTX(int *width, int *height, int *rowbytes); -qerror_t MOD_LoadMD2_RTX(model_t *model, const void *rawdata, size_t length); -qerror_t MOD_LoadMD3_RTX(model_t *model, const void *rawdata, size_t length); +qerror_t MOD_LoadMD2_RTX(model_t *model, const void *rawdata, size_t length, const char* mod_name); +qerror_t MOD_LoadMD3_RTX(model_t* model, const void* rawdata, size_t length, const char* mod_name); +qerror_t MOD_LoadIQM_RTX(model_t *model, const void *rawdata, size_t length, const char* mod_name); void MOD_Reference_RTX(model_t *model); #endif /*__VKPT_H__*/ diff --git 
a/src/unix/sdl2/video.c b/src/unix/sdl2/video.c index d27a313d2..58ba15ee8 100644 --- a/src/unix/sdl2/video.c +++ b/src/unix/sdl2/video.c @@ -257,15 +257,6 @@ void VID_SetMode(void) VID_SDL_ModeChanged(); } -void VID_VideoWait(void) -{ -} - -qboolean VID_VideoSync(void) -{ - return qtrue; -} - void VID_BeginFrame(void) { } diff --git a/src/unix/system.c b/src/unix/system.c index a6771979c..bc7fb1a03 100644 --- a/src/unix/system.c +++ b/src/unix/system.c @@ -231,7 +231,7 @@ void Sys_Init(void) getcwd(baseDirectory, PATH_MAX); } - if (!baseDirectory) { + if (!baseDirectory[0]) { Sys_Error("Game basedir not found!\n"); } // basedir diff --git a/src/windows/glimp.c b/src/windows/glimp.c index 332682b7f..f0ee39a83 100644 --- a/src/windows/glimp.c +++ b/src/windows/glimp.c @@ -493,15 +493,6 @@ qboolean VID_Init(void) return qtrue; } -void VID_VideoWait(void) -{ -} - -qboolean VID_VideoSync(void) -{ - return qtrue; -} - void VID_BeginFrame(void) { }