diff --git a/Core/Config.cpp b/Core/Config.cpp
index 0a4ada08f450..2a696abaf438 100644
--- a/Core/Config.cpp
+++ b/Core/Config.cpp
@@ -798,6 +798,7 @@ static ConfigSetting graphicsSettings[] = {
 	ReportedConfigSetting("SplineBezierQuality", &g_Config.iSplineBezierQuality, 2, true, true),
 	ReportedConfigSetting("HardwareTessellation", &g_Config.bHardwareTessellation, false, true, true),
 	ReportedConfigSetting("PostShader", &g_Config.sPostShaderName, "Off", true, true),
+	ConfigSetting("TextureShader", &g_Config.sTextureShaderName, "Off", true, true),
 	ReportedConfigSetting("MemBlockTransferGPU", &g_Config.bBlockTransferGPU, true, true, true),
 	ReportedConfigSetting("DisableSlowFramebufEffects", &g_Config.bDisableSlowFramebufEffects, false, true, true),
 
diff --git a/Core/Config.h b/Core/Config.h
index 497e7eae20c9..d288f6230596 100644
--- a/Core/Config.h
+++ b/Core/Config.h
@@ -209,6 +209,7 @@ struct Config {
 	int iSplineBezierQuality; // 0 = low , 1 = Intermediate , 2 = High
 	bool bHardwareTessellation;
 	std::string sPostShaderName; // Off for off.
+	std::string sTextureShaderName; // Ini section name of the selected texture scaling shader. Off for off.
 	std::map mPostShaderSetting;
 	bool bGfxDebugOutput;
 	bool bGfxDebugSplitSubmit;
diff --git a/GPU/Common/PostShader.cpp b/GPU/Common/PostShader.cpp
index c8aacbf843e5..5c24d48ee38c 100644
--- a/GPU/Common/PostShader.cpp
+++ b/GPU/Common/PostShader.cpp
@@ -31,11 +31,12 @@
 #include "GPU/Common/PostShader.h"
 
 static std::vector shaderInfo;
+// Okay, not really "post" shaders, but related.
+static std::vector textureShaderInfo;
 
 // Scans the directories for shader ini files and collects info about all the shaders found.
-// Additionally, scan the VFS assets.
(TODO)
 
-void LoadPostShaderInfo(std::vector directories) {
+void LoadPostShaderInfo(const std::vector &directories) {
 	std::vector notVisible;
 
 	shaderInfo.clear();
@@ -52,6 +53,12 @@ void LoadPostShaderInfo(std::vector directories) {
 	}
 	shaderInfo.push_back(off);
 
+	textureShaderInfo.clear();  // Rebuilt from scratch on every scan, like shaderInfo.
+	TextureShaderInfo textureOff{};  // Built-in "no texture shader" entry, always first in the list.
+	textureOff.name = "Off";
+	textureOff.section = "Off";
+	textureShaderInfo.push_back(textureOff);
+
 	auto appendShader = [&](const ShaderInfo &info) {
 		auto beginErase = std::remove(shaderInfo.begin(), shaderInfo.end(), info.name);
 		if (beginErase != shaderInfo.end()) {
@@ -60,12 +67,19 @@ void LoadPostShaderInfo(std::vector directories) {
 		shaderInfo.push_back(info);
 	};
 
+	auto appendTextureShader = [&](const TextureShaderInfo &info) {  // Adds info, replacing any earlier entry with the same name.
+		auto beginErase = std::remove(textureShaderInfo.begin(), textureShaderInfo.end(), info.name);
+		if (beginErase != textureShaderInfo.end()) {
+			textureShaderInfo.erase(beginErase, textureShaderInfo.end());
+		}
+		textureShaderInfo.push_back(info);
+	};
+
 	for (size_t d = 0; d < directories.size(); d++) {
 		std::vector fileInfo;
 		getFilesInDir(directories[d].c_str(), &fileInfo, "ini:");
 
 		if (fileInfo.size() == 0) {
-			// TODO: Really gotta fix the filter, now it's gonna open shaders as ini files..
 			VFSGetFileListing(directories[d].c_str(), &fileInfo, "ini:");
 		}
 
@@ -90,7 +104,10 @@ void LoadPostShaderInfo(std::vector directories) {
 			// Alright, let's loop through the sections and see if any is a shader.
 			for (size_t i = 0; i < ini.Sections().size(); i++) {
 				IniFile::Section &section = ini.Sections()[i];
-				if (section.Exists("Fragment") && section.Exists("Vertex")) {
+				std::string shaderType;  // "render" (the default when Type is absent) or "texture", case-insensitive.
+				section.Get("Type", &shaderType, "render");
+
+				if (section.Exists("Fragment") && section.Exists("Vertex") && strncasecmp(shaderType.c_str(), "render", sizeof("render")) == 0) {  // sizeof includes the NUL, so this is an exact match: an empty or partial Type no longer passes.
 					// Valid shader!
 					ShaderInfo info;
 					std::string temp;
@@ -135,6 +152,16 @@ void LoadPostShaderInfo(std::vector directories) {
 					} else {
 						notVisible.push_back(info);
 					}
+				} else if (section.Exists("Compute") && strncasecmp(shaderType.c_str(), "texture", sizeof("texture")) == 0) {  // Exact, case-insensitive match (see the render test above).
+					// This is a texture shader.
+					TextureShaderInfo info;
+					std::string temp;
+					info.section = section.name();
+					section.Get("Name", &info.name, section.name().c_str());  // Display name falls back to the section name.
+					section.Get("Compute", &temp, "");
+					info.computeShaderFile = path + "/" + temp;  // Shader path is relative to the ini's directory.
+
+					appendTextureShader(info);
 				}
 			}
 		}
@@ -189,3 +216,15 @@ std::vector GetPostShaderChain(const std::string &name) {
 const std::vector &GetAllPostShaderInfo() {
 	return shaderInfo;
 }
+
+const TextureShaderInfo *GetTextureShaderInfo(const std::string &name) {  // Looks up by ini section name; returns nullptr if not found.
+	for (auto &info : textureShaderInfo) {
+		if (info.section == name) {
+			return &info;
+		}
+	}
+	return nullptr;
+}
+const std::vector &GetAllTextureShaderInfo() {  // Includes the built-in "Off" entry pushed by LoadPostShaderInfo.
+	return textureShaderInfo;
+}
diff --git a/GPU/Common/PostShader.h b/GPU/Common/PostShader.h
index 86fd7b77da51..4f01d728f9d1 100644
--- a/GPU/Common/PostShader.h
+++ b/GPU/Common/PostShader.h
@@ -64,8 +64,26 @@ struct ShaderInfo {
 	}
 };
 
+struct TextureShaderInfo {  // One texture scaling shader described by an ini section with Type=texture.
+	std::string iniFile;
+	std::string section;  // Ini section name; this is the value stored in the config.
+	std::string name;  // Display name ("Name" key, or the section name when absent).
+
+	std::string computeShaderFile;  // Path of the compute shader source; empty for the "Off" entry.
+
+	bool operator == (const std::string &other) const {  // const so lookups also work on const elements.
+		return name == other;
+	}
+	bool operator == (const TextureShaderInfo &other) const {
+		return name == other.name;
+	}
+};
+
 void ReloadAllPostShaderInfo();
 
 const ShaderInfo *GetPostShaderInfo(const std::string &name);
 std::vector GetPostShaderChain(const std::string &name);
 const std::vector &GetAllPostShaderInfo();
+
+const TextureShaderInfo *GetTextureShaderInfo(const std::string &name);
+const std::vector &GetAllTextureShaderInfo();
diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h
index 13bfa647e68a..166f378b26c8 100644
--- a/GPU/Common/TextureCacheCommon.h
+++ b/GPU/Common/TextureCacheCommon.h
@@ -198,7 +198,7 @@
class TextureCacheCommon { // FramebufferManager keeps TextureCache updated about what regions of memory are being rendered to. void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg); - void NotifyConfigChanged(); + virtual void NotifyConfigChanged(); void NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt); int AttachedDrawingHeight(); diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 57068030f524..b46ca05710c9 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -19,6 +19,7 @@ #include #include "ext/xxhash.h" +#include "file/vfs.h" #include "i18n/i18n.h" #include "math/math_util.h" #include "profiler/profiler.h" @@ -38,6 +39,7 @@ #include "GPU/ge_constants.h" #include "GPU/GPUState.h" +#include "GPU/Common/PostShader.h" #include "GPU/Common/TextureCacheCommon.h" #include "GPU/Common/TextureDecoder.h" #include "GPU/Vulkan/TextureCacheVulkan.h" @@ -62,288 +64,6 @@ static const VkComponentMapping VULKAN_1555_SWIZZLE = { VK_COMPONENT_SWIZZLE_B, static const VkComponentMapping VULKAN_565_SWIZZLE = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; static const VkComponentMapping VULKAN_8888_SWIZZLE = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; -// 4xBRZ shader - Copyright (C) 2014-2016 DeSmuME team (GPL2+) -// Hyllian's xBR-vertex code and texel mapping -// Copyright (C) 2011/2016 Hyllian - sergiogdb@gmail.com -const char *shader4xbrz = R"( -#define BLEND_ALPHA 1 -#define BLEND_NONE 0 -#define BLEND_NORMAL 1 -#define BLEND_DOMINANT 2 -#define LUMINANCE_WEIGHT 1.0 -#define EQUAL_COLOR_TOLERANCE 30.0/255.0 -#define STEEP_DIRECTION_THRESHOLD 2.2 -#define DOMINANT_DIRECTION_THRESHOLD 3.6 - -float reduce(vec4 color) { - return dot(color.rgb, vec3(65536.0, 256.0, 1.0)); -} - -float DistYCbCr(vec4 pixA, vec4 pixB) { - // 
https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion - const vec3 K = vec3(0.2627, 0.6780, 0.0593); - const mat3 MATRIX = mat3(K, - -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, - .5, -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); - vec4 diff = pixA - pixB; - vec3 YCbCr = diff.rgb * MATRIX; - YCbCr.x *= LUMINANCE_WEIGHT; - float d = length(YCbCr); - return sqrt(pixA.a * pixB.a * d * d + diff.a * diff.a); -} - -bool IsPixEqual(const vec4 pixA, const vec4 pixB) { - return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE); -} - -bool IsBlendingNeeded(const ivec4 blend) { - ivec4 diff = blend - ivec4(BLEND_NONE); - return diff.x != 0 || diff.y != 0 || diff.z != 0 || diff.w != 0; -} - -vec4 applyScalingf(uvec2 origxy, uvec2 xy) { - float dx = 1.0 / params.width; - float dy = 1.0 / params.height; - - // A1 B1 C1 - // A0 A B C C4 - // D0 D E F F4 - // G0 G H I I4 - // G5 H5 I5 - - uvec4 t1 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y - 2); // A1 B1 C1 - uvec4 t2 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y - 1); // A B C - uvec4 t3 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y + 0); // D E F - uvec4 t4 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y + 1); // G H I - uvec4 t5 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y + 2); // G5 H5 I5 - uvec4 t6 = uvec4(origxy.x - 2, origxy.y - 1, origxy.y, origxy.y + 1); // A0 D0 G0 - uvec4 t7 = uvec4(origxy.x + 2, origxy.y - 1, origxy.y, origxy.y + 1); // C4 F4 I4 - - vec2 f = fract(vec2(float(xy.x) / float(params.scale), float(xy.y) / float(params.scale))); - - //--------------------------------------- - // Input Pixel Mapping: |21|22|23| - // 19|06|07|08|09 - // 18|05|00|01|10 - // 17|04|03|02|11 - // |15|14|13| - - vec4 src[25]; - - src[21] = readColorf(t1.xw); - src[22] = readColorf(t1.yw); - src[23] = readColorf(t1.zw); - src[ 6] = readColorf(t2.xw); - src[ 7] = readColorf(t2.yw); - src[ 8] = readColorf(t2.zw); - src[ 5] = readColorf(t3.xw); - src[ 0] = 
readColorf(t3.yw); - src[ 1] = readColorf(t3.zw); - src[ 4] = readColorf(t4.xw); - src[ 3] = readColorf(t4.yw); - src[ 2] = readColorf(t4.zw); - src[15] = readColorf(t5.xw); - src[14] = readColorf(t5.yw); - src[13] = readColorf(t5.zw); - src[19] = readColorf(t6.xy); - src[18] = readColorf(t6.xz); - src[17] = readColorf(t6.xw); - src[ 9] = readColorf(t7.xy); - src[10] = readColorf(t7.xz); - src[11] = readColorf(t7.xw); - - float v[9]; - v[0] = reduce(src[0]); - v[1] = reduce(src[1]); - v[2] = reduce(src[2]); - v[3] = reduce(src[3]); - v[4] = reduce(src[4]); - v[5] = reduce(src[5]); - v[6] = reduce(src[6]); - v[7] = reduce(src[7]); - v[8] = reduce(src[8]); - - ivec4 blendResult = ivec4(BLEND_NONE); - - // Preprocess corners - // Pixel Tap Mapping: --|--|--|--|-- - // --|--|07|08|-- - // --|05|00|01|10 - // --|04|03|02|11 - // --|--|14|13|-- - // Corner (1, 1) - if ( ((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) == false) { - float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1])); - float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02; - blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; - } - - // Pixel Tap Mapping: --|--|--|--|-- - // --|06|07|--|-- - // 18|05|00|01|-- - // 17|04|03|02|-- - // --|15|14|--|-- - // Corner (0, 1) - if ( ((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) == false) { - float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0])); - float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00; - blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; - } - - // Pixel Tap Mapping: --|--|22|23|-- - // --|06|07|08|09 - // --|05|00|01|10 - // --|--|03|02|-- - // --|--|--|--|-- - // Corner (1, 0) - if ( ((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) == false) { - float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8])); - float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08; - blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; - } - - // Pixel Tap Mapping: --|21|22|--|-- - // 19|06|07|08|-- - // 18|05|00|01|-- - // --|04|03|--|-- - // --|--|--|--|-- - // Corner (0, 0) - if ( ((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) == false) { - float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7])); - float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00; - blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; - } - - vec4 dst[16]; - dst[ 0] = src[0]; - dst[ 1] = src[0]; - dst[ 2] = src[0]; - dst[ 3] = src[0]; - dst[ 4] = src[0]; - dst[ 5] = src[0]; - dst[ 6] = src[0]; - dst[ 7] = src[0]; - dst[ 8] = src[0]; - dst[ 9] = src[0]; - dst[10] = src[0]; - dst[11] = src[0]; - dst[12] = src[0]; - dst[13] = src[0]; - dst[14] = src[0]; - dst[15] = src[0]; - - // Scale pixel - if (IsBlendingNeeded(blendResult) == true) { - float dist_01_04 = DistYCbCr(src[1], src[4]); - float dist_03_08 = DistYCbCr(src[3], src[8]); - bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[4]) && (v[5] != v[4]); - bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[8]) && (v[7] != v[8]); - bool needBlend = (blendResult[2] != BLEND_NONE); - bool doLineBlend = ( blendResult[2] >= BLEND_DOMINANT || - ((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || - (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || - (IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[0], src[2]) 
== false) ) == false ); - - vec4 blendPix = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3]; - dst[ 2] = mix(dst[ 2], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[ 9] = mix(dst[ 9], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[10] = mix(dst[10], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); - dst[11] = mix(dst[11], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[12] = mix(dst[12], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); - dst[13] = mix(dst[13], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[14] = mix(dst[14], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[15] = mix(dst[15], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - - dist_01_04 = DistYCbCr(src[7], src[2]); - dist_03_08 = DistYCbCr(src[1], src[6]); - haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[2]) && (v[3] != v[2]); - haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[6]) && (v[5] != v[6]); - needBlend = (blendResult[1] != BLEND_NONE); - doLineBlend = ( blendResult[1] >= BLEND_DOMINANT || - !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || - (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || - (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) ); - - blendPix = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1]; - dst[ 1] = mix(dst[ 1], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 
1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[ 6] = mix(dst[ 6], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[ 7] = mix(dst[ 7], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); - dst[ 8] = mix(dst[ 8], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 9] = mix(dst[ 9], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); - dst[10] = mix(dst[10], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[11] = mix(dst[11], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[12] = mix(dst[12], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - - dist_01_04 = DistYCbCr(src[5], src[8]); - dist_03_08 = DistYCbCr(src[7], src[4]); - haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[8]) && (v[1] != v[8]); - haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[4]) && (v[3] != v[4]); - needBlend = (blendResult[0] != BLEND_NONE); - doLineBlend = ( blendResult[0] >= BLEND_DOMINANT || - !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || - (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || - (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) ); - - blendPix = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7]; - dst[ 0] = mix(dst[ 0], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[15] = mix(dst[15], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[ 4] = mix(dst[ 4], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 
0.75 : 0.00); - dst[ 5] = mix(dst[ 5], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 6] = mix(dst[ 6], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); - dst[ 7] = mix(dst[ 7], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 8] = mix(dst[ 8], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[ 9] = mix(dst[ 9], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - - dist_01_04 = DistYCbCr(src[3], src[6]); - dist_03_08 = DistYCbCr(src[5], src[2]); - haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[6]) && (v[7] != v[6]); - haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[2]) && (v[1] != v[2]); - needBlend = (blendResult[3] != BLEND_NONE); - doLineBlend = ( blendResult[3] >= BLEND_DOMINANT || - !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || - (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || - (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) ); - - blendPix = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5]; - dst[ 3] = mix(dst[ 3], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[12] = mix(dst[12], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[13] = mix(dst[13], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); - dst[14] = mix(dst[14], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[15] = mix(dst[15], blendPix, (needBlend) ? ((doLineBlend) ? 
1.00 : 0.6848532563) : 0.00); - dst[ 4] = mix(dst[ 4], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 5] = mix(dst[ 5], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[ 6] = mix(dst[ 6], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - } - - // select output pixel - vec4 res = mix(mix(mix(mix(dst[ 6], dst[ 7], step(0.25, f.x)), - mix(dst[ 8], dst[ 9], step(0.75, f.x)), - step(0.50, f.x)), - mix(mix(dst[ 5], dst[ 0], step(0.25, f.x)), - mix(dst[ 1], dst[10], step(0.75, f.x)), - step(0.50, f.x)), - step(0.25, f.y)), - mix(mix(mix(dst[ 4], dst[ 3], step(0.25, f.x)), - mix(dst[ 2], dst[11], step(0.75, f.x)), - step(0.50, f.x)), - mix(mix(dst[15], dst[14], step(0.25, f.x)), - mix(dst[13], dst[12], step(0.75, f.x)), - step(0.50, f.x)), - step(0.75, f.y)), - step(0.50, f.y)); - - return res; -} - -uint applyScalingu(uvec2 origxy, uvec2 xy) { - return packUnorm4x8(applyScalingf(origxy, xy)); -} -)"; - const char *copyShader = R"( #version 450 #extension GL_ARB_separate_shader_objects : enable @@ -645,20 +365,55 @@ void TextureCacheVulkan::DeviceRestore(VulkanContext *vulkan, Draw::DrawContext samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; vkCreateSampler(vulkan_->GetDevice(), &samp, nullptr, &samplerNearest_); - std::string error; - std::string fullUploadShader = StringFromFormat(uploadShader, shader4xbrz); - std::string fullCopyShader = StringFromFormat(copyShader, shader4xbrz); - - if (g_Config.bTexHardwareScaling) { - uploadCS_ = CompileShaderModule(vulkan_, VK_SHADER_STAGE_COMPUTE_BIT, fullUploadShader.c_str(), &error); - _dbg_assert_msg_(uploadCS_ != VK_NULL_HANDLE, "failed to compile upload shader"); - copyCS_ = CompileShaderModule(vulkan_, VK_SHADER_STAGE_COMPUTE_BIT, fullCopyShader.c_str(), &error); - _dbg_assert_msg_(copyCS_!= VK_NULL_HANDLE, "failed to compile copy shader"); - } + CompileScalingShader(); 
computeShaderManager_.DeviceRestore(vulkan);
 }
 
+void TextureCacheVulkan::NotifyConfigChanged() {  // Runs the common handling, then refreshes the scaling shader modules.
+	TextureCacheCommon::NotifyConfigChanged();
+	CompileScalingShader();
+}
+
+static std::string ReadShaderSrc(const std::string &filename) {  // Returns the file contents, or "" when it cannot be read.
+	size_t sz = 0;
+	char *data = (char *)VFSReadFile(filename.c_str(), &sz);
+	if (!data)
+		return "";
+
+	std::string src(data, sz);
+	free(data);  // NOTE(review): assumes VFSReadFile allocates with malloc() - confirm; a new[] buffer would need delete[].
+	return src;
+}
+
+void TextureCacheVulkan::CompileScalingShader() {  // Builds uploadCS_/copyCS_ for the configured texture shader, dropping stale ones.
+	if (!g_Config.bTexHardwareScaling || g_Config.sTextureShaderName != textureShader_) {
+		if (uploadCS_ != VK_NULL_HANDLE)
+			vulkan_->Delete().QueueDeleteShaderModule(uploadCS_);
+		if (copyCS_ != VK_NULL_HANDLE)
+			vulkan_->Delete().QueueDeleteShaderModule(copyCS_);
+		textureShader_.clear();  // Empty name == "nothing usable is compiled".
+	}
+	if (!g_Config.bTexHardwareScaling || (!textureShader_.empty() && (uploadCS_ != VK_NULL_HANDLE || copyCS_ != VK_NULL_HANDLE)))
+		return;  // Disabled, or modules for this same shader already exist: recompiling would overwrite the handles and leak them.
+
+	const TextureShaderInfo *shaderInfo = GetTextureShaderInfo(g_Config.sTextureShaderName);
+	if (!shaderInfo || shaderInfo->computeShaderFile.empty())
+		return;  // Unknown shader, or the "Off" entry which has no compute file.
+
+	std::string shaderSource = ReadShaderSrc(shaderInfo->computeShaderFile);
+	std::string fullUploadShader = StringFromFormat(uploadShader, shaderSource.c_str());  // The file's source is spliced into both templates.
+	std::string fullCopyShader = StringFromFormat(copyShader, shaderSource.c_str());
+
+	std::string error;
+	uploadCS_ = CompileShaderModule(vulkan_, VK_SHADER_STAGE_COMPUTE_BIT, fullUploadShader.c_str(), &error);
+	_dbg_assert_msg_(uploadCS_ != VK_NULL_HANDLE, "failed to compile upload shader");
+	copyCS_ = CompileShaderModule(vulkan_, VK_SHADER_STAGE_COMPUTE_BIT, fullCopyShader.c_str(), &error);
+	_dbg_assert_msg_(copyCS_ != VK_NULL_HANDLE, "failed to compile copy shader");
+
+	textureShader_ = g_Config.sTextureShaderName;  // Remember which shader the modules were built from.
+}
+
 void TextureCacheVulkan::ReleaseTexture(TexCacheEntry *entry, bool delete_them) {
 	DEBUG_LOG(G3D, "Deleting texture %p", entry->vkTex);
 	delete entry->vkTex;
@@ -1026,17 +781,19 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
 		badMipSizes = false;
 	}
 
+	bool hardwareScaling = g_Config.bTexHardwareScaling && (uploadCS_ != VK_NULL_HANDLE || copyCS_ != VK_NULL_HANDLE);  // Only counts as active when a compute module really exists.
+
 	// Don't scale the PPGe texture.
 	if (entry->addr > 0x05000000 && entry->addr < PSP_GetKernelMemoryEnd())
 		scaleFactor = 1;
-	if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && scaleFactor != 1 && !g_Config.bTexHardwareScaling) {
+	if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && scaleFactor != 1 && !hardwareScaling) {
 		// Remember for later that we /wanted/ to scale this texture.
 		entry->status |= TexCacheEntry::STATUS_TO_SCALE;
 		scaleFactor = 1;
 	}
 
 	if (scaleFactor != 1) {
-		if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED && !g_Config.bTexHardwareScaling) {
+		if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED && !hardwareScaling) {
 			entry->status |= TexCacheEntry::STATUS_TO_SCALE;
 			scaleFactor = 1;
 		} else {
@@ -1088,7 +845,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
 	// If we want to use the GE debugger, we should add VK_IMAGE_USAGE_TRANSFER_SRC_BIT too...
 
 	// Compute experiment
-	if (actualFmt == VULKAN_8888_FORMAT && scaleFactor > 1 && g_Config.bTexHardwareScaling) {
+	if (actualFmt == VULKAN_8888_FORMAT && scaleFactor > 1 && hardwareScaling) {
 		// Enable the experiment you want.
if (uploadCS_ != VK_NULL_HANDLE)
 			computeUpload = true;
diff --git a/GPU/Vulkan/TextureCacheVulkan.h b/GPU/Vulkan/TextureCacheVulkan.h
index 3635d5299c29..a9163f2e1869 100644
--- a/GPU/Vulkan/TextureCacheVulkan.h
+++ b/GPU/Vulkan/TextureCacheVulkan.h
@@ -94,6 +94,8 @@ class TextureCacheVulkan : public TextureCacheCommon {
 		}
 	}
 
+	void NotifyConfigChanged() override;  // Also recompiles the texture scaling shader.
+
 	void GetVulkanHandles(VkImageView &imageView, VkSampler &sampler) {
 		imageView = imageView_;
 		sampler = curSampler_;
@@ -123,6 +125,8 @@ class TextureCacheVulkan : public TextureCacheCommon {
 	void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) override;
 	void BuildTexture(TexCacheEntry *const entry) override;
 
+	void CompileScalingShader();  // Builds uploadCS_/copyCS_ from g_Config.sTextureShaderName.
+
 	VulkanContext *vulkan_ = nullptr;
 	VulkanDeviceAllocator *allocator_ = nullptr;
 	VulkanPushBuffer *push_ = nullptr;
@@ -145,6 +149,7 @@ class TextureCacheVulkan : public TextureCacheCommon {
 	DrawEngineVulkan *drawEngine_;
 	Vulkan2D *vulkan2D_;
 
+	std::string textureShader_;  // Config name the current uploadCS_/copyCS_ were compiled for; empty if none.
 	VkShaderModule uploadCS_ = VK_NULL_HANDLE;
 	VkShaderModule copyCS_ = VK_NULL_HANDLE;
 
diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp
index a9ae9f891fc9..7c6637faef68 100644
--- a/UI/GameSettingsScreen.cpp
+++ b/UI/GameSettingsScreen.cpp
@@ -112,6 +112,21 @@ bool DoesBackendSupportHWTess() {
 	}
 }
 
+static bool UsingHardwareTextureScaling() {  // True when textures are scaled by a shader rather than on the CPU.
+	// For now, Vulkan only.
+	return g_Config.bTexHardwareScaling && GetGPUBackend() == GPUBackend::VULKAN && !g_Config.bSoftwareRendering;
+}
+
+static std::string TextureTranslateName(const char *value) {  // Section name -> translated display name; unknown names pass through.
+	auto ps = GetI18NCategory("TextureShaders");
+	const TextureShaderInfo *info = GetTextureShaderInfo(value);
+	if (info) {
+		return ps->T(value, info->name.c_str());  // info is non-null here, so the old "info ? ... : value" fallback was dead code.
+	} else {
+		return value;
+	}
+}
+
 static std::string PostShaderTranslateName(const char *value) {
 	auto ps = GetI18NCategory("PostShaders");
 	const ShaderInfo *info = GetPostShaderInfo(value);
@@ -450,7 +465,7 @@ void GameSettingsScreen::CreateViews() {
 		texScalingChoice->HideChoice(5); // 5x
 	}
 	texScalingChoice->OnChoice.Add([=](EventParams &e) {
-		if (g_Config.iTexScalingLevel != 1) {
+		if (g_Config.iTexScalingLevel != 1 && !UsingHardwareTextureScaling()) {  // The CPU-cost tip does not apply to shader scaling.
 			settingInfo_->Show(gr->T("UpscaleLevel Tip", "CPU heavy - some scaling may be delayed to avoid stutter"), e.v);
 		}
 		return UI::EVENT_CONTINUE;
@@ -459,7 +474,9 @@ void GameSettingsScreen::CreateViews() {
 
 	static const char *texScaleAlgos[] = { "xBRZ", "Hybrid", "Bicubic", "Hybrid + Bicubic", };
 	PopupMultiChoice *texScalingType = graphicsSettings->Add(new PopupMultiChoice(&g_Config.iTexScalingType, gr->T("Upscale Type"), texScaleAlgos, 0, ARRAY_SIZE(texScaleAlgos), gr->GetName(), screenManager()));
-	texScalingType->SetDisabledPtr(&g_Config.bSoftwareRendering);
+	texScalingType->SetEnabledFunc([]() {  // CPU upscale type is greyed out while a texture shader does the scaling.
+		return !g_Config.bSoftwareRendering && !UsingHardwareTextureScaling();
+	});
 
 	CheckBox *deposterize = graphicsSettings->Add(new CheckBox(&g_Config.bTexDeposterize, gr->T("Deposterize")));
 	deposterize->OnClick.Add([=](EventParams &e) {
@@ -468,7 +485,15 @@ void GameSettingsScreen::CreateViews() {
 		}
 		return UI::EVENT_CONTINUE;
 	});
-	deposterize->SetDisabledPtr(&g_Config.bSoftwareRendering);
+	deposterize->SetEnabledFunc([]() {  // Same enable rule as the upscale type above.
+		return !g_Config.bSoftwareRendering && !UsingHardwareTextureScaling();
+	});
+
+	ChoiceWithValueDisplay *textureShaderChoice = graphicsSettings->Add(new ChoiceWithValueDisplay(&g_Config.sTextureShaderName, gr->T("Texture Shader"), &TextureTranslateName));
+	textureShaderChoice->OnClick.Handle(this, &GameSettingsScreen::OnTextureShader);
+	textureShaderChoice->SetEnabledFunc([]() {  // Texture shaders are only offered on the Vulkan hardware renderer.
+		return GetGPUBackend() == GPUBackend::VULKAN && !g_Config.bSoftwareRendering;
+	});
graphicsSettings->Add(new ItemHeader(gr->T("Texture Filtering")));
 	static const char *anisoLevels[] = { "Off", "2x", "4x", "8x", "16x" };
@@ -1460,6 +1485,23 @@ UI::EventReturn GameSettingsScreen::OnPostProcShaderChange(UI::EventParams &e) {
 	return UI::EVENT_DONE;
 }
 
+UI::EventReturn GameSettingsScreen::OnTextureShader(UI::EventParams &e) {  // Opens the texture shader picker popup.
+	auto gr = GetI18NCategory("Graphics");
+	auto shaderScreen = new TextureShaderScreen(gr->T("Texture Shader"));
+	shaderScreen->OnChoice.Handle(this, &GameSettingsScreen::OnTextureShaderChange);
+	if (e.v)
+		shaderScreen->SetPopupOrigin(e.v);
+	screenManager()->push(shaderScreen);
+	return UI::EVENT_DONE;
+}
+
+UI::EventReturn GameSettingsScreen::OnTextureShaderChange(UI::EventParams &e) {  // Runs after a shader was picked in the popup.
+	g_Config.bTexHardwareScaling = g_Config.sTextureShaderName != "Off";  // Update the flag first so the notifications below see the new state.
+	NativeMessageReceived("gpu_resized", "");
+	RecreateViews(); // Update setting name
+	return UI::EVENT_DONE;
+}
+
 UI::EventReturn GameSettingsScreen::OnDeveloperTools(UI::EventParams &e) {
 	screenManager()->push(new DeveloperToolsScreen());
 	return UI::EVENT_DONE;
diff --git a/UI/GameSettingsScreen.h b/UI/GameSettingsScreen.h
index f2f27bc206c3..3b0859e0d060 100644
--- a/UI/GameSettingsScreen.h
+++ b/UI/GameSettingsScreen.h
@@ -82,6 +82,8 @@ class GameSettingsScreen : public UIDialogScreenWithGameBackground {
 	UI::EventReturn OnAutoFrameskip(UI::EventParams &e);
 	UI::EventReturn OnPostProcShader(UI::EventParams &e);
 	UI::EventReturn OnPostProcShaderChange(UI::EventParams &e);
+	UI::EventReturn OnTextureShader(UI::EventParams &e);
+	UI::EventReturn OnTextureShaderChange(UI::EventParams &e);
 	UI::EventReturn OnDeveloperTools(UI::EventParams &e);
 	UI::EventReturn OnRemoteISO(UI::EventParams &e);
 	UI::EventReturn OnChangeQuickChat0(UI::EventParams &e);
diff --git a/UI/MiscScreens.cpp b/UI/MiscScreens.cpp
index a6542c65d067..e88e2448291f 100644
--- a/UI/MiscScreens.cpp
+++ b/UI/MiscScreens.cpp
@@ -321,6 +321,26 @@ void PostProcScreen::OnCompleted(DialogResult result) {
 	g_Config.sPostShaderName = shaders_[listView_->GetSelected()].section;
 }
 
+TextureShaderScreen::TextureShaderScreen(const std::string &title) : ListPopupScreen(title) {  // Lists every known texture shader.
+	auto ps = GetI18NCategory("TextureShaders");
+	ReloadAllPostShaderInfo();
+	shaders_ = GetAllTextureShaderInfo();  // Copy, so list indices stay valid while the popup is open.
+	std::vector items;
+	int selected = -1;  // Stays -1 when the configured shader is not in the list.
+	for (int i = 0; i < (int)shaders_.size(); i++) {
+		if (shaders_[i].section == g_Config.sTextureShaderName)
+			selected = i;
+		items.push_back(ps->T(shaders_[i].section.c_str(), shaders_[i].name.c_str()));
+	}
+	adaptor_ = UI::StringVectorListAdaptor(items, selected);
+}
+
+void TextureShaderScreen::OnCompleted(DialogResult result) {  // Stores the chosen shader's section name in the config.
+	if (result != DR_OK || listView_->GetSelected() < 0 || listView_->GetSelected() >= (int)shaders_.size())
+		return;  // Cancelled, or no valid row selected (the ctor may pass -1): never index shaders_ out of range.
+	g_Config.sTextureShaderName = shaders_[listView_->GetSelected()].section;
+}
+
 NewLanguageScreen::NewLanguageScreen(const std::string &title) : ListPopupScreen(title) {
 	// Disable annoying encoding warning
 #ifdef _MSC_VER
diff --git a/UI/MiscScreens.h b/UI/MiscScreens.h
index 459974de48da..b1c5881bc436 100644
--- a/UI/MiscScreens.h
+++ b/UI/MiscScreens.h
@@ -26,6 +26,7 @@
 #include "ui/ui_screen.h"
 
 struct ShaderInfo;
+struct TextureShaderInfo;
 
 extern std::string boot_filename;
 void UIBackgroundInit(UIContext &dc);
@@ -112,6 +113,16 @@ class PostProcScreen : public ListPopupScreen {
 	std::vector shaders_;
 };
 
+class TextureShaderScreen : public ListPopupScreen {  // Popup list for choosing g_Config.sTextureShaderName.
+public:
+	TextureShaderScreen(const std::string &title);
+
+private:
+	void OnCompleted(DialogResult result) override;
+	bool ShowButtons() const override { return true; }
+	std::vector shaders_;  // Snapshot of the shader list; rows of the list view index into it.
+};
+
 class LogoScreen : public UIScreen {
 public:
 	LogoScreen(bool gotoGameSettings = false)
diff --git a/assets/shaders/defaultshaders.ini b/assets/shaders/defaultshaders.ini
index d364ad93af52..78c1eb62a4a6 100644
--- a/assets/shaders/defaultshaders.ini
+++ b/assets/shaders/defaultshaders.ini
@@ -144,4 +144,8 @@ SettingDefaultValue4=1.0
 SettingMaxValue4=2.0
 SettingMinValue4=0.1
 SettingStep4=0.05
-
+[Tex4xBRZ]
+Type=Texture
+Name=4xBRZ
+Author=Hyllian
+Compute=tex_4xbrz.csh
diff --git a/assets/shaders/tex_4xbrz.csh b/assets/shaders/tex_4xbrz.csh
new file mode 100644
index 000000000000..2a72a7f3b58f
--- /dev/null
+++ b/assets/shaders/tex_4xbrz.csh
@@ -0,0 +1,288 @@
+// 4xBRZ shader - Copyright (C) 2014-2016 DeSmuME team (GPL2+)
+// Hyllian's xBR-vertex code and texel mapping
+// Copyright (C) 2011/2016 Hyllian - sergiogdb@gmail.com
+#define BLEND_ALPHA 1
+#define BLEND_NONE 0
+#define BLEND_NORMAL 1
+#define BLEND_DOMINANT 2
+#define LUMINANCE_WEIGHT 1.0
+// Parenthesized so the macro expands as a single value inside any expression.
+#define EQUAL_COLOR_TOLERANCE (30.0/255.0)
+#define STEEP_DIRECTION_THRESHOLD 2.2
+#define DOMINANT_DIRECTION_THRESHOLD 3.6
+
+// Folds the RGB channels into one float (weights 65536/256/1); results are only compared with == and !=.
+float reduce(vec4 color) {
+	return dot(color.rgb, vec3(65536.0, 256.0, 1.0));
+}
+
+// Color distance between two pixels: the RGB difference is transformed with the BT.2020-based matrix,
+// luma is scaled by LUMINANCE_WEIGHT, and the result is combined with the alpha difference.
+float DistYCbCr(vec4 pixA, vec4 pixB) {
+	// https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion
+	const vec3 K = vec3(0.2627, 0.6780, 0.0593);
+	const mat3 MATRIX = mat3(K,
+		-.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5,
+		.5, -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r));
+	vec4 diff = pixA - pixB;
+	vec3 YCbCr = diff.rgb * MATRIX;
+	YCbCr.x *= LUMINANCE_WEIGHT;
+	float d = length(YCbCr);
+	return sqrt(pixA.a * pixB.a * d * d + diff.a * diff.a);
+}
+
+// True when the distance between the two pixels is below EQUAL_COLOR_TOLERANCE.
+bool IsPixEqual(const vec4 pixA, const vec4 pixB) {
+	return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE);
+}
+
+// True when at least one of the four corner entries differs from BLEND_NONE.
+bool IsBlendingNeeded(const ivec4 blend) {
+	ivec4 diff = blend - ivec4(BLEND_NONE);
+	return diff.x != 0 || diff.y != 0 || diff.z != 0 || diff.w != 0;
+}
+
+// Computes the scaled color for xy (presumably the output texel) from the source texel origxy and
+// 20 of its neighbours. Each source texel expands to a 4x4 grid (dst); the fractional position of
+// xy / params.scale picks which of the 16 sub-pixels is returned.
+vec4 applyScalingf(uvec2 origxy, uvec2 xy) {
+	//    A1 B1 C1
+	// A0 A  B  C  C4
+	// D0 D  E  F  F4
+	// G0 G  H  I  I4
+	//    G5 H5 I5
+
+	// NOTE(review): the coordinates are unsigned, so "- 1"/"- 2" wrap around at x == 0 / y == 0;
+	// presumably readColorf clamps or otherwise tolerates out-of-range coordinates - confirm.
+	uvec4 t1 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y - 2); // A1 B1 C1
+	uvec4 t2 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y - 1); // A B C
+	uvec4 t3 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y + 0); // D E F
+	uvec4 t4 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y + 1); // G H I
+	uvec4 t5 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y + 2); // G5 H5 I5
+	uvec4 t6 = uvec4(origxy.x - 2, origxy.y - 1, origxy.y, origxy.y + 1); // A0 D0 G0
+	uvec4 t7 = uvec4(origxy.x + 2, origxy.y - 1, origxy.y, origxy.y + 1); // C4 F4 I4
+
+	vec2 f = fract(vec2(float(xy.x) / float(params.scale), float(xy.y) / float(params.scale)));
+
+	//---------------------------------------
+	// Input Pixel Mapping:    |21|22|23|
+	//                       19|06|07|08|09
+	//                       18|05|00|01|10
+	//                       17|04|03|02|11
+	//                         |15|14|13|
+
+	vec4 src[25];
+
+	src[21] = readColorf(t1.xw);
+	src[22] = readColorf(t1.yw);
+	src[23] = readColorf(t1.zw);
+	src[ 6] = readColorf(t2.xw);
+	src[ 7] = readColorf(t2.yw);
+	src[ 8] = readColorf(t2.zw);
+	src[ 5] = readColorf(t3.xw);
+	src[ 0] = readColorf(t3.yw);
+	src[ 1] = readColorf(t3.zw);
+	src[ 4] = readColorf(t4.xw);
+	src[ 3] = readColorf(t4.yw);
+	src[ 2] = readColorf(t4.zw);
+	src[15] = readColorf(t5.xw);
+	src[14] = readColorf(t5.yw);
+	src[13] = readColorf(t5.zw);
+	src[19] = readColorf(t6.xy);
+	src[18] = readColorf(t6.xz);
+	src[17] = readColorf(t6.xw);
+	src[ 9] = readColorf(t7.xy);
+	src[10] = readColorf(t7.xz);
+	src[11] = readColorf(t7.xw);
+
+	float v[9];
+	v[0] = reduce(src[0]);
+	v[1] = reduce(src[1]);
+	v[2] = reduce(src[2]);
+	v[3] = reduce(src[3]);
+	v[4] = reduce(src[4]);
+	v[5] = reduce(src[5]);
+	v[6] = reduce(src[6]);
+	v[7] = reduce(src[7]);
+	v[8] = reduce(src[8]);
+
+	ivec4 blendResult = ivec4(BLEND_NONE);
+
+	// Preprocess corners
+	// Pixel Tap Mapping: --|--|--|--|--
+	//                    --|--|07|08|--
+	//                    --|05|00|01|10
+	//                    --|04|03|02|11
+	//                    --|--|14|13|--
+	// Corner (1, 1)
+	if ( ((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) == false) {
+		float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1]));
+		float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2]));
+		bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02;
+		blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+	}
+
+	// Pixel Tap Mapping: --|--|--|--|--
+	//                    --|06|07|--|--
+	//                    18|05|00|01|--
+	//                    17|04|03|02|--
+	//                    --|15|14|--|--
+	// Corner (0, 1)
+	if ( ((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) == false) {
+		float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0]));
+		float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3]));
+		bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00;
+		blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+	}
+
+	// Pixel Tap Mapping: --|--|22|23|--
+	//                    --|06|07|08|09
+	//                    --|05|00|01|10
+	//                    --|--|03|02|--
+	//                    --|--|--|--|--
+	// Corner (1, 0)
+	if ( ((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) == false) {
+		float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8]));
+		float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1]));
+		bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08;
+		blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+	}
+
+	// Pixel Tap Mapping: --|21|22|--|--
+	//                    19|06|07|08|--
+	//                    18|05|00|01|--
+	//                    --|04|03|--|--
+	//                    --|--|--|--|--
+	// Corner (0, 0)
+	if ( ((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) == false) {
+		float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7]));
+		float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0]));
+		bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00;
+		blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
+	}
+
+	vec4 dst[16];
+	dst[ 0] = src[0];
+	dst[ 1] = src[0];
+	dst[ 2] = src[0];
+	dst[ 3] = src[0];
+	dst[ 4] = src[0];
+	dst[ 5] = src[0];
+	dst[ 6] = src[0];
+	dst[ 7] = src[0];
+	dst[ 8] = src[0];
+	dst[ 9] = src[0];
+	dst[10] = src[0];
+	dst[11] = src[0];
+	dst[12] = src[0];
+	dst[13] = src[0];
+	dst[14] = src[0];
+	dst[15] = src[0];
+
+	// Scale pixel
+	if (IsBlendingNeeded(blendResult) == true) {
+		float dist_01_04 = DistYCbCr(src[1], src[4]);
+		float dist_03_08 = DistYCbCr(src[3], src[8]);
+		bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[4]) && (v[5] != v[4]);
+		bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[8]) && (v[7] != v[8]);
+		bool needBlend = (blendResult[2] != BLEND_NONE);
+		bool doLineBlend = ( blendResult[2] >= BLEND_DOMINANT ||
+			!((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) ||
+			(blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) ||
+			(IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && !IsPixEqual(src[0], src[2])) ) );
+
+		vec4 blendPix = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3];
+		dst[ 2] = mix(dst[ 2], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00);
+		dst[ 9] = mix(dst[ 9], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00);
+		dst[10] = mix(dst[10], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00);
+		dst[11] = mix(dst[11], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
+		dst[12] = mix(dst[12], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00);
+		dst[13] = mix(dst[13], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
+		dst[14] = mix(dst[14], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00);
+		dst[15] = mix(dst[15], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00);
+
+		dist_01_04 = DistYCbCr(src[7], src[2]);
+		dist_03_08 = DistYCbCr(src[1], src[6]);
+		haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[2]) && (v[3] != v[2]);
+		haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[6]) && (v[5] != v[6]);
+		needBlend = (blendResult[1] != BLEND_NONE);
+		doLineBlend = ( blendResult[1] >= BLEND_DOMINANT ||
+			!((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) ||
+			(blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) ||
+			(IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) );
+
+		blendPix = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1];
+		dst[ 1] = mix(dst[ 1], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00);
+		dst[ 6] = mix(dst[ 6], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00);
+		dst[ 7] = mix(dst[ 7], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00);
+		dst[ 8] = mix(dst[ 8], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
+		dst[ 9] = mix(dst[ 9], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00);
+		dst[10] = mix(dst[10], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
+		dst[11] = mix(dst[11], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00);
+		dst[12] = mix(dst[12], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00);
+
+		dist_01_04 = DistYCbCr(src[5], src[8]);
+		dist_03_08 = DistYCbCr(src[7], src[4]);
+		haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[8]) && (v[1] != v[8]);
+		haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[4]) && (v[3] != v[4]);
+		needBlend = (blendResult[0] != BLEND_NONE);
+		doLineBlend = ( blendResult[0] >= BLEND_DOMINANT ||
+			!((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) ||
+			(blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) ||
+			(IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) );
+
+		blendPix = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7];
+		dst[ 0] = mix(dst[ 0], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00);
+		dst[15] = mix(dst[15], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00);
+		dst[ 4] = mix(dst[ 4], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00);
+		dst[ 5] = mix(dst[ 5], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
+		dst[ 6] = mix(dst[ 6], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00);
+		dst[ 7] = mix(dst[ 7], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
+		dst[ 8] = mix(dst[ 8], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00);
+		dst[ 9] = mix(dst[ 9], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00);
+
+		dist_01_04 = DistYCbCr(src[3], src[6]);
+		dist_03_08 = DistYCbCr(src[5], src[2]);
+		haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[6]) && (v[7] != v[6]);
+		haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[2]) && (v[1] != v[2]);
+		needBlend = (blendResult[3] != BLEND_NONE);
+		doLineBlend = ( blendResult[3] >= BLEND_DOMINANT ||
+			!((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) ||
+			(blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) ||
+			(IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) );
+
+		blendPix = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5];
+		dst[ 3] = mix(dst[ 3], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00);
+		dst[12] = mix(dst[12], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00);
+		dst[13] = mix(dst[13], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00);
+		dst[14] = mix(dst[14], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
+		dst[15] = mix(dst[15], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00);
+		dst[ 4] = mix(dst[ 4], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
+		dst[ 5] = mix(dst[ 5], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00);
+		dst[ 6] = mix(dst[ 6], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00);
+	}
+
+	// select output pixel
+	vec4 res = mix(mix(mix(mix(dst[ 6], dst[ 7], step(0.25, f.x)),
+	                       mix(dst[ 8], dst[ 9], step(0.75, f.x)),
+	                       step(0.50, f.x)),
+	                   mix(mix(dst[ 5], dst[ 0], step(0.25, f.x)),
+	                       mix(dst[ 1], dst[10], step(0.75, f.x)),
+	                       step(0.50, f.x)),
+	                   step(0.25, f.y)),
+	               mix(mix(mix(dst[ 4], dst[ 3], step(0.25, f.x)),
+	                       mix(dst[ 2], dst[11], step(0.75, f.x)),
+	                       step(0.50, f.x)),
+	                   mix(mix(dst[15], dst[14], step(0.25, f.x)),
+	                       mix(dst[13], dst[12], step(0.75, f.x)),
+	                       step(0.50, f.x)),
+	                   step(0.75, f.y)),
+	               step(0.50, f.y));
+
+	return res;
+}
+
+// Same as applyScalingf, with the resulting color packed into a uint via packUnorm4x8.
+uint applyScalingu(uvec2 origxy, uvec2 xy) {
+	return packUnorm4x8(applyScalingf(origxy, xy));
+}