diff --git a/Common/MemoryUtil.h b/Common/MemoryUtil.h index 53c917b7b8eb..2807d12957fb 100644 --- a/Common/MemoryUtil.h +++ b/Common/MemoryUtil.h @@ -45,16 +45,21 @@ void FreeAlignedMemory(void* ptr); int GetMemoryProtectPageSize(); +// A simple buffer that bypasses the libc memory allocator. As a result the buffer is always page-aligned. template class SimpleBuf { public: - SimpleBuf() : buf_(0), size_(0) { - } + SimpleBuf() : buf_(0), size_(0) {} SimpleBuf(size_t size) : buf_(0) { resize(size); } + SimpleBuf(const SimpleBuf &o) = delete; // Not copyable: a shallow copy would leave two owners freeing the same pages in ~SimpleBuf(). + + // Move constructor + SimpleBuf(SimpleBuf &&o) noexcept : buf_(o.buf_), size_(o.size_) { o.buf_ = nullptr; o.size_ = 0; } + ~SimpleBuf() { if (buf_ != 0) { FreeMemoryPages(buf_, size_ * sizeof(T)); diff --git a/Core/TextureReplacer.cpp b/Core/TextureReplacer.cpp index 525cabe52cf1..aa709b12eefe 100644 --- a/Core/TextureReplacer.cpp +++ b/Core/TextureReplacer.cpp @@ -33,6 +33,7 @@ #include "Common/StringUtils.h" #include "Common/Thread/ParallelLoop.h" #include "Common/Thread/Waitable.h" +#include "Common/Thread/ThreadManager.h" #include "Common/TimeUtil.h" #include "Core/Config.h" #include "Core/Host.h" @@ -502,6 +503,39 @@ static bool WriteTextureToPNG(png_imagep image, const Path &filename, int conver } } +class TextureSaveTask : public Task { +public: + // Could probably just use a vector. 
+ SimpleBuf data; + + int w = 0; + int h = 0; + int pitch = 0; // bytes + + Path path; + u32 replacedInfoHash = 0; // Logged by Run() on success. + + explicit TextureSaveTask(SimpleBuf _data) : data(std::move(_data)) {} + + TaskType Type() const override { return TaskType::CPU_COMPUTE; } // Also I/O blocking but dominated by compute + void Run() override { + png_image png; + memset(&png, 0, sizeof(png)); + png.version = PNG_IMAGE_VERSION; + png.format = PNG_FORMAT_RGBA; + png.width = w; + png.height = h; + bool success = WriteTextureToPNG(&png, path, 0, data.data(), pitch, nullptr); + png_image_free(&png); + if (png.warning_or_error >= 2) { + ERROR_LOG(COMMON, "Saving texture to PNG produced errors."); + } else if (success) { + NOTICE_LOG(G3D, "Saving texture for replacement: %08x / %dx%d", replacedInfoHash, w, h); + } + } +}; + + void TextureReplacer::NotifyTextureDecoded(const ReplacedTextureDecodeInfo &replacedInfo, const void *data, int pitch, int level, int w, int h) { _assert_msg_(enabled_, "Replacement not enabled"); if (!g_Config.bSaveNewTextures) { @@ -564,6 +598,10 @@ void TextureReplacer::NotifyTextureDecoded(const ReplacedTextureDecodeInfo &repl h = lookupH * replacedInfo.scaleFactor; } + SimpleBuf saveBuf; + + // TODO: Move the color conversion to the thread as well. + // Actually may be better to re-decode using expand32? if (replacedInfo.fmt != ReplacedTextureFormat::F_8888) { saveBuf.resize((pitch * h) / sizeof(u16)); switch (replacedInfo.fmt) { @@ -598,24 +636,27 @@ void TextureReplacer::NotifyTextureDecoded(const ReplacedTextureDecodeInfo &repl // We doubled our pitch. pitch *= 2; } + } else { + // Copy data to a buffer so we can send it to the thread. Might as well compact-away the pitch + // while we're at it. 
+ saveBuf.resize(w * h); + for (int y = 0; y < h; y++) { + memcpy((u8 *)saveBuf.data() + y * w * 4, (const u8 *)data + y * pitch, w * sizeof(u32)); + } + pitch = w * 4; } - png_image png; - memset(&png, 0, sizeof(png)); - png.version = PNG_IMAGE_VERSION; - png.format = PNG_FORMAT_RGBA; - png.width = w; - png.height = h; - bool success = WriteTextureToPNG(&png, saveFilename, 0, data, pitch, nullptr); - png_image_free(&png); - - if (png.warning_or_error >= 2) { - ERROR_LOG(COMMON, "Saving screenshot to PNG produced errors."); - } else if (success) { - NOTICE_LOG(G3D, "Saving texture for replacement: %08x / %dx%d", replacedInfo.hash, w, h); - } + TextureSaveTask *task = new TextureSaveTask(std::move(saveBuf)); + // Should probably do a proper move constructor but this'll work. + task->w = w; + task->h = h; + task->pitch = pitch; + task->path = saveFilename; + task->replacedInfoHash = replacedInfo.hash; + g_threadManager.EnqueueTask(task); // We don't care about waiting for the task. It'll be fine. // Remember that we've saved this for next time. + // Should be OK that the actual disk write may not be finished yet. 
ReplacedTextureLevel saved; saved.fmt = ReplacedTextureFormat::F_8888; saved.file = filename; diff --git a/Core/TextureReplacer.h b/Core/TextureReplacer.h index 66407e119525..76bcf96b09ef 100644 --- a/Core/TextureReplacer.h +++ b/Core/TextureReplacer.h @@ -227,7 +227,6 @@ class TextureReplacer { void PopulateReplacement(ReplacedTexture *result, u64 cachekey, u32 hash, int w, int h); bool PopulateLevel(ReplacedTextureLevel &level); - SimpleBuf saveBuf; bool enabled_ = false; bool allowVideo_ = false; bool ignoreAddress_ = false; diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index 24be25aca8d9..58a38cb6776d 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -736,6 +736,7 @@ void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture & replacedInfo.scaleFactor = scaleFactor; replacedInfo.fmt = FromD3D11Format(dstFmt); + // NOTE: Reading the decoded texture here may be very slow, if we just wrote it to write-combined memory. replacer_.NotifyTextureDecoded(replacedInfo, pixelData, decPitch, level, w, h); } } diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 29f06d09ee92..64ee0aba0617 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -880,6 +880,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { // When hardware texture scaling is enabled, this saves the original. int w = dataScaled ? mipWidth : mipUnscaledWidth; int h = dataScaled ? mipHeight : mipUnscaledHeight; + // NOTE: Reading the decoded texture here may be very slow, if we just wrote it to write-combined memory. replacer_.NotifyTextureDecoded(replacedInfo, data, stride, i, w, h); } }