Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove the vertex cache option #18339

Merged
merged 3 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 0 additions & 121 deletions Common/GPU/Vulkan/VulkanMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,127 +35,6 @@ using namespace PPSSPP_VK;
// Push buffers are always kept alive for at least this many seconds.
static constexpr double PUSH_GARBAGE_COLLECTION_DELAY = 10.0;

// Constructs a push buffer, registers it with the GPU memory manager list and
// immediately allocates the first backing buffer of `size` bytes.
//   vulkan - context used for allocation and debug naming.
//   name   - debug name; only the pointer is stored, the string is not copied.
//   size   - size in bytes of each backing buffer.
//   usage  - Vulkan usage flags applied to every backing buffer.
// Asserts if the initial allocation fails.
VulkanPushBuffer::VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage)
	// Initializers are listed in member declaration order (vulkan_, size_, usage_, name_),
	// which is the order they actually run in; this avoids -Wreorder warnings.
	: vulkan_(vulkan), size_(size), usage_(usage), name_(name) {
	RegisterGPUMemoryManager(this);
	bool res = AddBuffer();
	_assert_(res);
}

// Unregisters this object from the GPU memory manager list.
// The destructor does not release any Vulkan buffers itself: it asserts that the
// buffer is not currently mapped and that buffers_ is already empty, so Destroy()
// (which clears buffers_) has to be called before the object is deleted.
VulkanPushBuffer::~VulkanPushBuffer() {
UnregisterGPUMemoryManager(this);
_dbg_assert_(!writePtr_);
_assert_(buffers_.empty());
}

// Allocates one more backing buffer of size_ bytes (usage_ flags, CPU_TO_GPU memory)
// through VMA, appends it to buffers_ and makes it the current buffer (buf_).
// Returns true on success; on failure it asserts and returns false, leaving
// buffers_ and buf_ untouched.
bool VulkanPushBuffer::AddBuffer() {
	// Value-initialized so the handles are never indeterminate, even on failure.
	BufInfo info{};

	VkBufferCreateInfo b{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
	b.size = size_;
	b.flags = 0;
	b.usage = usage_;
	b.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	b.queueFamilyIndexCount = 0;
	b.pQueueFamilyIndices = nullptr;

	VmaAllocationCreateInfo allocCreateInfo{};
	allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
	VmaAllocationInfo allocInfo{};

	VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &b, &allocCreateInfo, &info.buffer, &info.allocation, &allocInfo);
	if (VK_SUCCESS != res) {
		_assert_msg_(false, "vkCreateBuffer failed! result=%d", (int)res);
		return false;
	}

	vulkan_->SetDebugName(info.buffer, VK_OBJECT_TYPE_BUFFER, name_);

	buffers_.push_back(info);
	// The freshly added buffer becomes the one we allocate from.
	buf_ = buffers_.size() - 1;
	return true;
}

// Hands every backing buffer and its allocation to vulkan->Delete() for queued
// deletion, then empties buffers_. Asserts (debug) that the buffer is not mapped.
void VulkanPushBuffer::Destroy(VulkanContext *vulkan) {
	_dbg_assert_(!writePtr_);
	for (size_t i = 0; i < buffers_.size(); i++) {
		BufInfo &entry = buffers_[i];
		vulkan->Delete().QueueDeleteBufferAllocation(entry.buffer, entry.allocation);
	}
	buffers_.clear();
}

// Switches to the next backing buffer, creating a new (possibly larger) one when
// there is no next buffer or when minSize does not fit in size_. The current buffer
// is unmapped first, and the new current buffer is mapped with offset_ reset to 0.
void VulkanPushBuffer::NextBuffer(size_t minSize) {
	// First, unmap the current memory.
	Unmap();

	buf_++;
	if (buf_ >= buffers_.size() || minSize > size_) {
		// Before creating the buffer, adjust to the new size_ if necessary.
		while (size_ < minSize) {
			const size_t doubled = size_ << 1;
			if (doubled <= size_) {
				// Doubling made no progress: size_ was 0, or the shift wrapped around.
				// Fall back to exactly minSize instead of looping forever.
				size_ = minSize;
				break;
			}
			size_ = doubled;
		}

		bool res = AddBuffer();
		_assert_(res);
		if (!res) {
			// Let's try not to crash at least?
			buf_ = 0;
		}
	}

	// Now, move to the next buffer and map it.
	offset_ = 0;
	Map();
}

// If more than one backing buffer exists, destroys them all and replaces them with
// a single buffer of size_ * (number of buffers) bytes. Does nothing otherwise.
void VulkanPushBuffer::Defragment(VulkanContext *vulkan) {
	const size_t bufferCount = buffers_.size();
	if (bufferCount > 1) {
		// Work out the combined size before Destroy() empties buffers_.
		const size_t combinedSize = size_ * bufferCount;
		Destroy(vulkan);

		size_ = combinedSize;
		bool res = AddBuffer();
		_assert_(res);
	}
}

// Returns size_ for every backing buffer except one, plus the current offset_.
size_t VulkanPushBuffer::GetTotalSize() const {
	const size_t bufferCount = buffers_.size();
	const size_t earlierBytes = bufferCount > 1 ? size_ * (bufferCount - 1) : 0;
	return earlierBytes + offset_;
}

void VulkanPushBuffer::GetDebugString(char *buffer, size_t bufSize) const {
size_t sum = 0;
if (buffers_.size() > 1)
sum += size_ * (buffers_.size() - 1);
sum += offset_;
size_t capacity = size_ * buffers_.size();
snprintf(buffer, bufSize, "Push %s: %s / %s", name_, NiceSizeFormat(sum).c_str(), NiceSizeFormat(capacity).c_str());
}

// Maps the current backing buffer (buffers_[buf_]) and stores the CPU pointer in
// writePtr_. Asserts (debug) that nothing is mapped yet, and that mapping succeeded.
void VulkanPushBuffer::Map() {
	_dbg_assert_(!writePtr_);
	// vmaMapMemory writes a void*, so give it a real void* object instead of casting
	// the address of a uint8_t* to void** (type punning). Seeding the local with
	// writePtr_ keeps writePtr_ unchanged if the call does not write the pointer.
	void *mapped = writePtr_;
	VkResult res = vmaMapMemory(vulkan_->Allocator(), buffers_[buf_].allocation, &mapped);
	writePtr_ = static_cast<uint8_t *>(mapped);
	_dbg_assert_(writePtr_);
	_assert_(VK_SUCCESS == res);
}

// Unmaps the current backing buffer and clears writePtr_. Calling this while
// nothing is mapped trips a debug assert and is otherwise a no-op.
void VulkanPushBuffer::Unmap() {
	_dbg_assert_msg_(writePtr_ != nullptr, "VulkanPushBuffer::Unmap: writePtr_ null here means we have a bug (map/unmap mismatch)");
	if (writePtr_) {
		vmaUnmapMemory(vulkan_->Allocator(), buffers_[buf_].allocation);
		writePtr_ = nullptr;
	}
}

VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t originalBlockSize, VkBufferUsageFlags usage)
: vulkan_(vulkan), name_(name), originalBlockSize_(originalBlockSize), usage_(usage) {
RegisterGPUMemoryManager(this);
Expand Down
82 changes: 0 additions & 82 deletions Common/GPU/Vulkan/VulkanMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,88 +16,6 @@ VK_DEFINE_HANDLE(VmaAllocation);
//
// Vulkan memory management utils.

// VulkanPushBuffer
// Simple incrementing allocator.
// Use these to push vertex, index and uniform data. Generally you'll have two or three of these
// and alternate on each frame. Make sure not to reset until the fence from the last time you used it
// has completed.
// NOTE: This has now been replaced with VulkanPushPool for all uses except the vertex cache.
class VulkanPushBuffer : public GPUMemoryManager {
	// One backing Vulkan buffer together with its VMA allocation.
	struct BufInfo {
		VkBuffer buffer;
		VmaAllocation allocation;
	};

public:
	// Creates the push buffer and its first backing buffer of `size` bytes with the
	// given usage flags. `name` is kept as a pointer (not copied) and is used for
	// debug naming and GetDebugString().
	VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage);
	~VulkanPushBuffer();

	// Not copyable: the object owns Vulkan buffers and is registered by address as a
	// GPU memory manager, so a copy would double-free and double-unregister.
	VulkanPushBuffer(const VulkanPushBuffer &) = delete;
	VulkanPushBuffer &operator=(const VulkanPushBuffer &) = delete;

	// Queues all backing buffers for deletion. Must be called before destruction.
	void Destroy(VulkanContext *vulkan);

	// Rewinds the write offset within the current buffer.
	void Reset() { offset_ = 0; }

	void GetDebugString(char *buffer, size_t bufSize) const override;
	const char *Name() const override {
		return name_;
	}

	// Starts a new round of pushes from the first buffer and maps it.
	// Needs context in case of defragment.
	void Begin(VulkanContext *vulkan) {
		buf_ = 0;
		offset_ = 0;
		// Note: we must defrag because some buffers may be smaller than size_.
		Defragment(vulkan);
		Map();
	}

	// Maps the current buffer without rewinding buf_ / offset_.
	void BeginNoReset() { Map(); }
	// Unmaps the current buffer.
	void End() { Unmap(); }

	void Map();
	void Unmap();

	// Reserves numBytes at the given alignment (expected to be a power of two, since it
	// is used as a bit mask) and returns a CPU pointer to the reserved range.
	// *vkbuf receives the buffer to bind and *bindOffset the offset within it.
	// Moves on to another backing buffer when the current one cannot fit the request.
	// When using the returned memory, make sure to bind the returned vkbuf.
	uint8_t *Allocate(VkDeviceSize numBytes, VkDeviceSize alignment, VkBuffer *vkbuf, uint32_t *bindOffset) {
		size_t offset = (offset_ + alignment - 1) & ~(alignment - 1);
		if (offset + numBytes > size_) {
			NextBuffer(numBytes);
			// NextBuffer() switched buffers and rewound offset_.
			offset = offset_;
		}
		offset_ = offset + numBytes;
		*bindOffset = (uint32_t)offset;
		*vkbuf = buffers_[buf_].buffer;
		return writePtr_ + offset;
	}

	// Allocates numBytes and copies `data` into it. Returns the bind offset within *vkbuf.
	VkDeviceSize Push(const void *data, VkDeviceSize numBytes, int alignment, VkBuffer *vkbuf) {
		uint32_t bindOffset;
		uint8_t *ptr = Allocate(numBytes, alignment, vkbuf, &bindOffset);
		memcpy(ptr, data, numBytes);
		return bindOffset;
	}

	size_t GetOffset() const { return offset_; }
	size_t GetTotalSize() const;

private:
	bool AddBuffer();
	void NextBuffer(size_t minSize);
	void Defragment(VulkanContext *vulkan);

	VulkanContext *vulkan_;

	std::vector<BufInfo> buffers_;
	size_t buf_ = 0;               // Index of the current buffer in buffers_.
	size_t offset_ = 0;            // Next free byte within the current buffer.
	size_t size_ = 0;              // Size used for newly created backing buffers.
	uint8_t *writePtr_ = nullptr;  // CPU pointer to the mapped current buffer, or null when unmapped.
	VkBufferUsageFlags usage_;
	const char *name_;
};

// Simple memory pushbuffer pool that can share blocks between the "frames", to reduce the impact of push memory spikes -
// a later frame can gobble up redundant buffers from an earlier frame even if they don't share frame index.
// NOT thread safe! Can only be used from one thread (our main thread).
Expand Down
1 change: 0 additions & 1 deletion Core/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,6 @@ static const ConfigSetting graphicsSettings[] = {
ConfigSetting("AnisotropyLevel", &g_Config.iAnisotropyLevel, 4, CfgFlag::PER_GAME),
ConfigSetting("MultiSampleLevel", &g_Config.iMultiSampleLevel, 0, CfgFlag::PER_GAME), // Number of samples is 1 << iMultiSampleLevel

ConfigSetting("VertexDecCache", &g_Config.bVertexCache, false, CfgFlag::PER_GAME | CfgFlag::REPORT),
ConfigSetting("TextureBackoffCache", &g_Config.bTextureBackoffCache, false, CfgFlag::PER_GAME | CfgFlag::REPORT),
ConfigSetting("VertexDecJit", &g_Config.bVertexDecoderJit, &DefaultCodeGen, CfgFlag::DONT_SAVE | CfgFlag::REPORT),

Expand Down
1 change: 0 additions & 1 deletion Core/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,6 @@ struct Config {
float fUITint;
float fUISaturation;

bool bVertexCache;
bool bTextureBackoffCache;
bool bVertexDecoderJit;
bool bFullScreen;
Expand Down
100 changes: 0 additions & 100 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -575,80 +575,6 @@ void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) {
gstate_c.Dirty(DIRTY_SHADERBLEND);
}

// Computes a cheap 32-bit hash over a sample of the byte range [ptr, ptr + sz).
// The range is processed in whole, 4-byte aligned u32 words: leading bytes up to the
// next aligned address and trailing bytes that do not fill a word are ignored.
// Ranges of more than 100 words are sampled with four 100-byte windows hashed by
// XXH3_64bits; smaller ranges use the sum of the first and last word.
// Returns 0 if the range does not contain a single whole aligned word.
inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
	// Switch to u32 units, and round up to avoid unaligned accesses.
	// Probably doesn't matter if we skip the first few bytes in some cases.
	const uintptr_t addr = (uintptr_t)ptr;
	const uintptr_t alignedAddr = (addr + 3) & ~(uintptr_t)3;
	const size_t skipped = (size_t)(alignedAddr - addr);
	if (sz < skipped + sizeof(u32)) {
		// Nothing to read; previously this fell through to p[sz - 1] with sz == 0.
		return 0;
	}
	const u32 *p = (const u32 *)alignedAddr;
	// Shrink by the skipped bytes so the words we read stay inside the range.
	sz = (sz - skipped) >> 2;

	if (sz > 100) {
		size_t step = sz / 4;
		u32 hash = 0;
		// Each sample reads 100 bytes (25 words). Only sample where a full window fits:
		// without this bound a trailing sample at 4 * step ran past the end of the range
		// whenever sz was not a multiple of 4.
		for (size_t i = 0; i + 25 <= sz; i += step) {
			hash += XXH3_64bits(p + i, 100);
		}
		return hash;
	} else {
		return p[0] + p[sz - 1];
	}
}

// Builds a quick hash over a subset of the deferred vertex ranges and index lists by
// summing ComputeMiniHashRange() of every step-th entry. Non-indexed entries
// (indexType == 0) contribute nothing from the index side.
u32 DrawEngineCommon::ComputeMiniHash() {
	const int vertexSize = dec_->GetDecVtxFmt().stride;
	const int indexSize = IndexSize(dec_->VertexType());

	// Sampling stride, derived from the number of deferred vertex ranges. The same
	// stride is used for both loops below.
	int step = 1;
	if (numDrawVerts_ >= 8) {
		step = numDrawVerts_ / 8;
	} else if (numDrawVerts_ >= 3) {
		step = 4;
	}

	u32 fullhash = 0;
	for (int v = 0; v < numDrawVerts_; v += step) {
		const DeferredVerts &dv = drawVerts_[v];
		const u8 *rangeStart = (const u8 *)dv.verts + vertexSize * dv.indexLowerBound;
		fullhash += ComputeMiniHashRange(rangeStart, vertexSize * (dv.indexUpperBound - dv.indexLowerBound));
	}
	for (int n = 0; n < numDrawInds_; n += step) {
		const DeferredInds &di = drawInds_[n];
		if (di.indexType == 0)
			continue;
		fullhash += ComputeMiniHashRange(di.inds, indexSize * di.vertexCount);
	}

	return fullhash;
}

// Inexpensive 32-bit integer bit mixer, taken from https://nullprogram.com/blog/2018/07/31/
// Alternates xor-shift and multiply steps; all arithmetic wraps modulo 2^32.
inline uint32_t lowbias32_r(uint32_t x) {
	x = (x ^ (x >> 16)) * 0x43021123U;
	x ^= (x >> 15) ^ (x >> 30);
	x = x * 0x1d69e2a5U;
	return x ^ (x >> 16);
}

uint32_t DrawEngineCommon::ComputeDrawcallsHash() const {
uint32_t dcid = 0;
for (int i = 0; i < numDrawVerts_; i++) {
u32 dhash = dcid;
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawVerts_[i].verts, 13);
dhash = __rotl(dhash ^ (u32)drawInds_[i].vertexCount, 11);
dcid = lowbias32_r(dhash ^ (u32)drawInds_[i].prim);
}
for (int i = 0; i < numDrawInds_; i++) {
const DeferredInds &di = drawInds_[i];
u32 dhash = dcid;
if (di.indexType) {
dhash = __rotl(dhash ^ (u32)(uintptr_t)di.inds, 19);
dcid = lowbias32_r(__rotl(dhash ^ (u32)di.indexType, 7));
}
}
return dcid;
}

int DrawEngineCommon::ComputeNumVertsToDecode() const {
int sum = 0;
for (int i = 0; i < numDrawVerts_; i++) {
Expand All @@ -657,32 +583,6 @@ int DrawEngineCommon::ComputeNumVertsToDecode() const {
return sum;
}

uint64_t DrawEngineCommon::ComputeHash() {
uint64_t fullhash = 0;
const int vertexSize = dec_->GetDecVtxFmt().stride;

// TODO: Add some caps both for numDrawCalls_ and num verts to check?
// It is really very expensive to check all the vertex data so often.
for (int i = 0; i < numDrawVerts_; i++) {
const DeferredVerts &dv = drawVerts_[i];
int indexLowerBound = dv.indexLowerBound, indexUpperBound = dv.indexUpperBound;
fullhash += XXH3_64bits((const char *)dv.verts + vertexSize * indexLowerBound, vertexSize * (indexUpperBound - indexLowerBound));
}

for (int i = 0; i < numDrawInds_; i++) {
const DeferredInds &di = drawInds_[i];
if (di.indexType != 0) {
int indexSize = IndexSize(di.indexType << GE_VTYPE_IDX_SHIFT);
// Hm, we will miss some indices when combining above, but meh, it should be fine.
fullhash += XXH3_64bits((const char *)di.inds, indexSize * di.vertexCount);
}
}

// this looks utterly broken??
// fullhash += XXH3_64bits(&drawCalls_[0].uvScale, sizeof(drawCalls_[0].uvScale) * numDrawCalls_);
return fullhash;
}

int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *stall, u32 vertTypeID, bool clockwise, int *bytesRead, bool isTriangle) {
const uint32_t *start = cmd;
int prevDrawVerts = numDrawVerts_ - 1;
Expand Down
5 changes: 0 additions & 5 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,6 @@ class DrawEngineCommon {
// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);

// Utility for vertex caching
u32 ComputeMiniHash();
uint64_t ComputeHash();

int ComputeNumVertsToDecode() const;

void ApplyFramebufferRead(FBOTexState *fboTexState);
Expand Down Expand Up @@ -271,7 +267,6 @@ class DrawEngineCommon {
int numDrawInds_ = 0;
int vertexCountInDrawCalls_ = 0;

int decimationCounter_ = 0;
int decodeVertsCounter_ = 0;
int decodeIndsCounter_ = 0;

Expand Down
Loading