diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a9179c1f92c..b0228ab24b18 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -566,6 +566,7 @@ add_library(Common STATIC Common/Data/Collections/FixedSizeQueue.h Common/Data/Collections/Hashmaps.h Common/Data/Collections/TinySet.h + Common/Data/Collections/FastVec.h Common/Data/Collections/ThreadSafeList.h Common/Data/Color/RGBAUtil.cpp Common/Data/Color/RGBAUtil.h diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj index eec456c3f588..3ecf5c41ed92 100644 --- a/Common/Common.vcxproj +++ b/Common/Common.vcxproj @@ -859,6 +859,7 @@ + diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters index 776cefaf35a7..719cbe3e64e6 100644 --- a/Common/Common.vcxproj.filters +++ b/Common/Common.vcxproj.filters @@ -938,6 +938,9 @@ GPU\OpenGL + + Data\Collections + @@ -1086,4 +1089,4 @@ ext\basis_universal - \ No newline at end of file + diff --git a/Common/Data/Collections/FastVec.h b/Common/Data/Collections/FastVec.h new file mode 100644 index 000000000000..41e18e3eac8d --- /dev/null +++ b/Common/Data/Collections/FastVec.h @@ -0,0 +1,137 @@ +#pragma once + +// Yet another replacement for std::vector, this time for use in graphics queues. +// Its major difference is that you can append uninitialized structures and initialize them after. +// This is not allowed by std::vector but is very useful for our sometimes oversized unions. +// Also, copies during resize are done by memcpy, not by any move constructor or similar. 
#include <cstddef>
#include <cstdlib>
#include <cstring>

// Growable array for plain-data elements.
//
// Differences from std::vector:
//  * push_uninitialized() appends a slot WITHOUT constructing it; the caller fills it in.
//  * Elements are relocated with memcpy/memmove, never via copy/move constructors, and no
//    destructors are ever run. Only use this with types for which that is safe.
//  * Storage comes from malloc() and is always released with free().
//
// Any operation that can grow the vector invalidates all pointers/references into it.
template <class T>
class FastVec {
public:
	FastVec() {}

	// Pre-allocates room for initialCapacity elements; size() stays 0.
	FastVec(size_t initialCapacity) {
		// Skip malloc(0): its result is implementation-defined and there is nothing to store.
		if (initialCapacity != 0) {
			data_ = (T *)malloc(initialCapacity * sizeof(T));
			capacity_ = initialCapacity;
		}
	}

	~FastVec() { free(data_); }  // free(nullptr) is a no-op.

	// Appends one element and returns a reference to it. The contents are uninitialized.
	T &push_uninitialized() {
		ExtendByOne();
		return data_[size_ - 1];
	}

	// Appends a copy of t. t must not refer to an element of this vector, since growing
	// may relocate the storage before the copy is made.
	void push_back(const T &t) {
		T &dest = push_uninitialized();
		dest = t;
	}

	// Move constructor: steals the storage and leaves other empty.
	FastVec(FastVec &&other) noexcept {
		data_ = other.data_;
		size_ = other.size_;
		capacity_ = other.capacity_;
		other.data_ = nullptr;
		other.size_ = 0;
		other.capacity_ = 0;
	}

	// Move assignment: releases our storage, steals other's and leaves other empty.
	FastVec &operator=(FastVec &&other) noexcept {
		if (this != &other) {
			// The buffer was obtained from malloc(), so it must be released with free().
			free(data_);
			data_ = other.data_;
			size_ = other.size_;
			capacity_ = other.capacity_;
			other.data_ = nullptr;
			other.size_ = 0;
			other.capacity_ = 0;
		}
		return *this;
	}

	// No copy constructor.
	FastVec(const FastVec &other) = delete;
	FastVec &operator=(const FastVec &other) = delete;

	size_t size() const { return size_; }
	size_t capacity() const { return capacity_; }
	// Drops all elements but keeps the allocation for reuse.
	void clear() { size_ = 0; }
	bool empty() const { return size_ == 0; }

	T *begin() { return data_; }
	T *end() { return data_ + size_; }
	const T *begin() const { return data_; }
	const T *end() const { return data_ + size_; }

	// Out of bounds (past size() - 1) is undefined behavior. Note that at() is NOT
	// bounds-checked either, unlike std::vector::at().
	T &operator[] (const size_t index) { return data_[index]; }
	const T &operator[] (const size_t index) const { return data_[index]; }
	T &at(const size_t index) { return data_[index]; }
	const T &at(const size_t index) const { return data_[index]; }

	// These are invalid if empty().
	T &back() { return (*this)[size() - 1]; }
	const T &back() const { return (*this)[size() - 1]; }
	T &front() { return (*this)[0]; }
	const T &front() const { return (*this)[0]; }

	// Limited functionality for inserts and similar, add as needed.

	// Opens a one-element gap at iter (which must be in [begin(), end()]) and returns a
	// reference to it. The returned slot holds stale/uninitialized data until assigned.
	T &insert(T *iter) {
		// Compute the offset first: growing may relocate the storage and invalidate iter.
		const size_t pos = iter - data_;
		const size_t oldSize = size_;
		ExtendByOne();
		if (pos < oldSize) {
			// Shift only the elements that existed before the insert.
			memmove(data_ + pos + 1, data_ + pos, (oldSize - pos) * sizeof(T));
		}
		return data_[pos];
	}

	// Inserts a copy of [beginIter, endIter) before destIter. The source range must not
	// point into this vector.
	void insert(T *destIter, const T *beginIter, const T *endIter) {
		if (beginIter == endIter)
			return;
		const size_t pos = destIter - data_;
		const size_t newItems = endIter - beginIter;
		GrowToFit(size_ + newItems);
		memmove(data_ + pos + newItems, data_ + pos, (size_ - pos) * sizeof(T));
		memcpy(data_ + pos, beginIter, newItems * sizeof(T));
		size_ += newItems;
	}

	// Shrinking just drops the tail. Growing extends the vector with UNINITIALIZED
	// elements (consistent with push_uninitialized), which the caller must fill in.
	void resize(size_t size) {
		if (size > size_) {
			GrowToFit(size);
		}
		size_ = size;
	}

private:
	// Reallocates to exactly newCapacity elements (no-op if we already have that much),
	// preserving the first size_ elements.
	void IncreaseCapacityTo(size_t newCapacity) {
		if (newCapacity <= capacity_)
			return;
		T *oldData = data_;
		data_ = (T *)malloc(sizeof(T) * newCapacity);
		if (oldData) {
			if (size_ != 0) {
				memcpy(data_, oldData, sizeof(T) * size_);
			}
			free(oldData);
		}
		capacity_ = newCapacity;
	}

	// Makes sure at least minCapacity elements fit, growing geometrically (min 16) so
	// that repeated appends/inserts stay amortized O(1).
	void GrowToFit(size_t minCapacity) {
		if (minCapacity <= capacity_)
			return;
		size_t newCapacity = capacity_ * 2;
		if (newCapacity < 16) {
			newCapacity = 16;
		}
		if (newCapacity < minCapacity) {
			newCapacity = minCapacity;
		}
		IncreaseCapacityTo(newCapacity);
	}

	// Grows size_ by one, reallocating only when the current allocation is full.
	void ExtendByOne() {
		GrowToFit(size_ + 1);
		size_++;
	}

	size_t size_ = 0;
	size_t capacity_ = 0;
	T *data_ = nullptr;
};

// Remainder of the original patch line (start of the next file's hunk), kept verbatim:
// diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp index c5d562a5dcc2..457f5d98e174 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.cpp +++ b/Common/GPU/OpenGL/GLQueueRunner.cpp @@ -118,7 +118,7 @@ static std::string GetStereoBufferLayout(const char *uniformName) { else return "undefined"; } -void GLQueueRunner::RunInitSteps(const std::vector &steps, bool skipGLCalls) { +void GLQueueRunner::RunInitSteps(const FastVec &steps, bool skipGLCalls) { if (skipGLCalls) { // Some bookkeeping still needs to be done.
for (size_t i = 0; i < steps.size(); i++) { @@ -700,7 +700,7 @@ void GLQueueRunner::RunSteps(const std::vector &steps, bool skipGLCal CHECK_GL_ERROR_IF_DEBUG(); size_t renderCount = 0; for (size_t i = 0; i < steps.size(); i++) { - const GLRStep &step = *steps[i]; + GLRStep &step = *steps[i]; #if !defined(USING_GLES2) if (useDebugGroups_) @@ -711,7 +711,7 @@ void GLQueueRunner::RunSteps(const std::vector &steps, bool skipGLCal case GLRStepType::RENDER: renderCount++; if (IsVREnabled()) { - GLRStep vrStep = step; + GLRStep &vrStep = step; PreprocessStepVR(&vrStep); PerformRenderPass(vrStep, renderCount == 1, renderCount == totalRenderCount); } else { diff --git a/Common/GPU/OpenGL/GLQueueRunner.h b/Common/GPU/OpenGL/GLQueueRunner.h index 9cdbfeee01ef..11d6e1acac7c 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.h +++ b/Common/GPU/OpenGL/GLQueueRunner.h @@ -11,7 +11,7 @@ #include "Common/GPU/Shader.h" #include "Common/GPU/thin3d.h" #include "Common/Data/Collections/TinySet.h" - +#include "Common/Data/Collections/FastVec.h" struct GLRViewport { float x, y, w, h, minZ, maxZ; @@ -70,6 +70,7 @@ enum class GLRRenderCommand : uint8_t { // type field, smashed right after each other?) // Also, all GLenums are really only 16 bits. struct GLRRenderData { + GLRRenderData(GLRRenderCommand _cmd) : cmd(_cmd) {} GLRRenderCommand cmd; union { struct { @@ -301,7 +302,7 @@ enum { struct GLRStep { GLRStep(GLRStepType _type) : stepType(_type) {} GLRStepType stepType; - std::vector commands; + FastVec commands; TinySet dependencies; const char *tag; union { @@ -310,8 +311,6 @@ struct GLRStep { GLRRenderPassAction color; GLRRenderPassAction depth; GLRRenderPassAction stencil; - // Note: not accurate. 
- int numDraws; } render; struct { GLRFramebuffer *src; @@ -355,7 +354,7 @@ class GLQueueRunner { caps_ = caps; } - void RunInitSteps(const std::vector &steps, bool skipGLCalls); + void RunInitSteps(const FastVec &steps, bool skipGLCalls); void RunSteps(const std::vector &steps, bool skipGLCalls, bool keepSteps, bool useVR); diff --git a/Common/GPU/OpenGL/GLRenderManager.cpp b/Common/GPU/OpenGL/GLRenderManager.cpp index e851da860768..9ac14dfcec48 100644 --- a/Common/GPU/OpenGL/GLRenderManager.cpp +++ b/Common/GPU/OpenGL/GLRenderManager.cpp @@ -129,25 +129,24 @@ bool GLRenderManager::ThreadFrame() { return false; } - GLRRenderThreadTask task; + GLRRenderThreadTask *task = nullptr; // In case of syncs or other partial completion, we keep going until we complete a frame. while (true) { // Pop a task of the queue and execute it. // NOTE: We need to actually wait for a task, we can't just bail! - { std::unique_lock lock(pushMutex_); while (renderThreadQueue_.empty()) { pushCondVar_.wait(lock); } - task = renderThreadQueue_.front(); + task = std::move(renderThreadQueue_.front()); renderThreadQueue_.pop(); } // We got a task! We can now have pushMutex_ unlocked, allowing the host to // push more work when it feels like it, and just start working. - if (task.runType == GLRRunType::EXIT) { + if (task->runType == GLRRunType::EXIT) { // Oh, host wanted out. Let's leave, and also let's notify the host. // This is unlike Vulkan too which can just block on the thread existing. std::unique_lock lock(syncMutex_); @@ -157,11 +156,13 @@ bool GLRenderManager::ThreadFrame() { } // Render the scene. - VLOG(" PULL: Frame %d RUN (%0.3f)", task.frame, time_now_d()); - if (Run(task)) { + VLOG(" PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d()); + if (Run(*task)) { // Swap requested, so we just bail the loop. 
+ delete task; break; } + delete task; }; return true; @@ -174,9 +175,7 @@ void GLRenderManager::StopThread() { run_ = false; std::unique_lock lock(pushMutex_); - GLRRenderThreadTask exitTask{}; - exitTask.runType = GLRRunType::EXIT; - renderThreadQueue_.push(exitTask); + renderThreadQueue_.push(new GLRRenderThreadTask(GLRRunType::EXIT)); pushCondVar_.notify_one(); } else { WARN_LOG(G3D, "GL submission thread was already paused."); @@ -215,13 +214,11 @@ void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRende step->render.color = color; step->render.depth = depth; step->render.stencil = stencil; - step->render.numDraws = 0; step->tag = tag; steps_.push_back(step); GLuint clearMask = 0; - GLRRenderData data; - data.cmd = GLRRenderCommand::CLEAR; + GLRRenderData data(GLRRenderCommand::CLEAR); if (color == GLRRenderPassAction::CLEAR) { clearMask |= GL_COLOR_BUFFER_BIT; data.clear.clearColor = clearColor; @@ -379,15 +376,14 @@ void GLRenderManager::Finish() { frameData_[curFrame].deleter.Take(deleter_); VLOG("PUSH: Finish, pushing task. 
curFrame = %d", curFrame); - GLRRenderThreadTask task; - task.frame = curFrame; - task.runType = GLRRunType::PRESENT; + GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::PRESENT); + task->frame = curFrame; { std::unique_lock lock(pushMutex_); renderThreadQueue_.push(task); - renderThreadQueue_.back().initSteps = std::move(initSteps_); - renderThreadQueue_.back().steps = std::move(steps_); + renderThreadQueue_.back()->initSteps = std::move(initSteps_); + renderThreadQueue_.back()->steps = std::move(steps_); initSteps_.clear(); steps_.clear(); pushCondVar_.notify_one(); @@ -509,14 +505,13 @@ void GLRenderManager::FlushSync() { { VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_); - GLRRenderThreadTask task; - task.frame = curFrame_; - task.runType = GLRRunType::SYNC; + GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC); + task->frame = curFrame_; std::unique_lock lock(pushMutex_); renderThreadQueue_.push(task); - renderThreadQueue_.back().initSteps = std::move(initSteps_); - renderThreadQueue_.back().steps = std::move(steps_); + renderThreadQueue_.back()->initSteps = std::move(initSteps_); + renderThreadQueue_.back()->steps = std::move(steps_); pushCondVar_.notify_one(); steps_.clear(); } diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index 505e92aac29e..fcf0bfe7a5d8 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -203,14 +203,19 @@ enum class GLRRunType { class GLRenderManager; class GLPushBuffer; -// These are enqueued from the main thread, -// and the render thread pops them off +// These are enqueued from the main thread, and the render thread pops them off struct GLRRenderThreadTask { + GLRRenderThreadTask(GLRRunType _runType) : runType(_runType) {} + std::vector steps; - std::vector initSteps; + FastVec initSteps; - int frame; + int frame = -1; GLRRunType runType; + + // Avoid copying these by accident. 
+ GLRRenderThreadTask(GLRRenderThreadTask &) = delete; + GLRRenderThreadTask& operator =(GLRRenderThreadTask &) = delete; }; // Note: The GLRenderManager is created and destroyed on the render thread, and the latter @@ -221,6 +226,9 @@ class GLRenderManager { GLRenderManager(); ~GLRenderManager(); + GLRenderManager(GLRenderManager &) = delete; + GLRenderManager &operator=(GLRenderManager &) = delete; + void SetInvalidationCallback(InvalidationCallback callback) { invalidationCallback_ = callback; } @@ -248,37 +256,37 @@ class GLRenderManager { // We pass in width/height here even though it's not strictly needed until we support glTextureStorage // and then we'll also need formats and stuff. GLRTexture *CreateTexture(GLenum target, int width, int height, int depth, int numMips) { - GLRInitStep step { GLRInitStepType::CREATE_TEXTURE }; + GLRInitStep &step = initSteps_.push_uninitialized(); + step.stepType = GLRInitStepType::CREATE_TEXTURE; step.create_texture.texture = new GLRTexture(caps_, width, height, depth, numMips); step.create_texture.texture->target = target; - initSteps_.push_back(step); return step.create_texture.texture; } GLRBuffer *CreateBuffer(GLuint target, size_t size, GLuint usage) { - GLRInitStep step{ GLRInitStepType::CREATE_BUFFER }; + GLRInitStep &step = initSteps_.push_uninitialized(); + step.stepType = GLRInitStepType::CREATE_BUFFER; step.create_buffer.buffer = new GLRBuffer(target, size); step.create_buffer.size = (int)size; step.create_buffer.usage = usage; - initSteps_.push_back(step); return step.create_buffer.buffer; } GLRShader *CreateShader(GLuint stage, const std::string &code, const std::string &desc) { - GLRInitStep step{ GLRInitStepType::CREATE_SHADER }; + GLRInitStep &step = initSteps_.push_uninitialized(); + step.stepType = GLRInitStepType::CREATE_SHADER; step.create_shader.shader = new GLRShader(); step.create_shader.shader->desc = desc; step.create_shader.stage = stage; step.create_shader.code = new char[code.size() + 1]; 
memcpy(step.create_shader.code, code.data(), code.size() + 1); - initSteps_.push_back(step); return step.create_shader.shader; } GLRFramebuffer *CreateFramebuffer(int width, int height, bool z_stencil) { - GLRInitStep step{ GLRInitStepType::CREATE_FRAMEBUFFER }; + GLRInitStep &step = initSteps_.push_uninitialized(); + step.stepType = GLRInitStepType::CREATE_FRAMEBUFFER; step.create_framebuffer.framebuffer = new GLRFramebuffer(caps_, width, height, z_stencil); - initSteps_.push_back(step); return step.create_framebuffer.framebuffer; } @@ -287,7 +295,8 @@ class GLRenderManager { GLRProgram *CreateProgram( std::vector shaders, std::vector semantics, std::vector queries, std::vector initializers, GLRProgramLocData *locData, const GLRProgramFlags &flags) { - GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM }; + GLRInitStep &step = initSteps_.push_uninitialized(); + step.stepType = GLRInitStepType::CREATE_PROGRAM; _assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders)); step.create_program.program = new GLRProgram(); step.create_program.program->semantics_ = semantics; @@ -311,18 +320,17 @@ class GLRenderManager { } #endif step.create_program.num_shaders = (int)shaders.size(); - initSteps_.push_back(step); return step.create_program.program; } GLRInputLayout *CreateInputLayout(const std::vector &entries) { - GLRInitStep step{ GLRInitStepType::CREATE_INPUT_LAYOUT }; + GLRInitStep &step = initSteps_.push_uninitialized(); + step.stepType = GLRInitStepType::CREATE_INPUT_LAYOUT; step.create_input_layout.inputLayout = new GLRInputLayout(); step.create_input_layout.inputLayout->entries = entries; for (auto &iter : step.create_input_layout.inputLayout->entries) { step.create_input_layout.inputLayout->semanticsMask_ |= 1 << iter.location; } - initSteps_.push_back(step); return step.create_input_layout.inputLayout; } @@ -395,7 +403,8 @@ class GLRenderManager { void BufferSubdata(GLRBuffer *buffer, size_t offset, size_t size, uint8_t *data, bool deleteData = true) { // 
TODO: Maybe should be a render command instead of an init command? When possible it's better as // an init command, that's for sure. - GLRInitStep step{ GLRInitStepType::BUFFER_SUBDATA }; + GLRInitStep &step = initSteps_.push_uninitialized(); + step.stepType = GLRInitStepType::BUFFER_SUBDATA; _dbg_assert_(offset >= 0); _dbg_assert_(offset <= buffer->size_ - size); step.buffer_subdata.buffer = buffer; @@ -403,12 +412,12 @@ class GLRenderManager { step.buffer_subdata.size = (int)size; step.buffer_subdata.data = data; step.buffer_subdata.deleteData = deleteData; - initSteps_.push_back(step); } // Takes ownership over the data pointer and delete[]-s it. void TextureImage(GLRTexture *texture, int level, int width, int height, int depth, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW, bool linearFilter = false) { - GLRInitStep step{ GLRInitStepType::TEXTURE_IMAGE }; + GLRInitStep &step = initSteps_.push_uninitialized(); + step.stepType = GLRInitStepType::TEXTURE_IMAGE; step.texture_image.texture = texture; step.texture_image.data = data; step.texture_image.format = format; @@ -418,12 +427,11 @@ class GLRenderManager { step.texture_image.depth = depth; step.texture_image.allocType = allocType; step.texture_image.linearFilter = linearFilter; - initSteps_.push_back(step); } void TextureSubImage(int slot, GLRTexture *texture, int level, int x, int y, int width, int height, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData _data{ GLRRenderCommand::TEXTURE_SUBIMAGE }; + GLRRenderData _data(GLRRenderCommand::TEXTURE_SUBIMAGE); _data.texture_subimage.texture = texture; _data.texture_subimage.data = data; _data.texture_subimage.format = format; @@ -438,11 +446,11 @@ class GLRenderManager { } void FinalizeTexture(GLRTexture *texture, int loadedLevels, bool genMips) { - GLRInitStep step{ 
GLRInitStepType::TEXTURE_FINALIZE }; + GLRInitStep &step = initSteps_.push_uninitialized(); + step.stepType = GLRInitStepType::TEXTURE_FINALIZE; step.texture_finalize.texture = texture; step.texture_finalize.loadedLevels = loadedLevels; step.texture_finalize.genMips = genMips; - initSteps_.push_back(step); } void BindTexture(int slot, GLRTexture *tex) { @@ -453,18 +461,18 @@ class GLRenderManager { } _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); _dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS); - GLRRenderData data{ GLRRenderCommand::BINDTEXTURE }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::BINDTEXTURE; data.texture.slot = slot; data.texture.texture = tex; - curRenderStep_->commands.push_back(data); } void BindProgram(GLRProgram *program) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::BINDPROGRAM }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::BINDPROGRAM; _dbg_assert_(program != nullptr); data.program.program = program; - curRenderStep_->commands.push_back(data); #ifdef _DEBUG curProgram_ = program; #endif @@ -472,25 +480,25 @@ class GLRenderManager { void SetDepth(bool enabled, bool write, GLenum func) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::DEPTH }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::DEPTH; data.depth.enabled = enabled; data.depth.write = write; data.depth.func = func; - curRenderStep_->commands.push_back(data); } void SetViewport(const GLRViewport &vp) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::VIEWPORT }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::VIEWPORT; 
data.viewport.vp = vp; - curRenderStep_->commands.push_back(data); } void SetScissor(const GLRect2D &rc) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::SCISSOR }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::SCISSOR; data.scissor.rc = rc; - curRenderStep_->commands.push_back(data); } void SetUniformI(const GLint *loc, int count, const int *udata) { @@ -498,11 +506,12 @@ class GLRenderManager { #ifdef _DEBUG _dbg_assert_(curProgram_); #endif - GLRRenderData data{ GLRRenderCommand::UNIFORM4I }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::UNIFORM4I; + data.uniform4.name = nullptr; data.uniform4.loc = loc; data.uniform4.count = count; memcpy(data.uniform4.v, udata, sizeof(int) * count); - curRenderStep_->commands.push_back(data); } void SetUniformI1(const GLint *loc, int udata) { @@ -510,11 +519,12 @@ class GLRenderManager { #ifdef _DEBUG _dbg_assert_(curProgram_); #endif - GLRRenderData data{ GLRRenderCommand::UNIFORM4I }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::UNIFORM4I; + data.uniform4.name = nullptr; data.uniform4.loc = loc; data.uniform4.count = 1; memcpy(data.uniform4.v, &udata, sizeof(udata)); - curRenderStep_->commands.push_back(data); } void SetUniformUI(const GLint *loc, int count, const uint32_t *udata) { @@ -522,11 +532,12 @@ class GLRenderManager { #ifdef _DEBUG _dbg_assert_(curProgram_); #endif - GLRRenderData data{ GLRRenderCommand::UNIFORM4UI }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::UNIFORM4UI; + data.uniform4.name = nullptr; data.uniform4.loc = loc; data.uniform4.count = count; memcpy(data.uniform4.v, udata, sizeof(uint32_t) * count); - curRenderStep_->commands.push_back(data); } void SetUniformUI1(const GLint *loc, uint32_t udata) { @@ 
-534,11 +545,12 @@ class GLRenderManager { #ifdef _DEBUG _dbg_assert_(curProgram_); #endif - GLRRenderData data{ GLRRenderCommand::UNIFORM4UI }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::UNIFORM4UI; + data.uniform4.name = nullptr; data.uniform4.loc = loc; data.uniform4.count = 1; memcpy(data.uniform4.v, &udata, sizeof(udata)); - curRenderStep_->commands.push_back(data); } void SetUniformF(const GLint *loc, int count, const float *udata) { @@ -546,11 +558,12 @@ class GLRenderManager { #ifdef _DEBUG _dbg_assert_(curProgram_); #endif - GLRRenderData data{ GLRRenderCommand::UNIFORM4F }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::UNIFORM4F; + data.uniform4.name = nullptr; data.uniform4.loc = loc; data.uniform4.count = count; memcpy(data.uniform4.v, udata, sizeof(float) * count); - curRenderStep_->commands.push_back(data); } void SetUniformF1(const GLint *loc, const float udata) { @@ -558,11 +571,12 @@ class GLRenderManager { #ifdef _DEBUG _dbg_assert_(curProgram_); #endif - GLRRenderData data{ GLRRenderCommand::UNIFORM4F }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::UNIFORM4F; + data.uniform4.name = nullptr; data.uniform4.loc = loc; data.uniform4.count = 1; memcpy(data.uniform4.v, &udata, sizeof(float)); - curRenderStep_->commands.push_back(data); } void SetUniformF(const char *name, int count, const float *udata) { @@ -570,11 +584,12 @@ class GLRenderManager { #ifdef _DEBUG _dbg_assert_(curProgram_); #endif - GLRRenderData data{ GLRRenderCommand::UNIFORM4F }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::UNIFORM4F; data.uniform4.name = name; + data.uniform4.loc = nullptr; data.uniform4.count = count; memcpy(data.uniform4.v, udata, sizeof(float) * count); - curRenderStep_->commands.push_back(data); } void SetUniformM4x4(const GLint *loc, 
const float *udata) { @@ -582,10 +597,11 @@ class GLRenderManager { #ifdef _DEBUG _dbg_assert_(curProgram_); #endif - GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::UNIFORMMATRIX; + data.uniformMatrix4.name = nullptr; data.uniformMatrix4.loc = loc; memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16); - curRenderStep_->commands.push_back(data); } void SetUniformM4x4Stereo(const char *name, const GLint *loc, const float *left, const float *right) { @@ -593,13 +609,13 @@ class GLRenderManager { #ifdef _DEBUG _dbg_assert_(curProgram_); #endif - GLRRenderData data{ GLRRenderCommand::UNIFORMSTEREOMATRIX }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::UNIFORMSTEREOMATRIX; data.uniformStereoMatrix4.name = name; data.uniformStereoMatrix4.loc = loc; data.uniformStereoMatrix4.mData = new float[32]; memcpy(&data.uniformStereoMatrix4.mData[0], left, sizeof(float) * 16); memcpy(&data.uniformStereoMatrix4.mData[16], right, sizeof(float) * 16); - curRenderStep_->commands.push_back(data); } void SetUniformM4x4(const char *name, const float *udata) { @@ -607,17 +623,19 @@ class GLRenderManager { #ifdef _DEBUG _dbg_assert_(curProgram_); #endif - GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::UNIFORMMATRIX; data.uniformMatrix4.name = name; + data.uniformMatrix4.loc = nullptr; memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16); - curRenderStep_->commands.push_back(data); } void SetBlendAndMask(int colorMask, bool blendEnabled, GLenum srcColor, GLenum dstColor, GLenum srcAlpha, GLenum dstAlpha, GLenum funcColor, GLenum funcAlpha) { // Make this one only a non-debug _assert_, since it often comes first. // Lets us collect info about this potential crash through assert extra data. 
_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::BLEND }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::BLEND; data.blend.mask = colorMask; data.blend.enabled = blendEnabled; data.blend.srcColor = srcColor; @@ -626,96 +644,95 @@ class GLRenderManager { data.blend.dstAlpha = dstAlpha; data.blend.funcColor = funcColor; data.blend.funcAlpha = funcAlpha; - curRenderStep_->commands.push_back(data); } void SetNoBlendAndMask(int colorMask) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::BLEND }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::BLEND; data.blend.mask = colorMask; data.blend.enabled = false; - curRenderStep_->commands.push_back(data); } #ifndef USING_GLES2 void SetLogicOp(bool enabled, GLenum logicOp) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::LOGICOP }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::LOGICOP; data.logic.enabled = enabled; data.logic.logicOp = logicOp; - curRenderStep_->commands.push_back(data); } #endif void SetStencilFunc(bool enabled, GLenum func, uint8_t refValue, uint8_t compareMask) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::STENCILFUNC }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::STENCILFUNC; data.stencilFunc.enabled = enabled; data.stencilFunc.func = func; data.stencilFunc.ref = refValue; data.stencilFunc.compareMask = compareMask; - curRenderStep_->commands.push_back(data); } void SetStencilOp(uint8_t writeMask, GLenum sFail, GLenum zFail, GLenum pass) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == 
GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::STENCILOP }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::STENCILOP; data.stencilOp.writeMask = writeMask; data.stencilOp.sFail = sFail; data.stencilOp.zFail = zFail; data.stencilOp.pass = pass; - curRenderStep_->commands.push_back(data); } void SetStencilDisabled() { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); data.cmd = GLRRenderCommand::STENCILFUNC; data.stencilFunc.enabled = false; - curRenderStep_->commands.push_back(data); + // When enabled = false, the others aren't read so we don't zero-initialize them. } void SetBlendFactor(const float color[4]) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::BLENDCOLOR }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::BLENDCOLOR; CopyFloat4(data.blendColor.color, color); - curRenderStep_->commands.push_back(data); } void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable, GLboolean depthClamp) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::RASTER }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::RASTER; data.raster.cullEnable = cullEnable; data.raster.frontFace = frontFace; data.raster.cullFace = cullFace; data.raster.ditherEnable = ditherEnable; data.raster.depthClampEnable = depthClamp; - curRenderStep_->commands.push_back(data); } // Modifies the current texture as per GL specs, not global state. 
void SetTextureSampler(int slot, GLenum wrapS, GLenum wrapT, GLenum magFilter, GLenum minFilter, float anisotropy) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); _dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS); - GLRRenderData data{ GLRRenderCommand::TEXTURESAMPLER }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::TEXTURESAMPLER; data.textureSampler.slot = slot; data.textureSampler.wrapS = wrapS; data.textureSampler.wrapT = wrapT; data.textureSampler.magFilter = magFilter; data.textureSampler.minFilter = minFilter; data.textureSampler.anisotropy = anisotropy; - curRenderStep_->commands.push_back(data); } void SetTextureLod(int slot, float minLod, float maxLod, float lodBias) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); _dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS); - GLRRenderData data{ GLRRenderCommand::TEXTURELOD}; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::TEXTURELOD; data.textureLod.slot = slot; data.textureLod.minLod = minLod; data.textureLod.maxLod = maxLod; data.textureLod.lodBias = lodBias; - curRenderStep_->commands.push_back(data); } // If scissorW == 0, no scissor is applied (the whole render target is cleared). 
@@ -723,7 +740,8 @@ class GLRenderManager { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); if (!clearMask) return; - GLRRenderData data{ GLRRenderCommand::CLEAR }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::CLEAR; data.clear.clearMask = clearMask; data.clear.clearColor = clearColor; data.clear.clearZ = clearZ; @@ -733,12 +751,12 @@ class GLRenderManager { data.clear.scissorY = scissorY; data.clear.scissorW = scissorW; data.clear.scissorH = scissorH; - curRenderStep_->commands.push_back(data); } void Draw(GLRInputLayout *inputLayout, GLRBuffer *buffer, size_t offset, GLenum mode, int first, int count) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::DRAW }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::DRAW; data.draw.inputLayout = inputLayout; data.draw.offset = offset; data.draw.buffer = buffer; @@ -747,13 +765,12 @@ class GLRenderManager { data.draw.first = first; data.draw.count = count; data.draw.indexType = 0; - curRenderStep_->commands.push_back(data); - curRenderStep_->render.numDraws++; } void DrawIndexed(GLRInputLayout *inputLayout, GLRBuffer *buffer, size_t offset, GLRBuffer *indexBuffer, GLenum mode, int count, GLenum indexType, void *indices, int instances = 1) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); - GLRRenderData data{ GLRRenderCommand::DRAW }; + GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = GLRRenderCommand::DRAW; data.draw.inputLayout = inputLayout; data.draw.offset = offset; data.draw.buffer = buffer; @@ -763,8 +780,6 @@ class GLRenderManager { data.draw.indexType = indexType; data.draw.indices = indices; data.draw.instances = instances; - curRenderStep_->commands.push_back(data); - curRenderStep_->render.numDraws++; } enum { MAX_INFLIGHT_FRAMES = 3 
}; @@ -847,7 +862,7 @@ class GLRenderManager { GLRStep *curRenderStep_ = nullptr; std::vector steps_; - std::vector initSteps_; + FastVec initSteps_; // Execution time state bool run_ = true; @@ -859,7 +874,7 @@ class GLRenderManager { std::mutex pushMutex_; std::condition_variable pushCondVar_; - std::queue renderThreadQueue_; + std::queue renderThreadQueue_; // For readbacks and other reasons we need to sync with the render thread. std::mutex syncMutex_; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index 3e2c26934e70..e7670e99445f 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -481,7 +481,7 @@ void VulkanQueueRunner::ApplyMGSHack(std::vector &steps) { last = j - 1; // should really also check descriptor sets... if (steps[j]->commands.size()) { - VkRenderData &cmd = steps[j]->commands.back(); + const VkRenderData &cmd = steps[j]->commands.back(); if (cmd.cmd == VKRRenderCommand::DRAW_INDEXED && cmd.draw.count != 6) last = j - 1; } @@ -1241,7 +1241,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c VKRGraphicsPipeline *lastGraphicsPipeline = nullptr; VKRComputePipeline *lastComputePipeline = nullptr; - auto &commands = step.commands; + const auto &commands = step.commands; // We can do a little bit of state tracking here to eliminate some calls into the driver. // The stencil ones are very commonly mostly redundant so let's eliminate them where possible. 
diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index 6df7dfc68103..45af407cf538 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -6,6 +6,7 @@ #include "Common/Thread/Promise.h" #include "Common/Data/Collections/Hashmaps.h" +#include "Common/Data/Collections/FastVec.h" #include "Common/GPU/Vulkan/VulkanContext.h" #include "Common/GPU/Vulkan/VulkanBarrier.h" #include "Common/GPU/Vulkan/VulkanFrameData.h" @@ -153,7 +154,7 @@ struct VKRStep { ~VKRStep() {} VKRStepType stepType; - std::vector commands; + FastVec commands; TinySet preTransitions; TinySet dependencies; const char *tag; @@ -212,9 +213,14 @@ struct VKRStep { // These are enqueued from the main thread, // and the render thread pops them off struct VKRRenderThreadTask { + VKRRenderThreadTask(VKRRunType _runType) : runType(_runType) {} std::vector steps; - int frame; + int frame = -1; VKRRunType runType; + + // Avoid copying these by accident. + VKRRenderThreadTask(VKRRenderThreadTask &) = delete; + VKRRenderThreadTask &operator =(VKRRenderThreadTask &) = delete; }; class VulkanQueueRunner { diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 7ab778d8c288..287eaf236575 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -308,9 +308,8 @@ bool VulkanRenderManager::CreateBackbuffers() { void VulkanRenderManager::StopThread() { { // Tell the render thread to quit when it's done. 
- VKRRenderThreadTask task; - task.frame = vulkan_->GetCurFrame(); - task.runType = VKRRunType::EXIT; + VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::EXIT); + task->frame = vulkan_->GetCurFrame(); std::unique_lock lock(pushMutex_); renderThreadQueue_.push(task); pushCondVar_.notify_one(); @@ -494,7 +493,7 @@ void VulkanRenderManager::ThreadFunc() { SetCurrentThreadName("RenderMan"); while (true) { // Pop a task of the queue and execute it. - VKRRenderThreadTask task; + VKRRenderThreadTask *task = nullptr; { std::unique_lock lock(pushMutex_); while (renderThreadQueue_.empty()) { @@ -506,12 +505,13 @@ void VulkanRenderManager::ThreadFunc() { // Oh, we got a task! We can now have pushMutex_ unlocked, allowing the host to // push more work when it feels like it, and just start working. - if (task.runType == VKRRunType::EXIT) { + if (task->runType == VKRRunType::EXIT) { // Oh, host wanted out. Let's leave. break; } - Run(task); + Run(*task); + delete task; } // Wait for the device to be done with everything, before tearing stuff down. @@ -999,7 +999,7 @@ void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()], nullptr, w, h, destFormat, destFormat, pixelStride, pixels); } -static void RemoveDrawCommands(std::vector *cmds) { +static void RemoveDrawCommands(FastVec *cmds) { // Here we remove any DRAW type commands when we hit a CLEAR. 
for (auto &c : *cmds) { if (c.cmd == VKRRenderCommand::DRAW || c.cmd == VKRRenderCommand::DRAW_INDEXED) { @@ -1008,7 +1008,7 @@ static void RemoveDrawCommands(std::vector *cmds) { } } -static void CleanupRenderCommands(std::vector *cmds) { +static void CleanupRenderCommands(FastVec *cmds) { size_t lastCommand[(int)VKRRenderCommand::NUM_RENDER_COMMANDS]; memset(lastCommand, -1, sizeof(lastCommand)); @@ -1266,13 +1266,12 @@ void VulkanRenderManager::Finish() { FrameData &frameData = frameData_[curFrame]; VLOG("PUSH: Frame[%d]", curFrame); - VKRRenderThreadTask task; - task.frame = curFrame; - task.runType = VKRRunType::PRESENT; + VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::PRESENT); + task->frame = curFrame; { std::unique_lock lock(pushMutex_); renderThreadQueue_.push(task); - renderThreadQueue_.back().steps = std::move(steps_); + renderThreadQueue_.back()->steps = std::move(steps_); pushCondVar_.notify_one(); } @@ -1382,12 +1381,11 @@ void VulkanRenderManager::FlushSync() { { VLOG("PUSH: Frame[%d]", curFrame); - VKRRenderThreadTask task; - task.frame = curFrame; - task.runType = VKRRunType::SYNC; + VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SYNC); + task->frame = curFrame; std::unique_lock lock(pushMutex_); renderThreadQueue_.push(task); - renderThreadQueue_.back().steps = std::move(steps_); + renderThreadQueue_.back()->steps = std::move(steps_); pushCondVar_.notify_one(); } diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index b0ca50d4bfa5..c1bb34234aca 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -240,7 +240,8 @@ class VulkanRenderManager { void BindPipeline(VKRGraphicsPipeline *pipeline, PipelineFlags flags, VkPipelineLayout pipelineLayout) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER); _dbg_assert_(pipeline != nullptr); - VkRenderData data{ VKRRenderCommand::BIND_GRAPHICS_PIPELINE }; 
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::BIND_GRAPHICS_PIPELINE; pipelinesToCheck_.push_back(pipeline); data.graphics_pipeline.pipeline = pipeline; data.graphics_pipeline.pipelineLayout = pipelineLayout; @@ -249,24 +250,24 @@ class VulkanRenderManager { // DebugBreak(); // } curPipelineFlags_ |= flags; - curRenderStep_->commands.push_back(data); } void BindPipeline(VKRComputePipeline *pipeline, PipelineFlags flags, VkPipelineLayout pipelineLayout) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER); _dbg_assert_(pipeline != nullptr); - VkRenderData data{ VKRRenderCommand::BIND_COMPUTE_PIPELINE }; + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::BIND_COMPUTE_PIPELINE; data.compute_pipeline.pipeline = pipeline; data.compute_pipeline.pipelineLayout = pipelineLayout; curPipelineFlags_ |= flags; - curRenderStep_->commands.push_back(data); } void SetViewport(const VkViewport &vp) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER); _dbg_assert_((int)vp.width >= 0); _dbg_assert_((int)vp.height >= 0); - VkRenderData data{ VKRRenderCommand::VIEWPORT }; + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::VIEWPORT; data.viewport.vp.x = vp.x; data.viewport.vp.y = vp.y; data.viewport.vp.width = vp.width; @@ -276,7 +277,6 @@ class VulkanRenderManager { // TODO: This should be fixed at the source. 
data.viewport.vp.minDepth = clamp_value(vp.minDepth, 0.0f, 1.0f); data.viewport.vp.maxDepth = clamp_value(vp.maxDepth, 0.0f, 1.0f); - curRenderStep_->commands.push_back(data); curStepHasViewport_ = true; } @@ -318,37 +318,37 @@ class VulkanRenderManager { curRenderArea_.Apply(rc); - VkRenderData data{ VKRRenderCommand::SCISSOR }; + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::SCISSOR; data.scissor.scissor = rc; - curRenderStep_->commands.push_back(data); curStepHasScissor_ = true; } void SetStencilParams(uint8_t writeMask, uint8_t compareMask, uint8_t refValue) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER); - VkRenderData data{ VKRRenderCommand::STENCIL }; + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::STENCIL; data.stencil.stencilWriteMask = writeMask; data.stencil.stencilCompareMask = compareMask; data.stencil.stencilRef = refValue; - curRenderStep_->commands.push_back(data); } void SetBlendFactor(uint32_t color) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER); - VkRenderData data{ VKRRenderCommand::BLEND }; + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::BLEND; data.blendColor.color = color; - curRenderStep_->commands.push_back(data); } void PushConstants(VkPipelineLayout pipelineLayout, VkShaderStageFlags stages, int offset, int size, void *constants) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER); _dbg_assert_(size + offset < 40); - VkRenderData data{ VKRRenderCommand::PUSH_CONSTANTS }; + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::PUSH_CONSTANTS; data.push.stages = stages; data.push.offset = offset; data.push.size = size; memcpy(data.push.data, constants, size); - curRenderStep_->commands.push_back(data); } void Clear(uint32_t clearColor, float 
clearZ, int clearStencil, int clearMask); @@ -380,7 +380,8 @@ class VulkanRenderManager { void Draw(VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_); - VkRenderData data{ VKRRenderCommand::DRAW }; + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::DRAW; data.draw.count = count; data.draw.offset = offset; data.draw.ds = descSet; @@ -390,13 +391,13 @@ class VulkanRenderManager { _dbg_assert_(numUboOffsets <= ARRAY_SIZE(data.draw.uboOffsets)); for (int i = 0; i < numUboOffsets; i++) data.draw.uboOffsets[i] = uboOffsets[i]; - curRenderStep_->commands.push_back(data); curRenderStep_->render.numDraws++; } void DrawIndexed(VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, VkBuffer ibuffer, int ioffset, int count, int numInstances, VkIndexType indexType) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_); - VkRenderData data{ VKRRenderCommand::DRAW_INDEXED }; + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::DRAW_INDEXED; data.drawIndexed.count = count; data.drawIndexed.instances = numInstances; data.drawIndexed.ds = descSet; @@ -409,7 +410,6 @@ class VulkanRenderManager { for (int i = 0; i < numUboOffsets; i++) data.drawIndexed.uboOffsets[i] = uboOffsets[i]; data.drawIndexed.indexType = indexType; - curRenderStep_->commands.push_back(data); curRenderStep_->render.numDraws++; } @@ -417,9 +417,9 @@ class VulkanRenderManager { // in the debugger. 
void DebugAnnotate(const char *annotation) { _dbg_assert_(curRenderStep_); - VkRenderData data{ VKRRenderCommand::DEBUG_ANNOTATION }; + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::DEBUG_ANNOTATION; data.debugAnnotation.annotation = annotation; - curRenderStep_->commands.push_back(data); } VkCommandBuffer GetInitCmd(); @@ -509,7 +509,7 @@ class VulkanRenderManager { std::mutex pushMutex_; std::condition_variable pushCondVar_; - std::queue renderThreadQueue_; + std::queue renderThreadQueue_; // For readbacks and other reasons we need to sync with the render thread. std::mutex syncMutex_; diff --git a/Common/VR/PPSSPPVR.cpp b/Common/VR/PPSSPPVR.cpp index e580a6af8030..a2f5bc285bb5 100644 --- a/Common/VR/PPSSPPVR.cpp +++ b/Common/VR/PPSSPPVR.cpp @@ -560,12 +560,11 @@ void PreprocessSkyplane(GLRStep* step) { // Clear sky with the fog color. if (!vrCompat[VR_COMPAT_FBO_CLEAR]) { - GLRRenderData skyClear {}; - skyClear.cmd = GLRRenderCommand::CLEAR; + GLRRenderData &skyClear = step->commands.insert(step->commands.begin()); + skyClear.cmd = GLRRenderCommand::CLEAR; // intentional zero-initialize skyClear.clear.colorMask = 0xF; skyClear.clear.clearMask = GL_COLOR_BUFFER_BIT; skyClear.clear.clearColor = vrCompat[VR_COMPAT_FOG_COLOR]; - step->commands.insert(step->commands.begin(), skyClear); vrCompat[VR_COMPAT_FBO_CLEAR] = true; } diff --git a/unittest/UnitTest.cpp b/unittest/UnitTest.cpp index 047f32129d38..4b4b1b6e5052 100644 --- a/unittest/UnitTest.cpp +++ b/unittest/UnitTest.cpp @@ -47,6 +47,7 @@ #endif #include "Common/Data/Collections/TinySet.h" +#include "Common/Data/Collections/FastVec.h" #include "Common/Data/Convert/SmallDataConvert.h" #include "Common/Data/Text/Parsers.h" #include "Common/Data/Text/WrapText.h" @@ -365,6 +366,45 @@ bool TestTinySet() { return true; } +bool TestFastVec() { + FastVec a; + EXPECT_EQ_INT((int)a.size(), 0); + a.push_back(1); + EXPECT_EQ_INT((int)a.size(), 1); + a.push_back(2); + 
EXPECT_EQ_INT((int)a.size(), 2); + FastVec b; + b.push_back(8); + b.push_back(9); + b.push_back(10); + EXPECT_EQ_INT((int)b.size(), 3); + for (int i = 0; i < 100; i++) { + b.push_back(33); + } + EXPECT_EQ_INT((int)b.size(), 103); + + int items[4] = { 50, 60, 70, 80 }; + b.insert(b.begin() + 1, items, items + 4); + EXPECT_EQ_INT(b[0], 8); + EXPECT_EQ_INT(b[1], 50); + EXPECT_EQ_INT(b[2], 60); + EXPECT_EQ_INT(b[3], 70); + EXPECT_EQ_INT(b[4], 80); + EXPECT_EQ_INT(b[5], 9); + + b.resize(2); + b.insert(b.end(), items, items + 4); + EXPECT_EQ_INT(b[0], 8); + EXPECT_EQ_INT(b[1], 50); + EXPECT_EQ_INT(b[2], 50); + EXPECT_EQ_INT(b[3], 60); + EXPECT_EQ_INT(b[4], 70); + EXPECT_EQ_INT(b[5], 80); + + + return true; +} + bool TestVFPUSinCos() { float sine, cosine; // Needed for VFPU tables. @@ -977,6 +1017,7 @@ TestItem availableTests[] = { TEST_ITEM(ThreadManager), TEST_ITEM(WrapText), TEST_ITEM(TinySet), + TEST_ITEM(FastVec), TEST_ITEM(SmallDataConvert), TEST_ITEM(DepthMath), TEST_ITEM(InputMapping),