diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0a9179c1f92c..b0228ab24b18 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -566,6 +566,7 @@ add_library(Common STATIC
Common/Data/Collections/FixedSizeQueue.h
Common/Data/Collections/Hashmaps.h
Common/Data/Collections/TinySet.h
+ Common/Data/Collections/FastVec.h
Common/Data/Collections/ThreadSafeList.h
Common/Data/Color/RGBAUtil.cpp
Common/Data/Color/RGBAUtil.h
diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj
index eec456c3f588..3ecf5c41ed92 100644
--- a/Common/Common.vcxproj
+++ b/Common/Common.vcxproj
@@ -859,6 +859,7 @@
+
diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters
index 776cefaf35a7..719cbe3e64e6 100644
--- a/Common/Common.vcxproj.filters
+++ b/Common/Common.vcxproj.filters
@@ -938,6 +938,9 @@
GPU\OpenGL
+
+ Data\Collections
+
@@ -1086,4 +1089,4 @@
ext\basis_universal
-
\ No newline at end of file
+
diff --git a/Common/Data/Collections/FastVec.h b/Common/Data/Collections/FastVec.h
new file mode 100644
index 000000000000..41e18e3eac8d
--- /dev/null
+++ b/Common/Data/Collections/FastVec.h
@@ -0,0 +1,153 @@
+#pragma once
+
+// Yet another replacement for std::vector, this time for use in graphics queues.
+// Its major difference is that you can append uninitialized structures and initialize them after.
+// This is not allowed by std::vector but is very useful for our sometimes oversized unions.
+// Also, copies during resize are done by memcpy, not by any move constructor or similar, so T
+// must be safe to relocate bytewise. This container never runs T's constructors or destructors.
+// Allocation failure is not handled.
+
+#include <cstdlib>
+#include <cstring>
+
+template<class T>
+class FastVec {
+public:
+	FastVec() {}
+	// Reserves room for initialCapacity elements up front. size() stays 0.
+	FastVec(size_t initialCapacity) {
+		capacity_ = initialCapacity;
+		data_ = (T *)malloc(initialCapacity * sizeof(T));
+	}
+	~FastVec() { free(data_); }
+
+	// Appends one element WITHOUT initializing it and returns a reference to it, so the caller
+	// can fill it in place. The reference is invalidated by the next growing operation.
+	T &push_uninitialized() {
+		ExtendByOne();
+		return data_[size_ - 1];
+	}
+
+	// Appends a copy of t. t must not refer to an element of this vector, since growing
+	// may move the storage before the copy is made.
+	void push_back(const T &t) {
+		T &dest = push_uninitialized();
+		dest = t;
+	}
+
+	// Move constructor. Steals other's buffer and leaves other empty.
+	FastVec(FastVec &&other) noexcept {
+		data_ = other.data_;
+		size_ = other.size_;
+		capacity_ = other.capacity_;
+		other.data_ = nullptr;
+		other.size_ = 0;
+		other.capacity_ = 0;
+	}
+
+	// Move assignment. Releases our buffer, steals other's and leaves other empty.
+	FastVec &operator=(FastVec &&other) noexcept {
+		if (this != &other) {
+			// The buffer comes from malloc, so it must be released with free, not delete[].
+			free(data_);
+			data_ = other.data_;
+			size_ = other.size_;
+			capacity_ = other.capacity_;
+			other.data_ = nullptr;
+			other.size_ = 0;
+			other.capacity_ = 0;
+		}
+		return *this;
+	}
+
+	// No copy constructor.
+	FastVec(const FastVec &other) = delete;
+	FastVec &operator=(const FastVec &other) = delete;
+
+	size_t size() const { return size_; }
+	size_t capacity() const { return capacity_; }
+	// Keeps the allocation, so the capacity can be reused.
+	void clear() { size_ = 0; }
+	bool empty() const { return size_ == 0; }
+
+	T *begin() { return data_; }
+	T *end() { return data_ + size_; }
+	const T *begin() const { return data_; }
+	const T *end() const { return data_ + size_; }
+
+	// Out of bounds (past size() - 1) is undefined behavior. Note that at() is NOT bounds-checked.
+	T &operator[] (const size_t index) { return data_[index]; }
+	const T &operator[] (const size_t index) const { return data_[index]; }
+	T &at(const size_t index) { return data_[index]; }
+	const T &at(const size_t index) const { return data_[index]; }
+
+	// These two are invalid if empty().
+	const T &back() const { return (*this)[size() - 1]; }
+	const T &front() const { return (*this)[0]; }
+
+	// Limited functionality for inserts and similar, add as needed.
+
+	// Opens up one uninitialized slot in front of iter (which must be in [begin(), end()])
+	// and returns a reference to it. All iterators are invalidated.
+	T &insert(T *iter) {
+		const size_t pos = (size_t)(iter - data_);
+		ExtendByOne();
+		// size_ already counts the new slot, so the tail to shift is (size_ - 1 - pos) long.
+		if (pos + 1 < size_) {
+			memmove(data_ + pos + 1, data_ + pos, (size_ - 1 - pos) * sizeof(T));
+		}
+		return data_[pos];
+	}
+
+	// Copies [beginIter, endIter) in front of destIter (which must be in [begin(), end()]).
+	// The source range must not point into this vector. All iterators are invalidated.
+	void insert(T *destIter, const T *beginIter, const T *endIter) {
+		if (beginIter == endIter)
+			return;
+		const size_t pos = (size_t)(destIter - data_);
+		const size_t newItems = (size_t)(endIter - beginIter);
+		IncreaseCapacityTo(size_ + newItems);
+		memmove(data_ + pos + newItems, data_ + pos, (size_ - pos) * sizeof(T));
+		memcpy(data_ + pos, beginIter, newItems * sizeof(T));
+		size_ += newItems;
+	}
+
+	// Shrinking just drops the tail. Growing zero-fills the new elements.
+	void resize(size_t size) {
+		if (size > size_) {
+			IncreaseCapacityTo(size);
+			memset((void *)(data_ + size_), 0, (size - size_) * sizeof(T));
+		}
+		size_ = size;
+	}
+
+private:
+	// Makes sure at least newCapacity elements fit, moving the existing ones with memcpy.
+	void IncreaseCapacityTo(size_t newCapacity) {
+		if (newCapacity <= capacity_)
+			return;
+		T *oldData = data_;
+		data_ = (T *)malloc(sizeof(T) * newCapacity);
+		if (oldData) {
+			memcpy(data_, oldData, sizeof(T) * size_);
+			free(oldData);
+		}
+		capacity_ = newCapacity;
+	}
+
+	// Bumps size_ by one, doubling the capacity (minimum 16) only when the buffer is full.
+	void ExtendByOne() {
+		if (size_ >= capacity_) {
+			size_t newCapacity = capacity_ * 2;
+			if (newCapacity < 16) {
+				newCapacity = 16;
+			}
+			IncreaseCapacityTo(newCapacity);
+		}
+		size_++;
+	}
+
+	size_t size_ = 0;
+	size_t capacity_ = 0;
+	T *data_ = nullptr;
+};
diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp
index c5d562a5dcc2..457f5d98e174 100644
--- a/Common/GPU/OpenGL/GLQueueRunner.cpp
+++ b/Common/GPU/OpenGL/GLQueueRunner.cpp
@@ -118,7 +118,7 @@ static std::string GetStereoBufferLayout(const char *uniformName) {
else return "undefined";
}
-void GLQueueRunner::RunInitSteps(const std::vector &steps, bool skipGLCalls) {
+void GLQueueRunner::RunInitSteps(const FastVec &steps, bool skipGLCalls) {
if (skipGLCalls) {
// Some bookkeeping still needs to be done.
for (size_t i = 0; i < steps.size(); i++) {
@@ -700,7 +700,7 @@ void GLQueueRunner::RunSteps(const std::vector &steps, bool skipGLCal
CHECK_GL_ERROR_IF_DEBUG();
size_t renderCount = 0;
for (size_t i = 0; i < steps.size(); i++) {
- const GLRStep &step = *steps[i];
+ GLRStep &step = *steps[i];
#if !defined(USING_GLES2)
if (useDebugGroups_)
@@ -711,7 +711,7 @@ void GLQueueRunner::RunSteps(const std::vector &steps, bool skipGLCal
case GLRStepType::RENDER:
renderCount++;
if (IsVREnabled()) {
- GLRStep vrStep = step;
+ GLRStep &vrStep = step;
PreprocessStepVR(&vrStep);
PerformRenderPass(vrStep, renderCount == 1, renderCount == totalRenderCount);
} else {
diff --git a/Common/GPU/OpenGL/GLQueueRunner.h b/Common/GPU/OpenGL/GLQueueRunner.h
index 9cdbfeee01ef..11d6e1acac7c 100644
--- a/Common/GPU/OpenGL/GLQueueRunner.h
+++ b/Common/GPU/OpenGL/GLQueueRunner.h
@@ -11,7 +11,7 @@
#include "Common/GPU/Shader.h"
#include "Common/GPU/thin3d.h"
#include "Common/Data/Collections/TinySet.h"
-
+#include "Common/Data/Collections/FastVec.h"
struct GLRViewport {
float x, y, w, h, minZ, maxZ;
@@ -70,6 +70,7 @@ enum class GLRRenderCommand : uint8_t {
// type field, smashed right after each other?)
// Also, all GLenums are really only 16 bits.
struct GLRRenderData {
+ GLRRenderData(GLRRenderCommand _cmd) : cmd(_cmd) {}
GLRRenderCommand cmd;
union {
struct {
@@ -301,7 +302,7 @@ enum {
struct GLRStep {
GLRStep(GLRStepType _type) : stepType(_type) {}
GLRStepType stepType;
- std::vector commands;
+ FastVec commands;
TinySet dependencies;
const char *tag;
union {
@@ -310,8 +311,6 @@ struct GLRStep {
GLRRenderPassAction color;
GLRRenderPassAction depth;
GLRRenderPassAction stencil;
- // Note: not accurate.
- int numDraws;
} render;
struct {
GLRFramebuffer *src;
@@ -355,7 +354,7 @@ class GLQueueRunner {
caps_ = caps;
}
- void RunInitSteps(const std::vector &steps, bool skipGLCalls);
+ void RunInitSteps(const FastVec &steps, bool skipGLCalls);
void RunSteps(const std::vector &steps, bool skipGLCalls, bool keepSteps, bool useVR);
diff --git a/Common/GPU/OpenGL/GLRenderManager.cpp b/Common/GPU/OpenGL/GLRenderManager.cpp
index e851da860768..9ac14dfcec48 100644
--- a/Common/GPU/OpenGL/GLRenderManager.cpp
+++ b/Common/GPU/OpenGL/GLRenderManager.cpp
@@ -129,25 +129,24 @@ bool GLRenderManager::ThreadFrame() {
return false;
}
- GLRRenderThreadTask task;
+ GLRRenderThreadTask *task = nullptr;
// In case of syncs or other partial completion, we keep going until we complete a frame.
while (true) {
// Pop a task of the queue and execute it.
// NOTE: We need to actually wait for a task, we can't just bail!
-
{
std::unique_lock lock(pushMutex_);
while (renderThreadQueue_.empty()) {
pushCondVar_.wait(lock);
}
- task = renderThreadQueue_.front();
+ task = std::move(renderThreadQueue_.front());
renderThreadQueue_.pop();
}
// We got a task! We can now have pushMutex_ unlocked, allowing the host to
// push more work when it feels like it, and just start working.
- if (task.runType == GLRRunType::EXIT) {
+ if (task->runType == GLRRunType::EXIT) {
// Oh, host wanted out. Let's leave, and also let's notify the host.
// This is unlike Vulkan too which can just block on the thread existing.
std::unique_lock lock(syncMutex_);
@@ -157,11 +156,13 @@ bool GLRenderManager::ThreadFrame() {
}
// Render the scene.
- VLOG(" PULL: Frame %d RUN (%0.3f)", task.frame, time_now_d());
- if (Run(task)) {
+ VLOG(" PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d());
+ if (Run(*task)) {
// Swap requested, so we just bail the loop.
+ delete task;
break;
}
+ delete task;
};
return true;
@@ -174,9 +175,7 @@ void GLRenderManager::StopThread() {
run_ = false;
std::unique_lock lock(pushMutex_);
- GLRRenderThreadTask exitTask{};
- exitTask.runType = GLRRunType::EXIT;
- renderThreadQueue_.push(exitTask);
+ renderThreadQueue_.push(new GLRRenderThreadTask(GLRRunType::EXIT));
pushCondVar_.notify_one();
} else {
WARN_LOG(G3D, "GL submission thread was already paused.");
@@ -215,13 +214,11 @@ void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRende
step->render.color = color;
step->render.depth = depth;
step->render.stencil = stencil;
- step->render.numDraws = 0;
step->tag = tag;
steps_.push_back(step);
GLuint clearMask = 0;
- GLRRenderData data;
- data.cmd = GLRRenderCommand::CLEAR;
+ GLRRenderData data(GLRRenderCommand::CLEAR);
if (color == GLRRenderPassAction::CLEAR) {
clearMask |= GL_COLOR_BUFFER_BIT;
data.clear.clearColor = clearColor;
@@ -379,15 +376,14 @@ void GLRenderManager::Finish() {
frameData_[curFrame].deleter.Take(deleter_);
VLOG("PUSH: Finish, pushing task. curFrame = %d", curFrame);
- GLRRenderThreadTask task;
- task.frame = curFrame;
- task.runType = GLRRunType::PRESENT;
+ GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::PRESENT);
+ task->frame = curFrame;
{
std::unique_lock lock(pushMutex_);
renderThreadQueue_.push(task);
- renderThreadQueue_.back().initSteps = std::move(initSteps_);
- renderThreadQueue_.back().steps = std::move(steps_);
+ renderThreadQueue_.back()->initSteps = std::move(initSteps_);
+ renderThreadQueue_.back()->steps = std::move(steps_);
initSteps_.clear();
steps_.clear();
pushCondVar_.notify_one();
@@ -509,14 +505,13 @@ void GLRenderManager::FlushSync() {
{
VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_);
- GLRRenderThreadTask task;
- task.frame = curFrame_;
- task.runType = GLRRunType::SYNC;
+ GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC);
+ task->frame = curFrame_;
std::unique_lock lock(pushMutex_);
renderThreadQueue_.push(task);
- renderThreadQueue_.back().initSteps = std::move(initSteps_);
- renderThreadQueue_.back().steps = std::move(steps_);
+ renderThreadQueue_.back()->initSteps = std::move(initSteps_);
+ renderThreadQueue_.back()->steps = std::move(steps_);
pushCondVar_.notify_one();
steps_.clear();
}
diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h
index 505e92aac29e..fcf0bfe7a5d8 100644
--- a/Common/GPU/OpenGL/GLRenderManager.h
+++ b/Common/GPU/OpenGL/GLRenderManager.h
@@ -203,14 +203,19 @@ enum class GLRRunType {
class GLRenderManager;
class GLPushBuffer;
-// These are enqueued from the main thread,
-// and the render thread pops them off
+// These are enqueued from the main thread, and the render thread pops them off
struct GLRRenderThreadTask {
+ GLRRenderThreadTask(GLRRunType _runType) : runType(_runType) {}
+
std::vector steps;
- std::vector initSteps;
+ FastVec initSteps;
- int frame;
+ int frame = -1;
GLRRunType runType;
+
+ // Tasks are passed around by pointer. Copying is deleted (const-ref form, matching FastVec) to avoid doing it by accident.
+ GLRRenderThreadTask(const GLRRenderThreadTask &) = delete;
+ GLRRenderThreadTask& operator =(const GLRRenderThreadTask &) = delete;
};
// Note: The GLRenderManager is created and destroyed on the render thread, and the latter
@@ -221,6 +226,9 @@ class GLRenderManager {
GLRenderManager();
~GLRenderManager();
+ GLRenderManager(GLRenderManager &) = delete;
+ GLRenderManager &operator=(GLRenderManager &) = delete;
+
void SetInvalidationCallback(InvalidationCallback callback) {
invalidationCallback_ = callback;
}
@@ -248,37 +256,37 @@ class GLRenderManager {
// We pass in width/height here even though it's not strictly needed until we support glTextureStorage
// and then we'll also need formats and stuff.
GLRTexture *CreateTexture(GLenum target, int width, int height, int depth, int numMips) {
- GLRInitStep step { GLRInitStepType::CREATE_TEXTURE };
+ GLRInitStep &step = initSteps_.push_uninitialized();
+ step.stepType = GLRInitStepType::CREATE_TEXTURE;
step.create_texture.texture = new GLRTexture(caps_, width, height, depth, numMips);
step.create_texture.texture->target = target;
- initSteps_.push_back(step);
return step.create_texture.texture;
}
GLRBuffer *CreateBuffer(GLuint target, size_t size, GLuint usage) {
- GLRInitStep step{ GLRInitStepType::CREATE_BUFFER };
+ GLRInitStep &step = initSteps_.push_uninitialized();
+ step.stepType = GLRInitStepType::CREATE_BUFFER;
step.create_buffer.buffer = new GLRBuffer(target, size);
step.create_buffer.size = (int)size;
step.create_buffer.usage = usage;
- initSteps_.push_back(step);
return step.create_buffer.buffer;
}
GLRShader *CreateShader(GLuint stage, const std::string &code, const std::string &desc) {
- GLRInitStep step{ GLRInitStepType::CREATE_SHADER };
+ GLRInitStep &step = initSteps_.push_uninitialized();
+ step.stepType = GLRInitStepType::CREATE_SHADER;
step.create_shader.shader = new GLRShader();
step.create_shader.shader->desc = desc;
step.create_shader.stage = stage;
step.create_shader.code = new char[code.size() + 1];
memcpy(step.create_shader.code, code.data(), code.size() + 1);
- initSteps_.push_back(step);
return step.create_shader.shader;
}
GLRFramebuffer *CreateFramebuffer(int width, int height, bool z_stencil) {
- GLRInitStep step{ GLRInitStepType::CREATE_FRAMEBUFFER };
+ GLRInitStep &step = initSteps_.push_uninitialized();
+ step.stepType = GLRInitStepType::CREATE_FRAMEBUFFER;
step.create_framebuffer.framebuffer = new GLRFramebuffer(caps_, width, height, z_stencil);
- initSteps_.push_back(step);
return step.create_framebuffer.framebuffer;
}
@@ -287,7 +295,8 @@ class GLRenderManager {
GLRProgram *CreateProgram(
std::vector shaders, std::vector semantics, std::vector queries,
std::vector initializers, GLRProgramLocData *locData, const GLRProgramFlags &flags) {
- GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM };
+ GLRInitStep &step = initSteps_.push_uninitialized();
+ step.stepType = GLRInitStepType::CREATE_PROGRAM;
_assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders));
step.create_program.program = new GLRProgram();
step.create_program.program->semantics_ = semantics;
@@ -311,18 +320,17 @@ class GLRenderManager {
}
#endif
step.create_program.num_shaders = (int)shaders.size();
- initSteps_.push_back(step);
return step.create_program.program;
}
GLRInputLayout *CreateInputLayout(const std::vector &entries) {
- GLRInitStep step{ GLRInitStepType::CREATE_INPUT_LAYOUT };
+ GLRInitStep &step = initSteps_.push_uninitialized();
+ step.stepType = GLRInitStepType::CREATE_INPUT_LAYOUT;
step.create_input_layout.inputLayout = new GLRInputLayout();
step.create_input_layout.inputLayout->entries = entries;
for (auto &iter : step.create_input_layout.inputLayout->entries) {
step.create_input_layout.inputLayout->semanticsMask_ |= 1 << iter.location;
}
- initSteps_.push_back(step);
return step.create_input_layout.inputLayout;
}
@@ -395,7 +403,8 @@ class GLRenderManager {
void BufferSubdata(GLRBuffer *buffer, size_t offset, size_t size, uint8_t *data, bool deleteData = true) {
// TODO: Maybe should be a render command instead of an init command? When possible it's better as
// an init command, that's for sure.
- GLRInitStep step{ GLRInitStepType::BUFFER_SUBDATA };
+ GLRInitStep &step = initSteps_.push_uninitialized();
+ step.stepType = GLRInitStepType::BUFFER_SUBDATA;
_dbg_assert_(offset >= 0);
_dbg_assert_(offset <= buffer->size_ - size);
step.buffer_subdata.buffer = buffer;
@@ -403,12 +412,12 @@ class GLRenderManager {
step.buffer_subdata.size = (int)size;
step.buffer_subdata.data = data;
step.buffer_subdata.deleteData = deleteData;
- initSteps_.push_back(step);
}
// Takes ownership over the data pointer and delete[]-s it.
void TextureImage(GLRTexture *texture, int level, int width, int height, int depth, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW, bool linearFilter = false) {
- GLRInitStep step{ GLRInitStepType::TEXTURE_IMAGE };
+ GLRInitStep &step = initSteps_.push_uninitialized();
+ step.stepType = GLRInitStepType::TEXTURE_IMAGE;
step.texture_image.texture = texture;
step.texture_image.data = data;
step.texture_image.format = format;
@@ -418,12 +427,11 @@ class GLRenderManager {
step.texture_image.depth = depth;
step.texture_image.allocType = allocType;
step.texture_image.linearFilter = linearFilter;
- initSteps_.push_back(step);
}
void TextureSubImage(int slot, GLRTexture *texture, int level, int x, int y, int width, int height, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData _data{ GLRRenderCommand::TEXTURE_SUBIMAGE };
+ GLRRenderData _data(GLRRenderCommand::TEXTURE_SUBIMAGE);
_data.texture_subimage.texture = texture;
_data.texture_subimage.data = data;
_data.texture_subimage.format = format;
@@ -438,11 +446,11 @@ class GLRenderManager {
}
void FinalizeTexture(GLRTexture *texture, int loadedLevels, bool genMips) {
- GLRInitStep step{ GLRInitStepType::TEXTURE_FINALIZE };
+ GLRInitStep &step = initSteps_.push_uninitialized();
+ step.stepType = GLRInitStepType::TEXTURE_FINALIZE;
step.texture_finalize.texture = texture;
step.texture_finalize.loadedLevels = loadedLevels;
step.texture_finalize.genMips = genMips;
- initSteps_.push_back(step);
}
void BindTexture(int slot, GLRTexture *tex) {
@@ -453,18 +461,18 @@ class GLRenderManager {
}
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
_dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS);
- GLRRenderData data{ GLRRenderCommand::BINDTEXTURE };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::BINDTEXTURE;
data.texture.slot = slot;
data.texture.texture = tex;
- curRenderStep_->commands.push_back(data);
}
void BindProgram(GLRProgram *program) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::BINDPROGRAM };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::BINDPROGRAM;
_dbg_assert_(program != nullptr);
data.program.program = program;
- curRenderStep_->commands.push_back(data);
#ifdef _DEBUG
curProgram_ = program;
#endif
@@ -472,25 +480,25 @@ class GLRenderManager {
void SetDepth(bool enabled, bool write, GLenum func) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::DEPTH };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::DEPTH;
data.depth.enabled = enabled;
data.depth.write = write;
data.depth.func = func;
- curRenderStep_->commands.push_back(data);
}
void SetViewport(const GLRViewport &vp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::VIEWPORT };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::VIEWPORT;
data.viewport.vp = vp;
- curRenderStep_->commands.push_back(data);
}
void SetScissor(const GLRect2D &rc) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::SCISSOR };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::SCISSOR;
data.scissor.rc = rc;
- curRenderStep_->commands.push_back(data);
}
void SetUniformI(const GLint *loc, int count, const int *udata) {
@@ -498,11 +506,12 @@ class GLRenderManager {
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
- GLRRenderData data{ GLRRenderCommand::UNIFORM4I };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::UNIFORM4I;
+ data.uniform4.name = nullptr;
data.uniform4.loc = loc;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(int) * count);
- curRenderStep_->commands.push_back(data);
}
void SetUniformI1(const GLint *loc, int udata) {
@@ -510,11 +519,12 @@ class GLRenderManager {
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
- GLRRenderData data{ GLRRenderCommand::UNIFORM4I };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::UNIFORM4I;
+ data.uniform4.name = nullptr;
data.uniform4.loc = loc;
data.uniform4.count = 1;
memcpy(data.uniform4.v, &udata, sizeof(udata));
- curRenderStep_->commands.push_back(data);
}
void SetUniformUI(const GLint *loc, int count, const uint32_t *udata) {
@@ -522,11 +532,12 @@ class GLRenderManager {
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
- GLRRenderData data{ GLRRenderCommand::UNIFORM4UI };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::UNIFORM4UI;
+ data.uniform4.name = nullptr;
data.uniform4.loc = loc;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(uint32_t) * count);
- curRenderStep_->commands.push_back(data);
}
void SetUniformUI1(const GLint *loc, uint32_t udata) {
@@ -534,11 +545,12 @@ class GLRenderManager {
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
- GLRRenderData data{ GLRRenderCommand::UNIFORM4UI };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::UNIFORM4UI;
+ data.uniform4.name = nullptr;
data.uniform4.loc = loc;
data.uniform4.count = 1;
memcpy(data.uniform4.v, &udata, sizeof(udata));
- curRenderStep_->commands.push_back(data);
}
void SetUniformF(const GLint *loc, int count, const float *udata) {
@@ -546,11 +558,12 @@ class GLRenderManager {
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
- GLRRenderData data{ GLRRenderCommand::UNIFORM4F };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::UNIFORM4F;
+ data.uniform4.name = nullptr;
data.uniform4.loc = loc;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(float) * count);
- curRenderStep_->commands.push_back(data);
}
void SetUniformF1(const GLint *loc, const float udata) {
@@ -558,11 +571,12 @@ class GLRenderManager {
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
- GLRRenderData data{ GLRRenderCommand::UNIFORM4F };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::UNIFORM4F;
+ data.uniform4.name = nullptr;
data.uniform4.loc = loc;
data.uniform4.count = 1;
memcpy(data.uniform4.v, &udata, sizeof(float));
- curRenderStep_->commands.push_back(data);
}
void SetUniformF(const char *name, int count, const float *udata) {
@@ -570,11 +584,12 @@ class GLRenderManager {
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
- GLRRenderData data{ GLRRenderCommand::UNIFORM4F };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::UNIFORM4F;
data.uniform4.name = name;
+ data.uniform4.loc = nullptr;
data.uniform4.count = count;
memcpy(data.uniform4.v, udata, sizeof(float) * count);
- curRenderStep_->commands.push_back(data);
}
void SetUniformM4x4(const GLint *loc, const float *udata) {
@@ -582,10 +597,11 @@ class GLRenderManager {
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
- GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::UNIFORMMATRIX;
+ data.uniformMatrix4.name = nullptr;
data.uniformMatrix4.loc = loc;
memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16);
- curRenderStep_->commands.push_back(data);
}
void SetUniformM4x4Stereo(const char *name, const GLint *loc, const float *left, const float *right) {
@@ -593,13 +609,13 @@ class GLRenderManager {
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
- GLRRenderData data{ GLRRenderCommand::UNIFORMSTEREOMATRIX };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::UNIFORMSTEREOMATRIX;
data.uniformStereoMatrix4.name = name;
data.uniformStereoMatrix4.loc = loc;
data.uniformStereoMatrix4.mData = new float[32];
memcpy(&data.uniformStereoMatrix4.mData[0], left, sizeof(float) * 16);
memcpy(&data.uniformStereoMatrix4.mData[16], right, sizeof(float) * 16);
- curRenderStep_->commands.push_back(data);
}
void SetUniformM4x4(const char *name, const float *udata) {
@@ -607,17 +623,19 @@ class GLRenderManager {
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
- GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::UNIFORMMATRIX;
data.uniformMatrix4.name = name;
+ data.uniformMatrix4.loc = nullptr;
memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16);
- curRenderStep_->commands.push_back(data);
}
void SetBlendAndMask(int colorMask, bool blendEnabled, GLenum srcColor, GLenum dstColor, GLenum srcAlpha, GLenum dstAlpha, GLenum funcColor, GLenum funcAlpha) {
// Make this one only a non-debug _assert_, since it often comes first.
// Lets us collect info about this potential crash through assert extra data.
_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::BLEND };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::BLEND;
data.blend.mask = colorMask;
data.blend.enabled = blendEnabled;
data.blend.srcColor = srcColor;
@@ -626,96 +644,95 @@ class GLRenderManager {
data.blend.dstAlpha = dstAlpha;
data.blend.funcColor = funcColor;
data.blend.funcAlpha = funcAlpha;
- curRenderStep_->commands.push_back(data);
}
void SetNoBlendAndMask(int colorMask) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::BLEND };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::BLEND;
data.blend.mask = colorMask;
data.blend.enabled = false;
- curRenderStep_->commands.push_back(data);
}
#ifndef USING_GLES2
void SetLogicOp(bool enabled, GLenum logicOp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::LOGICOP };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::LOGICOP;
data.logic.enabled = enabled;
data.logic.logicOp = logicOp;
- curRenderStep_->commands.push_back(data);
}
#endif
void SetStencilFunc(bool enabled, GLenum func, uint8_t refValue, uint8_t compareMask) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::STENCILFUNC };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::STENCILFUNC;
data.stencilFunc.enabled = enabled;
data.stencilFunc.func = func;
data.stencilFunc.ref = refValue;
data.stencilFunc.compareMask = compareMask;
- curRenderStep_->commands.push_back(data);
}
void SetStencilOp(uint8_t writeMask, GLenum sFail, GLenum zFail, GLenum pass) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::STENCILOP };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::STENCILOP;
data.stencilOp.writeMask = writeMask;
data.stencilOp.sFail = sFail;
data.stencilOp.zFail = zFail;
data.stencilOp.pass = pass;
- curRenderStep_->commands.push_back(data);
}
void SetStencilDisabled() {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data;
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::STENCILFUNC;
data.stencilFunc.enabled = false;
- curRenderStep_->commands.push_back(data);
+ // When enabled = false, the others aren't read so we don't zero-initialize them.
}
void SetBlendFactor(const float color[4]) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::BLENDCOLOR };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::BLENDCOLOR;
CopyFloat4(data.blendColor.color, color);
- curRenderStep_->commands.push_back(data);
}
void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable, GLboolean depthClamp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::RASTER };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::RASTER;
data.raster.cullEnable = cullEnable;
data.raster.frontFace = frontFace;
data.raster.cullFace = cullFace;
data.raster.ditherEnable = ditherEnable;
data.raster.depthClampEnable = depthClamp;
- curRenderStep_->commands.push_back(data);
}
// Modifies the current texture as per GL specs, not global state.
void SetTextureSampler(int slot, GLenum wrapS, GLenum wrapT, GLenum magFilter, GLenum minFilter, float anisotropy) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
_dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS);
- GLRRenderData data{ GLRRenderCommand::TEXTURESAMPLER };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::TEXTURESAMPLER;
data.textureSampler.slot = slot;
data.textureSampler.wrapS = wrapS;
data.textureSampler.wrapT = wrapT;
data.textureSampler.magFilter = magFilter;
data.textureSampler.minFilter = minFilter;
data.textureSampler.anisotropy = anisotropy;
- curRenderStep_->commands.push_back(data);
}
void SetTextureLod(int slot, float minLod, float maxLod, float lodBias) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
_dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS);
- GLRRenderData data{ GLRRenderCommand::TEXTURELOD};
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::TEXTURELOD;
data.textureLod.slot = slot;
data.textureLod.minLod = minLod;
data.textureLod.maxLod = maxLod;
data.textureLod.lodBias = lodBias;
- curRenderStep_->commands.push_back(data);
}
// If scissorW == 0, no scissor is applied (the whole render target is cleared).
@@ -723,7 +740,8 @@ class GLRenderManager {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
if (!clearMask)
return;
- GLRRenderData data{ GLRRenderCommand::CLEAR };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::CLEAR;
data.clear.clearMask = clearMask;
data.clear.clearColor = clearColor;
data.clear.clearZ = clearZ;
@@ -733,12 +751,12 @@ class GLRenderManager {
data.clear.scissorY = scissorY;
data.clear.scissorW = scissorW;
data.clear.scissorH = scissorH;
- curRenderStep_->commands.push_back(data);
}
void Draw(GLRInputLayout *inputLayout, GLRBuffer *buffer, size_t offset, GLenum mode, int first, int count) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::DRAW };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::DRAW;
data.draw.inputLayout = inputLayout;
data.draw.offset = offset;
data.draw.buffer = buffer;
@@ -747,13 +765,12 @@ class GLRenderManager {
data.draw.first = first;
data.draw.count = count;
data.draw.indexType = 0;
- curRenderStep_->commands.push_back(data);
- curRenderStep_->render.numDraws++;
}
void DrawIndexed(GLRInputLayout *inputLayout, GLRBuffer *buffer, size_t offset, GLRBuffer *indexBuffer, GLenum mode, int count, GLenum indexType, void *indices, int instances = 1) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
- GLRRenderData data{ GLRRenderCommand::DRAW };
+ GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = GLRRenderCommand::DRAW;
data.draw.inputLayout = inputLayout;
data.draw.offset = offset;
data.draw.buffer = buffer;
@@ -763,8 +780,6 @@ class GLRenderManager {
data.draw.indexType = indexType;
data.draw.indices = indices;
data.draw.instances = instances;
- curRenderStep_->commands.push_back(data);
- curRenderStep_->render.numDraws++;
}
enum { MAX_INFLIGHT_FRAMES = 3 };
@@ -847,7 +862,7 @@ class GLRenderManager {
GLRStep *curRenderStep_ = nullptr;
std::vector steps_;
- std::vector initSteps_;
+ FastVec initSteps_;
// Execution time state
bool run_ = true;
@@ -859,7 +874,7 @@ class GLRenderManager {
std::mutex pushMutex_;
std::condition_variable pushCondVar_;
- std::queue renderThreadQueue_;
+ std::queue renderThreadQueue_;
// For readbacks and other reasons we need to sync with the render thread.
std::mutex syncMutex_;
diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
index 3e2c26934e70..e7670e99445f 100644
--- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp
+++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
@@ -481,7 +481,7 @@ void VulkanQueueRunner::ApplyMGSHack(std::vector &steps) {
last = j - 1;
// should really also check descriptor sets...
if (steps[j]->commands.size()) {
- VkRenderData &cmd = steps[j]->commands.back();
+ const VkRenderData &cmd = steps[j]->commands.back();
if (cmd.cmd == VKRRenderCommand::DRAW_INDEXED && cmd.draw.count != 6)
last = j - 1;
}
@@ -1241,7 +1241,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
VKRGraphicsPipeline *lastGraphicsPipeline = nullptr;
VKRComputePipeline *lastComputePipeline = nullptr;
- auto &commands = step.commands;
+ const auto &commands = step.commands;
// We can do a little bit of state tracking here to eliminate some calls into the driver.
// The stencil ones are very commonly mostly redundant so let's eliminate them where possible.
diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h
index 6df7dfc68103..45af407cf538 100644
--- a/Common/GPU/Vulkan/VulkanQueueRunner.h
+++ b/Common/GPU/Vulkan/VulkanQueueRunner.h
@@ -6,6 +6,7 @@
#include "Common/Thread/Promise.h"
#include "Common/Data/Collections/Hashmaps.h"
+#include "Common/Data/Collections/FastVec.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanBarrier.h"
#include "Common/GPU/Vulkan/VulkanFrameData.h"
@@ -153,7 +154,7 @@ struct VKRStep {
~VKRStep() {}
VKRStepType stepType;
- std::vector<VkRenderData> commands;
+ FastVec<VkRenderData> commands;  // Render commands for this step; elements are VkRenderData (see commands.back() in ApplyMGSHack).
TinySet preTransitions;
TinySet dependencies;
const char *tag;
@@ -212,9 +213,14 @@ struct VKRStep {
// These are enqueued from the main thread,
// and the render thread pops them off
struct VKRRenderThreadTask {
+ explicit VKRRenderThreadTask(VKRRunType _runType) : runType(_runType) {}  // explicit: a run type must never silently convert into a task
std::vector<VKRStep *> steps;
- int frame;
+ int frame = -1;  // -1 until the producer assigns the frame index
VKRRunType runType;
+
+ // Avoid copying these by accident.
+ VKRRenderThreadTask(const VKRRenderThreadTask &) = delete;
+ VKRRenderThreadTask &operator =(const VKRRenderThreadTask &) = delete;
};
class VulkanQueueRunner {
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp
index 7ab778d8c288..287eaf236575 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.cpp
+++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp
@@ -308,9 +308,8 @@ bool VulkanRenderManager::CreateBackbuffers() {
void VulkanRenderManager::StopThread() {
{
// Tell the render thread to quit when it's done.
- VKRRenderThreadTask task;
- task.frame = vulkan_->GetCurFrame();
- task.runType = VKRRunType::EXIT;
+ VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::EXIT);
+ task->frame = vulkan_->GetCurFrame();
std::unique_lock lock(pushMutex_);
renderThreadQueue_.push(task);
pushCondVar_.notify_one();
@@ -494,7 +493,7 @@ void VulkanRenderManager::ThreadFunc() {
SetCurrentThreadName("RenderMan");
while (true) {
// Pop a task of the queue and execute it.
- VKRRenderThreadTask task;
+ VKRRenderThreadTask *task = nullptr;
{
std::unique_lock lock(pushMutex_);
while (renderThreadQueue_.empty()) {
@@ -506,12 +505,14 @@ void VulkanRenderManager::ThreadFunc() {
// Oh, we got a task! We can now have pushMutex_ unlocked, allowing the host to
// push more work when it feels like it, and just start working.
- if (task.runType == VKRRunType::EXIT) {
+ if (task->runType == VKRRunType::EXIT) {
// Oh, host wanted out. Let's leave.
+ delete task;  // Tasks are heap-allocated by the producer; free the EXIT task here, since `task` goes out of scope at the break.
break;
}
- Run(task);
+ Run(*task);
+ delete task;  // The render thread owns the task once it has been popped.
}
// Wait for the device to be done with everything, before tearing stuff down.
@@ -999,7 +999,7 @@ void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int
queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()], nullptr, w, h, destFormat, destFormat, pixelStride, pixels);
}
-static void RemoveDrawCommands(std::vector<VkRenderData> *cmds) {
+static void RemoveDrawCommands(FastVec<VkRenderData> *cmds) {
// Here we remove any DRAW type commands when we hit a CLEAR.
for (auto &c : *cmds) {
if (c.cmd == VKRRenderCommand::DRAW || c.cmd == VKRRenderCommand::DRAW_INDEXED) {
@@ -1008,7 +1008,7 @@ static void RemoveDrawCommands(std::vector *cmds) {
}
}
-static void CleanupRenderCommands(std::vector<VkRenderData> *cmds) {
+static void CleanupRenderCommands(FastVec<VkRenderData> *cmds) {
size_t lastCommand[(int)VKRRenderCommand::NUM_RENDER_COMMANDS];
memset(lastCommand, -1, sizeof(lastCommand));
@@ -1266,13 +1266,12 @@ void VulkanRenderManager::Finish() {
FrameData &frameData = frameData_[curFrame];
VLOG("PUSH: Frame[%d]", curFrame);
- VKRRenderThreadTask task;
- task.frame = curFrame;
- task.runType = VKRRunType::PRESENT;
+ VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::PRESENT);
+ task->frame = curFrame;
{
std::unique_lock lock(pushMutex_);
renderThreadQueue_.push(task);
- renderThreadQueue_.back().steps = std::move(steps_);
+ renderThreadQueue_.back()->steps = std::move(steps_);
pushCondVar_.notify_one();
}
@@ -1382,12 +1381,11 @@ void VulkanRenderManager::FlushSync() {
{
VLOG("PUSH: Frame[%d]", curFrame);
- VKRRenderThreadTask task;
- task.frame = curFrame;
- task.runType = VKRRunType::SYNC;
+ VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SYNC);
+ task->frame = curFrame;
std::unique_lock lock(pushMutex_);
renderThreadQueue_.push(task);
- renderThreadQueue_.back().steps = std::move(steps_);
+ renderThreadQueue_.back()->steps = std::move(steps_);
pushCondVar_.notify_one();
}
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h
index b0ca50d4bfa5..c1bb34234aca 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.h
+++ b/Common/GPU/Vulkan/VulkanRenderManager.h
@@ -240,7 +240,8 @@ class VulkanRenderManager {
void BindPipeline(VKRGraphicsPipeline *pipeline, PipelineFlags flags, VkPipelineLayout pipelineLayout) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
_dbg_assert_(pipeline != nullptr);
- VkRenderData data{ VKRRenderCommand::BIND_GRAPHICS_PIPELINE };
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = VKRRenderCommand::BIND_GRAPHICS_PIPELINE;
pipelinesToCheck_.push_back(pipeline);
data.graphics_pipeline.pipeline = pipeline;
data.graphics_pipeline.pipelineLayout = pipelineLayout;
@@ -249,24 +250,24 @@ class VulkanRenderManager {
// DebugBreak();
// }
curPipelineFlags_ |= flags;
- curRenderStep_->commands.push_back(data);
}
void BindPipeline(VKRComputePipeline *pipeline, PipelineFlags flags, VkPipelineLayout pipelineLayout) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
_dbg_assert_(pipeline != nullptr);
- VkRenderData data{ VKRRenderCommand::BIND_COMPUTE_PIPELINE };
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized();  // Command is written directly into the queue; the slot starts out uninitialized.
+ data.cmd = VKRRenderCommand::BIND_COMPUTE_PIPELINE;
data.compute_pipeline.pipeline = pipeline;
data.compute_pipeline.pipelineLayout = pipelineLayout;
curPipelineFlags_ |= flags;
- curRenderStep_->commands.push_back(data);
}
void SetViewport(const VkViewport &vp) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
_dbg_assert_((int)vp.width >= 0);
_dbg_assert_((int)vp.height >= 0);
- VkRenderData data{ VKRRenderCommand::VIEWPORT };
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = VKRRenderCommand::VIEWPORT;
data.viewport.vp.x = vp.x;
data.viewport.vp.y = vp.y;
data.viewport.vp.width = vp.width;
@@ -276,7 +277,6 @@ class VulkanRenderManager {
// TODO: This should be fixed at the source.
data.viewport.vp.minDepth = clamp_value(vp.minDepth, 0.0f, 1.0f);
data.viewport.vp.maxDepth = clamp_value(vp.maxDepth, 0.0f, 1.0f);
- curRenderStep_->commands.push_back(data);
curStepHasViewport_ = true;
}
@@ -318,37 +318,37 @@ class VulkanRenderManager {
curRenderArea_.Apply(rc);
- VkRenderData data{ VKRRenderCommand::SCISSOR };
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = VKRRenderCommand::SCISSOR;
data.scissor.scissor = rc;
- curRenderStep_->commands.push_back(data);
curStepHasScissor_ = true;
}
void SetStencilParams(uint8_t writeMask, uint8_t compareMask, uint8_t refValue) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
- VkRenderData data{ VKRRenderCommand::STENCIL };
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized();  // Uninitialized slot in the current step's command list; populated below.
+ data.cmd = VKRRenderCommand::STENCIL;
data.stencil.stencilWriteMask = writeMask;
data.stencil.stencilCompareMask = compareMask;
data.stencil.stencilRef = refValue;
- curRenderStep_->commands.push_back(data);
}
void SetBlendFactor(uint32_t color) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
- VkRenderData data{ VKRRenderCommand::BLEND };
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized();  // Appends a BLEND command in place; nothing in the slot is pre-initialized.
+ data.cmd = VKRRenderCommand::BLEND;
data.blendColor.color = color;
- curRenderStep_->commands.push_back(data);
}
void PushConstants(VkPipelineLayout pipelineLayout, VkShaderStageFlags stages, int offset, int size, void *constants) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
_dbg_assert_(size + offset < 40);
- VkRenderData data{ VKRRenderCommand::PUSH_CONSTANTS };
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized();  // Slot is uninitialized; only cmd and the push.* fields set below are defined afterwards.
+ data.cmd = VKRRenderCommand::PUSH_CONSTANTS;
data.push.stages = stages;
data.push.offset = offset;
data.push.size = size;
memcpy(data.push.data, constants, size);
- curRenderStep_->commands.push_back(data);
}
void Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask);
@@ -380,7 +380,8 @@ class VulkanRenderManager {
void Draw(VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_);
- VkRenderData data{ VKRRenderCommand::DRAW };
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = VKRRenderCommand::DRAW;
data.draw.count = count;
data.draw.offset = offset;
data.draw.ds = descSet;
@@ -390,13 +391,13 @@ class VulkanRenderManager {
_dbg_assert_(numUboOffsets <= ARRAY_SIZE(data.draw.uboOffsets));
for (int i = 0; i < numUboOffsets; i++)
data.draw.uboOffsets[i] = uboOffsets[i];
- curRenderStep_->commands.push_back(data);
curRenderStep_->render.numDraws++;
}
void DrawIndexed(VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, VkBuffer ibuffer, int ioffset, int count, int numInstances, VkIndexType indexType) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_);
- VkRenderData data{ VKRRenderCommand::DRAW_INDEXED };
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized();
+ data.cmd = VKRRenderCommand::DRAW_INDEXED;
data.drawIndexed.count = count;
data.drawIndexed.instances = numInstances;
data.drawIndexed.ds = descSet;
@@ -409,7 +410,6 @@ class VulkanRenderManager {
for (int i = 0; i < numUboOffsets; i++)
data.drawIndexed.uboOffsets[i] = uboOffsets[i];
data.drawIndexed.indexType = indexType;
- curRenderStep_->commands.push_back(data);
curRenderStep_->render.numDraws++;
}
@@ -417,9 +417,9 @@ class VulkanRenderManager {
// in the debugger.
void DebugAnnotate(const char *annotation) {
_dbg_assert_(curRenderStep_);
- VkRenderData data{ VKRRenderCommand::DEBUG_ANNOTATION };
+ VkRenderData &data = curRenderStep_->commands.push_uninitialized();  // Written in place; only the pointer is stored, the string itself is not copied here.
+ data.cmd = VKRRenderCommand::DEBUG_ANNOTATION;
data.debugAnnotation.annotation = annotation;
- curRenderStep_->commands.push_back(data);
}
VkCommandBuffer GetInitCmd();
@@ -509,7 +509,7 @@ class VulkanRenderManager {
std::mutex pushMutex_;
std::condition_variable pushCondVar_;
- std::queue<VKRRenderThreadTask> renderThreadQueue_;
+ std::queue<VKRRenderThreadTask *> renderThreadQueue_;  // Holds heap-allocated tasks; the render thread deletes each one after popping it.
// For readbacks and other reasons we need to sync with the render thread.
std::mutex syncMutex_;
diff --git a/Common/VR/PPSSPPVR.cpp b/Common/VR/PPSSPPVR.cpp
index e580a6af8030..a2f5bc285bb5 100644
--- a/Common/VR/PPSSPPVR.cpp
+++ b/Common/VR/PPSSPPVR.cpp
@@ -560,12 +560,13 @@ void PreprocessSkyplane(GLRStep* step) {
// Clear sky with the fog color.
if (!vrCompat[VR_COMPAT_FBO_CLEAR]) {
- GLRRenderData skyClear {};
- skyClear.cmd = GLRRenderCommand::CLEAR;
+ GLRRenderData &skyClear = step->commands.insert(step->commands.begin());
+ skyClear.cmd = GLRRenderCommand::CLEAR; // NOTE: the inserted slot is NOT zero-initialized; every field the clear reads must be set here.
skyClear.clear.colorMask = 0xF;
skyClear.clear.clearMask = GL_COLOR_BUFFER_BIT;
skyClear.clear.clearColor = vrCompat[VR_COMPAT_FOG_COLOR];
- step->commands.insert(step->commands.begin(), skyClear);
+ skyClear.clear.scissorW = 0; // scissorW == 0 means no scissor is applied (the whole render target is cleared).
+ skyClear.clear.scissorH = 0; // clearZ/clearStencil are left unset; presumably unused with a color-only clearMask - TODO confirm.
vrCompat[VR_COMPAT_FBO_CLEAR] = true;
}
diff --git a/unittest/UnitTest.cpp b/unittest/UnitTest.cpp
index 047f32129d38..4b4b1b6e5052 100644
--- a/unittest/UnitTest.cpp
+++ b/unittest/UnitTest.cpp
@@ -47,6 +47,7 @@
#endif
#include "Common/Data/Collections/TinySet.h"
+#include "Common/Data/Collections/FastVec.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Data/Text/Parsers.h"
#include "Common/Data/Text/WrapText.h"
@@ -365,6 +366,45 @@ bool TestTinySet() {
return true;
}
+bool TestFastVec() {
+ FastVec<int> a;  // Default-constructed: starts empty.
+ EXPECT_EQ_INT((int)a.size(), 0);
+ a.push_back(1);
+ EXPECT_EQ_INT((int)a.size(), 1);
+ a.push_back(2);
+ EXPECT_EQ_INT((int)a.size(), 2);
+ FastVec<int> b;
+ b.push_back(8);
+ b.push_back(9);
+ b.push_back(10);
+ EXPECT_EQ_INT((int)b.size(), 3);
+ for (int i = 0; i < 100; i++) {  // Push enough elements to force the storage to grow.
+ b.push_back(33);
+ }
+ EXPECT_EQ_INT((int)b.size(), 103);
+
+ int items[4] = { 50, 60, 70, 80 };
+ b.insert(b.begin() + 1, items, items + 4);  // Range insert in the middle: existing elements shift right.
+ EXPECT_EQ_INT(b[0], 8);
+ EXPECT_EQ_INT(b[1], 50);
+ EXPECT_EQ_INT(b[2], 60);
+ EXPECT_EQ_INT(b[3], 70);
+ EXPECT_EQ_INT(b[4], 80);
+ EXPECT_EQ_INT(b[5], 9);
+
+ b.resize(2);  // Truncate to { 8, 50 }, then range-insert at the end.
+ b.insert(b.end(), items, items + 4);
+ EXPECT_EQ_INT(b[0], 8);
+ EXPECT_EQ_INT(b[1], 50);
+ EXPECT_EQ_INT(b[2], 50);
+ EXPECT_EQ_INT(b[3], 60);
+ EXPECT_EQ_INT(b[4], 70);
+ EXPECT_EQ_INT(b[5], 80);
+
+
+ return true;
+}
+
bool TestVFPUSinCos() {
float sine, cosine;
// Needed for VFPU tables.
@@ -977,6 +1017,7 @@ TestItem availableTests[] = {
TEST_ITEM(ThreadManager),
TEST_ITEM(WrapText),
TEST_ITEM(TinySet),
+ TEST_ITEM(FastVec),
TEST_ITEM(SmallDataConvert),
TEST_ITEM(DepthMath),
TEST_ITEM(InputMapping),