-
Notifications
You must be signed in to change notification settings - Fork 2.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reduce zero initialization and copying overhead of render commands #17471
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
e4a729d
Remove unnecessary zero-initialization of GLRRenderCommand structs
hrydgard c30895b
Remove unnecessary variable
hrydgard 78eaa8c
Make sure we never copy GLRRenderThreadTask objects
hrydgard 956d784
Add FastVec, start using it for InitSteps
hrydgard 558e29a
Switch to the copy-free method of initializing initsteps
hrydgard 47931de
Switch to FastVec for commands. Slower than std::vector!
hrydgard 3790638
Use the faster pushes.
hrydgard ab34d20
Add more methods to FastVec
hrydgard 0b9dfac
Make sure VKRRenderThreadTask isn't copied.
hrydgard 234c1f0
Apply the same optimizations to the Vulkan backend. Smaller effect th…
hrydgard File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
#pragma once | ||
|
||
// Yet another replacement for std::vector, this time for use in graphics queues. | ||
// Its major difference is that you can append uninitialized structures and initialize them after. | ||
// This is not allowed by std::vector but is very useful for our sometimes oversized unions. | ||
// Also, copies during resize are done by memcpy, not by any move constructor or similar. | ||
|
||
#include <cstdlib> | ||
#include <cstring> | ||
|
||
// Minimal growable array intended for element types that can be relocated with memcpy/memmove.
//
// - Storage is obtained with malloc() and released with free(); element constructors and
//   destructors are never run by this container.
// - New elements can be appended without being initialized (see push_uninitialized()).
// - Growing the buffer relocates the existing elements with memcpy, so all pointers,
//   references and iterators into the container are invalidated whenever capacity() changes.
// - Movable but not copyable.
template<class T>
class FastVec {
public:
	FastVec() {}

	// Pre-allocates room for initialCapacity elements. size() starts at 0.
	FastVec(size_t initialCapacity) {
		if (initialCapacity != 0) {
			data_ = static_cast<T *>(malloc(initialCapacity * sizeof(T)));
			capacity_ = initialCapacity;
		}
	}

	// free(nullptr) is a no-op, so no null check is needed.
	~FastVec() { free(data_); }

	// Appends one element and returns a reference to it WITHOUT initializing it.
	// The caller is expected to fill it in before it is read.
	T &push_uninitialized() {
		if (size_ == capacity_) {
			GrowForAppend();
		}
		return data_[size_++];
	}

	// Appends a copy of t (by assignment into the new uninitialized slot).
	void push_back(const T &t) {
		T &dest = push_uninitialized();
		dest = t;
	}

	// Move constructor: takes over other's buffer and leaves other empty.
	FastVec(FastVec &&other) noexcept {
		data_ = other.data_;
		size_ = other.size_;
		capacity_ = other.capacity_;
		other.data_ = nullptr;
		other.size_ = 0;
		other.capacity_ = 0;
	}

	// Move assignment: releases our buffer, takes over other's and leaves other empty.
	FastVec &operator=(FastVec &&other) noexcept {
		if (this != &other) {
			// The buffer comes from malloc(), so it must be released with free(), not delete[].
			free(data_);
			data_ = other.data_;
			size_ = other.size_;
			capacity_ = other.capacity_;
			other.data_ = nullptr;
			other.size_ = 0;
			other.capacity_ = 0;
		}
		return *this;
	}

	// No copy constructor.
	FastVec(const FastVec &other) = delete;
	FastVec &operator=(const FastVec &other) = delete;

	size_t size() const { return size_; }
	size_t capacity() const { return capacity_; }
	// Drops all elements but keeps the allocated buffer for reuse.
	void clear() { size_ = 0; }
	bool empty() const { return size_ == 0; }

	T *begin() { return data_; }
	T *end() { return data_ + size_; }
	const T *begin() const { return data_; }
	const T *end() const { return data_ + size_; }

	// Out of bounds (past size() - 1) is undefined behavior. Note that at() is NOT bounds-checked either.
	T &operator[] (const size_t index) { return data_[index]; }
	const T &operator[] (const size_t index) const { return data_[index]; }
	T &at(const size_t index) { return data_[index]; }
	const T &at(const size_t index) const { return data_[index]; }

	// These are invalid if empty().
	T &back() { return data_[size_ - 1]; }
	T &front() { return data_[0]; }
	const T &back() const { return data_[size_ - 1]; }
	const T &front() const { return data_[0]; }

	// Limited functionality for inserts and similar, add as needed.

	// Opens up one slot at iter (which must be in [begin(), end()]) and returns a
	// reference to it. The slot's contents are unspecified until the caller writes it.
	T &insert(T *iter) {
		// Compute the index first: growing may move the buffer and invalidate iter.
		const size_t pos = static_cast<size_t>(iter - data_);
		if (size_ == capacity_) {
			GrowForAppend();
		}
		// Shift the tail [pos, size_) up by one element.
		if (pos < size_) {
			memmove(data_ + pos + 1, data_ + pos, (size_ - pos) * sizeof(T));
		}
		size_++;
		return data_[pos];
	}

	// Inserts a bitwise copy of [beginIter, endIter) before destIter, which must be in
	// [begin(), end()]. The source range must not point into this container, since
	// growing may free the buffer it points into.
	void insert(T *destIter, const T *beginIter, const T *endIter) {
		if (beginIter == endIter)
			return;
		// Compute the index first: growing may move the buffer and invalidate destIter.
		const size_t pos = static_cast<size_t>(destIter - data_);
		const size_t newItems = static_cast<size_t>(endIter - beginIter);
		IncreaseCapacityTo(size_ + newItems);
		if (pos < size_) {
			memmove(data_ + pos + newItems, data_ + pos, (size_ - pos) * sizeof(T));
		}
		memcpy(data_ + pos, beginIter, newItems * sizeof(T));
		size_ += newItems;
	}

	// Sets the element count. Shrinking just drops the tail; growing appends
	// uninitialized elements (consistent with push_uninitialized()).
	void resize(size_t size) {
		if (size > size_) {
			IncreaseCapacityTo(size);
		}
		size_ = size;
	}

private:
	// Ensures capacity() >= newCapacity, relocating the current elements with memcpy if a
	// new buffer is needed. Never shrinks. Keeps capacity_ in sync with the actual buffer.
	void IncreaseCapacityTo(size_t newCapacity) {
		if (newCapacity <= capacity_)
			return;
		T *newData = static_cast<T *>(malloc(sizeof(T) * newCapacity));
		if (data_) {
			if (size_ != 0) {
				memcpy(newData, data_, sizeof(T) * size_);
			}
			free(data_);
		}
		data_ = newData;
		capacity_ = newCapacity;
	}

	// Geometric growth used when a single element is about to be added to a full buffer:
	// doubles the capacity, with a minimum of 16 elements.
	void GrowForAppend() {
		size_t newCapacity = capacity_ * 2;
		if (newCapacity < 16) {
			newCapacity = 16;
		}
		IncreaseCapacityTo(newCapacity);
	}

	size_t size_ = 0;
	size_t capacity_ = 0;
	T *data_ = nullptr;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe this should have an assert or something? I don't think anything's using this right now, though.
I do wonder if there might've been a way to coax the compiler to do less copying with enough move/emplace, but this is probably safer. I assume it's also faster in debug anyway...
-[Unknown]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I actually tried a bunch of things, but when looking at the disassembly, still lots of zeroing and copying. So gave up and did it this way. And yes, it helps debug performance indeed.