Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the semantics of DenseHashMap to be consistent even when inserting nulls #18129

Merged
merged 2 commits into from
Sep 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 38 additions & 15 deletions Common/Data/Collections/Hashmaps.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,31 +29,52 @@ enum class BucketState : uint8_t {
// we always use very small values, so it's probably better to have them in the same
// cache-line as the corresponding key.
// Enforces that value are pointers to make sure that combined storage makes sense.
template <class Key, class Value, Value NullValue>
template <class Key, class Value>
class DenseHashMap {
public:
DenseHashMap(int initialCapacity) : capacity_(initialCapacity) {
map.resize(initialCapacity);
state.resize(initialCapacity);
}

// Returns nullptr if no entry was found.
Value Get(const Key &key) {
// Returns true if the entry was found, and writes the entry to *value.
// Returns false and does not write to value if no entry was found.
// Note that nulls can be stored.
bool Get(const Key &key, Value *value) const {
uint32_t mask = capacity_ - 1;
uint32_t pos = HashKey(key) & mask;
// No? Let's go into search mode. Linear probing.
uint32_t p = pos;
while (true) {
if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key))
return map[p].value;
else if (state[p] == BucketState::FREE)
return NullValue;
if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key)) {
*value = map[p].value;
return true;
} else if (state[p] == BucketState::FREE) {
return false;
}
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
if (p == pos) {
// We looped around the whole map.
_assert_msg_(false, "DenseHashMap: Hit full on Get()");
}
}
return NullValue;
return false;
}

// Only works if Value can be nullptr
Value GetOrNull(const Key &key) const {
Value value;
if (Get(key, &value)) {
return value;
} else {
return (Value)nullptr;
}
}

bool ContainsKey(const Key &key) const {
// Slightly wasteful.
Value value;
return Get(key, &value);
}

// Asserts if we already had the key!
Expand Down Expand Up @@ -190,7 +211,7 @@ class DenseHashMap {

// Like the above, uses linear probing for cache-friendliness.
// Does not perform hashing at all so expects well-distributed keys.
template <class Value, Value NullValue>
template <class Value>
class PrehashMap {
public:
PrehashMap(int initialCapacity) : capacity_(initialCapacity) {
Expand All @@ -199,22 +220,24 @@ class PrehashMap {
}

// Returns nullptr if no entry was found.
Value Get(uint32_t hash) {
bool Get(uint32_t hash, Value *value) {
uint32_t mask = capacity_ - 1;
uint32_t pos = hash & mask;
// No? Let's go into search mode. Linear probing.
uint32_t p = pos;
while (true) {
if (state[p] == BucketState::TAKEN && hash == map[p].hash)
return map[p].value;
else if (state[p] == BucketState::FREE)
return NullValue;
if (state[p] == BucketState::TAKEN && hash == map[p].hash) {
*value = map[p].value;
return true;
} else if (state[p] == BucketState::FREE) {
return false;
}
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
if (p == pos) {
_assert_msg_(false, "PrehashMap: Hit full on Get()");
}
}
return NullValue;
return false;
}

// Returns false if we already had the key! Which is a bit different.
Expand Down
4 changes: 2 additions & 2 deletions Common/GPU/Vulkan/VulkanFrameData.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,13 @@ struct FrameData {

// Frames need unique IDs to wait for present on, let's keep them here.
// Also used for indexing into the frame timing history buffer.
uint64_t frameId;
uint64_t frameId = 0;

// Profiling.
QueueProfileContext profile{};

// Async readback cache.
DenseHashMap<ReadbackKey, CachedReadback*, nullptr> readbacks_;
DenseHashMap<ReadbackKey, CachedReadback *> readbacks_;

FrameData() : readbacks_(8) {}

Expand Down
11 changes: 5 additions & 6 deletions Common/GPU/Vulkan/VulkanQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ void VulkanQueueRunner::DestroyBackBuffers() {
// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
auto foundPass = renderPasses_.Get(key);
if (foundPass) {
VKRRenderPass *foundPass;
if (renderPasses_.Get(key, &foundPass)) {
return foundPass;
}

Expand Down Expand Up @@ -1984,8 +1984,7 @@ void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd
key.height = step.readback.srcRect.extent.height;

// See if there's already a buffer we can reuse
cached = frameData.readbacks_.Get(key);
if (!cached) {
if (!frameData.readbacks_.Get(key, &cached)) {
cached = new CachedReadback();
cached->bufferSize = 0;
frameData.readbacks_.Insert(key, cached);
Expand Down Expand Up @@ -2065,8 +2064,8 @@ bool VulkanQueueRunner::CopyReadbackBuffer(FrameData &frameData, VKRFramebuffer
key.framebuf = src;
key.width = width;
key.height = height;
CachedReadback *cached = frameData.readbacks_.Get(key);
if (cached) {
CachedReadback *cached;
if (frameData.readbacks_.Get(key, &cached)) {
readback = cached;
} else {
// Didn't have a cached image ready yet
Expand Down
2 changes: 1 addition & 1 deletion Common/GPU/Vulkan/VulkanQueueRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ class VulkanQueueRunner {

// Renderpasses, all combinations of preserving or clearing or dont-care-ing fb contents.
// Each VKRRenderPass contains all compatibility classes (which attachments they have, etc).
DenseHashMap<RPKey, VKRRenderPass *, nullptr> renderPasses_;
DenseHashMap<RPKey, VKRRenderPass *> renderPasses_;

// Readback buffer. Currently we only support synchronous readback, so we only really need one.
// We size it generously.
Expand Down
12 changes: 8 additions & 4 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ void DrawEngineCommon::Init() {
}

VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
VertexDecoder *dec = decoderMap_.Get(vtype);
if (dec)
VertexDecoder *dec;
if (decoderMap_.Get(vtype, &dec))
return dec;
dec = new VertexDecoder();
_assert_(dec);
Expand Down Expand Up @@ -132,8 +132,12 @@ std::vector<std::string> DrawEngineCommon::DebugGetVertexLoaderIDs() {
std::string DrawEngineCommon::DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType) {
u32 mapId;
memcpy(&mapId, &id[0], sizeof(mapId));
VertexDecoder *dec = decoderMap_.Get(mapId);
return dec ? dec->GetString(stringType) : "N/A";
VertexDecoder *dec;
if (decoderMap_.Get(mapId, &dec)) {
return dec->GetString(stringType);
} else {
return "N/A";
}
}

static Vec3f ClipToScreen(const Vec4f& coords) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ class DrawEngineCommon {

// Cached vertex decoders
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
DenseHashMap<u32, VertexDecoder *, nullptr> decoderMap_;
DenseHashMap<u32, VertexDecoder *> decoderMap_;
VertexDecoder *dec_ = nullptr;
VertexDecoderJitCache *decJitCache_ = nullptr;
VertexDecoderOptions decOptions_{};
Expand Down
12 changes: 6 additions & 6 deletions GPU/D3D11/DrawEngineD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade
// TODO: Instead of one for each vshader, we can reduce it to one for each type of shader
// that reads TEXCOORD or not, etc. Not sure if worth it.
const InputLayoutKey key{ vshader, decFmt.id };
ID3D11InputLayout *inputLayout = inputLayoutMap_.Get(key);
if (inputLayout) {
ID3D11InputLayout *inputLayout;
if (inputLayoutMap_.Get(key, &inputLayout)) {
return inputLayout;
} else {
D3D11_INPUT_ELEMENT_DESC VertexElements[8];
Expand Down Expand Up @@ -368,8 +368,8 @@ void DrawEngineD3D11::DoFlush() {
// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();

VertexArrayInfoD3D11 *vai = vai_.Get(dcid);
if (!vai) {
VertexArrayInfoD3D11 *vai;
if (!vai_.Get(dcid, &vai)) {
vai = new VertexArrayInfoD3D11();
vai_.Insert(dcid, vai);
}
Expand Down Expand Up @@ -663,8 +663,8 @@ void DrawEngineD3D11::DoFlush() {
// We really do need a vertex layout for each vertex shader (or at least check its ID bits for what inputs it uses)!
// Some vertex shaders ignore one of the inputs, and then the layout created from it will lack it, which will be a problem for others.
InputLayoutKey key{ vshader, 0xFFFFFFFF }; // Let's use 0xFFFFFFFF to signify TransformedVertex
ID3D11InputLayout *layout = inputLayoutMap_.Get(key);
if (!layout) {
ID3D11InputLayout *layout;
if (!inputLayoutMap_.Get(key, &layout)) {
ASSERT_SUCCESS(device_->CreateInputLayout(TransformedVertexElements, ARRAY_SIZE(TransformedVertexElements), vshader->bytecode().data(), vshader->bytecode().size(), &layout));
inputLayoutMap_.Insert(key, layout);
}
Expand Down
12 changes: 6 additions & 6 deletions GPU/D3D11/DrawEngineD3D11.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ class DrawEngineD3D11 : public DrawEngineCommon {
ID3D11DeviceContext *context_;
ID3D11DeviceContext1 *context1_;

PrehashMap<VertexArrayInfoD3D11 *, nullptr> vai_;
PrehashMap<VertexArrayInfoD3D11 *> vai_;

struct InputLayoutKey {
D3D11VertexShader *vshader;
Expand All @@ -193,7 +193,7 @@ class DrawEngineD3D11 : public DrawEngineCommon {
}
};

DenseHashMap<InputLayoutKey, ID3D11InputLayout *, nullptr> inputLayoutMap_;
DenseHashMap<InputLayoutKey, ID3D11InputLayout *> inputLayoutMap_;

// Other
ShaderManagerD3D11 *shaderManager_ = nullptr;
Expand All @@ -205,10 +205,10 @@ class DrawEngineD3D11 : public DrawEngineCommon {
PushBufferD3D11 *pushInds_;

// D3D11 state object caches.
DenseHashMap<uint64_t, ID3D11BlendState *, nullptr> blendCache_;
DenseHashMap<uint64_t, ID3D11BlendState1 *, nullptr> blendCache1_;
DenseHashMap<uint64_t, ID3D11DepthStencilState *, nullptr> depthStencilCache_;
DenseHashMap<uint32_t, ID3D11RasterizerState *, nullptr> rasterCache_;
DenseHashMap<uint64_t, ID3D11BlendState *> blendCache_;
DenseHashMap<uint64_t, ID3D11BlendState1 *> blendCache1_;
DenseHashMap<uint64_t, ID3D11DepthStencilState *> depthStencilCache_;
DenseHashMap<uint32_t, ID3D11RasterizerState *> rasterCache_;

// Keep the depth state between ApplyDrawState and ApplyDrawStateLate
ID3D11RasterizerState *rasterState_ = nullptr;
Expand Down
16 changes: 8 additions & 8 deletions GPU/D3D11/StateMappingD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,8 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
// There might have been interactions between depth and blend above.
if (gstate_c.IsDirty(DIRTY_BLEND_STATE)) {
if (!device1_) {
ID3D11BlendState *bs = blendCache_.Get(keys_.blend.value);
if (bs == nullptr) {
ID3D11BlendState *bs = nullptr;
if (!blendCache_.Get(keys_.blend.value, &bs) || !bs) {
D3D11_BLEND_DESC desc{};
D3D11_RENDER_TARGET_BLEND_DESC &rt = desc.RenderTarget[0];
rt.BlendEnable = keys_.blend.blendEnable;
Expand All @@ -373,8 +373,8 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}
blendState_ = bs;
} else {
ID3D11BlendState1 *bs1 = blendCache1_.Get(keys_.blend.value);
if (bs1 == nullptr) {
ID3D11BlendState1 *bs1 = nullptr;
if (!blendCache1_.Get(keys_.blend.value, &bs1) || !bs1) {
D3D11_BLEND_DESC1 desc1{};
D3D11_RENDER_TARGET_BLEND_DESC1 &rt = desc1.RenderTarget[0];
rt.BlendEnable = keys_.blend.blendEnable;
Expand All @@ -395,8 +395,8 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}

if (gstate_c.IsDirty(DIRTY_RASTER_STATE)) {
ID3D11RasterizerState *rs = rasterCache_.Get(keys_.raster.value);
if (rs == nullptr) {
ID3D11RasterizerState *rs;
if (!rasterCache_.Get(keys_.raster.value, &rs) || !rs) {
D3D11_RASTERIZER_DESC desc{};
desc.CullMode = (D3D11_CULL_MODE)(keys_.raster.cullMode);
desc.FillMode = D3D11_FILL_SOLID;
Expand All @@ -410,8 +410,8 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}

if (gstate_c.IsDirty(DIRTY_DEPTHSTENCIL_STATE)) {
ID3D11DepthStencilState *ds = depthStencilCache_.Get(keys_.depthStencil.value);
if (ds == nullptr) {
ID3D11DepthStencilState *ds;
if (!depthStencilCache_.Get(keys_.depthStencil.value, &ds) || !ds) {
D3D11_DEPTH_STENCIL_DESC desc{};
desc.DepthEnable = keys_.depthStencil.depthTestEnable;
desc.DepthWriteMask = keys_.depthStencil.depthWriteEnable ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
Expand Down
9 changes: 4 additions & 5 deletions GPU/Directx9/DrawEngineDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,8 @@ static void VertexAttribSetup(D3DVERTEXELEMENT9 * VertexElement, u8 fmt, u8 offs
}

IDirect3DVertexDeclaration9 *DrawEngineDX9::SetupDecFmtForDraw(const DecVtxFormat &decFmt, u32 pspFmt) {
IDirect3DVertexDeclaration9 *vertexDeclCached = vertexDeclMap_.Get(pspFmt);

if (vertexDeclCached) {
IDirect3DVertexDeclaration9 *vertexDeclCached;
if (vertexDeclMap_.Get(pspFmt, &vertexDeclCached)) {
return vertexDeclCached;
} else {
D3DVERTEXELEMENT9 VertexElements[8];
Expand Down Expand Up @@ -347,8 +346,8 @@ void DrawEngineDX9::DoFlush() {
if (useCache) {
// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
VertexArrayInfoDX9 *vai = vai_.Get(dcid);
if (!vai) {
VertexArrayInfoDX9 *vai;
if (!vai_.Get(dcid, &vai)) {
vai = new VertexArrayInfoDX9();
vai_.Insert(dcid, vai);
}
Expand Down
4 changes: 2 additions & 2 deletions GPU/Directx9/DrawEngineDX9.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ class DrawEngineDX9 : public DrawEngineCommon {
LPDIRECT3DDEVICE9 device_ = nullptr;
Draw::DrawContext *draw_;

PrehashMap<VertexArrayInfoDX9 *, nullptr> vai_;
DenseHashMap<u32, IDirect3DVertexDeclaration9 *, nullptr> vertexDeclMap_;
PrehashMap<VertexArrayInfoDX9 *> vai_;
DenseHashMap<u32, IDirect3DVertexDeclaration9 *> vertexDeclMap_;

// SimpleVertex
IDirect3DVertexDeclaration9* transformedVertexDecl_ = nullptr;
Expand Down
4 changes: 2 additions & 2 deletions GPU/GLES/DrawEngineGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ static inline void VertexAttribSetup(int attrib, int fmt, int stride, int offset
// TODO: Use VBO and get rid of the vertexData pointers - with that, we will supply only offsets
GLRInputLayout *DrawEngineGLES::SetupDecFmtForDraw(const DecVtxFormat &decFmt) {
uint32_t key = decFmt.id;
GLRInputLayout *inputLayout = inputLayoutMap_.Get(key);
if (inputLayout) {
GLRInputLayout *inputLayout;
if (inputLayoutMap_.Get(key, &inputLayout)) {
return inputLayout;
}

Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/DrawEngineGLES.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ class DrawEngineGLES : public DrawEngineCommon {
};
FrameData frameData_[GLRenderManager::MAX_INFLIGHT_FRAMES];

DenseHashMap<uint32_t, GLRInputLayout *, nullptr> inputLayoutMap_;
DenseHashMap<uint32_t, GLRInputLayout *> inputLayoutMap_;

GLRInputLayout *softwareInputLayout_ = nullptr;
GLRenderManager *render_;
Expand Down
Loading