ir_passes: Integrate DS barriers in block
raphaelthegreat committed Jan 2, 2025
1 parent c977011 commit 827ea02
Showing 3 changed files with 52 additions and 26 deletions.
6 changes: 0 additions & 6 deletions src/shader_recompiler/frontend/translate/data_share.cpp
@@ -205,7 +205,6 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
}
emit_ds_read_barrier = true;
}

void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
@@ -222,11 +221,6 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {

void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
const GcnInst& inst) {
if (emit_ds_read_barrier && profile.needs_lds_barriers) {
ir.Barrier();
emit_ds_read_barrier = false;
}

const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
IR::VectorReg dst_reg{inst.dst[0].code};
if (is_pair) {
1 change: 0 additions & 1 deletion src/shader_recompiler/frontend/translate/translate.h
@@ -308,7 +308,6 @@ class Translator {
const RuntimeInfo& runtime_info;
const Profile& profile;
bool opcode_missing = false;
bool emit_ds_read_barrier = false;
};

void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
71 changes: 52 additions & 19 deletions src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
@@ -8,6 +8,54 @@

namespace Shader::Optimization {

static void EmitBarrierInBlock(IR::Block* block) {
// This is intended to insert a barrier when a shared memory write and read
// occur in the same basic block. The caller also checks that branch depth is
// zero, as we don't want to insert a barrier in potentially divergent code.
bool emit_barrier_on_write = false;
bool emit_barrier_on_read = false;
const auto emit_barrier = [block](bool& emit_cond, IR::Inst& inst) {
if (emit_cond) {
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
ir.Barrier();
emit_cond = false;
}
};
for (IR::Inst& inst : block->Instructions()) {
if (inst.GetOpcode() == IR::Opcode::LoadSharedU32 ||
inst.GetOpcode() == IR::Opcode::LoadSharedU64) {
emit_barrier(emit_barrier_on_read, inst);
emit_barrier_on_write = true;
}
if (inst.GetOpcode() == IR::Opcode::WriteSharedU32 ||
inst.GetOpcode() == IR::Opcode::WriteSharedU64) {
emit_barrier(emit_barrier_on_write, inst);
emit_barrier_on_read = true;
}
}
}
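
The read/write toggling above can be illustrated outside the IR. The following is a minimal, self-contained C++ sketch, not part of the commit: the Op enum, the sample block, and the printf output are made up, while the real pass walks IR::Inst opcodes and emits ir.Barrier().

// Illustrative only: a stand-alone model of the barrier toggle in
// EmitBarrierInBlock.
#include <cstdio>
#include <vector>

enum class Op { LoadShared, WriteShared, Other };

int main() {
    // Assumed sample block: write, write, read, read, write.
    const std::vector<Op> block = {Op::WriteShared, Op::WriteShared, Op::LoadShared,
                                   Op::LoadShared, Op::WriteShared};
    bool barrier_on_write = false; // armed by a read; fires before the next write
    bool barrier_on_read = false;  // armed by a write; fires before the next read
    for (std::size_t i = 0; i < block.size(); ++i) {
        if (block[i] == Op::LoadShared) {
            if (barrier_on_read) {
                std::printf("barrier before instruction %zu\n", i);
                barrier_on_read = false;
            }
            barrier_on_write = true;
        } else if (block[i] == Op::WriteShared) {
            if (barrier_on_write) {
                std::printf("barrier before instruction %zu\n", i);
                barrier_on_write = false;
            }
            barrier_on_read = true;
        }
    }
    return 0;
}

For that sample sequence the sketch reports barriers before instructions 2 and 4, i.e. at the write-to-read and read-to-write transitions, which is the placement the pass produces inside a single basic block.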

static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
// Insert a barrier after divergent conditional blocks.
// This avoids potential softlocks and crashes when some threads
// initialize shared memory and others read from it.
const IR::U1 cond = data.if_node.cond;
const auto insert_barrier =
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
return true;
}
return std::nullopt;
});
if (insert_barrier) {
IR::Block* const merge = data.if_node.merge;
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
IR::IREmitter ir{*merge, insert_point};
ir.Barrier();
}
}
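
The situation EmitBarrierInMergeBlock guards against is a workgroup in which one invocation initializes shared memory inside a branch gated on LocalInvocationId while the others read it after the branch merges. A hypothetical, sequentialized host-side model of that pattern (not part of the commit; kGroupSize, shared_mem, and the values are illustrative only) looks like this:

// Illustrative only: divergent initialization followed by a group-wide read.
#include <array>
#include <cstdio>

int main() {
    constexpr int kGroupSize = 4;
    std::array<int, 1> shared_mem{-1}; // -1 stands for "uninitialized"

    // Divergent if-block: only invocation 0 writes shared memory.
    for (int id = 0; id < kGroupSize; ++id) {
        if (id == 0) {
            shared_mem[0] = 42;
        }
    }

    // The pass inserts ir.Barrier() at this point, i.e. at the first non-phi
    // instruction of the merge block.

    // After the merge: every invocation reads the shared value.
    for (int id = 0; id < kGroupSize; ++id) {
        std::printf("invocation %d reads %d\n", id, shared_mem[0]);
    }
    return 0;
}

On real hardware the two phases run concurrently across the workgroup, so without the barrier at the merge point some invocations could read the uninitialized placeholder; the merge-block barrier enforces the write-before-read ordering.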

void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
if (!program.info.uses_shared || !profile.needs_lds_barriers) {
return;
@@ -19,27 +67,12 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
--branch_depth;
continue;
}
if (node.type != Type::If) {
if (node.type == Type::If && branch_depth++ == 0) {
EmitBarrierInMergeBlock(node.data);
continue;
}
u32 curr_depth = branch_depth++;
if (curr_depth != 0) {
continue;
}
const IR::U1 cond = node.data.if_node.cond;
const auto insert_barrier =
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
return true;
}
return std::nullopt;
});
if (insert_barrier) {
IR::Block* const merge = node.data.if_node.merge;
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
IR::IREmitter ir{*merge, insert_point};
ir.Barrier();
if (node.type == Type::Block && branch_depth == 0) {
EmitBarrierInBlock(node.data.block);
}
}
}
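
Taken together, the rewritten driver only applies these helpers at top-level control flow. A small, self-contained C++ sketch of the depth tracking (not part of the commit; Type and the node sequence are hypothetical stand-ins for the recompiler's abstract syntax list) shows which nodes are considered:

// Illustrative only: depth tracking of the pass driver on a made-up layout of
// a top-level block, an if with one nested block, then another top-level block.
#include <cstdio>
#include <vector>

enum class Type { Block, If, EndIf };

int main() {
    const std::vector<Type> nodes = {Type::Block, Type::If, Type::Block, Type::EndIf,
                                     Type::Block};
    int branch_depth = 0;
    for (std::size_t i = 0; i < nodes.size(); ++i) {
        if (nodes[i] == Type::EndIf) {
            --branch_depth;
            continue;
        }
        if (nodes[i] == Type::If && branch_depth++ == 0) {
            std::printf("node %zu: EmitBarrierInMergeBlock candidate\n", i);
            continue;
        }
        if (nodes[i] == Type::Block && branch_depth == 0) {
            std::printf("node %zu: EmitBarrierInBlock scan\n", i);
        }
    }
    return 0;
}

Nested blocks and nested ifs are skipped because branch_depth is non-zero there, which matches the stated goal of never inserting barriers into potentially divergent code.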
