Store IR instructions in a bump-allocated vector instead of loose allocations #19240

Merged
7 commits, merged Jun 7, 2024
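The PR's core change, sketched here for orientation: instead of each IRBlock owning its own heap-allocated vector of instructions, every block appends its instructions into one shared, bump-allocated arena and keeps only a 24-bit offset plus a count. The snippet below is a minimal illustrative sketch of that idea under assumed names (`Inst`, `BlockRef`, and `InstArena` are stand-ins, not the real PPSSPP types); the actual implementation is `IRBlockCache::AllocateBlock` in the diff further down.

```cpp
#include <cstdint>
#include <vector>

// Illustrative stand-in for the real IRInst.
struct Inst { uint32_t op, a, b, c; };

// A block no longer owns storage; it remembers where its instructions
// live in the shared arena and how many there are.
struct BlockRef {
    uint32_t offset;  // index of the block's first instruction in the arena
    uint16_t count;   // number of instructions in the block
};

class InstArena {
public:
    // Bump-allocate: append the block's instructions and hand back its offset.
    int Allocate(const std::vector<Inst> &insts, BlockRef *outRef) {
        constexpr size_t kMaxOffset = 0x1000000 - 1;  // offsets must fit in 24 bits
        size_t offset = arena_.size();
        if (offset >= kMaxOffset)
            return -1;  // arena full; the caller clears the cache and recompiles
        arena_.insert(arena_.end(), insts.begin(), insts.end());
        *outRef = BlockRef{ (uint32_t)offset, (uint16_t)insts.size() };
        return (int)offset;
    }

    // Execution reads straight out of one contiguous buffer instead of
    // chasing a per-block pointer.
    const Inst *Get(const BlockRef &ref) const { return arena_.data() + ref.offset; }

private:
    std::vector<Inst> arena_;  // all blocks' instructions, back to back
};
```

Keeping blocks contiguous makes the interpreter's hot path a single pointer add from the arena base, and invalidation only needs to remember the offset (the "cookie" used throughout the diff).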
14 changes: 9 additions & 5 deletions Core/MIPS/ARM64/Arm64IRJit.cpp
@@ -65,10 +65,11 @@ static void NoBlockExits() {
_assert_msg_(false, "Never exited block, invalid IR?");
}

bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
bool Arm64JitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
if (GetSpaceLeft() < 0x800)
return false;

IRBlock *block = irBlockCache->GetBlock(block_num);
BeginWrite(std::min(GetSpaceLeft(), (size_t)block->GetNumInstructions() * 32));

u32 startPC = block->GetOriginalStart();
@@ -92,12 +93,13 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
compilingBlockNum_ = block_num;
lastConstPC_ = 0;

regs_.Start(block);
regs_.Start(irBlockCache, block_num);

std::vector<const u8 *> addresses;
addresses.reserve(block->GetNumInstructions());
const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
for (int i = 0; i < block->GetNumInstructions(); ++i) {
const IRInst &inst = block->GetInstructions()[i];
const IRInst &inst = instructions[i];
regs_.SetIRIndex(i);
addresses.push_back(GetCodePtr());

@@ -156,10 +158,11 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
addressesLookup[addresses[i]] = i;

INFO_LOG(JIT, "=============== ARM64 (%08x, %d bytes) ===============", startPC, len);
const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
for (const u8 *p = blockStart; p < GetCodePointer(); ) {
auto it = addressesLookup.find(p);
if (it != addressesLookup.end()) {
const IRInst &inst = block->GetInstructions()[it->second];
const IRInst &inst = instructions[it->second];

char temp[512];
DisassembleIR(temp, sizeof(temp), inst);
@@ -319,7 +322,8 @@ void Arm64JitBackend::ClearAllBlocks() {
EraseAllLinks(-1);
}

void Arm64JitBackend::InvalidateBlock(IRBlock *block, int block_num) {
void Arm64JitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
IRBlock *block = irBlockCache->GetBlock(block_num);
int offset = block->GetTargetOffset();
u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;

4 changes: 2 additions & 2 deletions Core/MIPS/ARM64/Arm64IRJit.h
@@ -40,9 +40,9 @@ class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend
bool DescribeCodePtr(const u8 *ptr, std::string &name) const override;

void GenerateFixedCode(MIPSState *mipsState) override;
bool CompileBlock(IRBlock *block, int block_num, bool preload) override;
bool CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) override;
void ClearAllBlocks() override;
void InvalidateBlock(IRBlock *block, int block_num) override;
void InvalidateBlock(IRBlockCache *irBlockCache, int block_num) override;

void UpdateFCR31(MIPSState *mipsState) override;

2 changes: 2 additions & 0 deletions Core/MIPS/IR/IRInst.cpp
@@ -184,6 +184,8 @@ static const IRMeta irMeta[] = {
const IRMeta *metaIndex[256];

void InitIR() {
if (metaIndex[0])
return;
for (size_t i = 0; i < ARRAY_SIZE(irMeta); i++) {
metaIndex[(int)irMeta[i].op] = &irMeta[i];
}
119 changes: 88 additions & 31 deletions Core/MIPS/IR/IRJit.cpp
@@ -46,7 +46,6 @@ namespace MIPSComp {

IRJit::IRJit(MIPSState *mipsState) : frontend_(mipsState->HasDefaultPrefix()), mips_(mipsState) {
// u32 size = 128 * 1024;
// blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline");
InitIR();

jo.optimizeForInterpreter = true;
@@ -91,7 +90,7 @@ void IRJit::InvalidateCacheAt(u32 em_address, int length) {
std::vector<int> numbers = blocks_.FindInvalidatedBlockNumbers(em_address, length);
for (int block_num : numbers) {
auto block = blocks_.GetBlock(block_num);
int cookie = block->GetTargetOffset() < 0 ? block_num : block->GetTargetOffset();
int cookie = block->GetTargetOffset() < 0 ? block->GetInstructionOffset() : block->GetTargetOffset();
block->Destroy(cookie);
}
}
@@ -103,13 +102,13 @@ void IRJit::Compile(u32 em_address) {
// Look to see if we've preloaded this block.
int block_num = blocks_.FindPreloadBlock(em_address);
if (block_num != -1) {
IRBlock *b = blocks_.GetBlock(block_num);
IRBlock *block = blocks_.GetBlock(block_num);
// Okay, let's link and finalize the block now.
int cookie = b->GetTargetOffset() < 0 ? block_num : b->GetTargetOffset();
b->Finalize(cookie);
if (b->IsValid()) {
int cookie = block->GetTargetOffset() < 0 ? block->GetInstructionOffset() : block->GetTargetOffset();
block->Finalize(cookie);
if (block->IsValid()) {
// Success, we're done.
FinalizeTargetBlock(b, block_num);
FinalizeTargetBlock(&blocks_, block_num);
return;
}
}
@@ -139,27 +138,25 @@ bool IRJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32
return preload;
}

int block_num = blocks_.AllocateBlock(em_address);
int block_num = blocks_.AllocateBlock(em_address, mipsBytes, instructions);
if ((block_num & ~MIPS_EMUHACK_VALUE_MASK) != 0) {
WARN_LOG(JIT, "Failed to allocate block for %08x (%d instructions)", em_address, (int)instructions.size());
// Out of block numbers. Caller will handle.
return false;
}

IRBlock *b = blocks_.GetBlock(block_num);
b->SetInstructions(instructions);
b->SetOriginalAddrSize(em_address, mipsBytes);
if (preload) {
// Hash, then only update page stats, don't link yet.
// TODO: Should we always hash? Then we can reuse blocks.
b->UpdateHash();
}
if (!CompileTargetBlock(b, block_num, preload))
if (!CompileTargetBlock(&blocks_, block_num, preload))
return false;
// Overwrites the first instruction, and also updates stats.
blocks_.FinalizeBlock(block_num, preload);
if (!preload)
FinalizeTargetBlock(b, block_num);

FinalizeTargetBlock(&blocks_, block_num);
return true;
}

@@ -257,20 +254,21 @@ void IRJit::RunLoopUntil(u64 globalticks) {
u32 inst = Memory::ReadUnchecked_U32(mips->pc);
u32 opcode = inst & 0xFF000000;
if (opcode == MIPS_EMUHACK_OPCODE) {
IRBlock *block = blocks_.GetBlockUnchecked(inst & 0xFFFFFF);

u32 offset = inst & 0x00FFFFFF; // Alternatively, inst - opcode
#ifdef IR_PROFILING
{
TimeSpan span;
mips->pc = IRInterpret(mips, block->GetInstructions());
block->profileStats_.executions += 1;
block->profileStats_.totalNanos += span.ElapsedNanos();
}
IRBlock *block = blocks_.GetBlock(blocks_.GetBlockNumFromOffset(offset));
TimeSpan span;
mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
int64_t elapsedNanos = span.ElapsedNanos();
block->profileStats_.executions += 1;
block->profileStats_.totalNanos += elapsedNanos;
#else
mips->pc = IRInterpret(mips, block->GetInstructions());
mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
#endif
// Note: this will "jump to zero" on a badly constructed block missing exits.
if (!Memory::IsValid4AlignedAddress(mips->pc)) {
int blockNum = blocks_.GetBlockNumFromOffset(offset);
IRBlock *block = blocks_.GetBlockUnchecked(blockNum);
Core_ExecException(mips->pc, block->GetOriginalStart(), ExecExceptionType::JUMP);
break;
}
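Aside on the dispatch change above: the emuhack word written at a block's start PC now carries the block's arena offset in its low 24 bits, so the interpreter masks the word and indexes straight into the shared instruction arena instead of fetching a per-block instruction pointer. A minimal sketch of that lookup, using stand-in types (`Inst`, `IRBlockCacheLike`, and `EntryFromEmuhack` are illustrative names, not the project's real ones):

```cpp
#include <cstdint>
#include <vector>

struct Inst { uint32_t op, a, b, c; };  // stand-in for IRInst

struct IRBlockCacheLike {
    std::vector<Inst> arena;  // all blocks' instructions, contiguous
    const Inst *GetArenaPtr() const { return arena.data(); }
};

// Hot path: the emuhack word carries the arena offset in its low 24 bits,
// so dispatch is a mask plus a pointer add (same mask as in the diff above).
const Inst *EntryFromEmuhack(const IRBlockCacheLike &cache, uint32_t emuhackWord) {
    uint32_t offset = emuhackWord & 0x00FFFFFF;
    return cache.GetArenaPtr() + offset;
}
```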
@@ -300,11 +298,68 @@ void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {

void IRBlockCache::Clear() {
for (int i = 0; i < (int)blocks_.size(); ++i) {
int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
int cookie = blocks_[i].GetTargetOffset() < 0 ? blocks_[i].GetInstructionOffset() : blocks_[i].GetTargetOffset();
blocks_[i].Destroy(cookie);
}
blocks_.clear();
byPage_.clear();
arena_.clear();
arena_.shrink_to_fit();
}

IRBlockCache::IRBlockCache() {
// For whatever reason, this makes things go slower?? Probably just a CPU cache alignment fluke.
// arena_.reserve(1024 * 1024 * 2);
}

int IRBlockCache::AllocateBlock(int emAddr, u32 origSize, const std::vector<IRInst> &inst) {
// We have 24 bits to represent offsets with.
const u32 MAX_ARENA_SIZE = 0x1000000 - 1;
int offset = (int)arena_.size();
if (offset >= MAX_ARENA_SIZE) {
WARN_LOG(JIT, "Filled JIT arena, restarting");
return -1;
}
for (int i = 0; i < inst.size(); i++) {
arena_.push_back(inst[i]);
}
blocks_.push_back(IRBlock(emAddr, origSize, offset, (u16)inst.size()));
return (int)blocks_.size() - 1;
}

int IRBlockCache::GetBlockNumFromOffset(int offset) const {
// Block offsets are always in rising order (we don't go back and replace them when invalidated). So we can binary search.
int low = 0;
int high = (int)blocks_.size() - 1;
int found = -1;
while (low <= high) {
int mid = low + (high - low) / 2;
const int blockOffset = blocks_[mid].GetInstructionOffset();
if (blockOffset == offset) {
found = mid;
break;
}
if (blockOffset < offset) {
low = mid + 1;
} else {
high = mid - 1;
}
}

#ifndef _DEBUG
// Then, in debug builds, cross check the result.
return found;
#else
// TODO: Optimize if we need to call this often.
for (int i = 0; i < (int)blocks_.size(); i++) {
if (blocks_[i].GetInstructionOffset() == offset) {
_dbg_assert_(i == found);
return i;
}
}
#endif
_dbg_assert_(found == -1);
return -1;
}

std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 length) {
@@ -331,7 +386,7 @@ std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 leng

void IRBlockCache::FinalizeBlock(int i, bool preload) {
if (!preload) {
int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
int cookie = blocks_[i].GetTargetOffset() < 0 ? blocks_[i].GetInstructionOffset() : blocks_[i].GetTargetOffset();
blocks_[i].Finalize(cookie);
}

@@ -372,16 +427,18 @@ int IRBlockCache::FindPreloadBlock(u32 em_address) {
int IRBlockCache::FindByCookie(int cookie) {
if (blocks_.empty())
return -1;

// TODO: Maybe a flag to determine target offset mode?
if (blocks_[0].GetTargetOffset() < 0)
return cookie;
return GetBlockNumFromOffset(cookie);

// TODO: Now that we are using offsets in pure IR mode too, we can probably unify
// the two paradigms. Or actually no, we still need two offsets..
for (int i = 0; i < GetNumBlocks(); ++i) {
int offset = blocks_[i].GetTargetOffset();
if (offset == cookie)
return i;
}

return -1;
}

@@ -391,7 +448,7 @@ std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {

for (int number = 0; number < (int)blocks_.size(); ++number) {
IRBlock &b = blocks_[number];
int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
int cookie = b.GetTargetOffset() < 0 ? b.GetInstructionOffset() : b.GetTargetOffset();
if (b.IsValid() && b.RestoreOriginalFirstOp(cookie)) {
result[number] = number;
} else {
@@ -412,7 +469,7 @@ void IRBlockCache::RestoreSavedEmuHackOps(const std::vector<u32> &saved) {
IRBlock &b = blocks_[number];
// Only if we restored it, write it back.
if (b.IsValid() && saved[number] != 0 && b.HasOriginalFirstOp()) {
int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
int cookie = b.GetTargetOffset() < 0 ? b.GetInstructionOffset() : b.GetTargetOffset();
b.Finalize(cookie);
}
}
@@ -434,8 +491,9 @@ JitBlockDebugInfo IRBlockCache::GetBlockDebugInfo(int blockNum) const {
}

debugInfo.irDisasm.reserve(ir.GetNumInstructions());
const IRInst *instructions = GetBlockInstructionPtr(ir);
for (int i = 0; i < ir.GetNumInstructions(); i++) {
IRInst inst = ir.GetInstructions()[i];
IRInst inst = instructions[i];
char buffer[256];
DisassembleIR(buffer, sizeof(buffer), inst);
debugInfo.irDisasm.push_back(buffer);
@@ -448,10 +506,9 @@ void IRBlockCache::ComputeStats(BlockCacheStats &bcStats) const {
double maxBloat = 0.0;
double minBloat = 1000000000.0;
for (const auto &b : blocks_) {
double codeSize = (double)b.GetNumInstructions() * sizeof(IRInst);
double codeSize = (double)b.GetNumInstructions() * 4; // We count bloat in instructions, not bytes. sizeof(IRInst);
if (codeSize == 0)
continue;

u32 origAddr, mipsBytes;
b.GetRange(origAddr, mipsBytes);
double origSize = (double)mipsBytes;