diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index aa3fbefad7003..e24b7b54a5b56 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -664,7 +664,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 */
 
 #ifdef DEBUG
-    void genIPmappingDisp(unsigned mappingNum, IPmappingDsc* ipMapping);
+    void genIPmappingDisp(unsigned mappingNum, const IPmappingDsc* ipMapping);
     void genIPmappingListDisp();
 #endif // DEBUG
 
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp
index 1be119806114c..ebca3137aacac 100644
--- a/src/coreclr/jit/codegencommon.cpp
+++ b/src/coreclr/jit/codegencommon.cpp
@@ -7042,7 +7042,7 @@ const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsi
  *  Display a IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
  */
 
-void CodeGen::genIPmappingDisp(unsigned mappingNum, IPmappingDsc* ipMapping)
+void CodeGen::genIPmappingDisp(unsigned mappingNum, const IPmappingDsc* ipMapping)
 {
     if (mappingNum != unsigned(-1))
     {
diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp
index 8c5adef145de9..013c141ad872e 100644
--- a/src/coreclr/jit/emit.cpp
+++ b/src/coreclr/jit/emit.cpp
@@ -33,6 +33,22 @@ void emitLocation::CaptureLocation(emitter* emit)
     assert(Valid());
 }
 
+void emitLocation::SetLocation(insGroup* _ig, unsigned _codePos)
+{
+    ig      = _ig;
+    codePos = _codePos;
+
+    assert(Valid());
+}
+
+void emitLocation::SetLocation(emitLocation newLocation)
+{
+    ig      = newLocation.ig;
+    codePos = newLocation.codePos;
+
+    assert(Valid());
+}
+
 bool emitLocation::IsCurrentLocation(emitter* emit) const
 {
     assert(Valid());
@@ -50,6 +66,11 @@ int emitLocation::GetInsNum() const
     return emitGetInsNumFromCodePos(codePos);
 }
 
+int emitLocation::GetInsOffset() const
+{
+    return emitGetInsOfsFromCodePos(codePos);
+}
+
 // Get the instruction offset in the current instruction group, which must be a funclet prolog group.
 // This is used to find an instruction offset used in unwind data.
 // TODO-AMD64-Bug?: We only support a single main function prolog group, but allow for multiple funclet prolog
@@ -798,6 +819,7 @@ insGroup* emitter::emitSavIG(bool emitAdd)
     assert((ig->igFlags & IGF_PLACEHOLDER) == 0);
 
     ig->igData = id;
+    INDEBUG(ig->igDataSize = gs;)
 
     memcpy(id, emitCurIGfreeBase, sz);
 
@@ -8724,6 +8746,14 @@ UNATIVE_OFFSET emitter::emitCodeOffset(void* blockPtr, unsigned codePos)
     {
         of = ig->igSize;
     }
+#ifdef TARGET_ARM64
+    else if ((ig->igFlags & IGF_HAS_REMOVED_INSTR) != 0 && no == ig->igInsCnt + 1U)
+    {
+        // This can happen if an instruction was replaced, but the replacement couldn't fit into
+        // the same IG and instead was placed in a new IG.
+        return ig->igNext->igOffs + emitFindOffset(ig->igNext, 1);
+    }
+#endif
     else if (ig->igFlags & IGF_UPD_ISZ)
     {
         /*
@@ -8742,7 +8772,6 @@ UNATIVE_OFFSET emitter::emitCodeOffset(void* blockPtr, unsigned codePos)
         // printf("[IG=%02u;ID=%03u;OF=%04X] <= %08X\n", ig->igNum, emitGetInsNumFromCodePos(codePos), of, codePos);
 
         /* Make sure the offset estimate is accurate */
-
         assert(of == emitFindOffset(ig, emitGetInsNumFromCodePos(codePos)));
     }
 
@@ -9198,6 +9227,66 @@ void emitter::emitNxtIG(bool extend)
 #endif
 }
 
+//------------------------------------------------------------------------
+// emitRemoveLastInstruction: Remove the last instruction emitted; it has been optimized away by the
+// next instruction we are generating. `emitLastIns` must be non-null, meaning there is a
+// previous instruction. The previous instruction might have already been saved, or it might
+// be in the currently accumulating insGroup buffer.
+//
+// The `emitLastIns` is set to nullptr after this function. It is expected that a new instruction
+// will be immediately generated after this, which will set it again.
+//
+// Removing an instruction can invalidate any captured emitter location
+// (using emitLocation::CaptureLocation()) after the instruction was generated. This is because the
+// emitLocation stores the current IG instruction number and code size. If the instruction is
+// removed and not replaced (e.g., it is at the end of the IG, and any replacement creates a new
+// EXTEND IG), then the saved instruction number is incorrect. The IGF_HAS_REMOVED_INSTR flag is
+// used to check for this later.
+//
+// NOTE: It is expected that the GC effect of the removed instruction will be handled by the newly
+// generated replacement(s).
+//
+#ifdef TARGET_ARM64
+void emitter::emitRemoveLastInstruction()
+{
+    assert(emitLastIns != nullptr);
+    assert(emitLastInsIG != nullptr);
+
+    JITDUMP("Removing saved instruction in %s:\n> ", emitLabelString(emitLastInsIG));
+    JITDUMPEXEC(dispIns(emitLastIns))
+
+    // We should assert it's not a jmp, as that would require updating the jump lists, e.g. emitCurIGjmpList.
+
+    BYTE*          lastInsActualStartAddr = (BYTE*)emitLastIns - m_debugInfoSize;
+    unsigned short lastCodeSize           = (unsigned short)emitLastIns->idCodeSize();
+
+    // Check that a new buffer hasn't been created since the last instruction was emitted.
+    assert((emitCurIGfreeBase <= lastInsActualStartAddr) && (lastInsActualStartAddr < emitCurIGfreeEndp));
+
+    // Ensure the current IG is non-empty.
+    assert(emitCurIGnonEmpty());
+    assert(lastInsActualStartAddr < emitCurIGfreeNext);
+    assert(emitCurIGinsCnt >= 1);
+    assert(emitCurIGsize >= emitLastIns->idCodeSize());
+
+    size_t insSize = emitCurIGfreeNext - lastInsActualStartAddr;
+
+    emitCurIGfreeNext = lastInsActualStartAddr;
+    emitCurIGinsCnt -= 1;
+    emitInsCount -= 1;
+    emitCurIGsize -= lastCodeSize;
+
+    // We're going to overwrite the memory; zero it.
+    memset(emitCurIGfreeNext, 0, insSize);
+
+    // Remember this happened.
+    emitCurIG->igFlags |= IGF_HAS_REMOVED_INSTR;
+
+    emitLastIns   = nullptr;
+    emitLastInsIG = nullptr;
+}
+#endif
+
 /*****************************************************************************
  *
  *  emitGetInsSC: Get the instruction's constant value.
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 4d4b75ad35107..783d927f6661c 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -131,6 +131,16 @@ class emitLocation
     {
     }
 
+    emitLocation(insGroup* _ig, unsigned _codePos)
+    {
+        SetLocation(_ig, _codePos);
+    }
+
+    emitLocation(emitter* emit)
+    {
+        CaptureLocation(emit);
+    }
+
     emitLocation(void* emitCookie) : ig((insGroup*)emitCookie), codePos(0)
     {
     }
@@ -142,6 +152,8 @@ class emitLocation
     }
 
     void CaptureLocation(emitter* emit);
+    void SetLocation(insGroup* _ig, unsigned _codePos);
+    void SetLocation(emitLocation newLocation);
 
     bool IsCurrentLocation(emitter* emit) const;
 
@@ -160,6 +172,7 @@ class emitLocation
     }
 
     int GetInsNum() const;
+    int GetInsOffset() const;
 
     bool operator!=(const emitLocation& other) const
     {
@@ -250,6 +263,7 @@ struct insGroup
 #ifdef DEBUG
     BasicBlock*               lastGeneratedBlock; // The last block that generated code into this insGroup.
     jitstd::list<BasicBlock*> igBlocks;           // All the blocks that generated code into this insGroup.
+    size_t                    igDataSize;         // size of instrDesc data pointed to by 'igData'
 #endif
 
     UNATIVE_OFFSET igNum; // for ordering (and display) purposes
 
@@ -280,6 +294,9 @@ struct insGroup
 #define IGF_REMOVED_ALIGN 0x0800     // IG was marked as having an alignment instruction(s), but was later unmarked
                                      // without updating the IG's size/offsets.
 #define IGF_HAS_REMOVABLE_JMP 0x1000 // this group ends with an unconditional jump which is a candidate for removal
+#ifdef TARGET_ARM64
+#define IGF_HAS_REMOVED_INSTR 0x2000 // this group has an instruction that was removed.
+#endif
 
 // Mask of IGF_* flags that should be propagated to new blocks when they are created.
 // This allows prologs and epilogs to be any number of IGs, but still be
@@ -2170,6 +2187,10 @@ class emitter
     insGroup* emitSavIG(bool emitAdd = false);
     void emitNxtIG(bool extend = false);
 
+#ifdef TARGET_ARM64
+    void emitRemoveLastInstruction();
+#endif
+
     bool emitCurIGnonEmpty()
     {
         return (emitCurIG && emitCurIGfreeNext > emitCurIGfreeBase);
@@ -2823,12 +2844,15 @@ inline unsigned emitGetInsOfsFromCodePos(unsigned codePos)
 
 inline unsigned emitter::emitCurOffset()
 {
-    unsigned codePos = emitCurIGinsCnt + (emitCurIGsize << 16);
+    return emitSpecifiedOffset(emitCurIGinsCnt, emitCurIGsize);
+}
 
-    assert(emitGetInsOfsFromCodePos(codePos) == emitCurIGsize);
-    assert(emitGetInsNumFromCodePos(codePos) == emitCurIGinsCnt);
+inline unsigned emitter::emitSpecifiedOffset(unsigned insCount, unsigned igSize)
+{
+    unsigned codePos = insCount + (igSize << 16);
 
-    // printf("[IG=%02u;ID=%03u;OF=%04X] => %08X\n", emitCurIG->igNum, emitCurIGinsCnt, emitCurIGsize, codePos);
+    assert(emitGetInsOfsFromCodePos(codePos) == igSize);
+    assert(emitGetInsNumFromCodePos(codePos) == insCount);
 
     return codePos;
 }
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index fa99bb9f6ba2a..10f56ddf77ff3 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -5068,6 +5068,7 @@ void emitter::emitIns_R_R_I(
     emitAttr  elemsize = EA_UNKNOWN;
     insFormat fmt      = IF_NONE;
     bool      isLdSt   = false;
+    bool      isLdrStr = false;
    bool      isSIMD   = false;
     bool      isAddSub = false;
     bool      setFlags = false;
@@ -5529,6 +5530,7 @@ void emitter::emitIns_R_R_I(
             unscaledOp = false;
             scale      = NaturalScale_helper(size);
             isLdSt     = true;
+            isLdrStr   = true;
             break;
 
         case INS_ldur:
@@ -5683,11 +5685,8 @@ void emitter::emitIns_R_R_I(
         }
     }
 
-    // Is the ldr/str even necessary?
-    // For volatile load/store, there will be memory barrier instruction before/after the load/store
-    // and in such case, IsRedundantLdStr() returns false, because the method just checks for load/store
-    // pair next to each other.
-    if (emitComp->opts.OptimizationEnabled() && IsRedundantLdStr(ins, reg1, reg2, imm, size, fmt))
+    // Try to optimize a load/store with an alternative instruction.
+    if (isLdrStr && emitComp->opts.OptimizationEnabled() && OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt))
     {
         return;
     }
@@ -6641,6 +6640,7 @@ void emitter::emitIns_R_R_R_I(instruction ins,
                 scale = (size == EA_8BYTE) ? 3 : 2;
             }
             isLdSt = true;
+            fmt    = IF_LS_3C;
             break;
 
         case INS_ld1:
@@ -6919,6 +6919,7 @@ void emitter::emitIns_R_R_R_I(instruction ins,
                 assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A");
             }
         }
+    assert(fmt != IF_NONE);
 
     instrDesc* id = emitNewInstrCns(attr, imm);
 
@@ -7554,10 +7555,11 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
  */
 void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
 {
-    emitAttr  size  = EA_SIZE(attr);
-    insFormat fmt   = IF_NONE;
-    int       disp  = 0;
-    unsigned  scale = 0;
+    emitAttr  size     = EA_SIZE(attr);
+    insFormat fmt      = IF_NONE;
+    int       disp     = 0;
+    unsigned  scale    = 0;
+    bool      isLdrStr = false;
 
     assert(offs >= 0);
 
@@ -7584,7 +7586,8 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
         case INS_str:
         case INS_ldr:
             assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size));
-            scale = genLog2(EA_SIZE_IN_BYTES(size));
+            scale    = genLog2(EA_SIZE_IN_BYTES(size));
+            isLdrStr = true;
             break;
 
         case INS_lea:
@@ -7638,8 +7641,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
     {
         bool    useRegForImm = false;
         ssize_t mask         = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
-
-        imm = disp;
+        imm          = disp;
         if (imm == 0)
         {
             fmt = IF_LS_2A;
@@ -7677,14 +7679,15 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
         }
     }
 
-    // Is the ldr/str even necessary?
-    if (emitComp->opts.OptimizationEnabled() && IsRedundantLdStr(ins, reg1, reg2, imm, size, fmt))
+    assert(fmt != IF_NONE);
+
+    // Try to optimize a load/store with an alternative instruction.
+    if (isLdrStr && emitComp->opts.OptimizationEnabled() &&
+        OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs))
     {
         return;
     }
 
-    assert(fmt != IF_NONE);
-
     instrDesc* id = emitNewInstrCns(attr, imm);
 
     id->idIns(ins);
@@ -7811,6 +7814,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
     int      disp          = 0;
     unsigned scale         = 0;
     bool     isVectorStore = false;
+    bool     isStr         = false;
 
     // TODO-ARM64-CQ: use unscaled loads?
     /* Figure out the encoding format of the instruction */
@@ -7839,6 +7843,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
                 scale         = NaturalScale_helper(size);
                 isVectorStore = true;
             }
+            isStr = true;
             break;
 
         default:
@@ -7908,14 +7913,15 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
         fmt = IF_LS_3A;
     }
 
-    // Is the ldr/str even necessary?
-    if (emitComp->opts.OptimizationEnabled() && IsRedundantLdStr(ins, reg1, reg2, imm, size, fmt))
+    assert(fmt != IF_NONE);
+
+    // Try to optimize a store with an alternative instruction.
+    if (isStr && emitComp->opts.OptimizationEnabled() &&
+        OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs))
     {
         return;
     }
 
-    assert(fmt != IF_NONE);
-
     instrDesc* id = emitNewInstrCns(attr, imm);
 
     id->idIns(ins);
@@ -9921,6 +9927,7 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
             // Backward branches using instruction count must be within the same instruction group.
             assert(insNum + 1 >= (unsigned)(-instrCount));
         }
+
         dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount));
         dstAddr = emitOffsetToPtr(dstOffs);
     }
@@ -16085,7 +16092,7 @@ bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regN
 //
 //      str     x1, [x2, #56]
 //      ldr     x1, [x2, #56]   <-- redundant
-
+//
 // Arguments:
 //      ins  - The current instruction
 //      dst  - The current destination
@@ -16093,13 +16100,19 @@ bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regN
 //      imm  - Immediate offset
 //      size - Operand size
 //      fmt  - Format of instruction
+//
 // Return Value:
 //      true if previous instruction already has desired value in register/memory location.
-
+//
+// Notes:
+//      For volatile load/store, there will be a memory barrier instruction before/after the load/store
+//      and in such case, this method returns false, because the method just checks for a load/store
+//      pair next to each other.
+//
 bool emitter::IsRedundantLdStr(
     instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt)
 {
-    if (((ins != INS_ldr) && (ins != INS_str)) || !emitCanPeepholeLastIns())
+    if ((ins != INS_ldr) && (ins != INS_str))
     {
         return false;
     }
@@ -16108,7 +16121,7 @@ bool emitter::IsRedundantLdStr(
     regNumber prevReg2   = emitLastIns->idReg2();
     insFormat lastInsfmt = emitLastIns->idInsFmt();
     emitAttr  prevSize   = emitLastIns->idOpSize();
-    ssize_t prevImm = emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns();
+    ssize_t   prevImm    = emitGetInsSC(emitLastIns);
 
     // Only optimize if:
     // 1. "base" or "base plus immediate offset" addressing modes.
@@ -16160,4 +16173,197 @@ bool emitter::IsRedundantLdStr(
 
     return false;
 }
+
+//-----------------------------------------------------------------------------------
+// ReplaceLdrStrWithPairInstr: Potentially overwrite a previously-emitted "ldr" or "str"
+// instruction with an "ldp" or "stp" instruction.
+//
+// Arguments:
+//     ins      - The instruction code
+//     reg1Attr - The emit attribute for register 1
+//     reg1     - Register 1
+//     reg2     - Encoded register 2
+//     imm      - Immediate offset, prior to scaling by operand size
+//     size     - Operand size
+//     fmt      - Instruction format
+//
+// Return Value:
+//     "true" if the previous instruction has been overwritten.
+//
+bool emitter::ReplaceLdrStrWithPairInstr(
+    instruction ins, emitAttr reg1Attr, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt)
+{
+    // Register 2 needs conversion to unencoded value.
+    reg2 = encodingZRtoSP(reg2);
+
+    RegisterOrder optimizationOrder = IsOptimizableLdrStrWithPair(ins, reg1, reg2, imm, size, fmt);
+
+    if (optimizationOrder != eRO_none)
+    {
+        regNumber oldReg1 = emitLastIns->idReg1();
+
+        ssize_t     oldImm = emitGetInsSC(emitLastIns);
+        instruction optIns = (ins == INS_ldr) ? INS_ldp : INS_stp;
+
+        emitAttr oldReg1Attr;
+        switch (emitLastIns->idGCref())
+        {
+            case GCT_GCREF:
+                oldReg1Attr = EA_GCREF;
+                break;
+            case GCT_BYREF:
+                oldReg1Attr = EA_BYREF;
+                break;
+            default:
+                oldReg1Attr = emitLastIns->idOpSize();
+                break;
+        }
+
+        // Remove the last instruction written.
+        emitRemoveLastInstruction();
+
+        // Emit the new instruction. Make sure to scale the immediate value by the operand size.
+        if (optimizationOrder == eRO_ascending)
+        {
+            // The FIRST register is at the lower offset.
+            emitIns_R_R_R_I(optIns, oldReg1Attr, oldReg1, reg1, reg2, oldImm * size, INS_OPTS_NONE, reg1Attr);
+        }
+        else
+        {
+            // The SECOND register is at the lower offset.
+            emitIns_R_R_R_I(optIns, reg1Attr, reg1, oldReg1, reg2, imm * size, INS_OPTS_NONE, oldReg1Attr);
+        }
+
+        return true;
+    }
+
+    return false;
+}
+
+//-----------------------------------------------------------------------------------
+// IsOptimizableLdrStrWithPair: Check if it is possible to optimize two "ldr" or "str"
+// instructions into a single "ldp" or "stp" instruction.
+//
+// Examples: ldr w1, [x20, #0x10]
+//           ldr w2, [x20, #0x14]    =>    ldp w1, w2, [x20, #0x10]
+//
+//           ldr w1, [x20, #0x14]
+//           ldr w2, [x20, #0x10]    =>    ldp w2, w1, [x20, #0x10]
+//
+// Arguments:
+//     ins  - The instruction code
+//     reg1 - Register 1 number
+//     reg2 - Register 2 number
+//     imm  - Immediate offset, prior to scaling by operand size
+//     size - Operand size
+//     fmt  - Instruction format
+//
+// Return Value:
+//     eRO_none       - No optimization of consecutive instructions is possible
+//     eRO_ascending  - Registers can be loaded/stored into ascending store locations
+//     eRO_descending - Registers can be loaded/stored into descending store locations.
+//
+emitter::RegisterOrder emitter::IsOptimizableLdrStrWithPair(
+    instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt)
+{
+    RegisterOrder optimisationOrder = eRO_none;
+
+    if ((ins != INS_ldr) && (ins != INS_str))
+    {
+        return eRO_none;
+    }
+
+    if (ins != emitLastIns->idIns())
+    {
+        // Not successive ldr or str instructions.
+        return eRO_none;
+    }
+
+    regNumber prevReg1   = emitLastIns->idReg1();
+    regNumber prevReg2   = emitLastIns->idReg2();
+    insFormat lastInsFmt = emitLastIns->idInsFmt();
+    emitAttr  prevSize   = emitLastIns->idOpSize();
+    ssize_t   prevImm    = emitGetInsSC(emitLastIns);
+
+    // The signed, *raw* immediate value fits in 7 bits, so for LDP/STP the raw value is from -64 to +63.
+    // For LDR/STR, there are 9 bits, so we need to limit the range explicitly in software.
+    if ((imm < -64) || (imm > 63) || (prevImm < -64) || (prevImm > 63))
+    {
+        // Then one or more of the immediate values is out of range, so we cannot optimise.
+        return eRO_none;
+    }
+
+    if ((!isGeneralRegisterOrZR(reg1)) || (!isGeneralRegisterOrZR(prevReg1)))
+    {
+        // Either register 1 or previous register 1 is not a general register
+        // or the zero register, so we cannot optimise.
+        return eRO_none;
+    }
+
+    if (lastInsFmt != fmt)
+    {
+        // The formats of the two instructions differ.
+        return eRO_none;
+    }
+
+    if ((emitInsIsLoad(ins)) && (prevReg1 == prevReg2))
+    {
+        // Then the previous load overwrote the register that we are indexing against.
+        return eRO_none;
+    }
+
+    if ((emitInsIsLoad(ins)) && (reg1 == prevReg1))
+    {
+        // Cannot load to the same register twice.
+        return eRO_none;
+    }
+
+    if (prevSize != size)
+    {
+        // Operand sizes differ.
+        return eRO_none;
+    }
+
+    // There are two possible orders for consecutive registers.
+    // These may be stored to or loaded from increasing or
+    // decreasing store locations.
+    if (imm == (prevImm + 1))
+    {
+        // Previous register 1 is at a lower offset than this register 1.
+        optimisationOrder = eRO_ascending;
+    }
+    else if (imm == (prevImm - 1))
+    {
+        // Previous register 1 is at a higher offset than this register 1.
+        optimisationOrder = eRO_descending;
+    }
+    else
+    {
+        // Not consecutive immediate values.
+        return eRO_none;
+    }
+
+    if ((reg2 != prevReg2) || !isGeneralRegisterOrSP(reg2))
+    {
+        // Register 2 should be the same as in the previous instruction and should either be a general
+        // register or the stack pointer.
+        return eRO_none;
+    }
+
+    // Don't remove instructions whilst in prologs or epilogs, as these contain "unwindable"
+    // parts, where we need to report unwind codes to the OS.
+    if (emitIGisInProlog(emitCurIG) || emitIGisInEpilog(emitCurIG))
+    {
+        return eRO_none;
+    }
+#ifdef FEATURE_EH_FUNCLETS
+    if (emitIGisInFuncletProlog(emitCurIG) || emitIGisInFuncletEpilog(emitCurIG))
+    {
+        return eRO_none;
+    }
+#endif
+
+    return optimisationOrder;
+}
+
 #endif // defined(TARGET_ARM64)
diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h
index 05939d4f19773..d82f7dd833a1f 100644
--- a/src/coreclr/jit/emitarm64.h
+++ b/src/coreclr/jit/emitarm64.h
@@ -69,6 +69,17 @@ instrDesc* emitNewInstrCallInd(int argCnt,
                                emitAttr retSize,
                                emitAttr secondRetSize);
 
+/************************************************************************/
+/*  enum to allow instruction optimisation to specify register order   */
+/************************************************************************/
+
+enum RegisterOrder
+{
+    eRO_none = 0,
+    eRO_ascending,
+    eRO_descending
+};
+
 /************************************************************************/
 /*  Private helpers for instruction output                              */
 /************************************************************************/
@@ -112,7 +123,49 @@ static UINT64 Replicate_helper(UINT64 value, unsigned width, emitAttr size);
 // If yes, the caller of this method can choose to omit current mov instruction.
 static bool IsMovInstruction(instruction ins);
 bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip);
+
+// Methods to optimize a Ldr or Str with an alternative instruction.
 bool IsRedundantLdStr(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);
+RegisterOrder IsOptimizableLdrStrWithPair(
+    instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);
+bool ReplaceLdrStrWithPairInstr(
+    instruction ins, emitAttr reg1Attr, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);
+
+// Try to optimize a Ldr or Str with an alternative instruction.
+inline bool OptimizeLdrStr(instruction ins,
+                           emitAttr    reg1Attr,
+                           regNumber   reg1,
+                           regNumber   reg2,
+                           ssize_t     imm,
+                           emitAttr    size,
+                           insFormat   fmt,
+                           bool        localVar = false,
+                           int         varx = 0,
+                           int         offs = 0)
+{
+    assert(ins == INS_ldr || ins == INS_str);
+
+    if (!emitCanPeepholeLastIns())
+    {
+        return false;
+    }
+
+    // Is the ldr/str even necessary?
+    if (IsRedundantLdStr(ins, reg1, reg2, imm, size, fmt))
+    {
+        return true;
+    }
+
+    // If the previous instruction was a matching load/store, then try to replace it instead of emitting.
+    // Don't do this if either instruction had a local variable.
+    if ((emitLastIns->idIns() == ins) && !localVar && !emitLastIns->idIsLclVar() &&
+        ReplaceLdrStrWithPairInstr(ins, reg1Attr, reg1, reg2, imm, size, fmt))
+    {
+        return true;
+    }
+
+    return false;
+}
 
 /************************************************************************
  *
diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h
index 2dd9d63750289..fcbb32fa7d17f 100644
--- a/src/coreclr/jit/emitloongarch64.h
+++ b/src/coreclr/jit/emitloongarch64.h
@@ -73,8 +73,6 @@ unsigned emitOutput_Instr(BYTE* dst, code_t code);
 // If yes, the caller of this method can choose to omit current mov instruction.
 static bool IsMovInstruction(instruction ins);
 bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip);
-bool IsRedundantLdStr(
-    instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);
 // New functions end.
 /************************************************************************/
 /*  Public inline informational methods                                 */
diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h
index 723d9ca0563c9..0133fb19f0212 100644
--- a/src/coreclr/jit/emitpub.h
+++ b/src/coreclr/jit/emitpub.h
@@ -64,6 +64,7 @@ void emitFinishPrologEpilogGeneration();
 
 void*    emitCurBlock();
 unsigned emitCurOffset();
+unsigned emitSpecifiedOffset(unsigned insCount, unsigned igSize);
 
 UNATIVE_OFFSET emitCodeOffset(void* blockPtr, unsigned codeOffs);
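
Reviewer note: the standalone C++ sketch below models the pairing checks described for IsOptimizableLdrStrWithPair (same opcode, operand size, and base register; raw immediates within the signed 7-bit ldp/stp range; offsets exactly one slot apart), so the rule can be experimented with outside the JIT. It is an illustrative approximation under those assumptions, not JIT code: the MiniIns record and CanFormPair helper are invented for this example, and JIT-specific concerns (instruction formats, GC refs, local variables, prolog/epilog/funclet groups) are intentionally left out.

// Standalone approximation of the ldr/str -> ldp/stp pairing test. Not JIT code.
#include <cstdint>
#include <cstdio>

enum class Op { Ldr, Str };
enum class Order { None, Ascending, Descending };

struct MiniIns // hypothetical, simplified stand-in for an emitted ldr/str
{
    Op      op;
    int     reg1;      // destination/source register number
    int     reg2;      // base register number
    int64_t imm;       // immediate in units of the operand size (byte offset / size)
    int     sizeBytes; // operand size in bytes
};

// Mirrors the consecutive-offset checks from the diff above, minus JIT-specific state.
Order CanFormPair(const MiniIns& prev, const MiniIns& curr)
{
    if (curr.op != prev.op)
        return Order::None; // not two consecutive ldr/ldr or str/str instructions
    if (curr.sizeBytes != prev.sizeBytes)
        return Order::None; // operand sizes differ
    if (curr.reg2 != prev.reg2)
        return Order::None; // different base register
    if ((curr.op == Op::Ldr) && (curr.reg1 == prev.reg1))
        return Order::None; // would load the same register twice
    if ((curr.op == Op::Ldr) && (prev.reg1 == prev.reg2))
        return Order::None; // previous load clobbered the base register
    // ldp/stp immediates are signed 7-bit values in units of the operand size.
    if ((curr.imm < -64) || (curr.imm > 63) || (prev.imm < -64) || (prev.imm > 63))
        return Order::None;
    if (curr.imm == prev.imm + 1)
        return Order::Ascending;  // e.g. [x20,#0x10] then [x20,#0x14] for 4-byte operands
    if (curr.imm == prev.imm - 1)
        return Order::Descending;
    return Order::None; // offsets are not adjacent slots
}

int main()
{
    // ldr w1, [x20, #0x10] ; ldr w2, [x20, #0x14]  =>  ldp w1, w2, [x20, #0x10]
    MiniIns prev{Op::Ldr, 1, 20, 0x10 / 4, 4};
    MiniIns curr{Op::Ldr, 2, 20, 0x14 / 4, 4};
    std::printf("pair order: %d\n", static_cast<int>(CanFormPair(prev, curr))); // prints 1 (Ascending)
    return 0;
}

The real emitter additionally requires matching instruction formats and refuses the rewrite inside prologs, epilogs, and funclets, as the diff above shows.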