From 7d9254014a5e9a97b00529735cbe3a70f4b419bc Mon Sep 17 00:00:00 2001 From: Qiao Pengcheng Date: Wed, 15 Dec 2021 21:54:36 +0800 Subject: [PATCH 01/46] Part6-1: add the coreclr-jit directory for LoongArch64. (#59561) Co-authored-by: Loongson's .NET-teams --- src/coreclr/jit/ICorJitInfo_API_names.h | 1 + src/coreclr/jit/ICorJitInfo_API_wrapper.hpp | 18 +- src/coreclr/jit/codegen.h | 127 +- src/coreclr/jit/codegencommon.cpp | 1043 ++- src/coreclr/jit/codegeninterface.h | 15 +- src/coreclr/jit/codegenlinear.cpp | 9 +- src/coreclr/jit/codegenloongarch64.cpp | 9157 +++++++++++++++++++ src/coreclr/jit/compiler.cpp | 95 +- src/coreclr/jit/compiler.h | 84 +- src/coreclr/jit/compiler.hpp | 12 +- src/coreclr/jit/ee_il_dll.cpp | 8 + src/coreclr/jit/emit.cpp | 269 +- src/coreclr/jit/emit.h | 192 +- src/coreclr/jit/emitdef.h | 2 + src/coreclr/jit/emitfmts.h | 2 + src/coreclr/jit/emitfmtsloongarch64.h | 82 + src/coreclr/jit/emitinl.h | 56 + src/coreclr/jit/emitjmps.h | 20 + src/coreclr/jit/emitloongarch64.cpp | 6780 ++++++++++++++ src/coreclr/jit/emitloongarch64.h | 421 + src/coreclr/jit/emitpub.h | 4 +- src/coreclr/jit/error.h | 13 +- src/coreclr/jit/gentree.cpp | 104 +- src/coreclr/jit/gentree.h | 5 + src/coreclr/jit/importer.cpp | 178 +- src/coreclr/jit/instr.cpp | 132 + src/coreclr/jit/instr.h | 47 +- src/coreclr/jit/instrs.h | 2 + src/coreclr/jit/instrsloongarch64.h | 499 + src/coreclr/jit/jit.h | 66 +- src/coreclr/jit/jitconfigvalues.h | 17 + src/coreclr/jit/jiteh.cpp | 2 +- src/coreclr/jit/lclvars.cpp | 348 +- src/coreclr/jit/lower.cpp | 111 +- src/coreclr/jit/lower.h | 2 +- src/coreclr/jit/lowerloongarch64.cpp | 1693 ++++ src/coreclr/jit/lsra.cpp | 23 +- src/coreclr/jit/lsra.h | 8 + src/coreclr/jit/lsrabuild.cpp | 9 + src/coreclr/jit/lsraloongarch64.cpp | 1731 ++++ src/coreclr/jit/morph.cpp | 478 +- src/coreclr/jit/optimizer.cpp | 7 + src/coreclr/jit/regalloc.cpp | 23 + src/coreclr/jit/register.h | 3 + src/coreclr/jit/register_arg_convention.cpp | 11 + 
src/coreclr/jit/registerloongarch64.h | 108 + src/coreclr/jit/regset.cpp | 4 +- src/coreclr/jit/regset.h | 2 + src/coreclr/jit/scopeinfo.cpp | 27 +- src/coreclr/jit/target.h | 37 +- src/coreclr/jit/targetloongarch64.cpp | 30 + src/coreclr/jit/targetloongarch64.h | 336 + src/coreclr/jit/unwind.cpp | 6 +- src/coreclr/jit/unwind.h | 17 +- src/coreclr/jit/unwindloongarch64.cpp | 2347 +++++ src/coreclr/jit/utils.cpp | 18 +- src/coreclr/jit/valuenum.cpp | 28 +- src/coreclr/jit/valuenumfuncs.h | 4 + 58 files changed, 26683 insertions(+), 190 deletions(-) create mode 100644 src/coreclr/jit/codegenloongarch64.cpp create mode 100644 src/coreclr/jit/emitfmtsloongarch64.h create mode 100644 src/coreclr/jit/emitloongarch64.cpp create mode 100644 src/coreclr/jit/emitloongarch64.h create mode 100644 src/coreclr/jit/instrsloongarch64.h create mode 100644 src/coreclr/jit/lowerloongarch64.cpp create mode 100644 src/coreclr/jit/lsraloongarch64.cpp create mode 100644 src/coreclr/jit/registerloongarch64.h create mode 100644 src/coreclr/jit/targetloongarch64.cpp create mode 100644 src/coreclr/jit/targetloongarch64.h create mode 100644 src/coreclr/jit/unwindloongarch64.cpp diff --git a/src/coreclr/jit/ICorJitInfo_API_names.h b/src/coreclr/jit/ICorJitInfo_API_names.h index d373091453220..f9597085d11df 100644 --- a/src/coreclr/jit/ICorJitInfo_API_names.h +++ b/src/coreclr/jit/ICorJitInfo_API_names.h @@ -122,6 +122,7 @@ DEF_CLR_API(getMethodNameFromMetadata) DEF_CLR_API(getMethodHash) DEF_CLR_API(findNameOfToken) DEF_CLR_API(getSystemVAmd64PassStructInRegisterDescriptor) +DEF_CLR_API(getFieldTypeByHnd) DEF_CLR_API(getThreadTLSIndex) DEF_CLR_API(getInlinedCallFrameVptr) DEF_CLR_API(getAddrOfCaptureThreadGlobal) diff --git a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp index 1e2627ccb3c9a..8326f17cfc7ee 100644 --- a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp +++ b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp @@ -986,14 +986,30 @@ 
CORINFO_ARG_LIST_HANDLE WrapICorJitInfo::getArgNext( CorInfoTypeWithMod WrapICorJitInfo::getArgType( CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE args, - CORINFO_CLASS_HANDLE* vcTypeRet) + CORINFO_CLASS_HANDLE* vcTypeRet +#if defined(TARGET_LOONGARCH64) + ,int *flags = NULL +#endif +) { API_ENTER(getArgType); +#if defined(TARGET_LOONGARCH64) + CorInfoTypeWithMod temp = wrapHnd->getArgType(sig, args, vcTypeRet, flags); +#else CorInfoTypeWithMod temp = wrapHnd->getArgType(sig, args, vcTypeRet); +#endif API_LEAVE(getArgType); return temp; } +uint32_t WrapICorJitInfo::getFieldTypeByHnd(CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getFieldTypeByHnd); + DWORD temp = wrapHnd->getFieldTypeByHnd(cls); + API_LEAVE(getFieldTypeByHnd); + return temp; +} + CORINFO_CLASS_HANDLE WrapICorJitInfo::getArgClass( CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE args) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 343bcb138300b..1960d6ae55320 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -237,7 +237,12 @@ class CodeGen final : public CodeGenInterface void genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, BasicBlock* failBlk = nullptr); +#ifdef TARGET_LOONGARCH64 + void genSetRegToIcon(regNumber reg, ssize_t val, var_types type); + void genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk = nullptr, regNumber reg2 = REG_R0); +#else void genCheckOverflow(GenTree* tree); +#endif //------------------------------------------------------------------------- // @@ -253,7 +258,11 @@ class CodeGen final : public CodeGenInterface // void genEstablishFramePointer(int delta, bool reportUnwindData); +#if defined(TARGET_LOONGARCH64) + void genFnPrologCalleeRegArgs(); +#else void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState); +#endif void genEnregisterIncomingStackArgs(); void genCheckUseBlockInit(); #if defined(UNIX_AMD64_ABI) && 
defined(FEATURE_SIMD) @@ -319,6 +328,67 @@ class CodeGen final : public CodeGenInterface void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); + void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); + +#elif defined(TARGET_LOONGARCH64) + bool genInstrWithConstant(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + regNumber tmpReg, + bool inUnwindRegion = false); + + void genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); + + void genPrologSaveRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero); + + void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); + + void genEpilogRestoreRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero); + + void genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); + + // A simple struct to keep register pairs for prolog and epilog. 
+ struct RegPair + { + regNumber reg1; + regNumber reg2; + bool useSaveNextPair; + + RegPair(regNumber reg1) : reg1(reg1), reg2(REG_NA), useSaveNextPair(false) + { + } + + RegPair(regNumber reg1, regNumber reg2) : reg1(reg1), reg2(reg2), useSaveNextPair(false) + { + assert(reg2 == REG_NEXT(reg1)); + } + }; + + static void genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* regStack); + static void genSetUseSaveNextPairs(ArrayStack* regStack); + + static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); + + void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); + void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); + + void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); + void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); + void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); #else void genPushCalleeSavedRegisters(); @@ -400,7 +470,25 @@ class CodeGen final : public CodeGenInterface FuncletFrameInfoDsc genFuncletInfo; -#endif // TARGET_AMD64 +#elif defined(TARGET_LOONGARCH64) + + // A set of information that is used by funclet prolog and epilog generation. + // It is collected once, before funclet prologs and epilogs are generated, + // and used by all funclet prologs and epilogs, which must all be the same. + struct FuncletFrameInfoDsc + { + regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) + int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function + // (negative) + int fiSP_to_FPRA_save_delta; // FP/RA register save offset from SP (positive) + int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) + int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) + int fiFrameType; // Funclet frame types are numbered. 
See genFuncletProlog() for details. + int fiSpDelta1; // Stack pointer delta 1 (negative) + }; + + FuncletFrameInfoDsc genFuncletInfo; +#endif // TARGET_LOONGARCH64 #if defined(TARGET_XARCH) @@ -520,6 +608,10 @@ class CodeGen final : public CodeGenInterface void genArm64EmitterUnitTests(); #endif +#if defined(DEBUG) && defined(TARGET_LOONGARCH64) + void genLOONGARCH64EmitterUnitTests(); +#endif + #if defined(DEBUG) && defined(LATE_DISASM) && defined(TARGET_AMD64) void genAmd64EmitterUnitTests(); #endif @@ -530,6 +622,12 @@ class CodeGen final : public CodeGenInterface bool genSaveFpLrWithAllCalleeSavedRegisters; #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 + virtual void SetSaveFpRaWithAllCalleeSavedRegisters(bool value); + virtual bool IsSaveFpRaWithAllCalleeSavedRegisters() const; + bool genSaveFpRaWithAllCalleeSavedRegisters; +#endif // TARGET_LOONGARCH64 + //------------------------------------------------------------------------- // // End prolog/epilog generation @@ -835,10 +933,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genLeaInstruction(GenTreeAddrMode* lea); void genSetRegToCond(regNumber dstReg, GenTree* tree); -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) void genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale); void genCodeForMulLong(GenTreeOp* mul); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #if !defined(TARGET_64BIT) void genLongToIntCast(GenTree* treeNode); @@ -1231,6 +1329,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genStoreRegToStackArg(var_types type, regNumber reg, int offset); #endif // FEATURE_PUT_STRUCT_ARG_STK +#ifdef TARGET_LOONGARCH64 + //TODO for LOONGARCH64 : maybe delete on LA64? 
+ void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset); +#endif + void genCodeForStoreBlk(GenTreeBlk* storeBlkNode); #ifndef TARGET_X86 void genCodeForInitBlkHelper(GenTreeBlk* initBlkNode); @@ -1241,7 +1344,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genTableBasedSwitch(GenTree* tree); void genCodeForArrIndex(GenTreeArrIndex* treeNode); void genCodeForArrOffset(GenTreeArrOffs* treeNode); +#if defined(TARGET_LOONGARCH64) + instruction genGetInsForOper(GenTree* treeNode); +#else instruction genGetInsForOper(genTreeOps oper, var_types type); +#endif bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data); GenTree* getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd); regNumber getCallIndirectionCellReg(const GenTreeCall* call); @@ -1250,7 +1357,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genJmpMethod(GenTree* jmp); BasicBlock* genCallFinally(BasicBlock* block); void genCodeForJumpTrue(GenTreeOp* jtrue); -#ifdef TARGET_ARM64 +#if defined(TARGET_LOONGARCH64) + //TODO: refactor for LA. + void genCodeForJumpCompare(GenTreeOp* tree); +#endif +#if defined(TARGET_ARM64) void genCodeForJumpCompare(GenTreeOp* tree); void genCodeForMadd(GenTreeOp* tree); void genCodeForBfiz(GenTreeOp* tree); @@ -1266,6 +1377,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode); void genMultiRegStoreToLocal(GenTreeLclVar* lclNode); +#if defined(TARGET_LOONGARCH64) + void genMultiRegCallStoreToLocal(GenTree* treeNode); +#endif + // Codegen for multi-register struct returns. 
bool isStructReturn(GenTree* treeNode); #ifdef FEATURE_SIMD @@ -1281,9 +1396,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genFloatReturn(GenTree* treeNode); #endif // TARGET_X86 -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64)|| defined(TARGET_LOONGARCH64) void genSimpleReturn(GenTree* treeNode); -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 void genReturn(GenTree* treeNode); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 237a22c6782f0..b2b2f0d77a9ef 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -138,6 +138,11 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) #ifdef TARGET_ARM64 genSaveFpLrWithAllCalleeSavedRegisters = false; #endif // TARGET_ARM64 + +#ifdef TARGET_LOONGARCH64 + SetHasTailCalls(false); + genSaveFpRaWithAllCalleeSavedRegisters = false; +#endif // TARGET_LOONGARCH64 } void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg) @@ -596,7 +601,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) case CORINFO_HELP_ASSIGN_BYREF: #if defined(TARGET_AMD64) return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF; #elif defined(TARGET_X86) return RBM_ESI | RBM_EDI | RBM_ECX; @@ -605,7 +610,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) return RBM_CALLEE_TRASH; #endif -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) case CORINFO_HELP_ASSIGN_REF: case CORINFO_HELP_CHECKED_ASSIGN_REF: return RBM_CALLEE_TRASH_WRITEBARRIER; @@ -1101,7 +1106,9 @@ void CodeGen::genAdjustSP(target_ssize_t delta) inst_RV(INS_pop, REG_ECX, TYP_INT); else #endif +#ifndef TARGET_LOONGARCH64 inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE); +#endif } 
//------------------------------------------------------------------------ @@ -1154,7 +1161,7 @@ void CodeGen::genAdjustStackLevel(BasicBlock* block) #endif // !FEATURE_FIXED_OUT_ARGS } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // return size // alignmentWB is out param unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB) @@ -1238,7 +1245,7 @@ unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignme return opSize; } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 /***************************************************************************** * @@ -1383,7 +1390,7 @@ bool CodeGen::genCreateAddrMode( cns += op2->AsIntConCommon()->IconValue(); -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (cns == 0) #endif { @@ -1403,7 +1410,7 @@ bool CodeGen::genCreateAddrMode( goto AGAIN; -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_MUL: if (op1->gtOverflow()) @@ -1426,7 +1433,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !defined(TARGET_ARMARCH) +#endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) default: break; @@ -1447,7 +1454,7 @@ bool CodeGen::genCreateAddrMode( switch (op1->gtOper) { -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. 
case GT_ADD: @@ -1506,7 +1513,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !TARGET_ARMARCH +#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 case GT_NOP: @@ -1746,6 +1753,67 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } #endif // TARGET_ARMARCH +#ifdef TARGET_LOONGARCH64 +//------------------------------------------------------------------------ +// genEmitGSCookieCheck: Generate code to check that the GS cookie +// wasn't thrashed by a buffer overrun. +// +void CodeGen::genEmitGSCookieCheck(bool pushReg) +{ + noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); + + // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while + // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0). + if (!pushReg && (compiler->info.compRetType == TYP_REF)) + gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + + // We need two temporary registers, to load the GS cookie values and compare them. We can't use + // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be + // callee-trash registers, which should not contain anything interesting at this point. + // We don't have any IR node representing this check, so LSRA can't communicate registers + // for us to use. + + regNumber regGSConst = REG_GSCOOKIE_TMP_0; + regNumber regGSValue = REG_GSCOOKIE_TMP_1; + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + // load the GS cookie constant into a reg + // + genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + } + else + { + //// Ngen case - GS cookie constant needs to be accessed through an indirection. 
+ //instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + } + else + {////TODO:LoongArch64 should amend for optimize! + //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000)>>12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff)>>2); + } + regSet.verifyRegUsed(regGSConst); + } + // Load this method's GS value from the stack frame + GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); + + // Compare with the GC cookie constant + BasicBlock* gsCheckBlk = genCreateTempLabel(); + GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue); + + // regGSConst and regGSValue aren't needed anymore, we can use them for helper call + genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);//no branch-delay! + genDefineTempLabel(gsCheckBlk); +} +#endif // TARGET_LOONGARCH64 + /***************************************************************************** * * Generate an exit sequence for a return from a method (note: when compiling @@ -1856,6 +1924,7 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi // The code to throw the exception will be generated inline, and // we will jump around it in the normal non-exception case. 
+#ifndef TARGET_LOONGARCH64 BasicBlock* tgtBlk = nullptr; emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind); if (reverseJumpKind != jumpKind) @@ -1863,15 +1932,18 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi tgtBlk = genCreateTempLabel(); inst_JMP(reverseJumpKind, tgtBlk); } +#endif genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN); +#ifndef TARGET_LOONGARCH64 // Define the spot for the normal non-exception case to jump to. if (tgtBlk != nullptr) { assert(reverseJumpKind != jumpKind); genDefineTempLabel(tgtBlk); } +#endif } } @@ -1881,6 +1953,7 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi * have set the flags. Check if the operation caused an overflow. */ +#ifndef TARGET_LOONGARCH64 // inline void CodeGen::genCheckOverflow(GenTree* tree) { @@ -1927,6 +2000,7 @@ void CodeGen::genCheckOverflow(GenTree* tree) genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW); } +#endif #if defined(FEATURE_EH_FUNCLETS) @@ -2106,6 +2180,10 @@ void CodeGen::genGenerateMachineCode() { printf("generic ARM64 CPU"); } + else if (compiler->info.genCPU == CPU_LOONGARCH64) + { + printf("generic LOONGARCH64 CPU"); + } else { printf("unknown architecture"); @@ -2305,7 +2383,7 @@ void CodeGen::genEmitMachineCode() bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ? 
-#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) trackedStackPtrsContig = false; #elif defined(TARGET_ARM) // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous @@ -3224,6 +3302,285 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma warning(push) #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif + +#if defined(TARGET_LOONGARCH64) +void CodeGen::genFnPrologCalleeRegArgs() +{ + assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); + + regMaskTP regArgMaskLive = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; + +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFnPrologCalleeRegArgs() LOONGARCH64:0x%llx.\n", regArgMaskLive); + } +#endif + + // We should be generating the prolog block when we are called + assert(compiler->compGeneratingProlog); + + // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called. + noway_assert(regArgMaskLive != 0); + + unsigned varNum; + unsigned regArgsVars[MAX_REG_ARG*2] = {0}; + unsigned regArgNum = 0; + for (varNum = 0; varNum < compiler->lvaCount; ++varNum) + { + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + // Is this variable a register arg? + if (!varDsc->lvIsParam) + { + continue; + } + + if (!varDsc->lvIsRegArg) + { + continue; + } + + if (varDsc->lvIsInReg()) + { + assert(genIsValidIntReg(varDsc->GetArgReg()) || genIsValidFloatReg(varDsc->GetArgReg())); + assert(!(genIsValidIntReg(varDsc->GetOtherArgReg()) || genIsValidFloatReg(varDsc->GetOtherArgReg()))); + if (varDsc->GetArgInitReg() != varDsc->GetArgReg()) + { + if (varDsc->GetArgInitReg() > REG_ARG_LAST) + { + inst_Mov(genIsValidFloatReg(varDsc->GetArgInitReg()) ? 
TYP_DOUBLE : TYP_LONG, varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + regArgsVars[regArgNum] = varNum; + regArgNum++; + } + } + else + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); +#ifdef USING_SCOPE_INFO + psiMoveToReg(varNum); +#endif // USING_SCOPE_INFO + if (!varDsc->lvLiveInOutOfHndlr) + continue; + } + + // When we have a promoted struct we have two possible LclVars that can represent the incoming argument + // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField. + // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise + // use the the original TYP_STRUCT argument. + // + if (varDsc->lvPromoted || varDsc->lvIsStructField) + { + assert(!"-------------Should confirm on Loongarch!"); + } + + var_types storeType = TYP_UNDEF; + unsigned slotSize = TARGET_POINTER_SIZE; + + if (varTypeIsStruct(varDsc)) + { + if (emitter::isFloatReg(varDsc->GetArgReg())) + { + storeType = varDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE; + } + else //if (emitter::isGeneralRegister(varDsc->GetArgReg())) + { + assert(emitter::isGeneralRegister(varDsc->GetArgReg())); + if (varDsc->lvIs4Field1) + storeType = TYP_INT; + else + storeType = varDsc->GetLayout()->GetGCPtrType(0); + } + slotSize = (unsigned)emitActualTypeSize(storeType); + +#if FEATURE_MULTIREG_ARGS + // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers + noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES); +#endif + } + else // Not a struct type + { + storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet())); + if (emitter::isFloatReg(varDsc->GetArgReg()) != varTypeIsFloating(storeType)) + { + assert(varTypeIsFloating(storeType)); + storeType = storeType == TYP_DOUBLE ? 
TYP_I_IMPL : TYP_INT; + } + } + emitAttr size = emitActualTypeSize(storeType); + + regNumber srcRegNum = varDsc->GetArgReg(); + + // Stack argument - if the ref count is 0 don't care about it + if (!varDsc->lvOnFrame) + { + noway_assert(varDsc->lvRefCnt() == 0); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + if (varDsc->GetOtherArgReg() < REG_STK) + regArgMaskLive &= ~genRegMask(varDsc->GetOtherArgReg()); + } + else + { + assert(srcRegNum != varDsc->GetOtherArgReg()); + + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; + + bool FPbased; + int baseOffset = 0;//(regArgTab[argNum].slot - 1) * slotSize; + int base = compiler->lvaFrameAddress(varNum, &FPbased); + + base += baseOffset; + + if ((-2048 <= base) && (base < 2048)) + { + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); + } + else + { + baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + } + + regArgMaskLive &= ~genRegMask(srcRegNum); + + // Check if we are writing past the end of the struct + if (varTypeIsStruct(varDsc)) + { + if (emitter::isFloatReg(varDsc->GetOtherArgReg())) + { + baseOffset = (int)EA_SIZE(emitActualTypeSize(storeType)); + storeType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE; + size = EA_SIZE(emitActualTypeSize(storeType)); + baseOffset = baseOffset < (int)size ? 
(int)size : baseOffset; + srcRegNum = varDsc->GetOtherArgReg(); + } + else if (emitter::isGeneralRegister(varDsc->GetOtherArgReg())) + { + baseOffset = (int)EA_SIZE(slotSize); + if (varDsc->lvIs4Field2) + storeType = TYP_INT; + else + storeType = varDsc->GetLayout()->GetGCPtrType(1); + size = emitActualTypeSize(storeType); + if (baseOffset < (int)EA_SIZE(size)) + baseOffset = (int)EA_SIZE(size); + srcRegNum = varDsc->GetOtherArgReg(); + } + + if (srcRegNum == varDsc->GetOtherArgReg()) + { + base += baseOffset; + + if ((-2048 <= base) && (base < 2048)) + { + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); + } + else + { + baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + } + regArgMaskLive &= ~genRegMask(srcRegNum);//maybe do this later is better! + } + else if (varDsc->lvIsSplit) + { + assert(varDsc->GetArgReg() == REG_ARG_LAST && varDsc->GetOtherArgReg() == REG_STK); + baseOffset = 8; + base += 8; + + GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size/*EA_PTRSIZE*/, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); + if ((-2048 <= base) && (base < 2048)) + { + GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, baseOffset); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); + GetEmitter()->emitIns_S_R(INS_st_d, size, REG_ARG_LAST, varNum, -8); + } + else + { + baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_S_R(INS_st_d, size, REG_ARG_LAST, varNum, baseOffset); + } + } + } + } + +#ifdef USING_SCOPE_INFO + { + psiMoveToStack(varNum); + } +#endif // USING_SCOPE_INFO + } + } + + while (regArgNum > 0) + { + varNum = regArgsVars[regArgNum - 1]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + if (varDsc->GetArgInitReg() > varDsc->GetArgReg()) + { + var_types destMemType = varDsc->TypeGet(); + GetEmitter()->emitIns_R_R(ins_Copy(destMemType), emitActualTypeSize(destMemType), varDsc->GetArgInitReg(), varDsc->GetArgReg()); + regArgNum--; + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + for (int i=0; i < regArgNum; i++) + { + LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; + var_types destMemType = varDsc2->GetRegisterType(); + inst_Mov(destMemType, varDsc2->GetArgInitReg(), varDsc2->GetArgReg(), /* canSkip */ false, emitActualTypeSize(destMemType)); + regArgMaskLive &= ~genRegMask(varDsc2->GetArgReg()); + } + break; + } + } + + assert(!regArgMaskLive); + +} +#else //!defined(TARGET_LOONGARCH64) void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) { #ifdef DEBUG @@ -4514,6 +4871,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop } } +#endif + #ifdef _PREFAST_ #pragma warning(pop) #endif @@ -4540,6 +4899,11 @@ void CodeGen::genEnregisterIncomingStackArgs() unsigned varNum = 0; +#ifdef 
TARGET_LOONGARCH64 + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; +#endif + for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) { /* Is this variable a parameter? */ @@ -4586,8 +4950,39 @@ void CodeGen::genEnregisterIncomingStackArgs() assert(regNum != REG_STK); var_types regType = varDsc->GetActualRegisterType(); +#ifdef TARGET_LOONGARCH64 + { + bool FPbased; + //int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; + int base = compiler->lvaFrameAddress(varNum, &FPbased); + if ((-2048 <= base) && (base < 2048)) + { + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8); + } + else + { + int baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, baseOffset); + } + } + } +#else GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); +#endif // TARGET_LOONGARCH64 + regSet.verifyRegUsed(regNum); #ifdef USING_SCOPE_INFO psiMoveToReg(varNum); @@ -5068,6 +5463,9 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& #elif defined(TARGET_ARM64) // We will just zero out the entire vector register. This sets it to a double/float zero value GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); +#elif defined(TARGET_LOONGARCH64) + // We will just zero out the entire vector register. 
This sets it to a double/float zero value + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -5103,6 +5501,8 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& #elif defined(TARGET_ARM64) // We will just zero out the entire vector register. This sets it to a double/float zero value GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -5498,23 +5898,230 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) noway_assert(compiler->compCalleeRegsPushed == popCount); } -#endif // TARGET* - -// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so. -// Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR. -regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed) +#elif defined(TARGET_LOONGARCH64) +void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) { -#ifdef TARGET_ARM64 - return REG_ZR; -#else // !TARGET_ARM64 - if (*pInitRegZeroed == false) + assert(compiler->compGeneratingEpilog); + + regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + + if (isFramePointerUsed()) { - instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); - *pInitRegZeroed = true; + rsRestoreRegs |= RBM_FPBASE; } - return initReg; -#endif // !TARGET_ARM64 -} + + rsRestoreRegs |= RBM_RA; // We must save/restore the return address. + + regMaskTP regsToRestoreMask = rsRestoreRegs; + + int totalFrameSize = genTotalFrameSize(); + + int calleeSaveSPOffset = 0; // This will be the starting place for restoring + // the callee-saved registers, in decreasing order. + int frameType = 0; // An indicator of what type of frame we are popping. 
+ int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored. + + if (isFramePointerUsed()) + { + if (totalFrameSize <= 2047) + { + if (compiler->compLocallocUsed) + { + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); + compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); + } + + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, dspBool(compiler->compLocallocUsed)); + + frameType = 1; + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; + } + else + { + frameType = 2; + + calleeSaveSPOffset = compiler->compLclFrameSize; + + JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, dspBool(compiler->compLocallocUsed)); + + } + //calleeSaveSPDelta = 0; + } + else + { + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? 
%s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + frameType = 3; + + int outSzAligned; + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + int offset = totalFrameSize - compiler->compLclFrameSize - 2*REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - offset; + + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); + offset2 = calleeSaveSPDelta - offset2; + + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + else + { + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + //if (outSzAligned > 0) + { + genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); + } + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2*REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + else + { + int offset2 = compiler->lvaOutgoingArgSpaceSize; + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. 
+ + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2*REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + } + else + { + frameType = 4; + + JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + if (compiler->compLocallocUsed) + { + calleeSaveSPDelta = calleeSaveSPOffset + REGSIZE_BYTES; + + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -calleeSaveSPDelta); + compiler->unwindSetFrameReg(REG_FPBASE, calleeSaveSPDelta); + } + else + { + calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + } + } + else + { + // No frame pointer (no chaining). 
+ NYI("Frame without frame pointer"); + calleeSaveSPOffset = 0; + } + + JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta); + + if (frameType == 1) + { + calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset + 8); + compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset); + compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset); + + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if (frameType == 2) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if (frameType == 3) + { + //genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (frameType == 4) + { + //genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else + { + unreached(); + } +} + +#endif // TARGET* + +// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so. +// Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR. 
+regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed) +{ +#ifdef TARGET_ARM64 + return REG_ZR; +#elif defined(TARGET_LOONGARCH64) + return REG_R0; +#else // !TARGET_LOONGARCH64 + if (*pInitRegZeroed == false) + { + instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); + *pInitRegZeroed = true; + } + return initReg; +#endif // !TARGET_LOONGARCH64 +} //----------------------------------------------------------------------------- // genZeroInitFrame: Zero any untracked pointer locals and/or initialize memory for locspace @@ -6072,6 +6679,118 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, assert(i == alignmentHiBlkSize); } } + +#elif defined(TARGET_LOONGARCH64) + regNumber rAddr; + regNumber rCnt = REG_NA; // Invalid + regMaskTP regMask; + + regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers + //see: src/jit/registerloongarch64.h + availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are + // currently live + availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for + // a large constant. + + rAddr = initReg; + *pInitRegZeroed = false; + + // rAddr is not a live incoming argument reg + assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); + assert(untrLclLo%4 == 0); + + if ((-2048 <= untrLclLo) && (untrLclLo < 2048)) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); + } + else + { + // Load immediate into the InitReg register + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg); + *pInitRegZeroed = false; + } + + bool useLoop = false; + unsigned uCntBytes = untrLclHi - untrLclLo; + assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes. 
+ unsigned int padding = untrLclLo & 0x7; + + if (padding) + { + assert(padding == 4); + GetEmitter()->emitIns_R_R_I(INS_st_w, EA_4BYTE, REG_R0, rAddr, 0); + uCntBytes -= 4; + } + + unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use. + + // When uCntSlots is 9 or less, we will emit a sequence of sd instructions inline. + // When it is 10 or greater, we will emit a loop containing a sd instruction. + // In both of these cases the sd instruction will write two zeros to memory + // and we will use a single str instruction at the end whenever we have an odd count. + if (uCntSlots >= 10) + useLoop = true; + + if (useLoop) + { + // We pick the next lowest register number for rCnt + noway_assert(availMask != RBM_NONE); + regMask = genFindLowestBit(availMask); + rCnt = genRegNumFromMask(regMask); + availMask &= ~regMask; + + noway_assert(uCntSlots >= 2); + assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming + // argument reg + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); + + /* TODO for LA: maybe optimize further */ + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rCnt, rCnt, -1); + + // bne rCnt, zero, -4 * 4 + ssize_t imm = -16; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_bne, EA_PTRSIZE, rCnt, REG_R0, imm); + + uCntBytes %= REGSIZE_BYTES * 2; + } + else + { + while (uCntBytes >= REGSIZE_BYTES * 2) + { + /* TODO for LA: can be optimize further */ + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); + 
uCntBytes -= REGSIZE_BYTES * 2; + padding = 0; + } + } + + if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number) + { + if ((uCntBytes - REGSIZE_BYTES) == 0) + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, padding); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, REGSIZE_BYTES); + } + uCntBytes -= REGSIZE_BYTES; + } + if (uCntBytes > 0) + { + assert(uCntBytes == sizeof(int)); + GetEmitter()->emitIns_R_R_I(INS_st_w, EA_4BYTE, REG_R0, rAddr, padding); + uCntBytes -= sizeof(int); + } + noway_assert(uCntBytes == 0); + #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -6385,11 +7104,14 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed // ARM's emitIns_R_R_I automatically uses the reserved register if necessary. GetEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), compiler->lvaCachedGenericContextArgOffset()); -#else // !ARM64 !ARM +#elif defined(TARGET_LOONGARCH64) + genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), + compiler->lvaCachedGenericContextArgOffset(), REG_R21); +#else // !ARM64 !ARM !LOONGARCH64 // mov [ebp-lvaCachedGenericContextArgOffset()], reg GetEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), compiler->lvaCachedGenericContextArgOffset()); -#endif // !ARM64 !ARM +#endif // !ARM64 !ARM !LOONGARCH64 } /***************************************************************************** @@ -6772,6 +7494,23 @@ void CodeGen::genFinalizeFrame() maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED; #endif // defined(TARGET_XARCH) +#ifdef TARGET_LOONGARCH64 + if (isFramePointerUsed()) + { + // For a FP based frame we have to push/pop the FP register + // + maskCalleeRegsPushed |= RBM_FPBASE; + + // This assert check that we 
are not using REG_FP + // as both the frame pointer and as a codegen register + // + assert(!regSet.rsRegsModified(RBM_FPBASE)); + } + + // we always push RA. See genPushCalleeSavedRegisters + maskCalleeRegsPushed |= RBM_RA; +#endif // TARGET_LOONGARCH64 + compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); #ifdef DEBUG @@ -6861,6 +7600,22 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) compiler->unwindSetFrameReg(REG_FPBASE, delta); } +#elif defined(TARGET_LOONGARCH64) + if (delta == 0) + { + GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE); + } + else + { + assert((-2048 <= delta) && (delta < 2048)); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); + } + + if (reportUnwindData) + { + compiler->unwindSetFrameReg(REG_FPBASE, delta); + } + #else NYI("establish frame pointer"); #endif @@ -6953,10 +7708,10 @@ void CodeGen::genFnProlog() instGen(INS_nop); instGen(INS_BREAKPOINT); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Avoid asserts in the unwind info because these instructions aren't accounted for. compiler->unwindPadding(); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 } #endif // DEBUG @@ -7134,11 +7889,11 @@ void CodeGen::genFnProlog() // previous frame pointer. Thus, stkOffs can't be zero. CLANG_FORMAT_COMMENT_ANCHOR; -#if !defined(TARGET_AMD64) +#if !defined(TARGET_AMD64) && !defined(TARGET_LOONGARCH64) // However, on amd64 there is no requirement to chain frame pointers. 
noway_assert(!isFramePointerUsed() || loOffs != 0); -#endif // !defined(TARGET_AMD64) +#endif // !defined(TARGET_AMD64) && !defined(TARGET_LOONGARCH64) // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs); @@ -7298,9 +8053,9 @@ void CodeGen::genFnProlog() } #endif // TARGET_XARCH -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 genPushCalleeSavedRegisters(); #endif // !TARGET_ARM64 @@ -7333,7 +8088,7 @@ void CodeGen::genFnProlog() //------------------------------------------------------------------------- CLANG_FORMAT_COMMENT_ANCHOR; -#ifndef TARGET_ARM64 +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) regMaskTP maskStackAlloc = RBM_NONE; #ifdef TARGET_ARM @@ -7345,7 +8100,7 @@ void CodeGen::genFnProlog() { genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn); } -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 //------------------------------------------------------------------------- @@ -7485,6 +8240,13 @@ void CodeGen::genFnProlog() // if (!compiler->opts.IsOSR()) { +#if defined(TARGET_LOONGARCH64) + if (intRegState.rsCalleeRegArgMaskLiveIn || floatRegState.rsCalleeRegArgMaskLiveIn) + { + initRegZeroed = false; + genFnPrologCalleeRegArgs(); + } +#else auto assignIncomingRegisterArgs = [this, initReg, &initRegZeroed](RegState* regState) { if (regState->rsCalleeRegArgMaskLiveIn) { @@ -7519,6 +8281,7 @@ void CodeGen::genFnProlog() assignIncomingRegisterArgs(&floatRegState); #else assignIncomingRegisterArgs(&intRegState); +#endif #endif } @@ -8453,6 +9216,192 @@ void CodeGen::genFnEpilog(BasicBlock* block) } } +#elif defined(TARGET_LOONGARCH64) + +void CodeGen::genFnEpilog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFnEpilog()\n"); +#endif // DEBUG + + ScopedSetVariable 
_setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, GetEmitter()->emitInitGCrefVars); + gcInfo.gcRegGCrefSetCur = GetEmitter()->emitInitGCrefRegs; + gcInfo.gcRegByrefSetCur = GetEmitter()->emitInitByrefRegs; + +#ifdef DEBUG + if (compiler->opts.dspCode) + printf("\n__epilog:\n"); + + if (verbose) + { + printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); + dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); + printf(", gcRegGCrefSetCur="); + printRegMaskInt(gcInfo.gcRegGCrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); + printf(", gcRegByrefSetCur="); + printRegMaskInt(gcInfo.gcRegByrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); + printf("\n"); + } +#endif // DEBUG + + bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); + + GenTree* lastNode = block->lastNode(); + + // Method handle and address info used in case of jump epilog + CORINFO_METHOD_HANDLE methHnd = nullptr; + CORINFO_CONST_LOOKUP addrInfo; + addrInfo.addr = nullptr; + addrInfo.accessType = IAT_VALUE; + + if (jmpEpilog && lastNode->gtOper == GT_JMP) + { + methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); + } + + compiler->unwindBegEpilog(); + + if (jmpEpilog) + { + SetHasTailCalls(true); + + noway_assert(block->bbJumpKind == BBJ_RETURN); + noway_assert(block->GetFirstLIRNode() != nullptr); + + /* figure out what jump we have */ + GenTree* jmpNode = lastNode; +#if !FEATURE_FASTTAILCALL + noway_assert(jmpNode->gtOper == GT_JMP); +#else // FEATURE_FASTTAILCALL + // armarch + // If jmpNode is GT_JMP then gtNext must be null. + // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. 
+ noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + + // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp + noway_assert((jmpNode->gtOper == GT_JMP) || + ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + + // The next block is associated with this "if" stmt + if (jmpNode->gtOper == GT_JMP) +#endif // FEATURE_FASTTAILCALL + { + // Simply emit a jump to the methodHnd. This is similar to a call so we can use + // the same descriptor with some minor adjustments. + assert(methHnd != nullptr); + assert(addrInfo.addr != nullptr); + + emitter::EmitCallType callType; + void* addr; + regNumber indCallReg; + switch (addrInfo.accessType) + { + case IAT_VALUE: + //if (validImmForBAL((ssize_t)addrInfo.addr)) + //{ + // // Simple direct call + + // //TODO for LA. + // callType = emitter::EC_FUNC_TOKEN; + // addr = addrInfo.addr; + // indCallReg = REG_NA; + // break; + //} + + //// otherwise the target address doesn't fit in an immediate + //// so we have to burn a register... + //__fallthrough; + + case IAT_PVALUE: + // Load the address into a register, load indirect and call through a register + // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use + callType = emitter::EC_INDIR_R; + indCallReg = REG_INDIRECT_CALL_TARGET_REG; + addr = NULL; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); + if (addrInfo.accessType == IAT_PVALUE) + { + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, indCallReg, indCallReg, 0); + regSet.verifyRegUsed(indCallReg); + } + break; + + case IAT_RELPVALUE: + { + // Load the address into a register, load relative indirect and call through a register + // We have to use R12 since we assume the argument registers are in use + // LR is used as helper register right before it is restored from stack, thus, + // all relative address calculations are performed before LR is restored. 
+ callType = emitter::EC_INDIR_R; + indCallReg = REG_T2; + addr = NULL; + + regSet.verifyRegUsed(indCallReg); + break; + } + + case IAT_PPVALUE: + default: + NO_WAY("Unsupported JMP indirection"); + } + + /* Simply emit a jump to the methodHnd. This is similar to a call so we can use + * the same descriptor with some minor adjustments. + */ + + genPopCalleeSavedRegisters(true); + + // clang-format off + GetEmitter()->emitIns_Call(callType, + methHnd, + INDEBUG_LDISASM_COMMA(nullptr) + addr, + 0, // argSize + EA_UNKNOWN // retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + DebugInfo(), + indCallReg, // ireg + REG_NA, // xreg + 0, // xmul + 0, // disp + true); // isJump + // clang-format on + CLANG_FORMAT_COMMENT_ANCHOR; + } +#if FEATURE_FASTTAILCALL + else + { + genPopCalleeSavedRegisters(true); + // Fast tail call. + // Call target = REG_FASTTAILCALL_TARGET + // https://github.com/dotnet/coreclr/issues/4827 + // Do we need a special encoding for stack walker like rex.w prefix for x64? + + //TODO for LA: whether the relative address is enough for optimize? 
+ GetEmitter()->emitIns_R_R_I(INS_jirl, emitTypeSize(TYP_I_IMPL), REG_R0, REG_FASTTAILCALL_TARGET, 0); + } +#endif // FEATURE_FASTTAILCALL + } + else + { + genPopCalleeSavedRegisters(false); + + GetEmitter()->emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_RA, 0); + compiler->unwindReturn(REG_RA); + } + + compiler->unwindEndEpilog(); +} + #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -9145,6 +10094,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() } } +#elif defined(TARGET_LOONGARCH64) + +// Look in codegenloongarch64.cpp + #else // TARGET* /***************************************************************************** @@ -9329,6 +10282,18 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0); +#elif defined(TARGET_LOONGARCH64) + + int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); + + // We will just use the initReg since it is an available register + // and we are probably done using it anyway... 
+ regNumber regTmp = initReg; + *pInitRegZeroed = false; + + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta, REG_R21, false); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); + #else // TARGET* NYI("Set function PSP sym"); @@ -9582,7 +10547,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass, CorInfoCallCo structPassingKind howToReturnStruct; var_types returnType = getReturnTypeForStruct(hClass, callConv, &howToReturnStruct); -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType)); #else return (varTypeIsStruct(returnType)); @@ -9744,7 +10709,7 @@ instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shi // unsigned CodeGen::getFirstArgWithStackSlot() { -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned baseVarNum = 0; // Iterate over all the lvParam variables in the Lcl var table until we find the first one // that's passed on the stack. @@ -10907,9 +11872,9 @@ void CodeGen::genReturn(GenTree* treeNode) // exit point where it is actually dead. genConsumeReg(op1); -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genSimpleReturn(treeNode); -#else // !TARGET_ARM64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 #if defined(TARGET_X86) if (varTypeUsesFloatReg(treeNode)) { @@ -10937,7 +11902,7 @@ void CodeGen::genReturn(GenTree* treeNode) regNumber retReg = varTypeUsesFloatReg(treeNode) ? 
REG_FLOATRET : REG_INTRET; inst_Mov_Extend(targetType, /* srcInReg */ true, retReg, op1->GetRegNum(), /* canSkip */ true); } -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 || !TARGET_LOONGARCH64 } } diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index f276a492da33d..84d8560545894 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -114,6 +114,8 @@ class CodeGenInterface static const insFlags instInfo[INS_count]; #elif defined(TARGET_ARM) || defined(TARGET_ARM64) static const BYTE instInfo[INS_count]; +#elif defined(TARGET_LOONGARCH64) + static const BYTE instInfo[INS_count]; #else #error Unsupported target architecture #endif @@ -196,6 +198,11 @@ class CodeGenInterface virtual bool IsSaveFpLrWithAllCalleeSavedRegisters() const = 0; #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 + virtual void SetSaveFpRaWithAllCalleeSavedRegisters(bool value) = 0; + virtual bool IsSaveFpRaWithAllCalleeSavedRegisters() const = 0; +#endif // TARGET_LOONGARCH64 + regNumber genGetThisArgReg(GenTreeCall* call) const; #ifdef TARGET_XARCH @@ -305,7 +312,11 @@ class CodeGenInterface bool validImmForAdd(target_ssize_t imm, insFlags flags); bool validImmForAlu(target_ssize_t imm); bool validImmForMov(target_ssize_t imm); +#ifdef TARGET_LOONGARCH64 + bool validImmForBAL(ssize_t addr); +#else bool validImmForBL(ssize_t addr); +#endif instruction ins_Load(var_types srcType, bool aligned = false); instruction ins_Store(var_types dstType, bool aligned = false); @@ -360,7 +371,7 @@ class CodeGenInterface m_cgInterruptible = value; } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool GetHasTailCalls() { @@ -374,7 +385,7 @@ class CodeGenInterface private: bool m_cgInterruptible; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool m_cgHasTailCalls; #endif // TARGET_ARMARCH diff --git a/src/coreclr/jit/codegenlinear.cpp 
b/src/coreclr/jit/codegenlinear.cpp index 51e9afc074398..e10e498466f52 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1211,7 +1211,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) assert(spillType != TYP_UNDEF); // TODO-Cleanup: The following code could probably be further merged and cleaned up. -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Load local variable from its home location. // In most cases the tree type will indicate the correct type to use for the load. // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets @@ -2515,7 +2515,12 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) m_checkKind = CHECK_NONE; } +#ifdef TARGET_LOONGARCH64 + m_extendKind = castUnsigned ? ZERO_EXTEND_INT : SIGN_EXTEND_INT; + cast->gtFlags |= castUnsigned ? GTF_UNSIGNED : GTF_EMPTY; +#else m_extendKind = COPY; +#endif m_extendSrcSize = 4; } #endif @@ -2592,6 +2597,7 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) } #endif // !defined(TARGET_64BIT) +#ifndef TARGET_LOONGARCH64 //------------------------------------------------------------------------ // genCodeForJumpTrue: Generate code for a GT_JTRUE node. // @@ -2637,6 +2643,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) inst_JCC(condition, compiler->compCurBB->bbJumpDest); } +#endif // !TARGET_LOONGARCH64 //------------------------------------------------------------------------ // genCodeForJcc: Generate code for a GT_JCC node. diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp new file mode 100644 index 0000000000000..49cc67b50dc36 --- /dev/null +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -0,0 +1,9157 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX LOONGARCH64 Code Generator XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 +#include "emit.h" +#include "codegen.h" +#include "lower.h" +#include "gcinfo.h" +#include "gcinfoencoder.h" + +static short splitLow(int value) { + return (value & 0xffff); +} + +// Returns true if 'value' is a legal signed immediate 16 bit encoding. +static bool isValidSimm16(ssize_t value) +{ + return -( ((int)1) << 15 ) <= value && value < ( ((int)1) << 15 ); +}; + +// Returns true if 'value' is a legal unsigned immediate 16 bit encoding. +static bool isValidUimm16(ssize_t value) +{ + return (0 == (value >> 16)); +}; + +// Returns true if 'value' is a legal signed immediate 12 bit encoding. +static bool isValidSimm12(ssize_t value) +{ + return -( ((int)1) << 11 ) <= value && value < ( ((int)1) << 11 ); +}; + +// Returns true if 'value' is a legal unsigned immediate 11 bit encoding. +static bool isValidUimm11(ssize_t value) +{ + return (0 == (value >> 11)); +}; + +// Returns true if 'value' is a legal unsigned immediate 12 bit encoding. 
+static bool isValidUimm12(ssize_t value) +{ + return (0 == (value >> 12)); +}; + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Prolog / Epilog XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +//------------------------------------------------------------------------ +// genInstrWithConstant: we will typically generate one instruction +// +// ins reg1, reg2, imm +// +// However the imm might not fit as a directly encodable immediate, +// when it doesn't fit we generate extra instruction(s) that sets up +// the 'regTmp' with the proper immediate value. +// +// li64 regTmp, imm +// ins reg1, reg2, regTmp +// +// Arguments: +// ins - instruction +// attr - operation size and GC attribute +// reg1, reg2 - first and second register operands +// imm - immediate value (third operand when it fits) +// tmpReg - temp register to use when the 'imm' doesn't fit. Can be REG_NA +// if caller knows for certain the constant will fit. +// inUnwindRegion - true if we are in a prolog/epilog region with unwind codes. +// Default: false. +// +// Return Value: +// returns true if the immediate was small enough to be encoded inside instruction. If not, +// returns false meaning the immediate was too large and tmpReg was used and modified. 
+// +bool CodeGen::genInstrWithConstant(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + regNumber tmpReg, + bool inUnwindRegion /* = false */) +{ + emitAttr size = EA_SIZE(attr); + + // reg1 is usually a dest register + // reg2 is always source register + assert(tmpReg != reg2); // regTmp can not match any source register + +#ifdef DEBUG + switch (ins) + { + case INS_addi_d: + + case INS_st_b: + case INS_st_h: + case INS_st_w: + case INS_fst_s: + case INS_st_d: + case INS_fst_d: + + case INS_ld_b: + case INS_ld_h: + case INS_ld_w: + case INS_fld_s: + case INS_ld_d: + case INS_fld_d: + break; + + default: + assert(!"Unexpected instruction in genInstrWithConstant"); + break; + } +#endif + bool immFitsInIns = (-2048 <= imm) && (imm <= 2047); + + if (immFitsInIns) + { + // generate a single instruction that encodes the immediate directly + GetEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit + assert(tmpReg != REG_NA); + + // generate two or more instructions + + // first we load the immediate into tmpReg + assert(!EA_IS_RELOC(size)); + GetEmitter()->emitIns_I_la(size, tmpReg, imm); + regSet.verifyRegUsed(tmpReg); + + // when we are in an unwind code region + // we record the extra instructions using unwindPadding() + if (inUnwindRegion) + { + compiler->unwindPadding(); + } + + if (ins == INS_addi_d) + { + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, reg1, reg2, tmpReg); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, tmpReg, reg2, tmpReg); + GetEmitter()->emitIns_R_R_I(ins, attr, reg1, tmpReg, 0); + } + } + return immFitsInIns; +} + +//------------------------------------------------------------------------ +// genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog +// or the epilog. The unwind codes for the generated instructions are produced. 
An available temporary +// register is required to be specified, in case the constant is too large to encode in an "daddu" +// instruction (or "dsubu" instruction if we choose to use one), such that we need to load the constant +// into a register first, before using it. +// +// Arguments: +// spDelta - the value to add to SP (can be negative) +// tmpReg - an available temporary register +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// reportUnwindData - If true, report the change in unwind data. Otherwise, do not report it. +// +// Return Value: +// None. + +void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData) +{ + // Even though INS_addi_d is specified here, the encoder will choose either + // an INS_add_d or an INS_addi_d and encode the immediate as a positive value + // + bool wasTempRegisterUsedForImm = + !genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true); + if (wasTempRegisterUsedForImm) + { + if (pTmpRegIsZero != nullptr) + { + *pTmpRegIsZero = false; + } + } + + if (reportUnwindData) + { + // spDelta is negative in the prolog, positive in the epilog, + // but we always tell the unwind codes the positive value. + ssize_t spDeltaAbs = abs(spDelta); + unsigned unwindSpDelta = (unsigned)spDeltaAbs; + assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in a unsigned + + compiler->unwindAllocStack(unwindSpDelta); + } +} + +//------------------------------------------------------------------------ +// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet +// prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction. 
+// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that +// instruction. +// +// Arguments: +// reg1 - First register of pair to save. +// reg2 - Second register of pair to save. +// spOffset - The offset from SP to store reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or +// zero). +// useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This +// allows us to emit the "save_next" unwind code. +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. + +void CodeGen::genPrologSaveRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta <= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both + // FP/SIMD + + instruction ins = INS_st_d; + if (genIsValidFloatReg(reg1)) + ins = INS_fst_d; + + if (spDelta != 0) + { + // generate addi.d SP,SP,-imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + + assert((spDelta+spOffset+16)<=0); + + assert(spOffset <= 2031);//2047-16 + } + +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_st_d == ins)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); +// compiler->unwindSaveRegPair(reg1, reg2, spOffset); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + 
GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); + compiler->unwindSaveReg(reg2, spOffset+8); + } +} + +//------------------------------------------------------------------------ +// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or +// floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0), +// then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not +// below it, which we currently don't support. This restriction could be loosened if the callers change to handle it +// (and this function changes to support using pre-indexed SD addressing). The caller must ensure that we can use the +// SD instruction, and that spOffset will be in the legal range for that instruction. +// +// Arguments: +// reg1 - Register to save. +// spOffset - The offset from SP to store reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or +// zero). +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. 
+ +void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta <= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + + instruction ins = INS_st_d; + if (genIsValidFloatReg(reg1)) + ins = INS_fst_d; + + if (spDelta != 0) + { + // generate daddiu SP,SP,-imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + +} + +//------------------------------------------------------------------------ +// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. +// The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing. +// The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that +// instruction. +// +// Arguments: +// reg1 - First register of pair to restore. +// reg2 - Second register of pair to restore. +// spOffset - The offset from SP to load reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or +// zero). +// useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This +// allows us to emit the "save_next" unwind code. +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. 
+ +void CodeGen::genEpilogRestoreRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta >= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both + // FP/SIMD + + instruction ins = INS_ld_d; + if (genIsValidFloatReg(reg1)) + ins = INS_fld_d; + + if (spDelta != 0) + { + assert(!useSaveNextPair); +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_ld_d == ins)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); +// compiler->unwindSaveRegPair(reg1, reg2, spOffset); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); + compiler->unwindSaveReg(reg2, spOffset+8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } + + // generate daddiu SP,SP,imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + else + { +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_ld_d == ins)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); +// compiler->unwindSaveRegPair(reg1, reg2, spOffset); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); + compiler->unwindSaveReg(reg2, spOffset+8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } + } +} + +//------------------------------------------------------------------------ +// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. 
+// +// Arguments: +// reg1 - Register to restore. +// spOffset - The offset from SP to restore reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or +// zero). +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. + +void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta >= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + + instruction ins = INS_ld_d; + if (genIsValidFloatReg(reg1)) + ins = INS_fld_d; + + if (spDelta != 0) + { + // ld reg1, offset(SP) + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + // generate add SP,SP,imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + else + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } +} + +//------------------------------------------------------------------------ +// genBuildRegPairsStack: Build a stack of register pairs for prolog/epilog save/restore for the given mask. +// The first register pair will contain the lowest register. Register pairs will combine neighbor +// registers in pairs. If it can't be done (for example if we have a hole or this is the last reg in a mask with +// odd number of regs) then the second element of that RegPair will be REG_NA. +// +// Arguments: +// regsMask - a mask of registers for prolog/epilog generation; +// regStack - a regStack instance to build the stack in, used to save temp copyings. +// +// Return value: +// no return value; the regStack argument is modified. 
+//
+// static
+void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* regStack)
+{
+    assert(regStack != nullptr);
+    assert(regStack->Height() == 0);
+
+    unsigned regsCount = genCountBits(regsMask);
+
+    while (regsMask != RBM_NONE)
+    {
+        regMaskTP reg1Mask = genFindLowestBit(regsMask);
+        regNumber reg1     = genRegNumFromMask(reg1Mask);
+        regsMask &= ~reg1Mask;
+        regsCount -= 1;
+
+        bool isPairSave = false;
+        if (regsCount > 0)
+        {
+            regMaskTP reg2Mask = genFindLowestBit(regsMask);
+            regNumber reg2     = genRegNumFromMask(reg2Mask);
+            if (reg2 == REG_NEXT(reg1))
+            {
+                // The JIT doesn't allow saving pair (S7,FP), even though the
+                // save_regp register pair unwind code specification allows it.
+                // The JIT always saves (FP,RA) as a pair, and uses the save_fpra
+                // unwind code. This only comes up in stress mode scenarios
+                // where callee-saved registers are not allocated completely
+                // from lowest-to-highest, without gaps.
+                if (reg1 != REG_FP)
+                {
+                    // Both registers must have the same type to be saved as pair.
+                    if (genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2))
+                    {
+                        isPairSave = true;
+
+                        regsMask &= ~reg2Mask;
+                        regsCount -= 1;
+
+                        regStack->Push(RegPair(reg1, reg2));
+                    }
+                }
+            }
+        }
+        if (!isPairSave)
+        {
+            regStack->Push(RegPair(reg1));
+        }
+    }
+    assert(regsCount == 0 && regsMask == RBM_NONE);
+
+    genSetUseSaveNextPairs(regStack);
+}
+
+//------------------------------------------------------------------------
+// genSetUseSaveNextPairs: Set useSaveNextPair for each RegPair on the stack which unwind info can be encoded as
+// save_next code.
+//
+// Arguments:
+//   regStack - a regStack instance to set useSaveNextPair.
+//
+// Notes:
+// We can use save_next for RegPair(N, N+1) only when we have sequence like (N-2, N-1), (N, N+1).
+// In this case in the prolog save_next for (N, N+1) refers to save_pair(N-2, N-1);
+// in the epilog the unwinder will search for the first save_pair (N-2, N-1)
+// and then go back to the first save_next (N, N+1) to restore it first.
+//
+// static
+void CodeGen::genSetUseSaveNextPairs(ArrayStack<RegPair>* regStack)
+{
+    for (int i = 1; i < regStack->Height(); ++i)
+    {
+        RegPair& curr = regStack->BottomRef(i);
+        RegPair  prev = regStack->Bottom(i - 1);
+
+        if (prev.reg2 == REG_NA || curr.reg2 == REG_NA)
+        {
+            continue;
+        }
+
+        if (REG_NEXT(prev.reg2) != curr.reg1)
+        {
+            continue;
+        }
+
+        if (genIsValidFloatReg(prev.reg2) != genIsValidFloatReg(curr.reg1))
+        {
+            // It is possible to support changing of the last int pair with the first float pair,
+            // but it is very rare case and it would require superfluous changes in the unwinder.
+            continue;
+        }
+        curr.useSaveNextPair = true;
+    }
+}
+
+//------------------------------------------------------------------------
+// genGetSlotSizeForRegsInMask: Get the stack slot size appropriate for the register type from the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for prolog/epilog generation.
+//
+// Return value:
+//   stack slot size in bytes.
+//
+// Note: Because int and float register type sizes match we can call this function with a mask that includes both.
+//
+// static
+int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
+{
+    assert((regsMask & (RBM_CALLEE_SAVED | RBM_FP | RBM_RA)) == regsMask); // Do not expect anything else.
+
+    static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
+    return REGSIZE_BYTES;
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegisterGroup: Saves the group of registers described by the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for prolog generation;
+//   spDelta - if non-zero, the amount to add to SP before the first register save (or together with it);
+//   spOffset - the offset from SP that is the beginning of the callee-saved register area;
+//
+void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
+{
+    const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
+
+    ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
+    genBuildRegPairsStack(regsMask, &regStack);
+
+    for (int i = 0; i < regStack.Height(); ++i)
+    {
+        RegPair regPair = regStack.Bottom(i);
+        if (regPair.reg2 != REG_NA)
+        {
+            // We can use two SD instructions.
+            genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_R21,
+                                 nullptr);
+
+            spOffset += 2 * slotSize;
+        }
+        else
+        {
+            // No register pair; we use a SD instruction.
+            genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_R21, nullptr);
+            spOffset += slotSize;
+        }
+
+        spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
+    }
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
+// in the function or funclet prolog. Registers are saved in register number order from low addresses
+// to high addresses. This means that integer registers are saved at lower addresses than floating-point/SIMD
+// registers.
+//
+// If establishing frame pointer chaining, it must be done after saving the callee-saved registers.
+//
+// We can only use the instructions that are allowed by the unwind codes. The caller ensures that
+// there is enough space on the frame to store these registers, and that the store instructions
+// we need to use (SD) are encodable with the stack-pointer immediate offsets we need to use.
+//
+// The caller can tell us to fold in a stack pointer adjustment, which we will do with the first instruction.
+// Note that the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
+// stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
+// registers, though, we will have an empty alignment slot somewhere. It turns out we will put
+// it below (at a lower address) the callee-saved registers, as that is currently how we
+// do frame layout. This means that the first stack offset will be 8 and the stack pointer
+// adjustment must be done by a SUB, and not folded in to a pre-indexed store.
+//
+// Arguments:
+//    regsToSaveMask          - The mask of callee-saved registers to save. If empty, this function does nothing.
+//    lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
+//                              if non-zero spDelta, then this is the offset of the first save *after* that
+//                              SP adjustment.
+//    spDelta                 - If non-zero, the amount to add to SP before the register saves (must be negative or
+//                              zero).
+//
+// Notes:
+//    The save set can not contain FP/RA in which case FP/RA is saved along with the other callee-saved registers.
+//
+void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
+{
+    assert(spDelta <= 0);
+
+    unsigned regsToSaveCount = genCountBits(regsToSaveMask);
+    if (regsToSaveCount == 0)
+    {
+        if (spDelta != 0)
+        {
+            // Currently this is the case for varargs only
+            // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
+            genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true);
+        }
+        return;
+    }
+
+    assert((spDelta % 16) == 0);
+
+    assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED));
+
+    // Save integer registers at higher addresses than floating-point registers.
+ + regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat; + + if (maskSaveRegsFloat != RBM_NONE) + { + genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset); + spDelta = 0; + lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES; + } + + if (maskSaveRegsInt != RBM_NONE) + { + genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset); + // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this. + } +} + +//------------------------------------------------------------------------ +// genRestoreCalleeSavedRegisterGroup: Restores the group of registers described by the mask. +// +// Arguments: +// regsMask - a mask of registers for epilog generation; +// spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it); +// spOffset - the offset from SP that is the beginning of the callee-saved register area; +// +void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) +{ + const int slotSize = genGetSlotSizeForRegsInMask(regsMask); + + ArrayStack regStack(compiler->getAllocator(CMK_Codegen)); + genBuildRegPairsStack(regsMask, ®Stack); + + int stackDelta = 0; + for (int i = 0; i < regStack.Height(); ++i) + { + bool lastRestoreInTheGroup = (i == regStack.Height() - 1); + bool updateStackDelta = lastRestoreInTheGroup && (spDelta != 0); + if (updateStackDelta) + { + // Update stack delta only if it is the last restore (the first save). 
+ assert(stackDelta == 0); + stackDelta = spDelta; + } + + RegPair regPair = regStack.Top(i); + if (regPair.reg2 != REG_NA) + { + spOffset -= 2 * slotSize; + + genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, REG_R21, + nullptr); + } + else + { + spOffset -= slotSize; + genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_R21, nullptr); + } + } +} + +//------------------------------------------------------------------------ +// genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame +// in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp(). +// +// Arguments: +// regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. +// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. +// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or +// zero). +// +// Here's an example restore sequence: +// ld s7, 88(sp) +// ld s6, 80(sp) +// ld s5, 72(sp) +// ld s4, 64(sp) +// ld s3, 56(sp) +// ld s2, 48(sp) +// ld s1, 40(sp) +// ld s0, 32(sp) +// +// For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and +// the last restore adjusts SP by the specified amount. For example: +// ld s7, 56(sp) +// ld s6, 48(sp) +// ld s5, 40(sp) +// ld s4, 32(sp) +// ld s3, 24(sp) +// ld s2, 16(sp) +// ld s1, 88(sp) +// ld s0, 80(sp) +// +// Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when +// generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store. +// +// Return Value: +// None. 
+ +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) +{ + assert(spDelta >= 0); + unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); + if (regsToRestoreCount == 0) + { + if (spDelta != 0) + { + // Currently this is the case for varargs only + // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. + genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + return; + } + + assert((spDelta % 16) == 0); + + // We also can restore FP and RA, even though they are not in RBM_CALLEE_SAVED. + assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_RA)); + + // Point past the end, to start. We predecrement to find the offset to load from. + static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES); + int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; + + // Save integer registers at higher addresses than floating-point registers. + + regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT; + regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat; + + // Restore in the opposite order of saving. + + if (maskRestoreRegsInt != RBM_NONE) + { + int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); + spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; + } + + if (maskRestoreRegsFloat != RBM_NONE) + { + // If there is any spDelta, it must be used here. + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); + // No need to update spOffset since it's not used after this. + } +} + +// clang-format off +/***************************************************************************** + * + * Generates code for an EH funclet prolog. 
+ * + * Funclets have the following incoming arguments: + * + * catch: a0 = the exception object that was caught (see GT_CATCH_ARG) + * filter: a0 = the exception object to filter (see GT_CATCH_ARG), a1 = CallerSP of the containing function + * finally/fault: none + * + * Funclets set the following registers on exit: + * + * catch: v0 = the address at which execution should resume (see BBJ_EHCATCHRET) + * filter: v0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) + * finally/fault: none + * + * The LOONGARCH64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size, + * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16): + * + * Frame type 1: + * For #framesz <= 32760 and FP/RA at bottom: + * daddiu sp,sp,-#framesz ; establish the frame (predecrement by #framesz), save FP/RA + * sd fp,#outsz(sp) + * sd ra,#outsz+8(sp) + * sd s0,#xxx-8(sp) ; save callee-saved registers, as necessary + * sd s1,#xxx(sp) + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. 
+ * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 2: + * For #framesz <= 32760 and FP/RA at top: + * daddiu sp,sp,-#framesz ; establish the frame + * sd s0,xxx(sp) ; save callee-saved registers, as necessary + * sd s1,xxx+8(sp) + * sd s?,xxx+?(sp) + * sd fp,xxx+?(sp) ; save FP/RA. + * sd ra,xxx+?(sp) + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 3: + * For #framesz > 32760 and FP/RA at bottom: + * ; for funclet, #framesz-#outsz will be less than 32760. + * + * daddiu sp,sp,-(#framesz-#FPRA_delta) ; note maybe 16byte-alignment. + * sd fp, pad(sp) ; pad is depended on stack-16byte-alignment.. + * sd ra, pad+8(sp) + * sd s0,#xxx(sp) ; save callee-saved registers, as necessary, + * sd s1,#xxx+8(sp) + * daddiu sp,sp,-#outsz ; create space for outgoing argument space, mabye 16byte-alignment. 
+ * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 4: + * For #framesz > 32760 and FP/RA at top: + * daddiu sp,sp,-#framesz+PSP_offset ; establish the frame, maybe 16byte-alignment. + * sd s0,xxx(sp) ; save callee-saved registers, as necessary + * sd s1,xxx+8(sp) + * sd s?,xxx+?(sp) + * sd fp,xxx+?(sp) ; save FP/RA. + * sd ra,xxx+?(sp) + * + * daddiu sp,sp,-#PSP_offset ; establish the frame, maybe 16byte-alignment. + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * + * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. 
For the general case, #3, + * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack + * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 224 bytes: + * + * FP,RA registers + * 8 int callee-saved register s0-s7 + * 8 float callee-saved registers f24-f31 + * 8 saved integer argument registers a0-a7, if varargs function + * 1 PSP slot + * 1 alignment slot, future maybe add gp + * == 28 slots * 8 bytes = 224 bytes. + * + * The outgoing argument size, however, can be very large, if we call a function that takes a large number of + * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main + * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of + * outgoing arguments for any call). In that case, we need to 16-byte align the initial change to SP, before + * saving off the callee-saved registers and establishing the PSPsym, so we can use the limited immediate offset + * encodings we have available, before doing another 16-byte aligned SP adjustment to create the outgoing argument + * space. Both changes to SP might need to add alignment padding. + * + * In addition to the above "standard" frames, we also need to support a frame where the saved FP/RA are at the + * highest addresses. This is to match the frame layout (specifically, callee-saved registers including FP/RA + * and the PSPSym) that is used in the main function when a GS cookie is required due to the use of localloc. + * (Note that localloc cannot be used in a funclet.) In these variants, not only has the position of FP/RA + * changed, but where the alignment padding is placed has also changed. 
+ * + * + * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP + * as in the main function. + * + * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we + * must add buffer space for the saved varargs/argument registers here, if the main function did the same. + * + * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. + * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. + * + * if (this is a filter funclet) + * { + * // a1 on entry to a filter funclet is CallerSP of the containing function: + * // either the main function, or the funclet for a handler that this filter is dynamically nested within. + * // Note that a filter can be dynamically nested within a funclet even if it is not statically within + * // a funclet. Consider: + * // + * // try { + * // try { + * // throw new Exception(); + * // } catch(Exception) { + * // throw new Exception(); // The exception thrown here ... + * // } + * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack + * // } filter-handler { + * // } + * // + * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will + * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always + * // create a main function PSP for any function with a filter. 
+ * + * ld a1, CallerSP_to_PSP_slot_delta(a1) ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) + * sd a1, SP_to_PSP_slot_delta(sp) ; store the PSP + * daddiu fp, a1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer + * } + * else + * { + * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. + * // TODO-LOONGARCH64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction. + * + * daddiu a3, fp, Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. a3 is scratch? + * sd a3, SP_to_PSP_slot_delta(sp) ; store the PSP + * } + * + * An example epilog sequence is then: + * + * daddiu sp,sp,#outsz ; if any outgoing argument space + * ... ; restore callee-saved registers + * ld s0,#xxx-8(sp) + * ld s1,#xxx(sp) + * ld fp,#framesz-8(sp) + * ld ra,#framesz(sp) + * daddiu sp,sp,#framesz + * jr ra + * + */ +// clang-format on + +void CodeGen::genFuncletProlog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFuncletProlog()\n"); +#endif + + assert(block != NULL); + assert(block->bbFlags & BBF_FUNCLET_BEG); + + ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); + + gcInfo.gcResetForBB(); + + compiler->unwindBegProlog(); + + regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; + + // Funclets must always save RA and FP, since when we have funclets we must have an FP frame. 
+ assert((maskSaveRegsInt & RBM_RA) != 0); + assert((maskSaveRegsInt & RBM_FP) != 0); + + bool isFilter = (block->bbCatchTyp == BBCT_FILTER); + int frameSize = genFuncletInfo.fiSpDelta1; + + regMaskTP maskArgRegsLiveIn; + if (isFilter) + { + maskArgRegsLiveIn = RBM_A0 | RBM_A1; + } + else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) + { + maskArgRegsLiveIn = RBM_NONE; + } + else + { + maskArgRegsLiveIn = RBM_A0; + } + +#ifdef DEBUG + if (compiler->opts.disAsm) + printf("DEBUG: CodeGen::genFuncletProlog, frameType:%d\n\n", genFuncletInfo.fiFrameType); +#endif + + int offset = 0; + if (genFuncletInfo.fiFrameType == 1) + { + // fiFrameType constraints: + assert(frameSize < 0); + assert(frameSize >= -2048); + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(genFuncletInfo.fiSP_to_FPRA_save_delta & 0xf) && (genFuncletInfo.fiSP_to_FPRA_save_delta <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + } + + maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now + + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + } + else if (genFuncletInfo.fiFrameType == 2) + { + // fiFrameType constraints: + assert(frameSize < 0); + assert(frameSize >= 
-2048); + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + } + else if (genFuncletInfo.fiFrameType == 3) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(offset & 0xf) && (offset <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + } + + maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now + + offset = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, 0); + + genStackPointerAdjustment(frameSize + SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (genFuncletInfo.fiFrameType == 4) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, 0); + + genStackPointerAdjustment(frameSize + SP_delta, REG_R21, nullptr, /* 
reportUnwindData */ true);
+ }
+ else
+ {
+ unreached();
+ }
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ // If there is no PSPSym (CoreRT ABI), we are done. Otherwise, we need to set up the PSPSym in the funclet frame.
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
+ {
+ if (isFilter)
+ {
+ // This is the first block of a filter
+ // Note that register a1 = CallerSP of the containing function
+ // A1 is overwritten by the first Load (new callerSP)
+ // A2 is scratch when we have a large constant offset
+
+ // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or
+ // function)
+ genInstrWithConstant(INS_ld_d, EA_PTRSIZE, REG_A1, REG_A1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta,
+ REG_A2, false);
+ regSet.verifyRegUsed(REG_A1);
+
+ // Store the PSP value (aka CallerSP)
+ genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2,
+ false);
+
+ // re-establish the frame pointer
+ genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_A1,
+ genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false);
+ }
+ else // This is a non-filter funclet
+ {
+ // A3 is scratch, A2 can also become scratch.
+
+ // compute the CallerSP, given the frame pointer. a3 is scratch?
+ genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_A3, REG_FPBASE,
+ -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false);
+ regSet.verifyRegUsed(REG_A3);
+
+ genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2,
+ false);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */ + +void CodeGen::genFuncletEpilog() +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFuncletEpilog()\n"); +#endif + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + bool unwindStarted = false; + int frameSize = genFuncletInfo.fiSpDelta1; + + if (!unwindStarted) + { + // We can delay this until we know we'll generate an unwindable instruction, if necessary. + compiler->unwindBegEpilog(); + unwindStarted = true; + } + + regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; + regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat; + + // Funclets must always save RA and FP, since when we have funclets we must have an FP frame. + assert((maskRestoreRegsInt & RBM_RA) != 0); + assert((maskRestoreRegsInt & RBM_FP) != 0); + +#ifdef DEBUG + if (compiler->opts.disAsm) + printf("DEBUG: CodeGen::genFuncletEpilog, frameType:%d\n\n", genFuncletInfo.fiFrameType); +#endif + + regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat; + + assert(frameSize < 0); + if (genFuncletInfo.fiFrameType == 1) + { + // fiFrameType constraints: + assert(frameSize >= -2048); + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + + regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end + + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(genFuncletInfo.fiSP_to_FPRA_save_delta & 0xf) && (genFuncletInfo.fiSP_to_FPRA_save_delta <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + compiler->unwindSaveReg(REG_RA, 
genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); + } + + // generate daddiu SP,SP,imm + genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (genFuncletInfo.fiFrameType == 2) + { + // fiFrameType constraints: + assert(frameSize >= -2048); + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + + // generate daddiu SP,SP,imm + genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (genFuncletInfo.fiFrameType == 3) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + + int offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + //first, generate daddiu SP,SP,imm + genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + + int offset2 = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; + assert(offset2 < 2040);//can amend. 
+
+ regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end
+ genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset2, 0);
+
+// #ifdef OPTIMIZE_LOONGSON_EXT
+// if (!(offset & 0xf) && (offset <= 0xff0))
+// {
+// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4);
+// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset);
+// }
+// else
+// #endif
+ {
+ GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8);
+ compiler->unwindSaveReg(REG_RA, offset + 8);
+
+ GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset);
+ compiler->unwindSaveReg(REG_FP, offset);
+ }
+ //second, generate daddiu SP,SP,imm for remaining space.
+ genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true);
+ }
+ else if (genFuncletInfo.fiFrameType == 4)
+ {
+ // fiFrameType constraints:
+ assert(frameSize < -2048);
+
+ int offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8);
+ int SP_delta = roundUp((UINT)offset, STACK_ALIGN);
+ offset = SP_delta - offset;
+
+ genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true);
+
+ genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset, 0);
+
+ genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true);
+ }
+ else
+ {
+ unreached();
+ }
+ GetEmitter()->emitIns_R_R_I(INS_jirl, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0);
+ compiler->unwindReturn(REG_RA);
+
+ compiler->unwindEndEpilog();
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ * Note that all funclet prologs are identical, and all funclet epilogs are
+ * identical (per type: filters are identical, and non-filters are identical).
+ * Thus, we compute the data used for these just once.
+ * + * See genFuncletProlog() for more information about the prolog/epilog sequences. + */ + +void CodeGen::genCaptureFuncletPrologEpilogInfo() +{ + if (!compiler->ehAnyFunclets()) + return; + + assert(isFramePointerUsed()); + + // The frame size and offsets must be finalized + assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); + + genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); + + regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; + assert((rsMaskSaveRegs & RBM_RA) != 0); + assert((rsMaskSaveRegs & RBM_FP) != 0); + + unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? 8 : 0; + + unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); + assert((saveRegsCount == compiler->compCalleeRegsPushed) || (saveRegsCount == compiler->compCalleeRegsPushed - 1)); + + unsigned saveRegsPlusPSPSize; + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize +PSPSize/* -2*8*/; + else + saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize +PSPSize; + + if (compiler->info.compIsVarArgs) + { + // For varargs we always save all of the integer register arguments + // so that they are contiguous with the incoming stack arguments. 
+ saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES; + } + unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN); + + assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); + unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); + + unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned; + assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0); + + int SP_to_FPRA_save_delta = compiler->lvaOutgoingArgSpaceSize; + + unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; + unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN); + assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned); + + unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; + assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES)); + + if (maxFuncletFrameSizeAligned <= (2048-8)) + { + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + genFuncletInfo.fiFrameType = 1; + saveRegsPlusPSPSize -= 2*8;// FP/RA + } + else + { + genFuncletInfo.fiFrameType = 2; + SP_to_FPRA_save_delta += REGSIZE_BYTES + PSPSize; + } + } + else + { + unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; + assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); + + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + genFuncletInfo.fiFrameType = 3; + saveRegsPlusPSPSize -= 2*8;// FP/RA + } + else + { + genFuncletInfo.fiFrameType = 4; + SP_to_FPRA_save_delta += REGSIZE_BYTES + PSPSize; + } + } + + + int CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize; + genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; + int SP_to_PSP_slot_delta = funcletFrameSizeAligned - saveRegsPlusPSPSize; + + /* Now save it for future use */ + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSP_to_FPRA_save_delta = SP_to_FPRA_save_delta; 
+ + genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; + genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; + +#ifdef DEBUG + if (verbose) + { + printf("\n"); + printf("Funclet prolog / epilog info\n"); + printf(" Save regs: "); + dspRegMask(genFuncletInfo.fiSaveRegs); + printf("\n"); + printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); + printf(" SP to FP/RA save location delta: %d\n", genFuncletInfo.fiSP_to_FPRA_save_delta); + printf(" Frame type: %d\n", genFuncletInfo.fiFrameType); + printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1); + + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + if (CallerSP_to_PSP_slot_delta != + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging + { + printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); + } + } + } + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta >= 0); +#endif // DEBUG +} + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX End Prolog / Epilog XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +BasicBlock* CodeGen::genCallFinally(BasicBlock* block) +{ + // Generate a call to the finally, like this: + // mov a0,qword ptr [fp + 10H] / sp // Load a0 with PSPSym, or sp if PSPSym is not used + // bl finally-funclet + // b finally-return // Only for non-retless finally calls + // The 'b' can be a NOP if we're going to the next block. 
+ + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, REG_A0, compiler->lvaPSPSym, 0); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_A0, REG_SPBASE, 0); + } + GetEmitter()->emitIns_J(INS_bl, block->bbJumpDest); + + if (block->bbFlags & BBF_RETLESS_CALL) + { + // We have a retless call, and the last instruction generated was a call. + // If the next block is in a different EH region (or is the end of the code + // block), then we need to generate a breakpoint here (since it will never + // get executed) to get proper unwind behavior. + + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) + { + instGen(INS_break); // This should never get executed + } + } + else + { + // Because of the way the flowgraph is connected, the liveness info for this one instruction + // after the call is not (can not be) correct in cases where a variable has a last use in the + // handler. So turn off GC reporting for this single instruction. + GetEmitter()->emitDisableGC(); + + // Now go to where the finally funclet needs to return to. + if (block->bbNext->bbJumpDest == block->bbNext->bbNext) + { + // Fall-through. + // TODO-LOONGARCH64-CQ: Can we get rid of this instruction, and just have the call return directly + // to the next instruction? This would depend on stack walking from within the finally + // handler working without this instruction being in this special EH region. + instGen(INS_nop); + } + else + { + inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); + } + + GetEmitter()->emitEnableGC(); + } + + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. 
+ if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + block = block->bbNext; + } + return block; +} + +void CodeGen::genEHCatchRet(BasicBlock* block) +{ + GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); +} + +// move an immediate value into an integer register +void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, + regNumber reg, + ssize_t imm, + insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{//maybe optimize. + emitter* emit = GetEmitter(); + + if (!compiler->opts.compReloc) + { + size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs. + } + + if (EA_IS_RELOC(size)) + { + assert(genIsValidIntReg(reg));//TODO: maybe optimize!!! + emit->emitIns_R_AI(INS_bl, size, reg, imm);//for example: EA_PTR_DSP_RELOC + } + else + { + emit->emitIns_I_la(size, reg, imm);//TODO: maybe optimize. + } + + regSet.verifyRegUsed(reg); +} + +/*********************************************************************************** + * + * Generate code to set a register 'targetReg' of type 'targetType' to the constant + * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call + * genProduceReg() on the target register. 
+ */ +void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree) +{ + switch (tree->gtOper) + { + case GT_CNS_INT: + { + // relocatable values tend to come down as a CNS_INT of native int type + // so the line between these two opcodes is kind of blurry + GenTreeIntConCommon* con = tree->AsIntConCommon(); + ssize_t cnsVal = con->IconValue(); + + //if (con->ImmedValNeedsReloc(compiler)) + if (con->ImmedValNeedsReloc(compiler) && compiler->opts.compReloc) + { + //instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); + assert(compiler->opts.compReloc); + GetEmitter()->emitIns_R_AI(INS_bl, EA_HANDLE_CNS_RELOC, targetReg, cnsVal); + regSet.verifyRegUsed(targetReg); + } + else + { + genSetRegToIcon(targetReg, cnsVal, targetType); + } + } + break; + + case GT_CNS_DBL: + { + emitter* emit = GetEmitter(); + emitAttr size = emitActualTypeSize(tree); + double constValue = tree->AsDblCon()->gtDconVal; + + // Make sure we use "daddiu reg, zero, 0x00" only for positive zero (0.0) + // and not for negative zero (-0.0) + if (*(__int64*)&constValue == 0) + { + // A faster/smaller way to generate 0.0 + // We will just zero out the entire vector register for both float and double + emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, REG_R0); + } + /*else if (emitter::emitIns_valid_imm_for_fmov(constValue)) + {// LOONGARCH64 doesn't need this. + assert(!"unimplemented on LOONGARCH yet"); + }*/ + else + { + // Get a temp integer register to compute long address. + //regNumber addrReg = tree->GetSingleTempReg(); + + // We must load the FP constant from the constant pool + // Emit a data section constant for the float or double constant. + CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size); + + // Load the FP constant. + assert(targetReg >= REG_F0); + + instruction ins = size == EA_4BYTE ? INS_fld_s : INS_fld_d; + + // Compute the address of the FP constant and load the data. 
+ emit->emitIns_R_C(ins, size, targetReg, REG_NA, hnd, 0); + } + } + break; + + default: + unreached(); + } +} + +// Produce code for a GT_INC_SATURATE node. +void CodeGen::genCodeForIncSaturate(GenTree* tree) +{ + assert(!"unimplemented on LOONGARCH yet"); +#if 0 + regNumber targetReg = tree->GetRegNum(); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(!tree->isContained()); + // The dst can only be a register. + assert(targetReg != REG_NA); + + GenTree* operand = tree->gtGetOp1(); + assert(!operand->isContained()); + // The src must be a register. + regNumber operandReg = genConsumeReg(operand); + + GetEmitter()->emitIns_R_R_I(INS_addi_d, emitActualTypeSize(tree), targetReg, operandReg, 1); + GetEmitter()->emitIns_R_R_I(INS_bne, emitActualTypeSize(tree), targetReg, REG_R0, 2); + GetEmitter()->emitIns_R_R_R(INS_andn, emitActualTypeSize(tree), targetReg, targetReg, REG_R0); + + genProduceReg(tree); +#endif +} + +// Generate code to get the high N bits of a N*N=2N bit multiplication result +void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) +{ + assert(!treeNode->gtOverflowEx()); + + genConsumeOperands(treeNode); + + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + emitAttr attr = emitActualTypeSize(treeNode); + unsigned isUnsigned = (treeNode->gtFlags & GTF_UNSIGNED); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + + assert(!varTypeIsFloating(targetType)); + + // op1 and op2 can only be a reg at present, will amend in the future. + assert(!op1->isContained()); + assert(!op2->isContained()); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + + if (EA_SIZE(attr) == EA_8BYTE) + { + instruction ins = isUnsigned ? 
INS_mulh_du : INS_mulh_d; + + emit->emitIns_R_R_R(ins, attr, targetReg, op1->GetRegNum(), op2->GetRegNum()); + } + else + { + assert(EA_SIZE(attr) == EA_4BYTE); + instruction ins = isUnsigned ? INS_mulh_wu : INS_mulh_w; + + emit->emitIns_R_R_R(ins, attr, targetReg, op1->GetRegNum(), op2->GetRegNum()); + } + + genProduceReg(treeNode); +} + +// Generate code for ADD, SUB, MUL, AND, OR and XOR +// This method is expected to have called genConsumeOperands() before calling it. +void CodeGen::genCodeForBinary(GenTreeOp* treeNode) +{ + const genTreeOps oper = treeNode->OperGet(); + regNumber targetReg = treeNode->GetRegNum(); + emitter* emit = GetEmitter(); + + assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_AND || + oper == GT_OR || oper == GT_XOR); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + instruction ins = genGetInsForOper(treeNode); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + + regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2); + assert(r == targetReg); + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCodeForLclVar: Produce code for a GT_LCL_VAR node. +// +// Arguments: +// tree - the GT_LCL_VAR node +// +void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) +{ + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + bool isRegCandidate = varDsc->lvIsRegCandidate(); + + // lcl_vars are not defs + assert((tree->gtFlags & GTF_VAR_DEF) == 0); + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. 
+ + if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED)) + { + var_types targetType = varDsc->GetRegisterType(tree); + //if (tree->gtFlags & GTF_UNSIGNED) + // targetType = varTypeSignedToUnsigned(targetType);//uuuuu. + emitter* emit = GetEmitter(); + + // targetType must be a normal scalar type and not a TYP_STRUCT + assert(targetType != TYP_STRUCT); + instruction ins = ins_Load(targetType); + emitAttr attr = emitTypeSize(targetType); + + emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. +// +// Arguments: +// tree - the GT_STORE_LCL_FLD node +// +void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + noway_assert(targetType != TYP_STRUCT); + +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. Vector3) field + if (tree->TypeGet() == TYP_SIMD12) + { + genStoreLclTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + // record the offset + unsigned offset = tree->GetLclOffs(); + + // We must have a stack store with GT_STORE_LCL_FLD + noway_assert(targetReg == REG_NA); + + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. 
    // Stores to a normalize-on-store local must already be of the local's actual (normalized) type.
    assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));

    GenTree* data = tree->gtOp1;
    genConsumeRegs(data);

    // Pick the register holding the value to store. A contained operand is either
    // the integral constant zero (stored straight from the hard-wired zero register
    // R0) or a contained BITCAST whose source register is reused directly.
    regNumber dataReg = REG_NA;
    if (data->isContainedIntOrIImmed())
    {
        assert(data->IsIntegralConst(0));
        dataReg = REG_R0;
    }
    else if (data->isContained())
    {
        assert(data->OperIs(GT_BITCAST));
        const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1();
        assert(!bitcastSrc->isContained());
        dataReg = bitcastSrc->GetRegNum();
    }
    else
    {
        assert(!data->isContained());
        dataReg = data->GetRegNum();
    }
    assert(dataReg != REG_NA);

    instruction ins  = ins_StoreFromSrc(dataReg, targetType);
    emitAttr    attr = emitTypeSize(targetType);

    emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);

    genUpdateLife(tree);

    // The field now lives on the stack, not in a register.
    varDsc->SetRegNum(REG_STK);
}

//------------------------------------------------------------------------
// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node.
//
// Arguments:
//    lclNode - the GT_STORE_LCL_VAR node
//
void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode)
{
    GenTree* data = lclNode->gtOp1;

    // var = call, where call returns a multi-reg return value
    // case is handled separately.
    if (data->gtSkipReloadOrCopy()->IsMultiRegNode())
    {
        genMultiRegCallStoreToLocal(lclNode);
        return;
    }

    regNumber targetReg = lclNode->GetRegNum();
    emitter*  emit      = GetEmitter();

    unsigned varNum = lclNode->GetLclNum();
    assert(varNum < compiler->lvaCount);
    LclVarDsc* varDsc     = &(compiler->lvaTable[varNum]);
    var_types  targetType = varDsc->GetRegisterType(lclNode);

    if (lclNode->IsMultiReg())
    {
        // Multi-reg stores are not implemented for LoongArch64 yet; the loop body
        // below (guarded by the assert) is a placeholder sketch only.
        regNumber    operandReg = genConsumeReg(data);
        unsigned int regCount   = varDsc->lvFieldCnt;
        for (unsigned i = 0; i < regCount; ++i)
        {
            assert(!"unimplemented on LOONGARCH yet");
            regNumber varReg = lclNode->GetRegByIndex(i);
            assert(varReg != REG_NA);
            unsigned   fieldLclNum = varDsc->lvFieldLclStart + i;
            LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldLclNum);
            assert(fieldVarDsc->TypeGet() == TYP_FLOAT);
            GetEmitter()->emitIns_R_R_I(INS_st_d, emitTypeSize(TYP_FLOAT), varReg, operandReg, i);
        }
        genProduceReg(lclNode);
    }
    else
    {
#ifdef FEATURE_SIMD
        // storing of TYP_SIMD12 (i.e. Vector3) field
        if (lclNode->TypeGet() == TYP_SIMD12)
        {
            genStoreLclTypeSIMD12(lclNode);
            return;
        }
#endif // FEATURE_SIMD

        genConsumeRegs(data);

        regNumber dataReg = REG_NA;
        if (data->isContained())
        {
            // This is only possible for a zero-init or bitcast.
            const bool zeroInit = data->IsIntegralConst(0);
#if 0
            // TODO: supporting the SIMD on LoongArch64.
            if (zeroInit && varTypeIsSIMD(targetType))
            {
                assert(!varTypeIsSIMD(targetType));
                // assert(targetType == TYP_SIMD8);//TODO:TYP_SIMD16
                assert(targetReg == REG_NA);
                GetEmitter()->emitIns_S_R(INS_st_d, EA_8BYTE, REG_R0, varNum, 0);
                genUpdateLife(lclNode);
                return;
            }
#else
            assert(!varTypeIsSIMD(targetType));
#endif

            if (zeroInit)
            {
                // Zero comes for free from the architectural zero register.
                dataReg = REG_R0;
            }
            else if (data->IsIntegralConst())
            {
                // Materialize the contained constant into R21 (used as a scratch
                // register throughout this file).
                ssize_t imm = data->AsIntConCommon()->IconValue();
                emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm);
                dataReg = REG_R21;
            }
            else
            {
                assert(data->OperIs(GT_BITCAST));
                const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1();
                assert(!bitcastSrc->isContained());
                dataReg = bitcastSrc->GetRegNum();
            }
        }
        else
        {
            assert(!data->isContained());
            dataReg = data->GetRegNum();
        }
        assert(dataReg != REG_NA);

        if (targetReg == REG_NA) // store into stack based LclVar
        {
            inst_set_SV_var(lclNode);

            instruction ins  = ins_StoreFromSrc(dataReg, targetType);
            emitAttr    attr = emitActualTypeSize(targetType);

            emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0);

            genUpdateLife(lclNode);

            varDsc->SetRegNum(REG_STK);
        }
        else // store into register (i.e move into register)
        {
            if (dataReg != targetReg)
            {
                // Assign into targetReg when dataReg (from op1) is not the same register
                inst_Mov(targetType, targetReg, dataReg, true);
            }
            genProduceReg(lclNode);
        }
    }
}

//------------------------------------------------------------------------
// genSimpleReturn: Generates code for simple return statement for loongarch64.
//
// Note: treeNode's and op1's registers are already consumed.
//
// Arguments:
//    treeNode - The GT_RETURN or GT_RETFILT tree node with non-struct and non-void type
//
// Return Value:
//    None
//
void CodeGen::genSimpleReturn(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    GenTree*  op1        = treeNode->gtGetOp1();
    var_types targetType = treeNode->TypeGet();

    assert(targetType != TYP_STRUCT);
    assert(targetType != TYP_VOID);

    regNumber retReg = varTypeUsesFloatArgReg(treeNode) ? REG_FLOATRET : REG_INTRET;

    bool movRequired = (op1->GetRegNum() != retReg);

    if (!movRequired)
    {
        if (op1->OperGet() == GT_LCL_VAR)
        {
            GenTreeLclVarCommon* lcl            = op1->AsLclVarCommon();
            bool                 isRegCandidate = compiler->lvaTable[lcl->GetLclNum()].lvIsRegCandidate();
            if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0))
            {
                // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR

                unsigned   lclNum  = lcl->GetLclNum();
                LclVarDsc* varDsc  = &(compiler->lvaTable[lclNum]);
                var_types  op1Type = genActualType(op1->TypeGet());
                var_types  lclType = genActualType(varDsc->TypeGet());

                if (genTypeSize(op1Type) < genTypeSize(lclType))
                {
                    movRequired = true;
                }
            }
        }
    }
    if (movRequired)
    {
        emitAttr attr = emitActualTypeSize(targetType);
        if (varTypeUsesFloatArgReg(treeNode))
        {
            if (attr == EA_4BYTE)
                GetEmitter()->emitIns_R_R(INS_fmov_s, attr, retReg, op1->GetRegNum());
            else
                GetEmitter()->emitIns_R_R(INS_fmov_d, attr, retReg, op1->GetRegNum());
        }
        else
        {
            if (attr == EA_4BYTE) // && op1->OperIs(GT_LCL_VAR) &&
                                  // (emitActualTypeSize(compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvType) ==
                                  // EA_8BYTE))
            {
                if (treeNode->gtFlags & GTF_UNSIGNED)
                    // bstrpick.d retReg, src, 31, 0 extracts the low 32 bits (zero-extends).
                    GetEmitter()->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, retReg, op1->GetRegNum(), 31, 0);
                else
                    // slli.w with shift 0 acts as a 32-bit move that sign-extends into 64 bits.
                    GetEmitter()->emitIns_R_R_I(INS_slli_w, attr, retReg, op1->GetRegNum(), 0);
            }
            else
                // ori retReg, src, 0 is the canonical 64-bit register move.
                GetEmitter()->emitIns_R_R_I(INS_ori, attr, retReg, op1->GetRegNum(), 0);
        }
    }
}

/***********************************************************************************************
 *  Generate code for localloc
 */
void CodeGen::genLclHeap(GenTree* tree)
{
    assert(tree->OperGet() == GT_LCLHEAP);
    assert(compiler->compLocallocUsed);

    emitter* emit = GetEmitter();
    GenTree* size = tree->AsOp()->gtOp1;
    noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));

    regNumber            targetReg                = tree->GetRegNum();
    regNumber            regCnt                   = REG_NA;
    regNumber            pspSymReg                = REG_NA;
    var_types            type                     = genActualType(size->gtType);
    emitAttr             easz                     = emitTypeSize(type);
    BasicBlock*          endLabel                 = nullptr; // can optimize for loongarch.
    unsigned             stackAdjustment          = 0;
    const target_ssize_t ILLEGAL_LAST_TOUCH_DELTA = (target_ssize_t)-1;
    target_ssize_t       lastTouchDelta =
        ILLEGAL_LAST_TOUCH_DELTA; // The number of bytes from SP to the last stack address probed.

    noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
    noway_assert(genStackLevel == 0);   // Can't have anything on the stack

    // compute the amount of memory to allocate to properly STACK_ALIGN.
    size_t amount = 0;
    if (size->IsCnsIntOrI())
    {
        // If size is a constant, then it must be contained.
        assert(size->isContained());

        // If amount is zero then return null in targetReg
        amount = size->AsIntCon()->gtIconVal;
        if (amount == 0)
        {
            instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
            goto BAILOUT;
        }

        // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN
        amount = AlignUp(amount, STACK_ALIGN);
    }
    else
    {
        // If 0 bail out by returning null in targetReg
        genConsumeRegAndCopy(size, targetReg);
        endLabel = genCreateTempLabel();
        emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0);

        // Compute the size of the block to allocate and perform alignment.
        // If compInitMem=true, we can reuse targetReg as regcnt,
        // since we don't need any internal registers.
        if (compiler->info.compInitMem)
        {
            assert(tree->AvailableTempRegCount() == 0);
            regCnt = targetReg;
        }
        else
        {
            regCnt = tree->ExtractTempReg();
            if (regCnt != targetReg)
            {
                // Register move via "ori regCnt, targetReg, 0".
                emit->emitIns_R_R_I(INS_ori, easz, regCnt, targetReg, 0);
            }
        }

        // Align to STACK_ALIGN
        // regCnt will be the total number of bytes to localloc
        inst_RV_IV(INS_addi_d, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));

        // R21 is used below as the scratch mask register, so regCnt must not alias it.
        assert(regCnt != REG_R21);
        ssize_t imm2 = ~(STACK_ALIGN - 1);
        emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, imm2);
        emit->emitIns_R_R_R(INS_and, emitActualTypeSize(type), regCnt, regCnt, REG_R21);
    }

    // If we have an outgoing arg area then we must adjust the SP by popping off the
    // outgoing arg area. We will restore it right before we return from this method.
    //
    // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following
    // are the cases that need to be handled:
    //   i) Method has out-going arg area.
    //      It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
    //      Therefore, we will pop off the out-going arg area from the stack pointer before allocating the localloc
    //      space.
    //  ii) Method has no out-going arg area.
    //      Nothing to pop off from the stack.
    if (compiler->lvaOutgoingArgSpaceSize > 0)
    {
        unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
        // assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
        //                                                                 // aligned
        genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, outgoingArgSpaceAligned,
                             rsGetRsvdReg());
        stackAdjustment += outgoingArgSpaceAligned;
    }

    if (size->IsCnsIntOrI())
    {
        // We should reach here only for non-zero, constant size allocations.
        assert(amount > 0);
        ssize_t imm = -16;

        // For small allocations we will generate up to four stp instructions, to zero 16 to 64 bytes.
        static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
        assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time
        size_t stpCount = amount / (REGSIZE_BYTES * 2);
        if (stpCount <= 4)
        {
            imm = -16 * stpCount;
            emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm);

            imm = -imm;
            // Zero the allocated area 16 bytes at a time with paired st.d stores.
            while (stpCount != 0)
            {
                imm -= 8;
                emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm);
                imm -= 8;
                emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm);
                stpCount -= 1;
            }

            lastTouchDelta = 0;

            goto ALLOC_DONE;
        }
        else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <=
        {
            // Since the size is less than a page, simply adjust the SP value.
            // The SP might already be in the guard page, so we must touch it BEFORE
            // the alloc, not after.

            // ld_w r0, 0(SP)
            emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SP, 0);

            lastTouchDelta = amount;
            imm            = -(ssize_t)amount;
            assert(-8192 <= imm && imm < 0);
            if (-2048 <= imm && imm < 0)
                emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm);
            else
            {
                // Immediate doesn't fit addi.d's 12-bit field: scale SP down by 8,
                // add imm/8 (amount is 16-aligned so imm>>3 is exact), scale back up.
                emit->emitIns_R_R_I(INS_srai_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3);
                emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm >> 3);
                emit->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3);
            }

            goto ALLOC_DONE;
        }

        // else, "mov regCnt, amount"
        // If compInitMem=true, we can reuse targetReg as regcnt.
        // Since size is a constant, regCnt is not yet initialized.
        assert(regCnt == REG_NA);
        if (compiler->info.compInitMem)
        {
            assert(tree->AvailableTempRegCount() == 0);
            regCnt = targetReg;
        }
        else
        {
            regCnt = tree->ExtractTempReg();
        }
        genSetRegToIcon(regCnt, amount, ((unsigned int)amount == amount) ? TYP_INT : TYP_LONG);
    }

    if (compiler->info.compInitMem)
    {
        // At this point 'regCnt' is set to the total number of bytes to locAlloc.
        // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid
        // by tickling the pages, we will just push 0's on the stack.
        //
        // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2
        // and localloc size is a multiple of STACK_ALIGN.

        // Loop:
        ssize_t imm = -16;
        emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm);

        emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, 8);
        emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, 0);

        // If not done, loop
        // Note that regCnt is the number of bytes to stack allocate.
        // Therefore we need to subtract 16 from regcnt here.
        assert(genIsValidIntReg(regCnt));

        emit->emitIns_R_R_I(INS_addi_d, emitActualTypeSize(type), regCnt, regCnt, -16);

        // The branch displacement is a hard-coded instruction count: back 4
        // instructions (each 4 bytes), i.e. to the addi.d at "Loop:" above.
        // The assert guards against someone adding instructions in between.
        assert(imm == (-4 << 2)); // goto loop.
        emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2));

        lastTouchDelta = 0;
    }
    else
    {
        // At this point 'regCnt' is set to the total number of bytes to localloc.
        //
        // We don't need to zero out the allocated memory. However, we do have
        // to tickle the pages to ensure that SP is always valid and is
        // in sync with the "stack guard page". Note that in the worst
        // case SP is on the last byte of the guard page. Thus you must
        // touch SP-0 first not SP-0x1000.
        //
        // This is similar to the prolog code in CodeGen::genAllocLclFrame().
        //
        // Note that we go through a few hoops so that SP never points to
        // illegal pages at any time during the tickling process.
        //
        //       sltu     R21, SP, regCnt
        //       sub_d    regCnt, SP, regCnt      // regCnt now holds ultimate SP
        //       masknez  regCnt, regCnt, R21     // Overflow, pick lowest possible value
        //
        //       lu12i_w  regTmp, eeGetPageSize()>>12
        //  Loop:
        //       ld_w     r0, 0(SP)               // tickle the page - read from the page
        //       sub_d    R21, SP, regTmp         // decrement SP by eeGetPageSize()
        //       bltu     R21, regCnt, Done
        //       sub_d    SP, SP,regTmp
        //       b        Loop
        //
        //  Done:
        //       mov      SP, regCnt
        //

        // Setup the regTmp
        regNumber regTmp = tree->GetSingleTempReg();

        assert(regCnt != REG_R21);
        emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, REG_R21, REG_SPBASE, regCnt);

        //// dsubu  regCnt, SP, regCnt      // regCnt now holds ultimate SP
        emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);

        // Overflow, set regCnt to lowest possible value
        emit->emitIns_R_R_R(INS_masknez, EA_PTRSIZE, regCnt, regCnt, REG_R21);

        // lu12i.w loads a 20-bit immediate shifted left 12, so the page size must
        // be a multiple of 4KB for this to reproduce it exactly.
        assert(compiler->eeGetPageSize() == ((compiler->eeGetPageSize() >> 12) << 12));
        emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regTmp, compiler->eeGetPageSize() >> 12);

        // genDefineTempLabel(loop);

        // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
        emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SPBASE, 0);

        // decrement SP by eeGetPageSize()
        emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_R21, REG_SPBASE, regTmp);

        assert(regTmp != REG_R21);

        // Forward 3 instructions (hard-coded displacement) to "Done:" below.
        ssize_t imm = 3 << 2; // goto done.
        emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, REG_R21, regCnt, imm);

        emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, regTmp);

        imm = -4 << 2;
        // Jump to loop and tickle new stack address
        emit->emitIns_I(INS_b, EA_PTRSIZE, imm);

        // Done with stack tickle loop
        // genDefineTempLabel(done);

        // Now just move the final value to SP
        emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0);

        // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
        // we're going to assume the worst and probe.
    }

ALLOC_DONE:
    // Re-adjust SP to allocate outgoing arg area. We must probe this adjustment.
    if (stackAdjustment != 0)
    {
        assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
        assert((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) || (lastTouchDelta >= 0));

        const regNumber tmpReg = rsGetRsvdReg();

        if ((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) ||
            (stackAdjustment + (unsigned)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES >
             compiler->eeGetPageSize()))
        {
            genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, tmpReg);
        }
        else
        {
            genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, tmpReg);
        }

        // Return the stackalloc'ed address in result register.
        // TargetReg = SP + stackAdjustment.
        //
        genInstrWithConstant(INS_addi_d, EA_PTRSIZE, targetReg, REG_SPBASE, (ssize_t)stackAdjustment, tmpReg);
    }
    else // stackAdjustment == 0
    {
        // Move the final value of SP to targetReg
        GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, targetReg, REG_SPBASE, 0);
    }

BAILOUT:
    if (endLabel != nullptr)
        genDefineTempLabel(endLabel);

    genProduceReg(tree);
}

//------------------------------------------------------------------------
// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node.
//
// Arguments:
//    tree - the node
//
void CodeGen::genCodeForNegNot(GenTree* tree)
{
    assert(tree->OperIs(GT_NEG, GT_NOT));

    var_types targetType = tree->TypeGet();

    assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType));

    regNumber   targetReg = tree->GetRegNum();
    instruction ins       = genGetInsForOper(tree);

    // The arithmetic node must be sitting in a register (since it's not contained)
    assert(!tree->isContained());
    // The dst can only be a register.
    assert(targetReg != REG_NA);

    GenTree* operand = tree->gtGetOp1();
    assert(!operand->isContained());
    // The src must be a register.
    regNumber operandReg = genConsumeReg(operand);

    emitAttr attr = emitActualTypeSize(tree);
    GetEmitter()->emitIns_R_R(ins, attr, targetReg, operandReg);

    genProduceReg(tree);
}

//------------------------------------------------------------------------
// genCodeForBswap: Produce code for a GT_BSWAP / GT_BSWAP16 node.
//
// Arguments:
//    tree - the node
//
void CodeGen::genCodeForBswap(GenTree* tree)
{
    // NOTE(review): message string has typos ("unimpleement", "LOONGAARCH64");
    // other stubs in this file use "unimplemented on LOONGARCH yet" — consider
    // making it consistent in a follow-up (the string is runtime text, left as-is here).
    assert(!"unimpleement on LOONGAARCH64 yet");
}

//------------------------------------------------------------------------
// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD:
// (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph;
// (2) float/double MOD is morphed into a helper call by front-end.
//
// NOTE(review): despite the header comment, the assert below also accepts
// GT_MOD/GT_UMOD and the code emits mod.w/mod.d for them — confirm which
// statement is authoritative.
//
// Arguments:
//    tree - the node
//
void CodeGen::genCodeForDivMod(GenTreeOp* tree)
{ // can amend further.
    assert(tree->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV));

    var_types targetType = tree->TypeGet();
    emitter*  emit       = GetEmitter();

    genConsumeOperands(tree);

    if (varTypeIsFloating(targetType))
    {
        // Floating point divide never raises an exception
        assert(varTypeIsFloating(tree->gtOp1));
        assert(varTypeIsFloating(tree->gtOp2));
        assert(tree->gtOper == GT_DIV);
        // genCodeForBinary(tree);
        instruction ins = genGetInsForOper(tree);
        emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(),
                            tree->gtOp2->GetRegNum());
    }
    else // an integer divide operation
    {
        GenTree* divisorOp = tree->gtGetOp2();
        // divisorOp can be immed or reg
        assert(!divisorOp->isContained() || divisorOp->isContainedIntOrIImmed());

        if (divisorOp->IsIntegralConst(0) || divisorOp->GetRegNum() == REG_R0)
        {
            // We unconditionally throw a divide by zero exception
            genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO);
        }
        else // the divisor is not the constant zero
        {
            GenTree* src1     = tree->gtOp1;
            unsigned TypeSize = genTypeSize(genActualType(tree->TypeGet()));
            emitAttr size     = EA_ATTR(TypeSize);

            assert(TypeSize >= genTypeSize(genActualType(src1->TypeGet())) &&
                   TypeSize >= genTypeSize(genActualType(divisorOp->TypeGet())));

            // ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal;
            regNumber   Reg1       = src1->GetRegNum();
            regNumber   divisorReg = divisorOp->GetRegNum();
            instruction ins;

            // Check divisorOp first as we can always allow it to be a contained immediate
            if (divisorOp->isContainedIntOrIImmed())
            {
                // Materialize the contained immediate into scratch register R21:
                // a single addi.d when it fits in 12 signed bits, else lu12i.w + ori.
                ssize_t intConst = (int)(divisorOp->AsIntCon()->gtIconVal);
                divisorReg       = REG_R21;
                if ((-2048 <= intConst) && (intConst <= 0x7ff))
                    emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst);
                else
                {
                    emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, intConst >> 12);
                    emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, intConst & 0xfff);
                }
            }
            // Only for commutative operations do we check src1 and allow it to be a contained immediate
            else if (tree->OperIsCommutative())
            {
                // src1 can be immed or reg
                assert(!src1->isContained() || src1->isContainedIntOrIImmed());

                // Check src1 and allow it to be a contained immediate
                if (src1->isContainedIntOrIImmed())
                {
                    assert(!divisorOp->isContainedIntOrIImmed());
                    ssize_t intConst = (int)(src1->AsIntCon()->gtIconVal);
                    Reg1             = REG_R21;
                    if ((-2048 <= intConst) && (intConst <= 0x7ff))
                        emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst);
                    else
                    {
                        emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, intConst >> 12);
                        emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, intConst & 0xfff);
                    }
                }
            }
            else
            {
                // src1 can only be a reg
                assert(!src1->isContained());
            }

            // Generate the required runtime checks for GT_DIV or GT_UDIV
            if (tree->gtOper == GT_DIV || tree->gtOper == GT_MOD)
            {
                // Two possible exceptions:
                //     (AnyVal /  0) => DivideByZeroException
                //     (MinInt / -1) => ArithmeticException
                //
                bool checkDividend = true;

                // Do we have an immediate for the 'divisorOp'?
                //
                if (divisorOp->IsCnsIntOrI())
                {
                    ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal;
                    // assert(intConstValue != 0); // already checked above by IsIntegralConst(0)
                    if (intConstValue != -1)
                    {
                        checkDividend = false; // We statically know that the dividend is not -1
                    }
                }
                else // insert check for division by zero
                {
                    // Check if the divisor is zero throw a DivideByZeroException
                    genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg);
                }

                if (checkDividend)
                {
                    // Check if the divisor is not -1 branch to 'sdivLabel'
                    emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, -1);
                    BasicBlock* sdivLabel = genCreateTempLabel(); // can optimize for loongarch64.
                    emit->emitIns_J_cond_la(INS_bne, sdivLabel, REG_R21, divisorReg);

                    // If control flow continues past here the 'divisorReg' is known to be -1
                    regNumber dividendReg = tree->gtGetOp1()->GetRegNum();
                    // At this point the divisor is known to be -1
                    //
                    // Whether dividendReg is MinInt or not: dividend+dividend overflows
                    // to zero exactly when the dividend is MinInt, so a beq on the sum
                    // detects the MinInt / -1 case.
                    //

                    emit->emitIns_J_cond_la(INS_beq, sdivLabel, dividendReg, REG_R0);

                    emit->emitIns_R_R_R(size == EA_4BYTE ? INS_add_w : INS_add_d, size, REG_R21, dividendReg,
                                        dividendReg);
                    genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, REG_R21);
                    genDefineTempLabel(sdivLabel);
                }

                // Generate the sdiv instruction
                if (size == EA_4BYTE)
                {
                    if (tree->OperGet() == GT_DIV)
                    {
                        ins = INS_div_w;
                    }
                    else
                        ins = INS_mod_w;
                }
                else
                {
                    if (tree->OperGet() == GT_DIV)
                    {
                        ins = INS_div_d;
                    }
                    else
                        ins = INS_mod_d;
                }

                emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg);
            }
            else // if (tree->gtOper == GT_UDIV) GT_UMOD
            {
                // Only one possible exception
                //     (AnyVal /  0) => DivideByZeroException
                //
                // Note that division by the constant 0 was already checked for above by the
                // op2->IsIntegralConst(0) check
                //

                if (!divisorOp->IsCnsIntOrI())
                {
                    // divisorOp is not a constant, so it could be zero
                    //
                    genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg);
                }

                if (size == EA_4BYTE)
                {
                    if (tree->OperGet() == GT_UDIV)
                    {
                        ins = INS_div_wu;
                    }
                    else
                        ins = INS_mod_wu;

                    // TODO: temp workaround, should amend for optimize.
                    // slli.w with shift 0 re-sign-extends the 32-bit operands before
                    // the unsigned 32-bit divide.
                    emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0);
                    emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, divisorReg, divisorReg, 0);
                }
                else
                {
                    if (tree->OperGet() == GT_UDIV)
                    {
                        ins = INS_div_du;
                    }
                    else
                        ins = INS_mod_du;
                }

                emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg);
            }
        }
    }
    genProduceReg(tree);
}

// Generate code for InitBlk by performing a loop unroll
// Preconditions:
//   a) Both the size and fill byte value are integer constants.
//   b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
{
    assert(node->OperIs(GT_STORE_BLK));

    unsigned  dstLclNum      = BAD_VAR_NUM;
    regNumber dstAddrBaseReg = REG_NA;
    int       dstOffset      = 0;
    GenTree*  dstAddr        = node->Addr();

    // Resolve the destination either to a (base register, offset) pair or to a
    // stack local (lclNum, offset) so the stores below can pick the right form.
    if (!dstAddr->isContained())
    {
        dstAddrBaseReg = genConsumeReg(dstAddr);
    }
    else if (dstAddr->OperIsAddrMode())
    {
        assert(!dstAddr->AsAddrMode()->HasIndex());

        dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base());
        dstOffset      = dstAddr->AsAddrMode()->Offset();
    }
    else
    {
        assert(dstAddr->OperIsLocalAddr());
        dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum();
        dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs();
    }

    regNumber srcReg;
    GenTree*  src = node->Data();

    if (src->OperIs(GT_INIT_VAL))
    {
        assert(src->isContained());
        src = src->gtGetOp1();
    }

    if (!src->isContained())
    {
        srcReg = genConsumeReg(src);
    }
    else
    {
        // A contained fill value must be zero; use the hard-wired zero register.
        assert(src->IsIntegralConst(0));
        srcReg = REG_R0;
    }

    if (node->IsVolatile())
    {
        instGen_MemoryBarrier();
    }

    emitter* emit = GetEmitter();
    unsigned size = node->GetLayout()->GetSize();

    assert(size <= INT32_MAX);
    assert(dstOffset < INT32_MAX - static_cast<int>(size));

    // First pass: fill 16 bytes per iteration with paired st.d stores.
    for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; size -= regSize, dstOffset += regSize)
    {
        if (dstLclNum != BAD_VAR_NUM)
        {
            emit->emitIns_S_R(INS_st_d, EA_8BYTE, srcReg, dstLclNum, dstOffset);
            emit->emitIns_S_R(INS_st_d, EA_8BYTE, srcReg, dstLclNum, dstOffset + 8);
        }
        else
        {
            emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, srcReg, dstAddrBaseReg, dstOffset);
            emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, srcReg, dstAddrBaseReg, dstOffset + 8);
        }
    }

    // Second pass: mop up the remaining 1..15 bytes with progressively narrower stores.
    for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, dstOffset += regSize)
    {
        while (regSize > size)
        {
            regSize /= 2;
        }

        instruction storeIns;
        emitAttr    attr;

        switch (regSize)
        {
            case 1:
                storeIns = INS_st_b;
                attr     = EA_4BYTE;
                break;
            case 2:
                storeIns = INS_st_h;
                attr     = EA_4BYTE;
                break;
            case 4:
                storeIns = INS_st_w;
                attr     = EA_ATTR(regSize);
                break;
            case 8:
                storeIns = INS_st_d;
                attr     = EA_ATTR(regSize);
                break;
            default:
                unreached();
        }

        if (dstLclNum != BAD_VAR_NUM)
        {
            emit->emitIns_S_R(storeIns, attr, srcReg, dstLclNum, dstOffset);
        }
        else
        {
            emit->emitIns_R_R_I(storeIns, attr, srcReg, dstAddrBaseReg, dstOffset);
        }
    }
}

// Generate code for CpObj nodes which copy structs that have interleaved
// GC pointers.
// For this case we'll generate a sequence of loads/stores in the case of struct
// slots that don't contain GC pointers.  The generated code will look like:
// ld tempReg, 8(A5)
// sd tempReg, 8(A6)
//
// In the case of a GC-Pointer we'll call the ByRef write barrier helper
// who happens to use the same registers as the previous call to maintain
// the same register requirements and register killsets:
// bl CORINFO_HELP_ASSIGN_BYREF
//
// So finally an example would look like this:
// ld tempReg, 8(A5)
// sd tempReg, 8(A6)
// bl CORINFO_HELP_ASSIGN_BYREF
// ld tempReg, 8(A5)
// sd tempReg, 8(A6)
// bl CORINFO_HELP_ASSIGN_BYREF
// ld tempReg, 8(A5)
// sd tempReg, 8(A6)
void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
{
    GenTree*  dstAddr       = cpObjNode->Addr();
    GenTree*  source        = cpObjNode->Data();
    var_types srcAddrType   = TYP_BYREF;
    bool      sourceIsLocal = false;

    assert(source->isContained());
    if (source->gtOper == GT_IND)
    {
        GenTree* srcAddr = source->gtGetOp1();
        assert(!srcAddr->isContained());
        srcAddrType = srcAddr->TypeGet();
    }
    else
    {
        noway_assert(source->IsLocal());
        sourceIsLocal = true;
    }

    bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr();

#ifdef DEBUG
    assert(!dstAddr->isContained());

    // This GenTree node has data about GC pointers, this means we're dealing
    // with CpObj.
    assert(cpObjNode->GetLayout()->HasGCPtr());
#endif // DEBUG

    // Consume the operands and get them into the right registers.
    // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
    genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA);
    gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType);
    gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());

    ClassLayout* layout = cpObjNode->GetLayout();
    unsigned     slots  = layout->GetSlotCount();

    // Temp register(s) used to perform the sequence of loads and stores.
    regNumber tmpReg  = cpObjNode->ExtractTempReg();
    regNumber tmpReg2 = REG_NA;

    assert(genIsValidIntReg(tmpReg));
    assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF);
    assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF);

    if (slots > 1)
    {
        tmpReg2 = cpObjNode->GetSingleTempReg();
        assert(tmpReg2 != tmpReg);
        assert(genIsValidIntReg(tmpReg2));
        assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
        assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF);
    }

    if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
    {
        // issue a full memory barrier before a volatile CpObj operation
        instGen_MemoryBarrier();
    }

    emitter* emit = GetEmitter();

    emitAttr attrSrcAddr = emitActualTypeSize(srcAddrType);
    emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet());

    // If we can prove it's on the stack we don't need to use the write barrier.
    if (dstOnStack)
    {
        unsigned i = 0;
        // Check if two or more remaining slots and use two ld/sd sequence
        while (i < slots - 1)
        {
            emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0));
            emitAttr attr1 = emitTypeSize(layout->GetGCPtrType(i + 1));

            emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0);
            emit->emitIns_R_R_I(INS_ld_d, attr1, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE);
            emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF,
                                2 * TARGET_POINTER_SIZE);
            emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0);
            emit->emitIns_R_R_I(INS_st_d, attr1, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE);
            emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF,
                                2 * TARGET_POINTER_SIZE);
            i += 2;
        }

        // Use a ld/sd sequence for the last remainder
        if (i < slots)
        {
            emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0));

            emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0);
            emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF,
                                TARGET_POINTER_SIZE);
            emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0);
            emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF,
                                TARGET_POINTER_SIZE);
        }
    }
    else
    {
        unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

        unsigned i = 0;
        while (i < slots)
        {
            if (!layout->IsGCPtr(i))
            {
                // Check if the next slot's type is also TYP_GC_NONE and use two ld/sd
                if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
                {
                    emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0);
                    emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE);
                    emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF,
                                        REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE);
                    emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0);
                    emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE);
                    emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF,
                                        REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE);
                    ++i; // extra increment of i, since we are copying two items
                }
                else
                {
                    emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0);
                    emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF,
                                        REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE);
                    emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0);
                    emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF,
                                        REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE);
                }
            }
            else
            {
                // In the case of a GC-Pointer we'll call the ByRef write barrier helper
                genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
                gcPtrCount--;
            }
            ++i;
        }
        assert(gcPtrCount == 0);
    }

    if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
    {
        // issue a INS_BARRIER_RMB after a volatile CpObj operation
        //// TODO: there is only BARRIER_FULL for LOONGARCH64.
        instGen_MemoryBarrier(BARRIER_FULL);
    }

    // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
    // While we normally update GC info prior to the last instruction that uses them,
    // these actually live into the helper call.
    gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
}

// generate code do a switch statement based on a table of ip-relative offsets
void CodeGen::genTableBasedSwitch(GenTree* treeNode)
{
    genConsumeOperands(treeNode->AsOp());
    regNumber idxReg  = treeNode->AsOp()->gtOp1->GetRegNum();
    regNumber baseReg = treeNode->AsOp()->gtOp2->GetRegNum();

    regNumber tmpReg = treeNode->GetSingleTempReg();

    // load the ip-relative offset (which is relative to start of fgFirstBB)
    // R21 holds idx*4 (each table entry is a 4-byte offset).
    GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, idxReg, 2);
    GetEmitter()->emitIns_R_R_R(INS_add_d, EA_8BYTE, baseReg, baseReg, REG_R21);
    GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, baseReg, baseReg, 0);

    // add it to the absolute address of fgFirstBB
    // compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;//TODO for LOONGARCH64.
    GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg);
    GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, baseReg, baseReg, tmpReg);

    // jr baseReg (jirl with rd = R0 is an indirect jump without link)
    GetEmitter()->emitIns_R_R_I(INS_jirl, emitActualTypeSize(TYP_I_IMPL), REG_R0, baseReg, 0);
}

// emits the table and an instruction to get the address of the first element
void CodeGen::genJumpTable(GenTree* treeNode)
{
    noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
    assert(treeNode->OperGet() == GT_JMPTABLE);

    unsigned     jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
    BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
    unsigned     jmpTabOffs;
    unsigned     jmpTabBase;

    jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true);

    jmpTabOffs = 0;

    JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", compiler->compMethodID, jmpTabBase);

    // Emit one data entry per switch target.
    for (unsigned i = 0; i < jumpCount; i++)
    {
        BasicBlock* target = *jumpTable++;
        noway_assert(target->bbFlags & BBF_HAS_LABEL);

        JITDUMP(" DD L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum);

        GetEmitter()->emitDataGenData(i, target);
    };

    GetEmitter()->emitDataGenEnd();

    // Access to inline data is 'abstracted' by a special type of static member
    // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
    // to constant data, not a real static field.
    GetEmitter()->emitIns_R_C(INS_bl, emitActualTypeSize(TYP_I_IMPL), treeNode->GetRegNum(), REG_NA,
                              compiler->eeFindJitDataOffs(jmpTabBase), 0);
    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
//
// Arguments:
//    treeNode - the GT_XADD/XCHG node
//
void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
    assert(!"unimplemented on LOONGARCH yet");
}

//------------------------------------------------------------------------
// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node.
//
// Arguments:
//    tree - the GT_CMPXCHG node
//
void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
{
    assert(!"unimplemented on LOONGARCH yet");
}

//------------------------------------------------------------------------
// isImmed: Returns true if either operand of 'treeNode' is a contained
// integral immediate (op2 is only checked for binary operators).
//
static inline bool isImmed(GenTree* treeNode)
{
    if (treeNode->gtGetOp1()->isContainedIntOrIImmed())
    {
        return true;
    }
    else if (treeNode->OperIsBinary())
    {
        if (treeNode->gtGetOp2()->isContainedIntOrIImmed())
            return true;
    }

    return false;
}

instruction CodeGen::genGetInsForOper(GenTree* treeNode)
{
    var_types  type = treeNode->TypeGet();
    genTreeOps oper = treeNode->OperGet();
    GenTree*   op1  = treeNode->gtGetOp1();
    GenTree*   op2;
    emitAttr   attr  = emitActualTypeSize(treeNode);
    bool       isImm = false;

    instruction ins = INS_break;

    if (varTypeIsFloating(type))
    {
        switch (oper)
        {
            case GT_ADD:
                if (attr == EA_4BYTE)
                    ins = INS_fadd_s;
                else
                    ins = INS_fadd_d;
                break;
            case GT_SUB:
                if (attr == EA_4BYTE)
                    ins = INS_fsub_s;
                else
                    ins = INS_fsub_d;
                break;
            case GT_MUL:
                if (attr == EA_4BYTE)
                    ins = INS_fmul_s;
                else
                    ins = INS_fmul_d;
                break;
            case GT_DIV:
                if (attr == EA_4BYTE)
ins = INS_fdiv_s; + else + ins = INS_fdiv_d; + break; + case GT_NEG: + if (attr == EA_4BYTE) + ins = INS_fneg_s; + else + ins = INS_fneg_d; + break; + + default: + NYI("Unhandled oper in genGetInsForOper() - float"); + unreached(); + break; + } + } + else + { + switch (oper) + { + case GT_ADD: + isImm = isImmed(treeNode); + if (isImm) + { + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_addi_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_addi_w; + } + } + else + { + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_add_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_add_w; + } + } + break; + + case GT_SUB: + isImm = isImmed(treeNode); + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_sub_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_sub_w; + } + break; + + case GT_MOD: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_mod_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_mod_w; + } + break; + + case GT_DIV: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_div_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_div_w; + } + break; + + case GT_UMOD: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_mod_du; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_mod_wu; + } + break; + + case GT_UDIV: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_div_du; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_div_wu; + } + break; + + case GT_MUL: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + //if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) + // ins = INS_mul_d; + //else + ins = INS_mul_d; + } + else + { + if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) + ins = INS_mulw_d_wu; + else + ins = INS_mul_w; + } + break; + + case GT_NEG: + if (attr == EA_8BYTE) + { + ins = INS_dneg; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_neg; + } + break; + + case GT_NOT: + ins = INS_not; + break; + + case GT_AND: + isImm = 
isImmed(treeNode); + if (isImm) + { + ins = INS_andi; + } + else + { + ins = INS_and; + } + break; + + case GT_OR: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_ori; + } + else + { + ins = INS_or; + } + break; + + case GT_LSH: + isImm = isImmed(treeNode); + if (isImm) + { + //it's better to check sa. + if (attr == EA_4BYTE) + ins = INS_slli_w; + else + ins = INS_slli_d; + } + else + { + if (attr == EA_4BYTE) + ins = INS_sll_w; + else + ins = INS_sll_d; + } + break; + + case GT_RSZ: + isImm = isImmed(treeNode); + if (isImm) + { + //it's better to check sa. + if (attr == EA_4BYTE) + ins = INS_srli_w; + else + ins = INS_srli_d; + } + else + { + if (attr == EA_4BYTE) + ins = INS_srl_w; + else + ins = INS_srl_d; + } + break; + + case GT_RSH: + isImm = isImmed(treeNode); + if (isImm) + { + //it's better to check sa. + if (attr == EA_4BYTE) + ins = INS_srai_w; + else + ins = INS_srai_d; + } + else + { + if (attr == EA_4BYTE) + ins = INS_sra_w; + else + ins = INS_sra_d; + } + break; + + case GT_ROR: + isImm = isImmed(treeNode); + if (isImm) + { + //it's better to check sa. + if (attr == EA_4BYTE) + ins = INS_rotri_w; + else + ins = INS_rotri_d; + } + else + { + if (attr == EA_4BYTE) + ins = INS_rotr_w; + else + ins = INS_rotr_d; + } + break; + + case GT_XOR: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_xori; + } + else + { + ins = INS_xor; + } + break; + + default: + NYI("Unhandled oper in genGetInsForOper() - integer"); + unreached(); + break; + } + } + return ins; +} + +//------------------------------------------------------------------------ +// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. 
//
// Arguments:
//    tree - the GT_RETURNTRAP node
//
void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)
{
    assert(tree->OperGet() == GT_RETURNTRAP);

    // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
    // based on the contents of 'data'

    GenTree* data = tree->gtOp1;
    genConsumeRegs(data);

    // Skip the helper call when 'data' is zero.
    BasicBlock* skipLabel = genCreateTempLabel();
    GetEmitter()->emitIns_J_cond_la(INS_beq, skipLabel, data->GetRegNum(), REG_R0);

    void* pAddr = nullptr;
    void* addr  = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr);
    emitter::EmitCallType callType;
    regNumber             callTarget;

    if (addr == nullptr)
    {
        // No direct address: load the helper's address indirection cell into
        // the default helper-call target register and call through it.
        callType   = emitter::EC_INDIR_R;
        callTarget = REG_DEFAULT_HELPER_CALL_TARGET;

        //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
        if (compiler->opts.compReloc)
        {
            GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
        }
        else
        {
            // Materialize pAddr piecewise (lu12i_w/lu32i_d) and load through it
            // with ldptr_d, whose immediate is the low 12 bits scaled by 4.
            //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr);
            //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, );
            GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12);
            GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32);
            GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2);
        }
        regSet.verifyRegUsed(callTarget);
        //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0);
    }
    else
    {
        callType   = emitter::EC_FUNC_TOKEN;
        callTarget = REG_NA;
    }

    ////TODO: can optimize further !!!
    GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC),
                               INDEBUG_LDISASM_COMMA(nullptr) addr, 0,
                               EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
                               gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */
                               callTarget,                           /* ireg */
                               REG_NA, 0, 0,                         /* xreg, xmul, disp */
                               false                                 /* isJump */
                               );

    genDefineTempLabel(skipLabel);

    regMaskTP killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
    regSet.verifyRegistersUsed(killMask);
}

//------------------------------------------------------------------------
// genCodeForStoreInd: Produce code for a GT_STOREIND node.
//
// Arguments:
//    tree - the GT_STOREIND node
//
void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
{
#ifdef FEATURE_SIMD
    // Storing Vector3 of size 12 bytes through indirection
    if (tree->TypeGet() == TYP_SIMD12)
    {
        genStoreIndTypeSIMD12(tree);
        return;
    }
#endif // FEATURE_SIMD

    GenTree* data = tree->Data();
    GenTree* addr = tree->Addr();

    GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data);
    if (writeBarrierForm != GCInfo::WBF_NoBarrier)
    {
        // data and addr must be in registers.
        // Consume both registers so that any copies of interfering
        // registers are taken care of.
        genConsumeOperands(tree);

        // At this point, we should not have any interference.
        // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF,
        // as that is where 'addr' must go.
        noway_assert(data->GetRegNum() != REG_WRITE_BARRIER_DST_BYREF);

        // 'addr' goes into the write-barrier destination register (REG_WRITE_BARRIER_DST)
        genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST);

        // 'data' goes into the write-barrier source register (REG_WRITE_BARRIER_SRC)
        genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC);

        genGCWriteBarrier(tree, writeBarrierForm);
    }
    else // A normal store, not a WriteBarrier store
    {
        // We must consume the operands in the proper execution order,
        // so that liveness is updated appropriately.
        genConsumeAddress(addr);

        if (!data->isContained())
        {
            genConsumeRegs(data);
        }

        regNumber dataReg;
        if (data->isContainedIntOrIImmed())
        {
            // Only a contained zero is allowed; store from the hard-wired zero register.
            assert(data->IsIntegralConst(0));
            dataReg = REG_R0;
        }
        else // data is not contained, so evaluate it into a register
        {
            assert(!data->isContained());
            dataReg = data->GetRegNum();
        }

        var_types   type = tree->TypeGet();
        instruction ins  = ins_Store(type);

        if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
        {
            // issue a full memory barrier before a volatile StInd
            instGen_MemoryBarrier();
        }

        GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), dataReg, tree);
    }
}

//------------------------------------------------------------------------
// genCodeForSwap: Produce code for a GT_SWAP node.
//
// Arguments:
//    tree - the GT_SWAP node
//
void CodeGen::genCodeForSwap(GenTreeOp* tree)
{
    assert(!"unimplemented on LOONGARCH yet");
}

//------------------------------------------------------------------------
// genIntToFloatCast: Generate code to cast an int/long to float/double
//
// Arguments:
//    treeNode - The GT_CAST node
//
// Return Value:
//    None.
//
// Assumptions:
//    Cast is a non-overflow conversion.
//    The treeNode must have an assigned register.
//    SrcType= int32/uint32/int64/uint64 and DstType=float/double.
//
void CodeGen::genIntToFloatCast(GenTree* treeNode)
{
    // int type --> float/double conversions are always non-overflow ones
    assert(treeNode->OperGet() == GT_CAST);
    assert(!treeNode->gtOverflow());

    regNumber targetReg = treeNode->GetRegNum();
    assert(genIsValidFloatReg(targetReg));

    GenTree* op1 = treeNode->AsOp()->gtOp1;
    assert(!op1->isContained());                // Cannot be contained
    assert(genIsValidIntReg(op1->GetRegNum())); // Must be a valid int reg.

    var_types dstType = treeNode->CastToType();
    var_types srcType = genActualType(op1->TypeGet());
    assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));

    emitter *emit = GetEmitter();
    emitAttr attr = emitActualTypeSize(dstType);

    // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE
    emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
    noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE));

    bool        IsUnsigned = treeNode->gtFlags & GTF_UNSIGNED;
    instruction ins        = INS_invalid;

    genConsumeOperands(treeNode->AsOp());

    if (IsUnsigned)
    {//should amend.
        // LOONGARCH64 ffint_* only converts signed sources, so pre-massage
        // unsigned inputs. The original op1 value is parked in REG_SCRATCH_FLT
        // and restored after the conversion.
        emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, REG_SCRATCH_FLT, op1->GetRegNum()); // save op1

        if (srcSize == EA_8BYTE)
        {
            // If op1 < 0 when viewed as signed (i.e. top bit set as uint64),
            // halve it keeping the low bit sticky (srli+or) so the signed
            // conversion below stays in range; the result is doubled afterwards.
            ssize_t imm = 4 << 2;
            emit->emitIns_R_R_I(INS_bge, EA_8BYTE, op1->GetRegNum(), REG_R0, imm);

            emit->emitIns_R_R_I(INS_andi, EA_8BYTE, REG_R21, op1->GetRegNum(), 1);
            emit->emitIns_R_R_I(INS_srli_d, EA_8BYTE, op1->GetRegNum(), op1->GetRegNum(), 1);
            emit->emitIns_R_R_R(INS_or, EA_8BYTE, op1->GetRegNum(), op1->GetRegNum(), REG_R21);
        }
        else
        {
            // uint32 source: zero the upper 32 bits and convert as a
            // (now always non-negative) 64-bit value.
            srcSize = EA_8BYTE;
            emit->emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, op1->GetRegNum(), REG_R0, 63, 32);
        }
    }

    ins = srcSize == EA_8BYTE ? INS_movgr2fr_d : INS_movgr2fr_w;
    emit->emitIns_R_R(ins, attr, treeNode->GetRegNum(), op1->GetRegNum());

    if (dstType == TYP_DOUBLE)
    {
        if (srcSize == EA_4BYTE)
        {
            ins = INS_ffint_d_w;
        }
        else
        {
            assert(srcSize == EA_8BYTE);
            ins = INS_ffint_d_l;
        }
    }
    else
    {
        assert(dstType == TYP_FLOAT);
        if (srcSize == EA_4BYTE)
        {
            ins = INS_ffint_s_w;
        }
        else
        {
            assert(srcSize == EA_8BYTE);
            ins = INS_ffint_s_l;
        }
    }

    emit->emitIns_R_R(ins, attr, treeNode->GetRegNum(), treeNode->GetRegNum());

    if (IsUnsigned)
    {
        srcSize = EA_ATTR(genTypeSize(srcType));
        emit->emitIns_R_R(INS_movfr2gr_d, attr, op1->GetRegNum(), REG_SCRATCH_FLT); // recover op1

        if (srcSize == EA_8BYTE)
        {
            // If the input had been halved above, double the converted result
            // (fadd of the value with itself) to compensate.
            ssize_t imm = 3 << 2;
            emit->emitIns_R_R_I(INS_bge, EA_8BYTE, op1->GetRegNum(), REG_R0, imm);

            emit->emitIns_R_R(dstType == TYP_DOUBLE ? INS_fmov_d : INS_fmov_s, attr, REG_SCRATCH_FLT, treeNode->GetRegNum());
            emit->emitIns_R_R_R(dstType == TYP_DOUBLE ? INS_fadd_d : INS_fadd_s, attr, treeNode->GetRegNum(), REG_SCRATCH_FLT, treeNode->GetRegNum());
        }
    }

    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genFloatToIntCast: Generate code to cast float/double to int/long
//
// Arguments:
//    treeNode - The GT_CAST node
//
// Return Value:
//    None.
//
// Assumptions:
//    Cast is a non-overflow conversion.
//    The treeNode must have an assigned register.
//    SrcType=float/double and DstType= int32/uint32/int64/uint64
//
void CodeGen::genFloatToIntCast(GenTree* treeNode)
{
    // we don't expect to see overflow detecting float/double --> int type conversions here
    // as they should have been converted into helper calls by front-end.
    assert(treeNode->OperGet() == GT_CAST);
    assert(!treeNode->gtOverflow());

    regNumber targetReg = treeNode->GetRegNum();
    assert(genIsValidIntReg(targetReg)); // Must be a valid int reg.

    GenTree* op1 = treeNode->AsOp()->gtOp1;
    assert(!op1->isContained());                  // Cannot be contained
    assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg.

    var_types dstType = treeNode->CastToType();
    var_types srcType = op1->TypeGet();
    assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));

    // We should never see a dstType whose size is neither EA_4BYTE or EA_8BYTE
    // For conversions to small types (byte/sbyte/int16/uint16) from float/double,
    // we expect the front-end or lowering phase to have generated two levels of cast.
    //
    emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
    noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));

    instruction ins1       = INS_invalid; // truncating float->int convert
    instruction ins2       = INS_invalid; // FPR->GPR move of the result
    bool        IsUnsigned = varTypeIsUnsigned(dstType);

    regNumber tmpReg = REG_SCRATCH_FLT;
    assert(tmpReg != op1->GetRegNum());

    if (srcType == TYP_DOUBLE)
    {
        if (dstSize == EA_4BYTE)
        {
            ins1 = INS_ftintrz_w_d;
            ins2 = INS_movfr2gr_s;
        }
        else
        {
            assert(dstSize == EA_8BYTE);
            ins1 = INS_ftintrz_l_d;
            ins2 = INS_movfr2gr_d;
        }
    }
    else
    {
        assert(srcType == TYP_FLOAT);
        if (dstSize == EA_4BYTE)
        {
            ins1 = INS_ftintrz_w_s;
            ins2 = INS_movfr2gr_s;
        }
        else
        {
            assert(dstSize == EA_8BYTE);
            ins1 = INS_ftintrz_l_s;
            ins2 = INS_movfr2gr_d;
        }
    }

    genConsumeOperands(treeNode->AsOp());

    if (IsUnsigned)
    {
        // 'imm' encodes the float/double bit pattern of 2^31 or 2^63 (the
        // smallest value that does not fit the signed destination); values at
        // or above it are converted via (x - 2^N) and the sign bit is OR'd
        // back in at the end.
        ssize_t imm = 0;

        if (srcType == TYP_DOUBLE)
        {
            if (dstSize == EA_4BYTE)
            {
                imm = 0x41e00;
            }
            else
            {
                imm = 0x43e00;
            }
        }
        else
        {
            assert(srcType == TYP_FLOAT);
            if (dstSize == EA_4BYTE)
            {
                imm = 0x4f000;
            }
            else
            {
                imm = 0x5f000;
            }
        }

        //{
        //    GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, tmpReg, REG_R0);

        //    GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_c_olt_d : INS_c_olt_s, EA_8BYTE, op1->GetRegNum(), tmpReg, 2);
        //    GetEmitter()->emitIns_I_I(INS_bc1f, EA_PTRSIZE, 2, 4 << 2);

        //    GetEmitter()->emitIns_R_R_I(INS_ori*/, EA_PTRSIZE, treeNode->GetRegNum(), REG_R0, 0);
        //    GetEmitter()->emitIns_I(INS_b, EA_PTRSIZE, srcType == TYP_DOUBLE ? 14 << 2 : 13 << 2);
        //}

        if (srcType == TYP_DOUBLE)
            GetEmitter()->emitIns_R_R_I(INS_lu52i_d, EA_8BYTE, REG_R21, REG_R0, imm >> 8);
        else
            GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm);

        GetEmitter()->emitIns_R_R(srcType == TYP_DOUBLE ? INS_movgr2fr_d : INS_movgr2fr_w, EA_8BYTE, tmpReg, REG_R21);

        // cc2 := (op1 < threshold); if so the plain signed conversion suffices.
        GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_fcmp_clt_d : INS_fcmp_clt_s, EA_8BYTE, op1->GetRegNum(), tmpReg, 2);

        GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 0);
        GetEmitter()->emitIns_I_I(INS_bcnez, EA_PTRSIZE, 2, 4 << 2);

        // Out-of-signed-range: convert (op1 - threshold) and prepare the sign
        // bit (1 << 63 or 1 << 31) in R21 to OR back into the result.
        GetEmitter()->emitIns_R_R_R(srcType == TYP_DOUBLE ? INS_fsub_d : INS_fsub_s, EA_8BYTE, tmpReg, op1->GetRegNum(), tmpReg);

        GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 1);
        GetEmitter()->emitIns_R_R_I(dstSize == EA_8BYTE ? INS_slli_d : INS_slli_w, EA_PTRSIZE, REG_R21, REG_R21, dstSize == EA_8BYTE ? 63 : 31);

        GetEmitter()->emitIns_R_R_R_I(INS_fsel, EA_PTRSIZE, tmpReg, tmpReg, op1->GetRegNum(), 2);

        GetEmitter()->emitIns_R_R(ins1, dstSize, tmpReg, tmpReg);
        GetEmitter()->emitIns_R_R(ins2, dstSize, treeNode->GetRegNum(), tmpReg);

        GetEmitter()->emitIns_R_R_R(INS_or, dstSize, treeNode->GetRegNum(), REG_R21, treeNode->GetRegNum());
    }
    else
    {
        GetEmitter()->emitIns_R_R(ins1, dstSize, tmpReg, op1->GetRegNum());
        GetEmitter()->emitIns_R_R(ins2, dstSize, treeNode->GetRegNum(), tmpReg);
    }

    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genCkfinite: Generate code for ckfinite opcode.
//
// Arguments:
//    treeNode - The GT_CKFINITE node
//
// Return Value:
//    None.
//
// Assumptions:
//    GT_CKFINITE node has reserved an internal register.
//
void CodeGen::genCkfinite(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_CKFINITE);

    GenTree*  op1        = treeNode->AsOp()->gtOp1;
    var_types targetType = treeNode->TypeGet();
    ssize_t   expMask    = (targetType == TYP_FLOAT) ? 0xFF : 0x7FF; // Bit mask to extract exponent.
    ssize_t   size       = (targetType == TYP_FLOAT) ? 8 : 11;       // Bit size to extract exponent.
    ssize_t   pos        = (targetType == TYP_FLOAT) ? 23 : 52;      // Bit pos of exponent.

    emitter* emit = GetEmitter();
    emitAttr attr = emitActualTypeSize(treeNode);

    // Extract exponent into a register.
    regNumber intReg = treeNode->GetSingleTempReg();
    regNumber fpReg  = genConsumeReg(op1);

    emit->emitIns_R_R(attr == EA_8BYTE ? INS_movfr2gr_d : INS_movfr2gr_s, attr, intReg, fpReg);

    // Mask of exponent with all 1's and check if the exponent is all 1's
    // (all-ones exponent means Inf/NaN: after xori with expMask the register
    // is zero exactly in that case, which triggers the throw below).
    instruction ins = (targetType == TYP_FLOAT) ? INS_bstrpick_w : INS_bstrpick_d;
    emit->emitIns_R_R_I_I(ins, EA_PTRSIZE, intReg, intReg, pos+size-1, pos);
    emit->emitIns_R_R_I(INS_xori, attr, intReg, intReg, expMask);

    genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, intReg);

    // if it is a finite value copy it to targetReg
    if (treeNode->GetRegNum() != fpReg)
    {
        emit->emitIns_R_R(ins_Copy(targetType), attr, treeNode->GetRegNum(), fpReg);
    }
    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT node.
+// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForCompare(GenTreeOp* jtree) +{ + emitter* emit = GetEmitter(); + + GenTreeOp* tree = nullptr; + regNumber targetReg; + if (jtree->OperIs(GT_JTRUE)) + { + tree = jtree->gtGetOp1()->AsOp(); + targetReg = REG_RA; + assert(tree->GetRegNum() == REG_NA); + + jtree->gtOp2 = (GenTree*)REG_RA;//targetReg + jtree->SetRegNum((regNumber)INS_bnez); + } + else + { + tree = jtree; + targetReg = tree->GetRegNum(); + } + assert(targetReg != REG_NA); + + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + var_types op1Type = genActualType(op1->TypeGet()); + var_types op2Type = genActualType(op2->TypeGet()); + + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + + genConsumeOperands(tree); + + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + + assert(genTypeSize(op1Type) == genTypeSize(op2Type)); + + if (varTypeIsFloating(op1Type)) + { + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + + if(IsUnordered) + { + if(tree->OperIs(GT_LT)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_LE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_EQ)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cueq_s : INS_fcmp_cueq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_NE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cune_s : INS_fcmp_cune_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_GT)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_GE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? 
INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + } + else + { + if(tree->OperIs(GT_LT)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_LE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_EQ)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_ceq_s : INS_fcmp_ceq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_NE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cne_s : INS_fcmp_cne_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_GT)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_GE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + } + + emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); + emit->emitIns_R_I(INS_movcf2gr, EA_PTRSIZE, targetReg, 1/*cc*/); + } + else if (op1->isContainedIntOrIImmed() && op2->isContainedIntOrIImmed()) + { + ssize_t imm1 = op1->AsIntCon()->gtIconVal; + ssize_t imm2 = op2->AsIntCon()->gtIconVal; + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + instruction ins = INS_beqz; + + switch (cmpSize) + { + case EA_4BYTE: + { + imm1 = static_cast(imm1); + imm2 = static_cast(imm2); + } + break; + case EA_8BYTE: + break; + case EA_1BYTE: + { + imm1 = static_cast(imm1); + imm2 = static_cast(imm2); + } + break; + //case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpCompare."); + } + + switch (tree->OperGet()) + { + case GT_LT: + if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && 
((unsigned)imm1 < (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_LE: + if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_EQ: + if (imm1 == imm2) + { + ins = INS_b; + } + break; + case GT_NE: + if (imm1 != imm2) + { + ins = INS_b; + } + break; + case GT_GT: + if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_GE: + if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) + { + ins = INS_b; + } + break; + default: + break; + } + + assert(ins != INS_invalid); + jtree->gtOp2 = (GenTree*)REG_SP; + jtree->SetRegNum((regNumber)ins); + } + else + { + //TODO:can optimize further. + if (op1->isContainedIntOrIImmed()) + { + op1 = tree->gtOp2; + op2 = tree->gtOp1; + switch (tree->OperGet()) + { + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; + } + } + assert(!op1->isContainedIntOrIImmed()); + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + regNumber regOp1 = op1->GetRegNum(); + + if (op2->isContainedIntOrIImmed()) + { + ssize_t imm = op2->AsIntCon()->gtIconVal; + + { + switch (cmpSize) + { + case EA_4BYTE: + imm = static_cast(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast(imm); + break; + //case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); + } + } + + if (tree->OperIs(GT_LT)) + { + if (!IsUnsigned && isValidSimm12(imm)) { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); + } + else if (IsUnsigned && isValidUimm11(imm)) { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); + } + else { + 
emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + } + else if (tree->OperIs(GT_LE)) + { + if (!IsUnsigned && isValidSimm12(imm + 1)) { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); + } + else if (IsUnsigned && isValidUimm11(imm + 1)) { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm + 1); + } + else { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm + 1); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + } + else if (tree->OperIs(GT_GT)) + { + if (!IsUnsigned && isValidSimm12(imm + 1)) { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_RA, regOp1, imm + 1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); + } + else if (IsUnsigned && isValidUimm11(imm + 1)) { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, REG_RA, regOp1, imm + 1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); + } + else { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, REG_RA, regOp1); + } + } + else if (tree->OperIs(GT_GE)) + { if (!IsUnsigned && isValidSimm12(imm)) { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); + } + else if (IsUnsigned && isValidUimm11(imm)) { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); + } + else { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_NE)) + { + if (!imm) { + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1); + } + else if (isValidUimm12(imm)) { + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + else { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + } + else if (tree->OperIs(GT_EQ)) + { + if (!imm) { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, 1); + } + else if (isValidUimm12(imm)) { + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + else { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + } + + genProduceReg(tree); + } + else + { + regNumber tmpRegOp1 = tree->ExtractTempReg(); + regNumber tmpRegOp2 = tree->ExtractTempReg(); + regNumber regOp2 = op2->GetRegNum(); + if (cmpSize == EA_4BYTE) + { + regOp1 = tmpRegOp1; + regOp2 = tmpRegOp2; + if (IsUnsigned) + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, tmpRegOp1, op1->GetRegNum(), 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, tmpRegOp2, op2->GetRegNum(), 31, 0); + } + else + { + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp1, op1->GetRegNum(), 0); + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp2, op2->GetRegNum(), 0); + } + } + + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + else if (tree->OperIs(GT_EQ)) + { + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + + genProduceReg(tree); + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJumpTrue: Generate code for a GT_JTRUE node. +// +// Arguments: +// jtrue - The node +// +void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) +{ + //assert(compiler->compCurBB->bbJumpKind == BBJ_COND);//should confirm. + ////assert(jtrue->OperIs(GT_JTRUE)); + + emitter* emit = GetEmitter(); + + GenTreeOp* tree = jtrue->OperIs(GT_JTRUE) ? 
jtrue->gtGetOp1()->AsOp() : jtrue; + regNumber targetReg = tree->GetRegNum(); + instruction ins = INS_invalid; + + if (jtrue->OperIs(GT_JTRUE) && jtrue->gtOp2) + { + emit->emitIns_J((instruction)jtrue->GetRegNum(), compiler->compCurBB->bbJumpDest, (int)(int64_t)jtrue->gtOp2);//5-bits; + jtrue->SetRegNum(REG_NA); + jtrue->gtOp2 = nullptr; + return; + } + else + { + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + + var_types op1Type = genActualType(op1->TypeGet()); + var_types op2Type = genActualType(op2->TypeGet()); + + bool IsEq = tree == jtrue->gtPrev; + + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + + genConsumeOperands(tree); + + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + + assert(targetReg == REG_NA); + int SaveCcResultReg = (int)REG_RA << 5; + + if (varTypeIsFloating(op1Type)) + { + assert(genTypeSize(op1Type) == genTypeSize(op2Type)); + //int cc = 1; + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + + if (tree->OperIs(GT_EQ)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_s : INS_fcmp_ceq_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_d : INS_fcmp_ceq_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + else if (tree->OperIs(GT_NE)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_s : INS_fcmp_cueq_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_d : INS_fcmp_cueq_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + else if (tree->OperIs(GT_LT)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_s : INS_fcmp_clt_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_cult_d : INS_fcmp_clt_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + else if (tree->OperIs(GT_LE)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_s : INS_fcmp_cle_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_d : INS_fcmp_cle_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + else if (tree->OperIs(GT_GE)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_s : INS_fcmp_cult_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_d : INS_fcmp_cult_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + else if (tree->OperIs(GT_GT)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_s : INS_fcmp_cule_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_cle_d : INS_fcmp_cule_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + + //assert(0 <= cc && cc < 8); + if (IsEq) + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, (int)1/*cc*/);//5-bits; + else + { + jtrue->gtOp2 = (GenTree*)(1/*cc*/); + jtrue->SetRegNum((regNumber)ins); + } + } + else if (op1->isContainedIntOrIImmed() && op2->isContainedIntOrIImmed()) + { + ssize_t imm1 = op1->AsIntCon()->gtIconVal; + ssize_t imm2 = op2->AsIntCon()->gtIconVal; + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + + switch (cmpSize) + { + case EA_4BYTE: + { + imm1 = static_cast(imm1); + imm2 = static_cast(imm2); + } + break; + case EA_8BYTE: + break; + case EA_1BYTE: + { + imm1 = static_cast(imm1); + imm2 = static_cast(imm2); + } + break; + //case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue."); + } + switch (tree->OperGet()) + { + case GT_LT: + if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_LE: + if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_EQ: + if (imm1 == imm2) + { + ins = INS_b; + } + break; + case GT_NE: + if (imm1 != imm2) + { + ins = INS_b; + } + break; + case GT_GT: + if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_GE: + if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) + { + ins = INS_b; + } + break; + default: + break; + } + + if (IsEq && (ins != INS_invalid)) + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, 0);//5-bits; + else if (ins != INS_invalid) + { + jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; + jtrue->SetRegNum((regNumber)ins); + } + } + else + { + //TODO:can optimize further. 
+ if (op1->isContainedIntOrIImmed()) + { + op1 = tree->gtOp2; + op2 = tree->gtOp1; + switch (tree->OperGet()) + { + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; + } + } + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + + regNumber regOp1 = op1->GetRegNum(); + + if (op2->isContainedIntOrIImmed()) + { + ssize_t imm = op2->AsIntCon()->gtIconVal; + + if (imm) + { + switch (cmpSize) + { + case EA_4BYTE: + if (op1->gtFlags & GTF_UNSIGNED) + imm = static_cast(imm); + else + imm = static_cast(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast(imm); + break; + //case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); + } + + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm);//TODO: maybe optimize. + } + else + { + SaveCcResultReg = 0; + } + + if (tree->OperIs(GT_LT)) { + SaveCcResultReg |= ((int)regOp1); + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_LE)) { + SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_GT)) { + SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_GE)) { + SaveCcResultReg |= ((int)regOp1); + ins = IsUnsigned ? 
INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_NE)) { + SaveCcResultReg |= ((int)regOp1); + ins = INS_bne; + } + else if (tree->OperIs(GT_EQ)) { + SaveCcResultReg |= ((int)regOp1); + ins = INS_beq; + } + } + else + { + regNumber regOp2 = op2->GetRegNum(); + if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + {//TODO: should amend further!!! + regNumber tmpRegOp1 = tree->ExtractTempReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + regOp1 = tmpRegOp1; + regOp2 = REG_RA; + } + else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + {//TODO: should amend further!!! + regNumber tmpRegOp1 = tree->ExtractTempReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + regOp1 = tmpRegOp1; + regOp2 = REG_RA; + } + else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + {//TODO: should amend further!!! + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, regOp2, 0); + regOp2 = REG_RA; + } + else if (cmpSize == EA_4BYTE && ((op1->gtFlags | op2->gtFlags) & GTF_UNSIGNED)) + {//TODO: should amend further!!! + if (!(op1->gtFlags & GTF_UNSIGNED)) + { + regNumber tmpRegOp1 = tree->ExtractTempReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + regOp1 = tmpRegOp1; + } + if (!(op2->gtFlags & GTF_UNSIGNED)) + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + regOp2 = REG_RA; + } + } + + if (tree->OperIs(GT_LT)) { + SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); + ins = IsUnsigned ? 
INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_LE)) { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_GT)) { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_GE)) { + SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_NE)) { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = INS_bne; + } + else if (tree->OperIs(GT_EQ)) { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = INS_beq; + } + } + + if (IsEq) + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, SaveCcResultReg);//5-bits; + else + { + jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; + jtrue->SetRegNum((regNumber)ins); + } + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJumpCompare: Generates code for jmpCompare statement. +// +// A GT_JCMP node is created when a comparison and conditional branch +// can be executed in a single instruction. +// +// LOONGARCH64 has a few instructions with this behavior. +// - beq/bne -- Compare and branch register equal/not equal +// +// The beq/bne supports the normal +/- 2^15 branch range for conditional branches +// +// A GT_JCMP beq/bne node is created when there is a GT_EQ or GT_NE +// integer/unsigned comparison against the value of Rt register which is used by +// a GT_JTRUE condition jump node. +// +// This node is responsible for consuming the register, and emitting the +// appropriate fused compare/test and branch instruction +// +// Two flags guide code generation +// GTF_JCMP_EQ -- Set if this is beq rather than bne +// +// Arguments: +// tree - The GT_JCMP tree node.
+// +// Return Value: +// None +// +void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) +{ + assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + assert(tree->OperIs(GT_JCMP)); + assert(!varTypeIsFloating(tree)); + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + assert(op2->IsCnsIntOrI()); + assert(op2->isContained()); + + genConsumeOperands(tree); + + regNumber reg = op1->GetRegNum(); + emitAttr attr = emitActualTypeSize(op1->TypeGet()); + + //if (tree->gtFlags & GTF_JCMP_TST) + //{ + // assert(!"unimplemented on LOONGARCH yet"); + // //ssize_t compareImm = op2->AsIntCon()->IconValue(); + + // //assert(isPow2(compareImm)); + + // //instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_tbz : INS_tbnz; + // //int imm = genLog2((size_t)compareImm); + + // //GetEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm); + //} + //else + { + instruction ins; + int regs; + if (op2->AsIntCon()->gtIconVal) + { + assert(reg != REG_R21); + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, op2->AsIntCon()->gtIconVal); + regs = (int)reg << 5; + regs |= (int)REG_R21;//REG_R21 + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; + } + else + { + regs = (int)reg; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; + } + + GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs);//5-bits; + } +} + +//--------------------------------------------------------------------- +// genSPtoFPdelta - return offset from the stack pointer (Initial-SP) to the frame pointer. The frame pointer +// will point to the saved frame pointer slot (i.e., there will be frame pointer chaining). 
+// +int CodeGenInterface::genSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + + int delta; + if (IsSaveFpRaWithAllCalleeSavedRegisters()) + { + //delta = (compiler->compCalleeRegsPushed -2)* REGSIZE_BYTES + compiler->compLclFrameSize; + //assert(delta == genTotalFrameSize() - compiler->lvaArgSize - 2*8); + delta = genTotalFrameSize() - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - + (compiler->compCalleeRegsPushed -1)* REGSIZE_BYTES; + } + else + { + delta = compiler->lvaOutgoingArgSpaceSize; + } + + assert(delta >= 0); + return delta; +} + +//--------------------------------------------------------------------- +// genTotalFrameSize - return the total size of the stack frame, including local size, +// callee-saved register size, etc. +// +// Return value: +// Total frame size +// + +int CodeGenInterface::genTotalFrameSize() const +{ + // For varargs functions, we home all the incoming register arguments. They are not + // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but + // since we don't use "push" instructions to save them, we don't have to do the + // save of these varargs register arguments as the first thing in the prolog. + + assert(!IsUninitialized(compiler->compCalleeRegsPushed)); + + int totalFrameSize = (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) + + compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; + + assert(totalFrameSize > 0); + return totalFrameSize; +} + +//--------------------------------------------------------------------- +// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. +// This number is going to be negative, since the Caller-SP is at a higher +// address than the frame pointer. +// +// There must be a frame pointer to call this function! 
+ +int CodeGenInterface::genCallerSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + int callerSPtoFPdelta; + + callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); + + assert(callerSPtoFPdelta <= 0); + return callerSPtoFPdelta; +} + +//--------------------------------------------------------------------- +// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. +// +// This number will be negative. + +int CodeGenInterface::genCallerSPtoInitialSPdelta() const +{ + int callerSPtoSPdelta = 0; + + callerSPtoSPdelta -= genTotalFrameSize(); + + assert(callerSPtoSPdelta <= 0); + return callerSPtoSPdelta; +} + +//--------------------------------------------------------------------- +// SetSaveFpRaWithAllCalleeSavedRegisters - Set the variable that indicates if FP/RA registers +// are stored with the rest of the callee-saved registers. +void CodeGen::SetSaveFpRaWithAllCalleeSavedRegisters(bool value) +{ + JITDUMP("Setting genSaveFpRaWithAllCalleeSavedRegisters to %s\n", dspBool(value)); + genSaveFpRaWithAllCalleeSavedRegisters = value; +} + +//--------------------------------------------------------------------- +// IsSaveFpRaWithAllCalleeSavedRegisters - Return the value that indicates where FP/RA registers +// are stored in the prolog. +bool CodeGen::IsSaveFpRaWithAllCalleeSavedRegisters() const +{ + return genSaveFpRaWithAllCalleeSavedRegisters; +} + +/***************************************************************************** + * Emit a call to a helper function. + */ + +void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) +{ + void* addr = nullptr; + void* pAddr = nullptr; + + emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regNumber callTarget = REG_NA; + + if (addr == nullptr) + { + // This is a call to a runtime helper. + // li x, pAddr #NOTE: this maybe multi-instructions. 
+ // ld x, [x] + // jr x + + if (callTargetReg == REG_NA) + { + // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but + // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. + callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; + } + + regMaskTP callTargetMask = genRegMask(callTargetReg); + regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + + // assert that all registers in callTargetMask are in the callKillSet + noway_assert((callTargetMask & callKillSet) == callTargetMask); + + callTarget = callTargetReg; + + //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + } + else + { + //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000)>>12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff)>>2); + } + regSet.verifyRegUsed(callTarget); + + callType = emitter::EC_INDIR_R; + } + + GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, + retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + regSet.verifyRegistersUsed(killMask); +} + +#ifdef FEATURE_SIMD + 
+//------------------------------------------------------------------------ +// genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main +// routine which in turn calls appropriate genSIMDIntrinsicXXX() routine. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// Currently, we only recognize SIMDVector and SIMDVector, and +// a limited set of methods. +// +// TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp. +void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) +{ + assert(!"unimplemented on LOONGARCH yet"); + return INS_OPTS_NONE; +} + +// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic +// +// Arguments: +// intrinsicId - SIMD intrinsic Id +// baseType - Base type of the SIMD vector +// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode +// +// +// Return Value: +// Instruction (op) to be used, and immed is set if instruction requires an immediate operand. +// +instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) +{ + assert(!"unimplemented on LOONGARCH yet"); + return INS_invalid; +} + +//------------------------------------------------------------------------ +// genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------------------------- +// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes +// a number of arguments equal to the length of the Vector. 
+// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//---------------------------------------------------------------------------------- +// genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Notes: +// The Widen intrinsics are broken into separate intrinsics for the two results. +// +void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Notes: +// This intrinsic takes two arguments. The first operand is narrowed to produce the +// lower elements of the results, and the second operand produces the high elements. +// +void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations +// add, sub, mul, bit-wise And, AndNot and Or. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. 
+// +void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator +// == and != +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------------------ +// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------------------ +// genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//----------------------------------------------------------------------------- +// genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to +// the given register, if any, or to memory. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. 
+// +// Notes: +// The upper half of all SIMD registers are volatile, even the callee-save registers. +// When a 16-byte SIMD value is live across a call, the register allocator will use this intrinsic +// to cause the upper half to be saved. It will first attempt to find another, unused, callee-save +// register. If such a register cannot be found, it will save it to an available caller-save register. +// In that case, this node will be marked GTF_SPILL, which will cause this method to save +// the upper half to the lclVar's home location. +// +void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//----------------------------------------------------------------------------- +// genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD16 vector to +// the given register, if any, or to memory. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always +// have their home register, this node has its targetReg on the lclVar child, and its source +// on the simdNode. +// Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled +// an upper-half to the lclVar's home location, this node will be marked GTF_SPILLED. +// +void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//----------------------------------------------------------------------------- +// genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory. +// Since Vector3 is not a hardware supported write size, it is performed +// as two writes: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store indirect +// +// +// Return Value: +// None. 
+// +void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//----------------------------------------------------------------------------- +// genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value. +// Since Vector3 is not a hardware supported write size, it is performed +// as two loads: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node of GT_IND +// +// +// Return Value: +// None. +// +void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//----------------------------------------------------------------------------- +// genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field. +// Since Vector3 is not a hardware supported write size, it is performed +// as two stores: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store TYP_SIMD12 field +// +// Return Value: +// None. +// +void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" + +instruction CodeGen::getOpForHWIntrinsic(GenTreeHWIntrinsic* node, var_types instrType) +{ + assert(!"unimplemented on LOONGARCH yet"); + return INS_invalid; +} + +//------------------------------------------------------------------------ +// genHWIntrinsic: Produce code for a GT_HWINTRINSIC node. +// +// This is the main routine which in turn calls the genHWIntrinsicXXX() routines. +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicUnaryOp: +// +// Produce code for a GT_HWINTRINSIC node with form UnaryOp. 
+// +// Consumes one scalar operand produces a scalar +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicUnaryOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicCrcOp: +// +// Produce code for a GT_HWINTRINSIC node with form CrcOp. +// +// Consumes two scalar operands and produces a scalar result +// +// This form differs from BinaryOp because the attr depends on the size of op2 +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicCrcOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdBinaryOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdBinaryOp. +// +// Consumes two SIMD operands and produces a SIMD result +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSwitchTable: generate the jump-table for imm-intrinsics +// with non-constant argument +// +// Arguments: +// swReg - register containing the switch case to execute +// tmpReg - temporary integer register for calculating the switch indirect branch target +// swMax - the number of switch cases. +// emitSwCase - lambda to generate an individual switch case +// +// Notes: +// Used for cases where an instruction only supports immediate operands, +// but at jit time the operand is not a constant. +// +// The importer is responsible for inserting an upstream range check +// (GT_HW_INTRINSIC_CHK) for swReg, so no range check is needed here. 
+// +template +void CodeGen::genHWIntrinsicSwitchTable(regNumber swReg, + regNumber tmpReg, + int swMax, + HWIntrinsicSwitchCaseBody emitSwCase) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdExtractOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdExtractOp. +// +// Consumes one SIMD operand and one scalar +// +// The element index operand is typically a const immediate +// When it is not, a switch table is generated +// +// See genHWIntrinsicSwitchTable comments +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdInsertOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdInsertOp. +// +// Consumes one SIMD operand and two scalars +// +// The element index operand is typically a const immediate +// When it is not, a switch table is generated +// +// See genHWIntrinsicSwitchTable comments +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdSelectOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdSelectOp. +// +// Consumes three SIMD operands and produces a SIMD result +// +// This intrinsic form requires one of the source registers to be the +// destination register. Inserts a INS_mov if this requirement is not met. +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. 
+// +void CodeGen::genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdSetAllOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdSetAllOp. +// +// Consumes single scalar operand and produces a SIMD result +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdSetAllOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdUnaryOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdUnaryOp. +// +// Consumes single SIMD operand and produces a SIMD result +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdBinaryRMWOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdBinaryRMWOp. +// +// Consumes two SIMD operands and produces a SIMD result. +// First operand is both source and destination. +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdBinaryRMWOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdTernaryRMWOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdTernaryRMWOp +// +// Consumes three SIMD operands and produces a SIMD result. +// First operand is both source and destination. +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. 
+// +void CodeGen::genHWIntrinsicSimdTernaryRMWOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicShaHashOp: +// +// Produce code for a GT_HWINTRINSIC node with form Sha1HashOp. +// Used in LOONGARCH64 SHA1 Hash operations. +// +// Consumes three operands and returns a Simd result. +// First Simd operand is both source and destination. +// Second Operand is an unsigned int. +// Third operand is a simd operand. + +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicShaHashOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicShaRotateOp: +// +// Produce code for a GT_HWINTRINSIC node with form Sha1RotateOp. +// Used in LOONGARCH64 SHA1 Rotate operations. +// +// Consumes one integer operand and returns unsigned int result. +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +#endif // FEATURE_HW_INTRINSICS + +/***************************************************************************** + * Unit testing of the LOONGARCH64 emitter: generate a bunch of instructions into the prolog + * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late + * disassembler thinks the instructions are the same as we do. + */ + +// Uncomment "#define ALL_LOONGARCH64_EMITTER_UNIT_TESTS" to run all the unit tests here. +// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. 
+//#define ALL_LOONGARCH64_EMITTER_UNIT_TESTS + +#if defined(DEBUG) +void CodeGen::genLOONGARCH64EmitterUnitTests() +{ + if (!verbose) + { + return; + } + + if (!compiler->opts.altJit) + { + // No point doing this in a "real" JIT. + return; + } + + // Mark the "fake" instructions in the output. + printf("*************** In genLOONGARCH64EmitterUnitTests()\n"); + + printf("*************** End of genLOONGARCH64EmitterUnitTests()\n"); +} +#endif // defined(DEBUG) + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustment: add a specified constant value to the stack pointer. +// No probe is done. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. +// regTmp - an available temporary register that is used if 'spDelta' cannot be encoded by +// 'sub sp, sp, #spDelta' instruction. +// Can be REG_NA if the caller knows for certain that 'spDelta' fits into the immediate +// value range. +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp) +{ + assert(spDelta < 0); + + // We assert that the SP change is less than one page. If it's greater, you should have called a + // function that does a probe, which will in turn call this function. + assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); + + if (-2048 <= spDelta && spDelta < 0) + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta); + else + { + GetEmitter()->emitIns_R_R_I(INS_srai_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta >> 3); + GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); + } +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentWithProbe: add a specified constant value to the stack pointer, +// and probe the stack as appropriate. 
Should only be called as a helper for +// genStackPointerConstantAdjustmentLoopWithProbe. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, +// but the stack pointer doesn't move. +// regTmp - temporary register to use as target for probe load instruction +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp) +{ + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, regTmp, REG_SP, 0); + genStackPointerConstantAdjustment(spDelta, regTmp); +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentLoopWithProbe: Add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Generates one probe per page, up to the total amount required. +// This will generate a sequence of probes in-line. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative. +// regTmp - temporary register to use as target for probe load instruction +// +// Return Value: +// Offset in bytes from SP to last probed address. +// +target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp) +{ + assert(spDelta < 0); + + const target_size_t pageSize = compiler->eeGetPageSize(); + + ssize_t spRemainingDelta = spDelta; + do + { + ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); + genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp); + spRemainingDelta -= spOneDelta; + } while (spRemainingDelta < 0); + + // What offset from the final SP was the last probe? This depends on the fact that + // genStackPointerConstantAdjustmentWithProbe() probes first, then does "SUB SP". + target_size_t lastTouchDelta = (target_size_t)(-spDelta) % pageSize; + if ((lastTouchDelta == 0) || (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)) + { + // We haven't probed almost a complete page. 
If lastTouchDelta==0, then spDelta was an exact + // multiple of pageSize, which means we last probed exactly one page back. Otherwise, we probed + // the page, but very far from the end. If the next action on the stack might subtract from SP + // first, before touching the current SP, then we do one more probe at the very bottom. This can + // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" + // strategy. + + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, regTmp, REG_SP, 0); + lastTouchDelta = 0; + } + + return lastTouchDelta; +} + +//------------------------------------------------------------------------ +// genCodeForTreeNode Generate code for a single node in the tree. +// +// Preconditions: +// All operands have been evaluated. +// +void CodeGen::genCodeForTreeNode(GenTree* treeNode) +{ + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + +#ifdef DEBUG + // Validate that all the operands for the current node are consumed in order. + // This is important because LSRA ensures that any necessary copies will be + // handled correctly. + lastConsumedNode = nullptr; + if (compiler->verbose) + { + unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio + compiler->gtDispLIRNode(treeNode, "Generating: "); + } +#endif // DEBUG + + // Is this a node whose value is already in a register? LSRA denotes this by + // setting the GTF_REUSE_REG_VAL flag. + if (treeNode->IsReuseRegVal()) + { + // For now, this is only used for constant nodes. 
+ assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL)); + JITDUMP(" TreeNode is marked ReuseReg\n"); + return; + } + + // contained nodes are part of their parents for codegen purposes + // ex : immediates, most LEAs + if (treeNode->isContained()) + { + return; + } + + switch (treeNode->gtOper) + { + case GT_START_NONGC: + GetEmitter()->emitDisableGC(); + break; + + case GT_START_PREEMPTGC: + // Kill callee saves GC registers, and create a label + // so that information gets propagated to the emitter. + gcInfo.gcMarkRegSetNpt(RBM_INT_CALLEE_SAVED); + genDefineTempLabel(genCreateTempLabel()); + break; + + case GT_PROF_HOOK: + // We should be seeing this only if profiler hook is needed + noway_assert(compiler->compIsProfilerHookNeeded()); + +#ifdef PROFILING_SUPPORTED + // Right now this node is used only for tail calls. In future if + // we intend to use it for Enter or Leave hooks, add a data member + // to this node indicating the kind of profiler hook. For example, + // helper number can be used. 
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif // PROFILING_SUPPORTED + break; + + case GT_LCLHEAP: + genLclHeap(treeNode); + break; + + case GT_CNS_INT: + if ((targetType == TYP_DOUBLE) || (targetType == TYP_FLOAT)) + treeNode->gtOper = GT_CNS_DBL; + FALLTHROUGH; + case GT_CNS_DBL: + genSetRegToConst(targetReg, targetType, treeNode); + genProduceReg(treeNode); + break; + + case GT_NOT: + case GT_NEG: + genCodeForNegNot(treeNode); + break; + + case GT_BSWAP: + case GT_BSWAP16: + genCodeForBswap(treeNode); + break; + + case GT_MOD: + case GT_UMOD: + case GT_DIV: + case GT_UDIV: + genCodeForDivMod(treeNode->AsOp()); + break; + + case GT_OR: + case GT_XOR: + case GT_AND: + assert(varTypeIsIntegralOrI(treeNode)); + + FALLTHROUGH; + + case GT_ADD: + case GT_SUB: + case GT_MUL: + genConsumeOperands(treeNode->AsOp()); + genCodeForBinary(treeNode->AsOp()); + break; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + genCodeForShift(treeNode); + break; + + case GT_CAST: + genCodeForCast(treeNode->AsOp()); + break; + + case GT_BITCAST: + genCodeForBitCast(treeNode->AsOp()); + break; + + case GT_LCL_FLD_ADDR: + case GT_LCL_VAR_ADDR: + genCodeForLclAddr(treeNode); + break; + + case GT_LCL_FLD: + genCodeForLclFld(treeNode->AsLclFld()); + break; + + case GT_LCL_VAR: + genCodeForLclVar(treeNode->AsLclVar()); + break; + + case GT_STORE_LCL_FLD: + genCodeForStoreLclFld(treeNode->AsLclFld()); + break; + + case GT_STORE_LCL_VAR: + genCodeForStoreLclVar(treeNode->AsLclVar()); + break; + + case GT_RETFILT: + case GT_RETURN: + genReturn(treeNode); + break; + + case GT_LEA: + // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. 
+ genLeaInstruction(treeNode->AsAddrMode()); + break; + + case GT_INDEX_ADDR: + genCodeForIndexAddr(treeNode->AsIndexAddr()); + break; + + case GT_IND: + genCodeForIndir(treeNode->AsIndir()); + break; + + case GT_INC_SATURATE: + genCodeForIncSaturate(treeNode); + break; + + case GT_MULHI: + genCodeForMulHi(treeNode->AsOp()); + break; + + case GT_SWAP: + genCodeForSwap(treeNode->AsOp()); + break; + + case GT_JMP: + genJmpMethod(treeNode); + break; + + case GT_CKFINITE: + genCkfinite(treeNode); + break; + + case GT_INTRINSIC: + genIntrinsic(treeNode); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + genSIMDIntrinsic(treeNode->AsSIMD()); + break; +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + genHWIntrinsic(treeNode->AsHWIntrinsic()); + break; +#endif // FEATURE_HW_INTRINSICS + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_CMP: + if (treeNode->GetRegNum() != REG_NA) + { + genCodeForCompare(treeNode->AsOp()); + } + else if (!treeNode->gtNext) + genCodeForJumpTrue(treeNode->AsOp()); + else if (!treeNode->gtNext->OperIs(GT_JTRUE)) + { + GenTree* treeNode_next = treeNode->gtNext; + while (treeNode_next) + { + if (treeNode_next->OperIs(GT_JTRUE)) + break; + treeNode_next = treeNode_next->gtNext; + }; + assert(treeNode_next->OperIs(GT_JTRUE)); + //genCodeForJumpTrue(treeNode_next->AsOp()); + genCodeForCompare(treeNode_next->AsOp()); + } + break; + + case GT_JTRUE: + genCodeForJumpTrue(treeNode->AsOp()); + break; + + case GT_JCMP: + genCodeForJumpCompare(treeNode->AsOp()); + break; + + case GT_JCC: + genCodeForJcc(treeNode->AsCC()); + break; + + case GT_SETCC: + genCodeForSetcc(treeNode->AsCC()); + break; + + case GT_RETURNTRAP: + genCodeForReturnTrap(treeNode->AsOp()); + break; + + case GT_STOREIND: + genCodeForStoreInd(treeNode->AsStoreInd()); + break; + + case GT_COPY: + // This is handled at the time we call genConsumeReg() on the GT_COPY + break; + + case GT_FIELD_LIST: + // Should always 
be marked contained. + assert(!"LIST, FIELD_LIST nodes should always be marked contained."); + break; + + case GT_PUTARG_STK: + genPutArgStk(treeNode->AsPutArgStk()); + break; + + case GT_PUTARG_REG: + genPutArgReg(treeNode->AsOp()); + break; + +#if FEATURE_ARG_SPLIT + case GT_PUTARG_SPLIT: + genPutArgSplit(treeNode->AsPutArgSplit()); + break; +#endif // FEATURE_ARG_SPLIT + + case GT_CALL: + genCallInstruction(treeNode->AsCall()); + break; + + case GT_MEMORYBARRIER: + { + CodeGen::BarrierKind barrierKind = + treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL; + + instGen_MemoryBarrier(barrierKind); + break; + } + + case GT_XCHG: + case GT_XADD: + genLockedInstructions(treeNode->AsOp()); + break; + + case GT_CMPXCHG: + genCodeForCmpXchg(treeNode->AsCmpXchg()); + break; + + case GT_RELOAD: + // do nothing - reload is just a marker. + // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child + // into the register specified in this node. + break; + + case GT_NOP: + break; + + case GT_KEEPALIVE: + if (treeNode->AsOp()->gtOp1->isContained()) + { + // For this case we simply need to update the lifetime of the local. + genUpdateLife(treeNode->AsOp()->gtOp1); + } + else + { + genConsumeReg(treeNode->AsOp()->gtOp1); + } + break; + + case GT_NO_OP: + instGen(INS_nop); + break; + + case GT_ARR_BOUNDS_CHECK: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif // FEATURE_HW_INTRINSICS + genRangeCheck(treeNode); + break; + + case GT_PHYSREG: + genCodeForPhysReg(treeNode->AsPhysReg()); + break; + + case GT_NULLCHECK: + genCodeForNullCheck(treeNode->AsIndir()); + break; + + case GT_CATCH_ARG: + + noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); + + /* Catch arguments get passed in a register. genCodeForBBlist() + would have marked it as holding a GC object, but not used. 
*/ + + noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); + genConsumeReg(treeNode); + break; + + case GT_PINVOKE_PROLOG: + noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); + + // the runtime side requires the codegen here to be consistent +#ifdef PSEUDORANDOM_NOP_INSERTION + emit->emitDisableRandomNops(); +#endif // PSEUDORANDOM_NOP_INSERTION + break; + + case GT_LABEL: + genPendingCallLabel = genCreateTempLabel(); + emit->emitIns_R_L(INS_ld_d, EA_PTRSIZE, genPendingCallLabel, targetReg); + break; + + case GT_STORE_OBJ: + case GT_STORE_DYN_BLK: + case GT_STORE_BLK: + genCodeForStoreBlk(treeNode->AsBlk()); + break; + + case GT_JMPTABLE: + genJumpTable(treeNode); + break; + + case GT_SWITCH_TABLE: + genTableBasedSwitch(treeNode); + break; + + case GT_ARR_INDEX: + genCodeForArrIndex(treeNode->AsArrIndex()); + break; + + case GT_ARR_OFFSET: + genCodeForArrOffset(treeNode->AsArrOffs()); + break; + + case GT_IL_OFFSET: + // Do nothing; these nodes are simply markers for debug info. + break; + + default: + { +#ifdef DEBUG + char message[256]; + _snprintf_s(message, ArrLen(message), _TRUNCATE, "NYI: Unimplemented node type %s", + GenTree::OpName(treeNode->OperGet())); + NYIRAW(message); +#else + NYI("unimplemented node"); +#endif + } + break; + } +} + +//------------------------------------------------------------------------ +// genSetRegToIcon: Generate code that will set the given register to the integer constant. +// +void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type) +{ + // Reg cannot be a FP reg + assert(!genIsValidFloatReg(reg)); + + // The only TYP_REF constant that can come this path is a managed 'null' since it is not + // relocatable. Other ref type constants (e.g. string objects) go through a different + // code path. 
+ noway_assert(type != TYP_REF || val == 0); + + GetEmitter()->emitIns_I_la(emitActualTypeSize(type), reg, val); + regSet.verifyRegUsed(reg); +} + +//--------------------------------------------------------------------- +// genSetGSSecurityCookie: Set the "GS" security cookie in the prolog. +// +// Arguments: +// initReg - register to use as a scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. +// +// Return Value: +// None +// +void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + if (!compiler->getNeedsGSSecurityCookie()) + { + return; + } + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + noway_assert(compiler->gsGlobalSecurityCookieVal != 0); + // initReg = #GlobalSecurityCookieVal; [frame.GSSecurityCookie] = initReg + genSetRegToIcon(initReg, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); + } + else + { + //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, initReg, initReg, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + } + else + { + //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, initReg, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000)>>12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, 
((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff)>>2); + } + regSet.verifyRegUsed(initReg); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); + } + + *pInitRegZeroed = false; +} + +//--------------------------------------------------------------------- +// genIntrinsic - generate code for a given intrinsic +// +// Arguments +// treeNode - the GT_INTRINSIC node +// +// Return value: +// None +// +void CodeGen::genIntrinsic(GenTree* treeNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//--------------------------------------------------------------------- +// genPutArgStk - generate code for a GT_PUTARG_STK node +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// +// Return value: +// None +// +void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) +{ + assert(treeNode->OperIs(GT_PUTARG_STK)); + GenTree* source = treeNode->gtOp1; + var_types targetType = genActualType(source->TypeGet()); + emitter* emit = GetEmitter(); + + // This is the varNum for our store operations, + // typically this is the varNum for the Outgoing arg space + // When we are generating a tail call it will be the varNum for arg0 + unsigned varNumOut = (unsigned)-1; + unsigned argOffsetMax = (unsigned)-1; // Records the maximum size of this area for assert checks + + // Get argument offset to use with 'varNumOut' + // Here we cross check that argument offset hasn't changed from lowering to codegen since + // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. + unsigned argOffsetOut = treeNode->getArgOffset(); + +#ifdef DEBUG + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode); + assert(curArgTabEntry != nullptr); + DEBUG_ARG_SLOTS_ASSERT(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE)); +#endif // DEBUG + + // Whether to setup stk arg in incoming or out-going arg area? + // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. 
+ // All other calls - stk arg is setup in out-going arg area. + if (treeNode->putInIncomingArgArea()) + { + varNumOut = getFirstArgWithStackSlot(); + argOffsetMax = compiler->compArgSize; +#if FEATURE_FASTTAILCALL + // This must be a fast tail call. + assert(treeNode->gtCall->IsFastTailCall()); + + // Since it is a fast tail call, the existence of first incoming arg is guaranteed + // because fast tail call requires that in-coming arg area of caller is >= out-going + // arg area required for tail call. + LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]); + assert(varDsc != nullptr); +#endif // FEATURE_FASTTAILCALL + } + else + { + varNumOut = compiler->lvaOutgoingArgSpaceVar; + argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + } + + bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST); + + if (!isStruct) // a normal non-Struct argument + { + if (varTypeIsSIMD(targetType)) + { + assert(!"unimplemented on LOONGARCH yet"); + } + + instruction storeIns = ins_Store(targetType); + emitAttr storeAttr = emitTypeSize(targetType); + + // If it is contained then source must be the integer constant zero + if (source->isContained()) + { + assert(source->OperGet() == GT_CNS_INT); + assert(source->AsIntConCommon()->IconValue() == 0); + + emit->emitIns_S_R(storeIns, storeAttr, REG_R0, varNumOut, argOffsetOut); + } + else + { + genConsumeReg(source); + if (storeIns == INS_st_w) + { + emit->emitIns_R_R_R(INS_add_w, EA_4BYTE, source->GetRegNum(), source->GetRegNum(), REG_R0); + storeIns = INS_st_d; + storeAttr = EA_8BYTE; + } + emit->emitIns_S_R(storeIns, storeAttr, source->GetRegNum(), varNumOut, argOffsetOut); + } + argOffsetOut += EA_SIZE_IN_BYTES(storeAttr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + } + else // We have some kind of a struct argument + { + assert(source->isContained()); // We expect that this node was marked as contained in Lower + + if (source->OperGet() == GT_FIELD_LIST) + { + 
genPutArgStkFieldList(treeNode, varNumOut); + } + else // We must have a GT_OBJ or a GT_LCL_VAR + { + noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ)); + + var_types targetType = source->TypeGet(); + noway_assert(varTypeIsStruct(targetType)); + + // Setup loReg from the internal registers that we reserved in lower. + // + regNumber loReg = treeNode->ExtractTempReg(); + regNumber addrReg = REG_NA; + + GenTreeLclVarCommon* varNode = nullptr; + GenTree* addrNode = nullptr; + + if (source->OperGet() == GT_LCL_VAR) + { + varNode = source->AsLclVarCommon(); + } + else // we must have a GT_OBJ + { + assert(source->OperGet() == GT_OBJ); + + addrNode = source->AsOp()->gtOp1; + + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression + // + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + { + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + else // addrNode is used + { + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); + addrReg = addrNode->GetRegNum(); + } + } + + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + ClassLayout* layout; + + //unsigned gcPtrCount; // The count of GC pointers in the struct + unsigned srcSize; + bool isHfa; + + //gcPtrCount = treeNode->gtNumSlots; + // Setup the srcSize and layout + if (source->OperGet() == GT_LCL_VAR) + { + assert(varNode != nullptr); + LclVarDsc* varDsc = compiler->lvaGetDesc(varNode); + + // This struct also must live in the stack frame + // And it can't live in a register (SIMD) + assert(varDsc->lvType == 
TYP_STRUCT); + assert(varDsc->lvOnFrame && !varDsc->lvRegister); + + srcSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine + // as that is how much stack is allocated for this LclVar + layout = varDsc->GetLayout(); + } + else // we must have a GT_OBJ + { + assert(source->OperGet() == GT_OBJ); + + // If the source is an OBJ node then we need to use the type information + // it provides (size and GC layout) even if the node wraps a lclvar. Due + // to struct reinterpretation (e.g. Unsafe.As) it is possible that + // the OBJ node has a different type than the lclvar. + CORINFO_CLASS_HANDLE objClass = source->AsObj()->GetLayout()->GetClassHandle(); + + srcSize = compiler->info.compCompHnd->getClassSize(objClass); + layout = source->AsObj()->GetLayout(); + } + + unsigned structSize; + + unsigned dstSize = treeNode->GetStackByteSize(); + if (dstSize != srcSize) + { + // We can generate a smaller code if store size is a multiple of TARGET_POINTER_SIZE. + // The dst size can be rounded up to PUTARG_STK size. + // The src size can be rounded up if it reads a local variable slot because the local + // variable stack allocation size is rounded up to be a multiple of the TARGET_POINTER_SIZE. + // The exception is arm64 apple arguments because they can be passed without padding. + if (varNode != nullptr) + { + // If we have a varNode, even if it was casted using `OBJ`, we can read its original memory size. + const LclVarDsc* varDsc = compiler->lvaGetDesc(varNode); + const unsigned varStackSize = varDsc->lvSize(); + if (varStackSize >= srcSize) + { + srcSize = varStackSize; + } + } + } + if (dstSize == srcSize) + { + structSize = dstSize; + } + else + { + // With Unsafe object wwe can have different strange combinations: + // PutArgStk<8>(Obj<16>(LclVar<8>)) -> copy 8 bytes; + // PutArgStk<16>(Obj<16>(LclVar<8>)) -> copy 16 bytes, reading undefined memory after the local. 
+ structSize = min(dstSize, srcSize); + } + + int remainingSize = structSize; + unsigned structOffset = 0; + unsigned nextIndex = 0; + + while (remainingSize > 0) + { + var_types type; + + if (remainingSize >= TARGET_POINTER_SIZE) + { + type = layout->GetGCPtrType(nextIndex); + } + else // (remainingSize < TARGET_POINTER_SIZE) + { + // the left over size is smaller than a pointer and thus can never be a GC type + assert(!layout->IsGCPtr(nextIndex)); + + if (remainingSize == 1) + { + type = TYP_UBYTE; + } + else if (remainingSize == 2) + { + type = TYP_USHORT; + } + else + { + assert(remainingSize == 4); + type = TYP_UINT; + } + } + const emitAttr attr = emitTypeSize(type); + const unsigned moveSize = genTypeSize(type); + assert(EA_SIZE_IN_BYTES(attr) == moveSize); + + remainingSize -= moveSize; + + instruction loadIns = ins_Load(type); + if (varNode != nullptr) + { + // Load from our varNumImp source + emit->emitIns_R_S(loadIns, attr, loReg, varNode->GetLclNum(), structOffset); + } + else + { + assert(loReg != addrReg); + // Load from our address expression source + emit->emitIns_R_R_I(loadIns, attr, loReg, addrReg, structOffset); + } + + // Emit a store instruction to store the register into the outgoing argument area + instruction storeIns = ins_Store(type); + emit->emitIns_S_R(storeIns, attr, loReg, varNumOut, argOffsetOut); + argOffsetOut += moveSize; + assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area + + structOffset += moveSize; + nextIndex++; + } + } + } +} + +//--------------------------------------------------------------------- +// genPutArgReg - generate code for a GT_PUTARG_REG node +// +// Arguments +// tree - the GT_PUTARG_REG node +// +// Return value: +// None +// +void CodeGen::genPutArgReg(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_PUTARG_REG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + assert(targetType != TYP_STRUCT); + + GenTree* op1 = tree->gtOp1; + 
genConsumeReg(op1); + + // If child node is not already in the register we need, move it + if (targetReg != op1->GetRegNum()) + { + if (emitter::isFloatReg(targetReg) == emitter::isFloatReg(op1->GetRegNum())) + inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); +#if 1 + else if (emitter::isFloatReg(targetReg)) + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, op1->GetRegNum()); + else //if (!emitter::isFloatReg(targetReg)) + { + assert(!emitter::isFloatReg(targetReg)); + GetEmitter()->emitIns_R_R(INS_movfr2gr_d, EA_8BYTE, targetReg, op1->GetRegNum()); + } +#endif + } + genProduceReg(tree); +} + +#if FEATURE_ARG_SPLIT +//--------------------------------------------------------------------- +// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node +// +// Arguments +// tree - the GT_PUTARG_SPLIT node +// +// Return value: +// None +// +void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) +{ + assert(treeNode->OperIs(GT_PUTARG_SPLIT)); + + GenTree* source = treeNode->gtOp1; + emitter* emit = GetEmitter(); + unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; + unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + + if (source->OperGet() == GT_FIELD_LIST) + { + // Evaluate each of the GT_FIELD_LIST items into their register + // and store their register into the outgoing argument area + unsigned regIndex = 0; + unsigned firstOnStackOffs = UINT_MAX; + + for (GenTreeFieldList::Use& use : source->AsFieldList()->Uses()) + { + GenTree* nextArgNode = use.GetNode(); + regNumber fieldReg = nextArgNode->GetRegNum(); + genConsumeReg(nextArgNode); + + if (regIndex >= treeNode->gtNumRegs) + { + if (firstOnStackOffs == UINT_MAX) + { + firstOnStackOffs = use.GetOffset(); + } + var_types type = nextArgNode->TypeGet(); + emitAttr attr = emitTypeSize(type); + + unsigned offset = treeNode->getArgOffset() + use.GetOffset() - firstOnStackOffs; + // We can't write beyond the outgoing arg area + assert(offset + 
EA_SIZE_IN_BYTES(attr) <= argOffsetMax); + + // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing + // argument area + emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, offset); + } + else + { + var_types type = treeNode->GetRegType(regIndex); + regNumber argReg = treeNode->GetRegNumByIdx(regIndex); + + // If child node is not already in the register we need, move it + if (argReg != fieldReg) + { + inst_RV_RV(ins_Copy(type), argReg, fieldReg, type); + } + regIndex++; + } + } + } + else + { + var_types targetType = source->TypeGet(); + assert(source->OperGet() == GT_OBJ); + assert(varTypeIsStruct(targetType)); + + regNumber baseReg = treeNode->ExtractTempReg(); + regNumber addrReg = REG_NA; + + GenTreeLclVarCommon* varNode = nullptr; + GenTree* addrNode = nullptr; + + addrNode = source->AsOp()->gtOp1; + + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression + // + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + { + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + // This is the varNum for our load operations, + // only used when we have a struct with a LclVar source + unsigned srcVarNum = BAD_VAR_NUM; + + if (varNode != nullptr) + { + assert(varNode->isContained()); + srcVarNum = varNode->GetLclNum(); + assert(srcVarNum < compiler->lvaCount); + + // handle promote situation + LclVarDsc* varDsc = compiler->lvaTable + srcVarNum; + + // This struct also must live in the stack frame + // And it can't live in a register (SIMD) + 
assert(varDsc->lvType == TYP_STRUCT); + assert(varDsc->lvOnFrame && !varDsc->lvRegister); + + // We don't split HFA struct + assert(!varDsc->lvIsHfa()); + } + else // addrNode is used + { + assert(addrNode != nullptr); + // TODO-Cleanup: `Lowering::NewPutArg` marks only `LCL_VAR_ADDR` as contained nowadays, + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); + addrReg = addrNode->GetRegNum(); + + // If addrReg equal to baseReg, we use the last target register as alternative baseReg. + // Because the candidate mask for the internal baseReg does not include any of the target register, + // we can ensure that baseReg, addrReg, and the last target register are not all same. + assert(baseReg != addrReg); + + // We don't split HFA struct + assert(!compiler->IsHfa(source->AsObj()->GetLayout()->GetClassHandle())); + } + + ClassLayout* layout = source->AsObj()->GetLayout(); + + // Put on stack first + unsigned nextIndex = treeNode->gtNumRegs; + unsigned structOffset = nextIndex * TARGET_POINTER_SIZE; + int remainingSize = treeNode->GetStackByteSize(); + unsigned argOffsetOut = treeNode->getArgOffset(); + + // remainingSize is always multiple of TARGET_POINTER_SIZE + assert(remainingSize % TARGET_POINTER_SIZE == 0); + while (remainingSize > 0) + { + var_types type = layout->GetGCPtrType(nextIndex); + + if (varNode != nullptr) + { + // Load from our varNumImp source + emit->emitIns_R_S(INS_ld_d, emitTypeSize(type), baseReg, srcVarNum, structOffset); + } + else + { + // check for case of destroying the addrRegister while we still need it + assert(baseReg != addrReg); + + // Load from our address expression source + emit->emitIns_R_R_I(INS_ld_d, emitTypeSize(type), baseReg, addrReg, structOffset); + } + + // Emit str instruction to store the register into the outgoing argument area + emit->emitIns_S_R(INS_st_d, emitTypeSize(type), baseReg, varNumOut, argOffsetOut); + + argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of 
// ...the struct (continuation of the wrapped comment on the previous line)
        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
        remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct
        structOffset += TARGET_POINTER_SIZE;
        nextIndex += 1;
    }

    // We set up the registers in order, so that we assign the last target register `baseReg` is no longer in use,
    // in case we had to reuse the last target register for it.
    structOffset = 0;
    for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++)
    {
        regNumber targetReg = treeNode->GetRegNumByIdx(idx);
        var_types type      = treeNode->GetRegType(idx);

        if (varNode != nullptr)
        {
            // Load from our varNumImp source
            emit->emitIns_R_S(ins_Load(type), emitTypeSize(type), targetReg, srcVarNum, structOffset);
        }
        else
        {
            // check for case of destroying the addrRegister while we still need it
            if (targetReg == addrReg && idx != treeNode->gtNumRegs - 1)
            {
                // Copy the address into baseReg (ori rd, rs, 0 is a plain register move)
                // so later iterations can still read through it.
                assert(targetReg != baseReg);
                emit->emitIns_R_R_I(INS_ori, emitActualTypeSize(type), baseReg, addrReg, 0);
                addrReg = baseReg;
            }

            // Load from our address expression source
            emit->emitIns_R_R_I(ins_Load(type), emitTypeSize(type), targetReg, addrReg, structOffset);
        }
        structOffset += TARGET_POINTER_SIZE;
    }
    }
    genProduceReg(treeNode);
}
#endif // FEATURE_ARG_SPLIT

// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
//
// Arguments:
//    treeNode  -  Gentree of GT_STORE_LCL_VAR
//
// Return Value:
//    None
//
// Assumption:
//    The child of store is a multi-reg call node.
//    genProduceReg() on treeNode is made by caller of this routine.
//
void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);

    // Structs of size >=9 and <=16 are returned in two return registers on LOONGARCH64 and HFAs.
    assert(varTypeIsStruct(treeNode));

    // Assumption: current implementation requires that a multi-reg
    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
    // being promoted.
    unsigned   lclNum = treeNode->AsLclVarCommon()->GetLclNum();
    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
    noway_assert(varDsc->lvIsMultiRegRet);

    GenTree*     op1       = treeNode->gtGetOp1();
    GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
    GenTreeCall* call      = actualOp1->AsCall();
    assert(call->HasMultiRegRetVal());

    genConsumeRegs(op1);

    const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
    unsigned              regCount     = pRetTypeDesc->GetReturnRegCount();

    if (treeNode->GetRegNum() != REG_NA)
    {
        // Enregistered multi-reg destinations (SIMD) are not implemented for LOONGARCH64 yet.
        assert(!"unimplemented on LOONGARCH yet");
        // Right now the only enregistrable multi-reg return types supported are SIMD types.
        assert(varTypeIsSIMD(treeNode));
        assert(regCount != 0);

        regNumber dst = treeNode->GetRegNum();

        // Treat dst register as a homogenous vector with element size equal to the src size
        // Insert pieces in reverse order
        for (int i = regCount - 1; i >= 0; --i)
        {
            var_types type = pRetTypeDesc->GetReturnRegType(i);
            regNumber reg  = call->GetRegNumByIdx(i);
            if (op1->IsCopyOrReload())
            {
                // GT_COPY/GT_RELOAD will have valid reg for those positions
                // that need to be copied or reloaded.
                regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
                if (reloadReg != REG_NA)
                {
                    reg = reloadReg;
                }
            }

            assert(reg != REG_NA);
            if (varTypeIsFloating(type))
            {
                // If the register piece was passed in a floating point register
                // Use a vector mov element instruction
                // src is not a vector, so it is in the first element reg[0]
                // mov dst[i], reg[0]
                // This effectively moves from `reg[0]` to `dst[i]`, leaving other dst bits unchanged till further
                // iterations
                // For the case where reg == dst, if we iterate so that we write dst[0] last, we eliminate the need for
                // a temporary
                GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(type), dst, reg, i, 0);
            }
            else
            {
                // If the register piece was passed in an integer register
                // Use a vector mov from general purpose register instruction
                // mov dst[i], reg
                // This effectively moves from `reg` to `dst[i]`
                GetEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(type), dst, reg, i);
            }
        }

        genProduceReg(treeNode);
    }
    else
    {
        // Stack store: spill each return register piece to the local's stack home.
        int       offset = 0;
        var_types type   = pRetTypeDesc->GetReturnRegType(0);
        regNumber reg    = call->GetRegNumByIdx(0);
        if (op1->IsCopyOrReload())
        {
            // GT_COPY/GT_RELOAD will have valid reg for those positions
            // that need to be copied or reloaded.
            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
            if (reloadReg != REG_NA)
            {
                reg = reloadReg;
            }
        }

        assert(reg != REG_NA);
        GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);

        if (1 < regCount)
        {
            // Second piece: its offset is at least the size of the first piece, and is
            // bumped up to the second piece's own size for alignment when that is larger.
            offset = genTypeSize(type);
            type   = pRetTypeDesc->GetReturnRegType(1);
            reg    = call->GetRegNumByIdx(1);
            offset = offset < genTypeSize(type) ? genTypeSize(type) : offset;
            GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
        }

        genUpdateLife(treeNode);
        varDsc->SetRegNum(REG_STK);
    }
}

//------------------------------------------------------------------------
// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node.
//
void CodeGen::genRangeCheck(GenTree* oper)
{
    noway_assert(oper->OperIsBoundsCheck());
    GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();

    GenTree* arrLen   = bndsChk->GetArrayLength();
    GenTree* arrIndex = bndsChk->GetIndex();
    GenTree* arrRef   = NULL;
    int      lenOffset = 0;

    GenTree*     src1;
    GenTree*     src2;
    regNumber    reg1;
    regNumber    reg2;
    emitJumpKind jmpKind = EJ_jmp;

    genConsumeRegs(arrIndex);
    genConsumeRegs(arrLen);

    emitter*             emit     = GetEmitter();
    GenTreeIntConCommon* intConst = nullptr;
    if (arrIndex->isContainedIntOrIImmed())
    {
        // Index is a contained constant: materialize it into the scratch register R21.
        src1 = arrLen;
        src2 = arrIndex;
        reg1 = REG_R21;
        reg2 = src1->GetRegNum();

        intConst    = src2->AsIntConCommon();
        ssize_t imm = intConst->IconValue();
        if (imm == INT64_MAX)
        {
            // INT64_MAX = (-1 >>u 1); build it in two instructions instead of a long literal load.
            emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, -1);
            emit->emitIns_R_R_I(INS_srli_d, EA_PTRSIZE, REG_R21, REG_R21, 1);
        }
        else
        {
            emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm);
        }
    }
    else
    {
        src1 = arrIndex;
        src2 = arrLen;
        reg1 = src1->GetRegNum();

        if (src2->isContainedIntOrIImmed())
        {
            // Length is a contained constant: materialize it into R21.
            reg2        = REG_R21;
            ssize_t imm = src2->AsIntConCommon()->IconValue();
            emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm);
        }
        else
        {
            reg2 = src2->GetRegNum();
        }
    }

#ifdef DEBUG
    var_types bndsChkType = genActualType(src2->TypeGet());
    var_types src1ChkType = genActualType(src1->TypeGet());
    // Bounds checks can only be 32 or 64 bit sized comparisons.
    assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
    assert(src1ChkType == TYP_INT || src1ChkType == TYP_LONG);
#endif // DEBUG

    // Throw if (unsigned)index >= (unsigned)length; the unsigned compare also catches negative indices.
    genJumpToThrowHlpBlk_la(bndsChk->gtThrowKind, INS_bgeu, reg1, bndsChk->gtIndRngFailBB, reg2);
}

//---------------------------------------------------------------------
// genCodeForPhysReg - generate code for a GT_PHYSREG node
//
// Arguments
//    tree - the GT_PHYSREG node
//
// Return value:
//    None
//
void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
{
    assert(tree->OperIs(GT_PHYSREG));

    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->GetRegNum();

    if (targetReg != tree->gtSrcReg)
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType);
        // The source physical register may hold a GC pointer; transfer that state to the target.
        genTransferRegGCState(targetReg, tree->gtSrcReg);
    }

    genProduceReg(tree);
}

//---------------------------------------------------------------------
// genCodeForNullCheck - generate code for a GT_NULLCHECK node
//
// Arguments
//    tree - the GT_NULLCHECK node
//
// Return value:
//    None
//
void CodeGen::genCodeForNullCheck(GenTreeIndir* tree)
{
    assert(tree->OperIs(GT_NULLCHECK));
    assert(!tree->gtOp1->isContained());
    regNumber addrReg = genConsumeReg(tree->gtOp1);

    // Load into the zero register: the value is discarded, only the fault matters.
    regNumber targetReg = REG_R0;

    GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, targetReg, addrReg, 0);
}

//------------------------------------------------------------------------
// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
//                     producing the effective index by subtracting the lower bound.
//
// Arguments:
//    arrIndex - the node for which we're generating code
//
// Return Value:
//    None.
//
void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
{
    emitter*  emit      = GetEmitter();
    GenTree*  arrObj    = arrIndex->ArrObj();
    GenTree*  indexNode = arrIndex->IndexExpr();
    regNumber arrReg    = genConsumeReg(arrObj);
    regNumber indexReg  = genConsumeReg(indexNode);
    regNumber tgtReg    = arrIndex->GetRegNum();
    noway_assert(tgtReg != REG_NA);

    // We will use a temp register to load the lower bound and dimension size values.
    // R21 serves as that scratch register on LOONGARCH64.

    //regNumber tmpReg = arrIndex->GetSingleTempReg();
    assert(tgtReg != REG_R21);

    unsigned dim  = arrIndex->gtCurrDim;
    unsigned rank = arrIndex->gtArrRank;
    unsigned offset;

    // effectiveIndex = index - lowerBound(dim)
    offset = compiler->eeGetMDArrayLowerBoundOffset(rank, dim);
    emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset);
    emit->emitIns_R_R_R(INS_sub_w, EA_4BYTE, tgtReg, indexReg, REG_R21);

    // Throw if effectiveIndex >= length(dim) (unsigned compare).
    offset = compiler->eeGetMDArrayLengthOffset(rank, dim);
    emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset);
    genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, tgtReg, nullptr, REG_R21);

    genProduceReg(arrIndex);
}

//------------------------------------------------------------------------
// genCodeForArrOffset: Generates code to compute the flattened array offset for
//                      one dimension of an array reference:
//                          result = (prevDimOffset * dimSize) + effectiveIndex
//                      where dimSize is obtained from the arrObj operand
//
// Arguments:
//    arrOffset - the node for which we're generating code
//
// Return Value:
//    None.
//
// Notes:
//    dimSize and effectiveIndex are always non-negative, the former by design,
//    and the latter because it has been normalized to be zero-based.

void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
{
    GenTree*  offsetNode = arrOffset->gtOffset;
    GenTree*  indexNode  = arrOffset->gtIndex;
    regNumber tgtReg     = arrOffset->GetRegNum();

    noway_assert(tgtReg != REG_NA);

    if (!offsetNode->IsIntegralConst(0))
    {
        emitter*  emit      = GetEmitter();
        regNumber offsetReg = genConsumeReg(offsetNode);
        regNumber indexReg  = genConsumeReg(indexNode);
        regNumber arrReg    = genConsumeReg(arrOffset->gtArrObj);
        noway_assert(offsetReg != REG_NA);
        noway_assert(indexReg != REG_NA);
        noway_assert(arrReg != REG_NA);

        //regNumber tmpReg = arrOffset->GetSingleTempReg();

        unsigned dim    = arrOffset->gtCurrDim;
        unsigned rank   = arrOffset->gtArrRank;
        unsigned offset = compiler->eeGetMDArrayLengthOffset(rank, dim);

        // Load tmpReg with the dimension size and evaluate
        // tgtReg = offsetReg*tmpReg + indexReg.
        emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset);
        emit->emitIns_R_R_R(INS_mul_d, EA_PTRSIZE, REG_R21, REG_R21, offsetReg);
        emit->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tgtReg, REG_R21, indexReg);
    }
    else
    {
        // Zero previous-dimension offset: the result is just the effective index.
        regNumber indexReg = genConsumeReg(indexNode);
        if (indexReg != tgtReg)
        {
            // ori rd, rs, 0 is a register-to-register move.
            GetEmitter()->emitIns_R_R_I(INS_ori, emitActualTypeSize(TYP_INT), tgtReg, indexReg, 0);
        }
    }
    genProduceReg(arrOffset);
}

//------------------------------------------------------------------------
// genCodeForShift: Generates the code sequence for a GenTree node that
// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
//
// Arguments:
//    tree - the bit shift node (that specifies the type of bit shift to perform).
//
// Assumptions:
//    a) All GenTrees are register allocated.
+// +void CodeGen::genCodeForShift(GenTree* tree) +{ + //var_types targetType = tree->TypeGet(); + //genTreeOps oper = tree->OperGet(); + instruction ins = genGetInsForOper(tree); + emitAttr size = emitActualTypeSize(tree); + + assert(tree->GetRegNum() != REG_NA); + + genConsumeOperands(tree->AsOp()); + + GenTree* operand = tree->gtGetOp1(); + GenTree* shiftBy = tree->gtGetOp2(); + if (!shiftBy->IsCnsIntOrI()) + { + GetEmitter()->emitIns_R_R_R(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftBy->GetRegNum()); + } + else + { + unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; + + //should check shiftByImm for loongarch32-ins. + unsigned immWidth = emitter::getBitWidth(size); // For LOONGARCH64, immWidth will be set to 32 or 64 + shiftByImm &= (immWidth - 1); + + if (ins == INS_slli_w && shiftByImm >= 32) + { + ins = INS_slli_d; + } + else if (ins == INS_slli_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_slli_d; + } + else if (ins == INS_srai_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_srai_d; + } + else if (ins == INS_srli_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_srli_d; + } + else if (ins == INS_rotri_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_rotri_d; + } + + GetEmitter()->emitIns_R_R_I(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftByImm); + } + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR. +// +// Arguments: +// tree - the node. +// +void CodeGen::genCodeForLclAddr(GenTree* tree) +{ + assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + // Address of a local var. 
+ noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); + + emitAttr size = emitTypeSize(targetType); + + inst_RV_TT(INS_lea, targetReg, tree, 0, size); + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForLclFld: Produce code for a GT_LCL_FLD node. +// +// Arguments: +// tree - the GT_LCL_FLD node +// +void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) +{ + assert(tree->OperIs(GT_LCL_FLD)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + + NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported"); + assert(targetReg != REG_NA); + + emitAttr size = emitTypeSize(targetType); + unsigned offs = tree->GetLclOffs(); + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + + emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node. +// +// Arguments: +// tree - the GT_INDEX_ADDR node +// +void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) +{ + GenTree* const base = node->Arr(); + GenTree* const index = node->Index(); + + genConsumeReg(base); + genConsumeReg(index); + + // NOTE: `genConsumeReg` marks the consumed register as not a GC pointer, as it assumes that the input registers + // die at the first instruction generated by the node. This is not the case for `INDEX_ADDR`, however, as the + // base register is multiply-used. As such, we need to mark the base register as containing a GC pointer until + // we are finished generating the code for this node. + + gcInfo.gcMarkRegPtrVal(base->GetRegNum(), base->TypeGet()); + assert(!varTypeIsGC(index->TypeGet())); + + // The index is never contained, even if it is a constant. 
+ assert(index->isUsedFromReg()); + + //const regNumber tmpReg = node->GetSingleTempReg(); + + // Generate the bounds check if necessary. + if ((node->gtFlags & GTF_INX_RNGCHK) != 0) + { + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, base->GetRegNum(), node->gtLenOffset); + // if (index >= REG_R21) + // { + // JumpToThrowHlpBlk; + // } + // + // sltu AT, index, REG_R21 + // bne AT, zero, RngChkExit + // IndRngFail: + // ... + // RngChkExit: + genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), node->gtIndRngFailBB, REG_R21); + } + + emitAttr attr = emitActualTypeSize(node); + // Can we use a ScaledAdd instruction? + // + if (isPow2(node->gtElemSize) && (node->gtElemSize <= 2048)) + { + DWORD scale; + BitScanForward(&scale, node->gtElemSize); + + // dest = base + index * scale + genScaledAdd(emitActualTypeSize(node), node->GetRegNum(), base->GetRegNum(), index->GetRegNum(), scale); + } + else // we have to load the element size and use a MADD (multiply-add) instruction + { + // REG_R21 = element size + CodeGen::genSetRegToIcon(REG_R21, (ssize_t)node->gtElemSize, TYP_INT); + + // dest = index * REG_R21 + base + if (attr == EA_4BYTE) + { + GetEmitter()->emitIns_R_R_R(INS_mul_w, EA_4BYTE, REG_R21, index->GetRegNum(), REG_R21); + GetEmitter()->emitIns_R_R_R(INS_add_w, attr, node->GetRegNum(), REG_R21, base->GetRegNum()); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_mul_d, EA_PTRSIZE, REG_R21, index->GetRegNum(), REG_R21); + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, node->GetRegNum(), REG_R21, base->GetRegNum()); + } + } + + // dest = dest + elemOffs + GetEmitter()->emitIns_R_R_I(INS_addi_d, attr, node->GetRegNum(), node->GetRegNum(), node->gtElemOffset); + + gcInfo.gcMarkRegSetNpt(base->gtGetRegMask()); + + genProduceReg(node); +} + +//------------------------------------------------------------------------ +// genCodeForIndir: Produce code for a GT_IND node. 
+// +// Arguments: +// tree - the GT_IND node +// +void CodeGen::genCodeForIndir(GenTreeIndir* tree) +{ + assert(tree->OperIs(GT_IND)); + +#ifdef FEATURE_SIMD + // Handling of Vector3 type values loaded through indirection. + if (tree->TypeGet() == TYP_SIMD12) + { + genLoadIndTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + var_types type = tree->TypeGet(); + instruction ins = ins_Load(type); + instruction ins2 = INS_none; + regNumber targetReg = tree->GetRegNum(); + regNumber tmpReg = targetReg; + emitAttr attr = emitActualTypeSize(type); + int offset = 0; + + genConsumeAddress(tree->Addr()); + + if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) + { + instGen_MemoryBarrier(BARRIER_FULL); + } + + GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), targetReg, tree); + + genProduceReg(tree); +} + +//---------------------------------------------------------------------------------- +// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call +// +// Arguments: +// cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] +// +// Preconditions: +// The register assignments have been set appropriately. +// This is validated by genConsumeBlockOp(). +// +void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode) +{ + // Destination address goes in arg0, source address goes in arg1, and size goes in arg2. + // genConsumeBlockOp takes care of this for us. 
+ genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); + + if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before a volatile CpBlk operation + instGen_MemoryBarrier(); + } + + genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); + + if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a INS_BARRIER_RMB after a volatile CpBlk operation + instGen_MemoryBarrier(BARRIER_FULL); + } +} + +//---------------------------------------------------------------------------------- +// genCodeForCpBlkUnroll: Generates CpBlk code by performing a loop unroll +// +// Arguments: +// cpBlkNode - Copy block node +// +// Return Value: +// None +// +// Assumption: +// The size argument of the CpBlk node is a constant and <= CPBLK_UNROLL_LIMIT bytes. +// +void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) +{ + assert(cpBlkNode->OperIs(GT_STORE_BLK)); + + unsigned dstLclNum = BAD_VAR_NUM; + regNumber dstAddrBaseReg = REG_NA; + int dstOffset = 0; + GenTree* dstAddr = cpBlkNode->Addr(); + + if (!dstAddr->isContained()) + { + dstAddrBaseReg = genConsumeReg(dstAddr); + } + else if (dstAddr->OperIsAddrMode()) + { + assert(!dstAddr->AsAddrMode()->HasIndex()); + + dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base()); + dstOffset = dstAddr->AsAddrMode()->Offset(); + } + else + { + assert(dstAddr->OperIsLocalAddr()); + dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); + dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs(); + } + + unsigned srcLclNum = BAD_VAR_NUM; + regNumber srcAddrBaseReg = REG_NA; + int srcOffset = 0; + GenTree* src = cpBlkNode->Data(); + + assert(src->isContained()); + + if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + srcLclNum = src->AsLclVarCommon()->GetLclNum(); + srcOffset = src->AsLclVarCommon()->GetLclOffs(); + } + else + { + assert(src->OperIs(GT_IND)); + GenTree* srcAddr = src->AsIndir()->Addr(); + + if (!srcAddr->isContained()) + { + srcAddrBaseReg = genConsumeReg(srcAddr); + } + else if 
(srcAddr->OperIsAddrMode()) + { + srcAddrBaseReg = genConsumeReg(srcAddr->AsAddrMode()->Base()); + srcOffset = srcAddr->AsAddrMode()->Offset(); + } + else + { + assert(srcAddr->OperIsLocalAddr()); + srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum(); + srcOffset = srcAddr->AsLclVarCommon()->GetLclOffs(); + } + } + + if (cpBlkNode->IsVolatile()) + { + // issue a full memory barrier before a volatile CpBlk operation + instGen_MemoryBarrier(); + } + + emitter* emit = GetEmitter(); + unsigned size = cpBlkNode->GetLayout()->GetSize(); + + assert(size <= INT32_MAX); + assert(srcOffset < INT32_MAX - static_cast(size)); + assert(dstOffset < INT32_MAX - static_cast(size)); + + regNumber tempReg = cpBlkNode->ExtractTempReg(RBM_ALLINT); + + if (size >= 2 * REGSIZE_BYTES) + { + regNumber tempReg2 = REG_R21;//cpBlkNode->ExtractTempReg(RBM_ALLINT);//TODO:should amend. + + for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; + size -= regSize, srcOffset += regSize, dstOffset += regSize) + { + if (srcLclNum != BAD_VAR_NUM) + { + emit->emitIns_R_S(INS_ld_d, EA_8BYTE, tempReg, srcLclNum, srcOffset); + emit->emitIns_R_S(INS_ld_d, EA_8BYTE, tempReg2, srcLclNum, srcOffset + 8); + } + else + { + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tempReg, srcAddrBaseReg, srcOffset); + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tempReg2, srcAddrBaseReg, srcOffset + 8); + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(INS_st_d, EA_8BYTE, tempReg, dstLclNum, dstOffset); + emit->emitIns_S_R(INS_st_d, EA_8BYTE, tempReg2, dstLclNum, dstOffset + 8); + } + else + { + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tempReg, dstAddrBaseReg, dstOffset); + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tempReg2, dstAddrBaseReg, dstOffset + 8); + } + } + } + + for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, srcOffset += regSize, dstOffset += regSize) + { + while (regSize > size) + { + regSize /= 2; + } + + instruction loadIns; + instruction storeIns; + emitAttr attr; + + switch (regSize) + { 
+ case 1: + loadIns = INS_ld_b; + storeIns = INS_st_b; + attr = EA_4BYTE; + break; + case 2: + loadIns = INS_ld_h; + storeIns = INS_st_h; + attr = EA_4BYTE; + break; + case 4: + loadIns = INS_ld_w; + storeIns = INS_st_w; + attr = EA_ATTR(regSize); + break; + case 8: + loadIns = INS_ld_d; + storeIns = INS_st_d; + attr = EA_ATTR(regSize); + break; + default: + unreached(); + } + + if (srcLclNum != BAD_VAR_NUM) + { + emit->emitIns_R_S(loadIns, attr, tempReg, srcLclNum, srcOffset); + } + else + { + emit->emitIns_R_R_I(loadIns, attr, tempReg, srcAddrBaseReg, srcOffset); + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(storeIns, attr, tempReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_R_R_I(storeIns, attr, tempReg, dstAddrBaseReg, dstOffset); + } + } + + if (cpBlkNode->IsVolatile()) + { + // issue a load barrier after a volatile CpBlk operation + instGen_MemoryBarrier(BARRIER_LOAD_ONLY); + } +} + +//------------------------------------------------------------------------ +// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call +// +// Arguments: +// initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] +// +// Preconditions: +// The register assignments have been set appropriately. +// This is validated by genConsumeBlockOp(). +// +void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) +{ + // Size goes in arg2, source address goes in arg1, and size goes in arg2. + // genConsumeBlockOp takes care of this for us. 
    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);

    if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
    {
        // issue a full memory barrier before a volatile initBlock Operation
        instGen_MemoryBarrier();
    }

    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
}

// Generate code for a load from some address + offset
//   base: tree node which can be either a local address or arbitrary node
//   offset: distance from the base from which to load
void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
{
    emitter* emit = GetEmitter();

    if (base->OperIsLocalAddr())
    {
        // For a local-field address, fold the field offset into the load offset.
        if (base->gtOper == GT_LCL_FLD_ADDR)
            offset += base->AsLclFld()->GetLclOffs();
        emit->emitIns_R_S(ins, size, dst, base->AsLclVarCommon()->GetLclNum(), offset);
    }
    else
    {
        emit->emitIns_R_R_I(ins, size, dst, base->GetRegNum(), offset);
    }
}

// Generate code for a store to some address + offset
//   base: tree node which can be either a local address or arbitrary node
//   offset: distance from the base at which to store
void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
{
    emitter* emit = GetEmitter();

    if (base->OperIsLocalAddr())
    {
        // For a local-field address, fold the field offset into the store offset.
        if (base->gtOper == GT_LCL_FLD_ADDR)
            offset += base->AsLclFld()->GetLclOffs();
        emit->emitIns_S_R(ins, size, src, base->AsLclVarCommon()->GetLclNum(), offset);
    }
    else
    {
        emit->emitIns_R_R_I(ins, size, src, base->GetRegNum(), offset);
    }
}

//------------------------------------------------------------------------
// genCallInstruction: Produce code for a GT_CALL node
//
void CodeGen::genCallInstruction(GenTreeCall* call)
{
    gtCallTypes callType = (gtCallTypes)call->gtCallType;

    DebugInfo di;

    // all virtuals should have been expanded into a control expression
    assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);

    // Consume all the arg regs
    for (GenTreeCall::Use& use : call->LateArgs())
    {
        GenTree* argNode = use.GetNode();

        fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
        assert(curArgTabEntry);

        // GT_RELOAD/GT_COPY use the child node
        argNode = argNode->gtSkipReloadOrCopy();

        if (curArgTabEntry->GetRegNum() == REG_STK)
            continue;

        // Deal with multi register passed struct args.
        if (argNode->OperGet() == GT_FIELD_LIST)
        {
            regNumber argReg = curArgTabEntry->GetRegNum();
            for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses())
            {
                GenTree* putArgRegNode = use.GetNode();
                assert(putArgRegNode->gtOper == GT_PUTARG_REG);

                genConsumeReg(putArgRegNode);
#if 0
                inst_Mov_Extend(putArgRegNode->TypeGet(), /* srcInReg */ true, argReg, putArgRegNode->GetRegNum(),
                                /* canSkip */ true, emitActualTypeSize(TYP_I_IMPL));

                argReg = genRegArgNext(argReg);
#endif
            }
        }
#if FEATURE_ARG_SPLIT
        else if (curArgTabEntry->IsSplit())
        {
            assert(curArgTabEntry->numRegs >= 1);
            genConsumeArgSplitStruct(argNode->AsPutArgSplit());
        }
#endif // FEATURE_ARG_SPLIT
        else
        {
            regNumber argReg = curArgTabEntry->GetRegNum();
            genConsumeReg(argNode);
            if (argNode->GetRegNum() != argReg)
            {
                inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, argNode->GetRegNum());
            }
        }
    }

    // Insert a null check on "this" pointer if asked.
    if (call->NeedsNullCheck())
    {
        const regNumber regThis = genGetThisArgReg(call);

        // Ditto as genCodeForNullCheck: load to the zero register, only the fault matters.
        GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, regThis, 0);
    }

    // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper
    // method.
    CORINFO_METHOD_HANDLE methHnd;
    GenTree*              target = call->gtControlExpr;
    if (callType == CT_INDIRECT)
    {
        assert(target == nullptr);
        target  = call->gtCallAddr;
        methHnd = nullptr;
    }
    else
    {
        methHnd = call->gtCallMethHnd;
    }

    CORINFO_SIG_INFO* sigInfo = nullptr;
#ifdef DEBUG
    // Pass the call signature information down into the emitter so the emitter can associate
    // native call sites with the signatures they were generated from.
    if (callType != CT_HELPER)
    {
        sigInfo = call->callSig;
    }
#endif // DEBUG

    // If fast tail call, then we are done.  In this case we setup the args (both reg args
    // and stack args in incoming arg area) and call target.  Epilog sequence would
    // generate "br <reg>".
    if (call->IsFastTailCall())
    {
        // Don't support fast tail calling JIT helpers
        assert(callType != CT_HELPER);

        if (target != nullptr)
        {
            // Indirect fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
            genConsumeReg(target);

            // Use REG_FASTTAILCALL_TARGET on LOONGARCH64 as the call target register.
            if (target->GetRegNum() != REG_FASTTAILCALL_TARGET)
            {
                // ori rd, rs, 0 is a register move.
                GetEmitter()->emitIns_R_R_I(INS_ori, EA_4BYTE, REG_FASTTAILCALL_TARGET, target->GetRegNum(), 0);
            }
        }

        return;
    }

    // For a pinvoke to unmanaged code we emit a label to clear
    // the GC pointer state before the callsite.
    // We can't utilize the typical lazy killing of GC pointers
    // at (or inside) the callsite.
    if (compiler->killGCRefs(call))
    {
        genDefineTempLabel(genCreateTempLabel());
    }

    // Determine return value size(s).
    const ReturnTypeDesc* pRetTypeDesc  = call->GetReturnTypeDesc();
    emitAttr              retSize       = EA_PTRSIZE;
    emitAttr              secondRetSize = EA_UNKNOWN;

    if (call->HasMultiRegRetVal())
    {
        retSize       = emitTypeSize(pRetTypeDesc->GetReturnRegType(0));
        secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1));
    }
    else
    {
        assert(call->gtType != TYP_STRUCT);

        if (call->gtType == TYP_REF)
        {
            retSize = EA_GCREF;
        }
        else if (call->gtType == TYP_BYREF)
        {
            retSize = EA_BYREF;
        }
    }

    // We need to propagate the IL offset information to the call instruction, so we can emit
    // an IL to native mapping record for the call, to support managed return value debugging.
    // We don't want tail call helper calls that were converted from normal calls to get a record,
    // so we skip this hash table lookup logic in that case.
    if (compiler->opts.compDbgInfo && compiler->genCallSite2DebugInfoMap != nullptr && !call->IsTailCall())
    {
        (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di);
    }

    if (target != nullptr)
    {
        // A call target can not be a contained indirection
        assert(!target->isContainedIndir());

        genConsumeReg(target);

        // We have already generated code for gtControlExpr evaluating it into a register.
        // We just need to emit "call reg" in this case.
        //
        assert(genIsValidIntReg(target->GetRegNum()));

        genEmitCall(emitter::EC_INDIR_R, methHnd,
                    INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
                    retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di,
                    target->GetRegNum(), call->IsFastTailCall());
    }
    else if (call->IsR2ROrVirtualStubRelativeIndir())
    {
        // Generate a direct call to a non-virtual user defined or helper method
        assert(callType == CT_HELPER || callType == CT_USER_FUNC);
#ifdef FEATURE_READYTORUN_COMPILER
        assert(((call->IsR2RRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_PVALUE)) ||
               ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE)));
#endif // FEATURE_READYTORUN_COMPILER
        assert(call->gtControlExpr == nullptr);
        assert(!call->IsTailCall());

        // Load the actual entry point from the indirection cell.
        regNumber tmpReg = call->GetSingleTempReg();
        GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, REG_R2R_INDIRECT_PARAM);

        // We have now generated code for gtControlExpr evaluating it into `tmpReg`.
        // We just need to emit "call tmpReg" in this case.
        //
        assert(genIsValidIntReg(tmpReg));

        genEmitCall(emitter::EC_INDIR_R, methHnd,
                    INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
                    retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, tmpReg, call->IsFastTailCall());
    }
    else
    {
        // Generate a direct call to a non-virtual user defined or helper method
        assert(callType == CT_HELPER || callType == CT_USER_FUNC);

        void* addr = nullptr;
#ifdef FEATURE_READYTORUN_COMPILER
        if (call->gtEntryPoint.addr != NULL)
        {
            assert(call->gtEntryPoint.accessType == IAT_VALUE);
            addr = call->gtEntryPoint.addr;
        }
        else
#endif // FEATURE_READYTORUN_COMPILER
            if (callType == CT_HELPER)
        {
            CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
            noway_assert(helperNum != CORINFO_HELP_UNDEF);

            void* pAddr = nullptr;
            addr        = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
            assert(pAddr == nullptr);
        }
        else
        {
            // Direct call to a non-virtual user function.
            addr = call->gtDirectCallAddress;
        }

        assert(addr != nullptr);

// Non-virtual direct call to known addresses
        {
            genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr,
                        retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, REG_R21, call->IsFastTailCall());
        }
    }

    // if it was a pinvoke we may have needed to get the address of a label
    if (genPendingCallLabel)
    {
        genDefineInlineTempLabel(genPendingCallLabel);
        genPendingCallLabel = nullptr;
    }

    // Update GC info:
    // All Callee arg registers are trashed and no longer contain any GC pointers.
    // TODO-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
    // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
    // registers from RBM_CALLEE_TRASH
    assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
    assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
    gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
    gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;

    var_types returnType = call->TypeGet();
    if (returnType != TYP_VOID)
    {
        regNumber returnReg;

        if (call->HasMultiRegRetVal())
        {
            assert(pRetTypeDesc != nullptr);
            unsigned regCount = pRetTypeDesc->GetReturnRegCount();

            // If regs allocated to call node are different from ABI return
            // regs in which the call has returned its result, move the result
            // to regs allocated to call node.
            for (unsigned i = 0; i < regCount; ++i)
            {
                var_types regType      = pRetTypeDesc->GetReturnRegType(i);
                returnReg              = pRetTypeDesc->GetABIReturnReg(i);
                regNumber allocatedReg = call->GetRegNumByIdx(i);
                if (returnReg != allocatedReg)
                {
                    inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
                }
            }
        }
        else
        {
            if (varTypeUsesFloatArgReg(returnType))
            {
                returnReg = REG_FLOATRET;
            }
            else
            {
                returnReg = REG_INTRET;
            }

            if (call->GetRegNum() != returnReg)
            {
                {
                    inst_RV_RV(ins_Copy(returnType), call->GetRegNum(), returnReg, returnType);
                }
            }
        }

        genProduceReg(call);
    }

    // If there is nothing next, that means the result is thrown away, so this value is not live.
    // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
    if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
    {
        gcInfo.gcMarkRegSetNpt(RBM_INTRET);
    }
}

// Produce code for a GT_JMP node.
// The arguments of the caller needs to be transferred to the callee before exiting caller.
// The actual jump to callee is generated as part of caller epilog sequence.
+// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup.
+void CodeGen::genJmpMethod(GenTree* jmp)
+{
+    assert(jmp->OperGet() == GT_JMP);
+    assert(compiler->compJmpOpUsed);
+
+    // If no arguments, nothing to do
+    if (compiler->info.compArgsCount == 0)
+    {
+        return;
+    }
+
+    // Make sure register arguments are in their initial registers
+    // and stack arguments are put back as well.
+    unsigned   varNum;
+    LclVarDsc* varDsc;
+
+    // First move any en-registered stack arguments back to the stack.
+    // At the same time any reg arg not in correct reg is moved back to its stack location.
+    //
+    // We are not strictly required to spill reg args that are not in the desired reg for a jmp call
+    // But that would require us to deal with circularity while moving values around. Spilling
+    // to stack makes the implementation simple, which is not a bad trade off given Jmp calls
+    // are not frequent.
+    for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+    {
+        varDsc = compiler->lvaTable + varNum;
+
+        if (varDsc->lvPromoted)
+        {
+            noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+            unsigned fieldVarNum = varDsc->lvFieldLclStart;
+            varDsc               = compiler->lvaTable + fieldVarNum;
+        }
+        noway_assert(varDsc->lvIsParam);
+
+        if (varDsc->lvIsRegArg && (varDsc->GetRegNum() != REG_STK))
+        {
+            // Skip reg args which are already in its right register for jmp call.
+            // If not, we will spill such args to their stack locations.
+            //
+            // If we need to generate a tail call profiler hook, then spill all
+            // arg regs to free them up for the callback.
+            if (!compiler->compIsProfilerHookNeeded() && (varDsc->GetRegNum() == varDsc->GetArgReg()))
+                continue;
+        }
+        else if (varDsc->GetRegNum() == REG_STK)
+        {
+            // Skip args which are currently living in stack.
+            continue;
+        }
+
+        // If we came here it means either a reg argument not in the right register or
+        // a stack argument currently living in a register. In either case the following
+        // assert should hold.
+        assert(varDsc->GetRegNum() != REG_STK);
+        assert(varDsc->TypeGet() != TYP_STRUCT);
+        var_types storeType = genActualType(varDsc->TypeGet());
+        emitAttr  storeSize = emitActualTypeSize(storeType);
+
+        GetEmitter()->emitIns_S_R(ins_Store(storeType), storeSize, varDsc->GetRegNum(), varNum, 0);
+        // Update GetRegNum() life and GC info to indicate GetRegNum() is dead and varDsc stack slot is going live.
+        // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it.
+        // Therefore manually update life of varDsc->GetRegNum().
+        regMaskTP tempMask = genRegMask(varDsc->GetRegNum());
+        regSet.RemoveMaskVars(tempMask);
+        gcInfo.gcMarkRegSetNpt(tempMask);
+        if (compiler->lvaIsGCTracked(varDsc))
+        {
+            VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+        }
+    }
+
+#ifdef PROFILING_SUPPORTED
+    // At this point all arg regs are free.
+    // Emit tail call profiler callback.
+    genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif
+
+    // Next move any un-enregistered register arguments back to their register.
+    regMaskTP fixedIntArgMask = RBM_NONE;    // tracks the int arg regs occupying fixed args in case of a vararg method.
+    unsigned  firstArgVarNum  = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
+    for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+    {
+        varDsc = compiler->lvaTable + varNum;
+        if (varDsc->lvPromoted)
+        {
+            noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+            unsigned fieldVarNum = varDsc->lvFieldLclStart;
+            varDsc               = compiler->lvaTable + fieldVarNum;
+        }
+        noway_assert(varDsc->lvIsParam);
+
+        // Skip if arg not passed in a register.
+        if (!varDsc->lvIsRegArg)
+            continue;
+
+        // Register argument
+        noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+
+        // Is register argument already in the right register?
+        // If not load it from its stack location.
+        regNumber argReg     = varDsc->GetArgReg(); // incoming arg register
+        regNumber argRegNext = REG_NA;
+
+        if (varDsc->GetRegNum() != argReg)
+        {
+            var_types loadType = TYP_UNDEF;
+
+            // NOTE for LOONGARCH: not supports the HFA.
+            assert(!varDsc->lvIsHfaRegArg());
+            {
+                if (varTypeIsStruct(varDsc))
+                {
+                    // Must be <= 16 bytes or else it wouldn't be passed in registers,
+                    // which can be bigger (and is handled above).
+                    noway_assert(EA_SIZE_IN_BYTES(varDsc->lvSize()) <= 16);
+                    if (emitter::isFloatReg(argReg))
+                    {
+                        loadType = varDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE;
+                    }
+                    else
+                        loadType = varDsc->GetLayout()->GetGCPtrType(0);
+                }
+                else
+                {
+                    loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
+                }
+
+                emitAttr loadSize = emitActualTypeSize(loadType);
+                GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0);
+
+                // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+                // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be
+                // expecting it. Therefore manually update life of argReg. Note that GT_JMP marks the end of
+                // the basic block and after which reg life and gc info will be recomputed for the new block
+                // in genCodeForBBList().
+                regSet.AddMaskVars(genRegMask(argReg));
+                gcInfo.gcMarkRegPtrVal(argReg, loadType);
+
+                // if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
+                if (varDsc->GetOtherArgReg() < REG_STK)
+                {
+                    // Restore the second register.
+                    argRegNext = varDsc->GetOtherArgReg();
+
+                    if (emitter::isFloatReg(argRegNext))
+                    {
+                        loadType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE;
+                    }
+                    else
+                        loadType = varDsc->GetLayout()->GetGCPtrType(1);
+
+                    loadSize = emitActualTypeSize(loadType);
+                    // Second field/half lives right after the first slot (4 or 8 bytes in).
+                    int offs = loadSize == EA_4BYTE ? 4 : 8;
+                    GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, offs);
+
+                    regSet.AddMaskVars(genRegMask(argRegNext));
+                    gcInfo.gcMarkRegPtrVal(argRegNext, loadType);
+                }
+
+                if (compiler->lvaIsGCTracked(varDsc))
+                {
+                    VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+                }
+            }
+        }
+
+        if (compiler->info.compIsVarArgs)
+        {
+            assert(!"unimplemented on LOONGARCH yet!");
+            // In case of a jmp call to a vararg method ensure only integer registers are passed.
+            assert((genRegMask(argReg) & (RBM_ARG_REGS)) != RBM_NONE);
+            assert(!varDsc->lvIsHfaRegArg());
+
+            fixedIntArgMask |= genRegMask(argReg);
+
+            if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
+            {
+                assert(argRegNext != REG_NA);
+                fixedIntArgMask |= genRegMask(argRegNext);
+            }
+
+            if (argReg == REG_ARG_0)
+            {
+                assert(firstArgVarNum == BAD_VAR_NUM);
+                firstArgVarNum = varNum;
+            }
+        }
+    }
+
+    // Jmp call to a vararg method - if the method has fewer than fixed arguments that can be max size of reg,
+    // load the remaining integer arg registers from the corresponding
+    // shadow stack slots. This is for the reason that we don't know the number and type
+    // of non-fixed params passed by the caller, therefore we have to assume the worst case
+    // of caller passing all integer arg regs that can be max size of reg.
+    //
+    // The caller could have passed gc-ref/byref type var args. Since these are var args
+    // the callee no way of knowing their gc-ness. Therefore, mark the region that loads
+    // remaining arg registers from shadow stack slots as non-gc interruptible.
+    if (fixedIntArgMask != RBM_NONE)
+    {
+        assert(compiler->info.compIsVarArgs);
+        assert(firstArgVarNum != BAD_VAR_NUM);
+
+        regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
+        if (remainingIntArgMask != RBM_NONE)
+        {
+            GetEmitter()->emitDisableGC();
+            for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
+            {
+                regNumber argReg     = intArgRegs[argNum];
+                regMaskTP argRegMask = genRegMask(argReg);
+
+                if ((remainingIntArgMask & argRegMask) != 0)
+                {
+                    remainingIntArgMask &= ~argRegMask;
+                    GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, argReg, firstArgVarNum, argOffset);
+                }
+
+                argOffset += REGSIZE_BYTES;
+            }
+            GetEmitter()->emitEnableGC();
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// genIntCastOverflowCheck: Generate overflow checking code for an integer cast.
+//
+// Arguments:
+//    cast - The GT_CAST node
+//    desc - The cast description
+//    reg  - The register containing the value to check
+//
+// Notes:
+//    Uses REG_R21 as a scratch register; jumps to the overflow throw helper
+//    via genJumpToThrowHlpBlk_la when the range check fails.
+//
+void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& desc, regNumber reg)
+{
+    switch (desc.CheckKind())
+    {
+        case GenIntCastDesc::CHECK_POSITIVE:
+        {
+            // Throw if reg < 0 (signed compare against the zero register).
+            genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, REG_R0);
+        }
+        break;
+
+        case GenIntCastDesc::CHECK_UINT_RANGE:
+        {
+            // We need to check if the value is not greater than 0xFFFFFFFF
+            // if the upper 32 bits are zero.
+            ssize_t imm = -1;
+            GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm);
+
+            GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, REG_R21, 32);
+            GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_R21, reg, REG_R21);
+            genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21);
+        }
+        break;
+
+        case GenIntCastDesc::CHECK_POSITIVE_INT_RANGE:
+        {
+            // We need to check if the value is not greater than 0x7FFFFFFF
+            // if the upper 33 bits are zero.
+            // instGen_Set_Reg_To_Imm(EA_8BYTE, REG_R21, 0xFFFFFFFF80000000LL);
+            ssize_t imm = -1;
+            GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm);
+
+            GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, REG_R21, 31);
+
+            GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_R21, reg, REG_R21);
+            genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21);
+        }
+        break;
+
+        case GenIntCastDesc::CHECK_INT_RANGE:
+        {
+            // Throw unless INT32_MIN <= reg <= INT32_MAX.
+            const regNumber tempReg = cast->GetSingleTempReg();
+            assert(tempReg != reg);
+            GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MAX);
+            genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, tempReg, nullptr, reg);
+
+            GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MIN);
+            genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, tempReg);
+        }
+        break;
+
+        default:
+        {
+            assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE);
+            const int   castMaxValue = desc.CheckSmallIntMax();
+            const int   castMinValue = desc.CheckSmallIntMin();
+            instruction ins;
+
+            if (castMaxValue > 2047)
+            { // should amend. should confirm !?!?
+                assert((castMaxValue == 32767) || (castMaxValue == 65535));
+                GetEmitter()->emitIns_I_la(EA_ATTR(desc.CheckSrcSize()), REG_R21, castMaxValue + 1);
+                ins = castMinValue == 0 ? INS_bgeu : INS_bge;
+                genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, reg, nullptr, REG_R21);
+            }
+            else
+            { // should amend.
+                GetEmitter()->emitIns_R_R_I(INS_addi_w, EA_ATTR(desc.CheckSrcSize()), REG_R21, REG_R0, castMaxValue);
+                ins = castMinValue == 0 ? INS_bltu : INS_blt;
+                genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, REG_R21, nullptr, reg);
+            }
+
+            if (castMinValue != 0)
+            {
+                // Set REG_R21 = (reg < castMinValue), then throw if it is nonzero.
+                if ((-2048 <= castMinValue) && (castMinValue < 2048))
+                {
+                    GetEmitter()->emitIns_R_R_I(INS_slti, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, castMinValue);
+                }
+                else
+                {
+                    GetEmitter()->emitIns_I_la(EA_8BYTE, REG_R21, castMinValue);
+                    GetEmitter()->emitIns_R_R_R(INS_slt, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, REG_R21);
+                }
+                genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21);
+            }
+        }
+        break;
+    }
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast, with or without overflow check.
+//
+// Arguments:
+//    cast - The GT_CAST node
+//
+// Assumptions:
+//    The cast node is not a contained node and must have an assigned register.
+//    Neither the source nor target type can be a floating point type.
+//
+// TODO-LOONGARCH64-CQ: Allow castOp to be a contained node without an assigned register.
+//
+void CodeGen::genIntToIntCast(GenTreeCast* cast)
+{
+    genConsumeRegs(cast->gtGetOp1());
+
+    emitter*            emit    = GetEmitter();
+    var_types           dstType = cast->CastToType();
+    var_types           srcType = genActualType(cast->gtGetOp1()->TypeGet());
+    const regNumber     srcReg  = cast->gtGetOp1()->GetRegNum();
+    const regNumber     dstReg  = cast->GetRegNum();
+    const unsigned char pos     = 0;
+    const unsigned char size    = 32;
+
+    assert(genIsValidIntReg(srcReg));
+    assert(genIsValidIntReg(dstReg));
+
+    GenIntCastDesc desc(cast);
+
+    if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE)
+    {
+        genIntCastOverflowCheck(cast, desc, srcReg);
+    }
+
+    // if ((EA_ATTR(genTypeSize(srcType)) == EA_8BYTE) && (EA_ATTR(genTypeSize(dstType)) == EA_4BYTE))
+    // {
+    //     if (dstType == TYP_INT)
+    //     {
+    //         // convert t0 int32
+    //         emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0);
+    //     }
+    //     else
+    //     {
+    //         // convert t0 uint32
+    //         emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+31, pos);
+    //     }
+    // }
+    // else if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg))
+    if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg))
+    {
+        instruction ins;
+
+        switch (desc.ExtendKind())
+        {
+            case GenIntCastDesc::ZERO_EXTEND_SMALL_INT:
+                // bstrpick.d extracts the low 8/16 bits, zero-extending the rest.
+                if (desc.ExtendSrcSize() == 1)
+                {
+                    emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 7, pos);
+                }
+                else
+                {
+                    emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 15, pos);
+                }
+                break;
+            case GenIntCastDesc::SIGN_EXTEND_SMALL_INT:
+                ins = (desc.ExtendSrcSize() == 1) ? INS_ext_w_b : INS_ext_w_h;
+                emit->emitIns_R_R(ins, EA_PTRSIZE, dstReg, srcReg);
+                break;
+#ifdef TARGET_64BIT
+            case GenIntCastDesc::ZERO_EXTEND_INT:
+                emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 31, pos);
+                break;
+            case GenIntCastDesc::SIGN_EXTEND_INT:
+                // slli.w with shift 0 sign-extends the low 32 bits.
+                emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0);
+                break;
+#endif
+            default:
+                assert(desc.ExtendKind() == GenIntCastDesc::COPY);
+#if 1
+                if (srcType == TYP_INT)
+                    emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); // should amend.
+                else
+                    emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, dstReg, srcReg, 0); // reg-to-reg move via ori 0
+#else
+                emit->emitIns_R_R(INS_mov, EA_PTRSIZE, dstReg, srcReg);
+#endif
+                break;
+        }
+    }
+
+    genProduceReg(cast);
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+//    treeNode - The GT_CAST node
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    Cast is a non-overflow conversion.
+//    The treeNode must have an assigned register.
+//    The cast is between float and double.
+//
+void CodeGen::genFloatToFloatCast(GenTree* treeNode)
+{
+    // float <--> double conversions are always non-overflow ones
+    assert(treeNode->OperGet() == GT_CAST);
+    assert(!treeNode->gtOverflow());
+
+    regNumber targetReg = treeNode->GetRegNum();
+    assert(genIsValidFloatReg(targetReg));
+
+    GenTree* op1 = treeNode->AsOp()->gtOp1;
+    assert(!op1->isContained());                  // Cannot be contained
+    assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg.
+
+    var_types dstType = treeNode->CastToType();
+    var_types srcType = op1->TypeGet();
+    assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+    genConsumeOperands(treeNode->AsOp());
+
+    // treeNode must be a reg
+    assert(!treeNode->isContained());
+
+    if (srcType != dstType)
+    {
+        instruction ins = (srcType == TYP_FLOAT) ? INS_fcvt_d_s  // convert Single to Double
+                                                 : INS_fcvt_s_d; // convert Double to Single
+
+        GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum());
+    }
+    else if (treeNode->GetRegNum() != op1->GetRegNum())
+    {
+        // If double to double cast or float to float cast. Emit a move instruction.
+        instruction ins = (srcType == TYP_FLOAT) ? INS_fmov_s : INS_fmov_d;
+        GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum());
+    }
+
+    genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCreateAndStoreGCInfo: Create and record GC Info for the function.
+//
+void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
+                                      unsigned prologSize,
+                                      unsigned epilogSize DEBUGARG(void* codePtr))
+{
+    IAllocator*    allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
+    GcInfoEncoder* gcInfoEncoder  = new (compiler, CMK_GC)
+        GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+    assert(gcInfoEncoder != nullptr);
+
+    // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+    gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+    // We keep the call count for the second call to gcMakeRegPtrTable() below.
+    unsigned callCnt = 0;
+
+    // First we figure out the encoder ID's for the stack slots and registers.
+    gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
+
+    // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+    gcInfoEncoder->FinalizeSlotIds();
+
+    // Now we can actually use those slot ID's to declare live ranges.
+    gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
+
+    if (compiler->opts.compDbgEnC)
+    {
+        // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
+        // which is:
+        //  -return address
+        //  -saved off RBP
+        //  -saved 'this' pointer and bool for synchronized methods
+
+        // 4 slots for RBP + return address + RSI + RDI
+        int preservedAreaSize = 4 * REGSIZE_BYTES;
+
+        if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+        {
+            if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
+                preservedAreaSize += REGSIZE_BYTES;
+
+            preservedAreaSize += 1; // bool for synchronized methods
+        }
+
+        // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
+        // frame
+        gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
+    }
+
+    if (compiler->opts.IsReversePInvoke())
+    {
+        unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar;
+        assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM);
+        const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber);
+        gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar->GetStackOffset());
+    }
+
+    gcInfoEncoder->Build();
+
+    // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
+    // let's save the values anyway for debugging purposes
+    compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+    compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+
+/* TODO for LOONGARCH64: not used for loongarch */
+// clang-format off
+const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32]
+{
+    //{ },       // NONE
+    //{ },       // 1
+    //{ EJ_lt }, // SLT
+    //{ EJ_le }, // SLE
+    //{ EJ_ge }, // SGE
+    //{ EJ_gt }, // SGT
+    //{ EJ_mi }, // S
+    //{ EJ_pl }, // NS
+
+    //{ EJ_eq }, // EQ
+    //{ EJ_ne }, // NE
+    //{ EJ_lo }, // ULT
+    //{ EJ_ls }, // ULE
+    //{ EJ_hs }, // UGE
+    //{ EJ_hi }, // UGT
+    //{ EJ_hs }, // C
+    //{ EJ_lo }, // NC
+
+    //{ EJ_eq },                // FEQ
+    //{ EJ_gt, GT_AND, EJ_lo }, // FNE
+    //{ EJ_lo },                // FLT
+    //{ EJ_ls },                // FLE
+    //{ EJ_ge },                // FGE
+    //{ EJ_gt },                // FGT
+    //{ EJ_vs },                // O
+    //{ EJ_vc },                // NO
+
+    //{ EJ_eq, GT_OR, EJ_vs },  // FEQU
+    //{ EJ_ne },                // FNEU
+    //{ EJ_lt },                // FLTU
+    //{ EJ_le },                // FLEU
+    //{ EJ_hs },                // FGEU
+    //{ EJ_hi },                // FGTU
+    //{ },                      // P
+    //{ },                      // NP
+};
+// clang-format on
+
+//------------------------------------------------------------------------
+// inst_SETCC: Generate code to set a register to 0 or 1 based on a condition.
+//
+// Arguments:
+//   condition - The condition
+//   type      - The type of the value to be produced
+//   dstReg    - The destination register to be set to 1 or 0
+//
+void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstReg)
+{
+    /* TODO for LOONGARCH64: should redesign and delete. */
+    assert(!"unimplemented on LOONGARCH yet");
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node.
+//
+// Arguments:
+//    tree - the node
+//
+void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
+{
+    assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
+
+    // GT_STORE_OBJ is a copy of a struct with GC pointers; it has a dedicated path.
+    if (blkOp->OperIs(GT_STORE_OBJ))
+    {
+        assert(!blkOp->gtBlkOpGcUnsafe);
+        assert(blkOp->OperIsCopyBlkOp());
+        assert(blkOp->AsObj()->GetLayout()->HasGCPtr());
+        genCodeForCpObj(blkOp->AsObj());
+        return;
+    }
+
+    // For GC-unsafe block ops, suppress GC interruption around the copy/init sequence.
+    if (blkOp->gtBlkOpGcUnsafe)
+    {
+        GetEmitter()->emitDisableGC();
+    }
+    bool isCopyBlk = blkOp->OperIsCopyBlkOp();
+
+    switch (blkOp->gtBlkOpKind)
+    {
+        case GenTreeBlk::BlkOpKindHelper:
+            if (isCopyBlk)
+            {
+                genCodeForCpBlkHelper(blkOp);
+            }
+            else
+            {
+                genCodeForInitBlkHelper(blkOp);
+            }
+            break;
+
+        case GenTreeBlk::BlkOpKindUnroll:
+            if (isCopyBlk)
+            {
+                genCodeForCpBlkUnroll(blkOp);
+            }
+            else
+            {
+                genCodeForInitBlkUnroll(blkOp);
+            }
+            break;
+
+        default:
+            unreached();
+    }
+
+    if (blkOp->gtBlkOpGcUnsafe)
+    {
+        GetEmitter()->emitEnableGC();
+    }
+}
+
+//------------------------------------------------------------------------
+// genScaledAdd: A helper for genLeaInstruction: targetReg = baseReg + (indexReg << scale).
+//
+// Arguments:
+//    attr      - the emit attribute (operand size) for the final add
+//    targetReg - destination register
+//    baseReg   - base address register
+//    indexReg  - index register
+//    scale     - left-shift amount applied to the index; 0 means a plain add
+//
+// Notes:
+//    Uses REG_R21 as a scratch register to hold the shifted index.
+//    TODO-LOONGARCH64-CQ: can amend further.
+//
+void CodeGen::genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale)
+{
+    emitter* emit = GetEmitter();
+    if (scale == 0)
+    {
+        // target = base + index
+        emit->emitIns_R_R_R(INS_add_d, attr, targetReg, baseReg, indexReg);
+    }
+    else
+    {
+        // target = base + index << scale
+        emit->emitIns_R_R_I(INS_slli_d, attr, REG_R21, indexReg, scale);
+        emit->emitIns_R_R_R(INS_add_d, attr, targetReg, baseReg, REG_R21);
+    }
+}
+
+//------------------------------------------------------------------------
+// genLeaInstruction: Produce code for a GT_LEA node.
+//
+// Arguments:
+//    lea - the node
+//
+void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+{
+    genConsumeOperands(lea);
+    emitter* emit   = GetEmitter();
+    emitAttr size   = emitTypeSize(lea);
+    int      offset = lea->Offset();
+
+    // In LOONGARCH we can only load addresses of the form:
+    //
+    // [Base + index*scale]
+    // [Base + Offset]
+    // [Literal] (PC-Relative)
+    //
+    // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate:
+    // destReg = baseReg + indexReg * scale;
+    // destReg = destReg + offset;
+    //
+    // TODO-LOONGARCH64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture
+    //                      addressing mode instruction. Currently we're 'cheating' by producing one or more
+    //                      instructions to generate the addressing mode so we need to modify lowering to
+    //                      produce LEAs that are a 1:1 relationship to the LOONGARCH64 architecture.
+    if (lea->Base() && lea->Index())
+    {
+        GenTree* memBase = lea->Base();
+        GenTree* index   = lea->Index();
+
+        DWORD scale;
+
+        assert(isPow2(lea->gtScale));
+        BitScanForward(&scale, lea->gtScale);
+
+        assert(scale <= 4);
+
+        if (offset != 0)
+        {
+            regNumber tmpReg = lea->GetSingleTempReg();
+
+            // When generating fully interruptible code we have to use the "large offset" sequence
+            // when calculating a EA_BYREF as we can't report a byref that points outside of the object
+            //
+            bool useLargeOffsetSeq = compiler->GetInterruptible() && (size == EA_BYREF);
+
+            if (!useLargeOffsetSeq && ((-2048 <= offset) && (offset <= 2047)))
+            {
+                // Generate code to set tmpReg = base + index*scale
+                genScaledAdd(size, tmpReg, memBase->GetRegNum(), index->GetRegNum(), scale);
+
+                // Then compute target reg from [tmpReg + offset]
+                emit->emitIns_R_R_I(INS_addi_d, size, lea->GetRegNum(), tmpReg, offset);
+            }
+            else // large offset sequence
+            {
+                noway_assert(tmpReg != index->GetRegNum());
+                noway_assert(tmpReg != memBase->GetRegNum());
+
+                // First load/store tmpReg with the offset constant
+                //      rTmp = imm
+                GetEmitter()->emitIns_I_la(EA_PTRSIZE, tmpReg, offset);
+
+                // Then add the scaled index register
+                //      rTmp = rTmp + index*scale
+                genScaledAdd(EA_PTRSIZE, tmpReg, tmpReg, index->GetRegNum(), scale);
+
+                // Then compute target reg from [base + tmpReg ]
+                //      rDst = base + rTmp
+                emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg);
+            }
+        }
+        else
+        {
+            // Then compute target reg from [base + index*scale]
+            genScaledAdd(size, lea->GetRegNum(), memBase->GetRegNum(), index->GetRegNum(), scale);
+        }
+    }
+    else if (lea->Base())
+    {
+        GenTree* memBase = lea->Base();
+
+        if ((-2048 <= offset) && (offset <= 2047))
+        {
+            if (offset != 0)
+            {
+                // Then compute target reg from [memBase + offset]
+                emit->emitIns_R_R_I(INS_addi_d, size, lea->GetRegNum(), memBase->GetRegNum(), offset);
+            }
+            else // offset is zero
+            {
+                if (lea->GetRegNum() != memBase->GetRegNum())
+                {
+                    // Register move via ori with a zero immediate.
+                    emit->emitIns_R_R_I(INS_ori, size, lea->GetRegNum(), memBase->GetRegNum(), 0);
+                }
+            }
+        }
+        else
+        {
+            // We require a tmpReg to hold the offset
+            regNumber tmpReg = lea->GetSingleTempReg();
+
+            // First load tmpReg with the large offset constant
+            GetEmitter()->emitIns_I_la(EA_PTRSIZE, tmpReg, offset);
+
+            // Then compute target reg from [memBase + tmpReg]
+            emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg);
+        }
+    }
+    else if (lea->Index())
+    {
+        // If we encounter a GT_LEA node without a base it means it came out
+        // when attempting to optimize an arbitrary arithmetic expression during lower.
+        // This is currently disabled in LOONGARCH64 since we need to adjust lower to account
+        // for the simpler instructions LOONGARCH64 supports.
+        // TODO-LOONGARCH64-CQ: Fix this and let LEA optimize arithmetic trees too.
+        assert(!"We shouldn't see a baseless address computation during CodeGen for LOONGARCH64");
+    }
+
+    genProduceReg(lea);
+}
+
+//------------------------------------------------------------------------
+// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP.
+//
+// Notes:
+//      On LOONGARCH64, this only does the probing; allocating the frame is done when callee-saved registers are saved.
+//      This is done before anything has been pushed. The previous frame might have a large outgoing argument
+//      space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
+//      not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
+//      there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
+//      page by default, so we need to be more careful. We do an extra probe if we might not have probed
+//      recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
+//      on Windows as well just to be consistent, even though it should not be necessary.
+//
+void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
+{
+    assert(compiler->compGeneratingProlog);
+
+    if (frameSize == 0)
+    {
+        return;
+    }
+
+    const target_size_t pageSize = compiler->eeGetPageSize();
+
+    // What offset from the final SP was the last probe? If we haven't probed almost a complete page, and
+    // if the next action on the stack might subtract from SP first, before touching the current SP, then
+    // we do one more probe at the very bottom. This can happen if we call a function on arm64 that does
+    // a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64,
+    // but we don't alter SP.
+    target_size_t lastTouchDelta = 0;
+
+    assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
+
+    if (frameSize < pageSize)
+    {
+        lastTouchDelta = frameSize;
+    }
+    else if (frameSize < 3 * pageSize)
+    {
+        // We don't need a register for the target of the dummy load
+        // ld_w $0,offset(base) will ignore the addr-exception.
+        regNumber rTemp = REG_R0;
+        lastTouchDelta  = frameSize;
+
+        for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
+        {
+            // Generate:
+            //    lw rTemp, -probeOffset(SP)  // load into initReg
+            GetEmitter()->emitIns_I_la(EA_PTRSIZE, initReg, -(ssize_t)probeOffset);
+            GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, rTemp, REG_SPBASE, initReg);
+            regSet.verifyRegUsed(initReg);
+            *pInitRegZeroed = false; // The initReg does not contain zero
+
+            lastTouchDelta -= pageSize;
+        }
+
+        assert(lastTouchDelta == frameSize % pageSize);
+        compiler->unwindPadding();
+    }
+    else
+    {
+        assert(frameSize >= 3 * pageSize);
+
+        // Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change
+        // until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl
+        // the stack afterward (which means the stack pointer needs to be known).
+        //
+        // LOONGARCH64 needs 2 registers. See VERY_LARGE_FRAME_SIZE_REG_MASK for how these
+        // are reserved.
+
+        regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
+        availMask &= ~maskArgRegsLiveIn;   // Remove all of the incoming argument registers as they are currently live
+        availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
+
+        regNumber rOffset = initReg;
+        regNumber rLimit;
+        regMaskTP tempMask;
+
+        // We don't need a register for the target of the dummy load
+        // ld_w $0,offset(base) will ignore the addr-exception.
+        regNumber rTemp = REG_R0;
+
+        // We pick the next lowest register number for rLimit
+        noway_assert(availMask != RBM_NONE);
+        tempMask = genFindLowestBit(availMask);
+        rLimit   = genRegNumFromMask(tempMask);
+        availMask &= ~tempMask;
+
+        // Generate:
+        //
+        //      instGen_Set_Reg_To_Imm(EA_PTRSIZE, rOffset, -(ssize_t)pageSize);
+        //      instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(ssize_t)frameSize);
+        //      INS_lu12i_w, REG_R21, pageSize >> 12
+        //
+        // loop:
+        //      ldx_w rTemp, sp, rOffset,
+        //      sub_d rOffset, rOffset, REG_R21
+        //      bge rOffset, rLimit, loop // If rLimit is less or equal rOffset, we need to probe this rOffset.
+
+        noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
+
+        GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, rOffset, -(ssize_t)pageSize >> 12);
+        regSet.verifyRegUsed(rOffset);
+        GetEmitter()->emitIns_I_la(EA_PTRSIZE, rLimit, -(ssize_t)frameSize);
+        regSet.verifyRegUsed(rLimit);
+
+        assert(!(pageSize & 0xfff));
+        GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, pageSize >> 12);
+
+        // There's a "virtual" label here. But we can't create a label in the prolog, so we use the magic
+        // `emitIns_J` with a negative `instrCount` to branch back a specific number of instructions.
+
+        GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, rTemp, REG_SPBASE, rOffset);
+        GetEmitter()->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, rOffset, rOffset, REG_R21);
+
+        assert(REG_R21 != rLimit);
+        assert(REG_R21 != rOffset);
+        // Branch back two instructions (to the ldx_w above); the offset is in bytes (instructions are 4 bytes).
+        ssize_t imm = -2 << 2;
+        GetEmitter()->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, imm);
+
+        *pInitRegZeroed = false; // The initReg does not contain zero
+
+        compiler->unwindPadding();
+
+        lastTouchDelta = frameSize % pageSize;
+    }
+
+    if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
+    {
+        assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize);
+        GetEmitter()->emitIns_I_la(EA_PTRSIZE, initReg, -(ssize_t)frameSize);
+        GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, REG_R0, REG_SPBASE, initReg);
+        compiler->unwindPadding();
+
+        regSet.verifyRegUsed(initReg);
+        *pInitRegZeroed = false; // The initReg does not contain zero
+    }
+}
+
+//------------------------------------------------------------------------
+// genJumpToThrowHlpBlk_la: Generate a conditional branch (instruction 'ins' comparing
+// reg1 against reg2) to the throw-helper block for 'codeKind', or, when throw helper
+// blocks are not used, emit the inverted branch around an inline call to the helper.
+//
+inline void CodeGen::genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2)
+{
+    assert(INS_beq <= ins && ins <= INS_bgeu);
+
+    bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
+
+    emitter* emit = GetEmitter();
+    if (useThrowHlpBlk)
+    {
+        // For code with throw helper blocks, find and use the helper block for
+        // raising the exception. The block may be shared by other trees too.
+
+        BasicBlock* excpRaisingBlock;
+
+        if (failBlk != nullptr)
+        {
+            // We already know which block to jump to. Use that.
+            excpRaisingBlock = failBlk;
+
+#ifdef DEBUG
+            Compiler::AddCodeDsc* add =
+                compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
+            assert(excpRaisingBlock == add->acdDstBlk);
+#if !FEATURE_FIXED_OUT_ARGS
+            assert(add->acdStkLvlInit || isFramePointerUsed());
+#endif // !FEATURE_FIXED_OUT_ARGS
+#endif // DEBUG
+        }
+        else
+        {
+            // Find the helper-block which raises the exception.
+            Compiler::AddCodeDsc* add =
+                compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
+            PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
+            excpRaisingBlock = add->acdDstBlk;
+#if !FEATURE_FIXED_OUT_ARGS
+            assert(add->acdStkLvlInit || isFramePointerUsed());
+#endif // !FEATURE_FIXED_OUT_ARGS
+        }
+
+        noway_assert(excpRaisingBlock != nullptr);
+
+        // Jump to the exception-throwing block on error.
+        // reg2 is packed into bits [9:5] of the third operand (register numbers are 5-bit fields).
+        emit->emitIns_J(ins, excpRaisingBlock, (int)reg1 | ((int)reg2 << 5)); // 5-bits;
+    }
+    else
+    {
+        // The code to throw the exception will be generated inline, and
+        // we will jump around it in the normal non-exception case.
+
+        void* pAddr = nullptr;
+        void* addr  = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr);
+        emitter::EmitCallType callType;
+        regNumber             callTarget;
+
+        // Invert the branch condition so the fall-through path executes the helper call.
+        // maybe optimize
+        // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne)));
+        if (ins == INS_blt)
+            ins = INS_bge;
+        else if (ins == INS_bltu)
+            ins = INS_bgeu;
+        else if (ins == INS_bge)
+            ins = INS_blt;
+        else if (ins == INS_bgeu)
+            ins = INS_bltu;
+        else
+            ins = ins == INS_beq ? INS_bne : INS_beq;
+        if (addr == nullptr)
+        {
+            callType   = emitter::EC_INDIR_R;
+            callTarget = REG_DEFAULT_HELPER_CALL_TARGET;
+
+            // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl.//TODO: maybe optimize.
+
+            // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
+            // emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0);
+            if (compiler->opts.compReloc)
+            {
+                // Skip over the 3-instruction call sequence when the condition does not hold.
+                ssize_t imm = (2 + 1) << 2; // , 1=jirl.
+                emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm);
+                GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
+            }
+            else
+            {
+                // Skip over the 4-instruction call sequence when the condition does not hold.
+                ssize_t imm = (3 + 1) << 2; // , 1=jirl.
+                emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm);
+
+                // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr);
+                // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, );
+                // Materialize pAddr in callTarget (lu12i.w + lu32i.d), then load the helper address.
+                GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12);
+                GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32);
+                GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget,
+                                            ((ssize_t)pAddr & 0xfff) >> 2);
+            }
+        }
+        else
+        { // INS_OPTS_C
+            callType   = emitter::EC_FUNC_TOKEN;
+            callTarget = REG_NA;
+
+            ssize_t imm = 5 << 2;
+            if (compiler->opts.compReloc)
+                imm = 3 << 2;
+
+            emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm);
+        }
+
+        emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)),
+                           INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur,
+                           gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */
+                           callTarget,                                                   /* ireg */
+                           REG_NA, 0, 0,                                                 /* xreg, xmul, disp */
+                           false                                                         /* isJump */
+                           );
+
+        regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind)));
+        regSet.verifyRegistersUsed(killMask);
+    }
+}
+
+//-----------------------------------------------------------------------------------
+// instGen_MemoryBarrier: Emit a MemoryBarrier instruction
+//
+// Arguments:
+//     barrierKind - kind of barrier to emit (Only supports the Full now!! This depends on the CPU).
+//
+// Notes:
+//     All MemoryBarriers instructions can be removed by DOTNET_JitNoMemoryBarriers=1
+//
+void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
+{
+#ifdef DEBUG
+    if (JitConfig.JitNoMemoryBarriers() == 1)
+    {
+        return;
+    }
+#endif // DEBUG
+
+    // TODO: Use the exact barrier type depending on the CPU.
+ GetEmitter()->emitIns_I(INS_dbar, EA_4BYTE, INS_BARRIER_FULL); +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + // Only hook if profiler says it's okay. + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + + // Need to save on to the stack level, since the helper call will pop the argument + unsigned saveStackLvl2 = genStackLevel; + + /* Restore the stack level */ + SetStackLevel(saveStackLvl2); +} + +/*----------------------------------------------------------------------------- + * + * Push any callee-saved registers we have used + */ + +void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + +#if ETW_EBP_FRAMED + if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) + { + noway_assert(!"Used register RBM_FPBASE as a scratch register!"); + } +#endif + + // On LA we push the FP (frame-pointer) here along with all other callee saved registers + if (isFramePointerUsed()) + rsPushRegs |= RBM_FPBASE; + + // + // It may be possible to skip pushing/popping ra for leaf methods. However, such optimization would require + // changes in GC suspension architecture. 
+ // + // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we + // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf + // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends + // on the return address to be saved on the stack. If we skipped pushing/popping ra, the return address would never + // be saved on the stack and the GC suspension would time out. + // + // So if we wanted to skip pushing/popping ra for leaf frames, we would also need to do one of + // the following to make GC suspension work in the above scenario: + // - Make return address hijacking work even when ra is not saved on the stack. + // - Generate fully interruptible code for loops that contains calls + // - Generate fully interruptible code for leaf methods + // + // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity + // is not worth it. + // + + rsPushRegs |= RBM_RA; // We must save the return address (in the RA register). + regSet.rsMaskCalleeSaved = rsPushRegs; + regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; + +#ifdef DEBUG + if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) + { + printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", + compiler->compCalleeRegsPushed, genCountBits(rsPushRegs)); + dspRegMask(rsPushRegs); + printf("\n"); + assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs)); + } +#endif // DEBUG + + // See the document "LOONGARCH64 JIT Frame Layout" and/or "LOONGARCH64 Exception Data" for more details or requirements and + // options. Case numbers in comments here refer to this document. 
See also Compiler::lvaAssignFrameOffsets() + // for pictures of the general frame layouts, and CodeGen::genFuncletProlog() implementations (per architecture) + // for pictures of the funclet frame layouts. + // + // For most frames, generate, e.g.: + // sdc1 f31, off+7*8(sp) + // ... + // sdc1 f24, off(sp) + // + // sd s7, off2+7*8(sp) + // ... + // sd s1, off2+8(sp) + // sd s0, off2(sp) + // + // sd fp, 0(sp) + // sd ra, 8(sp) + // + // Notes: + // 1. FP is always saved, and the first store is FP, RA. + // 2. General-purpose registers are 8 bytes, floating-point registers are 8 bytes, but SIMD/FP registers 16 bytes. + // TODO: supporting SIMD feature ! + // 3. For frames with varargs, not implemented completely and not tested ! + // 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). + // + // For functions with GS and localloc, we change the frame so the frame pointer and RA are saved at the top + // of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same + // rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + // Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. + // + // The frames look like the following (simplified to only include components that matter for establishing the + // frames). See also Compiler::lvaAssignFrameOffsets(). + // + // + // Frames with FP, RA saved at bottom of frame (above outgoing argument space): + // + // | | + // |-----------------------| + // | incoming arguments | + // +=======================+ <---- Caller's SP + // | Arguments Or | // if needed. 
+ // | Varargs regs space | // Only for varargs functions; 64 bytes (TODO: not implement completely) + // |-----------------------| + // |Callee saved registers | // not including FP/RA; multiple of 8 bytes + // |-----------------------| + // | PSP slot | // 8 bytes (omitted in CoreRT ABI) + // |-----------------------| + // | locals, temps, etc. | + // |-----------------------| + // | possible GS cookie | + // |-----------------------| + // | Saved RA | // 8 bytes + // |-----------------------| + // | Saved FP | // 8 bytes + // |-----------------------| + // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + // |-----------------------| <---- Ambient SP + // | | | + // ~ | Stack grows ~ + // | | downward | + // V + // + // + // Frames with FP, RA saved at top of frame (note: above all callee-saved regs): + // + // | | + // |-----------------------| + // | incoming arguments | + // +=======================+ <---- Caller's SP + // | Arguments Or | // if needed. + // | Varargs regs space | // Only for varargs functions; 64 bytes (TODO: not implement completely) + // |-----------------------| + // | Saved RA | // 8 bytes + // |-----------------------| + // | Saved FP | // 8 bytes + // |-----------------------| + // |Callee saved registers | // not including FP/RA; multiple of 8 bytes + // |-----------------------| + // | PSP slot | // 8 bytes (omitted in CoreRT ABI) + // |-----------------------| + // | locals, temps, etc. | + // |-----------------------| + // | possible GS cookie | + // |-----------------------| + // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + // |-----------------------| <---- Ambient SP + // | | | + // ~ | Stack grows ~ + // | | downward | + // V + // + + int totalFrameSize = genTotalFrameSize(); + + int offset; // This will be the starting place for saving the callee-saved registers, in increasing order. 
+ +#ifdef DEBUG + if (verbose) + { + printf("Save float regs: "); + dspRegMask(maskSaveRegsFloat); + printf("\n"); + printf("Save int regs: "); + dspRegMask(maskSaveRegsInt); + printf("\n"); + } +#endif // DEBUG + + // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we + // generate based on various sizes. + int frameType = 0; + + // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the + // first save instruction as a "predecrement" amount, if possible. + int calleeSaveSPDelta = 0; + + // By default, we'll establish the frame pointer chain. (Note that currently frames without FP are NYI.) + bool establishFramePointer = true; + + // If we do establish the frame pointer, what is the amount we add to SP to do so? + unsigned offsetSpToSavedFp = 0; + + if (isFramePointerUsed()) + { + // We need to save both FP and RA. + + assert((maskSaveRegsInt & RBM_FP) != 0); + assert((maskSaveRegsInt & RBM_RA) != 0); + + // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address + // (FP and RA) are protected from buffer overrun by the GS cookie. If FP/RA are at the lowest addresses, + // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will + // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our + // saved FP/RA. In that case, we save FP/RA along with the rest of the callee-saved registers, above + // the GS cookie. + // + // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to + // create a frame pointer chain. + // + + if (totalFrameSize < 2048) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + // Case #1. 
+ // + // Generate: + // daddiu sp, sp, -framesz + // sd fp, outsz(sp) + // sd ra, outsz+8(sp) + // + // The (totalFrameSize <= 2047) condition ensures the offsets of sd/ld. + // + // After saving callee-saved registers, we establish the frame pointer with: + // daddiu fp, sp, offset-fp + // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. + + JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, compiler->compLclFrameSize); + + frameType = 1; + + offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize; + +//#ifdef OPTIMIZE_LOONGSON_EXT +// if (!(offsetSpToSavedFp & 0xf) && (offsetSpToSavedFp <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offsetSpToSavedFp >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, offsetSpToSavedFp); +// } +// else +//#endif +// { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp); + compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8); + compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8); +// } + + maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA + + offset = compiler->compLclFrameSize + 2*REGSIZE_BYTES;//FP/RA + } + else + { + frameType = 2; + + offsetSpToSavedFp = genSPtoFPdelta(); + + JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, fpDelta:%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, compiler->compLclFrameSize, offsetSpToSavedFp); + + offset = compiler->compLclFrameSize; + } + } + else + { + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + JITDUMP("Frame type 3. 
#outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, compiler->compLclFrameSize); + + frameType = 3; + + maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA + + offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; + } + else + { + frameType = 4; + + JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, SPDelta-1:%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, compiler->compLclFrameSize, calleeSaveSPDelta); + + offset = totalFrameSize - compiler->compLclFrameSize; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; + offsetSpToSavedFp = offset + REGSIZE_BYTES; + } + } + } + else + { + // No frame pointer (no chaining). + assert((maskSaveRegsInt & RBM_FP) == 0); + assert((maskSaveRegsInt & RBM_RA) != 0); + + // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using + // 'sd' if we only have one callee-saved register plus RA to save. + + NYI("Frame without frame pointer"); + offset = 0; + } + + assert(frameType != 0); + + JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta); + + // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, + // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't + // need to add codes at all. + + //if (compiler->info.compIsVarArgs) + //{ + // JITDUMP(" compIsVarArgs=true\n"); + + // // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here. 
+ // assert((offset % 16) == 0); + // for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1))) + // { + // regNumber reg2 = REG_NEXT(reg1); + // // sd REG, offset(SP) + // // sd REG + 1, (offset+8)(SP) + // GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg1, REG_SPBASE, offset); + // compiler->unwindNop(); + // GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg2, REG_SPBASE, offset + 8); + // compiler->unwindNop(); + // offset += 2 * REGSIZE_BYTES; + // } + //} + +#ifdef DEBUG + if (compiler->opts.disAsm) + printf("DEBUG: LOONGARCH64, frameType:%d\n\n", frameType); +#endif + if (frameType == 1) + { + //offsetSpToSavedFp = genSPtoFPdelta(); + } + else if (frameType == 2) + { + //offsetSpToSavedFp = genSPtoFPdelta(); + } + else if (frameType == 3) + { + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + offset = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; + + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + + offsetSpToSavedFp = offset; + +//#ifdef OPTIMIZE_LOONGSON_EXT +// if (!(offset & 0xf) && (offset <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); +// } +// else +//#endif +// { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); +// } + + genEstablishFramePointer(offset, /* reportUnwindData */ true); + + calleeSaveSPDelta = compiler->lvaOutgoingArgSpaceSize & ~0xf; + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + } + else + { + calleeSaveSPDelta = totalFrameSize - 
calleeSaveSPDelta; + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + + offset = compiler->lvaOutgoingArgSpaceSize; + +//#ifdef OPTIMIZE_LOONGSON_EXT +// if (!(offset & 0xf) && (offset <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); +// } +// else +//#endif + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + genEstablishFramePointer(offset, /* reportUnwindData */ true); + } + + establishFramePointer = false; + } + else if (frameType == 4) + { + genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); + + establishFramePointer = false; + + int remainingFrameSz = totalFrameSize - calleeSaveSPDelta; + + if (remainingFrameSz > 0) + { + genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegZeroed, /* reportUnwindData */ true); + } + } + else + { + unreached(); + } + + if (establishFramePointer) + { + JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp); + genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); + } +} + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is +// set to non-zero value after this call. 
+// +// Return Value: +// None +// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + // Give profiler a chance to back out of hooking this method + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } +} +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 3e9059eb156af..3fe0f14f9b750 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -538,12 +538,12 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS useType = TYP_SHORT; break; -#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) +#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 3: useType = TYP_INT; break; -#endif // !TARGET_XARCH || UNIX_AMD64_ABI +#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 #ifdef TARGET_64BIT case 4: @@ -551,14 +551,14 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS useType = TYP_INT; break; -#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) +#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 5: case 6: case 7: useType = TYP_I_IMPL; break; -#endif // !TARGET_XARCH || UNIX_AMD64_ABI +#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 #endif // TARGET_64BIT case TARGET_POINTER_SIZE: @@ -757,6 +757,27 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; +#elif defined(TARGET_LOONGARCH64) + // Structs that are pointer sized or smaller. 
+ //assert(structSize > TARGET_POINTER_SIZE); + + // On LOONGARCH64 structs that are 1-16 bytes are passed by value in one/multiple register(s) + if (structSize <= (TARGET_POINTER_SIZE * 2)) + { + // setup wbPassType and useType indicate that this is passed by value in multiple registers + // (when all of the parameters registers are used, then the stack will be used) + howToPassStruct = SPK_ByValue; + useType = TYP_STRUCT; + } + else // a structSize that is 17-32 bytes in size + { + // Otherwise we pass this struct by reference to a copy + // setup wbPassType and useType indicate that this is passed using one register + // (by reference to a copy) + howToPassStruct = SPK_ByReference; + useType = TYP_UNKNOWN; + } + #else // TARGET_XXX noway_assert(!"Unhandled TARGET in getArgTypeForStruct (with FEATURE_MULTIREG_ARGS=1)"); @@ -777,7 +798,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Otherwise we pass this struct by reference to a copy // setup wbPassType and useType indicate that this is passed using one register (by reference to a copy) @@ -911,6 +932,24 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_UNKNOWN; } +#ifdef TARGET_LOONGARCH64 + if (structSize <= (TARGET_POINTER_SIZE * 2)) + { + DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(clsHnd); + + if (numFloatFields & 0x1) + { + howToReturnStruct = SPK_PrimitiveType; + useType = structSize > 4 ? TYP_DOUBLE : TYP_FLOAT; + } + else if (numFloatFields & 0xE) + { + howToReturnStruct = SPK_ByValue; + useType = TYP_STRUCT; + } + } +#endif //TARGET_LOONGARCH64 + // Check for cases where a small struct is returned in a register // via a primitive type. 
// @@ -1044,6 +1083,24 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToReturnStruct = SPK_ByReference; useType = TYP_UNKNOWN; +#elif defined(TARGET_LOONGARCH64) + + // On LOONGARCH64 structs that are 1-16 bytes are returned by value in one/multiple register(s) + if (structSize <= (TARGET_POINTER_SIZE * 2)) + { + // setup wbPassType and useType indicate that this is return by value in multiple registers + howToReturnStruct = SPK_ByValue; + useType = TYP_STRUCT; + } + else // a structSize that is 17-32 bytes in size + { + // Otherwise we return this struct using a return buffer/byreference. + // setup wbPassType and useType indicate that this is returned using a return buffer register + // (reference to a return buffer) + howToReturnStruct = SPK_ByReference; + useType = TYP_UNKNOWN; + } + #else // TARGET_XXX noway_assert(!"Unhandled TARGET in getReturnTypeForStruct (with FEATURE_MULTIREG_ARGS=1)"); @@ -2222,6 +2279,8 @@ void Compiler::compSetProcessor() info.genCPU = CPU_X86_PENTIUM_4; else info.genCPU = CPU_X86; +#elif defined(TARGET_LOONGARCH64) + info.genCPU = CPU_LOONGARCH64; #endif // @@ -2404,6 +2463,10 @@ void Compiler::compSetProcessor() } #endif +#if defined(TARGET_LOONGARCH64) + //TODO: should add LOONGARCH64's features for LOONGARCH64. +#endif + instructionSetFlags = EnsureInstructionSetFlagsAreValid(instructionSetFlags); opts.setSupportedISAs(instructionSetFlags); @@ -2588,6 +2651,8 @@ void Compiler::compInitOptions(JitFlags* jitFlags) // For non-adaptive, padding limit is same as specified by the alignment. opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary; } +#elif defined(TARGET_LOONGARCH64) + //TODO: should be adaptive on LoongArch64. 
#endif assert(isPow2(opts.compJitAlignLoopBoundary)); @@ -2934,6 +2999,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitSaveFpLrWithCalleeSavedRegisters = 0; #endif // defined(TARGET_ARM64) +#if defined(TARGET_LOONGARCH64) + // 0 is default: use the appropriate frame type based on the function. + opts.compJitSaveFpRaWithCalleeSavedRegisters = 0; +#endif // defined(TARGET_LOONGARCH64) + #ifdef DEBUG opts.dspInstrs = false; opts.dspLines = false; @@ -3432,6 +3502,13 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters(); } #endif // defined(DEBUG) && defined(TARGET_ARM64) + +#if defined(DEBUG) && defined(TARGET_LOONGARCH64) + if ((s_pJitMethodSet == nullptr) || s_pJitMethodSet->IsActiveMethod(info.compFullName, info.compMethodHash())) + { + opts.compJitSaveFpRaWithCalleeSavedRegisters = JitConfig.JitSaveFpRaWithCalleeSavedRegisters(); + } +#endif // defined(DEBUG) && defined(TARGET_LOONGARCH64) } #ifdef DEBUG @@ -4030,7 +4107,7 @@ void Compiler::compSetOptimizationLevel() fgCanRelocateEHRegions = true; } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Function compRsvdRegCheck: // given a curState to use for calculating the total frame size // it will return true if the REG_OPT_RSVD should be reserved so @@ -4075,6 +4152,10 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) JITDUMP(" Returning true (ARM64)\n\n"); return true; // just always assume we'll need it, for now +#elif defined(TARGET_LOONGARCH64) + JITDUMP(" Returning true (LOONGARCH64)\n\n"); + return true; // just always assume we'll need it, for now + #else // TARGET_ARM // frame layout: @@ -4198,7 +4279,7 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) return false; #endif // TARGET_ARM } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 
//------------------------------------------------------------------------ // compGetTieringName: get a string describing tiered compilation settings diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ea088cccdd529..9541b47114f30 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -475,7 +475,13 @@ class LclVarDsc #if defined(TARGET_AMD64) || defined(TARGET_ARM64) unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_LOONGARCH64) + unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. + unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. + unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. + unsigned char lvIsSplit : 1; // Set if the argument is splited. also used the lvFldOffset. +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH) + #if OPT_BOOL_OPS unsigned char lvIsBoolean : 1; // set if variable is boolean @@ -674,6 +680,9 @@ class LclVarDsc { assert(lvIsHfa()); assert(varTypeIsStruct(lvType)); +#if defined(TARGET_LOONGARCH64) + assert(!"lvHfaSlots called not support on LOONGARCH64!"); +#endif unsigned slots = 0; #ifdef TARGET_ARM slots = lvExactSize / sizeof(float); @@ -990,7 +999,7 @@ class LclVarDsc } #endif assert(m_layout != nullptr); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(TypeGet()) || (lvIsImplicitByRef && (TypeGet() == TYP_BYREF))); #else assert(varTypeIsStruct(TypeGet())); @@ -1614,7 +1623,7 @@ struct FuncInfoDsc emitLocation* coldStartLoc; // locations for the cold section, if there is one. 
emitLocation* coldEndLoc; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) UnwindInfo uwi; // Unwind information for this function/funclet's hot section UnwindInfo* uwiCold; // Unwind information for this function/funclet's cold section @@ -1629,7 +1638,7 @@ struct FuncInfoDsc emitLocation* coldStartLoc; // locations for the cold section, if there is one. emitLocation* coldEndLoc; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #if defined(FEATURE_CFI_SUPPORT) jitstd::vector* cfiCodes; @@ -2148,7 +2157,7 @@ struct fgArgTabEntry // register numbers. void SetMultiRegNums() { -#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) +#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) if (numRegs == 1) { return; @@ -2169,7 +2178,7 @@ struct fgArgTabEntry argReg = (regNumber)(argReg + regSize); setRegNum(regIndex, argReg); } -#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) +#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) } #ifdef DEBUG @@ -2287,6 +2296,20 @@ class fgArgInfo const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr); #endif // UNIX_AMD64_ABI +#if defined(TARGET_LOONGARCH64) + fgArgTabEntry* AddRegArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + regNumber regNum, + unsigned numRegs, + unsigned byteSize, + unsigned byteAlignment, + bool isStruct, + bool isFloatHfa, /* unused */ + bool isVararg, + const regNumber nextOtherRegNum); +#endif + fgArgTabEntry* AddStkArg(unsigned argNum, GenTree* node, GenTreeCall::Use* use, @@ -4037,7 +4060,7 @@ class Compiler // For ARM64, this is structs larger than 16 bytes that are passed by reference. 
bool lvaIsImplicitByRefLocal(unsigned varNum) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) LclVarDsc* varDsc = lvaGetDesc(varNum); if (varDsc->lvIsImplicitByRef) { @@ -4046,7 +4069,7 @@ class Compiler assert(varTypeIsStruct(varDsc) || (varDsc->lvType == TYP_BYREF)); return true; } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return false; } @@ -8019,9 +8042,14 @@ class Compiler // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. return ((type == TYP_SIMD16) || (type == TYP_SIMD12)); } -#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) +#elif defined(TARGET_LOONGARCH64) + static bool varTypeNeedsPartialCalleeSave(var_types type) + {//TODO: supporting SIMD feature for LoongArch64. + return false; + } +#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) #error("Unknown target architecture for FEATURE_SIMD") -#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) +#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE protected: @@ -8194,6 +8222,9 @@ class Compiler #elif defined(TARGET_ARM64) reg = REG_R11; regMask = RBM_R11; +#elif defined(TARGET_LOONGARCH64) + reg = REG_T8; + regMask = RBM_T8; #else #error Unsupported or unset target architecture #endif @@ -8612,6 +8643,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void unwindReturn(regNumber reg); // ret lr #endif // defined(TARGET_ARM64) +#if defined(TARGET_LOONGARCH64) + void unwindNop(); + void unwindPadding(); // Generate a sequence of unwind NOP codes representing instructions between the last + // instruction and the current location. 
+ void unwindSaveReg(regNumber reg, int offset); + void unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset); + void unwindReturn(regNumber reg); +#endif // defined(TARGET_LOONGARCH64) + // // Private "helper" functions for the unwind implementation. // @@ -8697,9 +8737,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2; #elif defined(TARGET_ARM64) CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; +#elif defined(TARGET_LOONGARCH64) + //TODO: supporting SIMD feature for LoongArch64. + assert(!"unimplemented yet on LA"); + CORINFO_InstructionSet minimumIsa = 0; #else #error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 +#endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_LOONGARCH64 return compOpportunisticallyDependsOn(minimumIsa) && JitConfig.EnableHWIntrinsic(); #else @@ -9824,6 +9868,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX int compJitSaveFpLrWithCalleeSavedRegisters; #endif // defined(TARGET_ARM64) +#if defined(TARGET_LOONGARCH64) + // Decision about whether to save FP/RA registers with callee-saved registers (see + // COMPlus_JitSaveFpRaWithCalleSavedRegisters). + // TODO: will delete this in future. 
+ int compJitSaveFpRaWithCalleeSavedRegisters; +#endif // defined(TARGET_LOONGARCH64) + #ifdef CONFIGURABLE_ARM_ABI bool compUseSoftFP = false; #else @@ -10132,6 +10183,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define CPU_ARM 0x0300 // The generic ARM CPU #define CPU_ARM64 0x0400 // The generic ARM64 CPU +#define CPU_LOONGARCH64 0x0800 // The generic LOONGARCH64 CPU + unsigned genCPU; // What CPU are we running on // Number of class profile probes in this method @@ -10654,7 +10707,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void compSetProcessor(); void compInitDebuggingInfo(); void compSetOptimizationLevel(); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool compRsvdRegCheck(FrameLayoutState curState); #endif void compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFlags* compileFlags); @@ -12087,6 +12140,13 @@ const instruction INS_SQRT = INS_fsqrt; #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 +const instruction INS_BREAKPOINT = INS_break; +const instruction INS_MULADD = INS_fmadd_d;// NOTE: default is double. +const instruction INS_ABS = INS_fabs_d; // NOTE: default is double. +const instruction INS_SQRT = INS_fsqrt_d;// NOTE: default is double. 
+#endif // TARGET_LOONGARCH64 + /*****************************************************************************/ extern const BYTE genTypeSizes[]; diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 8fad38af36f84..e78270ea4b523 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -602,7 +602,7 @@ inline bool isRegParamType(var_types type) #endif // !TARGET_X86 } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) /*****************************************************************************/ // Returns true if 'type' is a struct that can be enregistered for call args // or can be returned by value in multiple registers. @@ -660,7 +660,7 @@ inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types typ return result; } -#endif // TARGET_AMD64 || TARGET_ARM64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 /*****************************************************************************/ @@ -1104,14 +1104,14 @@ inline GenTree* Compiler::gtNewFieldRef(var_types typ, CORINFO_FIELD_HANDLE fldH { unsigned lclNum = obj->AsOp()->gtOp1->AsLclVarCommon()->GetLclNum(); lvaTable[lclNum].lvFieldAccessed = 1; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // These structs are passed by reference; we should probably be able to treat these // as non-global refs, but downstream logic expects these to be marked this way. 
if (lvaTable[lclNum].lvIsParam) { tree->gtFlags |= GTF_GLOB_REF; } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) } else { @@ -1844,7 +1844,7 @@ inline void LclVarDsc::incRefCnts(weight_t weight, Compiler* comp, RefCountState bool doubleWeight = lvIsTemp; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64)|| defined(TARGET_LOONGARCH64) // and, for the time being, implicit byref params doubleWeight |= lvIsImplicitByRef; #endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) @@ -3074,6 +3074,8 @@ inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum) #ifdef TARGET_ARM return regNum - REG_F0; +#elif defined(TARGET_LOONGARCH64) + return regNum - REG_F0; #elif defined(TARGET_ARM64) return regNum - REG_V0; #elif defined(UNIX_AMD64_ABI) diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index ebf1ea2945195..b53608757f384 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -453,6 +453,14 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* } } } +#elif defined(TARGET_LOONGARCH64) + // Any structs that are larger than MAX_PASS_MULTIREG_BYTES are always passed by reference + if (structSize > MAX_PASS_MULTIREG_BYTES) + { + // This struct is passed by reference using a single 'slot' + return TARGET_POINTER_SIZE; + } + // otherwise will we pass this struct by value in multiple registers #elif !defined(TARGET_ARM) NYI("unknown target"); #endif // defined(TARGET_XXX) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index ba4ab8f7b6caa..86150d141b9a5 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -744,8 +744,11 @@ insGroup* emitter::emitSavIG(bool emitAdd) ig->igFlags |= IGF_BYREF_REGS; // We'll allocate extra space (DWORD aligned) to record the GC regs - +#if defined(TARGET_LOONGARCH64) + gs += 
sizeof(regMaskTP); +#else gs += sizeof(int); +#endif } // Allocate space for the instructions and optional liveset @@ -758,7 +761,11 @@ insGroup* emitter::emitSavIG(bool emitAdd) { // Record the byref regs in front the of the instructions +#if defined(TARGET_LOONGARCH64) + *castto(id, regMaskTP*)++ = emitInitByrefRegs; +#else *castto(id, unsigned*)++ = (unsigned)emitInitByrefRegs; +#endif } // Do we need to store the liveset? @@ -790,10 +797,18 @@ insGroup* emitter::emitSavIG(bool emitAdd) // Record how many instructions and bytes of code this group contains +#ifdef TARGET_LOONGARCH64 + noway_assert((unsigned int)emitCurIGinsCnt == emitCurIGinsCnt); +#else noway_assert((BYTE)emitCurIGinsCnt == emitCurIGinsCnt); +#endif noway_assert((unsigned short)emitCurIGsize == emitCurIGsize); +#ifdef TARGET_LOONGARCH64 + ig->igInsCnt = (unsigned int)emitCurIGinsCnt; +#else ig->igInsCnt = (BYTE)emitCurIGinsCnt; +#endif ig->igSize = (unsigned short)emitCurIGsize; emitCurCodeOffset += emitCurIGsize; assert(IsCodeAligned(emitCurCodeOffset)); @@ -1118,6 +1133,10 @@ void emitter::emitBegFN(bool hasFramePtr emitFirstColdIG = nullptr; emitTotalCodeSize = 0; +#ifdef TARGET_LOONGARCH64 + emitCounts_INS_OPTS_J = 0; +#endif + #if EMITTER_STATS emitTotalIGmcnt++; emitSizeMethod = 0; @@ -1159,6 +1178,11 @@ void emitter::emitBegFN(bool hasFramePtr ig->igNext = nullptr; +//#ifdef TARGET_LOONGARCH64 +// On future maybe use this. 
+// ig->igJmpCnt = 0; +//#endif + #ifdef DEBUG emitScratchSigInfo = nullptr; #endif // DEBUG @@ -1296,6 +1320,12 @@ weight_t emitter::getCurrentBlockWeight() } } +#if defined(TARGET_LOONGARCH64) +void emitter::dispIns(instrDesc* id) +{ + assert(!"Not used on LOONGARCH64."); +} +#else void emitter::dispIns(instrDesc* id) { #ifdef DEBUG @@ -1317,6 +1347,7 @@ void emitter::dispIns(instrDesc* id) emitIFcounts[id->idInsFmt()]++; #endif } +#endif void emitter::appendToCurIG(instrDesc* id) { @@ -2302,6 +2333,11 @@ void emitter::emitSetFrameRangeGCRs(int offsLo, int offsHi) #ifdef TARGET_AMD64 // doesn't have to be all negative on amd printf("-%04X ... %04X\n", -offsLo, offsHi); +#elif defined(TARGET_LOONGARCH64) + if (offsHi < 0) + printf("-%04X ... -%04X\n", -offsLo, -offsHi); + else + printf("-%04X ... %04X\n", -offsLo, offsHi); #else printf("-%04X ... -%04X\n", -offsLo, -offsHi); assert(offsHi <= 0); @@ -2633,7 +2669,7 @@ const char* emitter::emitLabelString(insGroup* ig) #endif // DEBUG -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Does the argument location point to an IG at the end of a function or funclet? 
// We can ignore the codePos part of the location, since it doesn't affect the @@ -2980,7 +3016,9 @@ void emitter::emitGenerateUnwindNop(instrDesc* id, void* context) comp->unwindNop(id->idCodeSize()); #elif defined(TARGET_ARM64) comp->unwindNop(); -#endif // defined(TARGET_ARM64) +#elif defined(TARGET_LOONGARCH64) + comp->unwindNop(); +#endif // defined(TARGET_LOONGARCH64) } /***************************************************************************** @@ -2994,7 +3032,7 @@ void emitter::emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp) emitWalkIDs(locFrom, emitGenerateUnwindNop, comp); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #if defined(TARGET_ARM) @@ -3377,6 +3415,9 @@ const size_t hexEncodingSize = 19; #elif defined(TARGET_ARM) const size_t basicIndent = 12; const size_t hexEncodingSize = 11; +#elif defined(TARGET_LOONGARCH64) +const size_t basicIndent = 12; +const size_t hexEncodingSize = 19; #endif #ifdef DEBUG @@ -4093,6 +4134,19 @@ void emitter::emitJumpDistBind() int jmp_iteration = 1; +#ifdef TARGET_LOONGARCH64 + //NOTE: + // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; + // bit1 of isLinkingEnd_LA: indicating not needed updating ths size while emitTotalCodeSize <= (0x7fff << 2) or had updated; + unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; + + UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. + // small jump max. neg distance + NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; + // small jump max. pos distance + NATIVE_OFFSET psd = B_DIST_SMALL_MAX_POS - emitCounts_INS_OPTS_J * (3 << 2);//the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). +#endif + /*****************************************************************************/ /* If we iterate to look for more jumps to shorten, we start again here. 
*/ /*****************************************************************************/ @@ -4129,9 +4183,11 @@ void emitter::emitJumpDistBind() UNATIVE_OFFSET jsz; // size of the jump instruction in bytes +#ifndef TARGET_LOONGARCH64 UNATIVE_OFFSET ssz = 0; // small jump size NATIVE_OFFSET nsd = 0; // small jump max. neg distance NATIVE_OFFSET psd = 0; // small jump max. pos distance +#endif #if defined(TARGET_ARM) UNATIVE_OFFSET msz = 0; // medium jump size @@ -4250,7 +4306,14 @@ void emitter::emitJumpDistBind() /* Make sure the jumps are properly ordered */ #ifdef DEBUG +#if defined(TARGET_LOONGARCH64) +#if defined(UNALIGNED_CHECK_DISABLE) + UNALIGNED_CHECK_DISABLE; +#endif + assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < (jmp->idjOffs + adjLJ)); +#else assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < jmp->idjOffs); +#endif lastLJ = (lastIG == jmp->idjIG) ? jmp : nullptr; assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG || @@ -4284,10 +4347,19 @@ void emitter::emitJumpDistBind() if (EMITVERBOSE) { printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, - lstIG->igOffs - adjIG); +#if defined(TARGET_LOONGARCH64) + lstIG->igOffs + adjIG +#else + lstIG->igOffs - adjIG +#endif + ); } #endif // DEBUG +#if defined(TARGET_LOONGARCH64) + lstIG->igOffs += adjIG; +#else lstIG->igOffs -= adjIG; +#endif assert(IsCodeAligned(lstIG->igOffs)); } while (lstIG != jmpIG); } @@ -4300,7 +4372,11 @@ void emitter::emitJumpDistBind() /* Apply any local size adjustment to the jump's relative offset */ +#if defined(TARGET_LOONGARCH64) + jmp->idjOffs += adjLJ; +#else jmp->idjOffs -= adjLJ; +#endif // If this is a jump via register, the instruction size does not change, so we are done. 
CLANG_FORMAT_COMMENT_ANCHOR; @@ -4348,8 +4424,9 @@ void emitter::emitJumpDistBind() if (jmp->idjShort) { +#ifndef TARGET_LOONGARCH64 assert(jmp->idCodeSize() == ssz); - +#endif // We should not be jumping/branching across funclets/functions emitCheckFuncletBranch(jmp, jmpIG); @@ -4459,7 +4536,11 @@ void emitter::emitJumpDistBind() here and the target could be shortened, causing the actual distance to shrink. */ +#if defined(TARGET_LOONGARCH64) + dstOffs += adjIG; +#else dstOffs -= adjIG; +#endif /* Compute the distance estimate */ @@ -4494,11 +4575,66 @@ void emitter::emitJumpDistBind() } #endif // DEBUG_EMIT +#if defined(TARGET_LOONGARCH64) + assert(jmpDist >= 0);//Forward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(jmpDist); + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO:later will be deleted!!! + extra = 8; + } + } + else if (ins < INS_b)// beqz/bnez < b < bl // See instrsloongarch64.h. + { + if (jmpDist + emitCounts_INS_OPTS_J*4 < 0x200000 ) + continue; + + extra = 4; + //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + } + else //if (ins == INS_b || ins == INS_bl) + { + assert(ins == INS_b || ins == INS_bl); + //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += extra;//the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
+ adjLJ += extra; + adjIG += extra; + emitTotalCodeSize += extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + +#else // not defined(TARGET_LOONGARCH64) if (extra <= 0) { /* This jump will be a short one */ goto SHORT_JMP; } +#endif } else { @@ -4537,13 +4673,69 @@ void emitter::emitJumpDistBind() } #endif // DEBUG_EMIT +#if defined(TARGET_LOONGARCH64) + assert(jmpDist >= 0);//Backward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(-jmpDist);//Backward jump is negative! + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO:later will be deleted!!! + extra = 8; + } + } + else if (ins < INS_b)// beqz/bnez < b < bl // See instrsloongarch64.h. + { + if (jmpDist + emitCounts_INS_OPTS_J*4 < 0x200000 ) + continue; + + extra = 4; + //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + } + else //if (ins == INS_b || ins == INS_bl) + { + assert(ins == INS_b || ins == INS_bl); + //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += extra;//the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
+ adjLJ += extra; + adjIG += extra; + emitTotalCodeSize += extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + +#else // not defined(TARGET_LOONGARCH64) if (extra <= 0) { /* This jump will be a short one */ goto SHORT_JMP; } +#endif } +#ifndef TARGET_LOONGARCH64 /* We arrive here if the jump couldn't be made short, at least for now */ /* We had better not have eagerly marked the jump as short @@ -4675,6 +4867,8 @@ void emitter::emitJumpDistBind() // The size of IF_LARGEJMP/IF_LARGEADR/IF_LARGELDC are 8 or 12. // All other code size is 4. assert((sizeDif == 4) || (sizeDif == 8)); +#elif defined(TARGET_LOONGARCH64) + assert(sizeDif == 0); #else #error Unsupported or unset target architecture #endif @@ -4735,8 +4929,40 @@ void emitter::emitJumpDistBind() jmpIG->igFlags |= IGF_UPD_ISZ; +#endif // not defined(TARGET_LOONGARCH64) } // end for each jump +#if defined(TARGET_LOONGARCH64) + if ((isLinkingEnd_LA & 0x3) < 0x2) + { + //indicating had updated the instrDescJmp's size with the type INS_OPTS_J. + isLinkingEnd_LA = 0x2; + //emitRecomputeIGoffsets(); + /* Adjust offsets of any remaining blocks */ + + for (;lstIG;) + { + lstIG = lstIG->igNext; + if (!lstIG) + { + break; + } +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + + lstIG->igOffs += adjIG; + + assert(IsCodeAligned(lstIG->igOffs)); + } + goto AGAIN; + } + +#else /* Did we shorten any jumps? 
*/ if (adjIG) @@ -4800,6 +5026,8 @@ void emitter::emitJumpDistBind() goto AGAIN; } } +#endif + #ifdef DEBUG if (EMIT_INSTLIST_VERBOSE) { @@ -5620,6 +5848,11 @@ emitter::instrDescAlign* emitter::emitAlignInNextIG(instrDescAlign* alignInstr) void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) { +#ifdef TARGET_LOONGARCH64 + /* TODO: for LOONGARCH64: not support idDebugOnlyInfo.*/ + return; +#else + #ifdef DEBUG // We should not be jumping/branching across funclets/functions // Except possibly a 'call' to a finally funclet for a local unwind @@ -5715,6 +5948,7 @@ void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) } } #endif // DEBUG +#endif } /***************************************************************************** @@ -6563,6 +6797,11 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, ig->igSize = (unsigned short)(cp - bp); } +#ifdef TARGET_LOONGARCH64 + //cp = cp - 4; + unsigned actualCodeSize = cp - codeBlock; +#endif + #if EMIT_TRACK_STACK_DEPTH assert(emitCurStackLvl == 0); #endif @@ -6603,6 +6842,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, emitUpdateLiveGCregs(GCT_GCREF, RBM_NONE, cp); } +#ifndef TARGET_LOONGARCH64 /* Patch any forward jumps */ if (emitFwdJumps) @@ -6687,6 +6927,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } } +#endif //!TARGET_LOONGARCH64 #ifdef DEBUG if (emitComp->opts.disAsm) @@ -6695,7 +6936,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #endif +#ifndef TARGET_LOONGARCH64 unsigned actualCodeSize = emitCurCodeOffs(cp); +#endif #if defined(TARGET_ARM64) assert(emitTotalCodeSize == actualCodeSize); @@ -6786,6 +7029,13 @@ void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp) * instruction number for this instruction */ +#if defined(TARGET_LOONGARCH64) +unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) +{ + assert(!"unimplemented yet on LOONGARCH"); + return -1; +} +#else unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) { 
instrDesc* id = (instrDesc*)ig->igData; @@ -6814,6 +7064,7 @@ unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) assert(!"emitFindInsNum failed"); return -1; } +#endif /***************************************************************************** * @@ -9264,13 +9515,14 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) // This uses and defs RDI and RSI. result = RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI); break; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; break; #else assert(!"unknown arch"); #endif +#if !defined(TARGET_LOONGARCH64) case CORINFO_HELP_PROF_FCN_ENTER: result = RBM_PROFILER_ENTER_TRASH; break; @@ -9287,8 +9539,9 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) case CORINFO_HELP_PROF_FCN_TAILCALL: result = RBM_PROFILER_TAILCALL_TRASH; break; +#endif // !defined(TARGET_LOONGARCH64) -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) case CORINFO_HELP_ASSIGN_REF: case CORINFO_HELP_CHECKED_ASSIGN_REF: result = RBM_CALLEE_GCTRASH_WRITEBARRIER; diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index ade4f7c3ca2c1..00124e42cac92 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -304,7 +304,11 @@ struct insGroup unsigned igStkLvl; // stack level on entry #endif regMaskSmall igGCregs; // set of registers with live GC refs +#ifdef TARGET_LOONGARCH64 + unsigned int igInsCnt; // # of instructions in this group +#else unsigned char igInsCnt; // # of instructions in this group +#endif #else // REGMASK_BITS @@ -344,8 +348,11 @@ struct insGroup ptr -= sizeof(VARSET_TP); } +#if defined(TARGET_LOONGARCH64) + ptr -= sizeof(VARSET_TP); +#else ptr -= sizeof(unsigned); - +#endif return *(unsigned*)ptr; } @@ -586,6 +593,10 @@ class emitter #define INSTR_ENCODED_SIZE 4 static_assert_no_msg(INS_count <= 512); instruction _idIns : 9; +#elif 
defined(TARGET_LOONGARCH64) + /* TODO: not include SIMD-vector. */ + static_assert_no_msg(INS_count <= 512); + instruction _idIns : 9; #else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) static_assert_no_msg(INS_count <= 256); instruction _idIns : 8; @@ -595,6 +606,10 @@ class emitter #if defined(TARGET_XARCH) static_assert_no_msg(IF_COUNT <= 128); insFormat _idInsFmt : 7; +#elif defined(TARGET_LOONGARCH64) + //insFormat _idInsFmt : 5;// NOTE: LOONGARCH64 does not used the _idInsFmt . + unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. If not enough, please use the _idInsCount. + //unsigned _idInsCount : 5; // the instruction(s) count of this instrDesc described. #else static_assert_no_msg(IF_COUNT <= 256); insFormat _idInsFmt : 8; @@ -611,6 +626,15 @@ class emitter _idIns = ins; } +#if defined(TARGET_LOONGARCH64) + insFormat idInsFmt() const + {//not used for LOONGARCH64. + return (insFormat)0; + } + void idInsFmt(insFormat insFmt) + { + } +#else insFormat idInsFmt() const { return _idInsFmt; @@ -623,6 +647,7 @@ class emitter assert(insFmt < IF_COUNT); _idInsFmt = insFmt; } +#endif void idSetRelocFlags(emitAttr attr) { @@ -636,6 +661,7 @@ class emitter // amd64: 17 bits // arm: 16 bits // arm64: 17 bits + //loongarch64: 14 bits private: #if defined(TARGET_XARCH) @@ -645,6 +671,8 @@ class emitter // doesn't cross a byte boundary. #elif defined(TARGET_ARM64) // Moved the definition of '_idOpSize' later so that we don't cross a 32-bit boundary when laying out bitfields +#elif defined(TARGET_LOONGARCH64) + /* _idOpSize defined bellow. */ #else // ARM opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 #endif // ARM @@ -695,6 +723,13 @@ class emitter unsigned _idLclVar : 1; // access a local on stack #endif +#ifdef TARGET_LOONGARCH64 + /* TODO: for LOONGARCH: maybe delete on future. 
*/ + opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 + insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the accessing a local on stack. + unsigned _idLclVar : 1; // access a local on stack. +#endif + #ifdef TARGET_ARM insSize _idInsSize : 2; // size of instruction: 16, 32 or 48 bits insFlags _idInsFlags : 1; // will this instruction set the flags @@ -711,6 +746,9 @@ class emitter #elif defined(TARGET_XARCH) // For xarch, we have used 14 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (14) +#elif defined(TARGET_LOONGARCH64) +// For Loongarch64, we have used 14 bits from the second DWORD. +#define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture #endif @@ -721,6 +759,7 @@ class emitter // amd64: 46 bits // arm: 48 bits // arm64: 49 bits + //loongarch64: 46 bits unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag @@ -733,6 +772,7 @@ class emitter // amd64: 48 bits // arm: 50 bits // arm64: 51 bits + // loongarch64: 48 bits CLANG_FORMAT_COMMENT_ANCHOR; #define ID_EXTRA_BITS (ID_EXTRA_RELOC_BITS + ID_EXTRA_BITFIELD_BITS) @@ -810,7 +850,7 @@ class emitter // TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts // about reading what we think is here, to avoid unexpected corruption issues. -#ifndef TARGET_ARM64 +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) emitLclVarAddr iiaLclVar; #endif BasicBlock* iiaBBlabel; @@ -862,7 +902,38 @@ class emitter regNumber _idReg3 : REGNUM_BITS; regNumber _idReg4 : REGNUM_BITS; }; -#endif // defined(TARGET_XARCH) +#elif defined(TARGET_LOONGARCH64) // TARGET_XARCH + struct + { + unsigned int iiaEncodedInstr;//instruction's binary encoding. + regNumber _idReg3 : REGNUM_BITS; + regNumber _idReg4 : REGNUM_BITS; + }; + + struct + { + int iiaJmpOffset;//temporary saving the offset of jmp or data. 
+ emitLclVarAddr iiaLclVar; + }; + + void iiaSetInstrEncode(unsigned int encode) + { + iiaEncodedInstr = encode; + } + unsigned int iiaGetInstrEncode() const + { + return iiaEncodedInstr; + } + + void iiaSetJmpOffset(int offset) + { + iiaJmpOffset = offset; + } + int iiaGetJmpOffset() const + { + return iiaJmpOffset; + } +#endif // defined(TARGET_LOONGARCH64) } _idAddrUnion; @@ -962,10 +1033,21 @@ class emitter _idInsFlags = sf; assert(sf == _idInsFlags); } -#endif // TARGET_ARM - emitAttr idOpSize() +#elif defined(TARGET_LOONGARCH64) + unsigned idCodeSize() const + { + return _idCodeSize;//_idInsCount; + } + void idCodeSize(unsigned sz) { + assert(sz < 32); + _idCodeSize = sz; + } +#endif // TARGET_LOONGARCH64 + + emitAttr idOpSize() + {//NOTE: not used for LOONGARCH64. return emitDecodeSize(_idOpSize); } void idOpSize(emitAttr opsz) @@ -1087,6 +1169,42 @@ class emitter #endif // TARGET_ARMARCH +#ifdef TARGET_LOONGARCH64 + insOpts idInsOpt() const + { + return (insOpts)_idInsOpt; + } + void idInsOpt(insOpts opt) + { + _idInsOpt = opt; + assert(opt == _idInsOpt); + } + + regNumber idReg3() const + { + assert(!idIsSmallDsc()); + return idAddr()->_idReg3; + } + void idReg3(regNumber reg) + { + assert(!idIsSmallDsc()); + idAddr()->_idReg3 = reg; + assert(reg == idAddr()->_idReg3); + } + regNumber idReg4() const + { + assert(!idIsSmallDsc()); + return idAddr()->_idReg4; + } + void idReg4(regNumber reg) + { + assert(!idIsSmallDsc()); + idAddr()->_idReg4 = reg; + assert(reg == idAddr()->_idReg4); + } + +#endif // TARGET_LOONGARCH64 + inline static bool fitsInSmallCns(ssize_t val) { return ((val >= ID_MIN_SMALL_CNS) && (val <= ID_MAX_SMALL_CNS)); @@ -1175,6 +1293,17 @@ class emitter } #endif // defined(TARGET_ARM) +#ifdef TARGET_LOONGARCH64 + bool idIsLclVar() const + { + return _idLclVar != 0; + } + void idSetIsLclVar() + { + _idLclVar = 1; + } +#endif // TARGET_LOONGARCH64 + bool idIsCnsReloc() const { return _idCnsReloc != 0; @@ -1325,6 +1454,23 @@ class emitter 
#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C #define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C +#elif defined(TARGET_LOONGARCH64) +// a read,write or modify from stack location, possible def to use latency from L0 cache +#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_3C +#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_3C + +// a read, write or modify from constant location, possible def to use latency from L0 cache +#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_3C +#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_3C + +// a read, write or modify from memory location, possible def to use latency from L0 or L1 cache +// plus an extra cost (of 1.0) for a increased chance of a cache miss +#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_4C +#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C + #endif // TARGET_XXX // Make this an enum: @@ -1735,6 +1881,10 @@ class emitter #endif // defined(TARGET_X86) #endif // !defined(HOST_64BIT) +#ifdef TARGET_LOONGARCH64 + unsigned int emitCounts_INS_OPTS_J;//INS_OPTS_J +#endif // defined(TARGET_LOONGARCH64) + size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp); size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp); @@ -1783,9 +1933,18 @@ class emitter // ugly code like "movw r10, 0x488; add r10, sp; vstr s0, [r10]" for each store, which // eats up our insGroup buffer. 
#define SC_IG_BUFFER_SIZE (100 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) -#else // !TARGET_ARMARCH + +#elif defined(TARGET_LOONGARCH64) + +#ifdef DEBUG +#define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) +#else +#define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 20 * SMALL_IDSC_SIZE) +#endif + +#else // !TARGET_LOONGARCH64 #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) -#endif // !TARGET_ARMARCH +#endif // !TARGET_LOONGARCH64 size_t emitIGbuffSize; @@ -1981,7 +2140,19 @@ class emitter static void emitGenerateUnwindNop(instrDesc* id, void* context); -#endif // TARGET_ARMARCH +#elif defined(TARGET_LOONGARCH64) + void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt); + bool emitGetLocationInfo(emitLocation* emitLoc, insGroup** pig, instrDesc** pid, int* pinsRemaining = NULL); + + bool emitNextID(insGroup*& ig, instrDesc*& id, int& insRemaining); + + typedef void (*emitProcessInstrFunc_t)(instrDesc* id, void* context); + + void emitWalkIDs(emitLocation* locFrom, emitProcessInstrFunc_t processFunc, void* context); + + static void emitGenerateUnwindNop(instrDesc* id, void* context); + +#endif // TARGET_LOONGARCH64 #ifdef TARGET_X86 void emitMarkStackLvl(unsigned stackLevel); @@ -2172,7 +2343,10 @@ class emitter // Returns "true" if instruction "id->idIns()" writes to a LclVar stack slot pair. 
bool emitInsWritesToLclVarStackLocPair(instrDesc* id); -#endif // TARGET_ARMARCH +#elif defined(TARGET_LOONGARCH64) + bool emitInsMayWriteToGCReg(instruction ins); + bool emitInsWritesToLclVarStackLoc(instrDesc* id); +#endif // TARGET_LOONGARCH64 /************************************************************************/ /* The following is used to distinguish helper vs non-helper calls */ diff --git a/src/coreclr/jit/emitdef.h b/src/coreclr/jit/emitdef.h index c9f003ccce1b6..35b46314a1225 100644 --- a/src/coreclr/jit/emitdef.h +++ b/src/coreclr/jit/emitdef.h @@ -12,6 +12,8 @@ #include "emitarm.h" #elif defined(TARGET_ARM64) #include "emitarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "emitloongarch64.h" #else #error Unsupported or unset target architecture #endif diff --git a/src/coreclr/jit/emitfmts.h b/src/coreclr/jit/emitfmts.h index c252c0b1237d3..77712ed95cce3 100644 --- a/src/coreclr/jit/emitfmts.h +++ b/src/coreclr/jit/emitfmts.h @@ -8,6 +8,8 @@ #include "emitfmtsarm.h" #elif defined(TARGET_ARM64) #include "emitfmtsarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "emitfmtsloongarch64.h" #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/emitfmtsloongarch64.h b/src/coreclr/jit/emitfmtsloongarch64.h new file mode 100644 index 0000000000000..b4232269b144f --- /dev/null +++ b/src/coreclr/jit/emitfmtsloongarch64.h @@ -0,0 +1,82 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. 
+ +////////////////////////////////////////////////////////////////////////////// + +// clang-format off +#if !defined(TARGET_LOONGARCH64) +#error Unexpected target type +#endif + +#ifdef DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// + +#undef DEFINE_ID_OPS +enum ID_OPS +{ + ID_OP_NONE, // no additional arguments + ID_OP_SCNS, // small const operand (21-bits or less, no reloc) + ID_OP_JMP, // local jump + ID_OP_CALL, // method call + ID_OP_SPEC, // special handling required +}; + +////////////////////////////////////////////////////////////////////////////// +#else // !DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// + +#ifndef IF_DEF +#error Must define IF_DEF macro before including this file +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// enum insFormat instruction enum ID_OPS +// scheduling +// (unused) +////////////////////////////////////////////////////////////////////////////// + +IF_DEF(NONE, IS_NONE, NONE) // + + +//IF_DEF(LABEL, IS_NONE, JMP) // label +//IF_DEF(LARGEJMP, IS_NONE, JMP) // large conditional branch pseudo-op (cond branch + uncond branch) +//IF_DEF(LARGEADR, IS_NONE, JMP) // large address pseudo-op (adrp + add) +//IF_DEF(LARGELDC, IS_NONE, JMP) // large constant pseudo-op (adrp + ldr) + + +IF_DEF(OPCODE, IS_NONE, NONE) +IF_DEF(OPCODES_16, IS_NONE, NONE) +IF_DEF(OP_FMT, IS_NONE, NONE) +IF_DEF(OP_FMT_16, IS_NONE, NONE) +IF_DEF(OP_FMTS_16, IS_NONE, NONE) +IF_DEF(FMT_FUNC, IS_NONE, NONE) +IF_DEF(FMT_FUNC_6, IS_NONE, NONE) +IF_DEF(FMT_FUNC_16, IS_NONE, NONE) +IF_DEF(FMT_FUNCS_6, IS_NONE, NONE) +IF_DEF(FMT_FUNCS_16, IS_NONE, NONE) +IF_DEF(FMT_FUNCS_6A, IS_NONE, NONE) +IF_DEF(FMT_FUNCS_11A, IS_NONE, NONE) +IF_DEF(FUNC, IS_NONE, NONE) +IF_DEF(FUNC_6, IS_NONE, NONE) +IF_DEF(FUNC_16, IS_NONE, NONE) +IF_DEF(FUNC_21, IS_NONE, NONE) +IF_DEF(FUNCS_6, IS_NONE, NONE) +IF_DEF(FUNCS_6A, IS_NONE, NONE) +IF_DEF(FUNCS_6B, 
IS_NONE, NONE) +IF_DEF(FUNCS_6C, IS_NONE, NONE) +IF_DEF(FUNCS_6D, IS_NONE, NONE) +IF_DEF(FUNCS_6E, IS_NONE, NONE) +IF_DEF(FUNCS_11, IS_NONE, NONE) + + +////////////////////////////////////////////////////////////////////////////// +#undef IF_DEF +////////////////////////////////////////////////////////////////////////////// + +#endif // !DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// +// clang-format on diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h index 484eca3399b4e..82c78299efebd 100644 --- a/src/coreclr/jit/emitinl.h +++ b/src/coreclr/jit/emitinl.h @@ -335,6 +335,36 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id) id->idReg2((regNumber)encodeMask); // Save in idReg2 +#elif defined(TARGET_LOONGARCH64) + assert(REGNUM_BITS >= 5); + encodeMask = 0; + + if ((regmask & RBM_S0) != RBM_NONE) + encodeMask |= 0x01; + if ((regmask & RBM_S1) != RBM_NONE) + encodeMask |= 0x02; + if ((regmask & RBM_S2) != RBM_NONE) + encodeMask |= 0x04; + if ((regmask & RBM_S3) != RBM_NONE) + encodeMask |= 0x08; + if ((regmask & RBM_S4) != RBM_NONE) + encodeMask |= 0x10; + + id->idReg1((regNumber)encodeMask); // Save in idReg1 + + encodeMask = 0; + + if ((regmask & RBM_S5) != RBM_NONE) + encodeMask |= 0x01; + if ((regmask & RBM_S6) != RBM_NONE) + encodeMask |= 0x02; + if ((regmask & RBM_S7) != RBM_NONE) + encodeMask |= 0x04; + if ((regmask & RBM_S8) != RBM_NONE) + encodeMask |= 0x08; + + id->idReg2((regNumber)encodeMask); // Save in idReg2 + #else NYI("unknown target"); #endif @@ -447,6 +477,32 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id) if ((encodeMask & 0x10) != 0) regmask |= RBM_R28; +#elif defined(TARGET_LOONGARCH64) + assert(REGNUM_BITS >= 5); + encodeMask = id->idReg1(); + + if ((encodeMask & 0x01) != 0) + regmask |= RBM_S0; + if ((encodeMask & 0x02) != 0) + regmask |= RBM_S1; + if ((encodeMask & 0x04) != 0) + regmask |= RBM_S2; + if ((encodeMask & 0x08) != 0) + regmask |= RBM_S3; + if 
((encodeMask & 0x10) != 0) + regmask |= RBM_S4; + + encodeMask = id->idReg2(); + + if ((encodeMask & 0x01) != 0) + regmask |= RBM_S5; + if ((encodeMask & 0x02) != 0) + regmask |= RBM_S6; + if ((encodeMask & 0x04) != 0) + regmask |= RBM_S7; + if ((encodeMask & 0x08) != 0) + regmask |= RBM_S8; + #else NYI("unknown target"); #endif diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index 4ed340302119d..0a19c7cbf138e 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -46,6 +46,26 @@ JMP_SMALL(lt , ge , blt ) // LT JMP_SMALL(gt , le , bgt ) // GT JMP_SMALL(le , gt , ble ) // LE +#elif defined(TARGET_LOONGARCH64) + +/* TODO for LOONGARCH: should redesign!!! */ +// jump reverse instruction condcode +JMP_SMALL(jmp , jmp , b ) // AL always +JMP_SMALL(eq , ne , beq ) // EQ +JMP_SMALL(ne , eq , bne ) // NE +//JMP_SMALL(hs , lo , bgez ) // HS also CS +//JMP_SMALL(lo , hs , bltz ) // LO also CC +//JMP_SMALL(mi , pl , bmi ) // MI +//JMP_SMALL(pl , mi , bpl ) // PL +//JMP_SMALL(vs , vc , bvs ) // VS +//JMP_SMALL(vc , vs , bvc ) // VC +//JMP_SMALL(hi , ls , bhi ) // HI +//JMP_SMALL(ls , hi , bls ) // LS +//JMP_SMALL(gez , ltz , bgez ) // GE +//JMP_SMALL(gtz , lez , bgtz ) // GT +//JMP_SMALL(ltz , gez , bltz ) // LT +//JMP_SMALL(lez , gtz , blez ) // LE + #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp new file mode 100644 index 0000000000000..a5492bee3394b --- /dev/null +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -0,0 +1,6780 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information.//emitarm64.cpp deletes this line. + +// Copyright (c) Loongson Technology. All rights reserved. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX emitloongarch64.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +/*****************************************************************************/ +/*****************************************************************************/ + +#include "instr.h" +#include "emit.h" +#include "codegen.h" + +////These are used for loongarch64 instrs's dump. +////LA_OP_2R opcode: bit31 ~ bit10 +#define LA_2R_CLO_W 0x4 +#define LA_2R_CLZ_W 0x5 +#define LA_2R_CTO_W 0x6 +#define LA_2R_CTZ_W 0x7 +#define LA_2R_CLO_D 0x8 +#define LA_2R_CLZ_D 0x9 +#define LA_2R_CTO_D 0xa +#define LA_2R_CTZ_D 0xb +#define LA_2R_REVB_2H 0xc +#define LA_2R_REVB_4H 0xd +#define LA_2R_REVB_2W 0xe +#define LA_2R_REVB_D 0xf +#define LA_2R_REVH_2W 0x10 +#define LA_2R_REVH_D 0x11 +#define LA_2R_BITREV_4B 0x12 +#define LA_2R_BITREV_8B 0x13 +#define LA_2R_BITREV_W 0x14 +#define LA_2R_BITREV_D 0x15 +#define LA_2R_EXT_W_H 0x16 +#define LA_2R_EXT_W_B 0x17 +#define LA_2R_RDTIMEL_W 0x18 +#define LA_2R_RDTIMEH_W 0x19 +#define LA_2R_RDTIME_D 0x1a +#define LA_2R_CPUCFG 0x1b +#define LA_2R_ASRTLE_D 0x2 +#define LA_2R_ASRTGT_D 0x3 +#define LA_2R_FABS_S 0x4501 +#define LA_2R_FABS_D 0x4502 +#define LA_2R_FNEG_S 0x4505 +#define LA_2R_FNEG_D 0x4506 +#define LA_2R_FLOGB_S 0x4509 +#define LA_2R_FLOGB_D 0x450a +#define LA_2R_FCLASS_S 0x450d +#define LA_2R_FCLASS_D 0x450e +#define LA_2R_FSQRT_S 0x4511 +#define LA_2R_FSQRT_D 0x4512 +#define LA_2R_FRECIP_S 0x4515 +#define LA_2R_FRECIP_D 0x4516 +#define LA_2R_FRSQRT_S 0x4519 +#define LA_2R_FRSQRT_D 0x451a +#define LA_2R_FMOV_S 0x4525 +#define LA_2R_FMOV_D 0x4526 +#define 
LA_2R_MOVGR2FR_W 0x4529 +#define LA_2R_MOVGR2FR_D 0x452a +#define LA_2R_MOVGR2FRH_W 0x452b +#define LA_2R_MOVFR2GR_S 0x452d +#define LA_2R_MOVFR2GR_D 0x452e +#define LA_2R_MOVFRH2GR_S 0x452f +#define LA_2R_MOVGR2FCSR 0x4530 +#define LA_2R_MOVFCSR2GR 0x4532 +#define LA_2R_MOVFR2CF 0x4534 +#define LA_2R_MOVCF2FR 0x4535 +#define LA_2R_MOVGR2CF 0x4536 +#define LA_2R_MOVCF2GR 0x4537 +#define LA_2R_FCVT_S_D 0x4646 +#define LA_2R_FCVT_D_S 0x4649 +#define LA_2R_FTINTRM_W_S 0x4681 +#define LA_2R_FTINTRM_W_D 0x4682 +#define LA_2R_FTINTRM_L_S 0x4689 +#define LA_2R_FTINTRM_L_D 0x468a +#define LA_2R_FTINTRP_W_S 0x4691 +#define LA_2R_FTINTRP_W_D 0x4692 +#define LA_2R_FTINTRP_L_S 0x4699 +#define LA_2R_FTINTRP_L_D 0x469a +#define LA_2R_FTINTRZ_W_S 0x46a1 +#define LA_2R_FTINTRZ_W_D 0x46a2 +#define LA_2R_FTINTRZ_L_S 0x46a9 +#define LA_2R_FTINTRZ_L_D 0x46aa +#define LA_2R_FTINTRNE_W_S 0x46b1 +#define LA_2R_FTINTRNE_W_D 0x46b2 +#define LA_2R_FTINTRNE_L_S 0x46b9 +#define LA_2R_FTINTRNE_L_D 0x46ba +#define LA_2R_FTINT_W_S 0x46c1 +#define LA_2R_FTINT_W_D 0x46c2 +#define LA_2R_FTINT_L_S 0x46c9 +#define LA_2R_FTINT_L_D 0x46ca +#define LA_2R_FFINT_S_W 0x4744 +#define LA_2R_FFINT_S_L 0x4746 +#define LA_2R_FFINT_D_W 0x4748 +#define LA_2R_FFINT_D_L 0x474a +#define LA_2R_FRINT_S 0x4791 +#define LA_2R_FRINT_D 0x4792 +#define LA_2R_IOCSRRD_B 0x19200 +#define LA_2R_IOCSRRD_H 0x19201 +#define LA_2R_IOCSRRD_W 0x19202 +#define LA_2R_IOCSRRD_D 0x19203 +#define LA_2R_IOCSRWR_B 0x19204 +#define LA_2R_IOCSRWR_H 0x19205 +#define LA_2R_IOCSRWR_W 0x19206 +#define LA_2R_IOCSRWR_D 0x19207 + +////LA_OP_3R opcode: bit31 ~ bit15 +#define LA_3R_ADD_W 0x20 +#define LA_3R_ADD_D 0x21 +#define LA_3R_SUB_W 0x22 +#define LA_3R_SUB_D 0x23 +#define LA_3R_SLT 0x24 +#define LA_3R_SLTU 0x25 +#define LA_3R_MASKEQZ 0x26 +#define LA_3R_MASKNEZ 0x27 +#define LA_3R_NOR 0x28 +#define LA_3R_AND 0x29 +#define LA_3R_OR 0x2a +#define LA_3R_XOR 0x2b +#define LA_3R_ORN 0x2c +#define LA_3R_ANDN 0x2d +#define LA_3R_SLL_W 0x2e +#define 
LA_3R_SRL_W 0x2f +#define LA_3R_SRA_W 0x30 +#define LA_3R_SLL_D 0x31 +#define LA_3R_SRL_D 0x32 +#define LA_3R_SRA_D 0x33 +#define LA_3R_ROTR_W 0x36 +#define LA_3R_ROTR_D 0x37 +#define LA_3R_MUL_W 0x38 +#define LA_3R_MULH_W 0x39 +#define LA_3R_MULH_WU 0x3a +#define LA_3R_MUL_D 0x3b +#define LA_3R_MULH_D 0x3c +#define LA_3R_MULH_DU 0x3d +#define LA_3R_MULW_D_W 0x3e +#define LA_3R_MULW_D_WU 0x3f +#define LA_3R_DIV_W 0x40 +#define LA_3R_MOD_W 0x41 +#define LA_3R_DIV_WU 0x42 +#define LA_3R_MOD_WU 0x43 +#define LA_3R_DIV_D 0x44 +#define LA_3R_MOD_D 0x45 +#define LA_3R_DIV_DU 0x46 +#define LA_3R_MOD_DU 0x47 +#define LA_3R_CRC_W_B_W 0x48 +#define LA_3R_CRC_W_H_W 0x49 +#define LA_3R_CRC_W_W_W 0x4a +#define LA_3R_CRC_W_D_W 0x4b +#define LA_3R_CRCC_W_B_W 0x4c +#define LA_3R_CRCC_W_H_W 0x4d +#define LA_3R_CRCC_W_W_W 0x4e +#define LA_3R_CRCC_W_D_W 0x4f +#define LA_3R_FADD_S 0x201 +#define LA_3R_FADD_D 0x202 +#define LA_3R_FSUB_S 0x205 +#define LA_3R_FSUB_D 0x206 +#define LA_3R_FMUL_S 0x209 +#define LA_3R_FMUL_D 0x20a +#define LA_3R_FDIV_S 0x20d +#define LA_3R_FDIV_D 0x20e +#define LA_3R_FMAX_S 0x211 +#define LA_3R_FMAX_D 0x212 +#define LA_3R_FMIN_S 0x215 +#define LA_3R_FMIN_D 0x216 +#define LA_3R_FMAXA_S 0x219 +#define LA_3R_FMAXA_D 0x21a +#define LA_3R_FMINA_S 0x21d +#define LA_3R_FMINA_D 0x21e +#define LA_3R_FSCALEB_S 0x221 +#define LA_3R_FSCALEB_D 0x222 +#define LA_3R_FCOPYSIGN_S 0x225 +#define LA_3R_FCOPYSIGN_D 0x226 +#define LA_3R_INVTLB 0xc91 +#define LA_3R_LDX_B 0x7000 +#define LA_3R_LDX_H 0x7008 +#define LA_3R_LDX_W 0x7010 +#define LA_3R_LDX_D 0x7018 +#define LA_3R_STX_B 0x7020 +#define LA_3R_STX_H 0x7028 +#define LA_3R_STX_W 0x7030 +#define LA_3R_STX_D 0x7038 +#define LA_3R_LDX_BU 0x7040 +#define LA_3R_LDX_HU 0x7048 +#define LA_3R_LDX_WU 0x7050 +#define LA_3R_PRELDX 0x7058 +#define LA_3R_FLDX_S 0x7060 +#define LA_3R_FLDX_D 0x7068 +#define LA_3R_FSTX_S 0x7070 +#define LA_3R_FSTX_D 0x7078 +#define LA_3R_AMSWAP_W 0x70c0 +#define LA_3R_AMSWAP_D 0x70c1 +#define 
LA_3R_AMADD_W 0x70c2 +#define LA_3R_AMADD_D 0x70c3 +#define LA_3R_AMAND_W 0x70c4 +#define LA_3R_AMAND_D 0x70c5 +#define LA_3R_AMOR_W 0x70c6 +#define LA_3R_AMOR_D 0x70c7 +#define LA_3R_AMXOR_W 0x70c8 +#define LA_3R_AMXOR_D 0x70c9 +#define LA_3R_AMMAX_W 0x70ca +#define LA_3R_AMMAX_D 0x70cb +#define LA_3R_AMMIN_W 0x70cc +#define LA_3R_AMMIN_D 0x70cd +#define LA_3R_AMMAX_WU 0x70ce +#define LA_3R_AMMAX_DU 0x70cf +#define LA_3R_AMMIN_WU 0x70d0 +#define LA_3R_AMMIN_DU 0x70d1 +#define LA_3R_AMSWAP_DB_W 0x70d2 +#define LA_3R_AMSWAP_DB_D 0x70d3 +#define LA_3R_AMADD_DB_W 0x70d4 +#define LA_3R_AMADD_DB_D 0x70d5 +#define LA_3R_AMAND_DB_W 0x70d6 +#define LA_3R_AMAND_DB_D 0x70d7 +#define LA_3R_AMOR_DB_W 0x70d8 +#define LA_3R_AMOR_DB_D 0x70d9 +#define LA_3R_AMXOR_DB_W 0x70da +#define LA_3R_AMXOR_DB_D 0x70db +#define LA_3R_AMMAX_DB_W 0x70dc +#define LA_3R_AMMAX_DB_D 0x70dd +#define LA_3R_AMMIN_DB_W 0x70de +#define LA_3R_AMMIN_DB_D 0x70df +#define LA_3R_AMMAX_DB_WU 0x70e0 +#define LA_3R_AMMAX_DB_DU 0x70e1 +#define LA_3R_AMMIN_DB_WU 0x70e2 +#define LA_3R_AMMIN_DB_DU 0x70e3 +#define LA_3R_FLDGT_S 0x70e8 +#define LA_3R_FLDGT_D 0x70e9 +#define LA_3R_FLDLE_S 0x70ea +#define LA_3R_FLDLE_D 0x70eb +#define LA_3R_FSTGT_S 0x70ec +#define LA_3R_FSTGT_D 0x70ed +#define LA_3R_FSTLE_S 0x70ee +#define LA_3R_FSTLE_D 0x70ef +#define LA_3R_LDGT_B 0x70f0 +#define LA_3R_LDGT_H 0x70f1 +#define LA_3R_LDGT_W 0x70f2 +#define LA_3R_LDGT_D 0x70f3 +#define LA_3R_LDLE_B 0x70f4 +#define LA_3R_LDLE_H 0x70f5 +#define LA_3R_LDLE_W 0x70f6 +#define LA_3R_LDLE_D 0x70f7 +#define LA_3R_STGT_B 0x70f8 +#define LA_3R_STGT_H 0x70f9 +#define LA_3R_STGT_W 0x70fa +#define LA_3R_STGT_D 0x70fb +#define LA_3R_STLE_B 0x70fc +#define LA_3R_STLE_H 0x70fd +#define LA_3R_STLE_W 0x70fe +#define LA_3R_STLE_D 0x70ff + +////LA_OP_4R opcode: bit31 ~ bit20 +#define LA_4R_FMADD_S 0x81 +#define LA_4R_FMADD_D 0x82 +#define LA_4R_FMSUB_S 0x85 +#define LA_4R_FMSUB_D 0x86 +#define LA_4R_FNMADD_S 0x89 +#define LA_4R_FNMADD_D 0x8a +#define 
LA_4R_FNMSUB_S 0x8d +#define LA_4R_FNMSUB_D 0x8e +#define LA_4R_FSEL 0xd0 + +////LA_OP_2RI8 + +////LA_OP_2RI12 opcode: bit31 ~ bit22 +#define LA_2RI12_SLTI 0x8 +#define LA_2RI12_SLTUI 0x9 +#define LA_2RI12_ADDI_W 0xa +#define LA_2RI12_ADDI_D 0xb +#define LA_2RI12_LU52I_D 0xc +#define LA_2RI12_ANDI 0xd +#define LA_2RI12_ORI 0xe +#define LA_2RI12_XORI 0xf +#define LA_2RI12_CACHE 0x18 +#define LA_2RI12_LD_B 0xa0 +#define LA_2RI12_LD_H 0xa1 +#define LA_2RI12_LD_W 0xa2 +#define LA_2RI12_LD_D 0xa3 +#define LA_2RI12_ST_B 0xa4 +#define LA_2RI12_ST_H 0xa5 +#define LA_2RI12_ST_W 0xa6 +#define LA_2RI12_ST_D 0xa7 +#define LA_2RI12_LD_BU 0xa8 +#define LA_2RI12_LD_HU 0xa9 +#define LA_2RI12_LD_WU 0xaa +#define LA_2RI12_PRELD 0xab +#define LA_2RI12_FLD_S 0xac +#define LA_2RI12_FST_S 0xad +#define LA_2RI12_FLD_D 0xae +#define LA_2RI12_FST_D 0xaf + +////LA_OP_2RI14i opcode: bit31 ~ bit24 +#define LA_2RI14_LL_W 0x20 +#define LA_2RI14_SC_W 0x21 +#define LA_2RI14_LL_D 0x22 +#define LA_2RI14_SC_D 0x23 +#define LA_2RI14_LDPTR_W 0x24 +#define LA_2RI14_STPTR_W 0x25 +#define LA_2RI14_LDPTR_D 0x26 +#define LA_2RI14_STPTR_D 0x27 + +////LA_OP_2RI16 opcode: bit31 ~ bit26 +#define LA_2RI16_ADDU16I_D 0x4 +#define LA_2RI16_JIRL 0x13 +#define LA_2RI16_BEQ 0x16 +#define LA_2RI16_BNE 0x17 +#define LA_2RI16_BLT 0x18 +#define LA_2RI16_BGE 0x19 +#define LA_2RI16_BLTU 0x1a +#define LA_2RI16_BGEU 0x1b + +////LA_OP_1RI20 opcode: bit31 ~ bit25 +#define LA_1RI20_LU12I_W 0xa +#define LA_1RI20_LU32I_D 0xb +#define LA_1RI20_PCADDI 0xc +#define LA_1RI20_PCALAU12I 0xd +#define LA_1RI20_PCADDU12I 0xe +#define LA_1RI20_PCADDU18I 0xf + +////LA_OP_I26 +#define LA_I26_B 0x14 +#define LA_I26_BL 0x15 + +////LA_OP_1RI21 +#define LA_1RI21_BEQZ 0x10 +#define LA_1RI21_BNEZ 0x11 +#define LA_1RI21_BCEQZ 0x12 +#define LA_1RI21_BCNEZ 0x12 + +////other +#define LA_OP_ALSL_W 0x1 +#define LA_OP_ALSL_WU 0x1 +#define LA_OP_ALSL_D 0xb +#define LA_OP_BYTEPICK_W 0x2 +#define LA_OP_BYTEPICK_D 0x3 +#define LA_OP_BREAK 0x54 +#define 
LA_OP_DBGCALL 0x55 +#define LA_OP_SYSCALL 0x56 +#define LA_OP_SLLI_W 0x10 +#define LA_OP_SLLI_D 0x10 +#define LA_OP_SRLI_W 0x11 +#define LA_OP_SRLI_D 0x11 +#define LA_OP_SRAI_W 0x12 +#define LA_OP_SRAI_D 0x12 +#define LA_OP_ROTRI_W 0x13 +#define LA_OP_ROTRI_D 0x13 +#define LA_OP_FCMP_cond_S 0xc1 +#define LA_OP_FCMP_cond_D 0xc2 +#define LA_OP_BSTRINS_W 0x1 +#define LA_OP_BSTRPICK_W 0x1 +#define LA_OP_BSTRINS_D 0x2 +#define LA_OP_BSTRPICK_D 0x3 +#define LA_OP_DBAR 0x70e4 +#define LA_OP_IBAR 0x70e5 + +//// add other define-macro here. + + +/*****************************************************************************/ + +const instruction emitJumpKindInstructions[] = { + INS_nop, + +#define JMP_SMALL(en, rev, ins) INS_##ins, +#include "emitjmps.h" +}; + +const emitJumpKind emitReverseJumpKinds[] = { + EJ_NONE, + +#define JMP_SMALL(en, rev, ins) EJ_##rev, +#include "emitjmps.h" +}; + +/***************************************************************************** + * The macro define for instructions. 
+ */ + +#define D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd or fd or hint */ \ + op0_code |= ((code_t)(op2_reg))<<5; /* rj */ \ + op0_code |= ((op3_imm) & 0xfff)<<10 + +#define D_INST_add_d(op0_code, op1_reg, op2_reg, op3_reg) \ + op0_code |= ((code_t)(op1_reg));/* rd */ \ + op0_code |= ((code_t)(op2_reg))<<5;/* rj */ \ + op0_code |= ((code_t)(op3_reg))<<10 /* rk */ + +#define D_INST_3R(op0_code, op1_reg, op2_reg, op3_reg) \ + op0_code |= ((code_t)(op1_reg));/* rd */ \ + op0_code |= ((code_t)(op2_reg))<<5;/* rj */ \ + op0_code |= ((code_t)(op3_reg))<<10 /* rk */ + +#define D_INST_JIRL(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((code_t)(op2_reg))<<5; /* rj */ \ + op0_code |= ((op3_imm) & 0xffff)<<10 /* offs */ \ + +#define D_INST_lu12i_w(op0_code, op1_reg, op2_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((op2_imm) & 0xfffff)<<5 /* si20 */ + +#define D_INST_lu32i_d(op0_code, op1_reg, op2_imm) \ + D_INST_lu12i_w(op0_code, op1_reg, op2_imm) + +#define D_INST_lu52i_d(op0_code, op1_reg, op2_reg, op3_imm) \ + D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) + +#define D_INST_ori(op0_code, op1_reg, op2_reg, op3_imm) \ + D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) + +//Load or Store instructions. 
+#define D_INST_LS(op0_code, op1_reg, op2_reg, op3_imm) \ + D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) + +#define D_INST_Bcond(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg) /*& 0x1f */)<<5; /* rj */ \ + op0_code |= ((code_t)(op2_reg) /*& 0x1f */); /* rd */ \ + assert(!((code_t)(op3_imm) & 0x3)); \ + op0_code |= (((code_t)(op3_imm)<<8) & 0x3fffc00) /* offset */ + +#define D_INST_Bcond_Z(op0_code, op1_reg, op1_imm) \ + assert(!((code_t)(op1_imm) & 0x3)); \ + op0_code |= ((code_t)(op1_reg) /*& 0x1f */)<<5; /* rj */ \ + op0_code |= (((code_t)(op1_imm)<<8) & 0x3fffc00); \ + op0_code |= (((code_t)(op1_imm)>>18) & 0x1f) /* offset */ + +#define D_INST_B(op0_code, op1_imm) \ + assert(!((code_t)(op1_imm) & 0x3)); \ + op0_code |= (((code_t)(op1_imm)>>18) & 0x3ff); \ + op0_code |= (((code_t)(op1_imm)<<8) & 0x3fffc00) /* offset */ + +/***************************************************************************** + * Look up the instruction for a jump kind + */ + +/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind) +{ + assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions)); + return emitJumpKindInstructions[jumpKind]; +} + +/***************************************************************************** +* Look up the jump kind for an instruction. It better be a conditional +* branch instruction with a jump kind! 
+*/ + +/*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) +{ +assert(!"unimplemented on LOONGARCH yet"); + return EJ_NONE; +#if 0 + for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++) + { + if (ins == emitJumpKindInstructions[i]) + { + emitJumpKind ret = (emitJumpKind)i; + assert(EJ_NONE < ret && ret < EJ_COUNT); + return ret; + } + } + unreached(); +#endif +} + +/***************************************************************************** + * Reverse the conditional jump + */ + +/*static*/ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind) +{ + assert(jumpKind < EJ_COUNT); + return emitReverseJumpKinds[jumpKind]; +} + +/***************************************************************************** + * + * Return the allocated size (in bytes) of the given instruction descriptor. + */ + +size_t emitter::emitSizeOfInsDsc(instrDesc* id) +{ + if (emitIsScnsInsDsc(id)) + return SMALL_IDSC_SIZE; + + insOpts insOp = id->idInsOpt(); + + switch (insOp) + { + case INS_OPTS_JIRL: + case INS_OPTS_J_cond: + case INS_OPTS_J: + return sizeof(instrDescJmp); + + case INS_OPTS_C: + if (id->idIsLargeCall()) + { + /* Must be a "fat" call descriptor */ + return sizeof(instrDescCGCA); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + return sizeof(instrDesc); + } + //break; + + case INS_OPTS_I: + case INS_OPTS_RC: + case INS_OPTS_RL: + case INS_OPTS_RELOC: + case INS_OPTS_NONE: + return sizeof(instrDesc); + default: + NO_WAY("unexpected instruction descriptor format"); + break; + } +} + +#ifdef DEBUG +/***************************************************************************** + * + * The following called for each recorded instruction -- use for debugging. + */ +void emitter::emitInsSanityCheck(instrDesc* id) +{ + /* What instruction format have we got? 
*/ + + switch (id->idInsFmt()) + { + case IF_OPCODE: + case IF_OPCODES_16: + case IF_OP_FMT: + case IF_OP_FMT_16: + case IF_OP_FMTS_16: + case IF_FMT_FUNC: + case IF_FMT_FUNC_6: + case IF_FMT_FUNC_16: + case IF_FMT_FUNCS_6: + case IF_FMT_FUNCS_16: + case IF_FMT_FUNCS_6A: + case IF_FMT_FUNCS_11A: + case IF_FUNC: + case IF_FUNC_6: + case IF_FUNC_16: + case IF_FUNC_21: + case IF_FUNCS_6: + case IF_FUNCS_6A: + case IF_FUNCS_6B: + case IF_FUNCS_6C: + case IF_FUNCS_6D: + case IF_FUNCS_11: + //case IF_LA: + break; + + default: + printf("unexpected format %s\n", emitIfName(id->idInsFmt())); + assert(!"Unexpected format"); + break; + } +} +#endif // DEBUG + +inline bool emitter::emitInsMayWriteToGCReg(instruction ins) +{ + assert(ins != INS_invalid); + ////NOTE: please reference the file "instrsloongarch64.h" for details !!! + return (INS_mov <= ins) && (ins <= INS_jirl) ? true : false; +} + +bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) +{ + if (!id->idIsLclVar()) + return false; + + instruction ins = id->idIns(); + + // This list is related to the list of instructions used to store local vars in emitIns_S_R(). + // We don't accept writing to float local vars. + + switch (ins) + { + case INS_st_d: + case INS_stptr_d: +/////// not used these instrs right now !!! 
+ //case INS_sc_d: + //case INS_stx_d: +//#ifdef DEBUG +// case INS_st_b: +// case INS_st_h: +// case INS_st_w: +// case INS_stx_b: +// case INS_stx_h: +// case INS_stx_w: +// //case INS_sc_w: +// //case INS_stgt_b: +// //case INS_stgt_h: +// //case INS_stgt_w: +// //case INS_stgt_d: +// //case INS_stle_b: +// //case INS_stle_h: +// //case INS_stle_w: +// //case INS_stle_d: +//#endif + return true; + default: + return false; + } +} + +/*****************************************************************************/ +#ifdef DEBUG + +// clang-format off +static const char * const RegNames[] = +{ + #define REGDEF(name, rnum, mask, xname, wname) xname, + #include "register.h" +}; +// clang-format on + +#endif // DEBUG + +#define LD 1 +#define ST 2 + +// clang-format off +/*static*/ const BYTE CodeGenInterface::instInfo[] = +{ + #define INSTS(id, nm, fp, info, fmt, e1) info, + #include "instrs.h" +}; +// clang-format on + +//------------------------------------------------------------------------ +// emitInsLoad: Returns true if the instruction is some kind of load instruction. +// +bool emitter::emitInsIsLoad(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & LD) != 0; + else + return false; +} + +//------------------------------------------------------------------------ +//emitInsIsStore: Returns true if the instruction is some kind of store instruction. +// +bool emitter::emitInsIsStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & ST) != 0; + else + return false; +} + +//------------------------------------------------------------------------- +//emitInsIsLoadOrStore: Returns true if the instruction is some kind of load/store instruction. 
+// +bool emitter::emitInsIsLoadOrStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & (LD | ST)) != 0; + else + return false; +} + +#undef LD +#undef ST + +/***************************************************************************** + * + * Returns the specific encoding of the given CPU instruction. + */ + +inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) +{ + code_t code = BAD_CODE; + + // clang-format off + const static code_t insCode[] = + { + #define INSTS(id, nm, fp, info, fmt, e1) e1, + #include "instrs.h" + }; + // clang-format on + + code = insCode[ins]; + + assert((code != BAD_CODE)); + + return code; +} + +/**************************************************************************** + * + * Add an instruction with no operands. + */ + +void emitter::emitIns(instruction ins) +{ + //instrDesc* id = emitNewInstrSmall(EA_8BYTE); + instrDesc* id = emitNewInstr(EA_8BYTE); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(emitInsCode(ins)); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an Load/Store instruction(s): base+offset and base-addr-computing if needed. + * For referencing a stack-based local variable and a register + */ +void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +{ + //assert(offs >= 0); + ssize_t imm; + + emitAttr size = EA_SIZE(attr);//it's better confirm attr with ins. + +#ifdef DEBUG + switch (ins) + { + case INS_st_b: + case INS_st_h: + case INS_st_w: + case INS_fst_s: + //case INS_swl: + //case INS_swr: + //case INS_sdl: + //case INS_sdr: + case INS_st_d: + case INS_fst_d: + break; + + default: + NYI("emitIns_S_R"); // FP locals? 
+ return; + + } // end switch (ins) +#endif + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + imm = offs < 0 ? -offs -8: base + offs; + + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? -offs -8: offs; + + if ((-2048 <= imm) && (imm < 2048)) + { + //regs[1] = reg2; + } + else + { + ssize_t imm3 = imm & 0x800; + ssize_t imm2 = imm + imm3; + assert(isValidSimm20(imm2 >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm2 >> 12); + + emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); + + imm2 = imm2 & 0x7ff; + imm = imm3 ? imm2 - imm3 : imm2; + + reg2 = REG_RA; + } + + instrDesc* id = emitNewInstr(attr); + + id->idReg1(reg1); + + id->idReg2(reg2); + + id->idIns(ins); + + code_t code = emitInsCode(ins); + D_INST_2RI12(code, (reg1 & 0x1f), reg2, imm); + + id->idAddr()->iiaSetInstrEncode(code); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +{ + //assert(offs >= 0); + ssize_t imm; + + emitAttr size = EA_SIZE(attr);//it's better confirm attr with ins. + +#ifdef DEBUG + switch (ins) + { + case INS_ld_b: + case INS_ld_bu: + + case INS_ld_h: + case INS_ld_hu: + + case INS_ld_w: + case INS_ld_wu: + case INS_fld_s: + + case INS_ld_d: + case INS_fld_d: + + //case INS_lwl: + //case INS_lwr: + + //case INS_ldl: + //case INS_ldr: + //assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size)); + break; + + case INS_lea: + assert(size == EA_8BYTE); + break; + + default: + NYI("emitIns_R_S"); // FP locals? + return; + + } // end switch (ins) +#endif + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + imm = offs < 0 ? 
-offs -8: base + offs; + + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? -offs -8: offs; + + reg1 = (regNumber)((char)reg1 & 0x1f); + code_t code; + if ((-2048 <= imm) && (imm < 2048)) + { + if (ins == INS_lea) + { + ins = INS_addi_d; + } + code = emitInsCode(ins); + D_INST_2RI12(code, reg1, reg2, imm); + } + else + { + if (ins == INS_lea) + { + assert(isValidSimm20(imm >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm >> 12); + ssize_t imm2 = imm & 0xfff; + emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_RA, REG_RA, imm2); + + ins = INS_add_d; + code = emitInsCode(ins); + D_INST_add_d(code, reg1, reg2, REG_RA); + } + else + { + ssize_t imm3 = imm & 0x800; + ssize_t imm2 = imm + imm3; + assert(isValidSimm20(imm2 >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm2 >> 12); + + emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); + + imm2 = imm2 & 0x7ff; + code = emitInsCode(ins); + D_INST_2RI12(code, reg1/* & 0x1f*/, REG_RA, imm3 ? imm2 - imm3 : imm2); + } + //reg2 = REG_RA; + } + + instrDesc* id = emitNewInstr(attr); + + id->idReg1(reg1); + //id->idReg2(reg2);//not used. + + id->idIns(ins); + + id->idAddr()->iiaSetInstrEncode(code); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a single immediate value. 
+ */ + +void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) +{ + code_t code = emitInsCode(ins); + + switch (ins) + { + case INS_b: + case INS_bl: + assert(!(imm & 0x3)); + code |= ((imm>>18) & 0x3ff); //offs[25:16] + code |= ((imm>>2) & 0xffff)<<10;//offs[15:0] + break; + case INS_dbar: + case INS_ibar: + assert((0 <= imm) && (imm <= 0x7fff)); + code |= (imm & 0x7fff); //hint + break; + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs) +{ +#ifdef DEBUG + switch (ins) + { + case INS_bceqz: + case INS_bcnez: + break; + //case INS_: + //case INS_: + // break; + + default: + unreached(); + } +#endif + + code_t code = emitInsCode(ins); + + assert(!(offs & 0x3)); + assert(!(cc >> 3)); + code |= ((cc & 0x7) << 5); //cj + code |= ((offs >> 18) & 0x1f); //offs[20:16] + code |= ((offs >> 2) & 0xffff)<<10;//offs[15:0] + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing a single register. 
+ */ + +void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + code_t code = emitInsCode(ins); + +#ifdef DEBUG +#endif + switch (ins) + { + case INS_jr: + case INS_jr_hb: + case INS_mthi: + case INS_mtlo: + code |= (reg & 0x1f)<<21;//rs + break; + + case INS_mfhi://mfhi + case INS_mflo: + code |= (reg & 0x1f)<<11;//rd + assert(isGeneralRegister(reg)); + break; + + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +#endif +} + +/***************************************************************************** + * + * Add an instruction referencing a register and a constant. + */ + +void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); +//#ifdef DEBUG + switch (ins) + { + case INS_lu12i_w: + case INS_lu32i_d: + case INS_pcaddi: + case INS_pcalau12i: + case INS_pcaddu12i: + case INS_pcaddu18i: + assert(isGeneralRegister(reg)); + assert((-524288 <= imm) && (imm < 524288)); + + code |= reg; //rd + code |= (imm & 0xfffff)<<5;//si20 + break; + case INS_beqz: + case INS_bnez: + assert(isGeneralRegisterOrR0(reg)); + assert(!(imm & 0x3)); + assert((-1048576 <= (imm>>2)) && ((imm>>2) <= 1048575)); + + code |= ((imm>>18) & 0x1f); //offs[20:16] + code |= reg << 5; //rj + code |= ((imm>>2) & 0xffff)<<10;//offs[15:0] + break; + case INS_movfr2cf: + assert(isFloatReg(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg & 0x1f)<<5;//fj + code |= imm /*& 0x7*/; //cc + break; + case INS_movcf2fr: + assert(isFloatReg(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg & 0x1f);//fd + code |= (imm /*& 0x7*/)<<5; //cc + break; + case INS_movgr2cf: + assert(isGeneralRegister(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg<<5;//rj + code 
|= imm /*& 0x7*/; //cc + break; + case INS_movcf2gr: + assert(isGeneralRegister(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg;//rd + code |= (imm /*& 0x7*/)<<5; //cc + break; + default: + unreached(); + break; + } // end switch (ins) +//#endif + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +//NOTEADD:This function is new in emitarm64.cpp,so it be added to emitloongarch.cpp. +// But I don't konw how to change it so that it can be used on LA. +// I just add a statement "assert(!"unimplemented on LOONGARCH yet");". +//------------------------------------------------------------------------ +// emitIns_Mov: Emits a move instruction +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// dstReg -- The destination register +// srcReg -- The source register +// canSkip -- true if the move can be elided when dstReg == srcReg, otherwise false +// insOpts -- The instruction options +// +void emitter::emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) +{//TODO: should amend for LoongArch64/LOONGARCH64. 
+ assert(IsMovInstruction(ins)); + + emitIns_R_R(ins, attr, dstReg, srcReg); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers + */ + +void emitter::emitIns_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + if (INS_mov == ins) { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1; //rd + code |= reg2<<5; //rj + } + else if ((INS_ext_w_b <= ins) && (ins <= INS_cpucfg)) { + //case INS_ext_w_b: + //case INS_ext_w_h: + //case INS_clo_w: + //case INS_clz_w: + //case INS_cto_w: + //case INS_ctz_w: + //case INS_clo_d: + //case INS_clz_d: + //case INS_cto_d: + //case INS_ctz_d: + //case INS_revb_2h: + //case INS_revb_4h: + //case INS_revb_2w: + //case INS_revb_d: + //case INS_revh_2w: + //case INS_revh_d: + //case INS_bitrev_4b: + //case INS_bitrev_8b: + //case INS_bitrev_w: + //case INS_bitrev_d: + //case INS_rdtimel_w: + //case INS_rdtimeh_w: + //case INS_rdtime_d: + //case INS_cpucfg: + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1; //rd + code |= reg2 << 5;//rj + } + else if ((INS_asrtle_d == ins) || (INS_asrtgt_d == ins)) { + //case INS_asrtle_d: + //case INS_asrtgt_d: + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1 << 5; //rj + code |= reg2 << 10; //rk + } + else if ((INS_fabs_s <= ins) && (ins <= INS_fmov_d)) { + //case INS_fabs_s: + //case INS_fabs_d: + //case INS_fneg_s: + //case INS_fneg_d: + //case INS_fsqrt_s: + //case INS_fsqrt_d: + //case INS_frsqrt_s: + //case INS_frsqrt_d: + //case INS_frecip_s: + //case INS_frecip_d: + //case INS_flogb_s: + //case INS_flogb_d: + //case INS_fclass_s: + //case INS_fclass_d: + //case INS_fcvt_s_d: + //case INS_fcvt_d_s: + //case INS_ffint_s_w: + //case INS_ffint_s_l: + //case INS_ffint_d_w: + //case INS_ffint_d_l: + 
//case INS_ftint_w_s: + //case INS_ftint_w_d: + //case INS_ftint_l_s: + //case INS_ftint_l_d: + //case INS_ftintrm_w_s: + //case INS_ftintrm_w_d: + //case INS_ftintrm_l_s: + //case INS_ftintrm_l_d: + //case INS_ftintrp_w_s: + //case INS_ftintrp_w_d: + //case INS_ftintrp_l_s: + //case INS_ftintrp_l_d: + //case INS_ftintrz_w_s: + //case INS_ftintrz_w_d: + //case INS_ftintrz_l_s: + //case INS_ftintrz_l_d: + //case INS_ftintrne_w_s: + //case INS_ftintrne_w_d: + //case INS_ftintrne_l_s: + //case INS_ftintrne_l_d: + //case INS_frint_s: + //case INS_frint_d: + //case INS_fmov_s: + //case INS_fmov_d: + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + code |= (reg1 & 0x1f); //fd + code |= (reg2 & 0x1f)<<5; //fj + } + else if ((INS_movgr2fr_w <= ins) && (ins <= INS_movgr2frh_w)) { + //case INS_movgr2fr_w: + //case INS_movgr2fr_d: + //case INS_movgr2frh_w: + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= (reg1 & 0x1f); //fd + code |= reg2 << 5; //rj + } + else if ((INS_movfr2gr_s <= ins) && (ins <= INS_movfrh2gr_s)) { + //case INS_movfr2gr_s: + //case INS_movfr2gr_d: + //case INS_movfrh2gr_s: + assert(isGeneralRegisterOrR0(reg1)); + assert(isFloatReg(reg2)); + code |= reg1; //rd + code |= (reg2 & 0x1f)<<5; //fj + } + else if ((INS_dneg == ins) || (INS_neg == ins)) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + //sub_d rd, zero, rk + //sub_w rd, zero, rk + code |= reg1; //rd + code |= reg2 << 10; //rk + } + else if (INS_not == ins) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + //nor rd, rj, zero + code |= reg1; //rd + code |= reg2 << 5; //rj + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +void emitter::emitIns_R_I_I( + instruction ins, emitAttr attr, regNumber reg, ssize_t hint, 
ssize_t off, insOpts opt /* = INS_OPTS_NONE */) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 +#ifdef DEBUG + switch (ins) + { + case INS_pref: + assert(isGeneralRegister(reg)); + assert((-32769 < off) && (off < 32768)); + break; + + default: + unreached(); + } +#endif + code_t code = emitInsCode(ins); + + code |= (hint & 0x1f)<<16; //hint + code |= (reg & 0x1f)<<21; //rs or base + code |= (off & 0xffff); //offset + + ssize_t imms[] = {hint, off}; + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +#endif +} + +/***************************************************************************** + * + * Add an instruction referencing two registers and a constant. + */ + +void emitter::emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + if ((INS_slli_w <= ins) && (ins <= INS_rotri_w)) { + //INS_slli_w + //INS_srli_w + //INS_srai_w + //INS_rotri_w + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((0 <= imm) && (imm <= 0x1f)); + + code |= reg1; //rd + code |= reg2<<5; //rj + code |= (imm & 0x1f)<<10;//ui5 + } + else if ((INS_slli_d <= ins) && (ins <= INS_rotri_d)) { + //INS_slli_d + //INS_srli_d + //INS_srai_d + //INS_rotri_d + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((0 <= imm) && (imm <= 0x3f)); + + code |= reg1; //rd + code |= reg2<<5; //rj + code |= (imm & 0x3f)<<10;//ui6 + } + else if (((INS_addi_w <= ins) && (ins <= INS_xori)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || ((INS_st_b <= ins) && (ins <= INS_st_d))) { +#ifdef DEBUG + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + if (((INS_addi_w <= ins) && (ins <= INS_slti)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || ((INS_st_b <= ins) && (ins <= INS_st_d))) { + 
//case INS_addi_w: + //case INS_addi_d: + //case INS_lu52i_d: + //case INS_slti: + //case INS_ld_b: + //case INS_ld_h: + //case INS_ld_w: + //case INS_ld_d: + //case INS_ld_bu: + //case INS_ld_hu: + //case INS_ld_wu: + //case INS_st_b: + //case INS_st_h: + //case INS_st_w: + //case INS_st_d: + + assert((-2048 <= imm) && (imm <= 2047)); + } + else if (ins == INS_sltui) + { + //case INS_sltui: + assert((0 <= imm) && (imm <= 0x7ff)); + } + else + { + //case INS_andi: + //case INS_ori: + //case INS_xori: + assert((0 <= imm) && (imm <= 0xfff)); + } +#endif + code |= reg1; //rd + code |= reg2<<5; //rj + code |= (imm & 0xfff)<<10;//si12 or ui12 + } + else if ((INS_fld_s <= ins) && (ins <= INS_fst_d)) { + //INS_fld_s + //INS_fld_d + //INS_fst_s + //INS_fst_d + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-2048 <= imm) && (imm <= 2047)); + + code |= reg1 & 0x1f; //fd + code |= reg2 << 5; //rj + code |= (imm & 0xfff)<<10;//si12 + } + else if (((INS_ll_d >= ins) && (ins >= INS_ldptr_w)) || ((INS_sc_d >= ins) && (ins >= INS_stptr_w))) { + //INS_ldptr_w + //INS_ldptr_d + //INS_ll_w + //INS_ll_d + + //INS_stptr_w + //INS_stptr_d + //INS_sc_w + //INS_sc_d + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-8192 <= imm) && (imm <= 8191)); + + code |= reg1; //rd + code |= reg2 << 5; //rj + code |= (imm & 0x3fff)<<10;//si14 + } + else if ((INS_beq <= ins) && (ins <= INS_bgeu)) + { + //INS_beq + //INS_bne + //INS_blt + //INS_bltu + //INS_bge + //INS_bgeu + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(!(imm & 0x3)); + assert((-32768 <= (imm>>2)) && ((imm>>2) <= 32767)); + + code |= reg1 << 5; //rj + code |= reg2; //rd + code |= ((imm>>2) & 0xffff)<<10;//offs16 + } + else if ((INS_fcmp_caf_s <= ins) && (ins <= INS_fcmp_sune_s)) + { + //INS_fcmp_caf_s + //INS_fcmp_cun_s + //INS_fcmp_ceq_s + //INS_fcmp_cueq_s + //INS_fcmp_clt_s + //INS_fcmp_cult_s + //INS_fcmp_cle_s + //INS_fcmp_cule_s 
+ //INS_fcmp_cne_s + //INS_fcmp_cor_s + //INS_fcmp_cune_s + //INS_fcmp_saf_d + //INS_fcmp_sun_d + //INS_fcmp_seq_d + //INS_fcmp_sueq_d + //INS_fcmp_slt_d + //INS_fcmp_sult_d + //INS_fcmp_sle_d + //INS_fcmp_sule_d + //INS_fcmp_sne_d + //INS_fcmp_sor_d + //INS_fcmp_sune_d + //INS_fcmp_caf_d + //INS_fcmp_cun_d + //INS_fcmp_ceq_d + //INS_fcmp_cueq_d + //INS_fcmp_clt_d + //INS_fcmp_cult_d + //INS_fcmp_cle_d + //INS_fcmp_cule_d + //INS_fcmp_cne_d + //INS_fcmp_cor_d + //INS_fcmp_cune_d + //INS_fcmp_saf_s + //INS_fcmp_sun_s + //INS_fcmp_seq_s + //INS_fcmp_sueq_s + //INS_fcmp_slt_s + //INS_fcmp_sult_s + //INS_fcmp_sle_s + //INS_fcmp_sule_s + //INS_fcmp_sne_s + //INS_fcmp_sor_s + //INS_fcmp_sune_s + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg1 & 0x1f)<<5; //fj + code |= (reg2 & 0x1f)<<10; //fk + code |= imm & 0x7; //cc + } + else if (INS_addu16i_d == ins) { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-32768 <= imm) && (imm < 32768)); + + code |= reg1; //rd + code |= reg2<<5; //rj + code |= (imm & 0xffff)<<10;//si16 + } + else if (INS_jirl == ins) + { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-32768 <= imm) && (imm < 32768)); + + code |= reg1; //rd + code |= reg2<<5; //rj + code |= (imm & 0xffff)<<10;//offs16 + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** +* +* Add an instruction referencing two registers and a constant. 
+* Also checks for a large immediate that needs a second instruction +* and will load it in reg1 +* +* - Supports instructions: add, adds, sub, subs, and, ands, eor and orr +* - Requires that reg1 is a general register and not SP or ZR +* - Requires that reg1 != reg2 +*/ +void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm) +{//maybe optimize. + assert(isGeneralRegister(reg1)); + assert(reg1 != reg2); + + bool immFits = true; + +#ifdef DEBUG + switch (ins) + { + case INS_addi_w: + case INS_addi_d: + //case INS_lui: + //case INS_lbu: + //case INS_lhu: + //case INS_lwu: + //case INS_lb: + //case INS_lh: + //case INS_lw: + case INS_ld_d: + //case INS_sb: + //case INS_sh: + //case INS_sw: + //case INS_sd: + ////case INS_lwc1: + ////case INS_ldc1: + immFits = isValidSimm12(imm); + break; + + case INS_andi: + case INS_ori: + case INS_xori: + immFits = (0 <= imm) && (imm <= 0xfff); + break; + + default: + assert(!"Unsupported instruction in emitIns_R_R_Imm"); + } +#endif + + if (immFits) + { + emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // Load 'imm' into the reg1 register + // then issue: 'ins' reg1, reg2, reg1 + // + assert(!EA_IS_RELOC(attr)); + emitIns_I_la(attr, reg1, imm); + //codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm); + emitIns_R_R_R(ins, attr, reg1, reg2, reg1); + } +} + +/***************************************************************************** + * + * Add an instruction referencing three registers. 
+ */ + +void emitter::emitIns_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt) /* = INS_OPTS_NONE */ +{ + code_t code = emitInsCode(ins); + + if (((INS_add_w <= ins) && (ins <= INS_crcc_w_d_w)) || ((INS_ldx_b <= ins) && (ins <= INS_ldle_d)) || ((INS_stx_b <= ins) && (ins <= INS_stle_d))) { + //case INS_add_w: + //case INS_add_d: + //case INS_sub_w: + //case INS_sub_d: + //case INS_and: + //case INS_or: + //case INS_nor: + //case INS_xor: + //case INS_andn: + //case INS_orn: + + //case INS_mul_w: + //case INS_mul_d: + //case INS_mulh_w: + //case INS_mulh_wu: + //case INS_mulh_d: + //case INS_mulh_du: + //case INS_mulw_d_w: + //case INS_mulw_d_wu: + //case INS_div_w: + //case INS_div_wu: + //case INS_div_d: + //case INS_div_du: + //case INS_mod_w: + //case INS_mod_wu: + //case INS_mod_d: + //case INS_mod_du: + + //case INS_sll_w: + //case INS_srl_w: + //case INS_sra_w: + //case INS_rotr_w: + //case INS_sll_d: + //case INS_srl_d: + //case INS_sra_d: + //case INS_rotr_d: + + //case INS_maskeqz: + //case INS_masknez: + + //case INS_slt: + //case INS_sltu: + + //case INS_ldx_b: + //case INS_ldx_h: + //case INS_ldx_w: + //case INS_ldx_d: + //case INS_ldx_bu: + //case INS_ldx_hu: + //case INS_ldx_wu: + //case INS_stx_b: + //case INS_stx_h: + //case INS_stx_w: + //case INS_stx_d: + + //case INS_ldgt_b: + //case INS_ldgt_h: + //case INS_ldgt_w: + //case INS_ldgt_d: + //case INS_ldle_b: + //case INS_ldle_h: + //case INS_ldle_w: + //case INS_ldle_d: + //case INS_stgt_b: + //case INS_stgt_h: + //case INS_stgt_w: + //case INS_stgt_d: + //case INS_stle_b: + //case INS_stle_h: + //case INS_stle_w: + //case INS_stle_d: + + //case INS_amswap_w: + //case INS_amswap_d: + //case INS_amswap_db_w: + //case INS_amswap_db_d: + //case INS_amadd_w: + //case INS_amadd_d: + //case INS_amadd_db_w: + //case INS_amadd_db_d: + //case INS_amand_w: + //case INS_amand_d: + //case INS_amand_db_w: + //case INS_amand_db_d: + //case INS_amor_w: + 
//case INS_amor_d: + //case INS_amor_db_w: + //case INS_amor_db_d: + //case INS_amxor_w: + //case INS_amxor_d: + //case INS_amxor_db_w: + //case INS_amxor_db_d: + //case INS_ammax_w: + //case INS_ammax_d: + //case INS_ammax_db_w: + //case INS_ammax_db_d: + //case INS_ammin_w: + //case INS_ammin_d: + //case INS_ammin_db_w: + //case INS_ammin_db_d: + //case INS_ammax_wu: + //case INS_ammax_du: + //case INS_ammax_db_wu: + //case INS_ammax_db_du: + //case INS_ammin_wu: + //case INS_ammin_du: + //case INS_ammin_db_wu: + //case INS_ammin_db_du: + + //case INS_crc_w_b_w: + //case INS_crc_w_h_w: + //case INS_crc_w_w_w: + //case INS_crc_w_d_w: + //case INS_crcc_w_b_w: + //case INS_crcc_w_h_w: + //case INS_crcc_w_w_w: + //case INS_crcc_w_d_w: + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + + code |= (reg1 /*& 0x1f*/); //rd + code |= (reg2 /*& 0x1f*/)<<5; //rj + code |= (reg3 /*& 0x1f*/)<<10;//rk + } + else if ((INS_fadd_s <= ins) && (ins <= INS_fcopysign_d)) { + //case INS_fadd_s: + //case INS_fadd_d: + //case INS_fsub_s: + //case INS_fsub_d: + //case INS_fmul_s: + //case INS_fmul_d: + //case INS_fdiv_s: + //case INS_fdiv_d: + //case INS_fmax_s: + //case INS_fmax_d: + //case INS_fmin_s: + //case INS_fmin_d: + //case INS_fmaxa_s: + //case INS_fmaxa_d: + //case INS_fmina_s: + //case INS_fmina_d: + //case INS_fscaleb_s: + //case INS_fscaleb_d: + //case INS_fcopysign_s: + //case INS_fcopysign_d: + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + + code |= (reg1 & 0x1f); //fd + code |= (reg2 & 0x1f)<<5; //fj + code |= (reg3 & 0x1f)<<10;//fk + } + else if ((INS_fldx_s <= ins) && (ins <= INS_fstle_d)) { + //case INS_fldx_s: + //case INS_fldx_d: + //case INS_fstx_s: + //case INS_fstx_d: + + //case INS_fldgt_s: + //case INS_fldgt_d: + //case INS_fldle_s: + //case INS_fldle_d: + //case INS_fstgt_s: + //case INS_fstgt_d: + //case INS_fstle_s: + //case INS_fstle_d: + 
assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + + code |= reg1 & 0x1f; //fd + code |= reg2 << 5; //rj + code |= reg3 << 10; //rk + } + else + { + assert(!"Unsupported instruction in emitIns_R_R_R"); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing three registers and a constant. + */ + +void emitter::emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */, + emitAttr attrReg2 /* = EA_UNKNOWN */) +{ + code_t code = emitInsCode(ins); + + if ((INS_alsl_w <= ins) && (ins <= INS_bytepick_w)) { + //INS_alsl_w + //INS_alsl_wu + //INS_alsl_d + //INS_bytepick_w + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + assert((0 <= imm) && (imm <= 3)); + + code |= reg1; //rd + code |= reg2 << 5; //rj + code |= reg3 << 10;//rk + code |= (imm /*& 0x3*/)<<15; //sa2 + } + else if (INS_bytepick_d == ins) { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg1; //rd + code |= reg2 << 5; //rj + code |= reg3 << 10;//rk + code |= (imm /*& 0x7*/)<<15; //sa3 + } + else if (INS_fsel == ins) + { + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg1 & 0x1f); //fd + code |= (reg2 & 0x1f)<<5; //fj + code |= (reg3 & 0x1f)<<10; //fk + code |= (imm /*& 0x7*/)<<15; //ca + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + 
id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +#if 1 +/***************************************************************************** + * + * Add an instruction referencing three registers, with an extend option + */ + +void emitter::emitIns_R_R_R_Ext(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + insOpts opt, /* = INS_OPTS_NONE */ + int shiftAmount) /* = -1 -- unset */ +{ +assert(!"unimplemented on LOONGARCH yet"); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers and two constants. + */ + +void emitter::emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt) +{ + code_t code = emitInsCode(ins); + + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + switch (ins) + { + case INS_bstrins_w: + case INS_bstrpick_w: + code |= (reg1 /*& 0x1f*/); //rd + code |= (reg2 /*& 0x1f*/)<<5; //rj + assert((0<=imm2) && (imm2<=imm1) && (imm1<32)); + code |= (imm1 & 0x1f)<<16; //msbw + code |= (imm2 & 0x1f)<<10; //lsbw + break; + case INS_bstrins_d: + case INS_bstrpick_d: + code |= (reg1 /*& 0x1f*/); //rd + code |= (reg2 /*& 0x1f*/)<<5; //rj + assert((0<=imm2) && (imm2<=imm1) && (imm1<64)); + code |= (imm1 & 0x3f)<<16; //msbd + code |= (imm2 & 0x3f)<<10; //lsbd + break; + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing four registers. 
+ */ + +void emitter::emitIns_R_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4) +{ + code_t code = emitInsCode(ins); + +//#ifdef DEBUG + switch (ins) + { + case INS_fmadd_s: + case INS_fmadd_d: + case INS_fmsub_s: + case INS_fmsub_d: + case INS_fnmadd_s: + case INS_fnmadd_d: + case INS_fnmsub_s: + case INS_fnmsub_d: + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + assert(isFloatReg(reg4)); + + code |= (reg1 & 0x1f); //fd + code |= (reg2 & 0x1f)<<5; //fj + code |= (reg3 & 0x1f)<<10; //fk + code |= (reg4 & 0x1f)<<15; //fa + break; + default: + unreached(); + } +//#endif + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a static data member operand. If 'size' is 0, the + * instruction operates on the address of the static member instead of its + * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]"). + */ + +void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_C"); +#endif +} + +/***************************************************************************** + * + * Add an instruction referencing stack-based local variable. + */ + +void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_S"); +#endif +} + +#if 0 +/***************************************************************************** + * + * Add an instruction referencing a register and a stack-based local variable. 
+ */ + +void emitter::emitIns_R_R_S( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int sa) +{ + assert(!"unimplemented on LOONGARCH yet"); +#if 1 + regNumber regs[] = {reg1, reg2}; + ssize_t imm = (ssize_t)sa; + emitAllocInstrOnly(emitInsOps(ins, regs, &imm), attr); +#else + instrDesc* id = emitNewInstrCns(attr, sa); + insFormat fmt = IF_FMT_FUNC; + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + id->idReg1(reg1); + id->idReg2(reg2); + + //dispIns(id); + appendToCurIG(id); +#endif +} +#endif + +/***************************************************************************** + * + * Add an instruction referencing two register and consectutive stack-based local variable slots. + */ +void emitter::emitIns_R_R_S_S( + instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +} + +/***************************************************************************** + * + * Add an instruction referencing consecutive stack-based local variable slots and two registers + */ +void emitter::emitIns_S_S_R_R( + instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +} + +/***************************************************************************** + * + * Add an instruction referencing stack-based local variable and an immediate + */ +void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_S_I"); +#endif +} + +/***************************************************************************** + * + * Add an instruction with a register + static member operands. + * Constant is stored into JIT data which is adjacent to code. + * For LOONGARCH64, maybe not the best, here just suports the func-interface. 
+ * + */ +void emitter::emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs) +{ + assert(offs >= 0); + assert(instrDesc::fitsInSmallCns(offs));//can optimize. + //assert(ins == INS_bl);//for special. indicating isGeneralRegister(reg). + //assert(isGeneralRegister(reg)); while load float the reg is FPR. + + //when id->idIns == bl, for reloc! 4-ins. + // pcaddu12i reg, off-hi-20bits + // addi_d reg, reg, off-lo-12bits + //when id->idIns == load-ins, for reloc! 4-ins. + // pcaddu12i reg, off-hi-20bits + // load reg, offs_lo-12bits(reg) #when ins is load ins. + // + // INS_OPTS_RC: ins == bl placeholders. 3-ins: ////TODO: maybe optimize. + // lu12i_w reg, addr-hi-20bits + // ori reg, reg, addr-lo-12bits + // lu32i_d reg, addr_hi-32bits + // + // INS_OPTS_RC: ins == load. 3-ins: + // lu12i_w at, offs_hi-20bits //NOTE: offs = (int)(offs_hi<<12) + (int)offs_lo + // lu32i_d at, 0xff addr_hi-32bits + // load reg, addr_lo-12bits(reg) #when ins is load ins. + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + assert(reg != REG_R0); //for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. + + id->idSmallCns(offs); //usually is 0. + id->idInsOpt(INS_OPTS_RC); + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); + } else + id->idCodeSize(12);//TODO: maybe optimize. + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + + //TODO: this maybe deleted. + id->idSetIsBound(); // We won't patch address since we will know the exact distance + // once JIT code and data are allocated together. + + assert(addrReg == REG_NA);//NOTE: for LOONGARCH64, not support addrReg != REG_NA. 
+ + id->idAddr()->iiaFieldHnd = fldHnd; + + //dispIns(id);//loongarch dumping instr by other-fun. + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a static member + constant. + */ + +void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_C_I"); +#endif +} + +/***************************************************************************** + * + * Add an instruction with a static member + register operands. + */ + +void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + assert(!"emitIns_C_R not supported for RyuJIT backend"); +#endif +} + +void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_R_AR"); +#endif +} + +// This computes address from the immediate which is relocatable. +void emitter::emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{ + assert(EA_IS_RELOC(attr));//EA_PTR_DSP_RELOC + assert(ins == INS_bl);//for special. + assert(isGeneralRegister(reg)); + + // INS_OPTS_RELOC: placeholders. 2-ins: + // case:EA_HANDLE_CNS_RELOC + // pcaddu12i reg, off-hi-20bits + // addi_d reg, reg, off-lo-12bits + // case:EA_PTR_DSP_RELOC + // pcaddu12i reg, off-hi-20bits + // ldptr_d reg, reg, off-lo-12bits + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + assert(reg != REG_R0); //for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. 
+ + id->idInsOpt(INS_OPTS_RELOC); + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + + id->idAddr()->iiaAddr = (BYTE*)addr; + + id->idCodeSize(8); + //dispIns(id);//loongarch dumping instr by other-fun. + appendToCurIG(id); +} + +void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_AR_R"); +#endif +} + +void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_R_ARR"); +#endif +} + +void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_R_ARR"); +#endif +} + +void emitter::emitIns_R_ARX( + instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_R_ARR"); +#endif +} + +/***************************************************************************** + * + * Add a data label instruction. + */ +void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg) +{ + NYI("emitIns_R_D"); +} + +void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int imm) +{ + assert(!"unimplemented on LOONGARCH yet"); +} +#endif + +/***************************************************************************** + * + * Record that a jump instruction uses the short encoding + * + */ +void emitter::emitSetShortJump(instrDescJmp* id) +{ +/* TODO: maybe delete it on future. 
+*/
+    return;
+}
+
+/*****************************************************************************
+ *
+ *  Add a label instruction.
+ */
+
+void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+    assert(dst->bbFlags & BBF_HAS_LABEL);
+
+    // if for reloc!  4-ins:
+    //   pcaddu12i  reg, offset-hi20
+    //   addi_d  reg, reg, offset-lo12
+    //
+    // else:  3-ins:
+    //   lu12i_w  reg, dst-hi-20bits
+    //   ori  reg, reg, dst-lo-12bits
+    //   bstrins_d  reg, zero, msbd, lsbd / lu32i_d  reg, 0xff
+
+    instrDesc* id = emitNewInstr(attr);
+
+    id->idIns(ins);
+    id->idInsOpt(INS_OPTS_RL);
+    id->idAddr()->iiaBBlabel = dst;
+
+    if (emitComp->opts.compReloc)
+    {
+        id->idSetIsDspReloc();
+        id->idCodeSize(8);
+    }
+    else
+        id->idCodeSize(12);
+
+    id->idReg1(reg);
+
+    if (EA_IS_GCREF(attr))
+    {
+        /* A special value indicates a GCref pointer value */
+        id->idGCref(GCT_GCREF);
+        id->idOpSize(EA_PTRSIZE);
+    }
+    else if (EA_IS_BYREF(attr))
+    {
+        /* A special value indicates a Byref pointer value */
+        id->idGCref(GCT_BYREF);
+        id->idOpSize(EA_PTRSIZE);
+    }
+
+#ifdef DEBUG
+    // Mark the catch return
+    if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
+    {
+        id->idDebugOnlyInfo()->idCatchRet = true;
+    }
+#endif // DEBUG
+
+    //dispIns(id);
+    appendToCurIG(id);
+}
+
+void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+    assert(!"unimplemented on LOONGARCH yet: emitIns_J_R."); // not used.
+}
+
+// NOTE:
+//   For loongarch64, emitIns_J is just only jump, not include the condition branch!
+//   The condition branch is the emitIns_J_cond_la().
+//   If using "BasicBlock* dst" label as target, the INS_OPTS_J is a short jump
+//   while long jump will be replaced by INS_OPTS_JIRL.
+//
+//   The arg "instrCount" is two regs's encoding when ins is
+//   beq/bne/blt/bltu/bge/bgeu/beqz/bnez.
+void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount)
+{
+    if (dst == nullptr)
+    { // Now this case not used for loongarch64.
+        assert(instrCount != 0);
+        assert(ins == INS_b); // when dst==nullptr, ins is INS_b by now.
+
+#if 1
+        assert((-33554432 <= instrCount) && (instrCount < 33554432)); // 0x2000000.
+        emitIns_I(ins, EA_PTRSIZE, instrCount << 2); // NOTE: instrCount is the number of the instructions.
+#else
+        instrCount = instrCount << 2;
+        if ((-33554432 <= instrCount) && (instrCount < 33554432))
+        {
+            /* This jump is really short */
+            emitIns_I(ins, EA_PTRSIZE, instrCount);
+        }
+        else
+        {
+            // NOTE: should not be here !!!
+            assert(!"should not be here on LOONGARCH64 !!!");
+
+            //emitIns_I(INS_bl, EA_PTRSIZE, 4);
+
+            //ssize_t imm = ((ssize_t)instrCount>>12);
+            //assert(isValidSimm12(imm));
+            //emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm);
+            //imm = (instrCount & 0xfffff);
+            //emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, imm);
+
+            //emitIns_R_R_R(INS_add_d, EA_8BYTE, REG_R21, REG_R21, REG_RA);
+            //emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_R21, 0);
+        }
+#endif
+        return;
+    }
+
+    // (dst != nullptr)
+    //
+    // INS_OPTS_J: placeholders. 1-ins: if the dst outof-range will be replaced by INS_OPTS_JIRL.
+    //   bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl  dst
+
+    assert(dst->bbFlags & BBF_HAS_LABEL);
+
+    instrDescJmp* id = emitNewInstrJmp();
+    assert((INS_bceqz <= ins) && (ins <= INS_bl));
+    id->idIns(ins);
+    // The two source registers of a compare-and-branch are packed into
+    // 'instrCount': bits [4:0] hold reg1 and bits [9:5] hold reg2.
+    id->idReg1((regNumber)(instrCount & 0x1f));
+    id->idReg2((regNumber)((instrCount >> 5) & 0x1f));
+
+    id->idInsOpt(INS_OPTS_J);
+    emitCounts_INS_OPTS_J++;
+    id->idAddr()->iiaBBlabel = dst;
+
+    if (emitComp->opts.compReloc)
+    {
+        id->idSetIsDspReloc();
+    }
+
+    id->idjShort = false;
+
+    //// TODO: maybe deleted this for loongarch64.
+    id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+#ifdef DEBUG
+    if (emitComp->opts.compLongAddress) // Force long branches
+        id->idjKeepLong = 1;
+#endif // DEBUG
+
+    /* Record the jump's IG and offset within it */
+    id->idjIG   = emitCurIG;
+    id->idjOffs = emitCurIGsize;
+
+    /* Append this jump to this IG's jump list */
+    id->idjNext      = emitCurIGjmpList;
+    emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+    emitTotalIGjmps++;
+#endif
+
+    id->idCodeSize(4);
+    //dispIns(id);
+    appendToCurIG(id);
+}
+
+// NOTE:
+//   For loongarch64, emitIns_J_cond_la() is the condition branch.
+//   NOTE: Only supported short branch so far !!!
+//
+void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1, regNumber reg2)
+{
+    // TODO:
+    //   Now the emitIns_J_cond_la() is only the short condition branch.
+    //   There is no long condition branch for loongarch64 so far.
+    //   For loongarch64, the long condition branch is like this:
+    //     ---> branch_condition  condition_target;  // here is the condition branch, short branch is enough.
+    //     ---> jump jump_target; (this supporting the long jump.)
+    //   condition_target:
+    //     ...
+    //     ...
+    //   jump_target:
+    //
+    //
+    //  INS_OPTS_J_cond: placeholders. 1-ins.
+    //    ins  reg1, reg2, dst
+
+    assert(dst != nullptr);
+    assert(dst->bbFlags & BBF_HAS_LABEL);
+
+    instrDescJmp* id = emitNewInstrJmp();
+
+    id->idIns(ins);
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idjShort = false;
+
+    id->idInsOpt(INS_OPTS_J_cond);
+    id->idAddr()->iiaBBlabel = dst;
+
+    id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+#ifdef DEBUG
+    if (emitComp->opts.compLongAddress) // Force long branches
+        id->idjKeepLong = 1;
+#endif // DEBUG
+
+    /* Record the jump's IG and offset within it */
+    id->idjIG   = emitCurIG;
+    id->idjOffs = emitCurIGsize;
+
+    /* Append this jump to this IG's jump list */
+    id->idjNext      = emitCurIGjmpList;
+    emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+    emitTotalIGjmps++;
+#endif
+
+    id->idCodeSize(4);
+    //dispIns(id);
+    appendToCurIG(id);
+}
+
+// Materialize the 64-bit immediate 'imm' into general register 'reg',
+// choosing a 1- to 4-instruction sequence (addi_w / ori / lu12i_w+ori /
+// +lu32i_d / +lu52i_d) based on the significant bits of the immediate.
+// Multi-instruction sequences are recorded as a single INS_OPTS_I
+// descriptor and expanded in emitOutputInstr.
+void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm)
+{
+    assert(!EA_IS_RELOC(size));
+    assert(isGeneralRegister(reg));
+    //size = EA_SIZE(size);
+
+    // Fits a signed 12-bit immediate: one addi_w suffices.
+    if (-1 == (imm >> 11) || 0 == (imm >> 11))
+    {
+        emitIns_R_R_I(INS_addi_w, size, reg, REG_R0, imm);
+        return;
+    }
+
+    // Fits an unsigned 12-bit immediate: one ori suffices.
+    if (0 == (imm >> 12))
+    {
+        emitIns_R_R_I(INS_ori, size, reg, REG_R0, imm);
+        return;
+    }
+
+    instrDesc* id = emitNewInstr(size);
+
+    if ((imm == INT64_MAX) || (imm == 0xffffffff))
+    {
+        //emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1);
+        //emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6);
+        id->idReg2((regNumber)1); // special for INT64_MAX(ui6=1) or UINT32_MAX(ui6=32);
+        id->idCodeSize(8);
+    }
+    else if (-1 == (imm >> 31) || 0 == (imm >> 31))
+    {
+        //emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12));
+        //emitIns_R_R_I(INS_ori, size, reg, reg, imm);
+
+        id->idCodeSize(8);
+    }
+    else if (-1 == (imm >> 51) || 0 == (imm >> 51))
+    {
+        // low-32bits.
+        //emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12);
+        //emitIns_R_R_I(INS_ori, size, reg, reg, imm);
+        //
+        // high-20bits.
+        //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32));
+
+        id->idCodeSize(12);
+    }
+    else
+    { // 0xffff ffff ffff ffff.
+        // low-32bits.
+        //emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12));
+        //emitIns_R_R_I(INS_ori, size, reg, reg, imm);
+        //
+        // high-32bits.
+        //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32));
+        //emitIns_R_R_I(INS_lu52i_d, size, reg, reg, (imm>>52));
+
+        id->idCodeSize(16);
+    }
+
+    id->idIns(INS_lu12i_w);
+    id->idReg1(reg); // destination register that will get the constant value.
+    assert(reg != REG_R0);
+
+    id->idInsOpt(INS_OPTS_I);
+
+    id->idAddr()->iiaAddr = (BYTE*)imm;
+
+    //dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a call instruction (direct or indirect).
+ *      argSize<0 means that the caller will pop the arguments
+ *
+ * The other arguments are interpreted depending on callType as shown:
+ * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
+ *
+ * EC_FUNC_TOKEN       : addr is the method address
+ *
+ * If callType is one of these emitCallTypes, addr has to be NULL.
+ * EC_INDIR_R          : "call ireg".
+ *
+ * For LOONGARCH xreg, xmul and disp are never used and should always be 0/REG_NA.
+ *
+ *  Please consult the "debugger team notification" comment in genFnProlog().
+ */
+
+void emitter::emitIns_Call(EmitCallType          callType,
+                           CORINFO_METHOD_HANDLE methHnd,
+                           INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+                           void*            addr,
+                           ssize_t          argSize,
+                           emitAttr         retSize
+                               MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+                           VARSET_VALARG_TP ptrVars,
+                           regMaskTP        gcrefRegs,
+                           regMaskTP        byrefRegs,
+                           const DebugInfo& di /* = DebugInfo() */,
+                           regNumber        ireg /* = REG_NA */,
+                           regNumber        xreg /* = REG_NA */,
+                           unsigned         xmul /* = 0 */,
+                           ssize_t          disp /* = 0 */,
+                           bool             isJump /* = false */)
+{
+    /* Sanity check the arguments depending on callType */
+
+    assert(callType < EC_COUNT);
+    assert((callType != EC_FUNC_TOKEN) || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
+    assert(callType < EC_INDIR_R || addr == NULL);
+    assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
+
+    // ARM never uses these
+    assert(xreg == REG_NA && xmul == 0 && disp == 0);
+
+    // Our stack level should be always greater than the bytes of arguments we push. Just
+    // a sanity test.
+    assert((unsigned)abs(argSize) <= codeGen->genStackLevel);
+
+    // Trim out any callee-trashed registers from the live set.
+    regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
+    gcrefRegs &= savedSet;
+    byrefRegs &= savedSet;
+
+#ifdef DEBUG
+    if (EMIT_GC_VERBOSE)
+    {
+        printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
+        dumpConvertedVarSet(emitComp, ptrVars);
+        printf(", gcrefRegs=");
+        printRegMaskInt(gcrefRegs);
+        emitDispRegSet(gcrefRegs);
+        printf(", byrefRegs=");
+        printRegMaskInt(byrefRegs);
+        emitDispRegSet(byrefRegs);
+        printf("\n");
+    }
+#endif
+
+    /* Managed RetVal: emit sequence point for the call */
+    if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid())
+    {
+        codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false);
+    }
+
+    /*
+        We need to allocate the appropriate instruction descriptor based
+        on whether this is a direct/indirect call, and whether we need to
+        record an updated set of live GC variables.
+     */
+    instrDesc* id;
+
+    assert(argSize % REGSIZE_BYTES == 0);
+    int argCnt = (int)(argSize / (int)REGSIZE_BYTES);
+
+    if (callType >= EC_INDIR_R)
+    {
+        /* Indirect call, virtual calls */
+
+        assert(callType == EC_INDIR_R);
+
+        id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
+    }
+    else
+    {
+        /* Helper/static/nonvirtual/function calls (direct or through handle),
+           and calls to an absolute addr. */
+
+        assert(callType == EC_FUNC_TOKEN);
+
+        id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
+    }
+
+    /* Update the emitter's live GC ref sets */
+
+    VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
+    emitThisGCrefRegs = gcrefRegs;
+    emitThisByrefRegs = byrefRegs;
+
+    id->idSetIsNoGC(emitNoGChelper(methHnd));
+
+    /* Set the instruction - special case jumping a function */
+    instruction ins;
+
+    ins = INS_jirl; // jirl t2
+    id->idIns(ins);
+
+    id->idInsOpt(INS_OPTS_C);
+    // TODO: maybe optimize.
+
+    // INS_OPTS_C: placeholders.  1/2/4-ins:
+    //   if (callType == EC_INDIR_R)
+    //     jirl REG_R0/REG_RA, ireg, 0   <---- 1-ins
+    //   else if (callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR)
+    //     if reloc:
+    //       //pc + offset_38bits       # only when reloc.
+    //       pcaddu18i  t2, addr-hi20
+    //       jilr  r0/1,t2,addr-lo18
+    //
+    //     else:
+    //       lu12i_w  t2, dst_offset_lo32-hi
+    //       ori  t2, t2, dst_offset_lo32-lo
+    //       lu32i_d  t2, dst_offset_hi32-lo
+    //       jirl  REG_R0/REG_RA, t2, 0
+
+    /* Record the address: method, indirection, or funcptr */
+    if (callType == EC_INDIR_R)
+    {
+        /* This is an indirect call (either a virtual call or func ptr call) */
+        //assert(callType == EC_INDIR_R);
+
+        id->idSetIsCallRegPtr();
+
+        regNumber reg_jirl = isJump ? REG_R0 : REG_RA;
+        id->idReg4(reg_jirl);
+        id->idReg3(ireg); // NOTE: for EC_INDIR_R, using idReg3.
+        assert(xreg == REG_NA);
+
+        id->idCodeSize(4);
+    }
+    else
+    {
+        /* This is a simple direct call: "call helper/method/addr" */
+
+        assert(callType == EC_FUNC_TOKEN);
+        assert(addr != NULL);
+        // NOTE(review): 'long' is only 32-bit on LLP64 (Windows) hosts;
+        // presumably only LP64 hosts are supported here — confirm.
+        assert(((long)addr & 3) == 0);
+
+        // NOTE: low-bit0 is used for jirl ra/r0,rd,0 — it tells emitOutputCall
+        // whether the call links in RA (isJump==false) or not.
+        addr = (void*)((long)addr + (isJump ? 0 : 1));
+        id->idAddr()->iiaAddr = (BYTE*)addr;
+
+        if (emitComp->opts.compReloc)
+        {
+            id->idSetIsDspReloc();
+            id->idCodeSize(8);
+        }
+        else
+        {
+            id->idCodeSize(16);
+        }
+    }
+
+#ifdef DEBUG
+    if (EMIT_GC_VERBOSE)
+    {
+        if (id->idIsLargeCall())
+        {
+            printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+                   VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+        }
+    }
+
+    id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
+    id->idDebugOnlyInfo()->idCallSig   = sigInfo;
+#endif // DEBUG
+
+#ifdef LATE_DISASM
+    if (addr != nullptr)
+    {
+        codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
+    }
+#endif // LATE_DISASM
+
+    //dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Output a call instruction.
+ */
+
+unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code)
+{
+    unsigned char callInstrSize = sizeof(code_t); // 4 bytes
+    regMaskTP     gcrefRegs;
+    regMaskTP     byrefRegs;
+
+    VARSET_TP GCvars(VarSetOps::UninitVal());
+
+    // Is this a "fat" call descriptor?
+    if (id->idIsLargeCall())
+    {
+        instrDescCGCA* idCall = (instrDescCGCA*)id;
+        gcrefRegs             = idCall->idcGcrefRegs;
+        byrefRegs             = idCall->idcByrefRegs;
+        VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+    }
+    else
+    {
+        assert(!id->idIsLargeDsp());
+        assert(!id->idIsLargeCns());
+
+        gcrefRegs = emitDecodeCallGCregs(id);
+        byrefRegs = 0;
+        VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+    }
+
+    /* We update the GC info before the call as the variables cannot be
+       used by the call. Killing variables before the call helps with
+       boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
+       If we ever track aliased variables (which could be used by the
+       call), we would have to keep them alive past the call. */
+
+    emitUpdateLiveGCvars(GCvars, dst);
+#ifdef DEBUG
+    // NOTEADD:
+    // Output any delta in GC variable info, corresponding to the before-call GC var updates done above.
+    if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC)
+    {
+        emitDispGCVarDelta(); // defined in emit.cpp
+    }
+#endif // DEBUG
+
+    assert(id->idIns() == INS_jirl);
+    if (id->idIsCallRegPtr())
+    { // EC_INDIR_R
+        code = emitInsCode(id->idIns());
+        D_INST_JIRL(code, id->idReg4(), id->idReg3(), 0);
+    }
+    else if (id->idIsReloc())
+    {
+        // pc + offset_38bits
+        //
+        //   pcaddu18i  t2, addr-hi20
+        //   jilr  r0/1,t2,addr-lo18
+
+        long addr = (long)id->idAddr()->iiaAddr; // get addr.
+        // should assert(addr-dst < 38bits);
+
+        // Low bit of the recorded address selects the jirl link register
+        // (set in emitIns_Call); strip it before recording the relocation.
+        int reg2 = (int)addr & 1;
+        addr     = addr ^ 1;
+
+        emitRecordRelocation(dst, (BYTE*)addr, IMAGE_REL_LOONGARCH64_PC);
+
+        *(code_t*)dst = 0x1e00000e; // pcaddu18i t2, 0 (reloc fills the offset)
+        dst += 4;
+#ifdef DEBUG
+        code = emitInsCode(INS_pcaddu18i);
+        assert((code | (14)) == 0x1e00000e);
+        assert((int)REG_T2 == 14);
+        code = emitInsCode(INS_jirl);
+        assert(code == 0x4c000000);
+#endif
+        // NOTE(review): this jirl is stored directly, but 'dst' is not advanced
+        // here and 'code' is not updated to this encoding, so the
+        // emitOutput_Instr(dst, code) call below appears to overwrite this slot
+        // with a stale 'code' value — verify against the upstream sources.
+        *(code_t*)dst = 0x4c000000 | (14 << 5) | reg2;
+    }
+    else
+    {
+        //   lu12i_w  t2, dst_offset_lo32-hi  // TODO: maybe optimize.
+        //   ori  t2, t2, dst_offset_lo32-lo
+        //   lu32i_d  t2, dst_offset_hi32-lo
+        //   jirl  t2
+
+        ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr);
+        //assert((imm >> 32) <= 0x7ffff); // In fact max is <= 0xffff.
+        assert((imm >> 32) == 0xff); // for LA64 addr-is 0xff. but this is not the best !!!
+
+        int reg2 = (int)(imm & 1);
+        imm -= reg2;
+
+        code = emitInsCode(INS_lu12i_w);
+        D_INST_lu12i_w(code, REG_T2, imm >> 12);
+        *(code_t*)dst = code;
+        dst += 4;
+
+        code = emitInsCode(INS_ori);
+        D_INST_ori(code, REG_T2, REG_T2, imm);
+        *(code_t*)dst = code;
+        dst += 4;
+
+        //emitIns_R_I(INS_lu32i_d, size, REG_T2, imm >> 32);
+        code = emitInsCode(INS_lu32i_d);
+        //D_INST_lu32i_d(code, REG_T2, imm >> 32);
+        D_INST_lu32i_d(code, REG_T2, 0xff);
+        *(code_t*)dst = code;
+        dst += 4;
+
+        code = emitInsCode(INS_jirl);
+        D_INST_JIRL(code, reg2, REG_T2, 0);
+    }
+
+    // Now output the call instruction and update the 'dst' pointer
+    //
+    unsigned outputInstrSize = emitOutput_Instr(dst, code);
+    dst += outputInstrSize;
+
+    // update volatile regs within emitThisGCrefRegs and emitThisByrefRegs.
+    if (gcrefRegs != emitThisGCrefRegs)
+    {
+        emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
+    }
+    if (byrefRegs != emitThisByrefRegs)
+    {
+        emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
+    }
+
+    // All call instructions are 4-byte in size on LOONGARCH64
+    // not including delay-slot which processed later.
+    assert(outputInstrSize == callInstrSize);
+
+    // If the method returns a GC ref, mark INTRET (A0) appropriately.
+    if (id->idGCref() == GCT_GCREF)
+    {
+        gcrefRegs = emitThisGCrefRegs | RBM_INTRET;
+    }
+    else if (id->idGCref() == GCT_BYREF)
+    {
+        byrefRegs = emitThisByrefRegs | RBM_INTRET;
+    }
+
+    // If is a multi-register return method is called, mark INTRET_1 (A1) appropriately
+    if (id->idIsLargeCall())
+    {
+        instrDescCGCA* idCall = (instrDescCGCA*)id;
+        if (idCall->idSecondGCref() == GCT_GCREF)
+        {
+            gcrefRegs |= RBM_INTRET_1;
+        }
+        else if (idCall->idSecondGCref() == GCT_BYREF)
+        {
+            byrefRegs |= RBM_INTRET_1;
+        }
+    }
+
+    // If the GC register set has changed, report the new set.
+    if (gcrefRegs != emitThisGCrefRegs)
+    {
+        emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
+    }
+    // If the Byref register set has changed, report the new set.
+    if (byrefRegs != emitThisByrefRegs)
+    {
+        emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
+    }
+
+    // Some helper calls may be marked as not requiring GC info to be recorded.
+    if (!id->idIsNoGC())
+    {
+        // On LOONGARCH64, as on AMD64, we don't change the stack pointer to push/pop args.
+        // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism
+        // to record the call for GC info purposes.  (It might be best to use an alternate call,
+        // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.)
+        emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0);
+
+        // Do we need to record a call location for GC purposes?
+        //
+        if (!emitFullGCinfo)
+        {
+            emitRecordGCcall(dst, callInstrSize);
+        }
+    }
+    // Report the total byte length of the emitted call sequence back to the
+    // caller (1, 2, or 4 instructions depending on the call form).
+    if (id->idIsCallRegPtr())
+    {
+        callInstrSize = 1 << 2;
+    }
+    else
+    {
+        callInstrSize = id->idIsReloc() ? (2 << 2) : (4 << 2); // INS_OPTS_C: 2/4-ins.
+    }
+
+    return callInstrSize;
+}
+
+/*****************************************************************************
+ *
+ *  Emit a 32-bit LOONGARCH64 instruction
+ */
+
+/*static*/ unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code)
+{
+    assert(sizeof(code_t) == 4);
+    // Store through the writeable alias of 'dst' (see writeableOffset).
+    BYTE* dstRW       = dst + writeableOffset;
+    *((code_t*)dstRW) = code;
+
+    return sizeof(code_t);
+}
+
+/*****************************************************************************
+ *
+ *  Append the machine code corresponding to the given instruction descriptor
+ *  to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
+ *  is the instruction group that contains the instruction. Updates '*dp' to
+ *  point past the generated code, and returns the size of the instruction
+ *  descriptor in bytes.
+ */
+
+size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+    BYTE*       dst  = *dp;
+    BYTE*       dst2 = dst; // addr for updating gc info if needed.
+    code_t      code = 0;
+    instruction ins;
+    size_t      sz; // = emitSizeOfInsDsc(id);
+
+#ifdef DEBUG
+#if DUMP_GC_TABLES
+    bool dspOffs = emitComp->opts.dspGCtbls;
+#else
+    bool dspOffs = !emitComp->opts.disDiffable;
+#endif
+#endif // DEBUG
+
+    assert(REG_NA == (int)REG_NA);
+
+    insOpts insOp = id->idInsOpt();
+
+    switch (insOp)
+    {
+        case INS_OPTS_RELOC:
+        {
+            // case:EA_HANDLE_CNS_RELOC
+            //   pcaddu12i  reg, off-hi-20bits
+            //   addi_d  reg, reg, off-lo-12bits
+            // case:EA_PTR_DSP_RELOC
+            //   pcaddu12i  reg, off-hi-20bits
+            //   ldptr_d  reg, reg, off-lo-12bits
+
+            regNumber reg1 = id->idReg1();
+
+            emitRecordRelocation(dst, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC);
+
+            *(code_t*)dst = 0x1c000000 | (code_t)reg1; // pcaddu12i reg1, 0
+            dst += 4;
+            dst2 = dst;
+
+#ifdef DEBUG
+            code = emitInsCode(INS_pcaddu12i);
+            assert(code == 0x1c000000);
+            code = emitInsCode(INS_addi_d);
+            assert(code == 0x02c00000);
+            code = emitInsCode(INS_ldptr_d);
+            assert(code == 0x26000000);
+#endif
+
+            if (id->idIsCnsReloc())
+            {
+                ins           = INS_addi_d;
+                *(code_t*)dst = 0x02c00000 | (code_t)reg1 | (code_t)(reg1 << 5);
+            }
+            else // if (id->idIsDspReloc())
+            {
+                assert(id->idIsDspReloc());
+                ins           = INS_ldptr_d;
+                *(code_t*)dst = 0x26000000 | (code_t)reg1 | (code_t)(reg1 << 5);
+            }
+
+            if (id->idGCref() != GCT_NONE)
+            {
+                emitGCregLiveUpd(id->idGCref(), reg1, dst);
+            }
+            else
+            {
+                emitGCregDeadUpd(reg1, dst);
+            }
+
+            dst += 4;
+
+            sz = sizeof(instrDesc);
+        }
+        break;
+        case INS_OPTS_I:
+        {
+            // Expansion of emitIns_I_la: the descriptor's code size selects
+            // the 2-, 3- or 4-instruction constant-materialization sequence.
+            ssize_t   imm  = (ssize_t)(id->idAddr()->iiaAddr);
+            regNumber reg1 = id->idReg1();
+            dst2 += 4; // assert(dst2 == dst);
+
+            switch (id->idCodeSize())
+            {
+                case 8: // if (id->idCodeSize() == 8)
+                {
+                    if (id->idReg2())
+                    { // special for INT64_MAX or UINT32_MAX;
+                        code = emitInsCode(INS_addi_d);
+                        //emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1);
+                        D_INST_2RI12(code, reg1, REG_R0, -1);
+                        *(code_t*)dst = code;
+                        dst += 4;
+
+                        ssize_t ui6 = (imm == INT64_MAX) ? 1 : 32;
+                        code        = emitInsCode(INS_srli_d);
+                        //emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6);
+                        code |= ((code_t)reg1 | ((code_t)reg1 << 5) | (ui6 << 10));
+                        *(code_t*)dst = code;
+                    }
+                    else
+                    {
+                        code = emitInsCode(INS_lu12i_w);
+                        D_INST_lu12i_w(code, reg1, imm >> 12);
+                        *(code_t*)dst = code;
+                        dst += 4;
+
+                        code = emitInsCode(INS_ori);
+                        D_INST_ori(code, reg1, reg1, imm);
+                        *(code_t*)dst = code;
+                    }
+                    break;
+                }
+                case 12: // else if (id->idCodeSize() == 12)
+                {
+                    code = emitInsCode(INS_lu12i_w);
+                    D_INST_lu12i_w(code, reg1, imm >> 12);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_ori);
+                    D_INST_ori(code, reg1, reg1, imm);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_lu32i_d);
+                    //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32));
+                    D_INST_lu32i_d(code, reg1, imm >> 32);
+                    *(code_t*)dst = code;
+
+                    break;
+                }
+                case 16: // else if (id->idCodeSize() == 16)
+                {
+                    code = emitInsCode(INS_lu12i_w);
+                    D_INST_lu12i_w(code, reg1, imm >> 12);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_ori);
+                    D_INST_ori(code, reg1, reg1, imm);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_lu32i_d);
+                    D_INST_lu32i_d(code, reg1, imm >> 32);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_lu52i_d);
+                    D_INST_lu52i_d(code, reg1, reg1, imm >> 52);
+                    *(code_t*)dst = code;
+
+                    break;
+                }
+                default:
+                    unreached();
+                    break;
+            }
+
+            ins = INS_ori;
+            dst += 4;
+
+            sz = sizeof(instrDesc);
+        }
+        break;
+        case INS_OPTS_RC:
+        {
+            // Reference to JIT data
+
+            // when id->idIns == bl, for reloc!
+            //   pcaddu12i  r21, off-hi-20bits
+            //   addi_d  reg, r21, off-lo-12bits
+            // when id->idIns == load-ins
+            //   pcaddu12i  r21, off-hi-20bits
+            //   load  reg, offs_lo-12bits(r21)  # when ins is load ins.
+            //
+            // when id->idIns == bl
+            //   lu12i_w  r21, addr-hi-20bits
+            //   ori  reg, r21, addr-lo-12bits
+            //   lu32i_d  reg, addr_hi-32bits
+            //
+            // when id->idIns == load-ins
+            //   lu12i_w  r21, offs_hi-20bits
+            //   lu32i_d  r21, 0xff  addr_hi-32bits
+            //   load  reg, addr_lo-12bits(r21)
+            assert(id->idAddr()->iiaIsJitDataOffset());
+            assert(id->idGCref() == GCT_NONE);
+
+            int doff = id->idAddr()->iiaGetJitDataOffset();
+            assert(doff >= 0);
+
+            ssize_t imm = emitGetInsSC(id);
+            assert((imm >= 0) && (imm < 0x4000)); // 0x4000 is arbitrary, currently 'imm' is always 0.
+
+            unsigned dataOffs = (unsigned)(doff + imm);
+
+            assert(dataOffs < emitDataSize());
+
+            ins            = id->idIns();
+            regNumber reg1 = id->idReg1();
+
+            if (id->idIsReloc())
+            {
+                // get the addr-offset of the data.
+                imm = (ssize_t)emitConsBlock - (ssize_t)dst + dataOffs;
+                assert(imm > 0);
+                assert(!(imm & 3));
+
+                // Round the hi-20 part up when bit 11 is set so the low 12
+                // bits form a signed displacement from the pcaddu12i result.
+                doff = (int)(imm & 0x800);
+                imm += doff;
+                assert(isValidSimm20(imm >> 12));
+
+                doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit.
+
+#ifdef DEBUG
+                code = emitInsCode(INS_pcaddu12i);
+                assert(code == 0x1c000000);
+#endif
+                code          = 0x1c000000 | 21; // pcaddu12i r21, ...
+                *(code_t*)dst = code | (((code_t)imm & 0xfffff000) >> 7);
+                dst += 4;
+
+                if (ins == INS_bl)
+                {
+                    assert(isGeneralRegister(reg1));
+                    ins = INS_addi_d;
+#ifdef DEBUG
+                    code = emitInsCode(INS_addi_d);
+                    assert(code == 0x02c00000);
+#endif
+                    code          = 0x02c00000 | (21 << 5);
+                    *(code_t*)dst = code | (code_t)reg1 | (((code_t)doff & 0xfff) << 10);
+                }
+                else
+                {
+                    code = emitInsCode(ins);
+                    D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff); // NOTE: here must be REG_R21 !!!
+                    *(code_t*)dst = code;
+                }
+                dst += 4;
+                dst2 = dst;
+            }
+            else
+            {
+                // get the addr of the data.
+                imm = (ssize_t)emitConsBlock + dataOffs;
+
+                code = emitInsCode(INS_lu12i_w);
+                if (ins == INS_bl)
+                {
+                    assert((imm >> 32) == 0xff);
+                    //assert((imm >> 32) <= 0x7ffff);
+
+                    doff = (int)imm >> 12;
+                    D_INST_lu12i_w(code, REG_R21, doff);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_ori);
+                    D_INST_ori(code, reg1, REG_R21, imm);
+                    *(code_t*)dst = code;
+                    dst += 4;
+                    dst2 = dst;
+
+                    ins  = INS_lu32i_d;
+                    code = emitInsCode(INS_lu32i_d);
+                    //D_INST_lu32i_d(code, reg1, imm >> 32);
+                    D_INST_lu32i_d(code, reg1, 0xff);
+                    *(code_t*)dst = code;
+                    dst += 4;
+                }
+                else
+                {
+                    doff = (int)(imm & 0x800);
+                    imm += doff;
+                    doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit.
+
+                    assert((imm >> 32) == 0xff);
+                    //assert((imm >> 32) <= 0x7ffff);
+
+                    dataOffs = (unsigned)(imm >> 12); // addr-hi-20bits.
+                    D_INST_lu12i_w(code, REG_R21, dataOffs);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    //emitIns_R_I(INS_lu32i_d, size, REG_R21, imm >> 32);
+                    code = emitInsCode(INS_lu32i_d);
+                    //D_INST_lu32i_d(code, REG_R21, imm >> 32);
+                    D_INST_lu32i_d(code, REG_R21, 0xff);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(ins);
+                    D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff);
+                    *(code_t*)dst = code;
+                    dst += 4;
+                    dst2 = dst;
+                }
+            }
+
+            sz = sizeof(instrDesc);
+        }
+        break;
+
+        case INS_OPTS_RL:
+        {
+            // if for reloc!
+            //   pcaddu12i  reg, offset-hi20
+            //   addi_d  reg, reg, offset-lo12
+            //
+            // else: //// TODO: optimize.
+            //   lu12i_w  reg, dst-hi-12bits
+            //   ori  reg, reg, dst-lo-12bits
+            //   lu32i_d  reg, dst-hi-32bits
+
+            insGroup* tgtIG          = (insGroup*)emitCodeGetCookie(id->idAddr()->iiaBBlabel);
+            id->idAddr()->iiaIGlabel = tgtIG;
+
+            regNumber reg1 = id->idReg1();
+            assert(isGeneralRegister(reg1));
+
+            if (id->idIsReloc())
+            {
+                ssize_t imm = (ssize_t)tgtIG->igOffs;
+                imm         = (ssize_t)emitCodeBlock + imm - (ssize_t)dst;
+                assert((imm & 3) == 0);
+
+                int doff = (int)(imm & 0x800);
+                imm += doff;
+                assert(isValidSimm20(imm >> 12));
+
+                doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit.
+
+                code          = 0x1c000000;
+                *(code_t*)dst = code | (code_t)reg1 | ((imm & 0xfffff000) >> 7);
+                dst += 4;
+                dst2 = dst;
+#ifdef DEBUG
+                code = emitInsCode(INS_pcaddu12i);
+                assert(code == 0x1c000000);
+                code = emitInsCode(INS_addi_d);
+                assert(code == 0x02c00000);
+#endif
+                *(code_t*)dst = 0x02c00000 | (code_t)reg1 | ((code_t)reg1 << 5) | ((doff & 0xfff) << 10);
+                ins           = INS_addi_d;
+            }
+            else
+            {
+                ssize_t imm = (ssize_t)tgtIG->igOffs + (ssize_t)emitCodeBlock;
+                //assert((imm >> 32) <= 0x7ffff); // In fact max is <= 0xffff
+                assert((imm >> 32) == 0xff);
+
+                code = emitInsCode(INS_lu12i_w);
+                D_INST_lu12i_w(code, REG_R21, imm >> 12);
+                *(code_t*)dst = code;
+                dst += 4;
+
+                code = emitInsCode(INS_ori);
+                D_INST_ori(code, reg1, REG_R21, imm);
+                *(code_t*)dst = code;
+                dst += 4;
+                dst2 = dst;
+
+                ins = INS_lu32i_d;
+                //emitIns_R_I(INS_lu32i_d, size, reg1, 0xff);
+                code = emitInsCode(INS_lu32i_d);
+                //D_INST_lu32i_d(code, reg1, imm >> 32);
+                D_INST_lu32i_d(code, reg1, 0xff);
+                *(code_t*)dst = code;
+            }
+
+            dst += 4;
+
+            sz = sizeof(instrDesc);
+        }
+        break;
+        case INS_OPTS_JIRL:
+            // case_1: <----------from INS_OPTS_J:
+            //   xor r21,reg1,reg2  |  bne/beq _next  |  bcnez/bceqz _next
+            //   bnez/beqz dst      |  b dst          |  b dst
+            // _next:
+            //
+            // case_2: <---------- TODO: from INS_OPTS_J:
+            //   bnez/beqz _next:
+            //   pcaddi r21,off-hi
+            //   jirl r0,r21,off-lo
+            // _next:
+            //
+            // case_3: <----------INS_OPTS_JIRL: //not used by now !!!
+            //   b dst
+            //
+            // case_4: <----------INS_OPTS_JIRL: //not used by now !!!
+            //   pcaddi r21,off-hi
+            //   jirl r0,r21,off-lo
+            //
+            {
+                instrDescJmp* jmp = (instrDescJmp*)id;
+
+                regNumber reg1 = id->idReg1();
+                {
+                    ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset();
+                    // -4: presumably adjusts for the extra leading instruction
+                    // of the expanded two-instruction sequence — see above.
+                    imm -= 4;
+
+                    ins = jmp->idIns();
+                    assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JIRL: not used by now!!!
+                    switch (jmp->idCodeSize())
+                    {
+                        case 8:
+                        {
+                            regNumber reg2 = id->idReg2();
+                            assert((INS_bceqz <= ins) && (ins <= INS_bgeu));
+                            //assert((INS_bceqz <= ins) && (ins <= INS_bl)); // TODO
+                            if ((INS_beq == ins) || (INS_bne == ins))
+                            {
+                                if ((-0x400000 <= imm) && (imm < 0x400000))
+                                {
+                                    code = emitInsCode(INS_xor);
+                                    D_INST_3R(code, REG_R21, reg1, reg2);
+                                    *(code_t*)dst = code;
+                                    dst += 4;
+
+                                    code = emitInsCode(ins == INS_beq ? INS_beqz : INS_bnez);
+                                    D_INST_Bcond_Z(code, REG_R21, imm);
+                                    *(code_t*)dst = code;
+                                    dst += 4;
+                                }
+                                else // if ((-0x8000000 <= imm) && (imm < 0x8000000))
+                                {
+                                    assert((-0x8000000 <= imm) && (imm < 0x8000000));
+                                    assert((INS_bne & 0xfffe) == INS_beq);
+
+                                    // Invert the condition (ins ^ 1) and branch
+                                    // over the following unconditional 'b'
+                                    // (0x800 looks like a +8-byte branch offset
+                                    // in the encoding — verify).
+                                    code = emitInsCode((instruction)((int)ins ^ 0x1));
+                                    code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */
+                                    code |= ((code_t)(reg2) /*& 0x1f */);     /* rd */
+                                    code |= 0x800;
+                                    *(code_t*)dst = code;
+                                    dst += 4;
+
+                                    code = emitInsCode(INS_b);
+                                    D_INST_B(code, imm);
+                                    *(code_t*)dst = code;
+                                    dst += 4;
+                                }
+                                //else
+                                //    unreached();
+                            }
+                            else if ((INS_bceqz == ins) || (INS_bcnez == ins))
+                            {
+                                assert((-0x8000000 <= imm) && (imm < 0x8000000));
+                                assert((INS_bcnez & 0xfffe) == INS_bceqz);
+
+                                code = emitInsCode((instruction)((int)ins ^ 0x1));
+                                code |= ((code_t)reg1) << 5; /* rj */
+                                code |= 0x800;
+                                *(code_t*)dst = code;
+                                dst += 4;
+
+                                code = emitInsCode(INS_b);
+                                D_INST_B(code, imm);
+                                *(code_t*)dst = code;
+                                dst += 4;
+                            }
+                            else if ((INS_blt <= ins) && (ins <= INS_bgeu))
+                            {
+                                assert((-0x8000000 <= imm) && (imm < 0x8000000));
+                                assert((INS_bge & 0xfffe) == INS_blt);
+                                assert((INS_bgeu & 0xfffe) == INS_bltu);
+
+                                code = emitInsCode((instruction)((int)ins ^ 0x1));
+                                code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */
+                                code |= ((code_t)(reg2) /*& 0x1f */);     /* rd */
+                                code |= 0x800;
+                                *(code_t*)dst = code;
+                                dst += 4;
+
+                                code = emitInsCode(INS_b);
+                                D_INST_B(code, imm);
+                                *(code_t*)dst = code;
+                                dst += 4;
+                            }
+                            break;
+                        }
+                        //case 12:
+                        default:
+                            unreached();
+                            break;
+                    }
+                }
+                sz = sizeof(instrDescJmp);
+            }
+            break;
+        case INS_OPTS_J_cond:
+            // b_cond  dst-relative.
+            //
+            // NOTE:
+            //   the case "imm > 0x7fff" not supported.
+            //   More info within the emitter::emitIns_J_cond_la();
+            {
+                ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot.
+                assert((OFFSET_DIST_SMALL_MAX_NEG << 2) <= imm && imm <= (OFFSET_DIST_SMALL_MAX_POS << 2));
+                assert(!(imm & 3));
+
+                ins  = id->idIns();
+                code = emitInsCode(ins);
+                D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm);
+                *(code_t*)dst = code;
+                dst += 4;
+
+                sz = sizeof(instrDescJmp);
+            }
+            break;
+        case INS_OPTS_J:
+            // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl  dst-relative.
+            {
+                ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot.
+                assert(!(imm & 3));
+
+                ins  = id->idIns();
+                code = emitInsCode(ins);
+                if (ins == INS_b || ins == INS_bl)
+                {
+                    D_INST_B(code, imm);
+                }
+                else if (ins == INS_bnez || ins == INS_beqz)
+                {
+                    D_INST_Bcond_Z(code, id->idReg1(), imm);
+                }
+                else if (ins == INS_bcnez || ins == INS_bceqz)
+                {
+                    assert((code_t)(id->idReg1()) < 8); // cc
+                    D_INST_Bcond_Z(code, id->idReg1(), imm);
+                }
+                else if ((INS_beq <= ins) && (ins <= INS_bgeu))
+                {
+                    D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm);
+                }
+                else
+                {
+                    assert(!"unimplemented on LOONGARCH yet");
+                }
+                *(code_t*)dst = code;
+                dst += 4;
+
+                sz = sizeof(instrDescJmp);
+            }
+            break;
+
+        case INS_OPTS_C:
+            if (id->idIsLargeCall())
+            {
+                /* Must be a "fat" call descriptor */
+                sz = sizeof(instrDescCGCA);
+            }
+            else
+            {
+                assert(!id->idIsLargeDsp());
+                assert(!id->idIsLargeCns());
+                sz = sizeof(instrDesc);
+            }
+            dst += emitOutputCall(ig, dst, id, 0);
+            ins = INS_nop;
+            break;
+
+        //case INS_OPTS_NONE:
+        default:
+            //assert(id->idGCref() == GCT_NONE);
+            *(code_t*)dst = id->idAddr()->iiaGetInstrEncode();
+            dst += 4;
+            dst2 = dst;
+            ins  = id->idIns();
+            sz   = emitSizeOfInsDsc(id);
+            break;
+    }
+
+    // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref.
+    // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a
+    // GC ref to register "id->idReg1()".  (It may, apparently, also not be GC_NONE in other cases, such as
+    // for stores, but we ignore those cases here.)
+    if (emitInsMayWriteToGCReg(ins)) // True if "id->idIns()" writes to a register than can hold GC ref.
+    {
+        // We assume that "idReg1" is the primary destination register for all instructions
+        if (id->idGCref() != GCT_NONE)
+        {
+            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst2);
+        }
+        else
+        {
+            emitGCregDeadUpd(id->idReg1(), dst2);
+        }
+
+        //if (emitInsMayWriteMultipleRegs(id))
+        //{
+        //    // INS_gslq etc...
+        //    // "idReg2" is the secondary destination register
+        //    if (id->idGCrefReg2() != GCT_NONE)
+        //    {
+        //        emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), *dp);
+        //    }
+        //    else
+        //    {
+        //        emitGCregDeadUpd(id->idReg2(), *dp);
+        //    }
+        //}
+    }
+
+    // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+    // ref or overwritten one.
+    if (emitInsWritesToLclVarStackLoc(id) /*|| emitInsWritesToLclVarStackLocPair(id)*/)
+    {
+        int      varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+        unsigned ofs    = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
+        bool     FPbased;
+        int      adr = emitComp->lvaFrameAddress(varNum, &FPbased);
+        if (id->idGCref() != GCT_NONE)
+        {
+            emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst2 DEBUG_ARG(varNum));
+        }
+        else
+        {
+            // If the type of the local is a gc ref type, update the liveness.
+            var_types vt;
+            if (varNum >= 0)
+            {
+                // "Regular" (non-spill-temp) local.
+                vt = var_types(emitComp->lvaTable[varNum].lvType);
+            }
+            else
+            {
+                TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum);
+                vt              = tmpDsc->tdTempType();
+            }
+            if (vt == TYP_REF || vt == TYP_BYREF)
+                emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum));
+        }
+        //if (emitInsWritesToLclVarStackLocPair(id))
+        //{
+        //    unsigned ofs2 = ofs + TARGET_POINTER_SIZE;
+        //    if (id->idGCrefReg2() != GCT_NONE)
+        //    {
+        //        emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), *dp);
+        //    }
+        //    else
+        //    {
+        //        // If the type of the local is a gc ref type, update the liveness.
+        //        var_types vt;
+        //        if (varNum >= 0)
+        //        {
+        //            // "Regular" (non-spill-temp) local.
+        //            vt = var_types(emitComp->lvaTable[varNum].lvType);
+        //        }
+        //        else
+        //        {
+        //            TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum);
+        //            vt              = tmpDsc->tdTempType();
+        //        }
+        //        if (vt == TYP_REF || vt == TYP_BYREF)
+        //            emitGCvarDeadUpd(adr + ofs2, *dp);
+        //    }
+        //}
+    }
+
+#ifdef DEBUG
+    /* Make sure we set the instruction descriptor size correctly */
+
+    //size_t expected = emitSizeOfInsDsc(id);
+    //assert(sz == expected);
+
+    if (emitComp->opts.disAsm || emitComp->verbose)
+    {
+        code_t* cp = (code_t*)*dp;
+        while ((BYTE*)cp != dst)
+        {
+            emitDisInsName(*cp, (BYTE*)cp, id);
+            cp++;
+        }
+        //emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
+    }
+
+    if (emitComp->compDebugBreak)
+    {
+        // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for
+        // emitting instruction a6, (i.e. IN00a6 in jitdump).
+ if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum) + { + assert(!"JitBreakEmitOutputInstr reached"); + } + } +#endif + + /* All instructions are expected to generate code */ + + assert(*dp != dst); + + *dp = dst; + + return sz; +} + +/*****************************************************************************/ +/*****************************************************************************/ + +#ifdef DEBUG + +/**************************************************************************** + * + * Display the given instruction. + */ + +//NOTE: At least 32bytes within dst. +void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) +{ + const BYTE* insstrs = dst; + + if (!code) + { + printf("LOONGARCH invalid instruction: 0x%x\n", code); + assert(!"invalid inscode on LOONGARCH!"); + return ; + } + +// clang-format off + const char * const regName[] = {"zero", "ra", "tp", "sp", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "x0", "fp", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"}; + + const char * const FregName[] = {"fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "ft8", "ft9", "ft10", "ft11", "ft12", "ft13", "ft14", "ft15", "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7"}; + + const char * const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; +// clang-format on + + + unsigned int opcode = (code>>26) & 0x3f; + + //bits: 31-26,MSB6 + switch (opcode) + { + case 0x0: + { + goto Label_OPCODE_0; + //break; + } + //case 0x1: + //{ + // assert(!"unimplemented on loongarch yet!"); + // //goto Label_OPCODE_1; + // break; + //} + case 0x2: + { + goto Label_OPCODE_2; + //break; + } + case 0x3: + { + goto Label_OPCODE_3; + //break; + } + case 0xe: + { + goto Label_OPCODE_E; + //break; + } + case LA_2RI16_ADDU16I_D: //0x4 + { + const char *rd = regName[code & 0x1f]; + 
const char *rj = regName[(code>>5) & 0x1f]; + short si16 = (code >> 10) & 0xffff; + printf(" 0x%llx addu16i.d %s, %s, %d\n", insstrs, rd, rj, si16); + return; + } + case 0x5: + case 0x6: + case 0x7: + { + //bits: 31-25,MSB7 + unsigned int inscode = (code >> 25) & 0x7f; + const char *rd = regName[code & 0x1f]; + unsigned int si20 = (code >> 5) & 0xfffff; + switch (inscode) + { + case LA_1RI20_LU12I_W: + printf(" 0x%llx lu12i.w %s, 0x%x\n", insstrs, rd, si20); + return; + case LA_1RI20_LU32I_D: + printf(" 0x%llx lu32i.d %s, 0x%x\n", insstrs, rd, si20); + return; + case LA_1RI20_PCADDI: + printf(" 0x%llx pcaddi %s, 0x%x\n", insstrs, rd, si20); + return; + case LA_1RI20_PCALAU12I: + printf(" 0x%llx pcalau12i %s, 0x%x\n", insstrs, rd, si20); + return; + case LA_1RI20_PCADDU12I: + printf(" 0x%llx pcaddu12i %s, 0x%x\n", insstrs, rd, si20); + return; + case LA_1RI20_PCADDU18I: + { + printf(" 0x%llx pcaddu18i %s, 0x%x\n", insstrs, rd, si20); + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x8: + case 0x9: + { + //bits: 31-24,MSB8 + unsigned int inscode = (code >> 24) & 0xff; + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + short si14 = ((code >> 10) & 0x3fff)<<2; + si14 >>= 2; + switch (inscode) + { + case LA_2RI14_LL_W: + printf(" 0x%llx ll.w %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_SC_W: + printf(" 0x%llx sc.w %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_LL_D: + printf(" 0x%llx ll.d %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_SC_D: + printf(" 0x%llx sc.d %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_LDPTR_W: + printf(" 0x%llx ldptr.w %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_STPTR_W: + printf(" 0x%llx stptr.w %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_LDPTR_D: + printf(" 0x%llx ldptr.d %s, %s, %d\n", insstrs, rd, rj, si14); + return; + 
case LA_2RI14_STPTR_D: + printf(" 0x%llx stptr.d %s, %s, %d\n", insstrs, rd, rj, si14); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0xa: + { + //bits: 31-24,MSB8 + unsigned int inscode = (code >> 22) & 0x3ff; + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *fd = FregName[code & 0x1f]; + short si12 = ((code >> 10) & 0xfff)<<4; + si12 >>= 4; + switch (inscode) + { + case LA_2RI12_LD_B: + printf(" 0x%llx ld.b %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_H: + printf(" 0x%llx ld.h %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_W: + printf(" 0x%llx ld.w %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_D: + printf(" 0x%llx ld.d %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_ST_B: + printf(" 0x%llx st.b %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_ST_H: + printf(" 0x%llx st.h %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_ST_W: + printf(" 0x%llx st.w %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_ST_D: + printf(" 0x%llx st.d %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_BU: + printf(" 0x%llx ld.bu %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_HU: + printf(" 0x%llx ld.hu %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_WU: + printf(" 0x%llx ld.wu %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_PRELD: + assert(!"unimplemented on loongarch yet!"); + return; + case LA_2RI12_FLD_S: + printf(" 0x%llx fld.s %s, %s, %d\n", insstrs, fd, rj, si12); + return; + case LA_2RI12_FST_S: + printf(" 0x%llx fst.s %s, %s, %d\n", insstrs, fd, rj, si12); + return; + case LA_2RI12_FLD_D: + printf(" 0x%llx fld.d %s, %s, %d\n", insstrs, fd, rj, si12); + return; + case LA_2RI12_FST_D: + printf(" 0x%llx fst.d %s, %s, %d\n", insstrs, fd, rj, si12); + return; + 
default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case LA_1RI21_BEQZ: //0x10 + { + const char *rj = regName[(code>>5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16))<<11; + offs21 >>= 9; + printf(" 0x%llx beqz %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); + return; + } + case LA_1RI21_BNEZ: //0x11 + { + const char *rj = regName[(code>>5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16))<<11; + offs21 >>= 9; + printf(" 0x%llx bnez %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); + return; + } + case 0x12: + { + //LA_1RI21_BCEQZ + //LA_1RI21_BCNEZ + const char *cj = CFregName[(code>>5) & 0x7]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; + offs21 >>= 9; + if (0 == ((code>>8) & 0x3)) { + printf(" 0x%llx bceqz %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); + return; + } + else if (1 == ((code>>8) & 0x3)) { + printf(" 0x%llx bcnez %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); + return; + } + else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case LA_2RI16_JIRL: //0x13 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + if(id->idDebugOnlyInfo()->idMemCookie) + { + assert(0 < id->idDebugOnlyInfo()->idMemCookie); + const char* methodName; + methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); + printf(" 0x%llx jirl %s, %s, %d #%s\n", insstrs, rd, rj, offs16, methodName); + } + else + { + printf(" 0x%llx jirl %s, %s, %d\n", insstrs, rd, rj, offs16); + } + return; + } + case LA_I26_B: //0x14 + { + int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16))<<6; + offs26 >>= 4; + printf(" 0x%llx b 0x%llx\n", insstrs, (int64_t)insstrs + offs26); + return; + } + case LA_I26_BL: //0x15 + { + int offs26 = (((code >> 10) & 
0xffff) | ((code & 0x3ff) << 16))<<6; + offs26 >>= 4; + printf(" 0x%llx bl 0x%llx\n", insstrs, (int64_t)insstrs + offs26); + return; + } + case LA_2RI16_BEQ: //0x16 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx beq %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + case LA_2RI16_BNE: //0x17 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx bne %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + case LA_2RI16_BLT: //0x18 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx blt %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + case LA_2RI16_BGE: //0x19 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx bge %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + case LA_2RI16_BLTU: //0x1a + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx bltu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + case LA_2RI16_BGEU: //0x1b + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx bgeu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + +Label_OPCODE_0: + opcode = (code >> 22) & 0x3ff; + + //bits: 31-22,MSB10 + 
switch (opcode) + { + case 0x0: + { + //bits: 31-18,MSB14 + unsigned int inscode1 = (code >> 18) & 0x3fff; + switch (inscode1) + { + case 0x0: + { + //bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + switch (inscode2) + { + case 0x0: + { + //bits:31-10,MSB22 + unsigned int inscode3 = (code >> 10) & 0x3fffff; + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + switch (inscode3) + { + case LA_2R_CLO_W: + printf(" 0x%llx clo.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CLZ_W: + printf(" 0x%llx clz.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CTO_W: + printf(" 0x%llx cto.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CTZ_W: + printf(" 0x%llx ctz.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CLO_D: + printf(" 0x%llx clo.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CLZ_D: + printf(" 0x%llx clz.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CTO_D: + printf(" 0x%llx cto.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CTZ_D: + printf(" 0x%llx ctz.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVB_2H: + printf(" 0x%llx revb.2h %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVB_4H: + printf(" 0x%llx revb.4h %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVB_2W: + printf(" 0x%llx revb.2w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVB_D: + printf(" 0x%llx revb.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVH_2W: + printf(" 0x%llx revh.2w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVH_D: + printf(" 0x%llx revh.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_BITREV_4B: + printf(" 0x%llx bitrev.4b %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_BITREV_8B: + printf(" 0x%llx bitrev.8b %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_BITREV_W: + printf(" 0x%llx bitrev.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_BITREV_D: + printf(" 0x%llx bitrev.d %s, %s\n", insstrs, rd, rj); + return; + case 
LA_2R_EXT_W_H: + printf(" 0x%llx ext.w.h %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_EXT_W_B: + printf(" 0x%llx ext.w.b %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_RDTIMEL_W: + printf(" 0x%llx rdtimel.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_RDTIMEH_W: + printf(" 0x%llx rdtimeh.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_RDTIME_D: + printf(" 0x%llx rdtime.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CPUCFG: + printf(" 0x%llx cpucfg %s, %s\n", insstrs, rd, rj); + return; + + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case LA_2R_ASRTLE_D: + { + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx asrtle.d %s, %s\n", insstrs, rj, rk); + return; + } + case LA_2R_ASRTGT_D: + { + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx asrtgt.d %s, %s\n", insstrs, rj, rk); + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x1: + { + //LA_OP_ALSL_W + //LA_OP_ALSL_WU + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + unsigned int sa2 = (code>>15) & 0x3; + if (0 == ((code>>17) & 0x1)) { + printf(" 0x%llx alsl.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + return; + } else if (1 == ((code>>17) & 0x1)) { + printf(" 0x%llx alsl.wu %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case LA_OP_BYTEPICK_W: //0x2 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + unsigned int sa2 = (code>>15) & 0x3; + printf(" 0x%llx bytepick.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa2); + return; + } + case LA_OP_BYTEPICK_D: //0x3 + 
{ + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + unsigned int sa3 = (code>>15) & 0x7; + printf(" 0x%llx bytepick.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa3); + return; + } + case 0x4: + case 0x5: + case 0x6: + case 0x7: + case 0x8: + case 0x9: + { + //bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + + switch (inscode2) + { + case LA_3R_ADD_W: + printf(" 0x%llx add.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_ADD_D: + printf(" 0x%llx add.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SUB_W: + printf(" 0x%llx sub.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SUB_D: + printf(" 0x%llx sub.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SLT: + printf(" 0x%llx slt %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SLTU: + printf(" 0x%llx sltu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MASKEQZ: + printf(" 0x%llx maskeqz %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MASKNEZ: + printf(" 0x%llx masknez %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_NOR: + printf(" 0x%llx nor %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_AND: + printf(" 0x%llx and %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_OR: + printf(" 0x%llx or %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_XOR: + printf(" 0x%llx xor %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_ORN: + printf(" 0x%llx orn %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_ANDN: + printf(" 0x%llx andn %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SLL_W: + printf(" 0x%llx sll.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SRL_W: + printf(" 0x%llx srl.w %s, %s, %s\n", insstrs, rd, rj, rk); + 
return; + case LA_3R_SRA_W: + printf(" 0x%llx sra.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SLL_D: + printf(" 0x%llx sll.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SRL_D: + printf(" 0x%llx srl.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SRA_D: + printf(" 0x%llx sra.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_ROTR_W: + printf(" 0x%llx rotr.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_ROTR_D: + printf(" 0x%llx rotr.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MUL_W: + printf(" 0x%llx mul.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULH_W: + printf(" 0x%llx mulh.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULH_WU: + printf(" 0x%llx mulh.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MUL_D: + printf(" 0x%llx mul.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULH_D: + printf(" 0x%llx mulh.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULH_DU: + printf(" 0x%llx mulh.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULW_D_W: + printf(" 0x%llx mulw.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULW_D_WU: + printf(" 0x%llx mulw.d.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_DIV_W: + printf(" 0x%llx div.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MOD_W: + printf(" 0x%llx mod.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_DIV_WU: + printf(" 0x%llx div.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MOD_WU: + printf(" 0x%llx mod.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_DIV_D: + printf(" 0x%llx div.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MOD_D: + printf(" 0x%llx mod.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_DIV_DU: + printf(" 0x%llx div.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MOD_DU: + printf(" 0x%llx 
mod.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRC_W_B_W: + printf(" 0x%llx crc.w.b.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRC_W_H_W: + printf(" 0x%llx crc.w.h.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRC_W_W_W: + printf(" 0x%llx crc.w.w.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRC_W_D_W: + printf(" 0x%llx crc.w.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRCC_W_B_W: + printf(" 0x%llx crcc.w.b.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRCC_W_H_W: + printf(" 0x%llx crcc.w.h.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRCC_W_W_W: + printf(" 0x%llx crcc.w.w.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRCC_W_D_W: + printf(" 0x%llx crcc.w.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + } + case 0xa: + { + //bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + unsigned int codefield = code & 0x7fff; + switch (inscode2) + { + case LA_OP_BREAK: + printf(" 0x%llx break 0x%x\n", insstrs, codefield); + return; + case LA_OP_DBGCALL: + printf(" 0x%llx dbgcall 0x%x\n", insstrs, codefield); + return; + case LA_OP_SYSCALL: + printf(" 0x%llx syscall 0x%x\n", insstrs, codefield); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + } + case LA_OP_ALSL_D: //0xb + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + unsigned int sa2 = (code>>15) & 0x3; + printf(" 0x%llx alsl.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x1: + { + if (code & 0x200000) { + //LA_OP_BSTRINS_W + //LA_OP_BSTRPICK_W + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 
0x1f]; + unsigned int lsbw = (code >> 10) & 0x1f; + unsigned int msbw = (code >> 16) & 0x1f; + if (!(code & 0x8000)) { + printf(" 0x%llx bstrins.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); + return; + } else if (code & 0x8000) { + printf(" 0x%llx bstrpick.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + } + else { + //bits: 31-18,MSB14 + unsigned int inscode1 = (code >> 18) & 0x3fff; + switch (inscode1) + { + case 0x10: + { + //LA_OP_SLLI_W: + //LA_OP_SLLI_D: + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + if (1 == ((code>>15) & 0x7)) { + unsigned int ui5 = (code>>10) & 0x1f; + printf(" 0x%llx slli.w %s, %s, %d\n", insstrs, rd, rj, ui5); + return; + } else if (1 == ((code>>16) & 0x3)) { + unsigned int ui6 = (code>>10) & 0x3f; + printf(" 0x%llx slli.d %s, %s, %d\n", insstrs, rd, rj, ui6); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x11: + { + //LA_OP_SRLI_W: + //LA_OP_SRLI_D: + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + if (1 == ((code>>15) & 0x7)) { + unsigned int ui5 = (code>>10) & 0x1f; + printf(" 0x%llx srli.w %s, %s, %d\n", insstrs, rd, rj, ui5); + return; + } else if (1 == ((code>>16) & 0x3)) { + unsigned int ui6 = (code>>10) & 0x3f; + printf(" 0x%llx srli.d %s, %s, %d\n", insstrs, rd, rj, ui6); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x12: + { + //LA_OP_SRAI_W: + //LA_OP_SRAI_D: + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + if (1 == ((code>>15) & 0x7)) { + unsigned int ui5 = (code>>10) & 0x1f; + printf(" 0x%llx srai.w %s, %s, %d\n", insstrs, rd, rj, ui5); + return; + } else if (1 == ((code>>16) & 0x3)) { + unsigned int ui6 = (code>>10) & 0x3f; + printf(" 0x%llx srai.d %s, %s, %d\n", 
insstrs, rd, rj, ui6); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x13: + { + //LA_OP_ROTRI_W: + //LA_OP_ROTRI_D: + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + if (1 == ((code>>15) & 0x7)) { + unsigned int ui5 = (code>>10) & 0x1f; + printf(" 0x%llx rotri.w %s, %s, %d\n", insstrs, rd, rj, ui5); + return; + } else if (1 == ((code>>16) & 0x3)) { + unsigned int ui6 = (code>>10) & 0x3f; + printf(" 0x%llx rotri.d %s, %s, %d\n", insstrs, rd, rj, ui6); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + return; + } + case LA_OP_BSTRINS_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int lsbd = (code >> 10) & 0x3f; + unsigned int msbd = (code >> 16) & 0x3f; + printf(" 0x%llx bstrins.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); + return; + } + case LA_OP_BSTRPICK_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int lsbd = (code >> 10) & 0x3f; + unsigned int msbd = (code >> 16) & 0x3f; + printf(" 0x%llx bstrpick.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); + return; + } + case 0x4: + { + //bits: 31-15,MSB17 + unsigned int inscode1 = (code >> 15) & 0x1ffff; + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + + switch (inscode1) + { + case LA_3R_FADD_S: + printf(" 0x%llx fadd.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FADD_D: + printf(" 0x%llx fadd.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FSUB_S: + printf(" 0x%llx fsub.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case 
LA_3R_FSUB_D: + printf(" 0x%llx fsub.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMUL_S: + printf(" 0x%llx fmul.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMUL_D: + printf(" 0x%llx fmul.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FDIV_S: + printf(" 0x%llx fdiv.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FDIV_D: + printf(" 0x%llx fdiv.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMAX_S: + printf(" 0x%llx fmax.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMAX_D: + printf(" 0x%llx fmax.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMIN_S: + printf(" 0x%llx fmin.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMIN_D: + printf(" 0x%llx fmin.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMAXA_S: + printf(" 0x%llx fmaxa.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMAXA_D: + printf(" 0x%llx fmaxa.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMINA_S: + printf(" 0x%llx fmina.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMINA_D: + printf(" 0x%llx fmina.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FSCALEB_S: + printf(" 0x%llx fscaleb.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FSCALEB_D: + printf(" 0x%llx fscaleb.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FCOPYSIGN_S: + printf(" 0x%llx fcopysign.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FCOPYSIGN_D: + printf(" 0x%llx fcopysign.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case 0x228: + case 0x229: + case 0x232: + case 0x234: + case 0x235: + case 0x236: + case 0x23a: + case 0x23c: + { + //bits:31-10,MSB22 + unsigned int inscode2 = (code >> 10) & 0x3fffff; + switch (inscode2) + { + case LA_2R_FABS_S: + printf(" 0x%llx fabs.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FABS_D: + printf(" 0x%llx fabs.d %s, %s\n", insstrs, fd, fj); + return; + 
case LA_2R_FNEG_S: + printf(" 0x%llx fneg.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FNEG_D: + printf(" 0x%llx fneg.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FLOGB_S: + printf(" 0x%llx flogb.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FLOGB_D: + printf(" 0x%llx flogb.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FCLASS_S: + printf(" 0x%llx fclass.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FCLASS_D: + printf(" 0x%llx fclass.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FSQRT_S: + printf(" 0x%llx fsqrt.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FSQRT_D: + printf(" 0x%llx fsqrt.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRECIP_S: + printf(" 0x%llx frecip.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRECIP_D: + printf(" 0x%llx frecip.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRSQRT_S: + printf(" 0x%llx frsqrt.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRSQRT_D: + printf(" 0x%llx frsqrt.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FMOV_S: + printf(" 0x%llx fmov.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FMOV_D: + printf(" 0x%llx fmov.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_MOVGR2FR_W: + printf(" 0x%llx movgr2fr.w %s, %s\n", insstrs, fd, rj); + return; + case LA_2R_MOVGR2FR_D: + printf(" 0x%llx movgr2fr.d %s, %s\n", insstrs, fd, rj); + return; + case LA_2R_MOVGR2FRH_W: + printf(" 0x%llx movgr2frh.w %s, %s\n", insstrs, fd, rj); + return; + case LA_2R_MOVFR2GR_S: + printf(" 0x%llx movfr2gr.s %s, %s\n", insstrs, rd, fj); + return; + case LA_2R_MOVFR2GR_D: + printf(" 0x%llx movfr2gr.d %s, %s\n", insstrs, rd, fj); + return; + case LA_2R_MOVFRH2GR_S: + printf(" 0x%llx movfrh2gr.s %s, %s\n", insstrs, rd, fj); + return; + case LA_2R_MOVGR2FCSR: + assert(!"unimplemented on loongarch yet!"); + return; + case LA_2R_MOVFCSR2GR: + assert(!"unimplemented on loongarch yet!"); + return; + case LA_2R_MOVFR2CF: + { + const char *cd = 
CFregName[code & 0x7]; + printf(" 0x%llx movfr2cf %s, %s\n", insstrs, cd, fj); + return; + } + case LA_2R_MOVCF2FR: + { + const char *cj = CFregName[(code>>5) & 0x7]; + printf(" 0x%llx movcf2fr %s, %s\n", insstrs, fd, cj); + return; + } + case LA_2R_MOVGR2CF: + { + const char *cd = CFregName[code & 0x7]; + printf(" 0x%llx movgr2cf %s, %s\n", insstrs, cd, rj); + return; + } + case LA_2R_MOVCF2GR: + { + const char *cj = CFregName[(code>>5) & 0x7]; + printf(" 0x%llx movcf2gr %s, %s\n", insstrs, rd, cj); + return; + } + case LA_2R_FCVT_S_D: + printf(" 0x%llx fcvt.s.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FCVT_D_S: + printf(" 0x%llx fcvt.d.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRM_W_S: + printf(" 0x%llx ftintrm.w.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRM_W_D: + printf(" 0x%llx ftintrm.w.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRM_L_S: + printf(" 0x%llx ftintrm.l.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRM_L_D: + printf(" 0x%llx ftintrm.l.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRP_W_S: + printf(" 0x%llx ftintrp.w.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRP_W_D: + printf(" 0x%llx ftintrp.w.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRP_L_S: + printf(" 0x%llx ftintrp.l.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRP_L_D: + printf(" 0x%llx ftintrp.l.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRZ_W_S: + printf(" 0x%llx ftintrz.w.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRZ_W_D: + printf(" 0x%llx ftintrz.w.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRZ_L_S: + printf(" 0x%llx ftintrz.l.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRZ_L_D: + printf(" 0x%llx ftintrz.l.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRNE_W_S: + printf(" 0x%llx ftintrne.w.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRNE_W_D: + printf(" 0x%llx ftintrne.w.d %s, 
%s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRNE_L_S: + printf(" 0x%llx ftintrne.l.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRNE_L_D: + printf(" 0x%llx ftintrne.l.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINT_W_S: + printf(" 0x%llx ftint.w.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINT_W_D: + printf(" 0x%llx ftint.w.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINT_L_S: + printf(" 0x%llx ftint.l.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINT_L_D: + printf(" 0x%llx ftint.l.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FFINT_S_W: + printf(" 0x%llx ffint.s.w %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FFINT_S_L: + printf(" 0x%llx ffint.s.l %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FFINT_D_W: + printf(" 0x%llx ffint.d.w %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FFINT_D_L: + printf(" 0x%llx ffint.d.l %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRINT_S: + printf(" 0x%llx frint.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRINT_D: + printf(" 0x%llx frint.d %s, %s\n", insstrs, fd, fj); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case LA_2RI12_SLTI: //0x8 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff)<<4; + si12 >>= 4; + printf(" 0x%llx slti %s, %s, %d\n", insstrs, rd, rj, si12); + return; + } + case LA_2RI12_SLTUI: //0x9 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff)<<4; + si12 >>= 4; + printf(" 0x%llx sltui %s, %s, %d\n", insstrs, rd, rj, si12); + return; + } + case LA_2RI12_ADDI_W: //0xa + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff)<<4; + 
si12 >>= 4; + printf(" 0x%llx addi.w %s, %s, %d\n", insstrs, rd, rj, si12); + return; + } + case LA_2RI12_ADDI_D: //0xb + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff)<<4; + si12 >>= 4; + printf(" 0x%llx addi.d %s, %s, %ld\n", insstrs, rd, rj, si12); + return; + } + case LA_2RI12_LU52I_D: //0xc + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int si12 = (code >> 10) & 0xfff; + printf(" 0x%llx lu52i.d %s, %s, 0x%x\n", insstrs, rd, rj, si12); + return; + } + case LA_2RI12_ANDI: //0xd + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf(" 0x%llx andi %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + return; + } + case LA_2RI12_ORI: //0xe + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf(" 0x%llx ori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + return; + } + case LA_2RI12_XORI: //0xf + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf(" 0x%llx xori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + return; + } + + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + +//Label_OPCODE_1: +// opcode = (code >> 24) & 0xff; +// //bits: 31-24,MSB8 + + +Label_OPCODE_2: + opcode = (code >> 20) & 0xfff; + + //bits: 31-20,MSB12 + switch (opcode) + { + case LA_4R_FMADD_S: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FMADD_D: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + 
const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FMSUB_S: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FMSUB_D: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FNMADD_S: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fnmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FNMADD_D: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fnmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FNMSUB_S: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fnmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FNMSUB_D: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fnmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + default : + printf("LOONGARCH 
illegal instruction: 0x%08x\n", code); + return; + } + +Label_OPCODE_3: + opcode = (code >> 20) & 0xfff; + + //bits: 31-20,MSB12 + switch (opcode) + { + case LA_OP_FCMP_cond_S: + { + //bits:19-15,cond + unsigned int cond = (code >> 15) & 0x1f; + const char *cd = CFregName[code & 0x7]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + switch (cond) + { + case 0x0: + printf(" 0x%llx fcmp.caf.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x1: + printf(" 0x%llx fcmp.saf.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x2: + printf(" 0x%llx fcmp.clt.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x3: + printf(" 0x%llx fcmp.slt.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x4: + printf(" 0x%llx fcmp.ceq.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x5: + printf(" 0x%llx fcmp.seq.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x6: + printf(" 0x%llx fcmp.cle.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x7: + printf(" 0x%llx fcmp.sle.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x8: + printf(" 0x%llx fcmp.cun.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x9: + printf(" 0x%llx fcmp.sun.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xA: + printf(" 0x%llx fcmp.cult.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xB: + printf(" 0x%llx fcmp.sult.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xC: + printf(" 0x%llx fcmp.cueq.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xD: + printf(" 0x%llx fcmp.sueq.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xE: + printf(" 0x%llx fcmp.cule.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xF: + printf(" 0x%llx fcmp.sule.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x10: + printf(" 0x%llx fcmp.cne.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x11: + printf(" 0x%llx fcmp.sne.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 
0x14: + printf(" 0x%llx fcmp.cor.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x15: + printf(" 0x%llx fcmp.sor.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x18: + printf(" 0x%llx fcmp.cune.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x19: + printf(" 0x%llx fcmp.sune.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + } + case LA_OP_FCMP_cond_D: + { + //bits:19-15,cond + unsigned int cond = (code >> 15) & 0x1f; + const char *cd = CFregName[code & 0x7]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + switch (cond) + { + case 0x0: + printf(" 0x%llx fcmp.caf.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x1: + printf(" 0x%llx fcmp.saf.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x2: + printf(" 0x%llx fcmp.clt.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x3: + printf(" 0x%llx fcmp.slt.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x4: + printf(" 0x%llx fcmp.ceq.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x5: + printf(" 0x%llx fcmp.seq.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x6: + printf(" 0x%llx fcmp.cle.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x7: + printf(" 0x%llx fcmp.sle.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x8: + printf(" 0x%llx fcmp.cun.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x9: + printf(" 0x%llx fcmp.sun.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xA: + printf(" 0x%llx fcmp.cult.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xB: + printf(" 0x%llx fcmp.sult.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xC: + printf(" 0x%llx fcmp.cueq.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xD: + printf(" 0x%llx fcmp.sueq.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xE: + printf(" 0x%llx fcmp.cule.d %s, %s, %s\n", insstrs, cd, fj, 
fk); + return; + case 0xF: + printf(" 0x%llx fcmp.sule.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x10: + printf(" 0x%llx fcmp.cne.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x11: + printf(" 0x%llx fcmp.sne.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x14: + printf(" 0x%llx fcmp.cor.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x15: + printf(" 0x%llx fcmp.sor.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x18: + printf(" 0x%llx fcmp.cune.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x19: + printf(" 0x%llx fcmp.sune.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + } + case LA_4R_FSEL: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *ca = CFregName[(code>>15) & 0x7]; + printf(" 0x%llx fsel %s, %s, %s, %s\n", insstrs, fd, fj, fk, ca); + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + +Label_OPCODE_E: + opcode = (code >> 15) & 0x1ffff; + + //bits: 31-15,MSB17 + switch (opcode) + { + case LA_3R_LDX_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = 
regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STX_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stx.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STX_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stx.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STX_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stx.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STX_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stx.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_BU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.bu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_HU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.hu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_WU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_PRELDX: + assert(!"unimplemented on loongarch yet!"); + return; + case LA_3R_FLDX_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = 
regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldx.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FLDX_D: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldx.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTX_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstx.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTX_D: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstx.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_AMSWAP_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amswap.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMSWAP_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amswap.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMADD_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amadd.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMADD_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amadd.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMAND_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amand.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + 
} + case LA_3R_AMAND_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amand.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMOR_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amor.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMOR_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amor.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMXOR_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amxor.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMXOR_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amxor.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 
0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_WU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_DU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_WU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_DU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMSWAP_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amswap_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMSWAP_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amswap_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMADD_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amadd_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMADD_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 
0x%llx amadd_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMAND_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amand_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMAND_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amand_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMOR_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMOR_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMXOR_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amxor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMXOR_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amxor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + 
return; + } + case LA_3R_AMMIN_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_WU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_DU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax_db.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_WU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_DU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin_db.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_OP_DBAR: + { + unsigned int hint = code & 0x7fff; + printf(" 0x%llx dbar 0x%x\n", insstrs, hint); + return; + } + case LA_OP_IBAR: + { + unsigned int hint = code & 0x7fff; + printf(" 0x%llx ibar 0x%x\n", insstrs, hint); + return; + } + case LA_3R_FLDGT_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldgt.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FLDGT_D: + { + const char 
*fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldgt.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FLDLE_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldle.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FLDLE_D: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldle.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTGT_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstgt.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTGT_D: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstgt.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTLE_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstle.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTLE_D: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstle.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_LDGT_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldgt.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDGT_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = 
regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldgt.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDGT_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldgt.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDGT_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldgt.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDLE_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldle.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDLE_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldle.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDLE_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldle.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDLE_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldle.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STGT_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stgt.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STGT_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stgt.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case 
LA_3R_STGT_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stgt.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STGT_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stgt.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STLE_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stle.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STLE_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stle.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STLE_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stle.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STLE_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stle.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } +} + +/***************************************************************************** + * + * Display (optionally) the instruction encoding in hex + */ + +void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) +{ + // We do not display the instruction hex if we want diff-able disassembly + if (!emitComp->opts.disDiffable) + { + if (sz == 4) + { + printf(" %08X ", (*((code_t*)code))); + } + else + { + assert(sz == 0); + printf(" "); + } + } +} + +void emitter::emitDispIns( + instrDesc* id, bool 
isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +{//not used on loongarch64. + printf("------------not implements emitDispIns() for loongarch64!!!\n"); +} + +/***************************************************************************** + * + * Display a stack frame reference. + */ + +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +{ + printf("["); + + if (varx < 0) + printf("TEMP_%02u", -varx); + else + emitComp->gtDispLclVar(+varx, false); + + if (disp < 0) + printf("-0x%02x", -disp); + else if (disp > 0) + printf("+0x%02x", +disp); + + printf("]"); + + if (varx >= 0 && emitComp->opts.varNames) + { + LclVarDsc* varDsc; + const char* varName; + + assert((unsigned)varx < emitComp->lvaCount); + varDsc = emitComp->lvaTable + varx; + varName = emitComp->compLocalVarName(varx, offs); + + if (varName) + { + printf("'%s", varName); + + if (disp < 0) + printf("-%d", -disp); + else if (disp > 0) + printf("+%d", +disp); + + printf("'"); + } + } +} + +#endif // DEBUG + +// Generate code for a load or store operation with a potentially complex addressing mode +// This method handles the case of a GT_IND with contained GT_LEA op1 of the x86 form [base + index*sccale + offset] +// Since LOONGARCH64 does not directly support this complex of an addressing mode +// we may generates up to three instructions for this for LOONGARCH64 +// +void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir) +{ + GenTree* addr = indir->Addr(); + + if (addr->isContained()) + { + assert(addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR, GT_LEA)); + + int offset = 0; + DWORD lsl = 0; + + if (addr->OperGet() == GT_LEA) + { + offset = addr->AsAddrMode()->Offset(); + if (addr->AsAddrMode()->gtScale > 0) + { + assert(isPow2(addr->AsAddrMode()->gtScale)); + BitScanForward(&lsl, addr->AsAddrMode()->gtScale); + } + } + + GenTree* memBase = indir->Base(); + emitAttr addType = 
varTypeIsGC(memBase) ? EA_BYREF : EA_PTRSIZE; + + if (indir->HasIndex()) + { + GenTree* index = indir->Index(); + + if (offset != 0) + { + regNumber tmpReg = indir->GetSingleTempReg(); + + if (isValidSimm12(offset)) + { + if (lsl > 0) + { + // Generate code to set tmpReg = base + index*scale + emitIns_R_R_I(INS_slli_d, addType, REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), REG_R21); + } + else // no scale + { + // Generate code to set tmpReg = base + index + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum()); + } + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + + // Then load/store dataReg from/to [tmpReg + offset] + emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset); + } + else // large offset + { + // First load/store tmpReg with the large offset constant + emitIns_I_la(EA_PTRSIZE, tmpReg, offset);//codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + // Then add the base register + // rd = rd + base + emitIns_R_R_R(INS_add_d, addType, tmpReg, tmpReg, memBase->GetRegNum()); + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + noway_assert(tmpReg != index->GetRegNum()); + + // Then load/store dataReg from/to [tmpReg + index*scale] + emitIns_R_R_I(INS_slli_d, addType, REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add_d, addType, tmpReg, tmpReg, REG_R21); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, 0); + } + } + else // (offset == 0) + { + // Then load/store dataReg from/to [memBase + index] + switch (EA_SIZE(emitTypeSize(indir->TypeGet()))) + { + case EA_1BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_bu; + else + ins = INS_ldx_b; + } + else + ins = INS_stx_b; + break; + case EA_2BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= 
INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_hu; + else + ins = INS_ldx_h; + } + else + ins = INS_stx_h; + break; + case EA_4BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || (ins == INS_fst_s) || (ins == INS_fld_s)); + assert(INS_fst_s > INS_st_d); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_wu; + else + ins = INS_ldx_w; + } + else if (ins == INS_fld_s) + ins = INS_fldx_s; + else if (ins == INS_fst_s) + ins = INS_fstx_s; + else + ins = INS_stx_w; + break; + case EA_8BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || (ins == INS_fst_d) || (ins == INS_fld_d)); + assert(INS_fst_d > INS_st_d); + if (ins <= INS_ld_wu) + { + ins = INS_ldx_d; + } + else if (ins == INS_fld_d) + ins = INS_fldx_d; + else if (ins == INS_fst_d) + ins = INS_fstx_d; + else + ins = INS_stx_d; + break; + default: + assert(!"------------TODO for LOONGARCH64: unsupported ins."); + } + + if (lsl > 0) + { + // Then load/store dataReg from/to [memBase + index*scale] + emitIns_R_R_I(INS_slli_d, emitActualTypeSize(index->TypeGet()), REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), REG_R21); + } + else // no scale + { + emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum()); + } + } + } + else // no Index register + { + if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Get a temp integer register to compute long address. 
+ regNumber addrReg = indir->GetSingleTempReg(); + emitIns_R_C(ins, attr, dataReg, addrReg, addr->AsClsVar()->gtClsVarHnd, 0); + } + else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + unsigned offset = varNode->GetLclOffs(); + if (emitInsIsStore(ins)) + { + emitIns_S_R(ins, attr, dataReg, lclNum, offset); + } + else + { + emitIns_R_S(ins, attr, dataReg, lclNum, offset); + } + } + else if (isValidSimm12(offset)) + { + // Then load/store dataReg from/to [memBase + offset] + emitIns_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), offset); + } + else + { + // We require a tmpReg to hold the offset + regNumber tmpReg = indir->GetSingleTempReg(); + + // First load/store tmpReg with the large offset constant + emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + //codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + + // Then load/store dataReg from/to [memBase + tmpReg] + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), tmpReg); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, 0); + } + } + } + else // addr is not contained, so we evaluate it into a register + { +#ifdef DEBUG + if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // If the local var is a gcref or byref, the local var better be untracked, because we have + // no logic here to track local variable lifetime changes, like we do in the contained case + // above. E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local + // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. 
+ GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + LclVarDsc* varDsc = emitComp->lvaGetDesc(lclNum); + assert(!varDsc->lvTracked); + } +#endif // DEBUG + // Then load/store dataReg from/to [addrReg] + emitIns_R_R_I(ins, attr, dataReg, addr->GetRegNum(), 0); + } +} + +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. + +regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) +{ + assert(!"unimplemented on LOONGARCH yet"); + return REG_R0; +} + +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. +regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2) +{ + // dst can only be a reg + assert(!dst->isContained()); + + // find immed (if any) - it cannot be a dst + // Only one src can be an int. + GenTreeIntConCommon* intConst = nullptr; + GenTree* nonIntReg = nullptr; + + bool needCheckOv = dst->gtOverflowEx(); + + if (varTypeIsFloating(dst)) + { + // src1 can only be a reg + assert(!src1->isContained()); + // src2 can only be a reg + assert(!src2->isContained()); + } + else // not floating point + { + // src2 can be immed or reg + assert(!src2->isContained() || src2->isContainedIntOrIImmed()); + + // Check src2 first as we can always allow it to be a contained immediate + if (src2->isContainedIntOrIImmed()) + { + intConst = src2->AsIntConCommon(); + nonIntReg = src1; + } + // Only for commutative operations do we check src1 and allow it to be a contained immediate + else if (dst->OperIsCommutative()) + { + // src1 can be immed or reg + assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + + // Check src1 and allow it to be a contained immediate + if (src1->isContainedIntOrIImmed()) + { + assert(!src2->isContainedIntOrIImmed()); + intConst = src1->AsIntConCommon(); + nonIntReg = src2; + 
} + } + else + { + // src1 can only be a reg + assert(!src1->isContained()); + } + } + + if (needCheckOv) + { + if (ins == INS_add_d) + { + assert(attr == EA_8BYTE); + } + else if (ins == INS_add_w)// || ins == INS_add + { + assert(attr == EA_4BYTE); + } + else if (ins == INS_addi_d) + { + assert(intConst != nullptr); + } + else if (ins == INS_addi_w) + { + assert(intConst != nullptr); + } + else if (ins == INS_sub_d) + { + assert(attr == EA_8BYTE); + } + else if (ins == INS_sub_w) + { + assert(attr == EA_4BYTE); + } + else if ((ins == INS_mul_d) || (ins == INS_mulh_d) || (ins == INS_mulh_du)) + { + assert(attr == EA_8BYTE); + //NOTE: overflow format doesn't support an int constant operand directly. + assert(intConst == nullptr); + } + else if ((ins == INS_mul_w) || (ins == INS_mulw_d_w) || (ins == INS_mulh_w) || (ins == INS_mulh_wu) || (ins == INS_mulw_d_wu)) + { + assert(attr == EA_4BYTE); + //NOTE: overflow format doesn't support an int constant operand directly. + assert(intConst == nullptr); + } + else + { +#ifdef DEBUG + printf("LOONGARCH64-Invalid ins for overflow check: %s\n", codeGen->genInsName(ins)); +#endif + assert(!"Invalid ins for overflow check"); + } + } + + if (intConst != nullptr) + {//should re-design this case!!! ---2020.04.11. 
+ ssize_t imm = intConst->IconValue(); + if (ins == INS_andi || ins == INS_ori || ins == INS_xori) + //assert((0 <= imm) && (imm <= 0xfff)); + assert((-2048 <= imm) && (imm <= 0xfff)); + else + assert((-2049 < imm) && (imm < 2048)); + + if (ins == INS_sub_d) + { + assert(attr == EA_8BYTE); + assert(imm != -2048); + ins = INS_addi_d; + imm = -imm; + } + else if (ins == INS_sub_w) + { + assert(attr == EA_4BYTE); + assert(imm != -2048); + ins = INS_addi_w; + imm = -imm; + } + + assert(ins == INS_addi_d || ins == INS_addi_w || ins == INS_andi || ins == INS_ori || ins == INS_xori); + + if ((imm < 0) && (ins == INS_andi || ins == INS_ori || ins == INS_xori)) + { + assert(attr == EA_8BYTE || attr == EA_4BYTE); + assert(nonIntReg->GetRegNum() != REG_R21); + + emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); + + if (ins == INS_andi) + { + ins = INS_and; + } + else if (ins == INS_ori) + { + ins = INS_or; + } + else if (ins == INS_xori) + { + ins = INS_xor; + } + else + { + unreached(); + } + + emitIns_R_R_R(ins, attr, dst->GetRegNum(), REG_R21, nonIntReg->GetRegNum()); + + goto L_Done; + } + + if (needCheckOv) + { + emitIns_R_R_R(INS_or, attr, REG_R21, nonIntReg->GetRegNum(), REG_R0); + } + + emitIns_R_R_I(ins, attr, dst->GetRegNum(), nonIntReg->GetRegNum(), imm); + + if (needCheckOv) + { + if (ins == INS_addi_d || ins == INS_addi_w) + { + // A = B + C + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, dst->GetRegNum(), nullptr, REG_R21); + } + else + { + if (imm > 0) + { + // B > 0 and C > 0, if A < B, goto overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + emitIns_J_cond_la(INS_bge, tmpLabel, REG_R0, REG_R21); + emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_R21, dst->GetRegNum(), imm); + + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + + codeGen->genDefineTempLabel(tmpLabel); + } + else if (imm < 0) + { + // B < 0 and C < 0, if A > B, goto overflow + BasicBlock* tmpLabel = 
codeGen->genCreateTempLabel(); + emitIns_J_cond_la(INS_bge, tmpLabel, REG_R21, REG_R0); + emitIns_R_R_I(INS_addi_d, attr, REG_R21, REG_R0, imm); + + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, REG_R21, nullptr, dst->GetRegNum()); + + codeGen->genDefineTempLabel(tmpLabel); + } + } + } + else + { + assert(!"unimplemented on LOONGARCH yet"); + } + } + } + else if (varTypeIsFloating(dst)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + else if (dst->OperGet() == GT_MUL) + { + if (!needCheckOv && !(dst->gtFlags & GTF_UNSIGNED)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + else + { + if (needCheckOv) + { + assert(REG_R21 != dst->GetRegNum()); + assert(REG_R21 != src1->GetRegNum()); + assert(REG_R21 != src2->GetRegNum()); + + instruction ins2; + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + ins2 = INS_mulh_wu; + else + ins2 = INS_mulh_du; + } + else + { + if (attr == EA_8BYTE) + ins2 = INS_mulh_d; + else + ins2 = INS_mulh_w; + } + + emitIns_R_R_R(ins2, attr, REG_R21, src1->GetRegNum(), src2->GetRegNum()); + } + + // n * n bytes will store n bytes result + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, dst->GetRegNum(), REG_R0, 63, 32); + //else + //{ + // assert(!"unimplemented on LOONGARCH yet: ulong * ulong !!!"); + //} + } + + if (needCheckOv) + { + assert(REG_R21 != dst->GetRegNum()); + assert(REG_R21 != src1->GetRegNum()); + assert(REG_R21 != src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + } + else + { + size_t imm = (EA_SIZE(attr) == EA_8BYTE) ? 63 : 31; + emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? 
INS_srai_d : INS_srai_w, attr, REG_T0, dst->GetRegNum(), imm); + //TODO: FIXME:should confirm reg REG_T0! + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, REG_T0); + } + } + } + } + else if (dst->OperGet() == GT_AND || dst->OperGet() == GT_OR || dst->OperGet() == GT_XOR) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + + //NOTE: can/should amend: LOONGARCH needs to sign-extend dst when deal with 32bit data. + if (EA_SIZE(attr) == EA_4BYTE) + emitIns_R_R_I(INS_slli_w, attr, dst->GetRegNum(), dst->GetRegNum(), 0); + } + else + { + regNumber regOp1 = src1->GetRegNum(); + regNumber regOp2 = src2->GetRegNum(); + regNumber saveOperReg1 = REG_NA; + regNumber saveOperReg2 = REG_NA; + + if ((dst->gtFlags & GTF_UNSIGNED) && (attr == EA_8BYTE)) + { + if (src1->gtType == TYP_INT) + { + assert(REG_R21 != regOp1); + assert(REG_RA != regOp1); + emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp1, /*src1->GetRegNum(),*/ 31, 0); + regOp1 = REG_RA;//dst->ExtractTempReg(); + } + if (src2->gtType == TYP_INT) + { + assert(REG_R21 != regOp2); + assert(REG_RA != regOp2); + emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_R21, regOp2, /*src2->GetRegNum(),*/ 31, 0); + regOp2 = REG_R21;//dst->ExtractTempReg(); + } + } + if (needCheckOv) + { + assert(!varTypeIsFloating(dst)); + + assert(REG_R21 != dst->GetRegNum()); + assert(REG_RA != dst->GetRegNum()); + + if (dst->GetRegNum() == regOp1) + { + assert(REG_R21 != regOp1); + assert(REG_RA != regOp1); + saveOperReg1 = REG_R21; + saveOperReg2 = regOp2; + emitIns_R_R_R(INS_or, attr, REG_R21, regOp1, REG_R0); + } + else if (dst->GetRegNum() == regOp2) + { + assert(REG_R21 != regOp2); + assert(REG_RA != regOp2); + saveOperReg1 = regOp1; + saveOperReg2 = REG_R21; + emitIns_R_R_R(INS_or, attr, REG_R21, regOp2, REG_R0); + } + else + { + saveOperReg1 = regOp1; + saveOperReg2 = regOp2; + } + } + + emitIns_R_R_R(ins, attr, dst->GetRegNum(), regOp1, regOp2); + + if (needCheckOv) + { + 
if (dst->OperGet() == GT_ADD || dst->OperGet() == GT_SUB) + { + ssize_t imm; + regNumber tempReg1; + regNumber tempReg2; + // ADD : A = B + C + // SUB : C = A - B + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + // if A < B, goto overflow + if (dst->OperGet() == GT_ADD) + { + tempReg1 = dst->GetRegNum(); + tempReg2 = saveOperReg1; + } + else + { + tempReg1 = saveOperReg1; + tempReg2 = saveOperReg2; + } + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, tempReg1, nullptr, tempReg2); + } + else + { + tempReg1 = REG_RA; + tempReg2 = dst->GetSingleTempReg(); + assert(tempReg1 != tempReg2); + assert(tempReg1 != saveOperReg1); + assert(tempReg2 != saveOperReg2); + + ssize_t ui6 = (attr == EA_4BYTE) ? 31 : 63; + if (dst->OperGet() == GT_ADD) + emitIns_R_R_I(INS_srli_d, attr, tempReg1, saveOperReg1, ui6); + else + emitIns_R_R_I(INS_srli_d, attr, tempReg1, dst->GetRegNum(), ui6); + emitIns_R_R_I(INS_srli_d, attr, tempReg2, saveOperReg2, ui6); + + emitIns_R_R_R(INS_xor, attr, tempReg1, tempReg1, tempReg2); + if (attr == EA_4BYTE) + { + imm = 1; + emitIns_R_R_I(INS_andi, attr, tempReg1, tempReg1, imm); + emitIns_R_R_I(INS_andi, attr, tempReg2, tempReg2, imm); + } + // if (B > 0 && C < 0) || (B < 0 && C > 0), skip overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel2 = codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel3 = codeGen->genCreateTempLabel(); + + emitIns_J_cond_la(INS_bne, tmpLabel, tempReg1, REG_R0); + + emitIns_J_cond_la(INS_bne, tmpLabel3, tempReg2, REG_R0); + + // B > 0 and C > 0, if A < B, goto overflow + emitIns_J_cond_la(INS_bge, tmpLabel, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2); + + codeGen->genDefineTempLabel(tmpLabel2); + + codeGen->genJumpToThrowHlpBlk(EJ_jmp, SCK_OVERFLOW); + + codeGen->genDefineTempLabel(tmpLabel3); + + // B < 0 and C < 0, if A > B, goto overflow + emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperGet() == GT_ADD ? 
saveOperReg1 : saveOperReg2, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1); + + codeGen->genDefineTempLabel(tmpLabel); + } + } + else + { +#ifdef DEBUG + printf("---------[LOONGARCH64]-NOTE: UnsignedOverflow instruction %d\n", ins); +#endif + assert(!"unimplemented on LOONGARCH yet"); + } + } + } + +L_Done: + + return dst->GetRegNum(); +} + +unsigned emitter::get_curTotalCodeSize() +{ + return emitTotalCodeSize; +} + +#if defined(DEBUG) || defined(LATE_DISASM) + +//---------------------------------------------------------------------------------------- +// getInsExecutionCharacteristics: +// Returns the current instruction execution characteristics +// +// Arguments: +// id - The current instruction descriptor to be evaluated +// +// Return Value: +// A struct containing the current instruction execution characteristics +// +// Notes: +// The instruction latencies and throughput values returned by this function +// are NOT accurate and just a function feature. +emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(instrDesc* id) +{ + insExecutionCharacteristics result; + + //TODO: support this function for LoongArch64. + result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; + result.insLatency = PERFSCORE_LATENCY_ZERO; + result.insMemoryAccessKind = PERFSCORE_MEMORY_NONE; + + return result; +} + +#endif // defined(DEBUG) || defined(LATE_DISASM) + +#ifdef DEBUG +//------------------------------------------------------------------------ +// emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. +// +// Arguments: +// reg - A general-purpose register or SIMD and floating-point register. +// size - A register size. +// varName - unused parameter. +// +// Return value: +// A string that represents a general-purpose register name or SIMD and floating-point scalar register name. 
+// +const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) +{ + assert(reg < REG_COUNT); + + const char* rn = nullptr; + + rn = RegNames[reg]; + assert(rn != nullptr); + + return rn; +} +#endif + +//------------------------------------------------------------------------ +// IsMovInstruction: Determines whether a give instruction is a move instruction +// +// Arguments: +// ins -- The instruction being checked +// +bool emitter::IsMovInstruction(instruction ins) +{ + switch (ins) + { + case INS_mov: + case INS_fmov_s: + case INS_fmov_d: + case INS_movgr2fr_w: + case INS_movgr2fr_d: + case INS_movfr2gr_s: + case INS_movfr2gr_d: + { + return true; + } + + default: + { + return false; + } + } +} + +//---------------------------------------------------------------------------------------- +// IsRedundantMov: +// Check if the current `mov` instruction is redundant and can be omitted. +// A `mov` is redundant in following 3 cases: +// +// 1. Move to same register +// (Except 4-byte movement like "mov w1, w1" which zeros out upper bits of x1 register) +// +// mov Rx, Rx +// +// 2. Move that is identical to last instruction emitted. +// +// mov Rx, Ry # <-- last instruction +// mov Rx, Ry # <-- current instruction can be omitted. +// +// 3. Opposite Move as that of last instruction emitted. +// +// mov Rx, Ry # <-- last instruction +// mov Ry, Rx # <-- current instruction can be omitted. +// +// Arguments: +// ins - The current instruction +// size - Operand size of current instruction +// dst - The current destination +// src - The current source +// canSkip - The move can be skipped as it doesn't represent special semantics +// +// Return Value: +// true if previous instruction moved from current dst to src. 
+ +bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip) +{ + assert(!"unimplemented on LOONGARCH yet"); + return false; +#if 0 + assert(ins == INS_mov); + + if (canSkip && (dst == src)) + { + // These elisions used to be explicit even when optimizations were disabled + return true; + } + + if (!emitComp->opts.OptimizationEnabled()) + { + // The remaining move elisions should only happen if optimizations are enabled + return false; + } + + if (dst == src) + { + // A mov with a EA_4BYTE has the side-effect of clearing the upper bits + // So only eliminate mov instructions that are not clearing the upper bits + // + if (isGeneralRegisterOrSP(dst) && (size == EA_8BYTE)) + { + JITDUMP("\n -- suppressing mov because src and dst is same 8-byte register.\n"); + return true; + } + else if (isVectorRegister(dst) && (size == EA_16BYTE)) + { + JITDUMP("\n -- suppressing mov because src and dst is same 16-byte register.\n"); + return true; + } + } + + bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0); + + if (!isFirstInstrInBlock && // Don't optimize if instruction is not the first instruction in IG. + (emitLastIns != nullptr) && + (emitLastIns->idIns() == INS_mov) && // Don't optimize if last instruction was not 'mov'. + (emitLastIns->idOpSize() == size)) // Don't optimize if operand size is different than previous instruction. + { + // Check if we did same move in prev instruction except dst/src were switched. + regNumber prevDst = emitLastIns->idReg1(); + regNumber prevSrc = emitLastIns->idReg2(); + insFormat lastInsfmt = emitLastIns->idInsFmt(); + + // Sometimes emitLastIns can be a mov with single register e.g. "mov reg, #imm". So ensure to + // optimize formats that does vector-to-vector or scalar-to-scalar register movs. 
+ // + const bool isValidLastInsFormats = + ((lastInsfmt == IF_DV_3C) || (lastInsfmt == IF_DR_2G) || (lastInsfmt == IF_DR_2E)); + + if (isValidLastInsFormats && (prevDst == dst) && (prevSrc == src)) + { + assert(emitLastIns->idOpSize() == size); + JITDUMP("\n -- suppressing mov because previous instruction already moved from src to dst register.\n"); + return true; + } + + if ((prevDst == src) && (prevSrc == dst) && isValidLastInsFormats) + { + // For mov with EA_8BYTE, ensure src/dst are both scalar or both vector. + if (size == EA_8BYTE) + { + if (isVectorRegister(src) == isVectorRegister(dst)) + { + JITDUMP("\n -- suppressing mov because previous instruction already did an opposite move from dst " + "to src register.\n"); + return true; + } + } + + // For mov with EA_16BYTE, both src/dst will be vector. + else if (size == EA_16BYTE) + { + assert(isVectorRegister(src) && isVectorRegister(dst)); + assert(lastInsfmt == IF_DV_3C); + + JITDUMP("\n -- suppressing mov because previous instruction already did an opposite move from dst to " + "src register.\n"); + return true; + } + + // For mov of other sizes, don't optimize because it has side-effect of clearing the upper bits. + } + } + + return false; +#endif +} + +//---------------------------------------------------------------------------------------- +// IsRedundantLdStr: +// For ldr/str pair next to each other, check if the current load or store is needed or is +// the value already present as of previous instruction. +// +// ldr x1, [x2, #56] +// str x1, [x2, #56] <-- redundant +// +// OR +// +// str x1, [x2, #56] +// ldr x1, [x2, #56] <-- redundant + +// Arguments: +// ins - The current instruction +// dst - The current destination +// src - The current source +// imm - Immediate offset +// size - Operand size +// fmt - Format of instruction +// Return Value: +// true if previous instruction already has desired value in register/memory location. 
+ +bool emitter::IsRedundantLdStr( + instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt) +{ + assert(!"unimplemented on LOONGARCH yet"); + return false; +#if 0 + bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0); + + if (((ins != INS_ldr) && (ins != INS_str)) || (isFirstInstrInBlock) || (emitLastIns == nullptr)) + { + return false; + } + + regNumber prevReg1 = emitLastIns->idReg1(); + regNumber prevReg2 = emitLastIns->idReg2(); + insFormat lastInsfmt = emitLastIns->idInsFmt(); + emitAttr prevSize = emitLastIns->idOpSize(); + ssize_t prevImm = emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns(); + + // Only optimize if: + // 1. "base" or "base plus immediate offset" addressing modes. + // 2. Addressing mode matches with previous instruction. + // 3. The operand size matches with previous instruction + if (((fmt != IF_LS_2A) && (fmt != IF_LS_2B)) || (fmt != lastInsfmt) || (prevSize != size)) + { + return false; + } + + if ((ins == INS_ldr) && (emitLastIns->idIns() == INS_str)) + { + // If reg1 is of size less than 8-bytes, then eliminating the 'ldr' + // will not zero the upper bits of reg1. + + // Make sure operand size is 8-bytes + // str w0, [x1, #4] + // ldr w0, [x1, #4] <-- can't eliminate because upper-bits of x0 won't get set. + if (size != EA_8BYTE) + { + return false; + } + + if ((prevReg1 == reg1) && (prevReg2 == reg2) && (imm == prevImm)) + { + JITDUMP("\n -- suppressing 'ldr reg%u [reg%u, #%u]' as previous 'str reg%u [reg%u, #%u]' was from same " + "location.\n", + reg1, reg2, imm, prevReg1, prevReg2, prevImm); + return true; + } + } + else if ((ins == INS_str) && (emitLastIns->idIns() == INS_ldr)) + { + // Make sure src and dst registers are not same. + // ldr x0, [x0, #4] + // str x0, [x0, #4] <-- can't eliminate because [x0+3] is not same destination as previous source. 
+ // Note, however, that we can not eliminate store in the following sequence + // ldr wzr, [x0, #4] + // str wzr, [x0, #4] + // since load operation doesn't (and can't) change the value of its destination register. + if ((reg1 != reg2) && (prevReg1 == reg1) && (prevReg2 == reg2) && (imm == prevImm) && (reg1 != REG_ZR)) + { + JITDUMP("\n -- suppressing 'str reg%u [reg%u, #%u]' as previous 'ldr reg%u [reg%u, #%u]' was from same " + "location.\n", + reg1, reg2, imm, prevReg1, prevReg2, prevImm); + return true; + } + } + + return false; +#endif +} +#endif // defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h new file mode 100644 index 0000000000000..50da1b09a0f20 --- /dev/null +++ b/src/coreclr/jit/emitloongarch64.h @@ -0,0 +1,421 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. + +#if defined(TARGET_LOONGARCH64) + +// The LOONGARCH64 instructions are all 32 bits in size. +// we use an unsigned int to hold the encoded instructions. +// This typedef defines the type that we use to hold encoded instructions. +// +typedef unsigned int code_t; + +/************************************************************************/ +/* Routines that compute the size of / encode instructions */ +/************************************************************************/ + +struct CnsVal +{ + ssize_t cnsVal; + bool cnsReloc; +}; + +#ifdef DEBUG + +/************************************************************************/ +/* Debug-only routines to display instructions */ +/************************************************************************/ + +const char* emitFPregName(unsigned reg, bool varName = true); +const char* emitVectorRegName(regNumber reg); + +//NOTE: At least 32bytes within dst. 
+void emitDisInsName(code_t code, const BYTE* dst, instrDesc* id); +#endif // DEBUG + +void emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 = REG_R0, regNumber reg2 = REG_R0); +void emitIns_I_la(emitAttr attr, regNumber reg, ssize_t imm); + +/************************************************************************/ +/* Private members that deal with target-dependent instr. descriptors */ +/************************************************************************/ + +private: +instrDesc* emitNewInstrCallDir(int argCnt, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + +instrDesc* emitNewInstrCallInd(int argCnt, + ssize_t disp, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + +/************************************************************************/ +/* Private helpers for instruction output */ +/************************************************************************/ + +private: +bool emitInsIsLoad(instruction ins); +bool emitInsIsStore(instruction ins); +bool emitInsIsLoadOrStore(instruction ins); + +emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/); + +// Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + offset] +void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir); + +// Emit the 32-bit LOONGARCH64 instruction 'code' into the 'dst' buffer +unsigned emitOutput_Instr(BYTE* dst, code_t code); + +//NOTEADD: New functions in emitarm64.h +// Method to do check if mov is redundant with respect to the last instruction. +// If yes, the caller of this method can choose to omit current mov instruction. 
+static bool IsMovInstruction(instruction ins); +bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip); +bool IsRedundantLdStr(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);//New functions end. + +/************************************************************************ +* +* This union is used to to encode/decode the special LOONGARCH64 immediate values +* that is listed as imm(N,r,s) and referred to as 'bitmask immediate' +*/ + +union bitMaskImm { + struct + { + unsigned immS : 6; // bits 0..5 + unsigned immR : 6; // bits 6..11 + unsigned immN : 1; // bits 12 + }; + unsigned immNRS; // concat N:R:S forming a 13-bit unsigned immediate +}; + +/************************************************************************ +* +* Convert between a 64-bit immediate and its 'bitmask immediate' +* representation imm(i16,hw) +*/ + +//static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size); + +//static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size); + +/************************************************************************ +* +* This union is used to to encode/decode the special LOONGARCH64 immediate values +* that is listed as imm(i16,hw) and referred to as 'halfword immediate' +*/ + +union halfwordImm { + struct + { + unsigned immVal : 16; // bits 0..15 + unsigned immHW : 2; // bits 16..17 + }; + unsigned immHWVal; // concat HW:Val forming a 18-bit unsigned immediate +}; + +/************************************************************************ +* +* Convert between a 64-bit immediate and its 'halfword immediate' +* representation imm(i16,hw) +*/ + +//static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size); + +//static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size); + +/************************************************************************ +* +* This union is used to encode/decode the 
special LOONGARCH64 immediate values +* that is listed as imm(i16,by) and referred to as 'byteShifted immediate' +*/ + +union byteShiftedImm { + struct + { + unsigned immVal : 8; // bits 0..7 + unsigned immBY : 2; // bits 8..9 + unsigned immOnes : 1; // bit 10 + }; + unsigned immBSVal; // concat Ones:BY:Val forming a 10-bit unsigned immediate +}; + +/************************************************************************ +* +* Convert between a 16/32-bit immediate and its 'byteShifted immediate' +* representation imm(i8,by) +*/ + +//static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL); + +//static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size); + +/************************************************************************ +* +* This union is used to to encode/decode the special LOONGARCH64 immediate values +* that are use for FMOV immediate and referred to as 'float 8-bit immediate' +*/ + +union floatImm8 { + struct + { + unsigned immMant : 4; // bits 0..3 + unsigned immExp : 3; // bits 4..6 + unsigned immSign : 1; // bits 7 + }; + unsigned immFPIVal; // concat Sign:Exp:Mant forming an 8-bit unsigned immediate +}; + +/************************************************************************ +* +* Convert between a double and its 'float 8-bit immediate' representation +*/ + +//static emitter::floatImm8 emitEncodeFloatImm8(double immDbl); + +//static double emitDecodeFloatImm8(const emitter::floatImm8 fpImm); + +/************************************************************************ +* +* This union is used to to encode/decode the cond, nzcv and imm5 values for +* instructions that use them in the small constant immediate field +*/ + +union condFlagsImm { + struct + { + //insCond cond : 4; // bits 0..3 + //insCflags flags : 4; // bits 4..7 + unsigned imm5 : 5; // bits 8..12 + }; + unsigned immCFVal; // concat imm5:flags:cond forming an 13-bit unsigned immediate +}; + +// Returns true if 
'reg' represents an integer register. +static bool isIntegerRegister(regNumber reg) +{ + return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); +} + +// Returns true if 'value' is a legal signed immediate 12 bit encoding. +static bool isValidSimm12(ssize_t value) +{ + return -( ((int)1) << 11 ) <= value && value < ( ((int)1) << 11 ); +}; + +// Returns true if 'value' is a legal signed immediate 16 bit encoding. +static bool isValidSimm16(ssize_t value) +{ + return -( ((int)1) << 15 ) <= value && value < ( ((int)1) << 15 ); +}; + +// Returns true if 'value' is a legal signed immediate 20 bit encoding. +static bool isValidSimm20(ssize_t value) +{ + return -( ((int)1) << 19 ) <= value && value < ( ((int)1) << 19 ); +}; + +/************************************************************************/ +/* Public inline informational methods */ +/************************************************************************/ + +public: + +// Returns the number of bits used by the given 'size'. +inline static unsigned getBitWidth(emitAttr size) +{ + assert(size <= EA_8BYTE); + return (unsigned)size * BITS_PER_BYTE; +} + +inline static bool isGeneralRegister(regNumber reg) +{ + // Excludes REG_R0 ?? 
+ return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); +} + +inline static bool isGeneralRegisterOrR0(regNumber reg) +{ + return (reg >= REG_FIRST) && (reg <= REG_INT_LAST); +} // Includes REG_R0 + +inline static bool isFloatReg(regNumber reg) +{ + return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST); +} + +/************************************************************************/ +/* The public entry points to output instructions */ +/************************************************************************/ + +public: +void emitIns(instruction ins); + +void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); +void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); + +void emitIns_I(instruction ins, emitAttr attr, ssize_t imm); +void emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs); + +void emitIns_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, ssize_t hint, ssize_t off, insOpts opt = INS_OPTS_NONE); + +void emitIns_R(instruction ins, emitAttr attr, regNumber reg); + +void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE); + +//NOTEADD: NEW function in emitarm64. 
+void emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags) +{ + emitIns_R_R(ins, attr, reg1, reg2); +} + +void emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt = INS_OPTS_NONE); + +// Checks for a large immediate that needs a second instruction +void emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm); + +void emitIns_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt = INS_OPTS_NONE, + emitAttr attrReg2 = EA_UNKNOWN); + +void emitIns_R_R_R_Ext(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + insOpts opt = INS_OPTS_NONE, + int shiftAmount = -1); + +//NODECHANGE: ADD an arg. 
+void emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); + +//void emitIns_BARR(instruction ins, insBarrier barrier); + +void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs); + +void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); + +void emitIns_S_S_R_R( + instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); + +//void emitIns_R_R_S( +// instruction ins, emitAttr attr, regNumber ireg, regNumber ireg2, int sa); + +void emitIns_R_R_S_S( + instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); + +void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val); + +void emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber tmpReg, CORINFO_FIELD_HANDLE fldHnd, int offs); + +void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); + +void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); + +void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs); + +void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, ssize_t offs, ssize_t val); + +void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg); + +void emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int instrCount); + +void emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int offs); + +void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); + +//NODECHANGE: ADD a description of arguments "disp" +void emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t disp DEBUGARG(size_t targetHandle = 0) 
DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); + + +void emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); + +void emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp); + +void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp); + +void emitIns_R_ARX( + instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp); + +enum EmitCallType +{ + + // I have included here, but commented out, all the values used by the x86 emitter. + // However, LOONGARCH has a much reduced instruction set, and so the LOONGARCH emitter only + // supports a subset of the x86 variants. By leaving them commented out, it becomes + // a compile time error if code tries to use them (and hopefully see this comment + // and know why they are unavailable on LOONGARCH), while making it easier to stay + // in-sync with x86 and possibly add them back in if needed. 
+ + EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method + // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method + //EC_FUNC_ADDR, // Direct call to an absolute address + + // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable) + EC_INDIR_R, // Indirect call via register + // EC_INDIR_SR, // Indirect call via stack-reference (local var) + // EC_INDIR_C, // Indirect call via static class var + // EC_INDIR_ARD, // Indirect call via an addressing mode + + EC_COUNT +}; + +void emitIns_Call(EmitCallType callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE + void* addr, + ssize_t argSize, + emitAttr retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + VARSET_VALARG_TP ptrVars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + const DebugInfo& di, + regNumber ireg = REG_NA, + regNumber xreg = REG_NA, + unsigned xmul = 0, + ssize_t disp = 0, + bool isJump = false); + +unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code); +//BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i); +//BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id); +//BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id); +//BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg); +//BYTE* emitOutputShortConstant( +// BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg, emitAttr opSize); + +unsigned get_curTotalCodeSize(); // bytes of code + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h index 4982104acc749..02ab3bb879d6f 100644 --- a/src/coreclr/jit/emitpub.h +++ b/src/coreclr/jit/emitpub.h @@ -139,7 +139,7 @@ static void InitTranslator(PDBRewriter* pPDB, int* rgSecMap, IMAGE_SECTION_HEADE /* Interface for generating 
unwind information */ /************************************************************************/ -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool emitIsFuncEnd(emitLocation* emitLoc, emitLocation* emitLocNextFragment = NULL); @@ -151,7 +151,7 @@ void emitSplit(emitLocation* startLoc, void emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || defined(TARGET_LOONGARCH64) #if defined(TARGET_ARM) diff --git a/src/coreclr/jit/error.h b/src/coreclr/jit/error.h index 126a8665a34e8..fdd75fed5f535 100644 --- a/src/coreclr/jit/error.h +++ b/src/coreclr/jit/error.h @@ -174,6 +174,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_X86) @@ -181,6 +182,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) NYIRAW("NYI_X86: " msg) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_ARM) @@ -188,6 +190,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) NYIRAW("NYI_ARM: " msg) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_ARM64) @@ -195,10 +198,18 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) NYIRAW("NYI_ARM64: " msg) +#define NYI_LOONGARCH64(msg) do { } while (0) + +#elif defined(TARGET_LOONGARCH64) +#define NYI_AMD64(msg) do { } while (0) +#define NYI_X86(msg) do { } while (0) +#define NYI_ARM(msg) do { } while (0) +#define 
NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) NYIRAW("NYI_LOONGARCH64: " msg) #else -#error "Unknown platform, not x86, ARM, or AMD64?" +#error "Unknown platform, not x86, ARM, LOONGARCH64 or AMD64?" #endif diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 4b14dc38522be..622c3e8d1b640 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3042,6 +3042,27 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ *pCostSz += idx->GetCostSz(); } + if (cns != 0) + { + if (cns >= (4096 * genTypeSize(type))) + { + *pCostEx += 1; + *pCostSz += 4; + } + } +#elif defined(TARGET_LOONGARCH64) + if (base) + { + *pCostEx += base->GetCostEx(); + *pCostSz += base->GetCostSz(); + } + + if (idx) + { + *pCostEx += idx->GetCostEx(); + *pCostSz += idx->GetCostSz(); + } + // TODO: workround, should amend for LoongArch64. if (cns != 0) { if (cns >= (4096 * genTypeSize(type))) @@ -3464,13 +3485,20 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) } goto COMMON_CNS; +#elif defined(TARGET_LOONGARCH64) + case GT_CNS_STR: + case GT_CNS_LNG: + case GT_CNS_INT: + // TODO: workround, should amend for LoongArch64. + costEx = 4; + costSz = 4; + goto COMMON_CNS; #else case GT_CNS_STR: case GT_CNS_LNG: case GT_CNS_INT: #error "Unknown TARGET" #endif - COMMON_CNS: /* Note that some code below depends on constants always getting @@ -3526,6 +3554,10 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costEx = IND_COST_EX; costSz = 4; } +#elif defined(TARGET_LOONGARCH64) + // TODO: workround, should amend for LoongArch64. + costEx = 2; + costSz = 8; #else #error "Unknown TARGET" #endif @@ -3699,6 +3731,15 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costEx = IND_COST_EX * 2; costSz = 6; } +#elif defined(TARGET_LOONGARCH64) + // TODO: workround, should amend for LoongArch64. 
+ costEx = 1; + costSz = 2; + if (isflt || varTypeIsFloating(op1->TypeGet())) + { + costEx = 2; + costSz = 4; + } #else #error "Unknown TARGET" #endif @@ -5909,6 +5950,9 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) switch (type) { case TYP_INT: +#ifdef TARGET_LOONGARCH64 + case TYP_UINT: +#endif zero = gtNewIconNode(0); break; @@ -6703,7 +6747,7 @@ bool GenTreeOp::UsesDivideByConstOptimized(Compiler* comp) } // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (!comp->opts.MinOpts() && ((divisorValue >= 3) || !isSignedDivide)) { // All checks pass we can perform the division operation using a reciprocal multiply. @@ -13569,8 +13613,11 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) case TYP_INT: +#ifdef TARGET_LOONGARCH64 + assert(tree->TypeIs(TYP_INT) || tree->TypeIs(TYP_LONG) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); +#else assert(tree->TypeIs(TYP_INT) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); - +#endif // No GC pointer types should be folded here... assert(!varTypeIsGC(op1->TypeGet()) && !varTypeIsGC(op2->TypeGet())); @@ -21353,6 +21400,42 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, m_regType[i] = comp->getJitGCType(gcPtrs[i]); } +#elif defined(TARGET_LOONGARCH64) + assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); + + DWORD numFloatFields = comp->info.compCompHnd->getFieldTypeByHnd(retClsHnd); + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); + + if (numFloatFields & 0x8) + { + assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); + m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & 0x20 ? 
TYP_DOUBLE : TYP_FLOAT; + comp->compFloatingPointUsed = true; + } + else if (numFloatFields & 0x2) + { + assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); + m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & 0x20 ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; + comp->compFloatingPointUsed = true; + } + else if (numFloatFields & 0x4) + { + assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); + m_regType[0] = numFloatFields & 0x10 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + comp->compFloatingPointUsed = true; + } + else + { + for (unsigned i = 0; i < 2; ++i) + { + m_regType[i] = comp->getJitGCType(gcPtrs[i]); + } + } + #elif defined(TARGET_X86) // an 8-byte struct returned using two registers @@ -21543,6 +21626,21 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const resultReg = (regNumber)((unsigned)(REG_FLOATRET) + idx); // V0, V1, V2 or V3 } +#elif defined(TARGET_LOONGARCH64) + var_types regType = GetReturnRegType(idx); + if (idx == 0) + { + resultReg = varTypeIsIntegralOrI(regType) ? REG_INTRET : REG_FLOATRET; // V0 or F0 + } + else + { + noway_assert(idx < 2); // Up to 2 return registers for two-float-field structs + if (varTypeIsIntegralOrI(regType)) + resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // V0 or V1 + else //if (!varTypeIsIntegralOrI(regType)) + resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? 
REG_FLOATRET : REG_FLOATRET_1; // F0 or F1 + } + #endif // TARGET_XXX assert(resultReg != REG_NA); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 3fd3792eb8b1f..190a280cad12f 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4487,12 +4487,17 @@ struct GenTreeCall final : public GenTree } #endif +#if defined(TARGET_LOONGARCH64) + return (gtType == TYP_STRUCT) && (gtReturnTypeDesc.GetReturnRegCount() > 1); +#else if (!varTypeIsStruct(gtType) || HasRetBufArg()) { return false; } // Now it is a struct that is returned in registers. return GetReturnTypeDesc()->IsMultiRegRetType(); +#endif + #else // !FEATURE_MULTIREG_RET return false; #endif // !FEATURE_MULTIREG_RET diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 3519ac6f7b650..9d4b4131e5d54 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -8513,7 +8513,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni return true; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Jit64 compat: if (callerRetType == TYP_VOID) { @@ -10335,7 +10335,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, return impAssignMultiRegTypeToVar(op, retClsHnd DEBUGARG(unmgdCallConv)); } -#elif FEATURE_MULTIREG_RET && defined(TARGET_ARM64) +#elif FEATURE_MULTIREG_RET && (defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) // Is method returning a multi-reg struct? if (IsMultiRegReturnedType(retClsHnd, unmgdCallConv)) @@ -11388,12 +11388,32 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr if (genActualType(op1->TypeGet()) != TYP_I_IMPL) { // insert an explicit upcast +#ifdef TARGET_LOONGARCH64 + if (op1->TypeGet() == TYP_INT && op1->gtOper == GT_CNS_INT) + { + op1->AsIntCon()->gtIconVal = fUnsigned ? 
(uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; + op1->gtType = TYP_LONG; + } + else if (op1->gtOper == GT_CNS_INT) + *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); +#else op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); +#endif } else if (genActualType(op2->TypeGet()) != TYP_I_IMPL) { // insert an explicit upcast +#ifdef TARGET_LOONGARCH64 + if (op2->TypeGet() == TYP_INT && op2->gtOper == GT_CNS_INT) + { + op2->AsIntCon()->gtIconVal = fUnsigned ? (uint32_t)op2->AsIntCon()->gtIconVal : op2->AsIntCon()->gtIconVal; + op2->gtType = TYP_LONG; + } + else + *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); +#else op2 = *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); +#endif } type = TYP_I_IMPL; @@ -12445,7 +12465,17 @@ void Compiler::impImportBlockCode(BasicBlock* block) } #endif // FEATURE_SIMD +#ifdef TARGET_LOONGARCH64 + if (prevOpcode == CEE_LDC_I4_0 || prevOpcode == CEE_LDNULL) + { + op1->gtType = lclTyp; + op1->gtFlags |= GTF_CONTAINED; + } + else + op1 = impImplicitIorI4Cast(op1, lclTyp); +#else op1 = impImplicitIorI4Cast(op1, lclTyp); +#endif #ifdef TARGET_64BIT // Downcast the TYP_I_IMPL into a 32-bit Int for x86 JIT compatiblity @@ -13529,7 +13559,15 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = impPopStack().val; // operand to be shifted impBashVarAddrsToI(op1, op2); +#ifdef TARGET_LOONGARCH64 + if (op2->gtOper == GT_CNS_INT && op2->AsIntCon()->gtIconVal > 31) + { + type = TYP_LONG; + } else + type = genActualType(op1->TypeGet()); +#else type = genActualType(op1->TypeGet()); +#endif op1 = gtNewOperNode(oper, type, op1, op2); impPushOnStack(op1, tiRetVal); @@ -13760,10 +13798,26 @@ void Compiler::impImportBlockCode(BasicBlock* block) // See also identical code in impGetByRefResultType and STSFLD import. 
if (varTypeIsI(op1) && (genActualType(op2) == TYP_INT)) { +#ifdef TARGET_LOONGARCH64 + if (op2->gtOper == GT_CNS_INT) + { + op2->AsIntCon()->gtIconVal = uns ? (uint32_t)op2->AsIntCon()->gtIconVal : (int32_t)op2->AsIntCon()->gtIconVal; + op2->gtType = TYP_LONG; + } + else +#endif op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, TYP_I_IMPL); } else if (varTypeIsI(op2) && (genActualType(op1) == TYP_INT)) { +#ifdef TARGET_LOONGARCH64 + if (op1->gtOper == GT_CNS_INT) + { + op1->AsIntCon()->gtIconVal = uns ? (uint32_t)op1->AsIntCon()->gtIconVal : (int32_t)op1->AsIntCon()->gtIconVal; + op1->gtType = TYP_LONG; + } + else +#endif op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, TYP_I_IMPL); } #endif // TARGET_64BIT @@ -13857,6 +13911,18 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = impPopStack().val; #ifdef TARGET_64BIT +#ifdef TARGET_LOONGARCH64 + if ((op2->OperGet() == GT_CNS_INT)/* && (op2->AsIntCon()->IconValue() == 0)*/) + { + op2->gtType = op1->TypeGet(); + } + /*if (op1->OperGet() == GT_CNS_INT) + { + //assert(op1->gtType == op2->TypeGet()); + //op2->gtType = op1->TypeGet(); + op1->gtFlags |= GTF_CONTAINED; + }*/ +#else if ((op1->TypeGet() == TYP_I_IMPL) && (genActualType(op2->TypeGet()) == TYP_INT)) { op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, uns ? TYP_U_IMPL : TYP_I_IMPL); @@ -13865,11 +13931,20 @@ void Compiler::impImportBlockCode(BasicBlock* block) { op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, uns ? 
TYP_U_IMPL : TYP_I_IMPL); } +#endif #endif // TARGET_64BIT +#ifdef TARGET_LOONGARCH64 + assertImp((genActualType(op1->TypeGet()) == TYP_LONG || genActualType(op1->TypeGet()) == TYP_INT) || + (genActualType(op2->TypeGet()) == TYP_LONG || genActualType(op2->TypeGet()) == TYP_INT) || + genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) || + varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet()) || + varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType)); +#else assertImp(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) || (varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet())) || (varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType))); +#endif if (opts.OptimizationEnabled() && (block->bbJumpDest == block->bbNext)) { @@ -14148,6 +14223,17 @@ void Compiler::impImportBlockCode(BasicBlock* block) } op1 = impPopStack().val; +#ifdef TARGET_LOONGARCH64 + if (!callNode && prevOpcode == CEE_LDC_I4_0) + { + assert(op1->gtOper == GT_CNS_INT && op1->AsIntCon()->gtIconVal == 0); + op1->gtType = genActualType(lclTyp); + impPushOnStack(op1, tiRetVal); + //opcode = CEE_LDC_I4_0; + break; + } +#endif + impBashVarAddrsToI(op1); // Casts from floating point types must not have GTF_UNSIGNED set. @@ -14158,6 +14244,34 @@ void Compiler::impImportBlockCode(BasicBlock* block) // At this point uns, ovf, callNode are all set. 
+ +#ifdef TARGET_LOONGARCH64 + if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtOper == GT_CNS_INT) + { + switch (lclTyp) + { + case TYP_BYTE: + op1->AsIntCon()->gtIconVal = (int8_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_UBYTE: + op1->AsIntCon()->gtIconVal = (uint8_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_USHORT: + op1->AsIntCon()->gtIconVal = (uint16_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_SHORT: + op1->AsIntCon()->gtIconVal = (short)op1->AsIntCon()->gtIconVal; + break; + default: + assert(!"unexpected type"); + return; + } + + op1->gtType = TYP_INT; + + impPushOnStack(op1, tiRetVal); + break; + } else +#endif if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) { op2 = op1->AsOp()->gtOp2; @@ -14222,6 +14336,32 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = gtNewCastNodeL(type, op1, uns, lclTyp); } else +#ifdef TARGET_LOONGARCH64 + if (type != TYP_LONG) + { + if (!ovfl && op1->gtOper == GT_CNS_INT && op1->TypeGet() == TYP_LONG) + { + assert(lclTyp == TYP_INT || lclTyp == TYP_UINT); + if (lclTyp == TYP_INT) + { + op1->AsIntCon()->gtIconVal = (int32_t)op1->AsIntCon()->gtIconVal; + op1->gtType = TYP_INT; + } + else if (lclTyp == TYP_UINT) + { + op1->AsIntCon()->gtIconVal = (uint32_t)op1->AsIntCon()->gtIconVal; + op1->gtType = TYP_UINT; + } + else + op1 = gtNewCastNode(type, op1, uns, lclTyp); + } + else + { + op1 = gtNewCastNode(type, op1, uns, lclTyp); + } + } + else +#endif { op1 = gtNewCastNode(type, op1, uns, lclTyp); } @@ -14231,11 +14371,13 @@ op1->gtFlags |= (GTF_OVERFLOW | GTF_EXCEPT); } +#ifndef TARGET_LOONGARCH64 if (op1->gtGetOp1()->OperIsConst() && opts.OptimizationEnabled()) { // Try and fold the introduced cast op1 = gtFoldExprConst(op1); } +#endif } impPushOnStack(op1, tiRetVal); @@ -16051,6 +16193,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) op2->gtType = TYP_I_IMPL; } else +#ifdef TARGET_LOONGARCH64 + if 
(genActualType(op2->TypeGet()) != TYP_INT) +#endif { bool isUnsigned = false; op2 = gtNewCastNode(TYP_I_IMPL, op2, isUnsigned, TYP_I_IMPL); @@ -17644,6 +17789,9 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) op2 = impImplicitR4orR8Cast(op2, info.compRetType); // Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || +#ifdef TARGET_LOONGARCH64 + genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType) || +#endif ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || @@ -17872,7 +18020,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) } } else -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) ReturnTypeDesc retTypeDesc; retTypeDesc.InitializeStructReturnType(this, retClsHnd, info.compCallConv); unsigned retRegCount = retTypeDesc.GetReturnRegCount(); @@ -18515,6 +18663,14 @@ void Compiler::impImportBlock(BasicBlock* block) { // Spill clique has decided this should be "native int", but this block only pushes an "int". // Insert a sign-extension to "native int" so we match the clique. +#ifdef TARGET_LOONGARCH64 + if (tree->gtOper == GT_CNS_INT/* && !tree->AsIntCon()->gtIconVal*/) + { + tree->gtType = TYP_I_IMPL; + tree->SetContained(); + } + else +#endif verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } @@ -18539,6 +18695,14 @@ void Compiler::impImportBlock(BasicBlock* block) { // Spill clique has decided this should be "byref", but this block only pushes an "int". // Insert a sign-extension to "native int" so we match the clique size. 
+#ifdef TARGET_LOONGARCH64 + if (tree->gtOper == GT_CNS_INT /*&& !tree->AsIntCon()->gtIconVal*/) + { + tree->gtType = TYP_I_IMPL; + tree->SetContained(); + } + else +#endif verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } } @@ -21351,6 +21515,14 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_Sqrt: return true; + default: + return false; + } +#elif defined(TARGET_LOONGARCH64) + switch (intrinsicName) + { + // LOONGARCH64: will amend in the future + default: return false; } diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index bbf204c74caa8..62b2eade19cec 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -66,6 +66,10 @@ const char* CodeGen::genInsName(instruction ins) #define INST9(id, nm, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) nm, #include "instrs.h" +#elif defined(TARGET_LOONGARCH64) + #define INSTS(id, nm, fp, ldst, fmt, e1) nm, + #include "instrs.h" + #else #error "Unknown TARGET" #endif @@ -434,6 +438,21 @@ void CodeGen::inst_Mov(var_types dstType, emitAttr size, insFlags flags /* = INS_FLAGS_DONT_CARE */) { +#ifdef TARGET_LOONGARCH64 + if (isFloatRegType(dstType) != genIsValidFloatReg(dstReg)) + { + if (dstType == TYP_FLOAT) + dstType = TYP_INT; + else if (dstType == TYP_DOUBLE) + dstType = TYP_LONG; + else if (dstType == TYP_INT) + dstType = TYP_FLOAT; + else if (dstType == TYP_LONG) + dstType = TYP_DOUBLE; + else + assert(!"unimplemented on LOONGARCH yet"); + } +#endif instruction ins = ins_Copy(srcReg, dstType); if (size == EA_UNKNOWN) @@ -523,6 +542,8 @@ void CodeGen::inst_RV_RV_RV(instruction ins, { #ifdef TARGET_ARM GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3, flags); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); #elif defined(TARGET_XARCH) GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); #else @@ -599,6 +620,8 @@ void CodeGen::inst_RV_IV( assert(ins != 
INS_tst); assert(ins != INS_mov); GetEmitter()->emitIns_R_R_I(ins, size, reg, reg, val); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R_I(ins, size, reg, reg, val); #else // !TARGET_ARM #ifdef TARGET_AMD64 // Instead of an 8-byte immediate load, a 4-byte immediate will do fine @@ -878,6 +901,15 @@ void CodeGen::inst_RV_TT(instruction ins, return; } #else // !TARGET_ARM +#ifdef TARGET_LOONGARCH64 + if (emitter::isFloatReg(reg)) + assert((ins==INS_fld_d) || (ins==INS_fld_s)); + else if (emitter::isGeneralRegister(reg) && (ins != INS_lea)) + {//TODO should amend for LOONGARCH64 !!! + //assert((ins==INS_ld_d) || (ins==INS_ld_w)); + ins = size == EA_4BYTE ? INS_ld_w : INS_ld_d; + } +#endif GetEmitter()->emitIns_R_S(ins, size, reg, varNum, offs); return; #endif // !TARGET_ARM @@ -1442,6 +1474,13 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) } #endif // TARGET_ARM64 +#if defined(TARGET_LOONGARCH64) +bool CodeGenInterface::validImmForBAL(ssize_t addr) +{//TODO: can amend/optimize for LoongArch64. + return false; +} +#endif // TARGET_LOONGARCH64 + /***************************************************************************** * * Get the machine dependent instruction for performing sign/zero extension. @@ -1452,6 +1491,10 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) */ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) { +#ifdef TARGET_LOONGARCH64 + assert(!"unimplemented yet on LoongArch64 for unused."); +#endif + instruction ins = INS_invalid; if (varTypeIsSIMD(srcType)) @@ -1633,6 +1676,8 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* } #elif defined(TARGET_ARM64) return INS_ldr; +//#elif defined(TARGET_LOONGARCH64) +// //TODO: add SIMD for LoongArch64. 
#else assert(!"ins_Load with SIMD type"); #endif @@ -1657,6 +1702,19 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* return INS_ldr; #elif defined(TARGET_ARM) return INS_vldr; +#elif defined(TARGET_LOONGARCH64) + if (srcType == TYP_DOUBLE) + { + return INS_fld_d; + } + else if (srcType == TYP_FLOAT) + { + return INS_fld_s; + } + else + { + assert(!"unhandled floating type"); + } #else assert(!varTypeIsFloating(srcType)); #endif @@ -1695,6 +1753,34 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* else ins = INS_ldrsh; } +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsByte(srcType)) + { + if (varTypeIsUnsigned(srcType)) + ins = INS_ld_bu; + else + ins = INS_ld_b; + } + else if (varTypeIsShort(srcType)) + { + if (varTypeIsUnsigned(srcType)) + ins = INS_ld_hu; + else + ins = INS_ld_h; + } + else if (TYP_INT == srcType) + { + ins = INS_ld_w; + } + else if (TYP_UINT == srcType) + { + ins = INS_ld_wu; + } + else + { + //assert((TYP_LONG == srcType) || (TYP_ULONG == srcType)); + ins = INS_ld_d;//default ld_d. + } #else NYI("ins_Load"); #endif @@ -1746,6 +1832,15 @@ instruction CodeGen::ins_Copy(var_types dstType) { return INS_mov; } +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(dstType)) + { + return dstType == TYP_FLOAT ? INS_fmov_s : INS_fmov_d; + } + else + { + return INS_mov; + } #else // TARGET_* #error "Unknown TARGET_" #endif @@ -1797,6 +1892,19 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) assert(dstType == TYP_INT); return INS_vmov_f2i; } +#elif defined(TARGET_LOONGARCH64) + // No SIMD support yet. + assert(!varTypeIsSIMD(dstType)); + if (dstIsFloatReg) + { + assert(!genIsValidFloatReg(srcReg)); + return dstType == TYP_FLOAT ? INS_movgr2fr_w : INS_movgr2fr_d; + } + else + { + assert(genIsValidFloatReg(srcReg)); + return EA_SIZE(emitActualTypeSize(dstType)) == EA_4BYTE ? 
INS_movfr2gr_s : INS_movfr2gr_d; + } #else // TARGET* #error "Unknown TARGET" #endif @@ -1863,6 +1971,19 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false { return INS_vstr; } +#elif defined(TARGET_LOONGARCH64) + assert(!varTypeIsSIMD(dstType)); + if (varTypeIsFloating(dstType)) + { + if (dstType == TYP_DOUBLE) + { + return INS_fst_d; + } + else if (dstType == TYP_FLOAT) + { + return INS_fst_s; + } + } #else assert(!varTypeIsSIMD(dstType)); assert(!varTypeIsFloating(dstType)); @@ -1877,6 +1998,15 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = INS_strb; else if (varTypeIsShort(dstType)) ins = INS_strh; +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsByte(dstType)) + ins = INS_st_b; + else if (varTypeIsShort(dstType)) + ins = INS_st_h; + else if ((TYP_INT == dstType) || (TYP_UINT == dstType)) + ins = INS_st_w; + else //if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) + ins = INS_st_d;//default st_d. #else NYI("ins_Store"); #endif @@ -2152,6 +2282,8 @@ void CodeGen::instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags fla GetEmitter()->emitIns_R_R(INS_xor, size, reg, reg); #elif defined(TARGET_ARMARCH) GetEmitter()->emitIns_R_I(INS_mov, size, reg, 0 ARM_ARG(flags)); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R_I(INS_ori, size, reg, REG_R0, 0); #else #error "Unknown TARGET" #endif diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 1e9302cf503e8..dc0f0a3c925b4 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -6,7 +6,11 @@ #define _INSTR_H_ /*****************************************************************************/ +#ifdef TARGET_LOONGARCH64 +#define BAD_CODE 0XFFFFFFFF +#else #define BAD_CODE 0x0BADC0DE // better not match a real encoding! 
+#endif /*****************************************************************************/ @@ -47,6 +51,14 @@ enum instruction : unsigned INS_lea, // Not a real instruction. It is used for load the address of stack locals +#elif defined(TARGET_LOONGARCH64) + #define INSTS(id, nm, fp, ldst, fmt, e1) INS_##id, + #include "instrs.h" + + //INS_dneg, // Not a real instruction. It will be translated to dsubu. + //INS_neg, // Not a real instruction. It will be translated to subu. + //INS_not, // Not a real instruction. It will be translated to nor. + INS_lea, // Not a real instruction. It is used for load the address of stack locals #else #error Unsupported target architecture #endif @@ -144,6 +156,13 @@ enum insFlags: unsigned INS_FLAGS_SET = 0x01, INS_FLAGS_DONT_CARE = 0x02, }; +#elif defined(TARGET_LOONGARCH64) +enum insFlags: unsigned +{ + INS_FLAGS_NOT_SET = 0x00, + INS_FLAGS_SET = 0x01, + INS_FLAGS_DONT_CARE = 0x02, +}; #else #error Unsupported target architecture #endif @@ -275,7 +294,6 @@ enum insBarrier : unsigned INS_BARRIER_OSHLD = 1, INS_BARRIER_OSHST = 2, INS_BARRIER_OSH = 3, - INS_BARRIER_NSHLD = 5, INS_BARRIER_NSHST = 6, INS_BARRIER_NSH = 7, @@ -288,6 +306,33 @@ enum insBarrier : unsigned INS_BARRIER_ST = 14, INS_BARRIER_SY = 15, }; +#elif defined(TARGET_LOONGARCH64) +enum insOpts : unsigned +{ + INS_OPTS_NONE, + + INS_OPTS_RC, // see ::emitIns_R_C(). + INS_OPTS_RL, // see ::emitIns_R_L(). + INS_OPTS_JIRL, // see ::emitIns_J_R(). + INS_OPTS_J, // see ::emitIns_J(). + INS_OPTS_J_cond, // see ::emitIns_J_cond_la(). + INS_OPTS_I, // see ::emitIns_I_la(). + //INS_OPTS_J2, // see ::emitIns_J(). + INS_OPTS_C, // see ::emitIns_Call(). + INS_OPTS_RELOC, // see ::emitIns_R_AI(). + //INS_OPTS_, // see ::(). + //INS_OPTS_, // see ::(). 
+};
+
+enum insBarrier : unsigned
+{
+    INS_BARRIER_FULL = 0,
+    INS_BARRIER_WMB = INS_BARRIER_FULL,//4,
+    INS_BARRIER_MB = INS_BARRIER_FULL,//16,
+    INS_BARRIER_ACQ = INS_BARRIER_FULL,//17,
+    INS_BARRIER_REL = INS_BARRIER_FULL,//18,
+    INS_BARRIER_RMB = INS_BARRIER_FULL,//19,
+};
 #endif
 
 #undef EA_UNKNOWN
diff --git a/src/coreclr/jit/instrs.h b/src/coreclr/jit/instrs.h
index b543f781645f5..aa16547f44be7 100644
--- a/src/coreclr/jit/instrs.h
+++ b/src/coreclr/jit/instrs.h
@@ -7,6 +7,8 @@
 #include "instrsarm.h"
 #elif defined(TARGET_ARM64)
 #include "instrsarm64.h"
+#elif defined(TARGET_LOONGARCH64)
+#include "instrsloongarch64.h"
 #else
 #error Unsupported or unset target architecture
 #endif // target type
diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h
new file mode 100644
index 0000000000000..1c16d53fd453a
--- /dev/null
+++ b/src/coreclr/jit/instrsloongarch64.h
@@ -0,0 +1,499 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Copyright (c) Loongson Technology. All rights reserved.
+
+/*****************************************************************************
+ * Loongarch64 instructions for JIT compiler
+ *
+ * id -- the enum name for the instruction
+ * nm -- textual name (for assembly display)
+ * fp -- floating point instruction
+ * ld/st/cmp -- load/store/compare instruction
+ * fmt -- encoding format used by this instruction
+ * e1 -- encoding 1
+ * e2 -- encoding 2
+ * e3 -- encoding 3
+ * e4 -- encoding 4
+ * e5 -- encoding 5
+ *
+******************************************************************************/
+
+#if !defined(TARGET_LOONGARCH64)
+#error Unexpected target type
+#endif
+
+#ifndef INSTS
+#error INSTS must be defined before including this file.
+#endif
+
+/*****************************************************************************/
+/* The following is LOONGARCH64-specific */
+/*****************************************************************************/
+
+// If you're adding a new instruction:
+// You need not only to fill in one of these macros describing the instruction, but also:
+// * If the instruction writes to more than one destination register, update the function
+// emitInsMayWriteMultipleRegs in emitLoongarch64.cpp.
+
+// clang-format off
+INSTS(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE)
+
+
+INSTS(nop , "nop", 0, 0, IF_LA, 0x03400000)
+
+////INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number.
+INSTS(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000)
+INSTS(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100)
+
+INSTS(beq, "beq", 0, 0, IF_LA, 0x58000000)
+INSTS(bne, "bne", 0, 0, IF_LA, 0x5c000000)
+
+INSTS(blt, "blt", 0, 0, IF_LA, 0x60000000)
+INSTS(bge, "bge", 0, 0, IF_LA, 0x64000000)
+INSTS(bltu, "bltu", 0, 0, IF_LA, 0x68000000)
+INSTS(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000)
+
+////R_I.
+INSTS(beqz, "beqz", 0, 0, IF_LA, 0x40000000)
+INSTS(bnez, "bnez", 0, 0, IF_LA, 0x44000000)
+
+////I.
+INSTS(b, "b", 0, 0, IF_LA, 0x50000000)
+INSTS(bl, "bl", 0, 0, IF_LA, 0x54000000)
+
+////////////////////////////////////////////////
+////NOTE: Begin
+//// the following instructions will be used by emitter::emitInsMayWriteToGCReg().
+////////////////////////////////////////////////
+// enum name FP LD/ST FMT ENCODE
+
+////NOTE: mov must be the first one !!! more info to see emitter::emitInsMayWriteToGCReg().
+INSTS(mov, "mov", 0, 0, IF_LA, 0x03800000)
+ // mov rd,rj
+ //NOTE: On loongarch, usually its name is move, but here we use mov for compatibility.
+ // In fact, mov is an alias command, "ori rd,rj,0"
+INSTS(dneg, "dneg", 0, 0, IF_LA, 0x00118000)
+ //dneg is an alias instruction.
+ //sub_d rd, zero, rk
+INSTS(neg, "neg", 0, 0, IF_LA, 0x00110000)
+ //neg is an alias instruction.
+ //sub_w rd, zero, rk +INSTS(not, "not", 0, 0, IF_LA, 0x00140000) + //not is a alias instruction. + //nor rd, rj, zero + +// enum:id name FP LD/ST Formate ENCODE +////R_R_R. +INSTS(add_w, "add.w", 0, 0, IF_LA, 0x00100000) +INSTS(add_d, "add.d", 0, 0, IF_LA, 0x00108000) +INSTS(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) +INSTS(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) + +INSTS(and, "and", 0, 0, IF_LA, 0x00148000) +INSTS(or, "or", 0, 0, IF_LA, 0x00150000) +INSTS(nor, "nor", 0, 0, IF_LA, 0x00140000) +INSTS(xor, "xor", 0, 0, IF_LA, 0x00158000) +INSTS(andn, "andn", 0, 0, IF_LA, 0x00168000) +INSTS(orn, "orn", 0, 0, IF_LA, 0x00160000) + +INSTS(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) +INSTS(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) +INSTS(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) +INSTS(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) +INSTS(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) +INSTS(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) +INSTS(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) +INSTS(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) +INSTS(div_w, "div.w", 0, 0, IF_LA, 0x00200000) +INSTS(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) +INSTS(div_d, "div.d", 0, 0, IF_LA, 0x00220000) +INSTS(div_du, "div.du", 0, 0, IF_LA, 0x00230000) +INSTS(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) +INSTS(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) +INSTS(mod_d, "mod.d", 0, 0, IF_LA, 0x00228000) +INSTS(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) + +INSTS(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) +INSTS(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) +INSTS(sra_w, "sra.w", 0, 0, IF_LA, 0x00180000) +INSTS(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) +INSTS(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) +INSTS(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) +INSTS(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) +INSTS(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) + +INSTS(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) +INSTS(masknez, "masknez", 0, 0, IF_LA, 0x00138000) + +INSTS(slt, "slt", 0, 0, IF_LA, 0x00120000) +INSTS(sltu, "sltu", 
0, 0, IF_LA, 0x00128000) + +INSTS(amswap_w, "amswap.w", 0, 0, IF_LA, 0x38600000) +INSTS(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) +INSTS(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) +INSTS(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) +INSTS(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) +INSTS(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) +INSTS(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) +INSTS(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) +INSTS(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) +INSTS(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) +INSTS(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) +INSTS(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) +INSTS(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) +INSTS(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) +INSTS(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) +INSTS(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) +INSTS(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) +INSTS(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) +INSTS(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) +INSTS(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) +INSTS(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) +INSTS(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) +INSTS(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) +INSTS(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) +INSTS(ammin_w, "ammin.w", 0, 0, IF_LA, 0x38660000) +INSTS(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) +INSTS(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) +INSTS(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) +INSTS(ammax_wu, "ammax.wu", 0, 0, IF_LA, 0x38670000) +INSTS(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) +INSTS(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) +INSTS(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) +INSTS(ammin_wu, "ammin.wu", 0, 0, IF_LA, 0x38680000) +INSTS(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) +INSTS(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) +INSTS(ammin_db_du, "ammin_db.du", 
0, 0, IF_LA, 0x38718000) + +INSTS(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) +INSTS(crc_w_h_w, "crc.w.h.w", 0, 0, IF_LA, 0x00248000) +INSTS(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) +INSTS(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) +INSTS(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) +INSTS(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) +INSTS(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) +INSTS(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) + +////R_R_R_I. +INSTS(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) +INSTS(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) +INSTS(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) + +INSTS(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) +INSTS(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) + +INSTS(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) + +////R_I. +INSTS(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) +INSTS(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) + +INSTS(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) +INSTS(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) +INSTS(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) +INSTS(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) + +////R_R. 
+INSTS(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) +INSTS(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) +INSTS(clo_w, "clo.w", 0, 0, IF_LA, 0x00001000) +INSTS(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) +INSTS(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) +INSTS(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) +INSTS(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) +INSTS(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) +INSTS(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) +INSTS(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) +INSTS(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) +INSTS(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) +INSTS(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) +INSTS(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) +INSTS(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) +INSTS(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) +INSTS(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) +INSTS(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) +INSTS(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) +INSTS(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) +INSTS(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) +INSTS(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) +INSTS(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) +INSTS(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) + +////R_R_I_I. +INSTS(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) +INSTS(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) +INSTS(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) +INSTS(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) + +////Load. 
+INSTS(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) +INSTS(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) +INSTS(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) +INSTS(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) +INSTS(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) +INSTS(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) +INSTS(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) + +INSTS(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) +INSTS(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) +INSTS(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) +INSTS(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) + +INSTS(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) +INSTS(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) +INSTS(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) +INSTS(ldx_d, "ldx.d", 0, LD, IF_LA, 0x380c0000) +INSTS(ldx_bu, "ldx.bu", 0, LD, IF_LA, 0x38200000) +INSTS(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) +INSTS(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) + +INSTS(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) +INSTS(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) +INSTS(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) +INSTS(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) +INSTS(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) +INSTS(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) +INSTS(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) +INSTS(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) + +////R_R_I. 
+INSTS(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) +INSTS(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) +INSTS(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) +INSTS(slti, "slti", 0, 0, IF_LA, 0x02000000) + +INSTS(sltui, "sltui", 0, 0, IF_LA, 0x02400000) +INSTS(andi, "andi", 0, 0, IF_LA, 0x03400000) +INSTS(ori, "ori", 0, 0, IF_LA, 0x03800000) +INSTS(xori, "xori", 0, 0, IF_LA, 0x03c00000) + +INSTS(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) +INSTS(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) +INSTS(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) +INSTS(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) +INSTS(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) +INSTS(srli_d, "srli.d", 0, 0, IF_LA, 0x00450000) +INSTS(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) +INSTS(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) + +INSTS(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) + +INSTS(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) + +////NOTE: jirl must be the last one !!! more info to see emitter::emitInsMayWriteToGCReg(). +//////////////////////////////////////////////// +////NOTE: End +//// the above instructions will be used by emitter::emitInsMayWriteToGCReg(). +//////////////////////////////////////////////// +////Store. 
+INSTS(st_b, "st.b", 0, ST, IF_LA, 0x29000000) +INSTS(st_h, "st.h", 0, ST, IF_LA, 0x29400000) +INSTS(st_w, "st.w", 0, ST, IF_LA, 0x29800000) +INSTS(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) + +INSTS(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) +INSTS(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) +INSTS(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) +INSTS(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) + +INSTS(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) +INSTS(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) +INSTS(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) +INSTS(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) +INSTS(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) +INSTS(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) +INSTS(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) +INSTS(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) +INSTS(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) +INSTS(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) +INSTS(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) +INSTS(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) + +INSTS(dbar, "dbar", 0, 0, IF_LA, 0x38720000) +INSTS(ibar, "ibar", 0, 0, IF_LA, 0x38728000) + +INSTS(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) +INSTS(break, "break", 0, 0, IF_LA, 0x002a0005) + +INSTS(asrtle_d, "asrtle.d", 0, 0, IF_LA, 0x00010000) +INSTS(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) + +INSTS(preld, "preld", 0, LD, IF_LA, 0x2ac00000) +INSTS(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) + +////Float instructions. +////R_R_R. 
+INSTS(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) +INSTS(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) +INSTS(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) +INSTS(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) +INSTS(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) +INSTS(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) +INSTS(fdiv_s, "fdiv.s", 0, 0, IF_LA, 0x01068000) +INSTS(fdiv_d, "fdiv.d", 0, 0, IF_LA, 0x01070000) + +INSTS(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) +INSTS(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) +INSTS(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) +INSTS(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) +INSTS(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) +INSTS(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) +INSTS(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) +INSTS(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) + +INSTS(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) +INSTS(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) + +INSTS(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) +INSTS(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) + +INSTS(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) +INSTS(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) +INSTS(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) +INSTS(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) + +INSTS(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) +INSTS(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) +INSTS(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) +INSTS(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) +INSTS(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) +INSTS(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) +INSTS(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) +INSTS(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) + +////R_R_R_R. 
+INSTS(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) +INSTS(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) +INSTS(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) +INSTS(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) +INSTS(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) +INSTS(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) +INSTS(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) +INSTS(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) + +////R_R. +INSTS(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) +INSTS(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) +INSTS(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) +INSTS(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) + +INSTS(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) +INSTS(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) +INSTS(frsqrt_s, "frsqrt.s", 0, 0, IF_LA, 0x01146400) +INSTS(frsqrt_d, "frsqrt.d", 0, 0, IF_LA, 0x01146800) +INSTS(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) +INSTS(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) +INSTS(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) +INSTS(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) +INSTS(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) +INSTS(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) + +INSTS(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) +INSTS(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) +INSTS(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) +INSTS(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) +INSTS(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) +INSTS(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) +INSTS(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) +INSTS(ftint_w_d, "ftint.w.d", 0, 0, IF_LA, 0x011b0800) +INSTS(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) +INSTS(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) +INSTS(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) +INSTS(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) +INSTS(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) +INSTS(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) +INSTS(ftintrp_w_s, 
"ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) +INSTS(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) +INSTS(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) +INSTS(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) +INSTS(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) +INSTS(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) +INSTS(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) +INSTS(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) +INSTS(ftintrne_w_s, "ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) +INSTS(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) +INSTS(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) +INSTS(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) +INSTS(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) +INSTS(frint_d, "frint.d", 0, 0, IF_LA, 0x011e4800) + +INSTS(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) +INSTS(fmov_d, "fmov.d", 0, 0, IF_LA, 0x01149800) + +INSTS(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) +INSTS(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) +INSTS(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) +INSTS(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) +INSTS(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) +INSTS(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) + +//// +INSTS(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) +INSTS(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) +INSTS(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) +INSTS(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) +INSTS(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) +INSTS(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) + +////R_R_I. 
+INSTS(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) +INSTS(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) +INSTS(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) +INSTS(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) +INSTS(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) +INSTS(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) +INSTS(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) +INSTS(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) +INSTS(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) +INSTS(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) +INSTS(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) + +INSTS(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) +INSTS(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) +INSTS(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) +INSTS(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) +INSTS(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) +INSTS(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) +INSTS(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) +INSTS(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) +INSTS(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) +INSTS(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) +INSTS(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) + +INSTS(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) +INSTS(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) +INSTS(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) +INSTS(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) +INSTS(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) +INSTS(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) +INSTS(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) +INSTS(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) +INSTS(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) +INSTS(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) +INSTS(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) + +INSTS(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 
0x0c108000) +INSTS(fcmp_sun_s, "fcmp.sun.s", 0, 0, IF_LA, 0x0c148000) +INSTS(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) +INSTS(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) +INSTS(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) +INSTS(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) +INSTS(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) +INSTS(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) +INSTS(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) +INSTS(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) +INSTS(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) + +////R_R_I. +INSTS(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) +INSTS(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) +INSTS(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) +INSTS(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) + + +// clang-format on +/*****************************************************************************/ +#undef INSTS +/*****************************************************************************/ diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index d8cb5cabfb065..8f0ab9b4f7954 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -42,6 +42,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_X86 and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_X86 and HOST_LOONGARCH64 +#endif #elif defined(HOST_AMD64) #if defined(HOST_X86) #error Cannot define both HOST_AMD64 and HOST_X86 @@ -52,6 +55,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_AMD64 and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_AMD64 and HOST_LOONGARCH64 +#endif #elif defined(HOST_ARM) #if defined(HOST_X86) #error Cannot define both HOST_ARM and HOST_X86 @@ -62,6 +68,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_ARM and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_ARM and HOST_LOONGARCH64 +#endif #elif defined(HOST_ARM64) #if defined(HOST_X86) 
#error Cannot define both HOST_ARM64 and HOST_X86 @@ -72,6 +81,22 @@ #if defined(HOST_ARM) #error Cannot define both HOST_ARM64 and HOST_ARM #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_ARM64 and HOST_LOONGARCH64 +#endif +#elif defined(HOST_LOONGARCH64) +#if defined(HOST_X86) +#error Cannot define both HOST_LOONGARCH64 and HOST_X86 +#endif +#if defined(HOST_AMD64) +#error Cannot define both HOST_LOONGARCH64 and HOST_AMD64 +#endif +#if defined(HOST_ARM) +#error Cannot define both HOST_LOONGARCH64 and HOST_ARM +#endif +#if defined(HOST_ARM64) +#error Cannot define both HOST_LOONGARCH64 and HOST_ARM64 +#endif #else #error Unsupported or unset host architecture #endif @@ -86,6 +111,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_X86 and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_X86 and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_AMD64) #if defined(TARGET_X86) #error Cannot define both TARGET_AMD64 and TARGET_X86 @@ -96,6 +124,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_AMD64 and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_AMD64 and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_ARM) #if defined(TARGET_X86) #error Cannot define both TARGET_ARM and TARGET_X86 @@ -106,6 +137,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_ARM and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_ARM and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_ARM64) #if defined(TARGET_X86) #error Cannot define both TARGET_ARM64 and TARGET_X86 @@ -116,6 +150,22 @@ #if defined(TARGET_ARM) #error Cannot define both TARGET_ARM64 and TARGET_ARM #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_ARM64 and TARGET_LOONGARCH64 +#endif +#elif defined(TARGET_LOONGARCH64) +#if defined(TARGET_X86) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_X86 +#endif +#if 
defined(TARGET_AMD64) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_AMD64 +#endif +#if defined(TARGET_ARM) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_ARM +#endif +#if defined(TARGET_ARM64) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_ARM64 +#endif #else #error Unsupported or unset target architecture #endif @@ -163,6 +213,8 @@ #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARMNT #elif defined(TARGET_ARM64) #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARM64 // 0xAA64 +#elif defined(TARGET_LOONGARCH64) +#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_LOONGARCH64 // 0xDD64 #else #error Unsupported or unset target architecture #endif @@ -207,6 +259,14 @@ #define UNIX_AMD64_ABI_ONLY(x) #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) +#define UNIX_LOONGARCH64_ONLY_ARG(x) , x +#define UNIX_LOONGARCH64_ONLY(x) x +#else // !TARGET_LOONGARCH64 +#define UNIX_LOONGARCH64_ONLY_ARG(x) +#define UNIX_LOONGARCH64_ONLY(x) +#endif // TARGET_LOONGARCH64 + #if defined(DEBUG) #define DEBUG_ARG_SLOTS #endif @@ -224,7 +284,7 @@ #define DEBUG_ARG_SLOTS_ASSERT(x) #endif -#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #define FEATURE_PUT_STRUCT_ARG_STK 1 #endif @@ -236,7 +296,7 @@ #define UNIX_AMD64_ABI_ONLY(x) #endif // defined(UNIX_AMD64_ABI) -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #define MULTIREG_HAS_SECOND_GC_RET 1 #define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x) , x #define MULTIREG_HAS_SECOND_GC_RET_ONLY(x) x @@ -249,7 +309,7 @@ // Arm64 Windows supports FEATURE_ARG_SPLIT, note this is different from // the official Arm64 ABI. 
// Case: splitting 16 byte struct between x7 and stack -#if defined(TARGET_ARM) || defined(TARGET_ARM64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64)/* || defined(TARGET_LOONGARCH64)*/ #define FEATURE_ARG_SPLIT 1 #else #define FEATURE_ARG_SPLIT 0 diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 5f9fa62008b30..7478529966657 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -343,6 +343,11 @@ CONFIG_INTEGER(EnableArm64Sm4, W("EnableArm64Sm4"), 1) CONFIG_INTEGER(EnableArm64Sve, W("EnableArm64Sve"), 1) #endif // defined(TARGET_ARM64) +#if defined(TARGET_LOONGARCH64) +//TODO: should add LOONGARCH64's features here. +CONFIG_INTEGER(EnableHWIntrinsic, W("EnableHWIntrinsic"), 0) +#endif // defined(TARGET_LOONGARCH64) + // clang-format on #ifdef FEATURE_SIMD @@ -571,6 +576,18 @@ CONFIG_STRING(JitFunctionFile, W("JitFunctionFile")) // of the frame) CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSavedRegisters"), 0) #endif // defined(TARGET_ARM64) + +#if defined(TARGET_LOONGARCH64) +// JitSaveFpRaWithCalleeSavedRegisters: +// 0: use default frame type decision +// 1: disable frames that save FP/RA registers with the callee-saved registers (at the top of the frame) +// 2: force all frames to use the frame types that save FP/RA registers with the callee-saved registers (at the top +// of the frame) +CONFIG_INTEGER(JitSaveFpRaWithCalleeSavedRegisters, W("JitSaveFpRaWithCalleeSavedRegisters"), 0) + +// Disable emitDispIns by default +CONFIG_INTEGER(JitDispIns, W("JitDispIns"), 0) +#endif // defined(TARGET_LOONGARCH64) #endif // DEBUG CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 1) // Allow to enregister locals with struct type. 
diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index 5cf2d7bfb8e68..be8f1ec1fb903 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -888,7 +888,7 @@ unsigned Compiler::ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTr assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX); assert(ehGetDsc(finallyIndex)->HasFinallyHandler()); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return ehGetDsc(finallyIndex)->ebdGetEnclosingRegionIndex(inTryRegion); #else *inTryRegion = true; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 4d01e63ef13b6..2cd755c49c475 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -643,7 +643,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un LclVarDsc* varDsc = varDscInfo->varDsc; CORINFO_CLASS_HANDLE typeHnd = nullptr; +#if defined(TARGET_LOONGARCH64) + int flags = 0; + CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd, &flags); +#else CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); +#endif varDsc->lvIsParam = 1; lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args); @@ -654,7 +659,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un lvaSetClass(varDscInfo->varNum, clsHnd); } - // For ARM, ARM64, and AMD64 varargs, all arguments go in integer registers + // For ARM, ARM64, LOONGARCH64, and AMD64 varargs, all arguments go in integer registers var_types argType = mangleVarArgsType(varDsc->TypeGet()); var_types origArgType = argType; @@ -808,6 +813,24 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } #else // !TARGET_ARM + +#if defined(TARGET_LOONGARCH64) + + if (compFeatureArgSplit()) 
+ { + // This does not affect the normal calling convention for LoongArch64!! + if (this->info.compIsVarArgs && argType == TYP_STRUCT) + { + if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register + !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register + { + cSlotsToEnregister = 1; // Force the split + } + } + } + +#endif // defined(TARGET_LOONGARCH64) + #if defined(UNIX_AMD64_ABI) SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; if (varTypeIsStruct(argType)) @@ -868,9 +891,72 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); } else +#elif defined(TARGET_LOONGARCH64) + var_types arg1_Type = TYP_UNKNOWN; + var_types arg2_Type = TYP_UNKNOWN; + if (flags & 0xf) + { + assert(varTypeIsStruct(argType)); + int float_num = 0; + if (flags == 1) + { + assert(argSize <= 8); + assert(varDsc->lvExactSize <= argSize); + float_num = 1; + + arg1_Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; + canPassArgInRegisters = varDscInfo->canEnreg(arg1_Type, 1); + } + else if (flags & 0x8) + { + arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; + arg2_Type = (flags & 0x20) ? TYP_DOUBLE : TYP_FLOAT; + float_num = 2; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); + } + else if (flags & 2) + { + float_num = 1; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); + canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); + + arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; + arg2_Type = (flags & 0x20) ? TYP_LONG : TYP_INT; + } + else if (flags & 4) + { + float_num = 1; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); + canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); + + arg1_Type = (flags & 0x10) ? TYP_LONG : TYP_INT; + arg2_Type = (flags & 0x20) ? 
TYP_DOUBLE : TYP_FLOAT; + } + + if (!canPassArgInRegisters) + { + assert(float_num > 0); + canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); + arg1_Type = TYP_UNKNOWN; + arg2_Type = TYP_UNKNOWN; + } + } + else #endif // defined(UNIX_AMD64_ABI) { canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); +#if defined(TARGET_LOONGARCH64) + if (!canPassArgInRegisters && varTypeIsFloating(argType)) + { + canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); + argType = canPassArgInRegisters ? TYP_I_IMPL : argType; + } + if (!canPassArgInRegisters && (cSlots > 1)) + { + canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); + arg1_Type = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; + } +#endif } if (canPassArgInRegisters) @@ -900,7 +986,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } else -#endif // defined(UNIX_AMD64_ABI) +#elif defined(TARGET_LOONGARCH64) + if (arg1_Type != TYP_UNKNOWN) + { + firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg1_Type, 1); + } + else +#endif // defined(TARGET_LOONGARCH64) { firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots); } @@ -948,6 +1040,40 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType)); } } +#elif defined(TARGET_LOONGARCH64) + if (argType == TYP_STRUCT) + { + if (arg1_Type != TYP_UNKNOWN) + { + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg1_Type)); + varDsc->lvIs4Field1 = (int)emitActualTypeSize(arg1_Type) == 4 ? 1 : 0; + if (arg2_Type != TYP_UNKNOWN) + { + firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2_Type, 1); + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2_Type)); + varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2_Type) == 4 ? 
1 : 0; + varDscInfo->hasMultiSlotStruct = true; + } + else if (cSlots > 1) + { + varDsc->lvIsSplit = 1; + //varDsc->lvFldOffset = 0; + varDsc->SetOtherArgReg(REG_STK); + varDscInfo->hasMultiSlotStruct = true; + varDscInfo->setAllRegArgUsed(arg1_Type); + varDscInfo->stackArgSize += TARGET_POINTER_SIZE; + } + } + else + { + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL)); + if (cSlots == 2) + { + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL)); + varDscInfo->hasMultiSlotStruct = true; + } + } + } #else // ARM32 if (varTypeIsStruct(argType)) { @@ -1064,6 +1190,10 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un // varDscInfo->setAllRegArgUsed(argType); +#elif defined(TARGET_LOONGARCH64) + + varDscInfo->setAllRegArgUsed(argType); + #endif // TARGET_XXX #if FEATURE_FASTTAILCALL @@ -1383,7 +1513,12 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, #if defined(TARGET_AMD64) || defined(TARGET_ARM64) varDsc->lvIsImplicitByRef = 0; -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_LOONGARCH64) + varDsc->lvIsImplicitByRef = 0; + varDsc->lvIs4Field1 = 0; + varDsc->lvIs4Field2 = 0; + varDsc->lvIsSplit = 0; +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Set the lvType (before this point it is TYP_UNDEF). @@ -2020,7 +2155,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) JITDUMP("Not promoting multi-reg returned struct local V%02u with holes.\n", lclNum); shouldPromote = false; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // TODO-PERF - Only do this when the LclVar is used in an argument context // TODO-ARM64 - HFA support should also eliminate the need for this. // TODO-ARM32 - HFA support should also eliminate the need for this. 
@@ -2037,7 +2172,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) lclNum, structPromotionInfo.fieldCnt); shouldPromote = false; } -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_LOONGARCH64 else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa()) { #if FEATURE_MULTIREG_STRUCT_PROMOTE @@ -2352,7 +2487,7 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum) compiler->compLongUsed = true; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Reset the implicitByRef flag. fieldVarDsc->lvIsImplicitByRef = 0; @@ -2668,7 +2803,7 @@ bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc, bool isVarArg) return true; } -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (howToPassStruct == SPK_ByValue) { assert(type == TYP_STRUCT); @@ -2709,7 +2844,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; varDsc->lvType = impNormStructType(typeHnd, &simdBaseJitType); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Mark implicit byref struct parameters if (varDsc->lvIsParam && !varDsc->lvIsStructField) { @@ -2722,7 +2857,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool varDsc->lvIsImplicitByRef = 1; } } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #if FEATURE_SIMD if (simdBaseJitType != CORINFO_TYPE_UNDEF) @@ -3702,20 +3837,20 @@ size_t LclVarDsc::lvArgStackSize() const #if defined(WINDOWS_AMD64_ABI) // Structs are either passed by reference or 
can be passed by value using one pointer stackSize = TARGET_POINTER_SIZE; -#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // lvSize performs a roundup. stackSize = this->lvSize(); -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if ((stackSize > TARGET_POINTER_SIZE * 2) && (!this->lvIsHfa())) { // If the size is greater than 16 bytes then it will // be passed by reference. stackSize = TARGET_POINTER_SIZE; } -#endif // defined(TARGET_ARM64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) -#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI +#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI !TARGET_LOONGARCH64 NYI("Unsupported target."); unreached(); @@ -5224,7 +5359,14 @@ void Compiler::lvaFixVirtualFrameOffsets() JITDUMP("--- delta bump %d for RBP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } -#endif // TARGET_AMD64 +#elif defined(TARGET_LOONGARCH64) + else + { + // FP is used. + JITDUMP("--- delta bump %d for RBP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); + delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); + } +#endif // TARGET_LOONGARCH64 // For OSR, update the delta to reflect the current policy that // RBP points at the base of the new frame, and RSP is relative to that RBP. @@ -5287,6 +5429,11 @@ void Compiler::lvaFixVirtualFrameOffsets() JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), varDsc->GetStackOffset() + delta); varDsc->SetStackOffset(varDsc->GetStackOffset() + delta); +#if defined(TARGET_LOONGARCH64) + if (varDsc->GetStackOffset() >= delta) + varDsc->SetStackOffset(varDsc->GetStackOffset() + (varDsc->lvIsSplit ? 
8 : 0)); +#endif + #if DOUBLE_ALIGN if (genDoubleAlign() && !codeGen->isFramePointerUsed()) { @@ -5343,6 +5490,18 @@ void Compiler::lvaFixVirtualFrameOffsets() { lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); } +#elif defined(TARGET_LOONGARCH64) + // We normally add alignment below the locals between them and the outgoing + // arg space area. When we store fp/ra at the bottom, however, this will be + // below the alignment. So we should not apply the alignment adjustment to + // them. On LOONGARCH64 it turns out we always store these at +0 and +8 of the FP, + // so instead of dealing with skipping adjustment just for them we just set + // them here always. + assert(codeGen->isFramePointerUsed()); + if (lvaRetAddrVar != BAD_VAR_NUM) + { + lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); + } #endif } @@ -5740,7 +5899,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, * when updating the current offset on the stack */ CLANG_FORMAT_COMMENT_ANCHOR; -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) #if DEBUG // TODO: Remove this noway_assert and replace occurrences of TARGET_POINTER_SIZE with argSize // Also investigate why we are incrementing argOffs for X86 as this seems incorrect @@ -5848,6 +6007,18 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, varDsc->SetStackOffset(argOffs); argOffs += argSize; } + +#elif defined(TARGET_LOONGARCH64) + //if (compFeatureArgSplit() && this->info.compIsVarArgs) + //{//TODO: should confirm for "info.compIsVarArgs". + // if (varDsc->lvType == TYP_STRUCT && varDsc->lvOtherArgReg >= MAX_REG_ARG && varDsc->lvOtherArgReg != REG_NA) + // { + // // This is a split struct. It will account for an extra (8 bytes) + // // of alignment. 
+ // varDsc->lvStkOffs += TARGET_POINTER_SIZE; + // argOffs += TARGET_POINTER_SIZE; + // } + //} #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -6075,6 +6246,30 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_XARCH +#ifdef TARGET_LOONGARCH64 + // Decide where to save FP and RA registers. We store FP/RA registers at the bottom of the frame if there is + // a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we + // need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value, + // and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the + // frame types. Since saving FP/RA at high addresses is a relatively rare case, force using it during stress. + // (It should be legal to use these frame types for every frame). + + if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 0) + { + // Default configuration + codeGen->SetSaveFpRaWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) || + compStressCompile(STRESS_GENERIC_VARN, 20)); + } + else if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 1) + { + codeGen->SetSaveFpRaWithAllCalleeSavedRegisters(false); // Disable using new frames + } + else if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 2) + { + codeGen->SetSaveFpRaWithAllCalleeSavedRegisters(true); // Force using new frames + } +#endif // TARGET_LOONGARCH64 + int preSpillSize = 0; bool mustDoubleAlign = false; @@ -6116,7 +6311,29 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; } -#else // !TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) + + int initialStkOffs = 0; + if (info.compIsVarArgs) + { + // For varargs we always save all of the integer register arguments + // so that they are contiguous with the incoming stack arguments. 
+ initialStkOffs = MAX_REG_ARG * REGSIZE_BYTES; + stkOffs -= initialStkOffs; + } + if (codeGen->IsSaveFpRaWithAllCalleeSavedRegisters() || + !isFramePointerUsed()) // Note that currently we always have a frame pointer + { + stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; + } + else + { + // Subtract off FP and RA. + assert(compCalleeRegsPushed >= 2); + stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; + } + +#else // !TARGET_LOONGARCH64 #ifdef TARGET_ARM // On ARM32 LR is part of the pushed registers and is always stored at the // top. @@ -6127,7 +6344,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #endif stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; -#endif // !TARGET_ARM64 +#endif // !TARGET_LOONGARCH64 compLclFrameSize = 0; @@ -6192,6 +6409,17 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // FEATURE_EH_FUNCLETS && defined(TARGET_ARMARCH) +#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_LOONGARCH64) + if (lvaPSPSym != BAD_VAR_NUM) + { + // If we need a PSPSym, allocate it first, before anything else, including + // padding (so we can avoid computing the same padding in the funclet + // frame). Note that there is no special padding requirement for the PSPSym. + noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer + stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); + } +#endif // FEATURE_EH_FUNCLETS || TARGET_LOONGARCH64 + if (mustDoubleAlign) { if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) @@ -6591,6 +6819,15 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif +#ifdef TARGET_LOONGARCH64 + if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) + {//TODO: add VarArgs for LOONGARCH64. + // Stack offset to parameters should point to home area which will be preallocated. 
+ varDsc->SetStackOffset(-initialStkOffs + genMapIntRegNumToRegArgNum(varDsc->GetArgReg()) * REGSIZE_BYTES); + continue; + } +#endif + #endif // !TARGET_AMD64 } @@ -6700,6 +6937,19 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_ARM #endif // TARGET_ARM64 + +#ifdef TARGET_LOONGARCH64 + // If we have an incoming register argument that has a struct promoted field + // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar + // + if (varDsc->lvIsRegArg && varDsc->lvPromotedStruct()) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + lvaTable[fieldVarNum].SetStackOffset(varDsc->GetStackOffset()); + } +#endif // TARGET_LOONGARCH64 } } @@ -6804,6 +7054,15 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 + if (!codeGen->IsSaveFpRaWithAllCalleeSavedRegisters() && + isFramePointerUsed()) // Note that currently we always have a frame pointer + { + // Create space for saving FP and RA. + stkOffs -= 2 * REGSIZE_BYTES; + } +#endif // TARGET_LOONGARCH64 + #if FEATURE_FIXED_OUT_ARGS if (lvaOutgoingArgSpaceSize > 0) { @@ -6839,6 +7098,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() pushedCount += 1; // pushed PC (return address) #endif +#ifdef TARGET_LOONGARCH64 + if (info.compIsVarArgs) + { + pushedCount += MAX_REG_ARG; + } +#endif + noway_assert(compLclFrameSize + originalFrameSize == (unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE))); } @@ -7092,6 +7358,34 @@ void Compiler::lvaAlignFrame() } } +#elif defined(TARGET_LOONGARCH64) + + // First, align up to 8. 
+ if ((compLclFrameSize % 8) != 0) + { + lvaIncrementFrameSize(8 - (compLclFrameSize % 8)); + } + else if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) + { + // If we are not doing final layout, we don't know the exact value of compLclFrameSize + // and thus do not know how much we will need to add in order to be aligned. + // We add 8 so compLclFrameSize is still a multiple of 8. + lvaIncrementFrameSize(8); + } + assert((compLclFrameSize % 8) == 0); + + // Ensure that the stack is always 16-byte aligned by grabbing an unused 16-byte + // if needed. + bool regPushedCountAligned = (compCalleeRegsPushed % (16 / REGSIZE_BYTES)) != 0; + bool lclFrameSizeAligned = (compLclFrameSize % 16) != 0; + + // If this isn't the final frame layout, assume we have to push an extra QWORD + // Just so the offsets are true upper limits. + if ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) || (regPushedCountAligned != lclFrameSizeAligned)) + { + lvaIncrementFrameSize(REGSIZE_BYTES); + } + #else NYI("TARGET specific lvaAlignFrame"); #endif // !TARGET_AMD64 @@ -7666,6 +7960,11 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) { compCalleeFPRegsSavedMask = RBM_NONE; } +#elif defined(TARGET_LOONGARCH64) + if (compFloatingPointUsed) + compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT; + + compCalleeRegsPushed++; // we always push RA. See genPushCalleeSavedRegisters #endif #if DOUBLE_ALIGN @@ -7696,6 +7995,14 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR. See genPushCalleeSavedRegisters #endif +#if defined(TARGET_LOONGARCH64) + if (compFloatingPointUsed) + { + calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ; + } + calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push RA. 
See genPushCalleeSavedRegisters +#endif + result = compLclFrameSize + calleeSavedRegMaxSz; return result; } @@ -7988,6 +8295,13 @@ Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTree** pTree, fgWalkData* padding = roundUp(padding, alignment); #endif // TARGET_ARMARCH +#ifdef TARGET_LOONGARCH64 + unsigned alignment = 1; + pComp->codeGen->InferOpSizeAlign(lcl, &alignment); + alignment = roundUp(alignment, TARGET_POINTER_SIZE); + padding = roundUp(padding, alignment); +#endif // TARGET_LOONGARCH64 + // Change the variable to a TYP_BLK if (varType != TYP_BLK) { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2467a00accdaa..c484c0fd0c9b1 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -197,7 +197,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerCast(node); break; -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_ARR_BOUNDS_CHECK: #ifdef FEATURE_SIMD case GT_SIMD_CHK: @@ -230,7 +230,7 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_LSH: case GT_RSH: case GT_RSZ: -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) LowerShift(node->AsOp()); #else ContainCheckShiftRotate(node->AsOp()); @@ -310,7 +310,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerStoreLocCommon(node->AsLclVarCommon()); break; -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_CMPXCHG: CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand); break; @@ -338,7 +338,7 @@ GenTree* Lowering::LowerNode(GenTree* node) break; #endif -#ifndef TARGET_ARMARCH +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARMARCH-CQ: We should contain this as long as the offset fits. 
case GT_OBJ: if (node->AsObj()->Addr()->OperIsLocalAddr()) @@ -1040,6 +1040,15 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf MakeSrcContained(arg, arg->AsObj()->Addr()); } } +#elif defined(TARGET_LOONGARCH64) + if (type == TYP_STRUCT) + { + arg->SetContained(); + if ((arg->OperGet() == GT_OBJ) && (arg->AsObj()->Addr()->OperGet() == GT_LCL_VAR_ADDR)) + { + MakeSrcContained(arg, arg->AsObj()->Addr()); + } + } #endif #if FEATURE_ARG_SPLIT @@ -1423,6 +1432,30 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) } #endif // TARGET_ARMARCH +#if defined(TARGET_LOONGARCH64) + if (call->IsVarargs() /*|| comp->opts.compUseSoftFP*/) + { + // For vararg call or on armel, reg args should be all integer. + // Insert copies as needed to move float value to integer register. + GenTree* newNode = LowerFloatArg(ppArg, info); + if (newNode != nullptr) + { + type = newNode->TypeGet(); + } + } + else + { + GenTree* putArg = NewPutArg(call, arg, info, type); + + // In the case of register passable struct (in one or two registers) + // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_FIELD_LIST with two GT_PUTARG_REGs.) + // If an extra node is returned, splice it in the right place in the tree. + if (arg != putArg) + { + ReplaceArgWithPutArgOrBitcast(ppArg, putArg); + } + } +#else GenTree* putArg = NewPutArg(call, arg, info, type); // In the case of register passable struct (in one or two registers) @@ -1432,10 +1465,11 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) { ReplaceArgWithPutArgOrBitcast(ppArg, putArg); } +#endif } } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ // LowerFloatArg: Lower float call arguments on the arm platform. 
// @@ -2498,7 +2532,8 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) { assert(cmp->gtGetOp2()->IsIntegralConst()); -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64)// || defined(TARGET_LOONGARCH64) + ////TODO: add optimize for LoongArch64. GenTree* op1 = cmp->gtGetOp1(); GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); ssize_t op2Value = op2->IconValue(); @@ -2867,7 +2902,53 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) return nullptr; } } -#endif // TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) + GenTree* relop = jtrue->gtGetOp1(); + GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); + GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); + + if (relopOp1->IsCnsIntOrI() && relopOp2->IsCnsIntOrI()) + { + relopOp1->SetContained(); + relopOp2->SetContained(); + } + else if (relop->gtNext == jtrue) + { + if (relopOp2->IsCnsIntOrI()) + { + if (relop->OperIs(GT_EQ, GT_NE)) + { + + // Codegen will use beq or bne in codegen. + GenTreeFlags flags = relop->OperIs(GT_EQ) ? GTF_JCMP_EQ : GTF_EMPTY; + + relop->SetOper(GT_JCMP); + relop->gtFlags &= ~(GTF_JCMP_TST | GTF_JCMP_EQ); + relop->gtFlags |= flags; + relop->gtType = TYP_VOID; + + relopOp2->SetContained(); + + BlockRange().Remove(jtrue); + + assert(relop->gtNext == nullptr); + return nullptr; + } + } + else if (relopOp1->IsCnsIntOrI()) + { + relopOp1->SetContained(); + } + } + else if (relopOp1->IsCnsIntOrI()) + { + relopOp1->SetContained(); + } + else if (relopOp2->IsCnsIntOrI()) + { + relopOp2->SetContained(); + } +#endif // TARGET_LOONGARCH64 ContainCheckJTrue(jtrue); @@ -5106,7 +5187,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) return next; } -#ifndef TARGET_ARMARCH +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) if (BlockRange().TryGetUse(node, &use)) { // If this is a child of an indir, let the parent handle it. 
@@ -5117,7 +5198,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) TryCreateAddrMode(node, false); } } -#endif // !TARGET_ARMARCH +#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 } if (node->OperIs(GT_ADD)) @@ -5283,7 +5364,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (!comp->opts.MinOpts() && (divisorValue >= 3)) { size_t magic; @@ -5363,7 +5444,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) GenTree* firstNode = nullptr; GenTree* adjustedDividend = dividend; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // On ARM64 we will use a 32x32->64 bit multiply instead of a 64x64->64 one. bool widenToNativeIntForMul = (type != TYP_I_IMPL) && !simpleMul; #else @@ -5417,7 +5498,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } else { -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) //|| defined(TARGET_LOONGARCH64) // 64-bit MUL is more expensive than UMULL on ARM64. genTreeOps mulOper = simpleMul ? 
GT_MUL_LONG : GT_MULHI; #else @@ -5509,7 +5590,11 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) GenTree* dividend = divMod->gtGetOp1(); GenTree* divisor = divMod->gtGetOp2(); +#if defined(TARGET_LOONGARCH64) + const var_types type = genActualType(divMod->TypeGet()); +#else const var_types type = divMod->TypeGet(); +#endif assert((type == TYP_INT) || (type == TYP_LONG)); #if defined(USE_HELPERS_FOR_INT_DIV) @@ -5571,7 +5656,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) return nullptr; } -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) ssize_t magic; int shift; diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index ed0ecc5661970..d5835f183e3ba 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -157,7 +157,7 @@ class Lowering final : public Phase void ReplaceArgWithPutArgOrBitcast(GenTree** ppChild, GenTree* newNode); GenTree* NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* info, var_types type); void LowerArg(GenTreeCall* call, GenTree** ppTree); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) GenTree* LowerFloatArg(GenTree** pArg, fgArgTabEntry* info); GenTree* LowerFloatArgReg(GenTree* arg, regNumber regNum); #endif diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp new file mode 100644 index 0000000000000..0b77c8a27d3b8 --- /dev/null +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -0,0 +1,1693 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Lowering for LOONGARCH64 common code XX +XX XX +XX This encapsulates common logic for lowering trees for the LOONGARCH64 XX +XX architectures. For a more detailed view of what is lowering, please XX +XX take a look at Lower.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 // This file is ONLY used for LOONGARCH64 architectures + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" +#include "lsra.h" + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" +#endif + +//------------------------------------------------------------------------ +// IsCallTargetInRange: Can a call target address be encoded in-place? +// +// Return Value: +// True if the addr fits into the range. +// +bool Lowering::IsCallTargetInRange(void* addr) +{ + ////TODO for LOONGARCH64: should amend for optimize! + //assert(!"unimplemented on LOONGARCH yet"); + //return comp->codeGen->validImmForBAL((ssize_t)addr); + return false; +} + +//------------------------------------------------------------------------ +// IsContainableImmed: Is an immediate encodable in-place? +// +// Return Value: +// True if the immediate can be folded into an instruction, +// for example small enough and non-relocatable. 
+// +bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const +{ + if (!varTypeIsFloating(parentNode->TypeGet())) + { + // Make sure we have an actual immediate + if (!childNode->IsCnsIntOrI()) + return false; + if (childNode->AsIntCon()->ImmedValNeedsReloc(comp)) + return false; + + // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. + target_ssize_t immVal = (target_ssize_t)childNode->AsIntCon()->gtIconVal; + emitAttr attr = emitActualTypeSize(childNode->TypeGet()); + emitAttr size = EA_SIZE(attr); + + switch (parentNode->OperGet()) + { + case GT_ADD: + return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false + : ((-2048 <= immVal) && (immVal <= 2047)); + break; + case GT_CMPXCHG: + case GT_LOCKADD: + case GT_XADD: + assert(!"unimplemented on LOONGARCH yet"); + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif + return ((-32768 <= immVal) && (immVal <= 32767)); + case GT_AND: + case GT_OR: + case GT_XOR: + return ((-2048 <= immVal) && (immVal <= 2047)); + case GT_JCMP: + assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); + return true; + + case GT_STORE_LCL_FLD: + case GT_STORE_LCL_VAR: + if (immVal == 0) + return true; + break; + + default: + break; + } + } + + return false; +} + +//------------------------------------------------------------------------ +// LowerMul: Lower a GT_MUL/GT_MULHI/GT_MUL_LONG node. +// +// TODO: For LoongArch64 recognized GT_MULs that can be turned into GT_MUL_LONGs, as +// those are cheaper. Performs contaiment checks. +// +// Arguments: +// mul - The node to lower +// +// Return Value: +// The next node to lower. 
+// +GenTree* Lowering::LowerMul(GenTreeOp* mul) +{ + assert(mul->OperIsMul()); + + //if (comp->opts.OptimizationEnabled() && mul->OperIs(GT_MUL) && mul->IsValidLongMul()) + //{ + // GenTreeCast* op1 = mul->gtGetOp1()->AsCast(); + // GenTree* op2 = mul->gtGetOp2(); + + // mul->ClearOverflow(); + // mul->ClearUnsigned(); + // if (op1->IsUnsigned()) + // { + // mul->SetUnsigned(); + // } + + // mul->gtOp1 = op1->CastOp(); + // BlockRange().Remove(op1); + + // if (op2->OperIs(GT_CAST)) + // { + // mul->gtOp2 = op2->AsCast()->CastOp(); + // BlockRange().Remove(op2); + // } + // else + // { + // assert(op2->IsIntegralConst()); + // assert(FitsIn(op2->AsIntConCommon()->IntegralValue())); + + // op2->ChangeType(TYP_INT); + // } + + // mul->ChangeOper(GT_MUL_LONG); + //} + + ContainCheckMul(mul); + + return mul->gtNext; +} + +//------------------------------------------------------------------------ +// LowerStoreLoc: Lower a store of a lclVar +// +// Arguments: +// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) +// +// Notes: +// This involves: +// - Widening operations of unsigneds. +// +void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) +{ + // Try to widen the ops if they are going into a local var. 
+ GenTree* op1 = storeLoc->gtGetOp1(); + if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT)) + { + GenTreeIntCon* con = op1->AsIntCon(); + ssize_t ival = con->gtIconVal; + unsigned varNum = storeLoc->GetLclNum(); + LclVarDsc* varDsc = comp->lvaGetDesc(varNum); + + if (varDsc->lvIsSIMDType()) + { + noway_assert(storeLoc->gtType != TYP_STRUCT); + } + unsigned size = genTypeSize(storeLoc); + // If we are storing a constant into a local variable + // we extend the size of the store here + if ((size < 4) && !varTypeIsStruct(varDsc)) + { + if (!varTypeIsUnsigned(varDsc)) + { + if (genTypeSize(storeLoc) == 1) + { + if ((ival & 0x7f) != ival) + { + ival = ival | 0xffffff00; + } + } + else + { + assert(genTypeSize(storeLoc) == 2); + if ((ival & 0x7fff) != ival) + { + ival = ival | 0xffff0000; + } + } + } + + // A local stack slot is at least 4 bytes in size, regardless of + // what the local var is typed as, so auto-promote it here + // unless it is a field of a promoted struct + // TODO-CQ: if the field is promoted shouldn't we also be able to do this? + if (!varDsc->lvIsStructField) + { + storeLoc->gtType = TYP_INT; + con->SetIconValue(ival); + } + } + } + if (storeLoc->OperIs(GT_STORE_LCL_FLD)) + { + // We should only encounter this for lclVars that are lvDoNotEnregister. + verifyLclFldDoNotEnregister(storeLoc->GetLclNum()); + } + ContainCheckStoreLoc(storeLoc); +} + +//------------------------------------------------------------------------ +// LowerStoreIndir: Determine addressing mode for an indirection, and whether operands are contained. +// +// Arguments: +// node - The indirect store node (GT_STORE_IND) of interest +// +// Return Value: +// None. 
+// +void Lowering::LowerStoreIndir(GenTreeStoreInd* node) +{ + ContainCheckStoreIndir(node); +} + +//------------------------------------------------------------------------ +// LowerBlockStore: Set block store type +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// None. +// +void Lowering::LowerBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = blkNode->Data(); + unsigned size = blkNode->Size(); + + if (blkNode->OperIsInitBlkOp()) + { + if (src->OperIs(GT_INIT_VAL)) + { + src->SetContained(); + src = src->AsUnOp()->gtGetOp1(); + } + if (blkNode->OperIs(GT_STORE_OBJ)) + { + blkNode->SetOper(GT_STORE_BLK); + } + + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= INITBLK_UNROLL_LIMIT) && src->OperIs(GT_CNS_INT)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + // The fill value of an initblk is interpreted to hold a + // value of (unsigned int8) however a constant of any size + // may practically reside on the evaluation stack. So extract + // the lower byte out of the initVal constant and replicate + // it to a larger constant whose size is sufficient to support + // the largest width store of the desired inline expansion. + + ssize_t fill = src->AsIntCon()->IconValue() & 0xFF; + if (fill == 0) + { + src->SetContained();; + } + else if (size >= REGSIZE_BYTES) + { + fill *= 0x0101010101010101LL; + src->gtType = TYP_LONG; + } + else + { + fill *= 0x01010101; + } + src->AsIntCon()->SetIconValue(fill); + + ContainBlockStoreAddress(blkNode, size, dstAddr); + } + else + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + } + else + { + assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD)); + src->SetContained(); + + if (src->OperIs(GT_IND)) + { + // TODO-Cleanup: Make sure that GT_IND lowering didn't mark the source address as contained. 
+ // Sometimes the GT_IND type is a non-struct type and then GT_IND lowering may contain the + // address, not knowing that GT_IND is part of a block op that has containment restrictions. + src->AsIndir()->Addr()->ClearContained(); + } + else if (src->OperIs(GT_LCL_VAR)) + { + // TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register. + const unsigned srcLclNum = src->AsLclVar()->GetLclNum(); + comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp)); + } + if (blkNode->OperIs(GT_STORE_OBJ)) + { + if (!blkNode->AsObj()->GetLayout()->HasGCPtr()) + { + blkNode->SetOper(GT_STORE_BLK); + } + else if (dstAddr->OperIsLocalAddr() && (size <= CPBLK_UNROLL_LIMIT)) + { + // If the size is small enough to unroll then we need to mark the block as non-interruptible + // to actually allow unrolling. The generated code does not report GC references loaded in the + // temporary register(s) used for copying. + blkNode->SetOper(GT_STORE_BLK); + blkNode->gtBlkOpGcUnsafe = true; + } + } + + // CopyObj or CopyBlk + if (blkNode->OperIs(GT_STORE_OBJ)) + { + assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL)); + + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + } +//////////////////////////////////////////////////////////////////////////////////////////////////////// + else if (blkNode->OperIs(GT_STORE_BLK) && (size <= CPBLK_UNROLL_LIMIT)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + if (src->OperIs(GT_IND)) + { + ContainBlockStoreAddress(blkNode, size, src->AsIndir()->Addr()); + } + + ContainBlockStoreAddress(blkNode, size, dstAddr); + } + else + { + assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK)); + + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + + } +} + +//------------------------------------------------------------------------ +// LowerCast: Lower GT_CAST(srcType, DstType) nodes. 
+// +// Arguments: +// tree - GT_CAST node to be lowered +// +// Return Value: +// None. +// +// Notes: +// Casts from float/double to a smaller int type are transformed as follows: +// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) +// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) +// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) +// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) +// +// Note that for the overflow conversions we still depend on helper calls and +// don't expect to see them here. +// i) GT_CAST(float/double, int type with overflow detection) +// + +void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr) +{ + assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)); + assert(size < INT32_MAX); + + if (addr->OperIsLocalAddr()) + { + addr->SetContained(); + return; + } + + if (!addr->OperIs(GT_ADD) || addr->gtOverflow() || !addr->AsOp()->gtGetOp2()->OperIs(GT_CNS_INT)) + { + return; + } + + GenTreeIntCon* offsetNode = addr->AsOp()->gtGetOp2()->AsIntCon(); + ssize_t offset = offsetNode->IconValue(); + + // All integer load/store instructions on both ARM32 and ARM64 support + // offsets in range -255..255. Of course, this is a rather conservative + // check. For example, if the offset and size are a multiple of 8 we + // could allow a combined offset of up to 32760 on ARM64. 
+ if ((offset < -255) || (offset > 255) || (offset + static_cast(size) > 256)) + { + return; + } + + if (!IsSafeToContainMem(blkNode, addr)) + { + return; + } + + BlockRange().Remove(offsetNode); + + addr->ChangeOper(GT_LEA); + addr->AsAddrMode()->SetIndex(nullptr); + addr->AsAddrMode()->SetScale(0); + addr->AsAddrMode()->SetOffset(static_cast(offset)); + addr->SetContained(); +} + + + + +void Lowering::LowerCast(GenTree* tree) +{ + assert(tree->OperGet() == GT_CAST); + + JITDUMP("LowerCast for: "); + DISPNODE(tree); + JITDUMP("\n"); + + GenTree* op1 = tree->AsOp()->gtOp1; + var_types dstType = tree->CastToType(); + var_types srcType = genActualType(op1->TypeGet()); + var_types tmpType = TYP_UNDEF; + + if (varTypeIsFloating(srcType)) + { + noway_assert(!tree->gtOverflow()); + assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small + // int. + } + + assert(!varTypeIsSmall(srcType)); + + if (tmpType != TYP_UNDEF) + { + GenTree* tmp = comp->gtNewCastNode(tmpType, op1, tree->IsUnsigned(), tmpType); + tmp->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); + + tree->gtFlags &= ~GTF_UNSIGNED; + tree->AsOp()->gtOp1 = tmp; + BlockRange().InsertAfter(op1, tmp); + } + + // Now determine if we have operands that should be contained. + ContainCheckCast(tree->AsCast()); +} + +//------------------------------------------------------------------------ +// LowerRotate: Lower GT_ROL and GT_ROR nodes. +// +// Arguments: +// tree - the node to lower +// +// Return Value: +// None. +// +void Lowering::LowerRotate(GenTree* tree) +{ + if (tree->OperGet() == GT_ROL) + { + // Convert ROL into ROR. 
+ GenTree* rotatedValue = tree->AsOp()->gtOp1; + unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8; + GenTree* rotateLeftIndexNode = tree->AsOp()->gtOp2; + + if (rotateLeftIndexNode->IsCnsIntOrI()) + { + ssize_t rotateLeftIndex = rotateLeftIndexNode->AsIntCon()->gtIconVal; + ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; + rotateLeftIndexNode->AsIntCon()->gtIconVal = rotateRightIndex; + } + else + { + GenTree* tmp = comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode); + BlockRange().InsertAfter(rotateLeftIndexNode, tmp); + tree->AsOp()->gtOp2 = tmp; + } + tree->ChangeOper(GT_ROR); + } + ContainCheckShiftRotate(tree->AsOp()); +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node. +// +void Lowering::LowerSIMD(GenTreeSIMD* simdNode) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + assert(simdNode->gtType != TYP_SIMD32); + + if (simdNode->TypeGet() == TYP_SIMD12) + { + // GT_SIMD node requiring to produce TYP_SIMD12 in fact + // produces a TYP_SIMD16 result + simdNode->gtType = TYP_SIMD16; + } + + ContainCheckSIMD(simdNode); +#endif +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node. +// +// Arguments: +// node - The hardware intrinsic node. 
+// +void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + auto intrinsicID = node->gtHWIntrinsicId; + auto intrinsicInfo = HWIntrinsicInfo::lookup(node->gtHWIntrinsicId); + + // + // Lower unsupported Unsigned Compare Zero intrinsics to their trivial transformations + // + // LOONGARCH64 does not support most forms of compare zero for Unsigned values + // This is because some are non-sensical, and the rest are trivial transformations of other operators + // + if ((intrinsicInfo.flags & HWIntrinsicInfo::LowerCmpUZero) && varTypeIsUnsigned(node->gtSIMDBaseType)) + { + auto setAllVector = node->gtSIMDSize > 8 ? NI_LOONGARCH64_SIMD_SetAllVector128 : NI_LOONGARCH64_SIMD_SetAllVector64; + + auto origOp1 = node->gtOp.gtOp1; + + switch (intrinsicID) + { + case NI_LOONGARCH64_SIMD_GT_ZERO: + // Unsigned > 0 ==> !(Unsigned == 0) + node->gtOp.gtOp1 = + comp->gtNewSimdHWIntrinsicNode(node->TypeGet(), node->gtOp.gtOp1, NI_LOONGARCH64_SIMD_EQ_ZERO, + node->gtSIMDBaseType, node->gtSIMDSize); + node->gtHWIntrinsicId = NI_LOONGARCH64_SIMD_BitwiseNot; + BlockRange().InsertBefore(node, node->gtOp.gtOp1); + break; + case NI_LOONGARCH64_SIMD_LE_ZERO: + // Unsigned <= 0 ==> Unsigned == 0 + node->gtHWIntrinsicId = NI_LOONGARCH64_SIMD_EQ_ZERO; + break; + case NI_LOONGARCH64_SIMD_GE_ZERO: + case NI_LOONGARCH64_SIMD_LT_ZERO: + // Unsigned >= 0 ==> Always true + // Unsigned < 0 ==> Always false + node->gtHWIntrinsicId = setAllVector; + node->gtOp.gtOp1 = comp->gtNewLconNode((intrinsicID == NI_LOONGARCH64_SIMD_GE_ZERO) ? 
~0ULL : 0ULL); + BlockRange().InsertBefore(node, node->gtOp.gtOp1); + if ((origOp1->gtFlags & GTF_ALL_EFFECT) == 0) + { + BlockRange().Remove(origOp1, true); + } + else + { + origOp1->SetUnusedValue(); + } + break; + default: + assert(!"Unhandled LowerCmpUZero case"); + } + } + + ContainCheckHWIntrinsic(node); +#endif +} + +//---------------------------------------------------------------------------------------------- +// Lowering::IsValidConstForMovImm: Determines if the given node can be replaced by a mov/fmov immediate instruction +// +// Arguments: +// node - The hardware intrinsic node. +// +// Returns: +// true if the node can be replaced by a mov/fmov immediate instruction; otherwise, false +// +// IMPORTANT: +// This check may end up modifying node->gtOp1 if it is a cast node that can be removed +bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) +{ + assert((node->gtHWIntrinsicId == NI_Vector64_Create) || (node->gtHWIntrinsicId == NI_Vector128_Create) || + (node->gtHWIntrinsicId == NI_Vector64_CreateScalarUnsafe) || + (node->gtHWIntrinsicId == NI_Vector128_CreateScalarUnsafe) || + (node->gtHWIntrinsicId == NI_AdvSimd_DuplicateToVector64) || + (node->gtHWIntrinsicId == NI_AdvSimd_DuplicateToVector128) || + (node->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector64) || + (node->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector128)); + assert(HWIntrinsicInfo::lookupNumArgs(node) == 1); + + GenTree* op1 = node->gtOp1; + GenTree* castOp = nullptr; + + if (varTypeIsIntegral(node->GetSimdBaseType()) && op1->OperIs(GT_CAST)) + { + // We will sometimes get a cast around a constant value (such as for + // certain long constants) which would block the below containment. + // So we will temporarily check what the cast is from instead so we + // can catch those cases as well. 
+ + castOp = op1->AsCast()->CastOp(); + op1 = castOp; + } + + if (op1->IsCnsIntOrI()) + { + const ssize_t dataValue = op1->AsIntCon()->gtIconVal; + + if (comp->GetEmitter()->emitIns_valid_imm_for_movi(dataValue, emitActualTypeSize(node->GetSimdBaseType()))) + { + if (castOp != nullptr) + { + // We found a containable immediate under + // a cast, so remove the cast from the LIR. + + BlockRange().Remove(node->gtOp1); + node->gtOp1 = op1; + } + return true; + } + } + else if (op1->IsCnsFltOrDbl()) + { + assert(varTypeIsFloating(node->GetSimdBaseType())); + assert(castOp == nullptr); + + const double dataValue = op1->AsDblCon()->gtDconVal; + return comp->GetEmitter()->emitIns_valid_imm_for_fmov(dataValue); + } + + return false; +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCmpOp: Lowers a Vector128 or Vector256 comparison intrinsic +// +// Arguments: +// node - The hardware intrinsic node. +// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE +// +void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + var_types simdBaseType = node->GetSimdBaseType(); + unsigned simdSize = node->GetSimdSize(); + var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + + assert((intrinsicId == NI_Vector64_op_Equality) || (intrinsicId == NI_Vector64_op_Inequality) || + (intrinsicId == NI_Vector128_op_Equality) || (intrinsicId == NI_Vector128_op_Inequality)); + + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(simdBaseType)); + assert(simdSize != 0); + assert(node->gtType == TYP_BOOL); + assert((cmpOp == GT_EQ) || (cmpOp == GT_NE)); + + // We have the following (with the appropriate simd size and where the intrinsic could be op_Inequality): + // /--* op2 simd + // /--* op1 simd + // node = * HWINTRINSIC simd T op_Equality + + 
GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + NamedIntrinsic cmpIntrinsic; + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + case TYP_SHORT: + case TYP_USHORT: + case TYP_INT: + case TYP_UINT: + case TYP_FLOAT: + { + cmpIntrinsic = NI_AdvSimd_CompareEqual; + break; + } + + case TYP_LONG: + case TYP_ULONG: + case TYP_DOUBLE: + { + cmpIntrinsic = NI_AdvSimd_Arm64_CompareEqual; + break; + } + + default: + { + unreached(); + } + } + + GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, cmpIntrinsic, simdBaseJitType, simdSize); + BlockRange().InsertBefore(node, cmp); + LowerNode(cmp); + + if ((simdBaseType == TYP_FLOAT) && (simdSize == 12)) + { + // For TYP_SIMD12 we don't want the upper bits to participate in the comparison. So, we will insert all ones + // into those bits of the result, "as if" the upper bits are equal. Then if all lower bits are equal, we get the + // expected all-ones result, and will get the expected 0's only where there are non-matching bits. 
+ + GenTree* idxCns = comp->gtNewIconNode(3, TYP_INT); + BlockRange().InsertAfter(cmp, idxCns); + + GenTree* insCns = comp->gtNewIconNode(-1, TYP_INT); + BlockRange().InsertAfter(idxCns, insCns); + + GenTree* tmp = comp->gtNewSimdAsHWIntrinsicNode(simdType, cmp, idxCns, insCns, NI_AdvSimd_Insert, + CORINFO_TYPE_INT, simdSize); + BlockRange().InsertAfter(insCns, tmp); + LowerNode(tmp); + + cmp = tmp; + } + + GenTree* msk = + comp->gtNewSimdHWIntrinsicNode(simdType, cmp, NI_AdvSimd_Arm64_MinAcross, CORINFO_TYPE_UBYTE, simdSize); + BlockRange().InsertAfter(cmp, msk); + LowerNode(msk); + + GenTree* zroCns = comp->gtNewIconNode(0, TYP_INT); + BlockRange().InsertAfter(msk, zroCns); + + GenTree* val = + comp->gtNewSimdAsHWIntrinsicNode(TYP_UBYTE, msk, zroCns, NI_AdvSimd_Extract, CORINFO_TYPE_UBYTE, simdSize); + BlockRange().InsertAfter(zroCns, val); + LowerNode(val); + + zroCns = comp->gtNewIconNode(0, TYP_INT); + BlockRange().InsertAfter(val, zroCns); + + node->ChangeOper(cmpOp); + + node->gtType = TYP_INT; + node->gtOp1 = val; + node->gtOp2 = zroCns; + + // The CompareEqual will set (condition is true) or clear (condition is false) all bits of the respective element + // The MinAcross then ensures we get either all bits set (all conditions are true) or clear (any condition is false) + // So, we need to invert the condition from the operation since we compare against zero + + GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::NE : GenCondition::EQ; + GenTree* cc = LowerNodeCC(node, cmpCnd); + + node->gtType = TYP_VOID; + node->ClearUnusedValue(); + + LowerNode(node); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCreate: Lowers a Vector64 or Vector128 Create call +// +// Arguments: +// node - The hardware intrinsic node. 
+// +void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + var_types simdType = node->gtType; + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + var_types simdBaseType = node->GetSimdBaseType(); + unsigned simdSize = node->GetSimdSize(); + VectorConstant vecCns = {}; + + if ((simdSize == 8) && (simdType == TYP_DOUBLE)) + { + // TODO-Cleanup: Struct retyping means we have the wrong type here. We need to + // manually fix it up so the simdType checks below are correct. + simdType = TYP_SIMD8; + } + + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(simdBaseType)); + assert(simdSize != 0); + + GenTreeArgList* argList = nullptr; + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + // Spare GenTrees to be used for the lowering logic below + // Defined upfront to avoid naming conflicts, etc... + GenTree* idx = nullptr; + GenTree* tmp1 = nullptr; + GenTree* tmp2 = nullptr; + GenTree* tmp3 = nullptr; + + assert(op1 != nullptr); + + unsigned argCnt = 0; + unsigned cnsArgCnt = 0; + + if (op1->OperIsList()) + { + assert(op2 == nullptr); + + for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) + { + if (HandleArgForHWIntrinsicCreate(argList->Current(), argCnt, vecCns, simdBaseType)) + { + cnsArgCnt += 1; + } + argCnt += 1; + } + } + else + { + if (HandleArgForHWIntrinsicCreate(op1, argCnt, vecCns, simdBaseType)) + { + cnsArgCnt += 1; + } + argCnt += 1; + + if (op2 != nullptr) + { + if (HandleArgForHWIntrinsicCreate(op2, argCnt, vecCns, simdBaseType)) + { + cnsArgCnt += 1; + } + argCnt += 1; + } + else if (cnsArgCnt == 1) + { + // These intrinsics are meant to set the same value to every element + // so we'll just specially handle it here and copy it into the remaining + // indices. 
+ + for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) + { + HandleArgForHWIntrinsicCreate(op1, i, vecCns, simdBaseType); + } + } + } + assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType)))); + + if ((argCnt == cnsArgCnt) && (argCnt == 1)) + { + GenTree* castOp = nullptr; + + if (varTypeIsIntegral(simdBaseType) && op1->OperIs(GT_CAST)) + { + // We will sometimes get a cast around a constant value (such as for + // certain long constants) which would block the below containment. + // So we will temporarily check what the cast is from instead so we + // can catch those cases as well. + + castOp = op1->AsCast()->CastOp(); + op1 = castOp; + } + + if (IsValidConstForMovImm(node)) + { + // Set the cnsArgCnt to zero so we get lowered to a DuplicateToVector + // intrinsic, which will itself mark the node as contained. + cnsArgCnt = 0; + + // Reacquire op1 as the above check may have removed a cast node and + // changed op1. + op1 = node->gtOp1; + } + } + + if (argCnt == cnsArgCnt) + { + if (op1->OperIsList()) + { + for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) + { + BlockRange().Remove(argList->Current()); + } + } + else + { + BlockRange().Remove(op1); + + if (op2 != nullptr) + { + BlockRange().Remove(op2); + } + } + + assert((simdSize == 8) || (simdSize == 16)); + + if (VectorConstantIsBroadcastedI64(vecCns, simdSize / 8)) + { + // If we are a single constant or if all parts are the same, we might be able to optimize + // this even further for certain values, such as Zero or AllBitsSet. + + if (vecCns.i64[0] == 0) + { + node->gtOp1 = nullptr; + node->gtOp2 = nullptr; + node->gtHWIntrinsicId = (simdSize == 8) ? NI_Vector64_get_Zero : NI_Vector128_get_Zero; + return; + } + else if (vecCns.i64[0] == -1) + { + node->gtOp1 = nullptr; + node->gtOp2 = nullptr; + node->gtHWIntrinsicId = (simdSize == 8) ? 
NI_Vector64_get_AllBitsSet : NI_Vector128_get_AllBitsSet; + return; + } + } + + unsigned cnsSize = (simdSize == 12) ? 16 : simdSize; + unsigned cnsAlign = cnsSize; + var_types dataType = Compiler::getSIMDTypeForSize(simdSize); + + UNATIVE_OFFSET cnum = comp->GetEmitter()->emitDataConst(&vecCns, cnsSize, cnsAlign, dataType); + CORINFO_FIELD_HANDLE hnd = comp->eeFindJitDataOffs(cnum); + GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(GT_CLS_VAR_ADDR, TYP_I_IMPL, hnd, nullptr); + BlockRange().InsertBefore(node, clsVarAddr); + + node->ChangeOper(GT_IND); + node->gtOp1 = clsVarAddr; + + // TODO-ARM64-CQ: We should be able to modify at least the paths that use Insert to trivially support partial + // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just + // insert the non-constant values which should still allow some gains. + + return; + } + else if (argCnt == 1) + { + // We have the following (where simd is simd8 or simd16): + // /--* op1 T + // node = * HWINTRINSIC simd T Create + + // We will be constructing the following parts: + // /--* op1 T + // node = * HWINTRINSIC simd T DuplicateToVector + + // This is roughly the following managed code: + // return AdvSimd.Arm64.DuplicateToVector(op1); + + if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE)) + { + node->gtHWIntrinsicId = + (simdType == TYP_SIMD8) ? NI_AdvSimd_Arm64_DuplicateToVector64 : NI_AdvSimd_Arm64_DuplicateToVector128; + } + else + { + node->gtHWIntrinsicId = + (simdType == TYP_SIMD8) ? NI_AdvSimd_DuplicateToVector64 : NI_AdvSimd_DuplicateToVector128; + } + return; + } + + // We have the following (where simd is simd8 or simd16): + // /--* op1 T + // +--* ... 
T + // +--* opN T + // node = * HWINTRINSIC simd T Create + + if (op1->OperIsList()) + { + argList = op1->AsArgList(); + op1 = argList->Current(); + argList = argList->Rest(); + } + + // We will be constructing the following parts: + // /--* op1 T + // tmp1 = * HWINTRINSIC simd8 T CreateScalarUnsafe + // ... + + // This is roughly the following managed code: + // var tmp1 = Vector64.CreateScalarUnsafe(op1); + // ... + + NamedIntrinsic createScalarUnsafe = + (simdType == TYP_SIMD8) ? NI_Vector64_CreateScalarUnsafe : NI_Vector128_CreateScalarUnsafe; + + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, createScalarUnsafe, simdBaseJitType, simdSize); + BlockRange().InsertAfter(op1, tmp1); + LowerNode(tmp1); + + unsigned N = 0; + GenTree* opN = nullptr; + + for (N = 1; N < argCnt - 1; N++) + { + // We will be constructing the following parts: + // ... + // idx = CNS_INT int N + // /--* tmp1 simd + // +--* idx int + // +--* opN T + // tmp1 = * HWINTRINSIC simd T Insert + // ... + + // This is roughly the following managed code: + // ... + // tmp1 = AdvSimd.Insert(tmp1, N, opN); + // ... + + opN = argList->Current(); + + idx = comp->gtNewIconNode(N, TYP_INT); + BlockRange().InsertBefore(opN, idx); + + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, idx, opN, NI_AdvSimd_Insert, simdBaseJitType, simdSize); + BlockRange().InsertAfter(opN, tmp1); + LowerNode(tmp1); + + argList = argList->Rest(); + } + + assert(N == (argCnt - 1)); + + // We will be constructing the following parts: + // idx = CNS_INT int N + // /--* tmp1 simd + // +--* idx int + // +--* opN T + // node = * HWINTRINSIC simd T Insert + + // This is roughly the following managed code: + // ... + // tmp1 = AdvSimd.Insert(tmp1, N, opN); + // ... + + opN = (argCnt == 2) ? 
op2 : argList->Current(); + + idx = comp->gtNewIconNode(N, TYP_INT); + BlockRange().InsertBefore(opN, idx); + + node->gtOp1 = comp->gtNewArgList(tmp1, idx, opN); + node->gtOp2 = nullptr; + + node->gtHWIntrinsicId = NI_AdvSimd_Insert; +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicDot: Lowers a Vector64 or Vector128 Dot call +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + var_types simdBaseType = node->GetSimdBaseType(); + unsigned simdSize = node->GetSimdSize(); + var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + + assert((intrinsicId == NI_Vector64_Dot) || (intrinsicId == NI_Vector128_Dot)); + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(simdBaseType)); + assert(simdSize != 0); + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + assert(op1 != nullptr); + assert(op2 != nullptr); + assert(!op1->OperIsList()); + + // Spare GenTrees to be used for the lowering logic below + // Defined upfront to avoid naming conflicts, etc... + GenTree* idx = nullptr; + GenTree* tmp1 = nullptr; + GenTree* tmp2 = nullptr; + + if (simdSize == 12) + { + assert(simdBaseType == TYP_FLOAT); + + // For 12 byte SIMD, we need to clear the upper 4 bytes: + // idx = CNS_INT int 0x03 + // tmp1 = * CNS_DLB float 0.0 + // /--* op1 simd16 + // +--* idx int + // +--* tmp1 simd16 + // op1 = * HWINTRINSIC simd16 T Insert + // ... + + // This is roughly the following managed code: + // op1 = AdvSimd.Insert(op1, 0x03, 0.0f); + // ... 
+ + idx = comp->gtNewIconNode(0x03, TYP_INT); + BlockRange().InsertAfter(op1, idx); + + tmp1 = comp->gtNewZeroConNode(TYP_FLOAT); + BlockRange().InsertAfter(idx, tmp1); + LowerNode(tmp1); + + op1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op1, idx, tmp1, NI_AdvSimd_Insert, simdBaseJitType, simdSize); + BlockRange().InsertAfter(tmp1, op1); + LowerNode(op1); + + idx = comp->gtNewIconNode(0x03, TYP_INT); + BlockRange().InsertAfter(op2, idx); + + tmp2 = comp->gtNewZeroConNode(TYP_FLOAT); + BlockRange().InsertAfter(idx, tmp2); + LowerNode(tmp2); + + op2 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op2, idx, tmp2, NI_AdvSimd_Insert, simdBaseJitType, simdSize); + BlockRange().InsertAfter(tmp2, op2); + LowerNode(op2); + } + + // We will be constructing the following parts: + // ... + // /--* op1 simd16 + // +--* op2 simd16 + // tmp1 = * HWINTRINSIC simd16 T Multiply + // ... + + // This is roughly the following managed code: + // ... + // var tmp1 = AdvSimd.Multiply(op1, op2); + // ... + + NamedIntrinsic multiply = (simdBaseType == TYP_DOUBLE) ? NI_AdvSimd_Arm64_Multiply : NI_AdvSimd_Multiply; + assert(!varTypeIsLong(simdBaseType)); + + tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op1, op2, multiply, simdBaseJitType, simdSize); + BlockRange().InsertBefore(node, tmp1); + LowerNode(tmp1); + + if (varTypeIsFloating(simdBaseType)) + { + // We will be constructing the following parts: + // ... + // /--* tmp1 simd16 + // * STORE_LCL_VAR simd16 + // tmp1 = LCL_VAR simd16 + // tmp2 = LCL_VAR simd16 + // ... + + // This is roughly the following managed code: + // ... + // var tmp2 = tmp1; + // ... + + node->gtOp1 = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + ReplaceWithLclVar(tmp1Use); + tmp1 = node->gtOp1; + + tmp2 = comp->gtClone(tmp1); + BlockRange().InsertAfter(tmp1, tmp2); + + if (simdSize == 8) + { + assert(simdBaseType == TYP_FLOAT); + + // We will be constructing the following parts: + // ... 
+ // /--* tmp1 simd8 + // +--* tmp2 simd8 + // tmp1 = * HWINTRINSIC simd8 T AddPairwise + // ... + + // This is roughly the following managed code: + // ... + // var tmp1 = AdvSimd.AddPairwise(tmp1, tmp2); + // ... + + tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_AddPairwise, simdBaseJitType, + simdSize); + BlockRange().InsertAfter(tmp2, tmp1); + LowerNode(tmp1); + } + else + { + assert((simdSize == 12) || (simdSize == 16)); + + // We will be constructing the following parts: + // ... + // /--* tmp1 simd16 + // +--* tmp2 simd16 + // tmp2 = * HWINTRINSIC simd16 T AddPairwise + // ... + + // This is roughly the following managed code: + // ... + // var tmp1 = AdvSimd.Arm64.AddPairwise(tmp1, tmp2); + // ... + + tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_Arm64_AddPairwise, simdBaseJitType, + simdSize); + BlockRange().InsertAfter(tmp2, tmp1); + LowerNode(tmp1); + + if (simdBaseType == TYP_FLOAT) + { + // Float needs an additional pairwise add to finish summing the parts + // The first will have summed e0 with e1 and e2 with e3 and then repeats that for the upper half + // So, we will have a vector that looks like this: + // < e0 + e1, e2 + e3, e0 + e1, e2 + e3> + // Doing a second horizontal add with itself will then give us + // e0 + e1 + e2 + e3 in all elements of the vector + + // We will be constructing the following parts: + // ... + // /--* tmp1 simd16 + // * STORE_LCL_VAR simd16 + // tmp1 = LCL_VAR simd16 + // tmp2 = LCL_VAR simd16 + // /--* tmp1 simd16 + // +--* tmp2 simd16 + // tmp2 = * HWINTRINSIC simd16 T AddPairwise + // ... + + // This is roughly the following managed code: + // ... + // var tmp2 = tmp1; + // var tmp1 = AdvSimd.Arm64.AddPairwise(tmp1, tmp2); + // ... 
+ + node->gtOp1 = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + ReplaceWithLclVar(tmp1Use); + tmp1 = node->gtOp1; + + tmp2 = comp->gtClone(tmp1); + BlockRange().InsertAfter(tmp1, tmp2); + + tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_Arm64_AddPairwise, + simdBaseJitType, simdSize); + BlockRange().InsertAfter(tmp2, tmp1); + LowerNode(tmp1); + } + } + + tmp2 = tmp1; + } + else + { + assert(varTypeIsIntegral(simdBaseType)); + + // We will be constructing the following parts: + // ... + // /--* tmp1 simd16 + // tmp2 = * HWINTRINSIC simd16 T AddAcross + // ... + + // This is roughly the following managed code: + // ... + // var tmp2 = AdvSimd.Arm64.AddAcross(tmp1); + // ... + + tmp2 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, simdSize); + BlockRange().InsertAfter(tmp1, tmp2); + LowerNode(tmp2); + } + + // We will be constructing the following parts: + // ... + // /--* tmp2 simd16 + // node = * HWINTRINSIC simd16 T ToScalar + + // This is roughly the following managed code: + // ... + // return tmp2.ToScalar(); + + node->gtOp1 = tmp2; + node->gtOp2 = nullptr; + + node->gtHWIntrinsicId = (simdSize == 8) ? NI_Vector64_ToScalar : NI_Vector128_ToScalar; + LowerNode(node); + + return; +} + +#endif // FEATURE_HW_INTRINSICS + +//------------------------------------------------------------------------ +// Containment analysis +//------------------------------------------------------------------------ + +//------------------------------------------------------------------------ +// ContainCheckCallOperands: Determine whether operands of a call should be contained. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// None. +// +void Lowering::ContainCheckCallOperands(GenTreeCall* call) +{ + // There are no contained operands for LOONGARCH. 
+} + +//------------------------------------------------------------------------ +// ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) +{ +#if 0 +assert(!"unimplemented on LOONGARCH yet"); +#else + + GenTree* src = node->Data(); + if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) + { + // an integer zero for 'src' can be contained. + MakeSrcContained(node, src); + } + + ContainCheckIndir(node); + +#endif +} + +//------------------------------------------------------------------------ +// ContainCheckIndir: Determine whether operands of an indir should be contained. +// +// Arguments: +// indirNode - The indirection node of interest +// +// Notes: +// This is called for both store and load indirections. +// +// Return Value: +// None. +// +void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) +{ + // If this is the rhs of a block copy it will be handled when we handle the store. + if (indirNode->TypeGet() == TYP_STRUCT) + { + return; + } + +#ifdef FEATURE_SIMD + assert(!"unimplemented on LOONGARCH yet"); + // If indirTree is of TYP_SIMD12, don't mark addr as contained + // so that it always get computed to a register. This would + // mean codegen side logic doesn't need to handle all possible + // addr expressions that could be contained. + // + // TODO-LOONGARCH64-CQ: handle other addr mode expressions that could be marked + // as contained. + if (indirNode->TypeGet() == TYP_SIMD12) + { + return; + } +#endif // FEATURE_SIMD + + GenTree* addr = indirNode->Addr(); + if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) + { + MakeSrcContained(indirNode, addr); + } + else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // These nodes go into an addr mode: + // - GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR is a stack addr mode. 
+ MakeSrcContained(indirNode, addr); + } + else if (addr->OperIs(GT_CLS_VAR_ADDR)) + { + // These nodes go into an addr mode: + // - GT_CLS_VAR_ADDR turns into a constant. + // make this contained, it turns into a constant that goes into an addr mode + MakeSrcContained(indirNode, addr); + } + +} + +//------------------------------------------------------------------------ +// ContainCheckBinary: Determine whether a binary op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckBinary(GenTreeOp* node) +{ + // Check and make op2 contained (if it is a containable immediate) + CheckImmedAndMakeContained(node, node->gtOp2); +} + +//------------------------------------------------------------------------ +// ContainCheckMul: Determine whether a mul op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckMul(GenTreeOp* node) +{ + ContainCheckBinary(node); +} + +//------------------------------------------------------------------------ +// ContainCheckDivOrMod: determine which operands of a div/mod should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckDivOrMod(GenTreeOp* node) +{ + assert(node->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); +} + +//------------------------------------------------------------------------ +// ContainCheckShiftRotate: Determine whether a mul op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckShiftRotate(GenTreeOp* node) +{ + GenTree* shiftBy = node->gtOp2; + assert(node->OperIsShiftOrRotate()); + + if (shiftBy->IsCnsIntOrI()) + { + MakeSrcContained(node, shiftBy); + } +} + +//------------------------------------------------------------------------ +// ContainCheckStoreLoc: determine whether the source of a STORE_LCL* should be contained. 
+// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const +{ + assert(storeLoc->OperIsLocalStore()); + GenTree* op1 = storeLoc->gtGetOp1(); + + if (op1->OperIs(GT_BITCAST)) + { + // If we know that the source of the bitcast will be in a register, then we can make + // the bitcast itself contained. This will allow us to store directly from the other + // type if this node doesn't get a register. + GenTree* bitCastSrc = op1->gtGetOp1(); + if (!bitCastSrc->isContained() && !bitCastSrc->IsRegOptional()) + { + op1->SetContained(); + return; + } + } + + const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); + + + + +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(storeLoc)) + { + // If this is a store to memory, we can initialize a zero vector in memory from REG_ZR. + if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister) + { + // For an InitBlk we want op1 to be contained + MakeSrcContained(storeLoc, op1); + if (op1->IsSIMDZero()) + { + MakeSrcContained(op1, op1->gtGetOp1()); + } + } + return; + } +#endif // FEATURE_SIMD + if (IsContainableImmed(storeLoc, op1)) + { + MakeSrcContained(storeLoc, op1); + } + + // If the source is a containable immediate, make it contained, unless it is + // an int-size or larger store of zero to memory, because we can generate smaller code + // by zeroing a register and then storing it. + var_types type = varDsc->GetRegisterType(storeLoc); + if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(type))) + { + MakeSrcContained(storeLoc, op1); + } +} + +//------------------------------------------------------------------------ +// ContainCheckCast: determine whether the source of a CAST node should be contained. 
+// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckCast(GenTreeCast* node) +{ +} + +//------------------------------------------------------------------------ +// ContainCheckCompare: determine whether the sources of a compare node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckCompare(GenTreeOp* cmp) +{ + CheckImmedAndMakeContained(cmp, cmp->gtOp2); +} + +//------------------------------------------------------------------------ +// ContainCheckBoundsChk: determine whether any source of a bounds check node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) +{ + assert(node->OperIsBoundsCheck()); + if (!CheckImmedAndMakeContained(node, node->GetIndex())) + { + CheckImmedAndMakeContained(node, node->GetArrayLength()); + } +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node. +// +void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + switch (simdNode->gtSIMDIntrinsicID) + { + GenTree* op1; + GenTree* op2; + + case SIMDIntrinsicInit: + op1 = simdNode->gtOp.gtOp1; + if (op1->IsIntegralConst(0)) + { + MakeSrcContained(simdNode, op1); + } + break; + + case SIMDIntrinsicInitArray: + // We have an array and an index, which may be contained. + CheckImmedAndMakeContained(simdNode, simdNode->gtGetOp2()); + break; + + case SIMDIntrinsicOpEquality: + case SIMDIntrinsicOpInEquality: + // TODO-LOONGARCH64-CQ Support containing 0 + break; + + case SIMDIntrinsicGetItem: + { + // This implements get_Item method. The sources are: + // - the source SIMD struct + // - index (which element to get) + // The result is baseType of SIMD struct. 
+ op1 = simdNode->gtOp.gtOp1; + op2 = simdNode->gtOp.gtOp2; + + // If the index is a constant, mark it as contained. + if (op2->IsCnsIntOrI()) + { + MakeSrcContained(simdNode, op2); + } + + if (IsContainableMemoryOp(op1)) + { + MakeSrcContained(simdNode, op1); + if (op1->OperGet() == GT_IND) + { + op1->AsIndir()->Addr()->ClearContained(); + } + } + break; + } + + default: + break; + } +#endif +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +//---------------------------------------------------------------------------------------------- +// ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node. +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + GenTreeArgList* argList = nullptr; + GenTree* op1 = node->gtOp.gtOp1; + GenTree* op2 = node->gtOp.gtOp2; + + if (op1->OperIs(GT_LIST)) + { + argList = op1->AsArgList(); + op1 = argList->Current(); + op2 = argList->Rest()->Current(); + } + + switch (HWIntrinsicInfo::lookup(node->gtHWIntrinsicId).form) + { + case HWIntrinsicInfo::SimdExtractOp: + if (op2->IsCnsIntOrI()) + { + MakeSrcContained(node, op2); + } + break; + + case HWIntrinsicInfo::SimdInsertOp: + if (op2->IsCnsIntOrI()) + { + MakeSrcContained(node, op2); + +#if 0 + // This is currently not supported downstream. The following (at least) need to be modifed: + // GenTree::isContainableHWIntrinsic() needs to handle this. 
+ // CodeGen::genConsumRegs() + // + GenTree* op3 = argList->Rest()->Rest()->Current(); + + // In the HW intrinsics C# API there is no direct way to specify a vector element to element mov + // VX[a] = VY[b] + // In C# this would naturally be expressed by + // Insert(VX, a, Extract(VY, b)) + // If both a & b are immediate constants contain the extract/getItem so that we can emit + // the single instruction mov Vx[a], Vy[b] + if (op3->OperIs(GT_HWIntrinsic) && (op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_LOONGARCH64_SIMD_GetItem)) + { + ContainCheckHWIntrinsic(op3->AsHWIntrinsic()); + + if (op3->gtOp.gtOp2->isContained()) + { + MakeSrcContained(node, op3); + } + } +#endif + } + break; + + default: + break; + } +#endif +} +#endif // FEATURE_HW_INTRINSICS + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 9308836ff78fc..b43d29ed7cfc3 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -702,6 +702,8 @@ LinearScan::LinearScan(Compiler* theCompiler) enregisterLocalVars = compiler->compEnregLocals(); #ifdef TARGET_ARM64 availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd); +#elif TARGET_LOONGARCH64 + availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); #else availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); #endif @@ -1570,7 +1572,11 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) // vars will have `lvMustInit` set, because emitter has poor support for struct liveness, // but if the variable is tracked the prolog generator would expect it to be in liveIn set, // so an assert in `genFnProlog` will fire. 
+#ifdef TARGET_LOONGARCH64 + return !genIsValidFloatReg(varDsc->GetOtherArgReg()) && compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); +#else return compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); +#endif case TYP_UNDEF: case TYP_UNKNOWN: @@ -2571,7 +2577,7 @@ void LinearScan::setFrameType() compiler->rpFrameType = frameType; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Determine whether we need to reserve a register for large lclVar offsets. if (compiler->compRsvdRegCheck(Compiler::REGALLOC_FRAME_LAYOUT)) { @@ -2581,7 +2587,7 @@ void LinearScan::setFrameType() JITDUMP(" Reserved REG_OPT_RSVD (%s) due to large frame\n", getRegName(REG_OPT_RSVD)); removeMask |= RBM_OPT_RSVD; } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 if ((removeMask != RBM_NONE) && ((availableIntRegs & removeMask) != 0)) { @@ -2647,11 +2653,16 @@ RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* assert(refPosition->getInterval() == currentInterval); RegisterType regType = currentInterval->registerType; regMaskTP candidates = refPosition->registerAssignment; - +#ifdef TARGET_LOONGARCH64 + if ((candidates & allRegs(regType)) != RBM_NONE) + return regType; + else + return TYP_I_IMPL; +#else assert((candidates & allRegs(regType)) != RBM_NONE); return regType; +#endif } - //------------------------------------------------------------------------ // isMatchingConstant: Check to see whether a given register contains the constant referenced // by the given RefPosition @@ -7673,7 +7684,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Next, if this blocks ends with a JCMP, we have to make sure: // 1. Not to copy into the register that JCMP uses // e.g. 
JCMP w21, BRANCH @@ -7786,7 +7797,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) sameToReg = REG_NA; } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (jcmpLocalVarDsc && (jcmpLocalVarDsc->lvVarIndex == outResolutionSetVarIndex)) { sameToReg = REG_NA; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 1b549424f0fc4..5e803336e2b4d 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -762,6 +762,9 @@ class LinearScan : public LinearScanInterface #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +#elif defined(TARGET_LOONGARCH64) + static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); + static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #else #error Unsupported or unset target architecture #endif // target @@ -2215,7 +2218,12 @@ class RefPosition // The max bits needed is based on max value of MAX_RET_REG_COUNT value // across all targets and that happens 4 on on Arm. Hence index value // would be 0..MAX_RET_REG_COUNT-1. +#ifdef TARGET_LOONGARCH64 + //TODO for LOONGARCH64: should confirm for ArgSplit? 
+ unsigned char multiRegIdx : 3; +#else // !TARGET_LOONGARCH64 unsigned char multiRegIdx : 2; +#endif // !TARGET_LOONGARCH64 // Last Use - this may be true for multiple RefPositions in the same Interval unsigned char lastUse : 1; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index fb5747079fa90..e139d877e1322 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -593,7 +593,9 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, regNumber physicalReg = genRegNumFromMask(mask); RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); +#ifndef TARGET_LOONGARCH64 assert((allRegs(theInterval->registerType) & mask) != 0); +#endif } RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); @@ -3927,6 +3929,13 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) addrCandidates = RBM_WRITE_BARRIER_DST; srcCandidates = RBM_WRITE_BARRIER_SRC; +#elif defined(TARGET_LOONGARCH64) + // the 'addr' goes into (REG_WRITE_BARRIER_DST) + // the 'src' goes into (REG_WRITE_BARRIER_SRC) + // + addrCandidates = RBM_WRITE_BARRIER_DST; + srcCandidates = RBM_WRITE_BARRIER_SRC; + #elif defined(TARGET_X86) && NOGC_WRITE_BARRIERS bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src); diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp new file mode 100644 index 0000000000000..8ce30f7bb6855 --- /dev/null +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -0,0 +1,1731 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Register Requirements for LOONGARCH64 XX +XX XX +XX This encapsulates all the logic for setting register requirements for XX +XX the LOONGARCH64 architecture. XX +XX XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" + +//------------------------------------------------------------------------ +// BuildNode: Build the RefPositions for for a node +// +// Arguments: +// treeNode - the node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Preconditions: +// LSRA Has been initialized. +// +// Postconditions: +// RefPositions have been built for all the register defs and uses required +// for this node. +// +int LinearScan::BuildNode(GenTree* tree) +{ + assert(!tree->isContained()); + int srcCount; + int dstCount = 0; + regMaskTP dstCandidates = RBM_NONE; + regMaskTP killMask = RBM_NONE; + bool isLocalDefUse = false; + + // Reset the build-related members of LinearScan. + clearBuildState(); + + // Set the default dstCount. This may be modified below. + if (tree->IsValue()) + { + dstCount = 1; + if (tree->IsUnusedValue()) + { + isLocalDefUse = true; + } + } + else + { + dstCount = 0; + } + + switch (tree->OperGet()) + { + default: + srcCount = BuildSimple(tree); + break; + + case GT_LCL_VAR: + // We make a final determination about whether a GT_LCL_VAR is a candidate or contained + // after liveness. In either case we don't build any uses or defs. 
Otherwise, this is a + // load of a stack-based local into a register and we'll fall through to the general + // local case below. + if (checkContainedOrCandidateLclVar(tree->AsLclVar())) + { + return 0; + } + FALLTHROUGH; + case GT_LCL_FLD: + { + srcCount = 0; +#ifdef FEATURE_SIMD + // Need an additional register to read upper 4 bytes of Vector3. + if (tree->TypeGet() == TYP_SIMD12) + { + // We need an internal register different from targetReg in which 'tree' produces its result + // because both targetReg and internal reg will be in use at the same time. + buildInternalFloatRegisterDefForNode(tree, allSIMDRegs()); + setInternalRegsDelayFree = true; + buildInternalRegisterUses(); + } +#endif + BuildDef(tree); + } + break; + + case GT_STORE_LCL_VAR: + if (tree->IsMultiRegLclVar() && isCandidateMultiRegLclVar(tree->AsLclVar())) + { + dstCount = compiler->lvaGetDesc(tree->AsLclVar()->GetLclNum())->lvFieldCnt; + } + FALLTHROUGH; + + case GT_STORE_LCL_FLD: + srcCount = BuildStoreLoc(tree->AsLclVarCommon()); + break; + + case GT_FIELD_LIST: + // These should always be contained. We don't correctly allocate or + // generate code for a non-contained GT_FIELD_LIST. + noway_assert(!"Non-contained GT_FIELD_LIST"); + srcCount = 0; + break; + + case GT_ARGPLACE: + case GT_NO_OP: + case GT_START_NONGC: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_PROF_HOOK: + srcCount = 0; + assert(dstCount == 0); + killMask = getKillSetForProfilerHook(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_START_PREEMPTGC: + // This kills GC refs in callee save regs + srcCount = 0; + assert(dstCount == 0); + BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + break; + + case GT_CNS_DBL: + { + GenTreeDblCon* dblConst = tree->AsDblCon(); + double constValue = dblConst->AsDblCon()->gtDconVal; + + if ((constValue == (double)(int)constValue) && (-2048 <= constValue) && (constValue <= 2047)) + { + // Directly encode constant to instructions. 
+ } + else + { + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + } + } + FALLTHROUGH; + + case GT_CNS_INT: + { + srcCount = 0; + assert(dstCount == 1); + RefPosition* def = BuildDef(tree); + def->getInterval()->isConstant = true; + } + break; + + case GT_BOX: + case GT_COMMA: + case GT_QMARK: + case GT_COLON: + srcCount = 0; + assert(dstCount == 0); + unreached(); + break; + + case GT_RETURN: + srcCount = BuildReturn(tree); + killMask = getKillSetForReturn(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_RETFILT: + assert(dstCount == 0); + if (tree->TypeGet() == TYP_VOID) + { + srcCount = 0; + } + else + { + assert(tree->TypeGet() == TYP_INT); + srcCount = 1; + BuildUse(tree->gtGetOp1(), RBM_INTRET); + } + break; + + case GT_NOP: + // A GT_NOP is either a passthrough (if it is void, or if it has + // a child), but must be considered to produce a dummy value if it + // has a type but no child. + srcCount = 0; + if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr) + { + assert(dstCount == 1); + BuildDef(tree); + } + else + { + assert(dstCount == 0); + } + break; + + case GT_KEEPALIVE: + assert(dstCount == 0); + srcCount = BuildOperandUses(tree->gtGetOp1()); + break; + + case GT_JTRUE: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_JMP: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_SWITCH: + // This should never occur since switch nodes must not be visible at this + // point in the JIT. 
+ srcCount = 0; + noway_assert(!"Switch must be lowered at this point"); + break; + + case GT_JMPTABLE: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_SWITCH_TABLE: + buildInternalIntRegisterDefForNode(tree); + srcCount = BuildBinaryUses(tree->AsOp()); + assert(dstCount == 0); + break; + + case GT_ASG: + noway_assert(!"We should never hit any assignment operator in lowering"); + srcCount = 0; + break; + + case GT_ADD: + case GT_SUB: + if (varTypeIsFloating(tree->TypeGet())) + { + // overflow operations aren't supported on float/double types. + assert(!tree->gtOverflow()); + + // No implicit conversions at this stage as the expectation is that + // everything is made explicit by adding casts. + assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); + } + + if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } + FALLTHROUGH; + + case GT_AND: + case GT_OR: + case GT_XOR: + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalRegisterUses(); + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_RETURNTRAP: + // this just turns into a compare of its child with an int + // + a conditional call + BuildUse(tree->gtGetOp1()); + srcCount = 1; + assert(dstCount == 0); + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + //case GT_MOD: + //case GT_UMOD: + // NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in LOONGARCH64"); + // assert(!"Shouldn't see an integer typed GT_MOD node in LOONGARCH64"); + // srcCount = 0; + // break; + + case GT_MUL: + case GT_MOD: + case GT_UMOD: + case GT_DIV: + case GT_MULHI: + case GT_UDIV: + { + if (emitActualTypeSize(tree) == EA_4BYTE) + { + // We need two registers: tmpRegOp1 and tmpRegOp2 + 
buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalRegisterUses(); + assert(dstCount == 1); + BuildDef(tree); + } + break; + + case GT_INTRINSIC: + { + noway_assert((tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Ceiling) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Floor) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Sqrt)); + + // Both operand and its result must be of the same floating point type. + GenTree* op1 = tree->gtGetOp1(); + assert(varTypeIsFloating(op1)); + assert(op1->TypeGet() == tree->TypeGet()); + + BuildUse(op1); + srcCount = 1; + assert(dstCount == 1); + BuildDef(tree); + } + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + srcCount = BuildSIMD(tree->AsSIMD()); + break; +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic()); + break; +#endif // FEATURE_HW_INTRINSICS + + case GT_CAST: + assert(dstCount == 1); + srcCount = BuildCast(tree->AsCast()); + break; + + case GT_NEG: + case GT_NOT: + BuildUse(tree->gtGetOp1()); + srcCount = 1; + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_JCMP: + if (!varTypeIsFloating(tree->gtGetOp1())) + { + // We need two registers: tmpRegOp1 and tmpRegOp2 + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + } + srcCount = BuildCmp(tree); + break; + + case GT_CKFINITE: + srcCount = 1; + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + BuildUse(tree->gtGetOp1()); + BuildDef(tree); + buildInternalRegisterUses(); + break; + + case GT_CMPXCHG: + { + GenTreeCmpXchg* cmpXchgNode = 
tree->AsCmpXchg();
+            srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
+            assert(dstCount == 1);
+
+            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+            {
+                // For LOONGARCH exclusives requires a single internal register
+                buildInternalIntRegisterDefForNode(tree);
+            }
+
+            // For LOONGARCH exclusives the lifetime of the addr and data must be extended because
+            // it may be used multiple times during retries
+
+            // For LOONGARCH atomic cas the lifetime of the addr and data must be extended to prevent
+            // them being reused as the target register which must be destroyed early
+
+            RefPosition* locationUse = BuildUse(tree->AsCmpXchg()->gtOpLocation);
+            setDelayFree(locationUse);
+            RefPosition* valueUse = BuildUse(tree->AsCmpXchg()->gtOpValue);
+            setDelayFree(valueUse);
+            if (!cmpXchgNode->gtOpComparand->isContained())
+            {
+                RefPosition* comparandUse = BuildUse(tree->AsCmpXchg()->gtOpComparand);
+
+                // For LOONGARCH exclusives the lifetime of the comparand must be extended because
+                // it may be used multiple times during retries
+                if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+                {
+                    setDelayFree(comparandUse);
+                }
+            }
+
+            // Internals may not collide with target
+            setInternalRegsDelayFree = true;
+            buildInternalRegisterUses();
+            BuildDef(tree);
+        }
+        break;
+
+        case GT_LOCKADD:
+        case GT_XORR:
+        case GT_XAND:
+        case GT_XADD:
+        case GT_XCHG:
+        {
+            assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
+            srcCount = tree->gtGetOp2()->isContained() ? 1 : 2;
+
+            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+            {
+                // GT_XCHG requires a single internal register; the others require two.
+                buildInternalIntRegisterDefForNode(tree);
+                if (tree->OperGet() != GT_XCHG)
+                {
+                    buildInternalIntRegisterDefForNode(tree);
+                }
+            }
+            else if (tree->OperIs(GT_XAND))
+            {
+                // for ldclral we need an internal register.
+                buildInternalIntRegisterDefForNode(tree);
+            }
+
+            assert(!tree->gtGetOp1()->isContained());
+            RefPosition* op1Use = BuildUse(tree->gtGetOp1());
+            RefPosition* op2Use = nullptr;
+            if (!tree->gtGetOp2()->isContained())
+            {
+                op2Use = BuildUse(tree->gtGetOp2());
+            }
+
+            // For LOONGARCH exclusives the lifetime of the addr and data must be extended because
+            // it may be used multiple times during retries
+            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+            {
+                // Internals may not collide with target
+                if (dstCount == 1)
+                {
+                    setDelayFree(op1Use);
+                    if (op2Use != nullptr)
+                    {
+                        setDelayFree(op2Use);
+                    }
+                    setInternalRegsDelayFree = true;
+                }
+                buildInternalRegisterUses();
+            }
+            if (dstCount == 1)
+            {
+                BuildDef(tree);
+            }
+        }
+        break;
+
+#if FEATURE_ARG_SPLIT
+        case GT_PUTARG_SPLIT:
+            srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
+            dstCount = tree->AsPutArgSplit()->gtNumRegs;
+            break;
+#endif // FEATURE_ARG_SPLIT
+
+        case GT_PUTARG_STK:
+            srcCount = BuildPutArgStk(tree->AsPutArgStk());
+            break;
+
+        case GT_PUTARG_REG:
+            srcCount = BuildPutArgReg(tree->AsUnOp());
+            break;
+
+        case GT_CALL:
+            srcCount = BuildCall(tree->AsCall());
+            if (tree->AsCall()->HasMultiRegRetVal())
+            {
+                dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+            }
+            break;
+
+        case GT_ADDR:
+        {
+            // For a GT_ADDR, the child node should not be evaluated into a register
+            GenTree* child = tree->gtGetOp1();
+            assert(!isCandidateLocalRef(child));
+            assert(child->isContained());
+            assert(dstCount == 1);
+            srcCount = 0;
+            BuildDef(tree);
+        }
+        break;
+
+        case GT_BLK:
+        case GT_DYN_BLK:
+            // These should all be eliminated prior to Lowering.
+            assert(!"Non-store block node in Lowering");
+            srcCount = 0;
+            break;
+
+        case GT_STORE_BLK:
+        case GT_STORE_OBJ:
+        case GT_STORE_DYN_BLK:
+            srcCount = BuildBlockStore(tree->AsBlk());
+            break;
+
+        case GT_INIT_VAL:
+            // Always a passthrough of its child's value.
+ assert(!"INIT_VAL should always be contained"); + srcCount = 0; + break; + + case GT_LCLHEAP: + { + assert(dstCount == 1); + + // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): + // Here '-' means don't care. + // + // Size? Init Memory? # temp regs + // 0 - 0 + // const and <=6 ptr words - 0 + // const and 6 ptr words Yes 0 + // Non-const Yes 0 + // Non-const No 2 + // + + GenTree* size = tree->gtGetOp1(); + if (size->IsCnsIntOrI()) + { + assert(size->isContained()); + srcCount = 0; + + size_t sizeVal = size->AsIntCon()->gtIconVal; + + if (sizeVal != 0) + { + // Compute the amount of memory to properly STACK_ALIGN. + // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size. + // This should also help in debugging as we can examine the original size specified with + // localloc. + sizeVal = AlignUp(sizeVal, STACK_ALIGN); + size_t stpCount = sizeVal / (REGSIZE_BYTES * 2); + + // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc) + // + if (stpCount <= 4) + { + // Need no internal registers + } + else if (!compiler->info.compInitMem) + { + // No need to initialize allocated stack space. 
+ if (sizeVal < compiler->eeGetPageSize()) + { + // Need no internal registers + } + else + { + // We need two registers: regCnt and RegTmp + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + } + } + } + else + { + srcCount = 1; + if (!compiler->info.compInitMem) + { + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + } + + if (!size->isContained()) + { + BuildUse(size); + } + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_ARR_BOUNDS_CHECK: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif // FEATURE_HW_INTRINSICS + { + GenTreeBoundsChk* node = tree->AsBoundsChk(); + // Consumes arrLen & index - has no result + assert(dstCount == 0); + srcCount = BuildOperandUses(node->GetIndex()); + srcCount += BuildOperandUses(node->GetArrayLength()); + } + break; + + case GT_ARR_ELEM: + // These must have been lowered to GT_ARR_INDEX + noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_ARR_INDEX: + { + srcCount = 2; + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + + // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple + // times while the result is being computed. + RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj()); + setDelayFree(arrObjUse); + BuildUse(tree->AsArrIndex()->IndexExpr()); + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_ARR_OFFSET: + // This consumes the offset, if any, the arrObj and the effective index, + // and produces the flattened offset for this dimension. 
+ srcCount = 2; + if (!tree->AsArrOffs()->gtOffset->isContained()) + { + BuildUse(tree->AsArrOffs()->gtOffset); + srcCount++; + } + BuildUse(tree->AsArrOffs()->gtIndex); + BuildUse(tree->AsArrOffs()->gtArrObj); + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + case GT_LEA: + { + GenTreeAddrMode* lea = tree->AsAddrMode(); + + GenTree* base = lea->Base(); + GenTree* index = lea->Index(); + int cns = lea->Offset(); + + // This LEA is instantiating an address, so we set up the srcCount here. + srcCount = 0; + if (base != nullptr) + { + srcCount++; + BuildUse(base); + } + if (index != nullptr) + { + srcCount++; + BuildUse(index); + } + assert(dstCount == 1); + + // On LOONGARCH64 we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // LOONGARCH64 does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(tree); + } + else if (!((-2048 <= cns) && (cns <= 2047))) + { + // This offset can't be contained in the add instruction, so we need an internal register + buildInternalIntRegisterDefForNode(tree); + } + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_STOREIND: + { + assert(dstCount == 0); + + if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree)) + { + srcCount = BuildGCWriteBarrier(tree); + break; + } + + srcCount = BuildIndir(tree->AsIndir()); + if (!tree->gtGetOp2()->isContained()) + { + BuildUse(tree->gtGetOp2()); + srcCount++; + } + } + break; + + case GT_NULLCHECK: + case GT_IND: + assert(dstCount == (tree->OperIs(GT_NULLCHECK) ? 
0 : 1)); + srcCount = BuildIndir(tree->AsIndir()); + break; + + case GT_CATCH_ARG: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree, RBM_EXCEPTION_OBJECT); + break; + + case GT_CLS_VAR: + srcCount = 0; + // GT_CLS_VAR, by the time we reach the backend, must always + // be a pure use. + // It will produce a result of the type of the + // node, and use an internal register for the address. + + assert(dstCount == 1); + assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + case GT_INDEX_ADDR: + assert(dstCount == 1); + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + } // end switch (tree->OperGet()) + + if (tree->IsUnusedValue() && (dstCount != 0)) + { + isLocalDefUse = true; + } + // We need to be sure that we've set srcCount and dstCount appropriately + assert((dstCount < 2) || tree->IsMultiRegNode()); + assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); + assert(!tree->IsUnusedValue() || (dstCount != 0)); + assert(dstCount == tree->GetRegisterDstCount(compiler)); + return srcCount; +} + +#ifdef FEATURE_SIMD +//------------------------------------------------------------------------ +// BuildSIMD: Set the NodeInfo for a GT_SIMD tree. +// +// Arguments: +// tree - The GT_SIMD node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + int srcCount = 0; + // Only SIMDIntrinsicInit can be contained + if (simdTree->isContained()) + { + assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); + } + int dstCount = simdTree->IsValue() ? 
1 : 0; + assert(dstCount == 1); + + bool buildUses = true; + + GenTree* op1 = simdTree->gtGetOp1(); + GenTree* op2 = simdTree->gtGetOp2(); + + switch (simdTree->gtSIMDIntrinsicID) + { + case SIMDIntrinsicInit: + case SIMDIntrinsicCast: + case SIMDIntrinsicSqrt: + case SIMDIntrinsicAbs: + case SIMDIntrinsicConvertToSingle: + case SIMDIntrinsicConvertToInt32: + case SIMDIntrinsicConvertToDouble: + case SIMDIntrinsicConvertToInt64: + case SIMDIntrinsicWidenLo: + case SIMDIntrinsicWidenHi: + // No special handling required. + break; + + case SIMDIntrinsicGetItem: + { + op1 = simdTree->gtGetOp1(); + op2 = simdTree->gtGetOp2(); + + // We have an object and an index, either of which may be contained. + bool setOp2DelayFree = false; + if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal())) + { + // If the index is not a constant and the object is not contained or is a local + // we will need a general purpose register to calculate the address + // internal register must not clobber input index + // TODO-Cleanup: An internal register will never clobber a source; this code actually + // ensures that the index (op2) doesn't interfere with the target. + buildInternalIntRegisterDefForNode(simdTree); + setOp2DelayFree = true; + } + srcCount += BuildOperandUses(op1); + if (!op2->isContained()) + { + RefPosition* op2Use = BuildUse(op2); + if (setOp2DelayFree) + { + setDelayFree(op2Use); + } + srcCount++; + } + + if (!op2->IsCnsIntOrI() && (!op1->isContained())) + { + // If vector is not already in memory (contained) and the index is not a constant, + // we will use the SIMD temp location to store the vector. 
+ compiler->getSIMDInitTempVarNum(); + } + buildUses = false; + } + break; + + case SIMDIntrinsicAdd: + case SIMDIntrinsicSub: + case SIMDIntrinsicMul: + case SIMDIntrinsicDiv: + case SIMDIntrinsicBitwiseAnd: + case SIMDIntrinsicBitwiseAndNot: + case SIMDIntrinsicBitwiseOr: + case SIMDIntrinsicBitwiseXor: + case SIMDIntrinsicMin: + case SIMDIntrinsicMax: + case SIMDIntrinsicEqual: + case SIMDIntrinsicLessThan: + case SIMDIntrinsicGreaterThan: + case SIMDIntrinsicLessThanOrEqual: + case SIMDIntrinsicGreaterThanOrEqual: + // No special handling required. + break; + + case SIMDIntrinsicSetX: + case SIMDIntrinsicSetY: + case SIMDIntrinsicSetZ: + case SIMDIntrinsicSetW: + case SIMDIntrinsicNarrow: + { + // Op1 will write to dst before Op2 is free + BuildUse(op1); + RefPosition* op2Use = BuildUse(op2); + setDelayFree(op2Use); + srcCount = 2; + buildUses = false; + break; + } + + case SIMDIntrinsicInitN: + { + var_types baseType = simdTree->gtSIMDBaseType; + srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType)); + if (varTypeIsFloating(simdTree->gtSIMDBaseType)) + { + // Need an internal register to stitch together all the values into a single vector in a SIMD reg. + buildInternalFloatRegisterDefForNode(simdTree); + } + + for (GenTree* operand : simdTree->Operands()) + { + assert(operand->TypeIs(baseType)); + assert(!operand->isContained()); + + BuildUse(operand); + } + + buildUses = false; + break; + } + + case SIMDIntrinsicInitArray: + // We have an array and an index, which may be contained. + break; + + case SIMDIntrinsicOpEquality: + case SIMDIntrinsicOpInEquality: + buildInternalFloatRegisterDefForNode(simdTree); + break; + + case SIMDIntrinsicDotProduct: + buildInternalFloatRegisterDefForNode(simdTree); + break; + + case SIMDIntrinsicSelect: + // TODO-LOONGARCH64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB + // bsl target register must be VC. Reserve a temp in case we need to shuffle things. 
+ // This will require a different approach, as GenTreeSIMD has only two operands. + assert(!"SIMDIntrinsicSelect not yet supported"); + buildInternalFloatRegisterDefForNode(simdTree); + break; + + case SIMDIntrinsicInitArrayX: + case SIMDIntrinsicInitFixed: + case SIMDIntrinsicCopyToArray: + case SIMDIntrinsicCopyToArrayX: + case SIMDIntrinsicNone: + case SIMDIntrinsicGetCount: + case SIMDIntrinsicGetOne: + case SIMDIntrinsicGetZero: + case SIMDIntrinsicGetAllOnes: + case SIMDIntrinsicGetX: + case SIMDIntrinsicGetY: + case SIMDIntrinsicGetZ: + case SIMDIntrinsicGetW: + case SIMDIntrinsicInstEquals: + case SIMDIntrinsicHWAccel: + case SIMDIntrinsicWiden: + case SIMDIntrinsicInvalid: + assert(!"These intrinsics should not be seen during register allocation"); + __fallthrough; + + default: + noway_assert(!"Unimplemented SIMD node type."); + unreached(); + } + if (buildUses) + { + assert(!op1->OperIs(GT_LIST)); + assert(srcCount == 0); + srcCount = BuildOperandUses(op1); + if ((op2 != nullptr) && !op2->isContained()) + { + srcCount += BuildOperandUses(op2); + } + } + assert(internalCount <= MaxInternalCount); + buildInternalRegisterUses(); + if (dstCount == 1) + { + BuildDef(simdTree); + } + else + { + assert(dstCount == 0); + } + return srcCount; +#endif +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" +//------------------------------------------------------------------------ +// BuildHWIntrinsic: Set the NodeInfo for a GT_HWINTRINSIC tree. +// +// Arguments: +// tree - The GT_HWINTRINSIC node of interest +// +// Return Value: +// The number of sources consumed by this node. 
+// +int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId; + int numArgs = HWIntrinsicInfo::lookupNumArgs(intrinsicTree); + + GenTree* op1 = intrinsicTree->gtGetOp1(); + GenTree* op2 = intrinsicTree->gtGetOp2(); + GenTree* op3 = nullptr; + int srcCount = 0; + + if ((op1 != nullptr) && op1->OperIsList()) + { + // op2 must be null, and there must be at least two more arguments. + assert(op2 == nullptr); + noway_assert(op1->AsArgList()->Rest() != nullptr); + noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr); + assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr); + op2 = op1->AsArgList()->Rest()->Current(); + op3 = op1->AsArgList()->Rest()->Rest()->Current(); + op1 = op1->AsArgList()->Current(); + } + + bool op2IsDelayFree = false; + bool op3IsDelayFree = false; + + // Create internal temps, and handle any other special requirements. + switch (HWIntrinsicInfo::lookup(intrinsicID).form) + { + case HWIntrinsicInfo::Sha1HashOp: + assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr)); + if (!op2->isContained()) + { + assert(!op3->isContained()); + op2IsDelayFree = true; + op3IsDelayFree = true; + setInternalRegsDelayFree = true; + } + buildInternalFloatRegisterDefForNode(intrinsicTree); + break; + case HWIntrinsicInfo::SimdTernaryRMWOp: + assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr)); + if (!op2->isContained()) + { + assert(!op3->isContained()); + op2IsDelayFree = true; + op3IsDelayFree = true; + } + break; + case HWIntrinsicInfo::Sha1RotateOp: + buildInternalFloatRegisterDefForNode(intrinsicTree); + break; + + case HWIntrinsicInfo::SimdExtractOp: + case HWIntrinsicInfo::SimdInsertOp: + if (!op2->isContained()) + { + // We need a temp to create a switch table + buildInternalIntRegisterDefForNode(intrinsicTree); + } + break; + + default: + break; + } + + // Next, build uses + if (numArgs > 3) + { + 
srcCount = 0; + assert(!op2IsDelayFree && !op3IsDelayFree); + assert(op1->OperIs(GT_LIST)); + { + for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest()) + { + srcCount += BuildOperandUses(list->Current()); + } + } + assert(srcCount == numArgs); + } + else + { + if (op1 != nullptr) + { + srcCount += BuildOperandUses(op1); + if (op2 != nullptr) + { + srcCount += (op2IsDelayFree) ? BuildDelayFreeUses(op2) : BuildOperandUses(op2); + if (op3 != nullptr) + { + srcCount += (op3IsDelayFree) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3); + } + } + } + } + buildInternalRegisterUses(); + + // Now defs + if (intrinsicTree->IsValue()) + { + BuildDef(intrinsicTree); + } + + return srcCount; +#endif +} +#endif + +//------------------------------------------------------------------------ +// BuildIndir: Specify register requirements for address expression +// of an indirection operation. +// +// Arguments: +// indirTree - GT_IND, GT_STOREIND or block gentree node +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildIndir(GenTreeIndir* indirTree) +{ + // struct typed indirs are expected only on rhs of a block copy, + // but in this case they must be contained. 
+ assert(indirTree->TypeGet() != TYP_STRUCT); + + GenTree* addr = indirTree->Addr(); + GenTree* index = nullptr; + int cns = 0; + + if (addr->isContained()) + { + if (addr->OperGet() == GT_LEA) + { + GenTreeAddrMode* lea = addr->AsAddrMode(); + index = lea->Index(); + cns = lea->Offset(); + + // On LOONGARCH we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // LOONGARCH does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + else if (!((-2048 <= cns) && (cns <= 2047))) + { + // This offset can't be contained in the ldr/str instruction, so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + } + } + +#ifdef FEATURE_SIMD + if (indirTree->TypeGet() == TYP_SIMD12) + { + // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir(). + assert(!addr->isContained()); + + // Vector3 is read/written as two reads/writes: 8 byte and 4 byte. + // To assemble the vector properly we would need an additional int register + buildInternalIntRegisterDefForNode(indirTree); + } +#endif // FEATURE_SIMD + + int srcCount = BuildIndirUses(indirTree); + buildInternalRegisterUses(); + + if (!indirTree->OperIs(GT_STOREIND, GT_NULLCHECK)) + { + BuildDef(indirTree); + } + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildCall: Set the NodeInfo for a call. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// The number of sources consumed by this node. 
+// +int LinearScan::BuildCall(GenTreeCall* call) +{ + bool hasMultiRegRetVal = false; + const ReturnTypeDesc* retTypeDesc = nullptr; + regMaskTP dstCandidates = RBM_NONE; + + int srcCount = 0; + int dstCount = 0; + if (call->TypeGet() != TYP_VOID) + { + hasMultiRegRetVal = call->HasMultiRegRetVal(); + if (hasMultiRegRetVal) + { + // dst count = number of registers in which the value is returned by call + retTypeDesc = call->GetReturnTypeDesc(); + dstCount = retTypeDesc->GetReturnRegCount(); + } + else + { + dstCount = 1; + } + } + + GenTree* ctrlExpr = call->gtControlExpr; + regMaskTP ctrlExprCandidates = RBM_NONE; + if (call->gtCallType == CT_INDIRECT) + { + // either gtControlExpr != null or gtCallAddr != null. + // Both cannot be non-null at the same time. + assert(ctrlExpr == nullptr); + assert(call->gtCallAddr != nullptr); + ctrlExpr = call->gtCallAddr; + } + + // set reg requirements on call target represented as control sequence. + if (ctrlExpr != nullptr) + { + // we should never see a gtControlExpr whose type is void. + assert(ctrlExpr->TypeGet() != TYP_VOID); + + // In case of fast tail implemented as jmp, make sure that gtControlExpr is + // computed into a register. + if (call->IsFastTailCall()) + { + // Fast tail call - make sure that call target is always computed in T9(LOONGARCH64) + // so that epilog sequence can generate "jr t9" to achieve fast tail call. + ctrlExprCandidates = RBM_FASTTAILCALL_TARGET; + } + } + else if (call->IsR2ROrVirtualStubRelativeIndir()) + { + buildInternalIntRegisterDefForNode(call); + } + + RegisterType registerType = call->TypeGet(); + +// Set destination candidates for return value of the call. 
+ + if (hasMultiRegRetVal) + { + assert(retTypeDesc != nullptr); + dstCandidates = retTypeDesc->GetABIReturnRegs(); + } + else if (varTypeUsesFloatArgReg(registerType)) + { + dstCandidates = RBM_FLOATRET; + } + else if (registerType == TYP_LONG) + { + dstCandidates = RBM_LNGRET; + } + else + { + dstCandidates = RBM_INTRET; + } + + // First, count reg args + // Each register argument corresponds to one source. + bool callHasFloatRegArgs = false; + + for (GenTreeCall::Use& arg : call->LateArgs()) + { + GenTree* argNode = arg.GetNode(); + +#ifdef DEBUG + // During Build, we only use the ArgTabEntry for validation, + // as getting it is rather expensive. + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); + regNumber argReg = curArgTabEntry->GetRegNum(); + assert(curArgTabEntry != nullptr); +#endif + + if (argNode->gtOper == GT_PUTARG_STK) + { + // late arg that is not passed in a register + assert(curArgTabEntry->GetRegNum() == REG_STK); + // These should never be contained. 
+ assert(!argNode->isContained()); + continue; + } + + // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct + if (argNode->OperGet() == GT_FIELD_LIST) + { + assert(argNode->isContained()); + + // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) + for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses()) + { +#ifdef DEBUG + assert(use.GetNode()->OperIs(GT_PUTARG_REG)); +#endif + BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); + srcCount++; + } + } +#if FEATURE_ARG_SPLIT + else if (argNode->OperGet() == GT_PUTARG_SPLIT) + { + unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs; + assert(regCount == curArgTabEntry->numRegs); + for (unsigned int i = 0; i < regCount; i++) + { + BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); + } + srcCount += regCount; + } +#endif // FEATURE_ARG_SPLIT + else + { + assert(argNode->OperIs(GT_PUTARG_REG)); + assert(argNode->GetRegNum() == argReg); + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); + { + BuildUse(argNode, genRegMask(argNode->GetRegNum())); + srcCount++; + } + } + } + + // Now, count stack args + // Note that these need to be computed into a register, but then + // they're just stored to the stack - so the reg doesn't + // need to remain live until the call. In fact, it must not + // because the code generator doesn't actually consider it live, + // so it can't be spilled. + + for (GenTreeCall::Use& use : call->Args()) + { + GenTree* arg = use.GetNode(); + + // Skip arguments that have been moved to the Late Arg list + if ((arg->gtFlags & GTF_LATE_ARG) == 0) + { +#ifdef DEBUG + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); + assert(curArgTabEntry != nullptr); +#endif +#if FEATURE_ARG_SPLIT + // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they + // define registers used by the call. 
+ assert(arg->OperGet() != GT_PUTARG_SPLIT); +#endif // FEATURE_ARG_SPLIT + if (arg->gtOper == GT_PUTARG_STK) + { + assert(curArgTabEntry->GetRegNum() == REG_STK); + } + else + { + assert(!arg->IsValue() || arg->IsUnusedValue()); + } + } + } + + // If it is a fast tail call, it is already preferenced to use IP0. + // Therefore, no need set src candidates on call tgt again. + if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) + { + // Don't assign the call target to any of the argument registers because + // we will use them to also pass floating point arguments as required + // by LOONGARCH64 ABI. + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + } + + if (ctrlExpr != nullptr) + { + BuildUse(ctrlExpr, ctrlExprCandidates); + srcCount++; + } + + buildInternalRegisterUses(); + + // Now generate defs and kills. + regMaskTP killMask = getKillSetForCall(call); + BuildDefsWithKills(call, dstCount, dstCandidates, killMask); + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node +// +// Arguments: +// argNode - a GT_PUTARG_STK node +// +// Return Value: +// The number of sources consumed by this node. 
+// +// Notes: +// Set the child node(s) to be contained when we have a multireg arg +// +int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) +{ + assert(argNode->gtOper == GT_PUTARG_STK); + + GenTree* putArgChild = argNode->gtGetOp1(); + + int srcCount = 0; + + // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct + if (putArgChild->TypeIs(TYP_STRUCT) || putArgChild->OperIs(GT_FIELD_LIST)) + { + // We will use store instructions that each write a register sized value + + if (putArgChild->OperIs(GT_FIELD_LIST)) + { + assert(putArgChild->isContained()); + // We consume all of the items in the GT_FIELD_LIST + for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + { + BuildUse(use.GetNode()); + srcCount++; + } + } + else + { + // We can use a ldp/stp sequence so we need two internal registers for LOONGARCH64; one for ARM. + buildInternalIntRegisterDefForNode(argNode); + + if (putArgChild->OperGet() == GT_OBJ) + { + assert(putArgChild->isContained()); + GenTree* objChild = putArgChild->gtGetOp1(); + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation, and there are no source registers. + // + assert(objChild->isContained()); + } + else + { + // We will generate all of the code for the GT_PUTARG_STK and its child node + // as one contained operation + // + srcCount = BuildOperandUses(objChild); + } + } + else + { + // No source registers. 
+ putArgChild->OperIs(GT_LCL_VAR); + } + } + } + else + { + assert(!putArgChild->isContained()); + srcCount = BuildOperandUses(putArgChild); + } + buildInternalRegisterUses(); + return srcCount; +} + +#if FEATURE_ARG_SPLIT +//------------------------------------------------------------------------ +// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node +// +// Arguments: +// argNode - a GT_PUTARG_SPLIT node +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Set the child node(s) to be contained +// +int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) +{ + int srcCount = 0; + assert(argNode->gtOper == GT_PUTARG_SPLIT); + + GenTree* putArgChild = argNode->gtGetOp1(); + + // Registers for split argument corresponds to source + int dstCount = argNode->gtNumRegs; + + regNumber argReg = argNode->GetRegNum(); + regMaskTP argMask = RBM_NONE; + regMaskTP argMaskArr[MAX_REG_ARG] = {RBM_NONE}; + + for (unsigned i = 0; i < dstCount; i++) + { + argMaskArr[i] = genRegMask(argNode->GetRegNumByIdx(i)); + argMask |= argMaskArr[i]; + } + + if (putArgChild->OperGet() == GT_FIELD_LIST) + { + // Generated code: + // 1. Consume all of the items in the GT_FIELD_LIST (source) + // 2. Store to target slot and move to target registers (destination) from source + // + unsigned sourceRegCount = 0; + + // To avoid redundant moves, have the argument operand computed in the + // register in which the argument is passed to the call. + + for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + { + GenTree* node = use.GetNode(); + assert(!node->isContained()); + // The only multi-reg nodes we should see are OperIsMultiRegOp() + assert(!node->IsMultiRegNode()); + + // Consume all the registers, setting the appropriate register mask for the ones that + // go into registers. 
+ // (sourceRegCount < argNode->gtNumRegs) + BuildUse(node, argMaskArr[sourceRegCount], 0); + sourceRegCount++; + } + srcCount += sourceRegCount; + assert(putArgChild->isContained()); + } + else + { + assert(putArgChild->TypeGet() == TYP_STRUCT); + assert(putArgChild->OperGet() == GT_OBJ); + + // We can use a ldr/str sequence so we need an internal register + buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); + + GenTree* objChild = putArgChild->gtGetOp1(); + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation + // + assert(objChild->isContained()); + } + else + { + srcCount = BuildIndirUses(putArgChild->AsIndir()); + } + assert(putArgChild->isContained()); + } + buildInternalRegisterUses(); + assert((argMask != RBM_NONE) && ((int)genCountBits(argMask) == dstCount)); + for (int i = 0; i < dstCount; i++) + { + BuildDef(argNode, argMaskArr[i], i); + } + return srcCount; +} +#endif // FEATURE_ARG_SPLIT + +//------------------------------------------------------------------------ +// BuildBlockStore: Build the RefPositions for a block store node. +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// The number of sources consumed by this node. 
+// +int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = blkNode->Data(); + unsigned size = blkNode->Size(); + + GenTree* srcAddrOrFill = nullptr; + + regMaskTP dstAddrRegMask = RBM_NONE; + regMaskTP srcRegMask = RBM_NONE; + regMaskTP sizeRegMask = RBM_NONE; + + if (blkNode->OperIsInitBlkOp()) + { + if (src->OperIs(GT_INIT_VAL)) + { + assert(src->isContained()); + src = src->AsUnOp()->gtGetOp1(); + } + + srcAddrOrFill = src; + + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindUnroll: + break; + + case GenTreeBlk::BlkOpKindHelper: + assert(!src->isContained()); + dstAddrRegMask = RBM_ARG_0; + srcRegMask = RBM_ARG_1; + sizeRegMask = RBM_ARG_2; + break; + + default: + unreached(); + } + } + else + { + if (src->OperIs(GT_IND)) + { + assert(src->isContained()); + srcAddrOrFill = src->AsIndir()->Addr(); + } + + if (blkNode->OperIs(GT_STORE_OBJ)) + { + // We don't need to materialize the struct size but we still need + // a temporary register to perform the sequence of loads and stores. + // We can't use the special Write Barrier registers, so exclude them from the mask + regMaskTP internalIntCandidates = + allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); + + if (size >= 2 * REGSIZE_BYTES) + { + // We will use ldp/stp to reduce code size and improve performance + // so we need to reserve an extra internal register + buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); + } + + // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + + // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. + // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, + // which is killed by a StoreObj (and thus needn't be reserved). 
+ if (srcAddrOrFill != nullptr) + { + assert(!srcAddrOrFill->isContained()); + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + } + } + else + { + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindUnroll: + buildInternalIntRegisterDefForNode(blkNode); + break; + + case GenTreeBlk::BlkOpKindHelper: + dstAddrRegMask = RBM_ARG_0; + if (srcAddrOrFill != nullptr) + { + assert(!srcAddrOrFill->isContained()); + srcRegMask = RBM_ARG_1; + } + sizeRegMask = RBM_ARG_2; + break; + + default: + unreached(); + } + } + } + + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (sizeRegMask != RBM_NONE)) + { + // Reserve a temp register for the block size argument. + buildInternalIntRegisterDefForNode(blkNode, sizeRegMask); + } + + int useCount = 0; + + if (!dstAddr->isContained()) + { + useCount++; + BuildUse(dstAddr, dstAddrRegMask); + } + else if (dstAddr->OperIsAddrMode()) + { + useCount += BuildAddrUses(dstAddr->AsAddrMode()->Base()); + } + + if (srcAddrOrFill != nullptr) + { + if (!srcAddrOrFill->isContained()) + { + useCount++; + BuildUse(srcAddrOrFill, srcRegMask); + } + else if (srcAddrOrFill->OperIsAddrMode()) + { + useCount += BuildAddrUses(srcAddrOrFill->AsAddrMode()->Base()); + } + } + + if (blkNode->OperIs(GT_STORE_DYN_BLK)) + { + useCount++; + BuildUse(blkNode->AsDynBlk()->gtDynamicSize, sizeRegMask); + } + + buildInternalRegisterUses(); + regMaskTP killMask = getKillSetForBlockStore(blkNode); + BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask); + return useCount; +} + +//------------------------------------------------------------------------ +// BuildCast: Set the NodeInfo for a GT_CAST. +// +// Arguments: +// cast - The GT_CAST node +// +// Return Value: +// The number of sources consumed by this node. 
+// +int LinearScan::BuildCast(GenTreeCast* cast) +{ + GenTree* src = cast->gtGetOp1(); + + const var_types srcType = genActualType(src->TypeGet()); + const var_types castType = cast->gtCastType; + + // Overflow checking cast from TYP_LONG to TYP_INT requires a temporary register to + // store the min and max immediate values that cannot be encoded in the CMP instruction. + if (cast->gtOverflow() && varTypeIsLong(srcType) && !cast->IsUnsigned() && (castType == TYP_INT)) + { + buildInternalIntRegisterDefForNode(cast); + } + + int srcCount = BuildOperandUses(src); + buildInternalRegisterUses(); + BuildDef(cast); + return srcCount; +} + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index f3aceac0dd14c..29d9e37fea8f7 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -179,7 +179,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) { if (srcType == TYP_FLOAT -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Arm64: src = float, dst is overflow conversion. // This goes through helper and hence src needs to be converted to double. && tree->gtOverflow() @@ -214,7 +214,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -#ifdef TARGET_ARM64 // ARM64 supports all non-overflow checking conversions directly. 
+#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)// On ARM64 All non-overflow checking conversions can be optimized return nullptr; #else switch (dstType) @@ -903,6 +903,36 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned } #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) +fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + regNumber regNum, + unsigned numRegs, + unsigned byteSize, + unsigned byteAlignment, + bool isStruct, + bool isFloatHfa, /* unused */ + bool isVararg, + const regNumber otherRegNum) +{ + fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, false, isVararg); + assert(curArgTabEntry != nullptr); + + curArgTabEntry->isStruct = isStruct; // is this a struct arg + + INDEBUG(curArgTabEntry->checkIsStruct();) + assert(numRegs <= 2); + if (numRegs == 2) + { + curArgTabEntry->setRegNum(1, otherRegNum); + //curArgTabEntry->isSplit = true; + } + + return curArgTabEntry; +} +#endif // defined(TARGET_LOONGARCH64) + fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, GenTree* node, GenTreeCall::Use* use, @@ -1751,7 +1781,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) if (varTypeIsStruct(type)) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // Can this type be passed as a primitive type? // If so, the following call will return the corresponding primitive type. @@ -1813,6 +1843,19 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) // This will be passed by value in two registers. assert(addrNode != nullptr); + // Create an Obj of the temp to use it as a call argument. 
+ arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); + } +#elif defined(TARGET_LOONGARCH64) + assert(varTypeIsStruct(type)); + if (lvaIsMultiregStruct(varDsc, curArgTabEntry->IsVararg())) + { + // ToDo-LOONGARCH64: Consider using: arg->ChangeOper(GT_LCL_FLD); + // as that is how UNIX_AMD64_ABI works. + // We will create a GT_OBJ for the argument below. + // This will be passed by value in two registers. + assert(addrNode != nullptr); + // Create an Obj of the temp to use it as a call argument. arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); } @@ -1823,7 +1866,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) #endif // FEATURE_MULTIREG_ARGS } -#else // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) +#else // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM or TARGET_LOONGARCH64) // other targets, we pass the struct by value assert(varTypeIsStruct(type)); @@ -1834,7 +1877,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object. arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode); -#endif // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) +#endif // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM or TARGET_LOONGARCH64) } // (varTypeIsStruct(type)) @@ -1959,7 +2002,7 @@ void fgArgInfo::EvalArgsToTemps() if (setupArg->OperIsCopyBlkOp()) { setupArg = compiler->fgMorphCopyBlock(setupArg); -#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) if (lclVarType == TYP_STRUCT) { // This scalar LclVar widening step is only performed for ARM architectures. 
@@ -1970,7 +2013,7 @@ void fgArgInfo::EvalArgsToTemps() scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->IsVararg()); } -#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) +#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) } // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => @@ -2847,6 +2890,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) passUsingFloatRegs = false; +#elif defined(TARGET_LOONGARCH64) + assert(!callIsVararg); + assert(!isHfaArg); + passUsingFloatRegs = !callIsVararg && varTypeIsFloating(argx); + #else #error Unsupported or unset target architecture #endif // TARGET* @@ -2963,6 +3011,27 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) size = genTypeStSz(argx->gtType); byteSize = genTypeSize(argx); } +#elif defined(TARGET_LOONGARCH64) + DWORD numFloatFields = 0; + if (!isStructArg) + { + size = 1; + byteSize = genTypeSize(argx); + } + else + { + // Structs are either passed in 1 or 2 (64-bit) slots. + // Structs that are the size of 2 pointers are passed by value in multiple registers, + // if sufficient registers are available. + // Structs that are larger than 2 pointers are passed by reference (to a copy). 
+ size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; + + if (size > 2) + { + size = 1; + } + byteSize = structSize; + } #else #error Unsupported or unset target architecture #endif // TARGET_XXX @@ -2975,6 +3044,27 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) structPassingKind howToPassStruct; structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, structSize); passStructByRef = (howToPassStruct == SPK_ByReference); +#if defined(TARGET_LOONGARCH64) + if (!passStructByRef) + { + assert((howToPassStruct == SPK_ByValue) || (howToPassStruct == SPK_PrimitiveType)); + + numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); + + passUsingFloatRegs = (numFloatFields & 0xf) ? true : false; + compFloatingPointUsed |= passUsingFloatRegs; + + if (numFloatFields & 7) + size = 1; + else if (numFloatFields & 8) + size = 2; + } + else //if (passStructByRef) + { + size = 1; + byteSize = TARGET_POINTER_SIZE; + } +#else if (howToPassStruct == SPK_ByReference) { byteSize = TARGET_POINTER_SIZE; @@ -3003,6 +3093,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { size = 1; } +#endif } const var_types argType = args->GetNode()->TypeGet(); @@ -3023,6 +3114,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) argAlignBytes = eeGetArgAlignment(argType, isFloatHfa); } +#ifdef TARGET_LOONGARCH64 + regNumber nextOtherRegNum = REG_STK; +#endif // // Figure out if the argument will be passed in a register. // @@ -3117,7 +3211,69 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } } } -#else // not TARGET_ARM or TARGET_ARM64 + +#elif defined(TARGET_LOONGARCH64) + if (passUsingFloatRegs) + { + // Check if the last register needed is still in the fp argument register range. 
+ passUsingFloatRegs = isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; + + if (isStructArg) + { + if ((numFloatFields & 0x6) && passUsingFloatRegs) + passUsingFloatRegs = isRegArg = intArgRegNum < maxRegArgs; + + if (!passUsingFloatRegs) + { + size = structSize > 8 ? 2 : 1; + numFloatFields = 0; + } + else if (passUsingFloatRegs) + { + if (numFloatFields & 0x8) + nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); + else if (numFloatFields & 0x4) + { + assert(size == 1); + size = 2; + passUsingFloatRegs = false; + nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); + } + else if (/*(size == 1) && */(numFloatFields & 0x2)) + { + assert((size == 1) && (numFloatFields & 0x2)); + size = 2; + nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum); + } + } + } + + assert(!isHfaArg);//LOONGARCH not support HFA. + } + + // if run out the fp argument register, try the int argument register. + if (!isRegArg) + { + // Check if the last register needed is still in the int argument register range. + isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; + if (!passUsingFloatRegs && isRegArg && (size > 1)) + nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum + 1); + + // Did we run out of registers when we had a 16-byte struct (size===2) ? + // (i.e we only have one register remaining but we needed two registers to pass this arg) + // This prevents us from backfilling a subsequent arg into x7 + // + if (!isRegArg && (size > 1)) + { + // We also must update intArgRegNum so that we no longer try to + // allocate any new general purpose registers for args + // + isRegArg = intArgRegNum < maxRegArgs;//the split-struct case. 
+ nextOtherRegNum = REG_STK; + //assert((intArgRegNum + 1) == maxRegArgs); + } + } +#else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 #if defined(UNIX_AMD64_ABI) @@ -3269,7 +3425,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // This is a register argument - put it in the table newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, byteSize, argAlignBytes, isStructArg, - isFloatHfa, callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) + isFloatHfa, callIsVararg UNIX_LOONGARCH64_ONLY_ARG(nextOtherRegNum) UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); @@ -3278,6 +3434,48 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // Set up the next intArgRegNum and fltArgRegNum values. if (!isBackFilled) { +#if defined(TARGET_LOONGARCH64) + // Increment intArgRegNum by 'size' registers + if (!isNonStandard) + { + if ((size > 1) && ((intArgRegNum + 1) == maxRegArgs) && (nextOtherRegNum == REG_STK)) + { +#if FEATURE_ARG_SPLIT + // This indicates a partial enregistration of a struct type + assert((isStructArg) || argx->OperIs(GT_FIELD_LIST) || argx->OperIsCopyBlkOp() || + (argx->gtOper == GT_COMMA && (argx->gtFlags & GTF_ASG))); + call->fgArgInfo->SplitArg(argIndex, 1, 1); +#endif // FEATURE_ARG_SPLIT + assert(!passUsingFloatRegs); + assert(size == 2); + //assert(nextOtherRegNum == REG_STK); + intArgRegNum = maxRegArgs; + } + else if ((numFloatFields & 0xf) == 0x0) + { + if (passUsingFloatRegs) + fltArgRegNum += 1; + else + intArgRegNum += size; + } + else if (numFloatFields & 0x1) + { + structBaseType = structSize == 8 ? 
TYP_DOUBLE : TYP_FLOAT; + fltArgRegNum += 1; + } + else if (numFloatFields & 0x6) + { + //assert((numFloatFields & 0x2) || (numFloatFields & 0x4)); + fltArgRegNum += 1; + intArgRegNum += 1; + } + else if (numFloatFields & 0x8) + { + fltArgRegNum += 2; + } + } +#else + #if defined(UNIX_AMD64_ABI) if (isStructArg) { @@ -3326,6 +3524,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } } } +#endif // defined(TARGET_LOONGARCH64) } } else // We have an argument that is not passed in a register @@ -3358,7 +3557,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) if (newArgEntry->isStruct) { newArgEntry->passedByRef = passStructByRef; +#if defined(TARGET_LOONGARCH64) + newArgEntry->argType = (numFloatFields & 0xe) || (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; +#else newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; +#endif } else { @@ -3571,6 +3774,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) assert(!"Structs are not passed by reference on x64/ux"); #endif // UNIX_AMD64_ABI } +#if defined(DEBUG) && defined(TARGET_LOONGARCH64) + else if ((structBaseType == TYP_STRUCT) && (originalSize == TARGET_POINTER_SIZE) && (size == 2)) + { + DEBUG_ARG_SLOTS_ASSERT(size == argEntry->numRegs); + } +#endif else // This is passed by value. { // Check to see if we can transform this into load of a primitive type. @@ -3596,7 +3805,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) canTransform = (!argEntry->IsHfaArg() || (passingSize == genTypeSize(argEntry->GetHfaType()))); } -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can // only transform in that case if the arg is a local. 
// TODO-CQ: This transformation should be applicable in general, not just for the ARM64 @@ -3606,7 +3815,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) canTransform = (lclVar != nullptr); passingSize = genTypeSize(structBaseType); } -#endif // TARGET_ARM64 || UNIX_AMD64_ABI +#endif // TARGET_ARM64 || UNIX_AMD64_ABI || TARGET_LOONGARCH64 } if (!canTransform) @@ -3649,6 +3858,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) { copyBlkClass = objClass; } +#elif defined(TARGET_LOONGARCH64) + if ((passingSize != structSize) && (lclVar == nullptr)) + { + copyBlkClass = objClass; + } #endif #ifdef TARGET_ARM @@ -3767,7 +3981,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsEnregisterable(structBaseType))); } -#if !defined(UNIX_AMD64_ABI) && !defined(TARGET_ARMARCH) +#if !defined(UNIX_AMD64_ABI) && !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-CQ-XARCH: there is no need for a temp copy if we improve our code generation in // `genPutStructArgStk` for xarch like we did it for Arm/Arm64. @@ -3860,8 +4074,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #if FEATURE_MULTIREG_ARGS if (isStructArg) { +#if defined(TARGET_LOONGARCH64) + if ((argEntry->numRegs + argEntry->GetStackSlotsNumber()) > 1) +#else if (((argEntry->numRegs + argEntry->GetStackSlotsNumber()) > 1) || (isHfaArg && argx->TypeGet() == TYP_STRUCT)) +#endif { hasMultiregStructArgs = true; } @@ -4093,6 +4311,28 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) if ((size > 1) || (fgEntryPtr->IsHfaArg() && argx->TypeGet() == TYP_STRUCT)) { foundStructArg = true; +#if defined(TARGET_LOONGARCH64) + if (!argx->OperIs(GT_FIELD_LIST)) + { + GenTree* newArgx = fgMorphMultiregStructArg(argx, fgEntryPtr); + + // Did we replace 'argx' with a new tree? 
+ if (newArgx != argx) + { + // link the new arg node into either the late arg list or the gtCallArgs list + if (isLateArg) + { + lateUse->SetNode(newArgx); + } + else + { + use.SetNode(newArgx); + } + + assert(fgEntryPtr->GetNode() == newArgx); + } + } +#else if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST)) { if (fgEntryPtr->IsHfaRegArg()) @@ -4142,6 +4382,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) assert(fgEntryPtr->GetNode() == newArgx); } } +#endif } } @@ -4180,9 +4421,11 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) // GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr) { +#if !defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(arg->TypeGet())); +#endif -#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) +#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) NYI("fgMorphMultiregStructArg requires implementation for this target"); #endif @@ -4233,7 +4476,36 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry #if FEATURE_MULTIREG_ARGS // Examine 'arg' and setup argValue objClass and structSize // +#if defined(TARGET_LOONGARCH64) + const CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); + if (objClass == NO_CLASS_HANDLE) + { + assert(arg->TypeGet() != TYP_STRUCT); + assert(arg->OperGet() == GT_LCL_FLD); + assert(fgEntryPtr->numRegs == 2); + + GenTreeLclVarCommon* varNode = arg->AsLclVarCommon(); + unsigned varNum = varNode->GetLclNum(); + assert(varNum < lvaCount); + LclVarDsc* varDsc = &lvaTable[varNum]; + assert(varDsc->lvExactSize == 8); + + unsigned offset = arg->AsLclVarCommon()->GetLclOffs(); + GenTreeFieldList* newArg = nullptr; + var_types tmp_type = fgEntryPtr->isPassedInFloatRegisters() ? 
TYP_FLOAT : TYP_INT; + arg->gtType = tmp_type; + + newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); + newArg->AddField(this, arg, offset, tmp_type); + tmp_type = isValidFloatArgReg(fgEntryPtr->GetOtherRegNum()) ? TYP_FLOAT : TYP_INT; + GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type, offset + 4); + newArg->AddField(this, nextLclFld, offset + 4, tmp_type); + + return newArg; + } +#else const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); +#endif GenTree* argValue = arg; // normally argValue will be arg, but see right below unsigned structSize = 0; @@ -4351,11 +4623,11 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry case 2: type[lastElem] = TYP_SHORT; break; -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 4: type[lastElem] = TYP_INT; break; -#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) +#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) || (TARGET_LOONGARCH64) default: noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg"); break; @@ -4394,6 +4666,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry #endif // DEBUG #ifndef UNIX_AMD64_ABI +#if !defined(TARGET_LOONGARCH64) // This local variable must match the layout of the 'objClass' type exactly if (varDsc->lvIsHfa() #if !defined(HOST_UNIX) && defined(TARGET_ARM64) @@ -4413,6 +4686,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } } else +#endif { #if defined(TARGET_ARM64) // We must have a 16-byte struct (non-HFA) @@ -4555,7 +4829,16 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DoNotEnregisterReason::LocalField)); } -#endif // TARGET_ARM +#elif defined(TARGET_LOONGARCH64) + // Is this LclVar a promoted struct with exactly same size? 
+ assert(!varDsc->lvPromoted); + + assert(structSize >= TARGET_POINTER_SIZE); + { + // We will create a list of GT_LCL_FLDs nodes to pass this struct + lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DoNotEnregisterReason::LocalField)); + } +#endif // TARGET_LOONGARCH64 } // If we didn't set newarg to a new List Node tree @@ -4621,12 +4904,66 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // unsigned offset = baseOffset; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); +#if defined(TARGET_LOONGARCH64) + DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); + if ((numFloatFields & 0xe) /*&& (varDsc->lvSize() == TARGET_POINTER_SIZE)*/) + { + assert((numFloatFields & 0xf) > 1); + var_types tmp_type_1; + var_types tmp_type_2; + + compFloatingPointUsed = true; + if (numFloatFields & 0x8) + { + tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + } + else if (numFloatFields & 0x2) + { + tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + //tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT;type[0] + tmp_type_2 = numFloatFields & 0x20 ? type[1] : TYP_INT; + } + else if (numFloatFields & 0x4) + { + //tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; + tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; + tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + } + else + { + assert(!"----------------unimplemented type-case... on LOONGARCH"); + unreached(); + } + elemSize = numFloatFields & 0x30 ? 
8 : 4;; + + GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type_1, offset); + newArg->AddField(this, nextLclFld, offset, tmp_type_1); + offset += elemSize; + nextLclFld = gtNewLclFldNode(varNum, tmp_type_2, offset); + newArg->AddField(this, nextLclFld, offset, tmp_type_2); + } + else + { + GenTree* nextLclFld = gtNewLclFldNode(varNum, type[0], offset); + newArg->AddField(this, nextLclFld, offset, type[0]); + + if (elemCount > 1) + { + assert(elemCount == 2); + elemSize = genTypeSize(type[1]); + nextLclFld = gtNewLclFldNode(varNum, type[1], offset + elemSize); + newArg->AddField(this, nextLclFld, offset + elemSize, type[1]); + } + } +#else for (unsigned inx = 0; inx < elemCount; inx++) { GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); newArg->AddField(this, nextLclFld, offset, type[inx]); offset += genTypeSize(type[inx]); } +#endif } // Are we passing a GT_OBJ struct? // @@ -4656,6 +4993,59 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); unsigned offset = 0; +#if defined(TARGET_LOONGARCH64) + DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); + if (numFloatFields & 0xe) + { + assert((numFloatFields & 0xf) > 1); + var_types tmp_type_1; + var_types tmp_type_2; + + compFloatingPointUsed = true; + if (numFloatFields & 0x8) + { + tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + } + else if (numFloatFields & 0x2) + { + tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + //tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT; + tmp_type_2 = numFloatFields & 0x20 ? type[1] : TYP_INT; + } + else if (numFloatFields & 0x4) + { + //tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; + tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; + tmp_type_2 = numFloatFields & 0x20 ? 
TYP_DOUBLE : TYP_FLOAT; + } + else + { + assert(!"----------------unimplemented type-case... on LOONGARCH"); + unreached(); + } + elemSize = numFloatFields & 0x30 ? 8 : 4;; + + GenTree* curItem = gtNewIndir(tmp_type_1, baseAddr); + // For safety all GT_IND should have at least GT_GLOB_REF set. + curItem->gtFlags |= GTF_GLOB_REF; + + newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); + newArg->AddField(this, curItem, 0, tmp_type_1); + + //GenTree* curAddr = baseAddr; + baseAddr = gtCloneExpr(baseAddr); + noway_assert(baseAddr != nullptr); + baseAddr = gtNewOperNode(GT_ADD, addrType, baseAddr, gtNewIconNode(elemSize, TYP_I_IMPL)); + + curItem = gtNewIndir(tmp_type_2, baseAddr); + // For safety all GT_IND should have at least GT_GLOB_REF set. + curItem->gtFlags |= GTF_GLOB_REF; + + newArg->AddField(this, curItem, elemSize, tmp_type_2); + } + else +#endif for (unsigned inx = 0; inx < elemCount; inx++) { GenTree* curAddr = baseAddr; @@ -5060,6 +5450,12 @@ void Compiler::fgMoveOpsLeft(GenTree* tree) noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL); new_op1->gtType = TYP_I_IMPL; } +#ifdef TARGET_LOONGARCH64 + else if ((op1->TypeGet() == TYP_LONG) && (ad2->TypeGet() == TYP_INT)) + { + new_op1->gtType = TYP_LONG; + } +#endif // If new_op1 is a new expression. Assign it a new unique value number. // vnStore is null before the ValueNumber phase has run @@ -5352,6 +5748,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) noway_assert(index2 != nullptr); } +#ifndef TARGET_LOONGARCH64 // Next introduce a GT_ARR_BOUNDS_CHECK node var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. 
@@ -5371,6 +5768,9 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) { arrLen = gtNewCastNode(bndsChkType, arrLen, true, bndsChkType); } +#else + GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); +#endif GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK) GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL); @@ -5389,6 +5789,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) GenTree* addr; #ifdef TARGET_64BIT +#ifndef TARGET_LOONGARCH64 // Widen 'index' on 64-bit targets if (index->TypeGet() != TYP_I_IMPL) { @@ -5401,6 +5802,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) index = gtNewCastNode(TYP_I_IMPL, index, true, TYP_I_IMPL); } } +#endif #endif // TARGET_64BIT /* Scale the index value if necessary */ @@ -12226,6 +12628,22 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) break; #endif +#ifdef TARGET_LOONGARCH64 + case GT_DIV: + case GT_MOD: + if (!varTypeIsFloating(tree->gtType)) + { + // Codegen for this instruction needs to be able to throw two exceptions: + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); + } + break; + case GT_UDIV: + case GT_UMOD: + // Codegen for this instruction needs to be able to throw one exception: + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); + break; +#endif case GT_ADD: CM_OVF_OP: @@ -13948,6 +14366,12 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) // be in a fully-interruptible code region. 
if (!varTypeIsGC(ad1->TypeGet()) && !varTypeIsGC(op2->TypeGet())) { +#ifdef TARGET_LOONGARCH64 + if ((op2->TypeGet() == TYP_LONG) /*&& (op1->TypeGet() == TYP_INT)*/) + { + op1->gtType = TYP_LONG; + } +#endif tree->gtOp2 = ad2; op1->AsOp()->gtOp2 = op2; @@ -17139,7 +17563,7 @@ void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent) void Compiler::fgResetImplicitByRefRefCount() { -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #ifdef DEBUG if (verbose) { @@ -17162,7 +17586,7 @@ void Compiler::fgResetImplicitByRefRefCount() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } //------------------------------------------------------------------------ @@ -17176,7 +17600,7 @@ void Compiler::fgResetImplicitByRefRefCount() void Compiler::fgRetypeImplicitByRefArgs() { -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #ifdef DEBUG if (verbose) { @@ -17375,7 +17799,7 @@ void Compiler::fgRetypeImplicitByRefArgs() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } //------------------------------------------------------------------------ @@ -17388,7 +17812,7 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() { JITDUMP("\n*************** In fgMarkDemotedImplicitByRefArgs()\n"); -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) { @@ -17449,7 +17873,7 @@ void 
Compiler::fgMarkDemotedImplicitByRefArgs() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } /***************************************************************************** @@ -17459,11 +17883,11 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() */ bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) { -#if (!defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)) && !defined(TARGET_ARM64) +#if (!defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) return false; -#else // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#else // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 bool changed = false; @@ -17498,7 +17922,7 @@ bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) } return changed; -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) @@ -17647,7 +18071,11 @@ GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZero) { // We expect 'addr' to be an address at this point. +#ifdef TARGET_LOONGARCH64 + assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_INT || addr->TypeGet() == TYP_REF); +#else assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); +#endif // Tunnel through any commas. 
const bool commaOnly = true; diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 5fc26491fc616..1dff1eba7a6d8 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5114,7 +5114,9 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu unsigned kind; noway_assert(tree); +#ifndef TARGET_LOONGARCH64 noway_assert(genActualType(tree->gtType) == genActualType(srct)); +#endif /* Assume we're only handling integer types */ noway_assert(varTypeIsIntegral(srct)); @@ -5282,8 +5284,13 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu switch (tree->gtOper) { case GT_AND: +#ifdef TARGET_LOONGARCH64 + noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op1->gtType))); + noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op2->gtType))); +#else noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); +#endif GenTree* opToNarrow; opToNarrow = nullptr; diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index 939ea56badf2c..5720c4a23e3eb 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -162,6 +162,18 @@ regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc #if FEATURE_MULTIREG_ARGS if (varTypeIsStruct(argDsc->lvType)) { +#ifdef TARGET_LOONGARCH64 + { + if (argDsc->GetOtherArgReg() != REG_NA) + { + inArgMask = genRegMask(argDsc->GetOtherArgReg()); + if (emitter::isFloatReg(argDsc->GetOtherArgReg())) + codeGen->floatRegState.rsCalleeRegArgMaskLiveIn |= inArgMask; + else + codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= inArgMask; + } + } +#else if (argDsc->lvIsHfaRegArg()) { assert(regState->rsIsFloat); @@ -186,6 +198,7 @@ regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc regState->rsCalleeRegArgMaskLiveIn |= 
genRegMask(nextArgReg); } } +#endif } #endif // FEATURE_MULTIREG_ARGS @@ -256,6 +269,16 @@ bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason)) } #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 + // TODO-LOONGARCH64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog + // can handle non-frame pointer frames. + if (!result) + { + INDEBUG(reason = "Temporary LOONGARCH64 force frame pointer"); + result = true; + } +#endif // TARGET_LOONGARCH64 + #ifdef DEBUG if ((result == true) && (wbReason != nullptr)) { diff --git a/src/coreclr/jit/register.h b/src/coreclr/jit/register.h index d06bef0cea1d9..971974722eee8 100644 --- a/src/coreclr/jit/register.h +++ b/src/coreclr/jit/register.h @@ -103,6 +103,9 @@ REGDEF(STK, 16+XMMBASE, 0x0000, "STK" ) #elif defined(TARGET_ARM64) #include "registerarm64.h" +#elif defined(TARGET_LOONGARCH64) + #include "registerloongarch64.h" + #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/register_arg_convention.cpp b/src/coreclr/jit/register_arg_convention.cpp index a90e61c3a59fd..16c3f051f74bf 100644 --- a/src/coreclr/jit/register_arg_convention.cpp +++ b/src/coreclr/jit/register_arg_convention.cpp @@ -42,6 +42,17 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) // For System V the reg type counters should be independent. nextReg(TYP_INT, numRegs); nextReg(TYP_FLOAT, numRegs); +#elif defined(TARGET_LOONGARCH64) + // LA-ABI64. + if (numRegs > MAX_PASS_MULTIREG_BYTES/TARGET_POINTER_SIZE) + { + assert(varTypeIsStruct(type)); + nextReg(TYP_INT, 1);//TYP_BYREF + }//TODO:struct-float. + else + { + nextReg(type, numRegs); + } #else // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated. 
nextReg(type, numRegs); diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h new file mode 100644 index 0000000000000..4127ce8ca4ace --- /dev/null +++ b/src/coreclr/jit/registerloongarch64.h @@ -0,0 +1,108 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// clang-format off + +/*****************************************************************************/ +/*****************************************************************************/ +#ifndef REGDEF +#error Must define REGDEF macro before including this file +#endif +#ifndef REGALIAS +#define REGALIAS(alias, realname) +#endif + +#define RMASK(x) (1ULL << (x)) + +/* +REGDEF(name, rnum, mask, xname, wname) */ +REGDEF(R0, 0, 0x0001, "zero" , "zero" ) +REGDEF(RA, 1, 0x0002, "ra" , "ra" ) +REGDEF(TP, 2, 0x0004, "tp" , "tp" ) +REGDEF(SP, 3, 0x0008, "sp" , "sp" ) +REGDEF(A0, 4, 0x0010, "a0" , "a0" ) +REGDEF(A1, 5, 0x0020, "a1" , "a1" ) +REGDEF(A2, 6, 0x0040, "a2" , "a2" ) +REGDEF(A3, 7, 0x0080, "a3" , "a3" ) +REGDEF(A4, 8, 0x0100, "a4" , "a4" ) +REGDEF(A5, 9, 0x0200, "a5" , "a5" ) +REGDEF(A6, 10, 0x0400, "a6" , "a6" ) +REGDEF(A7, 11, 0x0800, "a7" , "a7" ) +REGDEF(T0, 12, 0x1000, "t0" , "t0" ) +REGDEF(T1, 13, 0x2000, "t1" , "t1" ) +REGDEF(T2, 14, 0x4000, "t2" , "t2" ) +REGDEF(T3, 15, 0x8000, "t3" , "t3" ) +REGDEF(T4, 16, 0x10000, "t4" , "t4" ) +REGDEF(T5, 17, 0x20000, "t5" , "t5" ) +REGDEF(T6, 18, 0x40000, "t6" , "t6" ) +REGDEF(T7, 19, 0x80000, "t7" , "t7" ) +REGDEF(T8, 20, 0x100000, "t8" , "t8" ) +REGDEF(X0, 21, 0x200000, "x0" , "x0" ) +REGDEF(FP, 22, 0x400000, "fp" , "fp" ) +REGDEF(S0, 23, 0x800000, "s0" , "s0" ) +REGDEF(S1, 24, 0x1000000, "s1" , "s1" ) +REGDEF(S2, 25, 0x2000000, "s2" , "s2" ) +REGDEF(S3, 26, 0x4000000, "s3" , "s3" ) +REGDEF(S4, 27, 0x8000000, "s4" , "s4" ) +REGDEF(S5, 28, 0x10000000, "s5" , "s5" ) +REGDEF(S6, 29, 0x20000000, "s6" , "s6" ) +REGDEF(S7, 30, 0x40000000, "s7" 
, "s7" ) +REGDEF(S8, 31, 0x80000000, "s8" , "s8" ) + + +REGALIAS(R21, X0) + +#define FBASE 32 +#define FMASK(x) (1ULL << (FBASE+(x))) + +/* +REGDEF(name, rnum, mask, xname, wname) */ +REGDEF(F0, 0+FBASE, FMASK(0), "f0", "f0") +REGDEF(F1, 1+FBASE, FMASK(1), "f1", "f1") +REGDEF(F2, 2+FBASE, FMASK(2), "f2", "f2") +REGDEF(F3, 3+FBASE, FMASK(3), "f3", "f3") +REGDEF(F4, 4+FBASE, FMASK(4), "f4", "f4") +REGDEF(F5, 5+FBASE, FMASK(5), "f5", "f5") +REGDEF(F6, 6+FBASE, FMASK(6), "f6", "f6") +REGDEF(F7, 7+FBASE, FMASK(7), "f7", "f7") +REGDEF(F8, 8+FBASE, FMASK(8), "f8", "f8") +REGDEF(F9, 9+FBASE, FMASK(9), "f9", "f9") +REGDEF(F10, 10+FBASE, FMASK(10), "f10", "f10") +REGDEF(F11, 11+FBASE, FMASK(11), "f11", "f11") +REGDEF(F12, 12+FBASE, FMASK(12), "f12", "f12") +REGDEF(F13, 13+FBASE, FMASK(13), "f13", "f13") +REGDEF(F14, 14+FBASE, FMASK(14), "f14", "f14") +REGDEF(F15, 15+FBASE, FMASK(15), "f15", "f15") +REGDEF(F16, 16+FBASE, FMASK(16), "f16", "f16") +REGDEF(F17, 17+FBASE, FMASK(17), "f17", "f17") +REGDEF(F18, 18+FBASE, FMASK(18), "f18", "f18") +REGDEF(F19, 19+FBASE, FMASK(19), "f19", "f19") +REGDEF(F20, 20+FBASE, FMASK(20), "f20", "f20") +REGDEF(F21, 21+FBASE, FMASK(21), "f21", "f21") +REGDEF(F22, 22+FBASE, FMASK(22), "f22", "f22") +REGDEF(F23, 23+FBASE, FMASK(23), "f23", "f23") +REGDEF(F24, 24+FBASE, FMASK(24), "f24", "f24") +REGDEF(F25, 25+FBASE, FMASK(25), "f25", "f25") +REGDEF(F26, 26+FBASE, FMASK(26), "f26", "f26") +REGDEF(F27, 27+FBASE, FMASK(27), "f27", "f27") +REGDEF(F28, 28+FBASE, FMASK(28), "f28", "f28") +REGDEF(F29, 29+FBASE, FMASK(29), "f29", "f29") +REGDEF(F30, 30+FBASE, FMASK(30), "f30", "f30") +REGDEF(F31, 31+FBASE, FMASK(31), "f31", "f31") + +// The registers with values 64 (NBASE) and above are not real register numbers +#define NBASE 64 + +// This must be last! 
+REGDEF(STK, 0+NBASE, 0x0000, "STK", "STK") + +/*****************************************************************************/ +#undef RMASK +#undef FMASK +#undef FBASE +#undef NBASE +#undef REGDEF +#undef REGALIAS +/*****************************************************************************/ + +// clang-format on diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 58439020fd693..aade930da4fd5 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -23,7 +23,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) const regMaskSmall regMasks[] = { #define REGDEF(name, rnum, mask, xname, wname) mask, #include "register.h" @@ -230,6 +230,8 @@ RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) : m_rsCompiler(compiler), m_r #ifdef TARGET_ARMARCH rsMaskCalleeSaved = RBM_NONE; +#elif defined(TARGET_LOONGARCH64) + rsMaskCalleeSaved = RBM_NONE; #endif // TARGET_ARMARCH #ifdef TARGET_ARM diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index 34a9bcea64629..a200b5686a7df 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -125,6 +125,8 @@ class RegSet #ifdef TARGET_ARMARCH regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog +#elif defined(TARGET_LOONGARCH64) + regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog #endif // TARGET_ARM public: // TODO-Cleanup: Should be private, but Compiler uses it diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 69a63e020b46c..03ddbdfd7585c 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -295,7 +295,7 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( case TYP_LONG: case TYP_DOUBLE: #endif // TARGET_64BIT -#if defined(TARGET_AMD64) || 
defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // In the AMD64 ABI we are supposed to pass a struct by reference when its // size is not 1, 2, 4 or 8 bytes in size. During fgMorph, the compiler modifies // the IR to comply with the ABI and therefore changes the type of the lclVar @@ -314,7 +314,7 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( this->vlType = VLT_STK_BYREF; } else -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) { this->vlType = VLT_STK; } @@ -1600,11 +1600,34 @@ void CodeGen::psiBegProlog() if (!isStructHandled) { #ifdef DEBUG +#ifdef TARGET_LOONGARCH64 + var_types regType; + if (varTypeIsStruct(lclVarDsc)) + { + // Must be <= 16 bytes or else it wouldn't be passed in registers, + // which can be bigger (and is handled above). + noway_assert(EA_SIZE_IN_BYTES(lclVarDsc->lvSize()) <= 16); + if (emitter::isFloatReg(lclVarDsc->GetArgReg())) + { + //regType = lclVarDsc->lvIs4Field1 ? 
TYP_FLOAT : TYP_DOUBLE; + regType = TYP_DOUBLE; + } + else + regType = lclVarDsc->GetLayout()->GetGCPtrType(0); + } + else + { + regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); + if (emitter::isGeneralRegisterOrR0(lclVarDsc->GetArgReg()) && isFloatRegType(regType)) + regType = TYP_LONG; + } +#else var_types regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); if (lclVarDsc->lvIsHfaRegArg()) { regType = lclVarDsc->GetHfaType(); } +#endif assert(genMapRegNumToRegArgNum(lclVarDsc->GetArgReg(), regType) != (unsigned)-1); #endif // DEBUG diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index b6ab3166e10f8..e78b74616ce41 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -34,7 +34,7 @@ inline bool compMacOsArm64Abi() } inline bool compFeatureArgSplit() { - return TargetArchitecture::IsArm32 || (TargetOS::IsWindows && TargetArchitecture::IsArm64); + return TargetArchitecture::IsLoongArch64 || TargetArchitecture::IsArm32 || (TargetOS::IsWindows && TargetArchitecture::IsArm64); } inline bool compUnixX86Abi() { @@ -51,6 +51,8 @@ inline bool compUnixX86Abi() #define TARGET_READABLE_NAME "ARM" #elif defined(TARGET_ARM64) #define TARGET_READABLE_NAME "ARM64" +#elif defined(TARGET_LOONGARCH64) +#define TARGET_READABLE_NAME "LOONGARCH64" #else #error Unsupported or unset target architecture #endif @@ -70,6 +72,10 @@ inline bool compUnixX86Abi() #define REGMASK_BITS 64 #define CSE_CONST_SHARED_LOW_BITS 12 +#elif defined(TARGET_LOONGARCH64) +#define REGMASK_BITS 64 +#define CSE_CONST_SHARED_LOW_BITS 12 + #else #error Unsupported or unset target architecture #endif @@ -170,6 +176,27 @@ enum _regMask_enum : unsigned #include "register.h" }; +#elif defined(TARGET_LOONGARCH64) + +enum _regNumber_enum : unsigned +{ +#define REGDEF(name, rnum, mask, xname, wname) REG_##name = rnum, +#define REGALIAS(alias, realname) REG_##alias = REG_##realname, +#include "register.h" + + REG_COUNT, + REG_NA = REG_COUNT, + ACTUAL_REG_COUNT = 
REG_COUNT - 1 // everything but REG_STK (only real regs) +}; + +enum _regMask_enum : unsigned __int64 +{ + RBM_NONE = 0, +#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, +#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +#include "register.h" +}; + #else #error Unsupported target architecture #endif @@ -185,7 +212,7 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) typedef unsigned __int64 regMaskTP; #else typedef unsigned regMaskTP; @@ -237,6 +264,8 @@ typedef unsigned char regNumberSmall; #include "targetarm.h" #elif defined(TARGET_ARM64) #include "targetarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "targetloongarch64.h" #else #error Unsupported or unset target architecture #endif @@ -536,7 +565,7 @@ inline regMaskTP genRegMask(regNumber reg) inline regMaskTP genRegMaskFloat(regNumber reg, var_types type /* = TYP_DOUBLE */) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) assert(genIsValidFloatReg(reg)); assert((unsigned)reg < ArrLen(regMasks)); return regMasks[reg]; @@ -672,7 +701,7 @@ inline bool isFloatRegType(var_types type) C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RBM_NONE); C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RBM_NONE); -#if ETW_EBP_FRAMED +#if ETW_EBP_FRAMED && !defined(TARGET_LOONGARCH64) // Frame pointer isn't either if we're supporting ETW frame chaining C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RBM_NONE); C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE); diff --git a/src/coreclr/jit/targetloongarch64.cpp b/src/coreclr/jit/targetloongarch64.cpp new file mode 100644 index 0000000000000..92f076eba3388 --- /dev/null +++ b/src/coreclr/jit/targetloongarch64.cpp @@ -0,0 +1,30 @@ +// Licensed to the .NET 
Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. + +/*****************************************************************************/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +#include "target.h" + +const char* Target::g_tgtCPUName = "loongarch64"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; + +// clang-format off +const regNumber intArgRegs [] = {REG_A0, REG_A1, REG_A2, REG_A3, REG_A4, REG_A5, REG_A6, REG_A7}; +const regMaskTP intArgMasks[] = {RBM_A0, RBM_A1, RBM_A2, RBM_A3, RBM_A4, RBM_A5, RBM_A6, RBM_A7}; + +const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7 }; +const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7 }; +// clang-format on + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h new file mode 100644 index 0000000000000..cf97f4148cf16 --- /dev/null +++ b/src/coreclr/jit/targetloongarch64.h @@ -0,0 +1,336 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#pragma once + +#if !defined(TARGET_LOONGARCH64) +#error The file should not be included for this platform. +#endif + +// clang-format off + #define CPU_LOAD_STORE_ARCH 1 + //#define CPU_LONG_USES_REGPAIR 0 + #define CPU_HAS_FP_SUPPORT 1 + #define ROUND_FLOAT 0 // Do not round intermed float expression results + #define CPU_HAS_BYTE_REGS 0 + //#define CPU_USES_BLOCK_MOVE 0 + + #define CPBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll CpBlk. 
+ #define INITBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll InitBlk. + +#ifdef FEATURE_SIMD +#pragma error("SIMD Unimplemented yet LOONGARCH") + #define ALIGN_SIMD_TYPES 1 // whether SIMD type locals are to be aligned + #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 1 // Whether SIMD registers are partially saved at calls +#endif // FEATURE_SIMD + + #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog + #define FEATURE_STRUCTPROMOTE 0 // JIT Optimization to promote fields of structs into registers + #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers + #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp + #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. + #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set + #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register + #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register + #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register + #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (8-byte). + #define MAX_PASS_MULTIREG_BYTES 16 // Maximum size of a struct that could be passed in more than one register + #define MAX_RET_MULTIREG_BYTES 16 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 2 doubles) + #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass a single argument in multiple registers. + #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value. 
+ #define MAX_MULTIREG_COUNT 2 // Maxiumum number of registers defined by a single instruction (including calls). + // This is also the maximum number of registers for a MultiReg node. + + + #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers + #define USER_ARGS_COME_LAST 1 + #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really + // need to track stack depth, but this is currently necessary to get GC information reported at call sites. + #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target + #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses. + #define FEATURE_EH_FUNCLETS 1 + #define FEATURE_EH_CALLFINALLY_THUNKS 1 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses. + #define ETW_EBP_FRAMED 1 // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods + #define CSE_CONSTS 1 // Enable if we want to CSE constants + + #define REG_FP_FIRST REG_F0 + #define REG_FP_LAST REG_F31 + #define FIRST_FP_ARGREG REG_F0 + #define LAST_FP_ARGREG REG_F7 + + #define REGNUM_BITS 6 // number of bits in a REG_* within registerloongarch64.h + #define REGSIZE_BYTES 8 // number of bytes in one general purpose register + #define FP_REGSIZE_BYTES 8 // number of bytes in one FP register + #define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP register that are saved/restored. + + #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call. 
+ + #define CODE_ALIGN 4 // code alignment requirement + #define STACK_ALIGN 16 // stack alignment requirement + + #define RBM_INT_CALLEE_SAVED (RBM_S0|RBM_S1|RBM_S2|RBM_S3|RBM_S4|RBM_S5|RBM_S6|RBM_S7|RBM_S8) + #define RBM_INT_CALLEE_TRASH (RBM_A0|RBM_A1|RBM_A2|RBM_A3|RBM_A4|RBM_A5|RBM_A6|RBM_A7|RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T5|RBM_T6|RBM_T7|RBM_T8) + #define RBM_FLT_CALLEE_SAVED (RBM_F24|RBM_F25|RBM_F26|RBM_F27|RBM_F28|RBM_F29|RBM_F30|RBM_F31) + //#define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F12|RBM_F13|RBM_F14|RBM_F15|RBM_F16|RBM_F17|RBM_F18|RBM_F19|RBM_F20|RBM_F21|RBM_F22|RBM_F23) + #define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7) + + #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) + #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH) + + #define REG_DEFAULT_HELPER_CALL_TARGET REG_T2 + #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_T2 + + #define REG_FASTTAILCALL_TARGET REG_T4 // Target register for fast tail call + #define RBM_FASTTAILCALL_TARGET RBM_T4 + + #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) + #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH) + #define RBM_ALLDOUBLE RBM_ALLFLOAT + + // REG_VAR_ORDER is: (CALLEE_TRASH & ~CALLEE_TRASH_NOGC), CALLEE_TRASH_NOGC, CALLEE_SAVED + #define REG_VAR_ORDER REG_A0,REG_A1,REG_A2,REG_A3,REG_A4,REG_A5,REG_A6,REG_A7, \ + REG_T0,REG_T1,REG_T2,REG_T3,REG_T4,REG_T5,REG_T6,REG_T7,REG_T8, \ + REG_CALLEE_SAVED_ORDER + + #define REG_VAR_ORDER_FLT REG_F12,REG_F13,REG_F14,REG_F15,REG_F16,REG_F17,REG_F18,REG_F19, \ + REG_F2,REG_F3,REG_F4,REG_F5,REG_F6,REG_F7,REG_F8,REG_F9,REG_F10, \ + REG_F20,REG_F21,REG_F22,REG_F23, \ + REG_F24,REG_F25,REG_F26,REG_F27,REG_F28,REG_F29,REG_F30,REG_F31, \ + REG_F1,REG_F0 + + #define REG_CALLEE_SAVED_ORDER REG_S0,REG_S1,REG_S2,REG_S3,REG_S4,REG_S5,REG_S6,REG_S7,REG_S8 + #define RBM_CALLEE_SAVED_ORDER 
RBM_S0,RBM_S1,RBM_S2,RBM_S3,RBM_S4,RBM_S5,RBM_S6,RBM_S7,RBM_S8 + + #define CNT_CALLEE_SAVED (9) //s0-s8, not including fp,ra. + #define CNT_CALLEE_TRASH (17) + #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) + + #define CNT_CALLEE_SAVED_FLOAT (8) + #define CNT_CALLEE_TRASH_FLOAT (24) + + #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED * REGSIZE_BYTES) + #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT * FPSAVE_REGSIZE_BYTES) + + #define REG_TMP_0 REG_T0 + + // Temporary registers used for the GS cookie check. + #define REG_GSCOOKIE_TMP_0 REG_T0 + #define REG_GSCOOKIE_TMP_1 REG_T1 + + // register to hold shift amount; no special register is required on LOONGARCH64. + #define REG_SHIFT REG_NA + #define RBM_SHIFT RBM_ALLINT + //#define PREDICT_REG_SHIFT PREDICT_REG + + // This is a general scratch register that does not conflict with the argument registers + #define REG_SCRATCH REG_T0 + + // This is a float scratch register that does not conflict with the argument registers + #define REG_SCRATCH_FLT REG_F11 + + // This is a general register that can be optionally reserved for other purposes during codegen + #define REG_OPT_RSVD REG_T1 + #define RBM_OPT_RSVD RBM_T1 + + // Where is the exception object on entry to the handler block? 
+ #define REG_EXCEPTION_OBJECT REG_A0 + #define RBM_EXCEPTION_OBJECT RBM_A0 + + #define REG_JUMP_THUNK_PARAM REG_T2 + #define RBM_JUMP_THUNK_PARAM RBM_T2 + + // LOONGARCH64 write barrier ABI (see vm/loongarch64/asmhelpers.S): + // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): + // On entry: + // v0: the destination address (LHS of the assignment) + // v1: the object reference (RHS of the assignment) + // On exit: + // t0: trashed + // t1: trashed + // t2: trashed + // t3: trashed + // v0: incremented by 8 + // v1: trashed + // ??: trashed if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP (currently non-Windows) + // CORINFO_HELP_ASSIGN_BYREF (JIT_ByRefWriteBarrier): + // On entry: + // t8: the source address (points to object reference to write) + // v0: the destination address (object reference written here) + // On exit: + // t8: incremented by 8 + // v0: incremented by 8 + // + // Note that while ?reg? is currently only trashed under FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP, + // currently only set for non-Windows//, it is expected to be set in the future for Windows, and for R2R. + // So simply always consider it trashed, to avoid later breaking changes. + + #define REG_WRITE_BARRIER_DST REG_T6 + #define RBM_WRITE_BARRIER_DST RBM_T6 + + #define REG_WRITE_BARRIER_SRC REG_T7 + #define RBM_WRITE_BARRIER_SRC RBM_T7 + + #define REG_WRITE_BARRIER_DST_BYREF REG_T6 + #define RBM_WRITE_BARRIER_DST_BYREF RBM_T6 + + #define REG_WRITE_BARRIER_SRC_BYREF REG_T8 + #define RBM_WRITE_BARRIER_SRC_BYREF RBM_T8 + + #define RBM_CALLEE_TRASH_NOGC (RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T6|RBM_T7|RBM_DEFAULT_HELPER_CALL_TARGET) + + // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_WRITE_BARRIER_DST|RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. 
+ #define RBM_CALLEE_GCTRASH_WRITEBARRIER RBM_CALLEE_TRASH_NOGC + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF (RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. + // Note that a0 and a1 are still valid byref pointers after this helper call, despite their value being changed. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF RBM_CALLEE_TRASH_NOGC + + // GenericPInvokeCalliHelper VASigCookie Parameter + #define REG_PINVOKE_COOKIE_PARAM REG_T3 + #define RBM_PINVOKE_COOKIE_PARAM RBM_T3 + + // GenericPInvokeCalliHelper unmanaged target Parameter + #define REG_PINVOKE_TARGET_PARAM REG_T2 + #define RBM_PINVOKE_TARGET_PARAM RBM_T2 + + // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) + #define REG_SECRET_STUB_PARAM REG_T2 + #define RBM_SECRET_STUB_PARAM RBM_T2 + + // R2R indirect call. Use the same registers as VSD + #define REG_R2R_INDIRECT_PARAM REG_T8 + #define RBM_R2R_INDIRECT_PARAM RBM_T8 + + #define REG_INDIRECT_CALL_TARGET_REG REG_T6 + + // Registers used by PInvoke frame setup //should confirm. 
+ #define REG_PINVOKE_FRAME REG_T0 + #define RBM_PINVOKE_FRAME RBM_T0 + #define REG_PINVOKE_TCB REG_T1 + #define RBM_PINVOKE_TCB RBM_T1 + #define REG_PINVOKE_SCRATCH REG_T1 + #define RBM_PINVOKE_SCRATCH RBM_T1 + + // The following defines are useful for iterating a regNumber + #define REG_FIRST REG_R0 + #define REG_INT_FIRST REG_R0 + #define REG_INT_LAST REG_S8 + #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1) + #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1)) + #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1)) + + // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks + #define REG_PROFILER_ENTER_ARG_FUNC_ID REG_R10 + #define RBM_PROFILER_ENTER_ARG_FUNC_ID RBM_R10 + #define REG_PROFILER_ENTER_ARG_CALLER_SP REG_R11 + #define RBM_PROFILER_ENTER_ARG_CALLER_SP RBM_R11 + #define REG_PROFILER_LEAVE_ARG_FUNC_ID REG_R10 + #define RBM_PROFILER_LEAVE_ARG_FUNC_ID RBM_R10 + #define REG_PROFILER_LEAVE_ARG_CALLER_SP REG_R11 + #define RBM_PROFILER_LEAVE_ARG_CALLER_SP RBM_R11 + + // The registers trashed by profiler enter/leave/tailcall hook + #define RBM_PROFILER_ENTER_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH + + // Which register are int and long values returned in ? 
+ #define REG_INTRET REG_A0 + #define RBM_INTRET RBM_A0 + #define REG_LNGRET REG_A0 + #define RBM_LNGRET RBM_A0 + // second return register for 16-byte structs + #define REG_INTRET_1 REG_A1 + #define RBM_INTRET_1 RBM_A1 + + #define REG_FLOATRET REG_F0 + #define RBM_FLOATRET RBM_F0 + #define RBM_DOUBLERET RBM_F0 + #define REG_FLOATRET_1 REG_F1 + #define RBM_FLOATRET_1 RBM_F1 + #define RBM_DOUBLERET_1 RBM_F1 + + // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper + #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH + + // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. + #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + + #define REG_FPBASE REG_FP + #define RBM_FPBASE RBM_FP + #define STR_FPBASE "fp" + #define REG_SPBASE REG_SP + #define RBM_SPBASE RBM_SP // reuse the RBM for REG_SP + #define STR_SPBASE "sp" + + #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address + + #define MAX_REG_ARG 8 + #define MAX_FLOAT_REG_ARG 8 + + #define REG_ARG_FIRST REG_A0 + #define REG_ARG_LAST REG_A7 + #define REG_ARG_FP_FIRST REG_F0 + #define REG_ARG_FP_LAST REG_F7 + #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots + + #define REG_ARG_0 REG_A0 + #define REG_ARG_1 REG_A1 + #define REG_ARG_2 REG_A2 + #define REG_ARG_3 REG_A3 + #define REG_ARG_4 REG_A4 + #define REG_ARG_5 REG_A5 + #define REG_ARG_6 REG_A6 + #define REG_ARG_7 REG_A7 + + extern const regNumber intArgRegs [MAX_REG_ARG]; + extern const regMaskTP intArgMasks[MAX_REG_ARG]; + + #define RBM_ARG_0 RBM_A0 + #define RBM_ARG_1 RBM_A1 + #define RBM_ARG_2 RBM_A2 + #define RBM_ARG_3 RBM_A3 + #define RBM_ARG_4 RBM_A4 + #define RBM_ARG_5 RBM_A5 + #define RBM_ARG_6 RBM_A6 + #define RBM_ARG_7 RBM_A7 + + #define REG_FLTARG_0 REG_F0 + #define REG_FLTARG_1 REG_F1 + #define REG_FLTARG_2 REG_F2 + #define REG_FLTARG_3 REG_F3 + #define REG_FLTARG_4 REG_F4 + #define REG_FLTARG_5 REG_F5 + #define REG_FLTARG_6 REG_F6 + #define REG_FLTARG_7 REG_F7 + + #define 
RBM_FLTARG_0 RBM_F0 + #define RBM_FLTARG_1 RBM_F1 + #define RBM_FLTARG_2 RBM_F2 + #define RBM_FLTARG_3 RBM_F3 + #define RBM_FLTARG_4 RBM_F4 + #define RBM_FLTARG_5 RBM_F5 + #define RBM_FLTARG_6 RBM_F6 + #define RBM_FLTARG_7 RBM_F7 + + #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3|RBM_ARG_4|RBM_ARG_5|RBM_ARG_6|RBM_ARG_7) + #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7) + + extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; + extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; + + #define B_DIST_SMALL_MAX_NEG (-131072) + #define B_DIST_SMALL_MAX_POS (+131071) + + #define OFFSET_DIST_SMALL_MAX_NEG (-2048) + #define OFFSET_DIST_SMALL_MAX_POS (+2047) + + #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 0 + +// clang-format on diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index 8d5efd0051906..ffb7cf71d886b 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -412,7 +412,7 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) else { if (TargetArchitecture::IsX64 || - (TargetOS::IsUnix && (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86))) + (TargetOS::IsUnix && (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86 || TargetArchitecture::IsLoongArch64))) { assert(func->startLoc != nullptr); offset = func->startLoc->GetFuncletPrologOffset(GetEmitter()); @@ -442,6 +442,10 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) // See unwindX86.cpp +#elif defined(TARGET_LOONGARCH64) + +// See unwindLoongarch64.cpp + #else // TARGET* #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/unwind.h b/src/coreclr/jit/unwind.h index c578c30cb78d0..bb93348cc2fdd 100644 --- a/src/coreclr/jit/unwind.h +++ b/src/coreclr/jit/unwind.h @@ -10,7 +10,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -#ifdef TARGET_ARMARCH +////TODO for LOONGARCH64: should seperately define for loongarch64. +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Windows no longer imposes a maximum prolog size. However, we still have an // assert here just to inform us if we increase the size of the prolog @@ -34,7 +35,15 @@ const unsigned MAX_EPILOG_SIZE_BYTES = 100; #define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20) #define UW_MAX_CODE_WORDS_COUNT 31 #define UW_MAX_EPILOG_START_INDEX 0x3FFU -#endif // TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) +const unsigned MAX_PROLOG_SIZE_BYTES = 200; +const unsigned MAX_EPILOG_SIZE_BYTES = 200; +#define UWC_END 0xE4 // "end" unwind code +#define UWC_END_C 0xE5 // "end_c" unwind code +#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20) +#define UW_MAX_CODE_WORDS_COUNT 31 +#define UW_MAX_EPILOG_START_INDEX 0x3FFU +#endif // TARGET_LOONGARCH64 #define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field // of the .pdata record @@ -131,6 +140,8 @@ class UnwindCodesBase return b >= 0xFD; #elif defined(TARGET_ARM64) return (b == UWC_END); // TODO-ARM64-Bug?: what about the "end_c" code? +#elif defined(TARGET_LOONGARCH64) + return (b == UWC_END); #endif // TARGET_ARM64 } @@ -813,7 +824,7 @@ class UnwindInfo : public UnwindBase // Given the first byte of the unwind code, check that its opsize matches // the last instruction added in the emitter. void CheckOpsize(BYTE b1); -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) void CheckOpsize(BYTE b1) { } // nothing to do; all instructions are 4 bytes diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp new file mode 100644 index 0000000000000..eae92c102e381 --- /dev/null +++ b/src/coreclr/jit/unwindloongarch64.cpp @@ -0,0 +1,2347 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +// Copyright (c) Loongson Technology. All rights reserved. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX UnwindInfo XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +#if defined(FEATURE_CFI_SUPPORT) +short Compiler::mapRegNumToDwarfReg(regNumber reg) +{ + short dwarfReg = DWARF_REG_ILLEGAL; + + switch (reg) + { + case REG_R0: + dwarfReg = 0; + break; + case REG_RA: + dwarfReg = 1; + break; + case REG_TP: + dwarfReg = 2; + break; + case REG_SP: + dwarfReg = 3; + break; + case REG_A0: + dwarfReg = 4; + break; + case REG_A1: + dwarfReg = 5; + break; + case REG_A2: + dwarfReg = 6; + break; + case REG_A3: + dwarfReg = 7; + break; + case REG_A4: + dwarfReg = 8; + break; + case REG_A5: + dwarfReg = 9; + break; + case REG_A6: + dwarfReg = 10; + break; + case REG_A7: + dwarfReg = 11; + break; + case REG_T0: + dwarfReg = 12; + break; + case REG_T1: + dwarfReg = 13; + break; + case REG_T2: + dwarfReg = 14; + break; + case REG_T3: + dwarfReg = 15; + break; + case REG_T4: + dwarfReg = 16; + break; + case REG_T5: + dwarfReg = 17; + break; + case REG_T6: + dwarfReg = 18; + break; + case REG_T7: + dwarfReg = 19; + break; + case REG_T8: + dwarfReg = 20; + break; + case REG_X0: + dwarfReg = 21; + break; + case REG_FP: + dwarfReg = 22; + break; + case REG_S0: + dwarfReg = 23; + break; + case REG_S1: + dwarfReg = 24; + break; + case REG_S2: + dwarfReg = 25; + break; + case REG_S3: + dwarfReg = 26; + break; + case REG_S4: + dwarfReg = 27; + break; + case REG_S5: + dwarfReg = 28; + break; + case REG_S6: + dwarfReg = 29; + break; + case REG_S7: + dwarfReg = 30; 
+ break; + case REG_S8: + dwarfReg = 31; + break; + case REG_F0: + dwarfReg = 64; + break; + case REG_F1: + dwarfReg = 65; + break; + case REG_F2: + dwarfReg = 66; + break; + case REG_F3: + dwarfReg = 67; + break; + case REG_F4: + dwarfReg = 68; + break; + case REG_F5: + dwarfReg = 69; + break; + case REG_F6: + dwarfReg = 70; + break; + case REG_F7: + dwarfReg = 71; + break; + case REG_F8: + dwarfReg = 72; + break; + case REG_F9: + dwarfReg = 73; + break; + case REG_F10: + dwarfReg = 74; + break; + case REG_F11: + dwarfReg = 75; + break; + case REG_F12: + dwarfReg = 76; + break; + case REG_F13: + dwarfReg = 77; + break; + case REG_F14: + dwarfReg = 78; + break; + case REG_F15: + dwarfReg = 79; + break; + case REG_F16: + dwarfReg = 80; + break; + case REG_F17: + dwarfReg = 81; + break; + case REG_F18: + dwarfReg = 82; + break; + case REG_F19: + dwarfReg = 83; + break; + case REG_F20: + dwarfReg = 84; + break; + case REG_F21: + dwarfReg = 85; + break; + case REG_F22: + dwarfReg = 86; + break; + case REG_F23: + dwarfReg = 87; + break; + case REG_F24: + dwarfReg = 88; + break; + case REG_F25: + dwarfReg = 89; + break; + case REG_F26: + dwarfReg = 90; + break; + case REG_F27: + dwarfReg = 91; + break; + case REG_F28: + dwarfReg = 92; + break; + case REG_F29: + dwarfReg = 93; + break; + case REG_F30: + dwarfReg = 94; + break; + case REG_F31: + dwarfReg = 95; + break; + + default: + NYI("CFI codes"); + } + + return dwarfReg; +} +#endif // FEATURE_CFI_SUPPORT + +void Compiler::unwindPush(regNumber reg) +{ + unreached(); // use one of the unwindSaveReg* functions instead. 
+} + +void Compiler::unwindAllocStack(unsigned size) +{ +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + unwindAllocStackCFI(size); + } + + return; + } +#endif // TARGET_UNIX + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + assert(size % 16 == 0); + unsigned x = size / 16; + + if (x <= 0x1F) + { + // alloc_s: 000xxxxx: allocate small stack with size < 128 (2^5 * 16) + // TODO-Review: should say size < 512 + + pu->AddCode((BYTE)x); + } + else if (x <= 0x7F) + { + // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 2k (2^7 * 16) + + pu->AddCode(0xC0 | (BYTE)(x >> 8), (BYTE)x); + } + else + { + // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16) + // + // For large stack size, the most significant bits + // are stored first (and next to the opCode) per the unwind spec. + + pu->AddCode(0xE0, (BYTE)(x >> 16), (BYTE)(x >> 8), (BYTE)x); + } +} + +void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset) +{ +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + unwindSetFrameRegCFI(reg, offset); + } + + return; + } +#endif // TARGET_UNIX + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + if (offset == 0) + { + assert(reg == REG_FP); + + // set_fp: 11100001 : set up fp : with : move fp, sp + pu->AddCode(0xE1); + } + else + { + // add_fp: 11100010 | 000xxxxx | xxxxxxxx : set up fp with : addi.d fp, sp, #x * 8 + + assert(reg == REG_FP); + assert((offset % 8) == 0); + + unsigned x = offset / 8; + assert(x <= 0x1FF); + + pu->AddCode(0xE2, (BYTE)(x >> 8), (BYTE)x); + } +} + +void Compiler::unwindSaveReg(regNumber reg, unsigned offset) +{ + unwindSaveReg(reg, (int)offset); +} + +void Compiler::unwindNop() +{ + UnwindInfo* pu = &funCurrentFunc()->uwi; + +#ifdef DEBUG + if (verbose) + { + printf("unwindNop: adding NOP\n"); + } +#endif + + INDEBUG(pu->uwiAddingNOP = true); + + // nop: 11100011: no unwind operation is 
required. + pu->AddCode(0xE3); + + INDEBUG(pu->uwiAddingNOP = false); +} + +void Compiler::unwindSaveReg(regNumber reg, int offset) +{ + + // st.d reg, sp, offset + + // offset for store in prolog must be positive and a multiple of 8. + assert(0 <= offset && offset <= 2047); + assert((offset % 8) == 0); + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg), offset); + } + + return; + } +#endif // TARGET_UNIX + int z = offset / 8; + //assert(0 <= z && z <= 0xFF); + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + if (emitter::isGeneralRegister(reg)) + { + // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 + + assert(reg == REG_RA || reg == REG_FP || // first legal register: RA + (REG_S0 <= reg && reg <= REG_S8)); // last legal register: S8 + + BYTE x = (BYTE)(reg - REG_RA); + assert(0 <= x && x <= 0x1E); + + pu->AddCode(0xD0, (BYTE)x, (BYTE)z); + } + else + { + // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 + + assert(REG_F24 <= reg && // first legal register: F24 + reg <= REG_F31); // last legal register: F31 + + BYTE x = (BYTE)(reg - REG_F24); + assert(0 <= x && x <= 0x7); + + pu->AddCode(0xDC, (BYTE)(x << 4) | (BYTE)(z >> 8), (BYTE)z); + } +} + +void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) +{ + //TODO:temp not used on loongarch64. + assert(!"unimplemented on LOONGARCH yet"); +#if 0 + UnwindInfo* pu = &funCurrentFunc()->uwi; + + // stp reg1, reg2, [sp, #offset] + + // offset for store pair in prolog must be positive and a multiple of 16. 
+ assert(0 <= offset && offset <= 0xff0); + assert((offset % 16) == 0); + + int z = offset / 8; + //assert(0 <= z && z <= 0x1FE); + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg1), offset); + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg2), offset + 8); + } + + return; + } +#endif // TARGET_UNIX + if (reg1 == REG_FP) + { + // save_fpra: 0100zzzz | zzzzzzzz: save pair at [sp+#Z*8], offset <= 0xff0 + assert(reg2 == REG_RA); + + pu->AddCode(0x40 | (BYTE)(z >> 8), (BYTE)z); + } + else if (reg2 == REG_RA) + { + assert(!"unimplemented on LOONGARCH yet"); + } + else if (emitter::isGeneralRegister(reg1)) + { + // save_regp: 11001000 | 0xxxzzzz | zzzzzzzz: save s(0 + #X) pair at [sp + #Z * 8], offset <= 4080 + assert(REG_NEXT(reg1) == reg2); + assert(REG_S0 <= reg1 && // first legal pair: S0, S1 + reg1 <= REG_S6); // last legal pair: S6, S7 (FP is never saved without RA) + + BYTE x = (BYTE)(reg1 - REG_S0); + //assert(0 <= x && x <= 0x6); + + pu->AddCode(0xC8, (BYTE)(x << 4) | (BYTE)(z >> 8), (BYTE)z); + } + else + { + assert(!"unimplemented on LOONGARCH yet"); + } +#endif +} + +void Compiler::unwindReturn(regNumber reg) +{ + // Nothing to do; we will always have at least one trailing "end" opcode in our padding. 
+} + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind Info Debug helpers XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#ifdef DEBUG + +// Return the size of the unwind code (from 1 to 4 bytes), given the first byte of the unwind bytes + +unsigned GetUnwindSizeFromUnwindHeader(BYTE b1) +{ + static BYTE s_UnwindSize[256] = { + // array of unwind sizes, in bytes (as specified in the LOONGARCH unwind specification) + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 40-4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 80-8F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90-9F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0-AF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0-BF + 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 2, 2, 2, // C0-CF + 3, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, // D0-DF + 4, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E0-EF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F0-FF + }; + + unsigned size = s_UnwindSize[b1]; + assert(1 <= size && size <= 4); + return size; +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind Info Support Classes XX +XX XX 
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindCodesBase +// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef DEBUG + +// Walk the prolog codes and calculate the size of the prolog or epilog, in bytes. +unsigned UnwindCodesBase::GetCodeSizeFromUnwindCodes(bool isProlog) +{ + BYTE* pCodesStart = GetCodes(); + BYTE* pCodes = pCodesStart; + unsigned size = 0; + for (;;) + { + BYTE b1 = *pCodes; + if (IsEndCode(b1)) + { + break; // We hit an "end" code; we're done + } + size += 4; // All codes represent 4 byte instructions. + pCodes += GetUnwindSizeFromUnwindHeader(b1); + assert(pCodes - pCodesStart < 256); // 255 is the absolute maximum number of code bytes allowed + } + return size; +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Debug dumpers XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#ifdef DEBUG + +// start is 0-based index from LSB, length is number of bits +DWORD ExtractBits(DWORD dw, DWORD start, DWORD length) +{ + return (dw >> start) & ((1 << length) - 1); +} + +// Dump the unwind data. 
+// Arguments: +// isHotCode: true if this unwind data is for the hot section +// startOffset: byte offset of the code start that this unwind data represents +// endOffset: byte offset of the code end that this unwind data represents +// pHeader: pointer to the unwind data blob +// unwindBlockSize: size in bytes of the unwind data blob + +void DumpUnwindInfo(Compiler* comp, + bool isHotCode, + UNATIVE_OFFSET startOffset, + UNATIVE_OFFSET endOffset, + const BYTE* const pHeader, + ULONG unwindBlockSize) +{ + printf("Unwind Info%s:\n", isHotCode ? "" : " COLD"); + + // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end + // to provide padding, and round down to get a multiple of 4 bytes in size. + DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader; + DWORD dw; + + dw = *pdw++; + + DWORD codeWords = ExtractBits(dw, 27, 5); + DWORD epilogCount = ExtractBits(dw, 22, 5); + DWORD EBit = ExtractBits(dw, 21, 1); + DWORD XBit = ExtractBits(dw, 20, 1); + DWORD Vers = ExtractBits(dw, 18, 2); + DWORD functionLength = ExtractBits(dw, 0, 18); + + printf(" >> Start offset : 0x%06x (not in unwind data)\n", comp->dspOffset(startOffset)); + printf(" >> End offset : 0x%06x (not in unwind data)\n", comp->dspOffset(endOffset)); + printf(" Code Words : %u\n", codeWords); + printf(" Epilog Count : %u\n", epilogCount); + printf(" E bit : %u\n", EBit); + printf(" X bit : %u\n", XBit); + printf(" Vers : %u\n", Vers); + printf(" Function Length : %u (0x%05x) Actual length = %u (0x%06x)\n", functionLength, functionLength, + functionLength * 4, functionLength * 4); + + assert(functionLength * 4 == endOffset - startOffset); + + if (codeWords == 0 && epilogCount == 0) + { + // We have an extension word specifying a larger number of Code Words or Epilog Counts + // than can be specified in the header word. 
+ + dw = *pdw++; + + codeWords = ExtractBits(dw, 16, 8); + epilogCount = ExtractBits(dw, 0, 16); + assert((dw & 0xF0000000) == 0); // reserved field should be zero + + printf(" ---- Extension word ----\n"); + printf(" Extended Code Words : %u\n", codeWords); + printf(" Extended Epilog Count : %u\n", epilogCount); + } + + bool epilogStartAt[1024] = {}; // One byte per possible epilog start index; initialized to false + + if (EBit == 0) + { + // We have an array of epilog scopes + + printf(" ---- Epilog scopes ----\n"); + if (epilogCount == 0) + { + printf(" No epilogs\n"); + } + else + { + for (DWORD scope = 0; scope < epilogCount; scope++) + { + dw = *pdw++; + + DWORD epilogStartOffset = ExtractBits(dw, 0, 18); + DWORD res = ExtractBits(dw, 18, 4); + DWORD epilogStartIndex = ExtractBits(dw, 22, 10); + + // Note that epilogStartOffset for a funclet is the offset from the beginning + // of the current funclet, not the offset from the beginning of the main function. + // To help find it when looking through JitDump output, also show the offset from + // the beginning of the main function. 
+ DWORD epilogStartOffsetFromMainFunctionBegin = epilogStartOffset * 4 + startOffset; + + assert(res == 0); + + printf(" ---- Scope %d\n", scope); + printf(" Epilog Start Offset : %u (0x%05x) Actual offset = %u (0x%06x) Offset from main " + "function begin = %u (0x%06x)\n", + comp->dspOffset(epilogStartOffset), comp->dspOffset(epilogStartOffset), + comp->dspOffset(epilogStartOffset * 4), comp->dspOffset(epilogStartOffset * 4), + comp->dspOffset(epilogStartOffsetFromMainFunctionBegin), + comp->dspOffset(epilogStartOffsetFromMainFunctionBegin)); + printf(" Epilog Start Index : %u (0x%02x)\n", epilogStartIndex, epilogStartIndex); + + epilogStartAt[epilogStartIndex] = true; // an epilog starts at this offset in the unwind codes + } + } + } + else + { + printf(" --- One epilog, unwind codes at %u\n", epilogCount); + assert(epilogCount < ArrLen(epilogStartAt)); + epilogStartAt[epilogCount] = true; // the one and only epilog starts its unwind codes at this offset + } + + // Dump the unwind codes + + printf(" ---- Unwind codes ----\n"); + + DWORD countOfUnwindCodes = codeWords * 4; + PBYTE pUnwindCode = (PBYTE)pdw; + BYTE b1, b2, b3, b4; + DWORD x, z; + for (DWORD i = 0; i < countOfUnwindCodes; i++) + { + // Does this byte start an epilog sequence? If so, note that fact. 
+ if (epilogStartAt[i]) + { + printf(" ---- Epilog start at index %u ----\n", i); + } + + b1 = *pUnwindCode++; + + if ((b1 & 0xE0) == 0) + { + // alloc_s: 000xxxxx: allocate small stack with size < 128 (2^5 * 16) + // TODO-Review:should say size < 512 + x = b1 & 0x1F; + printf(" %02X alloc_s #%u (0x%02X); addi.d sp, sp, -%u (0x%03X)\n", b1, x, x, x * 16, x * 16); + } +#if 0 + else if ((b1 & 0xE0) == 0x20) + { + // save_s0s1_x: 001zzzzz: save pair at [sp-#Z*8]!, pre-indexed offset >= -248 + z = b1 & 0x1F; + printf(" %02X save_s0s1_x #%u (0x%02X); Two sd %s, %s, [sp, #-%u]!\n", b1, z, z, + getRegName(REG_S0), getRegName(REG_S1), z * 8); + } + else if ((b1 & 0xF0) == 0x40) + { + // save_fpra: 0100zzzz | zzzzzzzz: save pair at [sp+#Z*8], offset <= 4080 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + z = ((DWORD)(b1 & 0xF) << 8) | (DWORD)b2; + printf(" %02X %02X save_fpra #%u (0x%03X); Two sd %s, %s, [sp, #%u]\n", b1, b2, z, z, getRegName(REG_FP), + getRegName(REG_RA), z * 8); + } + else if ((b1 & 0xF0) == 0x80) + { + // save_fpra_x: 1000zzzz | zzzzzzzz: save pair at [sp-(#Z+1)*8]!, pre-indexed offset >= -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + z = ((DWORD)(b1 & 0xF) << 8) | (DWORD)b2; + printf(" %02X %02X save_fpra_x #%u (0x%03X); Two sd %s, %s, [sp, #-%u]!\n", b1, b2, z, z, + getRegName(REG_FP), getRegName(REG_RA), (z + 1) * 8); + } +#endif + else if ((b1 & 0xF8) == 0xC0) + { + // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 2k (2^7 * 16) + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = ((DWORD)(b1 & 0x7) << 8) | (DWORD)b2; + + printf(" %02X %02X alloc_m #%u (0x%03X); addi.d sp, sp, -%u (0x%04X)\n", b1, b2, x, x, x * 16, + x * 16); + } + else if (b1 == 0xD0) + { + // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = 
(DWORD)b2; + z = (DWORD)b3; + + printf(" %02X %02X %02X save_reg X#%u Z#%u (0x%02X); st.d %s, sp, %u\n", b1, b2, b3, x, z, z, + getRegName(REG_RA + x), z * 8); + } +#if 0 + else if (b1 == 0xC8) + { + // save_regp: 11001000 | 0xxxzzzz | zzzzzzzz: save s(0 + #X) pair at [sp + #Z * 8], offset <= 4080 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_regp X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_S0 + x + 1), z * 8); + } + else if (b1 == 0xCC) + { + // save_regp_x: 11001100 | 0xxxzzzz | zzzzzzzz: save pair s(0 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= + // -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i+= 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_regp_x X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #-%u]!\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_S0 + x + 1), (z + 1) * 8); + } + else if ((b1 & 0xFE) == 0xD4) + { + // save_reg_x: 1101010x | xxxzzzzz: save reg s(0 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -16384 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = ((DWORD)(b1 & 0x1) << 3) | (DWORD)(b2 >> 5); + z = (DWORD)(b2 & 0x1F); + + printf(" %02X %02X save_reg_x X#%u Z#%u (0x%02X); sd %s, [sp, #-%u]!\n", b1, b2, x, z, z, + getRegName(REG_S0 + x), (z + 1) * 8); + } + else if (b1 == 0xD6) + { + // save_rapair: 11010110 | 0xxxzzzz | zzzzzzzz: save pair at [sp + #Z * 8], offset <= 32767 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_lrpair X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), 
getRegName(REG_RA), z * 8); + } + else if (b1 == 0xD8) + { + // save_fregp: 11011000 | 0xxxzzzz | zzzzzzzz : save pair f(24 + #X) at [sp + #Z * 8], offset <= 32767 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_fregp X#%u Z#%u (0x%02X); Two sdc1 %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x, true), getRegName(REG_F24 + x + 1, true), z * 8); + } + else if (b1 == 0xDA) + { + // save_fregp_x: 11011010 | 0xxxzzzz | zzzzzzzz : save pair f(24 + #X), at [sp - (#Z + 1) * 8]!, pre-indexed offset >= + // -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_fregp_x X#%u Z#%u (0x%02X); Two sdc1 %s, %s, [sp, #-%u]!\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x, true), getRegName(REG_F24 + x + 1, true), (z + 1) * 8); + } +#endif + else if (b1 == 0xDC) + { + // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_freg X#%u Z#%u (0x%02X); fst.d %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x), z * 8); + } +#if 0 + else if (b1 == 0xDE) + { + // save_freg_x: 11011110 | xxxzzzzz : save reg f(24 + #X) at [sp - (#Z + 1) * 8]!, pre - indexed offset >= + // -16384 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = (DWORD)(b2 >> 5); + z = (DWORD)(b2 & 0x1F); + + printf(" %02X %02X save_freg_x X#%u Z#%u (0x%02X); sdc1 %s, [sp, #-%u]!\n", b1, b2, x, z, z, + getRegName(REG_F24 + x, true), (z + 1) * 8); + } +#endif + else if (b1 == 0xE0) + { + // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | 
xxxxxxxx : allocate large stack with size < 256M (2^24 * 16) + assert(i + 3 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + b4 = *pUnwindCode++; + i += 3; + + x = ((DWORD)b2 << 16) | ((DWORD)b3 << 8) | (DWORD)b4; + + printf(" %02X %02X %02X %02X alloc_l %u (0x%06X); addi.d sp, sp, -%u (%06X)\n", b1, b2, b3, b4, x, x, + x * 16, x * 16); + } + else if (b1 == 0xE1) + { + // set_fp: 11100001 : set up $29 : with : move fp, sp + + printf(" %02X set_fp; move %s, sp\n", b1, getRegName(REG_FP)); + } + else if (b1 == 0xE2) + { + // add_fp: 11100010 | 000xxxxx | xxxxxxxx : set up fp with : addi.d fp, sp, #x * 8 + assert(i + 2 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = ((DWORD)(b2 & 0x1F) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X add_fp %u (0x%02X); addi.d %s, sp, #%u\n", b1, b2, b3, x, x, getRegName(REG_FP), + x * 8); + } + else if (b1 == 0xE3) + { + // nop: 11100011: no unwind operation is required. + + printf(" %02X nop\n", b1); + } + else if (b1 == 0xE4) + { + // end: 11100100 : end of unwind code + + printf(" %02X end\n", b1); + } + else if (b1 == 0xE5) + { + // end_c: 11100101 : end of unwind code in current chained scope. + + printf(" %02X end_c\n", b1); + } + else if (b1 == 0xE6) + { + // save_next: 11100110 : save next non - volatile Int or FP register pair. 
+ + printf(" %02X save_next\n", b1); + } + else + { + printf("===========[loongarch64] Unknown / reserved unwind code: %02X\n", b1); + // Unknown / reserved unwind code + assert(!"Internal error decoding unwind codes"); + } + } + + pdw += codeWords; + assert((PBYTE)pdw == pUnwindCode); + assert((PBYTE)pdw == pHeader + unwindBlockSize); + + assert(XBit == 0); // We don't handle the case where exception data is present, such as the Exception Handler RVA + + printf("\n"); +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind APIs XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +void Compiler::unwindBegProlog() +{ + assert(compGeneratingProlog); + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + unwindBegPrologCFI(); + return; + } +#endif // TARGET_UNIX + + FuncInfoDsc* func = funCurrentFunc(); + + // There is only one prolog for a function/funclet, and it comes first. So now is + // a good time to initialize all the unwind data structures. 
+ + emitLocation* startLoc; + emitLocation* endLoc; + unwindGetFuncLocations(func, true, &startLoc, &endLoc); + + func->uwi.InitUnwindInfo(this, startLoc, endLoc); + func->uwi.CaptureLocation(); + + func->uwiCold = NULL; // No cold data yet +} + +void Compiler::unwindEndProlog() +{ + assert(compGeneratingProlog); +} + +void Compiler::unwindBegEpilog() +{ + assert(compGeneratingEpilog); + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + return; + } +#endif // TARGET_UNIX + + funCurrentFunc()->uwi.AddEpilog(); +} + +void Compiler::unwindEndEpilog() +{ + assert(compGeneratingEpilog); +} + +// The instructions between the last captured "current state" and the current instruction +// are in the prolog but have no effect for unwinding. Emit the appropriate NOP unwind codes +// for them. +void Compiler::unwindPadding() +{ +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + return; + } +#endif // TARGET_UNIX + + UnwindInfo* pu = &funCurrentFunc()->uwi; + GetEmitter()->emitUnwindNopPadding(pu->GetCurrentEmitterLocation(), this); +} + +// Ask the VM to reserve space for the unwind information for the function and +// all its funclets. +void Compiler::unwindReserve() +{ + assert(!compGeneratingProlog); + assert(!compGeneratingEpilog); + + assert(compFuncInfoCount > 0); + for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++) + { + unwindReserveFunc(funGetFunc(funcIdx)); + } +} + +void Compiler::unwindReserveFunc(FuncInfoDsc* func) +{ + BOOL isFunclet = (func->funKind == FUNC_ROOT) ? 
FALSE : TRUE; + bool funcHasColdSection = false; + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + DWORD unwindCodeBytes = 0; + if (fgFirstColdBlock != nullptr) + { + eeReserveUnwindInfo(isFunclet, true /*isColdCode*/, unwindCodeBytes); + } + unwindCodeBytes = (DWORD)(func->cfiCodes->size() * sizeof(CFI_CODE)); + eeReserveUnwindInfo(isFunclet, false /*isColdCode*/, unwindCodeBytes); + + return; + } +#endif // TARGET_UNIX + + // If there is cold code, split the unwind data between the hot section and the + // cold section. This needs to be done before we split into fragments, as each + // of the hot and cold sections can have multiple fragments. + + if (fgFirstColdBlock != NULL) + { + assert(!isFunclet); // TODO-CQ: support hot/cold splitting with EH + + emitLocation* startLoc; + emitLocation* endLoc; + unwindGetFuncLocations(func, false, &startLoc, &endLoc); + + func->uwiCold = new (this, CMK_UnwindInfo) UnwindInfo(); + func->uwiCold->InitUnwindInfo(this, startLoc, endLoc); + func->uwiCold->HotColdSplitCodes(&func->uwi); + + funcHasColdSection = true; + } + + // First we need to split the function or funclet into fragments that are no larger + // than 512K, so the fragment size will fit in the unwind data "Function Length" field. + // The LOONGARCH Exception Data specification "Function Fragments" section describes this. + func->uwi.Split(); + + func->uwi.Reserve(isFunclet, true); + + // After the hot section, split and reserve the cold section + + if (funcHasColdSection) + { + assert(func->uwiCold != NULL); + + func->uwiCold->Split(); + func->uwiCold->Reserve(isFunclet, false); + } +} + +// unwindEmit: Report all the unwind information to the VM. +// Arguments: +// pHotCode: Pointer to the beginning of the memory with the function and funclet hot code +// pColdCode: Pointer to the beginning of the memory with the function and funclet cold code. 
+ +void Compiler::unwindEmit(void* pHotCode, void* pColdCode) +{ + assert(compFuncInfoCount > 0); + for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++) + { + unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode); + } +} + +void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode) +{ + // Verify that the JIT enum is in sync with the JIT-EE interface enum + static_assert_no_msg(FUNC_ROOT == (FuncKind)CORJIT_FUNC_ROOT); + static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER); + static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER); + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + unwindEmitFuncCFI(func, pHotCode, pColdCode); + return; + } +#endif // TARGET_UNIX + + func->uwi.Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, true); + + if (func->uwiCold != NULL) + { + func->uwiCold->Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, false); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindPrologCodes +// +/////////////////////////////////////////////////////////////////////////////// + +// We're going to use the prolog codes memory to store the final unwind data. +// Ensure we have enough memory to store everything. If 'epilogBytes' > 0, then +// move the prolog codes so there are 'epilogBytes' bytes after the prolog codes. +// Set the header pointer for future use, adding the header bytes (this pointer +// is updated when a header byte is added), and remember the index that points +// to the beginning of the header. + +void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes) +{ +#ifdef DEBUG + // We're done adding codes. Check that we didn't accidentally create a bigger prolog. 
+ unsigned codeSize = GetCodeSizeFromUnwindCodes(true); + assert(codeSize <= MAX_PROLOG_SIZE_BYTES); +#endif // DEBUG + + int prologBytes = Size(); + + EnsureSize(headerBytes + prologBytes + epilogBytes + 3); // 3 = padding bytes for alignment + + upcUnwindBlockSlot = upcCodeSlot - headerBytes - epilogBytes; // Index of the first byte of the unwind header + + assert(upcMemSize == upcUnwindBlockSlot + headerBytes + prologBytes + epilogBytes + 3); + + upcHeaderSlot = upcUnwindBlockSlot - 1; // upcHeaderSlot is always incremented before storing + assert(upcHeaderSlot >= -1); + + if (epilogBytes > 0) + { + // The prolog codes that are already at the end of the array need to get moved to the middle, + // with space for the non-matching epilog codes to follow. + + memmove_s(&upcMem[upcUnwindBlockSlot + headerBytes], upcMemSize - (upcUnwindBlockSlot + headerBytes), + &upcMem[upcCodeSlot], prologBytes); + + // Note that the three UWC_END padding bytes still exist at the end of the array. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + // Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes. + memset(&upcMem[upcUnwindBlockSlot + headerBytes + prologBytes], 0, epilogBytes); +#endif // DEBUG + + upcEpilogSlot = + upcUnwindBlockSlot + headerBytes + prologBytes; // upcEpilogSlot points to the next epilog location to fill + + // Update upcCodeSlot to point at the new beginning of the prolog codes + upcCodeSlot = upcUnwindBlockSlot + headerBytes; + } +} + +// Add a header word. Header words are added starting at the beginning, in order: first to last. +// This is in contrast to the prolog unwind codes, which are added in reverse order. +void UnwindPrologCodes::AddHeaderWord(DWORD d) +{ + assert(-1 <= upcHeaderSlot); + assert(upcHeaderSlot + 4 < upcCodeSlot); // Don't collide with the unwind codes that are already there! + + // Store it byte-by-byte in little-endian format. 
We've already ensured there is enough space + // in SetFinalSize(). + upcMem[++upcHeaderSlot] = (BYTE)d; + upcMem[++upcHeaderSlot] = (BYTE)(d >> 8); + upcMem[++upcHeaderSlot] = (BYTE)(d >> 16); + upcMem[++upcHeaderSlot] = (BYTE)(d >> 24); +} + +// AppendEpilog: copy the epilog bytes to the next epilog bytes slot +void UnwindPrologCodes::AppendEpilog(UnwindEpilogInfo* pEpi) +{ + assert(upcEpilogSlot != -1); + + int epiSize = pEpi->Size(); + memcpy_s(&upcMem[upcEpilogSlot], upcMemSize - upcEpilogSlot - 3, pEpi->GetCodes(), + epiSize); // -3 to avoid writing to the alignment padding + assert(pEpi->GetStartIndex() == + upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy it. + + upcEpilogSlot += epiSize; + assert(upcEpilogSlot <= upcMemSize - 3); +} + +// GetFinalInfo: return a pointer to the final unwind info to hand to the VM, and the size of this info in bytes +void UnwindPrologCodes::GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize) +{ + assert(upcHeaderSlot + 1 == upcCodeSlot); // We better have filled in the header before asking for the final data! + + *ppUnwindBlock = &upcMem[upcUnwindBlockSlot]; + + // We put 4 'end' codes at the end for padding, so we can ensure we have an + // unwind block that is a multiple of 4 bytes in size. Subtract off three 'end' + // codes (leave one), and then align the size up to a multiple of 4. + *pUnwindBlockSize = AlignUp((UINT)(upcMemSize - upcUnwindBlockSlot - 3), sizeof(DWORD)); +} + +int UnwindPrologCodes::Match(UnwindEpilogInfo* pEpi) +{ + if (Size() < pEpi->Size()) + { + return -1; + } + + int matchIndex = 0;//Size() - pEpi->Size(); + + BYTE* pProlog = GetCodes(); + BYTE* pEpilog = pEpi->GetCodes(); + + //First check set_fp. 
+ if (0 < pEpi->Size()) + { + if (*pProlog == 0xE1) + { + pProlog++; + if (*pEpilog == 0xE1) + { + pEpilog++; + } + else + { + matchIndex = 1; + } + } + else if (*pProlog == 0xE2) + { + pProlog += 3; + if (*pEpilog == 0xE1) + { + pEpilog += 3; + } + else + { + matchIndex = 3; + } + } + } + + if (0 == memcmp(pProlog, pEpilog, pEpi->Size())) + { + return matchIndex; + } + + return -1; +} + +// Copy the prolog codes from another prolog. The only time this is legal is +// if we are at the initial state and no prolog codes have been added. +// This is used to create the 'phantom' prolog for non-first fragments. + +void UnwindPrologCodes::CopyFrom(UnwindPrologCodes* pCopyFrom) +{ + assert(uwiComp == pCopyFrom->uwiComp); + assert(upcMem == upcMemLocal); + assert(upcMemSize == UPC_LOCAL_COUNT); + assert(upcHeaderSlot == -1); + assert(upcEpilogSlot == -1); + + // Copy the codes + EnsureSize(pCopyFrom->upcMemSize); + assert(upcMemSize == pCopyFrom->upcMemSize); + memcpy_s(upcMem, upcMemSize, pCopyFrom->upcMem, pCopyFrom->upcMemSize); + + // Copy the other data + upcCodeSlot = pCopyFrom->upcCodeSlot; + upcHeaderSlot = pCopyFrom->upcHeaderSlot; + upcEpilogSlot = pCopyFrom->upcEpilogSlot; + upcUnwindBlockSlot = pCopyFrom->upcUnwindBlockSlot; +} + +void UnwindPrologCodes::EnsureSize(int requiredSize) +{ + if (requiredSize > upcMemSize) + { + // Reallocate, and copy everything to a new array. + + // Choose the next power of two size. This may or may not be the best choice. + noway_assert((requiredSize & 0xC0000000) == 0); // too big! 
+ int newSize; + for (newSize = upcMemSize << 1; newSize < requiredSize; newSize <<= 1) + { + // do nothing + } + + BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize]; + memcpy_s(newUnwindCodes + newSize - upcMemSize, upcMemSize, upcMem, + upcMemSize); // copy the existing data to the end +#ifdef DEBUG + // Clear the old unwind codes; nobody should be looking at them + memset(upcMem, 0xFF, upcMemSize); +#endif // DEBUG + upcMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator + upcCodeSlot += newSize - upcMemSize; + upcMemSize = newSize; + } +} + +#ifdef DEBUG +void UnwindPrologCodes::Dump(int indent) +{ + printf("%*sUnwindPrologCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s &upcMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&upcMemLocal[0])); + printf("%*s upcMem: 0x%08p\n", indent, "", dspPtr(upcMem)); + printf("%*s upcMemSize: %d\n", indent, "", upcMemSize); + printf("%*s upcCodeSlot: %d\n", indent, "", upcCodeSlot); + printf("%*s upcHeaderSlot: %d\n", indent, "", upcHeaderSlot); + printf("%*s upcEpilogSlot: %d\n", indent, "", upcEpilogSlot); + printf("%*s upcUnwindBlockSlot: %d\n", indent, "", upcUnwindBlockSlot); + + if (upcMemSize > 0) + { + printf("%*s codes:", indent, ""); + for (int i = 0; i < upcMemSize; i++) + { + printf(" %02x", upcMem[i]); + if (i == upcCodeSlot) + printf(" <-C"); + else if (i == upcHeaderSlot) + printf(" <-H"); + else if (i == upcEpilogSlot) + printf(" <-E"); + else if (i == upcUnwindBlockSlot) + printf(" <-U"); + } + printf("\n"); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindEpilogCodes +// +/////////////////////////////////////////////////////////////////////////////// + +void UnwindEpilogCodes::EnsureSize(int requiredSize) +{ + if (requiredSize > uecMemSize) + { + // Reallocate, and copy 
everything to a new array. + + // Choose the next power of two size. This may or may not be the best choice. + noway_assert((requiredSize & 0xC0000000) == 0); // too big! + int newSize; + for (newSize = uecMemSize << 1; newSize < requiredSize; newSize <<= 1) + { + // do nothing + } + + BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize]; + memcpy_s(newUnwindCodes, newSize, uecMem, uecMemSize); +#ifdef DEBUG + // Clear the old unwind codes; nobody should be looking at them + memset(uecMem, 0xFF, uecMemSize); +#endif // DEBUG + uecMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator + // uecCodeSlot stays the same + uecMemSize = newSize; + } +} + +#ifdef DEBUG +void UnwindEpilogCodes::Dump(int indent) +{ + printf("%*sUnwindEpilogCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s &uecMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&uecMemLocal[0])); + printf("%*s uecMem: 0x%08p\n", indent, "", dspPtr(uecMem)); + printf("%*s uecMemSize: %d\n", indent, "", uecMemSize); + printf("%*s uecCodeSlot: %d\n", indent, "", uecCodeSlot); + printf("%*s uecFinalized: %s\n", indent, "", dspBool(uecFinalized)); + + if (uecMemSize > 0) + { + printf("%*s codes:", indent, ""); + for (int i = 0; i < uecMemSize; i++) + { + printf(" %02x", uecMem[i]); + if (i == uecCodeSlot) + printf(" <-C"); // Indicate the current pointer + } + printf("\n"); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindEpilogInfo +// +/////////////////////////////////////////////////////////////////////////////// + +// Do the current unwind codes match those of the argument epilog? +// If they don't match, return -1. If they do, return the offset into +// our codes at which the argument codes match. Note that this means that +// the argument codes can match a subset of our codes. 
The subset needs to be at +// the end, for the "end" code to match. +// +// Note that if we wanted to handle 0xFD and 0xFE codes, by converting +// an existing 0xFF code to one of those, we might do that here. + +int UnwindEpilogInfo::Match(UnwindEpilogInfo* pEpi) +{ + if (Matches()) + { + // We are already matched to someone else, and won't provide codes to the final layout + return -1; + } + + if (Size() < pEpi->Size()) + { + return -1; + } + + int matchIndex = Size() - pEpi->Size(); + + if (0 == memcmp(GetCodes() + matchIndex, pEpi->GetCodes(), pEpi->Size())) + { + return matchIndex; + } + + return -1; +} + +void UnwindEpilogInfo::CaptureEmitLocation() +{ + noway_assert(epiEmitLocation == NULL); // This function is only called once per epilog + epiEmitLocation = new (uwiComp, CMK_UnwindInfo) emitLocation(); + epiEmitLocation->CaptureLocation(uwiComp->GetEmitter()); +} + +void UnwindEpilogInfo::FinalizeOffset() +{ + epiStartOffset = epiEmitLocation->CodeOffset(uwiComp->GetEmitter()); +} + +#ifdef DEBUG +void UnwindEpilogInfo::Dump(int indent) +{ + printf("%*sUnwindEpilogInfo @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s epiNext: 0x%08p\n", indent, "", dspPtr(epiNext)); + printf("%*s epiEmitLocation: 0x%08p\n", indent, "", dspPtr(epiEmitLocation)); + printf("%*s epiStartOffset: 0x%x\n", indent, "", epiStartOffset); + printf("%*s epiMatches: %s\n", indent, "", dspBool(epiMatches)); + printf("%*s epiStartIndex: %d\n", indent, "", epiStartIndex); + + epiCodes.Dump(indent + 2); +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindFragmentInfo +// +/////////////////////////////////////////////////////////////////////////////// + +UnwindFragmentInfo::UnwindFragmentInfo(Compiler* comp, emitLocation* emitLoc, bool hasPhantomProlog) + : UnwindBase(comp) + , ufiNext(NULL) + , ufiEmitLoc(emitLoc) + , 
ufiHasPhantomProlog(hasPhantomProlog) + , ufiPrologCodes(comp) + , ufiEpilogFirst(comp) + , ufiEpilogList(NULL) + , ufiEpilogLast(NULL) + , ufiCurCodes(&ufiPrologCodes) + , ufiSize(0) + , ufiStartOffset(UFI_ILLEGAL_OFFSET) +{ +#ifdef DEBUG + ufiNum = 1; + ufiInProlog = true; + ufiInitialized = UFI_INITIALIZED_PATTERN; +#endif // DEBUG +} + +void UnwindFragmentInfo::FinalizeOffset() +{ + if (ufiEmitLoc == NULL) + { + // NULL emit location means the beginning of the code. This is to handle the first fragment prolog. + ufiStartOffset = 0; + } + else + { + ufiStartOffset = ufiEmitLoc->CodeOffset(uwiComp->GetEmitter()); + } + + for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + pEpi->FinalizeOffset(); + } +} + +void UnwindFragmentInfo::AddEpilog() +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + +#ifdef DEBUG + if (ufiInProlog) + { + assert(ufiEpilogList == NULL); + ufiInProlog = false; + } + else + { + assert(ufiEpilogList != NULL); + } +#endif // DEBUG + + // Either allocate a new epilog object, or, for the first one, use the + // preallocated one that is a member of the UnwindFragmentInfo class. + + UnwindEpilogInfo* newepi; + + if (ufiEpilogList == NULL) + { + // Use the epilog that's in the class already. Be sure to initialize it! + newepi = ufiEpilogList = &ufiEpilogFirst; + } + else + { + newepi = new (uwiComp, CMK_UnwindInfo) UnwindEpilogInfo(uwiComp); + } + + // Put the new epilog at the end of the epilog list + + if (ufiEpilogLast != NULL) + { + ufiEpilogLast->epiNext = newepi; + } + + ufiEpilogLast = newepi; + + // What is the starting code offset of the epilog? Store an emitter location + // so we can ask the emitter later, after codegen. + + newepi->CaptureEmitLocation(); + + // Put subsequent unwind codes in this new epilog + + ufiCurCodes = &newepi->epiCodes; +} + +// Copy the prolog codes from the 'pCopyFrom' fragment. These prolog codes will +// become 'phantom' prolog codes in this fragment. 
Note that this fragment should +// not have any prolog codes currently; it is at the initial state. + +void UnwindFragmentInfo::CopyPrologCodes(UnwindFragmentInfo* pCopyFrom) +{ + ufiPrologCodes.CopyFrom(&pCopyFrom->ufiPrologCodes); + ufiPrologCodes.AddCode(UWC_END_C); +} + +// Split the epilog codes that currently exist in 'pSplitFrom'. The ones that represent +// epilogs that start at or after the location represented by 'emitLoc' are removed +// from 'pSplitFrom' and moved to this fragment. Note that this fragment should not have +// any epilog codes currently; it is at the initial state. + +void UnwindFragmentInfo::SplitEpilogCodes(emitLocation* emitLoc, UnwindFragmentInfo* pSplitFrom) +{ + UnwindEpilogInfo* pEpiPrev; + UnwindEpilogInfo* pEpi; + + UNATIVE_OFFSET splitOffset = emitLoc->CodeOffset(uwiComp->GetEmitter()); + + for (pEpiPrev = NULL, pEpi = pSplitFrom->ufiEpilogList; pEpi != NULL; pEpiPrev = pEpi, pEpi = pEpi->epiNext) + { + pEpi->FinalizeOffset(); // Get the offset of the epilog from the emitter so we can compare it + if (pEpi->GetStartOffset() >= splitOffset) + { + // This epilog and all following epilogs, which must be in order of increasing offsets, + // get moved to this fragment. + + // Splice in the epilogs to this fragment. Set the head of the epilog + // list to this epilog. + ufiEpilogList = pEpi; // In this case, don't use 'ufiEpilogFirst' + ufiEpilogLast = pSplitFrom->ufiEpilogLast; + + // Splice out the tail of the list from the 'pSplitFrom' epilog list + pSplitFrom->ufiEpilogLast = pEpiPrev; + if (pSplitFrom->ufiEpilogLast == NULL) + { + pSplitFrom->ufiEpilogList = NULL; + } + else + { + pSplitFrom->ufiEpilogLast->epiNext = NULL; + } + + // No more codes should be added once we start splitting + pSplitFrom->ufiCurCodes = NULL; + ufiCurCodes = NULL; + + break; + } + } +} + +// Is this epilog at the end of an unwind fragment? Ask the emitter. 
+// Note that we need to know this before all code offsets are finalized, +// so we can determine whether we can omit an epilog scope word for a +// single matching epilog. + +bool UnwindFragmentInfo::IsAtFragmentEnd(UnwindEpilogInfo* pEpi) +{ + return uwiComp->GetEmitter()->emitIsFuncEnd(pEpi->epiEmitLocation, (ufiNext == NULL) ? NULL : ufiNext->ufiEmitLoc); +} + +// Merge the unwind codes as much as possible. +// This function is called before all offsets are final. +// Also, compute the size of the final unwind block. Store this +// and some other data for later, when we actually emit the +// unwind block. + +void UnwindFragmentInfo::MergeCodes() +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + + unsigned epilogCount = 0; + unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the + // prolog codes + unsigned epilogIndex = ufiPrologCodes.Size(); // The "Epilog Start Index" for the next non-matching epilog codes + UnwindEpilogInfo* pEpi; + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + ++epilogCount; + + pEpi->FinalizeCodes(); + + // Does this epilog match the prolog? + // NOTE: for the purpose of matching, we don't handle the 0xFD and 0xFE end codes that allow slightly unequal + // prolog and epilog codes. + + int matchIndex; + + matchIndex = ufiPrologCodes.Match(pEpi); + if (matchIndex != -1) + { + pEpi->SetMatches(); + pEpi->SetStartIndex(matchIndex); // Prolog codes start at zero, so matchIndex is exactly the start index + } + else + { + // The epilog codes don't match the prolog codes. Do they match any of the epilogs + // we've seen so far? + + bool matched = false; + for (UnwindEpilogInfo* pEpi2 = ufiEpilogList; pEpi2 != pEpi; pEpi2 = pEpi2->epiNext) + { + matchIndex = pEpi2->Match(pEpi); + if (matchIndex != -1) + { + // Use the same epilog index as the one we matched, as it has already been set. 
+ pEpi->SetMatches(); + pEpi->SetStartIndex(pEpi2->GetStartIndex() + matchIndex); // We might match somewhere inside pEpi2's + // codes, in which case matchIndex > 0 + matched = true; + break; + } + } + + if (!matched) + { + pEpi->SetStartIndex(epilogIndex); // We'll copy these codes to the next available location + epilogCodeBytes += pEpi->Size(); + epilogIndex += pEpi->Size(); + } + } + } + + DWORD codeBytes = ufiPrologCodes.Size() + epilogCodeBytes; + codeBytes = AlignUp(codeBytes, sizeof(DWORD)); + + DWORD codeWords = + codeBytes / sizeof(DWORD); // This is how many words we need to store all the unwind codes in the unwind block + + // Do we need the 2nd header word for "Extended Code Words" or "Extended Epilog Count"? + + bool needExtendedCodeWordsEpilogCount = + (codeWords > UW_MAX_CODE_WORDS_COUNT) || (epilogCount > UW_MAX_EPILOG_COUNT); + + // How many epilog scope words do we need? + + bool setEBit = false; // do we need to set the E bit? + unsigned epilogScopes = epilogCount; // Note that this could be zero if we have no epilogs! + + if (epilogCount == 1) + { + assert(ufiEpilogList != NULL); + assert(ufiEpilogList->epiNext == NULL); + + if (ufiEpilogList->Matches() && (ufiEpilogList->GetStartIndex() == 0) && // The match is with the prolog + !needExtendedCodeWordsEpilogCount && IsAtFragmentEnd(ufiEpilogList)) + { + epilogScopes = 0; // Don't need any epilog scope words + setEBit = true; + } + } + + DWORD headerBytes = (1 // Always need first header DWORD + + (needExtendedCodeWordsEpilogCount ? 1 : 0) // Do we need the 2nd DWORD for Extended Code + // Words or Extended Epilog Count? + + epilogScopes // One DWORD per epilog scope, for EBit = 0 + ) * + sizeof(DWORD); // convert it to bytes + + DWORD finalSize = headerBytes + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words, + // including end padding if necessary + + // Construct the final unwind information. 
+ + // We re-use the memory for the prolog unwind codes to construct the full unwind data. If all the epilogs + // match the prolog, this is easy: we just prepend the header. If there are epilog codes that don't match + // the prolog, we still use the prolog codes memory, but it's a little more complicated, since the + // unwind info is ordered as: (a) header, (b) prolog codes, (c) non-matching epilog codes. And, the prolog + // codes array is filled in from end-to-beginning. So, we compute the size of memory we need, ensure we + // have that much memory, and then copy the prolog codes to the right place, appending the non-matching + // epilog codes and prepending the header. + + ufiPrologCodes.SetFinalSize(headerBytes, epilogCodeBytes); + + if (epilogCodeBytes != 0) + { + // We need to copy the epilog code bytes to their final memory location + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + if (!pEpi->Matches()) + { + ufiPrologCodes.AppendEpilog(pEpi); + } + } + } + + // Save some data for later + ufiSize = finalSize; + ufiSetEBit = setEBit; + ufiNeedExtendedCodeWordsEpilogCount = needExtendedCodeWordsEpilogCount; + ufiCodeWords = codeWords; + ufiEpilogScopes = epilogScopes; +} + +// Finalize: Prepare the unwind information for the VM. Compute and prepend the unwind header. + +void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength) +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + +#ifdef DEBUG + if (0 && uwiComp->verbose) + { + printf("*************** Before fragment #%d finalize\n", ufiNum); + Dump(); + } +#endif + +// Compute the header + + noway_assert((functionLength & 3) == 0); + DWORD headerFunctionLength = functionLength / 4; + + DWORD headerVers = 0; // Version of the unwind info is zero. No other version number is currently defined. + DWORD headerXBit = 0; // We never generate "exception data", but the VM might add some. + DWORD headerEBit; + DWORD headerEpilogCount; // This depends on how we set headerEBit. 
+ DWORD headerCodeWords; + DWORD headerExtendedEpilogCount = 0; // This depends on how we set headerEBit. + DWORD headerExtendedCodeWords = 0; + + if (ufiSetEBit) + { + headerEBit = 1; + headerEpilogCount = ufiEpilogList->GetStartIndex(); // probably zero -- the start of the prolog codes! + headerCodeWords = ufiCodeWords; + } + else + { + headerEBit = 0; + + if (ufiNeedExtendedCodeWordsEpilogCount) + { + headerEpilogCount = 0; + headerCodeWords = 0; + headerExtendedEpilogCount = ufiEpilogScopes; + headerExtendedCodeWords = ufiCodeWords; + } + else + { + headerEpilogCount = ufiEpilogScopes; + headerCodeWords = ufiCodeWords; + } + } + + // Start writing the header + + noway_assert(headerFunctionLength <= + 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits, we have an internal error + + if ((headerEpilogCount > UW_MAX_EPILOG_COUNT) || (headerCodeWords > UW_MAX_CODE_WORDS_COUNT)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) | + (headerEpilogCount << 22) | (headerCodeWords << 27); + + ufiPrologCodes.AddHeaderWord(header); + + // Construct the second header word, if needed + + if (ufiNeedExtendedCodeWordsEpilogCount) + { + noway_assert(headerEBit == 0); + noway_assert(headerEpilogCount == 0); + noway_assert(headerCodeWords == 0); + noway_assert((headerExtendedEpilogCount > UW_MAX_EPILOG_COUNT) || + (headerExtendedCodeWords > UW_MAX_CODE_WORDS_COUNT)); + + if ((headerExtendedEpilogCount > UW_MAX_EXTENDED_EPILOG_COUNT) || + (headerExtendedCodeWords > UW_MAX_EXTENDED_CODE_WORDS_COUNT)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD header2 = headerExtendedEpilogCount | (headerExtendedCodeWords << 16); + + ufiPrologCodes.AddHeaderWord(header2); + } + + // Construct the epilog scope words, if needed + + if (!ufiSetEBit) + { + for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + // The epilog 
must strictly follow the prolog. The prolog is in the first fragment of + // the hot section. If this epilog is at the start of a fragment, it can't be the + // first fragment in the hot section. We actually don't know if we're processing + // the hot or cold section (or a funclet), so we can't distinguish these cases. Thus, + // we just assert that the epilog starts within the fragment. + assert(pEpi->GetStartOffset() >= GetStartOffset()); + + // We report the offset of an epilog as the offset from the beginning of the function/funclet fragment, + // NOT the offset from the beginning of the main function. + DWORD headerEpilogStartOffset = pEpi->GetStartOffset() - GetStartOffset(); + + noway_assert((headerEpilogStartOffset & 3) == 0); + headerEpilogStartOffset /= 4; // The unwind data stores the actual offset divided by 4 (since the low 2 bits + // of the actual offset is always zero) + + DWORD headerEpilogStartIndex = pEpi->GetStartIndex(); + + if ((headerEpilogStartOffset > UW_MAX_EPILOG_START_OFFSET) || + (headerEpilogStartIndex > UW_MAX_EPILOG_START_INDEX)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD epilogScopeWord = headerEpilogStartOffset | (headerEpilogStartIndex << 22); + + ufiPrologCodes.AddHeaderWord(epilogScopeWord); + } + } + + // The unwind code words are already here, following the header, so we're done! +} + +void UnwindFragmentInfo::Reserve(bool isFunclet, bool isHotCode) +{ + assert(isHotCode || !isFunclet); // TODO-CQ: support hot/cold splitting in functions with EH + + MergeCodes(); + + BOOL isColdCode = isHotCode ? FALSE : TRUE; + + ULONG unwindSize = Size(); + +#ifdef DEBUG + if (uwiComp->verbose) + { + if (ufiNum != 1) + printf("reserveUnwindInfo: fragment #%d:\n", ufiNum); + } +#endif + + uwiComp->eeReserveUnwindInfo(isFunclet, isColdCode, unwindSize); +} + +// Allocate the unwind info for a fragment with the VM. 
+// Arguments: +// funKind: funclet kind +// pHotCode: hot section code buffer +// pColdCode: cold section code buffer +// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a +// function/funclet. +// isHotCode: are we allocating the unwind info for the hot code section? + +void UnwindFragmentInfo::Allocate( + CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode) +{ + UNATIVE_OFFSET startOffset; + UNATIVE_OFFSET endOffset; + UNATIVE_OFFSET codeSize; + + // We don't support hot/cold splitting with EH, so if there is cold code, this + // better not be a funclet! + // TODO-CQ: support funclets in cold code + + noway_assert(isHotCode || funKind == CORJIT_FUNC_ROOT); + + // Compute the final size, and start and end offsets of the fragment + + startOffset = GetStartOffset(); + + if (ufiNext == NULL) + { + // This is the last fragment, so the fragment extends to the end of the function/fragment. + assert(funcEndOffset != 0); + endOffset = funcEndOffset; + } + else + { + // The fragment length is all the code between the beginning of this fragment + // and the beginning of the next fragment. Note that all fragments have had their + // offsets computed before any fragment is allocated. + endOffset = ufiNext->GetStartOffset(); + } + + assert(endOffset > startOffset); + codeSize = endOffset - startOffset; + + // Finalize the fragment unwind block to hand to the VM + + Finalize(codeSize); + + // Get the final unwind information and hand it to the VM + + ULONG unwindBlockSize; + BYTE* pUnwindBlock; + + GetFinalInfo(&pUnwindBlock, &unwindBlockSize); + +#ifdef DEBUG + if (uwiComp->opts.dspUnwind) + { + DumpUnwindInfo(uwiComp, isHotCode, startOffset, endOffset, pUnwindBlock, unwindBlockSize); + } +#endif // DEBUG + + // Adjust for cold or hot code: + // 1. The VM doesn't want the cold code pointer unless this is cold code. + // 2. 
The startOffset and endOffset need to be from the base of the hot section for hot code + // and from the base of the cold section for cold code + + if (isHotCode) + { + assert(endOffset <= uwiComp->info.compTotalHotCodeSize); + pColdCode = NULL; + } + else + { + assert(startOffset >= uwiComp->info.compTotalHotCodeSize); + startOffset -= uwiComp->info.compTotalHotCodeSize; + endOffset -= uwiComp->info.compTotalHotCodeSize; + } + +#ifdef DEBUG + if (uwiComp->verbose) + { + if (ufiNum != 1) + printf("unwindEmit: fragment #%d:\n", ufiNum); + } +#endif // DEBUG + + uwiComp->eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, unwindBlockSize, pUnwindBlock, + funKind); +} + +#ifdef DEBUG +void UnwindFragmentInfo::Dump(int indent) +{ + unsigned count; + UnwindEpilogInfo* pEpi; + + count = 0; + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + ++count; + } + + printf("%*sUnwindFragmentInfo #%d, @0x%08p, size:%d:\n", indent, "", ufiNum, dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s ufiNext: 0x%08p\n", indent, "", dspPtr(ufiNext)); + printf("%*s ufiEmitLoc: 0x%08p\n", indent, "", dspPtr(ufiEmitLoc)); + printf("%*s ufiHasPhantomProlog: %s\n", indent, "", dspBool(ufiHasPhantomProlog)); + printf("%*s %d epilog%s\n", indent, "", count, (count != 1) ? 
"s" : ""); + printf("%*s ufiEpilogList: 0x%08p\n", indent, "", dspPtr(ufiEpilogList)); + printf("%*s ufiEpilogLast: 0x%08p\n", indent, "", dspPtr(ufiEpilogLast)); + printf("%*s ufiCurCodes: 0x%08p\n", indent, "", dspPtr(ufiCurCodes)); + printf("%*s ufiSize: %u\n", indent, "", ufiSize); + printf("%*s ufiSetEBit: %s\n", indent, "", dspBool(ufiSetEBit)); + printf("%*s ufiNeedExtendedCodeWordsEpilogCount: %s\n", indent, "", dspBool(ufiNeedExtendedCodeWordsEpilogCount)); + printf("%*s ufiCodeWords: %u\n", indent, "", ufiCodeWords); + printf("%*s ufiEpilogScopes: %u\n", indent, "", ufiEpilogScopes); + printf("%*s ufiStartOffset: 0x%x\n", indent, "", ufiStartOffset); + printf("%*s ufiInProlog: %s\n", indent, "", dspBool(ufiInProlog)); + printf("%*s ufiInitialized: 0x%08x\n", indent, "", ufiInitialized); + + ufiPrologCodes.Dump(indent + 2); + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + pEpi->Dump(indent + 2); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindInfo +// +/////////////////////////////////////////////////////////////////////////////// + +void UnwindInfo::InitUnwindInfo(Compiler* comp, emitLocation* startLoc, emitLocation* endLoc) +{ + uwiComp = comp; + + // The first fragment is a member of UnwindInfo, so it doesn't need to be allocated. + // However, its constructor needs to be explicitly called, since the constructor for + // UnwindInfo is not called. + + new (&uwiFragmentFirst, jitstd::placement_t()) UnwindFragmentInfo(comp, startLoc, false); + + uwiFragmentLast = &uwiFragmentFirst; + + uwiEndLoc = endLoc; + + // Allocate an emitter location object. It is initialized to something + // invalid: it has a null 'ig' that needs to get set before it can be used. 
+ // Note that when we create an UnwindInfo for the cold section, this never + // gets initialized with anything useful, since we never add unwind codes + // to the cold section; we simply distribute the existing (previously added) codes. + uwiCurLoc = new (uwiComp, CMK_UnwindInfo) emitLocation(); + +#ifdef DEBUG + uwiInitialized = UWI_INITIALIZED_PATTERN; + uwiAddingNOP = false; +#endif // DEBUG +} + +// Split the unwind codes in 'puwi' into those that are in the hot section (leave them in 'puwi') +// and those that are in the cold section (move them to 'this'). There is exactly one fragment +// in each UnwindInfo; the fragments haven't been split for size, yet. + +void UnwindInfo::HotColdSplitCodes(UnwindInfo* puwi) +{ + // Ensure that there is exactly a single fragment in both the hot and the cold sections + assert(&uwiFragmentFirst == uwiFragmentLast); + assert(&puwi->uwiFragmentFirst == puwi->uwiFragmentLast); + assert(uwiFragmentLast->ufiNext == NULL); + assert(puwi->uwiFragmentLast->ufiNext == NULL); + + // The real prolog is in the hot section, so this, cold, section has a phantom prolog + uwiFragmentLast->ufiHasPhantomProlog = true; + uwiFragmentLast->CopyPrologCodes(puwi->uwiFragmentLast); + + // Now split the epilog codes + uwiFragmentLast->SplitEpilogCodes(uwiFragmentLast->ufiEmitLoc, puwi->uwiFragmentLast); +} + +// Split the function or funclet into fragments that are no larger than 512K, +// so the fragment size will fit in the unwind data "Function Length" field. +// The LOONGARCH Exception Data specification "Function Fragments" section describes this. +// We split the function so that it is no larger than 512K bytes, or the value of +// the COMPlus_JitSplitFunctionSize value, if defined (and smaller). We must determine +// how to split the function/funclet before we issue the instructions, so we can +// reserve the unwind space with the VM. The instructions issued may shrink (but not +// expand!) 
during issuing (although this is extremely rare in any case, and may not +// actually occur on LOONGARCH), so we don't finalize actual sizes or offsets. +// +// LOONGARCH64 has very similar limitations, except functions can be up to 1MB. TODO-LOONGARCH64-Bug?: make sure this works! +// +// We don't split any prolog or epilog. Ideally, we might not split an instruction, +// although that doesn't matter because the unwind at any point would still be +// well-defined. + +void UnwindInfo::Split() +{ + UNATIVE_OFFSET maxFragmentSize; // The maximum size of a code fragment in bytes + + maxFragmentSize = UW_MAX_FRAGMENT_SIZE_BYTES; + +#ifdef DEBUG + // Consider COMPlus_JitSplitFunctionSize + unsigned splitFunctionSize = (unsigned)JitConfig.JitSplitFunctionSize(); + + if (splitFunctionSize != 0) + if (splitFunctionSize < maxFragmentSize) + maxFragmentSize = splitFunctionSize; +#endif // DEBUG + + // Now, there should be exactly one fragment. + + assert(uwiFragmentLast != NULL); + assert(uwiFragmentLast == &uwiFragmentFirst); + assert(uwiFragmentLast->ufiNext == NULL); + + // Find the code size of this function/funclet. + + UNATIVE_OFFSET startOffset; + UNATIVE_OFFSET endOffset; + UNATIVE_OFFSET codeSize; + + if (uwiFragmentLast->ufiEmitLoc == NULL) + { + // NULL emit location means the beginning of the code. This is to handle the first fragment prolog. + startOffset = 0; + } + else + { + startOffset = uwiFragmentLast->ufiEmitLoc->CodeOffset(uwiComp->GetEmitter()); + } + + if (uwiEndLoc == NULL) + { + // Note that compTotalHotCodeSize and compTotalColdCodeSize are computed before issuing instructions + // from the emitter instruction group offsets, and will be accurate unless the issued code shrinks. + // compNativeCodeSize is precise, but is only set after instructions are issued, which is too late + // for us, since we need to decide how many fragments we need before the code memory is allocated + // (which is before instruction issuing). 
+ UNATIVE_OFFSET estimatedTotalCodeSize = + uwiComp->info.compTotalHotCodeSize + uwiComp->info.compTotalColdCodeSize; + assert(estimatedTotalCodeSize != 0); + endOffset = estimatedTotalCodeSize; + } + else + { + endOffset = uwiEndLoc->CodeOffset(uwiComp->GetEmitter()); + } + + assert(endOffset > startOffset); // there better be at least 1 byte of code + codeSize = endOffset - startOffset; + + // Now that we know the code size for this section (main function hot or cold, or funclet), + // figure out how many fragments we're going to need. + + UNATIVE_OFFSET numberOfFragments = (codeSize + maxFragmentSize - 1) / maxFragmentSize; // round up + assert(numberOfFragments > 0); + + if (numberOfFragments == 1) + { + // No need to split; we're done + return; + } + + // Now, we're going to commit to splitting the function into "numberOfFragments" fragments, + // for the purpose of unwind information. We need to do the actual splits so we can figure out + // the size of each piece of unwind data for the call to reserveUnwindInfo(). We won't know + // the actual offsets of the splits since we haven't issued the instructions yet, so store + // an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase, + // like we do for the function length and epilog offsets. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (uwiComp->verbose) + { + printf("Split unwind info into %d fragments (function/funclet size: %d, maximum fragment size: %d)\n", + numberOfFragments, codeSize, maxFragmentSize); + } +#endif // DEBUG + + // Call the emitter to do the split, and call us back for every split point it chooses. + uwiComp->GetEmitter()->emitSplit(uwiFragmentLast->ufiEmitLoc, uwiEndLoc, maxFragmentSize, (void*)this, + EmitSplitCallback); + +#ifdef DEBUG + // Did the emitter split the function/funclet into as many fragments as we asked for? 
+ // It might be fewer if the COMPlus_JitSplitFunctionSize was used, but it better not + // be fewer if we're splitting into 512K blocks! + + unsigned fragCount = 0; + for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + ++fragCount; + } + if (fragCount < numberOfFragments) + { + if (uwiComp->verbose) + { + printf("WARNING: asked the emitter for %d fragments, but only got %d\n", numberOfFragments, fragCount); + } + + // If this fires, then we split into fewer fragments than we asked for, and we are using + // the default, unwind-data-defined 512K maximum fragment size. We won't be able to fit + // this fragment into the unwind data! If you set COMPlus_JitSplitFunctionSize to something + // small, we might not be able to split into as many fragments as asked for, because we + // can't split prologs or epilogs. + assert(maxFragmentSize != UW_MAX_FRAGMENT_SIZE_BYTES); + } +#endif // DEBUG +} + +/*static*/ void UnwindInfo::EmitSplitCallback(void* context, emitLocation* emitLoc) +{ + UnwindInfo* puwi = (UnwindInfo*)context; + puwi->AddFragment(emitLoc); +} + +// Reserve space for the unwind info for all fragments + +void UnwindInfo::Reserve(bool isFunclet, bool isHotCode) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(isHotCode || !isFunclet); + + for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Reserve(isFunclet, isHotCode); + } +} + +// Allocate and populate VM unwind info for all fragments + +void UnwindInfo::Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, bool isHotCode) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + + UnwindFragmentInfo* pFrag; + + // First, finalize all the offsets (the location of the beginning of fragments, and epilogs), + // so a fragment can use the finalized offset of the subsequent fragment to determine its code size. 
+ + UNATIVE_OFFSET endOffset; + + if (uwiEndLoc == NULL) + { + assert(uwiComp->info.compNativeCodeSize != 0); + endOffset = uwiComp->info.compNativeCodeSize; + } + else + { + endOffset = uwiEndLoc->CodeOffset(uwiComp->GetEmitter()); + } + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->FinalizeOffset(); + } + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Allocate(funKind, pHotCode, pColdCode, endOffset, isHotCode); + } +} + +void UnwindInfo::AddEpilog() +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiFragmentLast != NULL); + uwiFragmentLast->AddEpilog(); + CaptureLocation(); +} + +void UnwindInfo::CaptureLocation() +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiCurLoc != NULL); + uwiCurLoc->CaptureLocation(uwiComp->GetEmitter()); +} + +void UnwindInfo::AddFragment(emitLocation* emitLoc) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiFragmentLast != NULL); + + UnwindFragmentInfo* newFrag = new (uwiComp, CMK_UnwindInfo) UnwindFragmentInfo(uwiComp, emitLoc, true); + +#ifdef DEBUG + newFrag->ufiNum = uwiFragmentLast->ufiNum + 1; +#endif // DEBUG + + newFrag->CopyPrologCodes(&uwiFragmentFirst); + newFrag->SplitEpilogCodes(emitLoc, uwiFragmentLast); + + // Link the new fragment in at the end of the fragment list + uwiFragmentLast->ufiNext = newFrag; + uwiFragmentLast = newFrag; +} + +#ifdef DEBUG + +void UnwindInfo::Dump(bool isHotCode, int indent) +{ + unsigned count; + UnwindFragmentInfo* pFrag; + + count = 0; + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + ++count; + } + + printf("%*sUnwindInfo %s@0x%08p, size:%d:\n", indent, "", isHotCode ? "" : "COLD ", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s %d fragment%s\n", indent, "", count, (count != 1) ? 
"s" : ""); + printf("%*s uwiFragmentLast: 0x%08p\n", indent, "", dspPtr(uwiFragmentLast)); + printf("%*s uwiEndLoc: 0x%08p\n", indent, "", dspPtr(uwiEndLoc)); + printf("%*s uwiInitialized: 0x%08x\n", indent, "", uwiInitialized); + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Dump(indent + 2); + } +} + +#endif // DEBUG + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index d6f9df4ce6b1e..5530998e627a6 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -132,6 +132,8 @@ const char* getRegName(regNumber reg) static const char* const regNames[] = { #if defined(TARGET_ARM64) #define REGDEF(name, rnum, mask, xname, wname) xname, +#elif defined(TARGET_LOONGARCH64) +#define REGDEF(name, rnum, mask, xname, wname) xname, #else #define REGDEF(name, rnum, mask, sname) sname, #endif @@ -217,7 +219,7 @@ const char* getRegNameFloat(regNumber reg, var_types type) return regName; } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) static const char* regNamesFloat[] = { #define REGDEF(name, rnum, mask, xname, wname) xname, @@ -316,6 +318,14 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) } #elif defined(TARGET_X86) // No register ranges + +#elif defined(TARGET_LOONGARCH64) + if (REG_A0 <= regNum && regNum <= REG_X0) + { + regHead = regNum; + inRegRange = true; + sep = "-"; + } #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -325,10 +335,12 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) // We've already printed a register. Is this the end of a range? else if ((regNum == REG_INT_LAST) || (regNum == REG_R17) // last register before TEB || (regNum == REG_R28)) // last register before FP -#else // TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) + else if ((regNum == REG_INT_LAST) || (regNum == REG_X0)) +#else // TARGET_LOONGARCH64 // We've already printed a register. 
Is this the end of a range? else if (regNum == REG_INT_LAST) -#endif // TARGET_ARM64 +#endif // TARGET_LOONGARCH64 { const char* nam = getRegName(regNum); printf("%s%s", sep, nam); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index af8d375219917..209149c1291f6 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -57,6 +57,8 @@ struct FloatTraits unsigned bits = 0xFFC00000u; #elif defined(TARGET_ARMARCH) unsigned bits = 0x7FC00000u; +#elif defined(TARGET_LOONGARCH64) + unsigned bits = 0xFFC00000u; #else #error Unsupported or unset target architecture #endif @@ -83,6 +85,8 @@ struct DoubleTraits unsigned long long bits = 0xFFF8000000000000ull; #elif defined(TARGET_ARMARCH) unsigned long long bits = 0x7FF8000000000000ull; +#elif defined(TARGET_LOONGARCH64) + unsigned long long bits = 0xFFF8000000000000ull; #else #error Unsupported or unset target architecture #endif @@ -106,7 +110,7 @@ struct DoubleTraits template TFp FpAdd(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If [value1] is negative infinity and [value2] is positive infinity // the result is NaN. // If [value1] is positive infinity and [value2] is negative infinity @@ -124,7 +128,7 @@ TFp FpAdd(TFp value1, TFp value2) return TFpTraits::NaN(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 + value2; } @@ -142,7 +146,7 @@ TFp FpAdd(TFp value1, TFp value2) template TFp FpSub(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If [value1] is positive infinity and [value2] is positive infinity // the result is NaN. 
// If [value1] is negative infinity and [value2] is negative infinity @@ -160,7 +164,7 @@ TFp FpSub(TFp value1, TFp value2) return TFpTraits::NaN(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 - value2; } @@ -178,7 +182,7 @@ TFp FpSub(TFp value1, TFp value2) template TFp FpMul(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // From the ECMA standard: // // If [value1] is zero and [value2] is infinity @@ -194,7 +198,7 @@ TFp FpMul(TFp value1, TFp value2) { return TFpTraits::NaN(); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 * value2; } @@ -212,7 +216,7 @@ TFp FpMul(TFp value1, TFp value2) template TFp FpDiv(TFp dividend, TFp divisor) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // From the ECMA standard: // // If [dividend] is zero and [divisor] is zero @@ -228,7 +232,7 @@ TFp FpDiv(TFp dividend, TFp divisor) { return TFpTraits::NaN(); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return dividend / divisor; } @@ -2776,7 +2780,11 @@ ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, Valu } else { +#ifdef TARGET_LOONGARCH64 + assert(typ == TYP_INT || typ == TYP_LONG); +#else assert(typ == TYP_INT); +#endif int resultVal = EvalOp(func, arg0Val, arg1Val); // Bin op on a handle results in a handle. ValueNum handleVN = IsVNHandle(arg0VN) ? arg0VN : IsVNHandle(arg1VN) ? 
arg1VN : NoVN; @@ -9877,7 +9885,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN vnpUniq.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet())); } -#if defined(FEATURE_READYTORUN) && defined(TARGET_ARMARCH) +#if defined(FEATURE_READYTORUN) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)) if (call->IsR2RRelativeIndir()) { #ifdef DEBUG @@ -9892,7 +9900,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN // in morph. So we do not need to use EntryPointAddrAsArg0, because arg0 is already an entry point addr. useEntryPointAddrAsArg0 = false; } -#endif // FEATURE_READYTORUN && TARGET_ARMARCH +#endif // FEATURE_READYTORUN && (TARGET_ARMARCH || TARGET_LOONGARCH64) if (nArgs == 0) { diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index d5a81d03ca502..8a0aa8fc9bc4e 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -177,6 +177,10 @@ ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of t #elif defined (TARGET_ARM) // No Hardware Intrinsics on ARM32 + +#elif defined (TARGET_LOONGARCH64) + //TODO: add LoongArch64's Hardware Instructions. + #else #error Unsupported platform #endif From 98a8b9db19142395c419dc47eb4651a6a86a45f1 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Sat, 18 Dec 2021 15:05:26 +0800 Subject: [PATCH 02/46] [LoongArch64] add jit/CMakeLists.txt from #62889. 
--- src/coreclr/jit/CMakeLists.txt | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index c46cdd18a164e..3c6febd86ad53 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -41,6 +41,9 @@ function(create_standalone_jit) elseif(TARGETDETAILS_ARCH STREQUAL "s390x") set(JIT_ARCH_SOURCES ${JIT_S390X_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_S390X_HEADERS}) + elseif(TARGETDETAILS_ARCH STREQUAL "loongarch64") + set(JIT_ARCH_SOURCES ${JIT_LOONGARCH64_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_LOONGARCH64_HEADERS}) else() clr_unknown_arch() endif() @@ -224,6 +227,17 @@ set( JIT_S390X_SOURCES # Not supported as JIT target ) +set( JIT_LOONGARCH64_SOURCES + codegenloongarch64.cpp + emitloongarch64.cpp + lowerloongarch64.cpp + lsraloongarch64.cpp + targetloongarch64.cpp + unwindloongarch64.cpp + ##hwintrinsiclistloongarch64.cpp ###TODO:Not implemented on loongarch64 yet. + ##simdashwintrinsiclistloongarch64.cpp ###TODO:Not implemented on loongarch64 yet. +) + # We include the headers here for better experience in IDEs. set( JIT_HEADERS ../inc/corinfo.h @@ -366,6 +380,15 @@ set ( JIT_S390X_HEADERS # Not supported as JIT target ) +set( JIT_LOONGARCH64_HEADERS + emitloongarch64.h + emitfmtsloongarch64.h + instrsloongarch64.h + registerloongarch64.h +#hwintrinsiclistloongarch64.h ###TODO:Not implemented on loongarch64 yet. 
+#simdashwintrinsiclistloongarch64.h +) + convert_to_absolute_path(JIT_SOURCES ${JIT_SOURCES}) convert_to_absolute_path(JIT_HEADERS ${JIT_HEADERS}) convert_to_absolute_path(JIT_RESOURCES ${JIT_RESOURCES}) @@ -382,6 +405,8 @@ convert_to_absolute_path(JIT_ARM64_SOURCES ${JIT_ARM64_SOURCES}) convert_to_absolute_path(JIT_ARM64_HEADERS ${JIT_ARM64_HEADERS}) convert_to_absolute_path(JIT_S390X_SOURCES ${JIT_S390X_SOURCES}) convert_to_absolute_path(JIT_S390X_HEADERS ${JIT_S390X_HEADERS}) +convert_to_absolute_path(JIT_LOONGARCH64_SOURCES ${JIT_LOONGARCH64_SOURCES}) +convert_to_absolute_path(JIT_LOONGARCH64_HEADERS ${JIT_LOONGARCH64_HEADERS}) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(JIT_ARCH_SOURCES ${JIT_AMD64_SOURCES}) @@ -398,6 +423,9 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) elseif(CLR_CMAKE_TARGET_ARCH_S390X) set(JIT_ARCH_SOURCES ${JIT_S390X_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_S390X_HEADERS}) +elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + set(JIT_ARCH_SOURCES ${JIT_LOONGARCH64_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_LOONGARCH64_HEADERS}) else() clr_unknown_arch() endif() @@ -535,11 +563,12 @@ install_clr(TARGETS clrjit DESTINATIONS . sharedFramework COMPONENT jit) # Enable profile guided optimization add_pgo(clrjit) -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm64_${ARCH_HOST_NAME} OS universal ARCH arm64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_unix_x64_${ARCH_HOST_NAME} OS unix ARCH x64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_win_x64_${ARCH_HOST_NAME} OS win ARCH x64 DESTINATIONS .) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) + create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) 
+endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) From 0b561c4915a5b4787777878a7503be1ff09e3e0f Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 7 Jan 2022 13:02:13 +0800 Subject: [PATCH 03/46] [LoongArch64] update LoongArch64 after merge from main. --- src/coreclr/jit/codegen.h | 2 +- src/coreclr/jit/codegencommon.cpp | 15 ++ src/coreclr/jit/codegenloongarch64.cpp | 332 ++----------------------- src/coreclr/jit/emitloongarch64.cpp | 15 +- src/coreclr/jit/lclvars.cpp | 2 +- src/coreclr/jit/lower.cpp | 8 +- src/coreclr/jit/lowerloongarch64.cpp | 15 +- src/coreclr/jit/lsraloongarch64.cpp | 8 +- 8 files changed, 58 insertions(+), 339 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 52161b44ea34e..11e6eb7edfe0c 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -614,7 +614,7 @@ class CodeGen final : public CodeGenInterface #endif #if defined(DEBUG) && defined(TARGET_LOONGARCH64) - void genLOONGARCH64EmitterUnitTests(); + void genLoongArch64EmitterUnitTests(); #endif #if defined(DEBUG) && defined(LATE_DISASM) && defined(TARGET_AMD64) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index daddd89ab35b2..832471bd44798 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -12205,6 +12205,20 @@ void CodeGen::genStructReturn(GenTree* treeNode) GenTreeLclVar* lclNode = actualOp1->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); assert(varDsc->lvIsMultiRegRet); +#ifdef TARGET_LOONGARCH64 + var_types type = retTypeDesc.GetReturnRegType(0); + regNumber toReg = retTypeDesc.GetABIReturnReg(0); + GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, 
lclNode->GetLclNum(), 0); + if (regCount > 1) + { + assert(regCount == 2); + int offset = genTypeSize(type); + type = retTypeDesc.GetReturnRegType(1); + offset = offset < genTypeSize(type) ? genTypeSize(type) : offset; + toReg = retTypeDesc.GetABIReturnReg(1); + GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); + } +#else int offset = 0; for (unsigned i = 0; i < regCount; ++i) { @@ -12213,6 +12227,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); offset += genTypeSize(type); } +#endif } else { diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 49cc67b50dc36..c5f407f3be311 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -3161,9 +3161,10 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) case GT_MUL: if ((attr == EA_8BYTE) || (attr == EA_BYREF)) { - //if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) - // ins = INS_mul_d; - //else + op2 = treeNode->gtGetOp2(); + if (genActualTypeIsInt(op1) && genActualTypeIsInt(op2)) + ins = treeNode->IsUnsigned() ? 
INS_mulw_d_wu : INS_mulw_d_w; + else ins = INS_mul_d; } else @@ -3442,8 +3443,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) dataReg = data->GetRegNum(); } - var_types type = tree->TypeGet(); - instruction ins = ins_Store(type); + var_types type = tree->TypeGet(); + instruction ins = ins_Store(type); if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) { @@ -4361,7 +4362,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) switch (cmpSize) { case EA_4BYTE: - if (op1->gtFlags & GTF_UNSIGNED) + if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) imm = static_cast(imm); else imm = static_cast(imm); @@ -4551,7 +4552,15 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) if (op2->AsIntCon()->gtIconVal) { assert(reg != REG_R21); - GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, op2->AsIntCon()->gtIconVal); + ssize_t imm = op2->AsIntCon()->gtIconVal; + if ((tree->gtFlags & GTF_UNSIGNED) && (attr == EA_4BYTE)) + { + assert(reg != REG_RA); + imm = (int32_t)imm; + GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, reg, 0); + reg = REG_RA; + } + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); regs = (int)reg << 5; regs |= (int)REG_R21;//REG_R21 ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; @@ -5017,297 +5026,6 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) #endif // FEATURE_SIMD -#ifdef FEATURE_HW_INTRINSICS -#include "hwintrinsic.h" - -instruction CodeGen::getOpForHWIntrinsic(GenTreeHWIntrinsic* node, var_types instrType) -{ - assert(!"unimplemented on LOONGARCH yet"); - return INS_invalid; -} - -//------------------------------------------------------------------------ -// genHWIntrinsic: Produce code for a GT_HWINTRINSIC node. -// -// This is the main routine which in turn calls the genHWIntrinsicXXX() routines. -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. 
-// -void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicUnaryOp: -// -// Produce code for a GT_HWINTRINSIC node with form UnaryOp. -// -// Consumes one scalar operand produces a scalar -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicUnaryOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicCrcOp: -// -// Produce code for a GT_HWINTRINSIC node with form CrcOp. -// -// Consumes two scalar operands and produces a scalar result -// -// This form differs from BinaryOp because the attr depends on the size of op2 -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicCrcOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdBinaryOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdBinaryOp. -// -// Consumes two SIMD operands and produces a SIMD result -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSwitchTable: generate the jump-table for imm-intrinsics -// with non-constant argument -// -// Arguments: -// swReg - register containing the switch case to execute -// tmpReg - temporary integer register for calculating the switch indirect branch target -// swMax - the number of switch cases. 
-// emitSwCase - lambda to generate an individual switch case -// -// Notes: -// Used for cases where an instruction only supports immediate operands, -// but at jit time the operand is not a constant. -// -// The importer is responsible for inserting an upstream range check -// (GT_HW_INTRINSIC_CHK) for swReg, so no range check is needed here. -// -template -void CodeGen::genHWIntrinsicSwitchTable(regNumber swReg, - regNumber tmpReg, - int swMax, - HWIntrinsicSwitchCaseBody emitSwCase) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdExtractOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdExtractOp. -// -// Consumes one SIMD operand and one scalar -// -// The element index operand is typically a const immediate -// When it is not, a switch table is generated -// -// See genHWIntrinsicSwitchTable comments -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdInsertOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdInsertOp. -// -// Consumes one SIMD operand and two scalars -// -// The element index operand is typically a const immediate -// When it is not, a switch table is generated -// -// See genHWIntrinsicSwitchTable comments -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdSelectOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdSelectOp. 
-// -// Consumes three SIMD operands and produces a SIMD result -// -// This intrinsic form requires one of the source registers to be the -// destination register. Inserts a INS_mov if this requirement is not met. -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdSetAllOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdSetAllOp. -// -// Consumes single scalar operand and produces a SIMD result -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdSetAllOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdUnaryOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdUnaryOp. -// -// Consumes single SIMD operand and produces a SIMD result -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdBinaryRMWOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdBinaryRMWOp. -// -// Consumes two SIMD operands and produces a SIMD result. -// First operand is both source and destination. -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. 
-// -void CodeGen::genHWIntrinsicSimdBinaryRMWOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdTernaryRMWOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdTernaryRMWOp -// -// Consumes three SIMD operands and produces a SIMD result. -// First operand is both source and destination. -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdTernaryRMWOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicShaHashOp: -// -// Produce code for a GT_HWINTRINSIC node with form Sha1HashOp. -// Used in LOONGARCH64 SHA1 Hash operations. -// -// Consumes three operands and returns a Simd result. -// First Simd operand is both source and destination. -// Second Operand is an unsigned int. -// Third operand is a simd operand. - -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicShaHashOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicShaRotateOp: -// -// Produce code for a GT_HWINTRINSIC node with form Sha1RotateOp. -// Used in LOONGARCH64 SHA1 Rotate operations. -// -// Consumes one integer operand and returns unsigned int result. -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. 
-// -void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -#endif // FEATURE_HW_INTRINSICS - /***************************************************************************** * Unit testing of the LOONGARCH64 emitter: generate a bunch of instructions into the prolog * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late @@ -5319,7 +5037,7 @@ void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node) //#define ALL_LOONGARCH64_EMITTER_UNIT_TESTS #if defined(DEBUG) -void CodeGen::genLOONGARCH64EmitterUnitTests() +void CodeGen::genLoongArch64EmitterUnitTests() { if (!verbose) { @@ -5333,9 +5051,9 @@ void CodeGen::genLOONGARCH64EmitterUnitTests() } // Mark the "fake" instructions in the output. - printf("*************** In genLOONGARCH64EmitterUnitTests()\n"); + printf("*************** In genLoongArch64EmitterUnitTests()\n"); - printf("*************** End of genLOONGARCH64EmitterUnitTests()\n"); + printf("*************** End of genLoongArch64EmitterUnitTests()\n"); } #endif // defined(DEBUG) @@ -5758,13 +5476,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) instGen(INS_nop); break; - case GT_ARR_BOUNDS_CHECK: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: -#endif // FEATURE_SIMD -#ifdef FEATURE_HW_INTRINSICS - case GT_HW_INTRINSIC_CHK: -#endif // FEATURE_HW_INTRINSICS + case GT_BOUNDS_CHECK: genRangeCheck(treeNode); break; @@ -6571,11 +6283,11 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) } //------------------------------------------------------------------------ -// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node. +// genRangeCheck: generate code for GT_BOUNDS_CHECK node. 
// void CodeGen::genRangeCheck(GenTree* oper) { - noway_assert(oper->OperIsBoundsCheck()); + noway_assert(oper->OperIs(GT_BOUNDS_CHECK)); GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); GenTree* arrLen = bndsChk->GetArrayLength(); diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index a5492bee3394b..f7c06ad5cc7f6 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -1135,7 +1135,8 @@ void emitter::emitIns_Mov( {//TODO: should amend for LoongArch64/LOONGARCH64. assert(IsMovInstruction(ins)); - emitIns_R_R(ins, attr, dstReg, srcReg); + if (!canSkip || (dstReg != srcReg)) + emitIns_R_R(ins, attr, dstReg, srcReg); } /***************************************************************************** @@ -1605,8 +1606,8 @@ void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, re // assert(!EA_IS_RELOC(attr)); emitIns_I_la(attr, reg1, imm); - //codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm); - emitIns_R_R_R(ins, attr, reg1, reg2, reg1); + assert(ins == INS_ld_d); + emitIns_R_R_R(INS_ldx_d, attr, reg1, reg2, reg1); } } @@ -6317,10 +6318,12 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } else { + assert(REG_RA != dst->GetRegNum()); + assert(REG_RA != src1->GetRegNum()); + assert(REG_RA != src2->GetRegNum()); size_t imm = (EA_SIZE(attr) == EA_8BYTE) ? 63 : 31; - emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? INS_srai_d : INS_srai_w, attr, REG_T0, dst->GetRegNum(), imm); - //TODO: FIXME:should confirm reg REG_T0! - codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, REG_T0); + emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? 
INS_srai_d : INS_srai_w, attr, REG_RA, dst->GetRegNum(), imm); + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, REG_RA); } } } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 23e0761a8515f..2dff5054a8cc8 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -645,7 +645,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un #if defined(TARGET_LOONGARCH64) int flags = 0; - CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd, &flags); + CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType2(&info.compMethodInfo->args, argLst, &typeHnd, &flags); #else CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); #endif diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0e0c6394d9d88..86925234b877b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3649,10 +3649,16 @@ void Lowering::LowerStoreSingleRegCallStruct(GenTreeBlk* store) assert(!call->HasMultiRegRetVal()); const ClassLayout* layout = store->GetLayout(); - const var_types regType = layout->GetRegisterType(); + var_types regType = layout->GetRegisterType(); if (regType != TYP_UNDEF) { +#if defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(call->TypeGet())) + regType = call->TypeGet(); + assert(regType != TYP_UNDEF); + assert(regType != TYP_STRUCT); +#endif store->ChangeType(regType); store->SetOper(GT_STOREIND); LowerStoreIndirCommon(store->AsStoreInd()); diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 0b77c8a27d3b8..3f5df4552c731 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -87,12 +87,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const case GT_LE: case GT_GE: case GT_GT: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: 
-#endif -#ifdef FEATURE_HW_INTRINSICS - case GT_HW_INTRINSIC_CHK: -#endif + case GT_BOUNDS_CHECK: return ((-32768 <= immVal) && (immVal <= 32767)); case GT_AND: case GT_OR: @@ -1335,10 +1330,6 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call) // void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) { -#if 0 -assert(!"unimplemented on LOONGARCH yet"); -#else - GenTree* src = node->Data(); if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) { @@ -1347,8 +1338,6 @@ assert(!"unimplemented on LOONGARCH yet"); } ContainCheckIndir(node); - -#endif } //------------------------------------------------------------------------ @@ -1547,7 +1536,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) // void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) { - assert(node->OperIsBoundsCheck()); + assert(node->OperIs(GT_BOUNDS_CHECK)); if (!CheckImmedAndMakeContained(node, node->GetIndex())) { CheckImmedAndMakeContained(node, node->GetArrayLength()); diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 8ce30f7bb6855..ba2f6f3536f24 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -621,13 +621,7 @@ int LinearScan::BuildNode(GenTree* tree) } break; - case GT_ARR_BOUNDS_CHECK: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: -#endif // FEATURE_SIMD -#ifdef FEATURE_HW_INTRINSICS - case GT_HW_INTRINSIC_CHK: -#endif // FEATURE_HW_INTRINSICS + case GT_BOUNDS_CHECK: { GenTreeBoundsChk* node = tree->AsBoundsChk(); // Consumes arrLen & index - has no result From a5424e895fe95f9abe619ed6df17f95f08bd9d54 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 10 Jan 2022 12:16:04 +0800 Subject: [PATCH 04/46] [LoongArch64] Fix the error for "IsLoongArch64". 
--- src/coreclr/inc/targetosarch.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/coreclr/inc/targetosarch.h b/src/coreclr/inc/targetosarch.h index b2d1c06a22d66..9025a8608af0f 100644 --- a/src/coreclr/inc/targetosarch.h +++ b/src/coreclr/inc/targetosarch.h @@ -41,27 +41,38 @@ class TargetArchitecture static const bool IsArm64 = false; static const bool IsArm32 = true; static const bool IsArmArch = true; + static const bool IsLoongArch64 = false; #elif defined(TARGET_ARM64) static const bool IsX86 = false; static const bool IsX64 = false; static const bool IsArm64 = true; static const bool IsArm32 = false; static const bool IsArmArch = true; + static const bool IsLoongArch64 = false; #elif defined(TARGET_AMD64) static const bool IsX86 = false; static const bool IsX64 = true; static const bool IsArm64 = false; static const bool IsArm32 = false; static const bool IsArmArch = false; + static const bool IsLoongArch64 = false; #elif defined(TARGET_X86) static const bool IsX86 = true; static const bool IsX64 = false; static const bool IsArm64 = false; static const bool IsArm32 = false; static const bool IsArmArch = false; + static const bool IsLoongArch64 = false; +#elif defined(TARGET_LOONGARCH64) + static const bool IsX86 = false; + static const bool IsX64 = false; + static const bool IsArm64 = false; + static const bool IsArm32 = false; + static const bool IsArmArch = false; + static const bool IsLoongArch64 = true; #else #error Unknown architecture #endif }; -#endif // targetosarch_h \ No newline at end of file +#endif // targetosarch_h From 3be6cbcdc49d8a860f7fcde3067f1bb9151e4379 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 10 Jan 2022 12:44:23 +0800 Subject: [PATCH 05/46] [LoongArch64] Fix the cross-compiling error. 
--- src/coreclr/jit/importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 7d256c481a70c..2c874beb70868 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -17731,7 +17731,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) // Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || #ifdef TARGET_LOONGARCH64 - genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType) || + (genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType)) || #endif ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || From 8ded978c65fc7f51ef651dfb34336a735900798c Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 10 Jan 2022 14:48:44 +0800 Subject: [PATCH 06/46] [LoongArch64] Fixed the compiling errors after merge. --- src/coreclr/jit/CMakeLists.txt | 8 +++++--- src/coreclr/jit/ICorJitInfo_API_wrapper.hpp | 18 +----------------- src/coreclr/jit/jit.h | 2 +- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 3c6febd86ad53..ed86960817e7c 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -563,12 +563,14 @@ install_clr(TARGETS clrjit DESTINATIONS . sharedFramework COMPONENT jit) # Enable profile guided optimization add_pgo(clrjit) -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +#if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) create_standalone_jit(TARGET clrjit_universal_arm64_${ARCH_HOST_NAME} OS universal ARCH arm64 DESTINATIONS .) 
create_standalone_jit(TARGET clrjit_unix_x64_${ARCH_HOST_NAME} OS unix ARCH x64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_win_x64_${ARCH_HOST_NAME} OS win ARCH x64 DESTINATIONS .) - create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + #create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) +endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +#endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) diff --git a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp index 8326f17cfc7ee..1e2627ccb3c9a 100644 --- a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp +++ b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp @@ -986,30 +986,14 @@ CORINFO_ARG_LIST_HANDLE WrapICorJitInfo::getArgNext( CorInfoTypeWithMod WrapICorJitInfo::getArgType( CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE args, - CORINFO_CLASS_HANDLE* vcTypeRet -#if defined(TARGET_LOONGARCH64) - ,int *flags = NULL -#endif -) + CORINFO_CLASS_HANDLE* vcTypeRet) { API_ENTER(getArgType); -#if defined(TARGET_LOONGARCH64) - CorInfoTypeWithMod temp = wrapHnd->getArgType(sig, args, vcTypeRet, flags); -#else CorInfoTypeWithMod temp = wrapHnd->getArgType(sig, args, vcTypeRet); -#endif API_LEAVE(getArgType); return temp; } -uint32_t WrapICorJitInfo::getFieldTypeByHnd(CORINFO_CLASS_HANDLE cls) -{ - API_ENTER(getFieldTypeByHnd); - DWORD temp = wrapHnd->getFieldTypeByHnd(cls); - API_LEAVE(getFieldTypeByHnd); - return temp; -} - CORINFO_CLASS_HANDLE WrapICorJitInfo::getArgClass( CORINFO_SIG_INFO* 
sig, CORINFO_ARG_LIST_HANDLE args) diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index a6a6977be0225..987ea401c503a 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -214,7 +214,7 @@ #elif defined(TARGET_ARM64) #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARM64 // 0xAA64 #elif defined(TARGET_LOONGARCH64) -#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_LOONGARCH64 // 0xDD64 +#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_LOONGARCH64 // 0x6264 #else #error Unsupported or unset target architecture #endif From eba508b1274272f58b24ecefd470212b878f5e40 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 10 Jan 2022 17:48:14 +0800 Subject: [PATCH 07/46] [LoongArch64] revert `src/coreclr/jit/ICorJitInfo_API_names.h`. --- src/coreclr/jit/ICorJitInfo_API_names.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/ICorJitInfo_API_names.h b/src/coreclr/jit/ICorJitInfo_API_names.h index f9597085d11df..d373091453220 100644 --- a/src/coreclr/jit/ICorJitInfo_API_names.h +++ b/src/coreclr/jit/ICorJitInfo_API_names.h @@ -122,7 +122,6 @@ DEF_CLR_API(getMethodNameFromMetadata) DEF_CLR_API(getMethodHash) DEF_CLR_API(findNameOfToken) DEF_CLR_API(getSystemVAmd64PassStructInRegisterDescriptor) -DEF_CLR_API(getFieldTypeByHnd) DEF_CLR_API(getThreadTLSIndex) DEF_CLR_API(getInlinedCallFrameVptr) DEF_CLR_API(getAddrOfCaptureThreadGlobal) From e1b998665a3c2d6298f485764af41328fd8d2acc Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 10 Jan 2022 19:18:56 +0800 Subject: [PATCH 08/46] [LoongArch64] workround the compiling error on windows. 
--- src/coreclr/jit/importer.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 2c874beb70868..f06e2be8c5a7d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -17729,14 +17729,20 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) op2 = impImplicitIorI4Cast(op2, info.compRetType); op2 = impImplicitR4orR8Cast(op2, info.compRetType); // Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. - assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || #ifdef TARGET_LOONGARCH64 + assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || (genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType)) || -#endif ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType))); +#else + assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || + ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || + (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || + (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || + (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType))); +#endif #ifdef DEBUG if (!isTailCall && opts.compGcChecks && (info.compRetType == TYP_REF)) From d5133483a426bd101022f0ef41fe0d85ef58bc29 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 11 Jan 2022 15:31:59 +0800 Subject: [PATCH 09/46] [LoongArch64] amend the code-format. 
--- src/coreclr/jit/codegen.h | 12 +- src/coreclr/jit/codegencommon.cpp | 231 +- src/coreclr/jit/codegenlinear.cpp | 6 +- src/coreclr/jit/codegenloongarch64.cpp | 1358 ++++--- src/coreclr/jit/compiler.cpp | 10 +- src/coreclr/jit/compiler.h | 21 +- src/coreclr/jit/compiler.hpp | 4 +- src/coreclr/jit/ee_il_dll.cpp | 2 +- src/coreclr/jit/emit.cpp | 103 +- src/coreclr/jit/emit.h | 44 +- src/coreclr/jit/emitloongarch64.cpp | 3795 ++++++++++--------- src/coreclr/jit/emitloongarch64.h | 84 +- src/coreclr/jit/gentree.cpp | 22 +- src/coreclr/jit/importer.cpp | 118 +- src/coreclr/jit/instr.cpp | 18 +- src/coreclr/jit/instrsloongarch64.h | 1 - src/coreclr/jit/jit.h | 2 +- src/coreclr/jit/lclvars.cpp | 70 +- src/coreclr/jit/lower.cpp | 2 +- src/coreclr/jit/lowerloongarch64.cpp | 39 +- src/coreclr/jit/lsra.cpp | 13 +- src/coreclr/jit/lsra.h | 14 +- src/coreclr/jit/lsraloongarch64.cpp | 20 +- src/coreclr/jit/morph.cpp | 127 +- src/coreclr/jit/register_arg_convention.cpp | 6 +- src/coreclr/jit/regset.h | 2 +- src/coreclr/jit/scopeinfo.cpp | 2 +- src/coreclr/jit/target.h | 3 +- src/coreclr/jit/targetloongarch64.cpp | 4 +- src/coreclr/jit/unwind.cpp | 3 +- src/coreclr/jit/unwindloongarch64.cpp | 59 +- src/coreclr/jit/utils.cpp | 4 +- src/coreclr/jit/valuenum.cpp | 2 +- 33 files changed, 3148 insertions(+), 3053 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 11e6eb7edfe0c..b44ed34f09857 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -239,7 +239,11 @@ class CodeGen final : public CodeGenInterface #ifdef TARGET_LOONGARCH64 void genSetRegToIcon(regNumber reg, ssize_t val, var_types type); - void genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk = nullptr, regNumber reg2 = REG_R0); + void genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, + instruction ins, + regNumber reg1, + BasicBlock* failBlk = nullptr, + regNumber reg2 = REG_R0); #else void genCheckOverflow(GenTree* 
tree); #endif @@ -1335,7 +1339,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif // FEATURE_PUT_STRUCT_ARG_STK #ifdef TARGET_LOONGARCH64 - //TODO for LOONGARCH64 : maybe delete on LA64? + // TODO for LOONGARCH64 : maybe delete on LA64? void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset); #endif @@ -1363,7 +1367,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX BasicBlock* genCallFinally(BasicBlock* block); void genCodeForJumpTrue(GenTreeOp* jtrue); #if defined(TARGET_LOONGARCH64) - //TODO: refactor for LA. + // TODO: refactor for LA. void genCodeForJumpCompare(GenTreeOp* tree); #endif #if defined(TARGET_ARM64) @@ -1401,7 +1405,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genFloatReturn(GenTree* treeNode); #endif // TARGET_X86 -#if defined(TARGET_ARM64)|| defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) void genSimpleReturn(GenTree* treeNode); #endif // TARGET_ARM64 || TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 832471bd44798..b4966106a9dc8 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1785,19 +1785,24 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) else { //// Ngen case - GS cookie constant needs to be accessed through an indirection. 
- //instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); + // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); if (compiler->opts.compReloc) { - GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); } else - {////TODO:LoongArch64 should amend for optimize! - //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000)>>12); - GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff)>>2); + { ////TODO:LoongArch64 should amend for optimize! 
+ // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, + // (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst, + (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2); } regSet.verifyRegUsed(regGSConst); } @@ -1809,7 +1814,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue); // regGSConst and regGSValue aren't needed anymore, we can use them for helper call - genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);//no branch-delay! + genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst); genDefineTempLabel(gsCheckBlk); } #endif // TARGET_LOONGARCH64 @@ -1921,8 +1926,8 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi } else { - // The code to throw the exception will be generated inline, and - // we will jump around it in the normal non-exception case. +// The code to throw the exception will be generated inline, and +// we will jump around it in the normal non-exception case. 
#ifndef TARGET_LOONGARCH64 BasicBlock* tgtBlk = nullptr; @@ -3324,8 +3329,8 @@ void CodeGen::genFnPrologCalleeRegArgs() noway_assert(regArgMaskLive != 0); unsigned varNum; - unsigned regArgsVars[MAX_REG_ARG*2] = {0}; - unsigned regArgNum = 0; + unsigned regArgsVars[MAX_REG_ARG * 2] = {0}; + unsigned regArgNum = 0; for (varNum = 0; varNum < compiler->lvaCount; ++varNum) { LclVarDsc* varDsc = compiler->lvaTable + varNum; @@ -3349,7 +3354,8 @@ void CodeGen::genFnPrologCalleeRegArgs() { if (varDsc->GetArgInitReg() > REG_ARG_LAST) { - inst_Mov(genIsValidFloatReg(varDsc->GetArgInitReg()) ? TYP_DOUBLE : TYP_LONG, varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); + inst_Mov(genIsValidFloatReg(varDsc->GetArgInitReg()) ? TYP_DOUBLE : TYP_LONG, + varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); } else @@ -3386,7 +3392,7 @@ void CodeGen::genFnPrologCalleeRegArgs() { storeType = varDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE; } - else //if (emitter::isGeneralRegister(varDsc->GetArgReg())) + else // if (emitter::isGeneralRegister(varDsc->GetArgReg())) { assert(emitter::isGeneralRegister(varDsc->GetArgReg())); if (varDsc->lvIs4Field1) @@ -3394,7 +3400,7 @@ void CodeGen::genFnPrologCalleeRegArgs() else storeType = varDsc->GetLayout()->GetGCPtrType(0); } - slotSize = (unsigned)emitActualTypeSize(storeType); + slotSize = (unsigned)emitActualTypeSize(storeType); #if FEATURE_MULTIREG_ARGS // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers @@ -3426,12 +3432,12 @@ void CodeGen::genFnPrologCalleeRegArgs() { assert(srcRegNum != varDsc->GetOtherArgReg()); - int tmp_offset = 0; - regNumber tmp_reg = REG_NA; + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; bool FPbased; - int baseOffset = 0;//(regArgTab[argNum].slot - 1) * slotSize; - int base = compiler->lvaFrameAddress(varNum, &FPbased); + int baseOffset = 0; //(regArgTab[argNum].slot - 1) * slotSize; + int base = compiler->lvaFrameAddress(varNum, 
&FPbased); base += baseOffset; @@ -3444,9 +3450,9 @@ void CodeGen::genFnPrologCalleeRegArgs() if (tmp_reg == REG_NA) { regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); @@ -3466,10 +3472,10 @@ void CodeGen::genFnPrologCalleeRegArgs() if (emitter::isFloatReg(varDsc->GetOtherArgReg())) { baseOffset = (int)EA_SIZE(emitActualTypeSize(storeType)); - storeType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE; - size = EA_SIZE(emitActualTypeSize(storeType)); + storeType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE; + size = EA_SIZE(emitActualTypeSize(storeType)); baseOffset = baseOffset < (int)size ? (int)size : baseOffset; - srcRegNum = varDsc->GetOtherArgReg(); + srcRegNum = varDsc->GetOtherArgReg(); } else if (emitter::isGeneralRegister(varDsc->GetOtherArgReg())) { @@ -3478,10 +3484,10 @@ void CodeGen::genFnPrologCalleeRegArgs() storeType = TYP_INT; else storeType = varDsc->GetLayout()->GetGCPtrType(1); - size = emitActualTypeSize(storeType); + size = emitActualTypeSize(storeType); if (baseOffset < (int)EA_SIZE(size)) baseOffset = (int)EA_SIZE(size); - srcRegNum = varDsc->GetOtherArgReg(); + srcRegNum = varDsc->GetOtherArgReg(); } if (srcRegNum == varDsc->GetOtherArgReg()) @@ -3497,9 +3503,9 @@ void CodeGen::genFnPrologCalleeRegArgs() if (tmp_reg == REG_NA) { regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); @@ -3510,7 +3516,7 @@ void CodeGen::genFnPrologCalleeRegArgs() GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); } } - regArgMaskLive &= ~genRegMask(srcRegNum);//maybe do this later is better! + regArgMaskLive &= ~genRegMask(srcRegNum); // maybe do this later is better! } else if (varDsc->lvIsSplit) { @@ -3518,7 +3524,8 @@ void CodeGen::genFnPrologCalleeRegArgs() baseOffset = 8; base += 8; - GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size/*EA_PTRSIZE*/, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); + GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size /*EA_PTRSIZE*/, REG_SCRATCH, REG_SPBASE, + genTotalFrameSize()); if ((-2048 <= base) && (base < 2048)) { GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, baseOffset); @@ -3528,9 +3535,9 @@ void CodeGen::genFnPrologCalleeRegArgs() if (tmp_reg == REG_NA) { regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); GetEmitter()->emitIns_S_R(INS_st_d, size, REG_ARG_LAST, varNum, -8); @@ -3554,23 +3561,25 @@ void CodeGen::genFnPrologCalleeRegArgs() while (regArgNum > 0) { - varNum = regArgsVars[regArgNum - 1]; + varNum = regArgsVars[regArgNum - 1]; LclVarDsc* varDsc = compiler->lvaTable + varNum; if (varDsc->GetArgInitReg() > varDsc->GetArgReg()) { var_types destMemType = varDsc->TypeGet(); - GetEmitter()->emitIns_R_R(ins_Copy(destMemType), emitActualTypeSize(destMemType), varDsc->GetArgInitReg(), varDsc->GetArgReg()); + GetEmitter()->emitIns_R_R(ins_Copy(destMemType), emitActualTypeSize(destMemType), varDsc->GetArgInitReg(), + varDsc->GetArgReg()); regArgNum--; regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); } else { - for (int i=0; i < regArgNum; i++) + for (int i = 0; i < regArgNum; i++) { - LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; - var_types destMemType = varDsc2->GetRegisterType(); - inst_Mov(destMemType, varDsc2->GetArgInitReg(), varDsc2->GetArgReg(), /* canSkip */ false, emitActualTypeSize(destMemType)); + LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; + var_types destMemType = varDsc2->GetRegisterType(); + inst_Mov(destMemType, varDsc2->GetArgInitReg(), varDsc2->GetArgReg(), /* canSkip */ false, + emitActualTypeSize(destMemType)); regArgMaskLive &= ~genRegMask(varDsc2->GetArgReg()); } break; @@ -3578,9 +3587,8 @@ void CodeGen::genFnPrologCalleeRegArgs() } assert(!regArgMaskLive); - } -#else //!defined(TARGET_LOONGARCH64) +#else //! 
defined(TARGET_LOONGARCH64) void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) { #ifdef DEBUG @@ -3654,20 +3662,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // struct regArgElem { - unsigned varNum; // index into compiler->lvaTable[] for this register argument + unsigned varNum; // index into compiler->lvaTable[] for this register argument #if defined(UNIX_AMD64_ABI) - var_types type; // the Jit type of this regArgTab entry -#endif // defined(UNIX_AMD64_ABI) - unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. - // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to - // argument register number 'x'. Only used when circular = true. - char slot; // 0 means the register is not used for a register argument - // 1 means the first part of a register argument - // 2, 3 or 4 means the second,third or fourth part of a multireg argument - bool stackArg; // true if the argument gets homed to the stack - bool writeThru; // true if the argument gets homed to both stack and register - bool processed; // true after we've processed the argument (and it is in its final location) - bool circular; // true if this register participates in a circular dependency loop. + var_types type; // the Jit type of this regArgTab entry +#endif // defined(UNIX_AMD64_ABI) + unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. + // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to + // argument register number 'x'. Only used when circular = true. 
+ char slot; // 0 means the register is not used for a register argument + // 1 means the first part of a register argument + // 2, 3 or 4 means the second,third or fourth part of a multireg argument + bool stackArg; // true if the argument gets homed to the stack + bool writeThru; // true if the argument gets homed to both stack and register + bool processed; // true after we've processed the argument (and it is in its final location) + bool circular; // true if this register participates in a circular dependency loop. #ifdef UNIX_AMD64_ABI @@ -4470,10 +4478,10 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere else #endif // TARGET_XARCH { - var_types destMemType = varDscDest->TypeGet(); + var_types destMemType = varDscDest->TypeGet(); #ifdef TARGET_ARM - bool cycleAllDouble = true; // assume the best + bool cycleAllDouble = true; // assume the best unsigned iter = begReg; do @@ -4898,8 +4906,8 @@ void CodeGen::genEnregisterIncomingStackArgs() unsigned varNum = 0; #ifdef TARGET_LOONGARCH64 - int tmp_offset = 0; - regNumber tmp_reg = REG_NA; + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; #endif for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) @@ -4951,8 +4959,8 @@ void CodeGen::genEnregisterIncomingStackArgs() #ifdef TARGET_LOONGARCH64 { bool FPbased; - //int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; - int base = compiler->lvaFrameAddress(varNum, &FPbased); + // int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; + int base = compiler->lvaFrameAddress(varNum, &FPbased); if ((-2048 <= base) && (base < 2048)) { @@ -4963,9 +4971,9 @@ void CodeGen::genEnregisterIncomingStackArgs() if (tmp_reg == REG_NA) { regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8); @@ -5959,8 +5967,9 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) if (!IsSaveFpRaWithAllCalleeSavedRegisters()) { - JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, dspBool(compiler->compLocallocUsed)); + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, + dspBool(compiler->compLocallocUsed)); frameType = 1; @@ -5974,17 +5983,18 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) calleeSaveSPOffset = compiler->compLclFrameSize; - JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, dspBool(compiler->compLocallocUsed)); - + JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, + dspBool(compiler->compLocallocUsed)); } - //calleeSaveSPDelta = 0; + // calleeSaveSPDelta = 0; } else { if (!IsSaveFpRaWithAllCalleeSavedRegisters()) { - JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? %s\n", + JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? 
%s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, dspBool(compiler->compLocallocUsed)); @@ -5993,13 +6003,13 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) int outSzAligned; if (compiler->lvaOutgoingArgSpaceSize >= 2040) { - int offset = totalFrameSize - compiler->compLclFrameSize - 2*REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); calleeSaveSPOffset = calleeSaveSPDelta - offset; - int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); - offset2 = calleeSaveSPDelta - offset2; + offset2 = calleeSaveSPDelta - offset2; if (compiler->compLocallocUsed) { @@ -6009,8 +6019,8 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } else { - outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; - //if (outSzAligned > 0) + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + // if (outSzAligned > 0) { genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); } @@ -6026,7 +6036,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2*REGSIZE_BYTES; + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); } else @@ -6047,11 +6057,12 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); compiler->unwindSaveReg(REG_FP, offset2); - calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 
2*REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; - genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, + /* reportUnwindData */ true); } } else @@ -6063,7 +6074,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) dspBool(compiler->compLocallocUsed)); calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; if (compiler->compLocallocUsed) @@ -6115,11 +6126,11 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } else if (frameType == 3) { - //genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); } else if (frameType == 4) { - //genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); } else { @@ -6710,7 +6721,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, regMaskTP regMask; regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers - //see: src/jit/registerloongarch64.h + // see: src/jit/registerloongarch64.h availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are // currently live availMask &= ~genRegMask(initReg); // Remove the pre-calculated 
initReg as we will zero it and maybe use it for @@ -6721,7 +6732,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, // rAddr is not a live incoming argument reg assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); - assert(untrLclLo%4 == 0); + assert(untrLclLo % 4 == 0); if ((-2048 <= untrLclLo) && (untrLclLo < 2048)) { @@ -6815,7 +6826,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, } noway_assert(uCntBytes == 0); -#else // TARGET* +#else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* } @@ -7190,7 +7201,7 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed compiler->lvaCachedGenericContextArgOffset()); #elif defined(TARGET_LOONGARCH64) genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), - compiler->lvaCachedGenericContextArgOffset(), REG_R21); + compiler->lvaCachedGenericContextArgOffset(), REG_R21); #else // !ARM64 !ARM !LOONGARCH64 // mov [ebp-lvaCachedGenericContextArgOffset()], reg GetEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), @@ -9407,20 +9418,20 @@ void CodeGen::genFnEpilog(BasicBlock* block) switch (addrInfo.accessType) { case IAT_VALUE: - //if (validImmForBAL((ssize_t)addrInfo.addr)) - //{ - // // Simple direct call + // if (validImmForBAL((ssize_t)addrInfo.addr)) + //{ + // // Simple direct call - // //TODO for LA. - // callType = emitter::EC_FUNC_TOKEN; - // addr = addrInfo.addr; - // indCallReg = REG_NA; - // break; - //} + // //TODO for LA. + // callType = emitter::EC_FUNC_TOKEN; + // addr = addrInfo.addr; + // indCallReg = REG_NA; + // break; + //} - //// otherwise the target address doesn't fit in an immediate - //// so we have to burn a register... - //__fallthrough; + //// otherwise the target address doesn't fit in an immediate + //// so we have to burn a register... 
+ //__fallthrough; case IAT_PVALUE: // Load the address into a register, load indirect and call through a register @@ -9490,7 +9501,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) // https://github.com/dotnet/coreclr/issues/4827 // Do we need a special encoding for stack walker like rex.w prefix for x64? - //TODO for LA: whether the relative address is enough for optimize? + // TODO for LA: whether the relative address is enough for optimize? GetEmitter()->emitIns_R_R_I(INS_jirl, emitTypeSize(TYP_I_IMPL), REG_R0, REG_FASTTAILCALL_TARGET, 0); } #endif // FEATURE_FASTTAILCALL @@ -10656,7 +10667,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass, CorInfoCallCo structPassingKind howToReturnStruct; var_types returnType = getReturnTypeForStruct(hClass, callConv, &howToReturnStruct); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType)); #else return (varTypeIsStruct(returnType)); @@ -12206,16 +12217,16 @@ void CodeGen::genStructReturn(GenTree* treeNode) LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); assert(varDsc->lvIsMultiRegRet); #ifdef TARGET_LOONGARCH64 - var_types type = retTypeDesc.GetReturnRegType(0); - regNumber toReg = retTypeDesc.GetABIReturnReg(0); + var_types type = retTypeDesc.GetReturnRegType(0); + regNumber toReg = retTypeDesc.GetABIReturnReg(0); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), 0); if (regCount > 1) { assert(regCount == 2); int offset = genTypeSize(type); - type = retTypeDesc.GetReturnRegType(1); - offset = offset < genTypeSize(type) ? genTypeSize(type) : offset; - toReg = retTypeDesc.GetABIReturnReg(1); + type = retTypeDesc.GetReturnRegType(1); + offset = offset < genTypeSize(type) ? 
genTypeSize(type) : offset; + toReg = retTypeDesc.GetABIReturnReg(1); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); } #else diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index e10e498466f52..92043be1edc67 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2516,10 +2516,10 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) } #ifdef TARGET_LOONGARCH64 - m_extendKind = castUnsigned ? ZERO_EXTEND_INT : SIGN_EXTEND_INT; - cast->gtFlags |= castUnsigned ? GTF_UNSIGNED : GTF_EMPTY; + m_extendKind = castUnsigned ? ZERO_EXTEND_INT : SIGN_EXTEND_INT; + cast->gtFlags |= castUnsigned ? GTF_UNSIGNED : GTF_EMPTY; #else - m_extendKind = COPY; + m_extendKind = COPY; #endif m_extendSrcSize = 4; } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index c5f407f3be311..be858336db73d 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -24,14 +24,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfo.h" #include "gcinfoencoder.h" -static short splitLow(int value) { +static short splitLow(int value) +{ return (value & 0xffff); } // Returns true if 'value' is a legal signed immediate 16 bit encoding. static bool isValidSimm16(ssize_t value) { - return -( ((int)1) << 15 ) <= value && value < ( ((int)1) << 15 ); + return -(((int)1) << 15) <= value && value < (((int)1) << 15); }; // Returns true if 'value' is a legal unsigned immediate 16 bit encoding. @@ -43,7 +44,7 @@ static bool isValidUimm16(ssize_t value) // Returns true if 'value' is a legal signed immediate 12 bit encoding. 
static bool isValidSimm12(ssize_t value) { - return -( ((int)1) << 11 ) <= value && value < ( ((int)1) << 11 ); + return -(((int)1) << 11) <= value && value < (((int)1) << 11); }; // Returns true if 'value' is a legal unsigned immediate 11 bit encoding. @@ -102,7 +103,7 @@ bool CodeGen::genInstrWithConstant(instruction ins, regNumber tmpReg, bool inUnwindRegion /* = false */) { - emitAttr size = EA_SIZE(attr); + emitAttr size = EA_SIZE(attr); // reg1 is usually a dest register // reg2 is always source register @@ -260,26 +261,16 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // generate addi.d SP,SP,-imm genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); - assert((spDelta+spOffset+16)<=0); + assert((spDelta + spOffset + 16) <= 0); - assert(spOffset <= 2031);//2047-16 + assert(spOffset <= 2031); // 2047-16 } -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_st_d == ins)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); -// compiler->unwindSaveRegPair(reg1, reg2, spOffset); -// } -// else -// #endif - { GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); - compiler->unwindSaveReg(reg2, spOffset+8); - } + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); } //------------------------------------------------------------------------ @@ -320,7 +311,6 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); - } //------------------------------------------------------------------------ @@ -365,42 +355,23 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, if (spDelta != 0) { 
assert(!useSaveNextPair); -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_ld_d == ins)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); -// compiler->unwindSaveRegPair(reg1, reg2, spOffset); -// } -// else -// #endif - { - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); - compiler->unwindSaveReg(reg2, spOffset+8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); - } // generate daddiu SP,SP,imm genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); } else { -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_ld_d == ins)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); -// compiler->unwindSaveRegPair(reg1, reg2, spOffset); -// } -// else -// #endif - { - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); - compiler->unwindSaveReg(reg2, spOffset+8); + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); - } } } @@ -1064,8 +1035,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert((maskSaveRegsInt & RBM_RA) != 0); assert((maskSaveRegsInt & RBM_FP) != 0); - bool isFilter = (block->bbCatchTyp == BBCT_FILTER); - int frameSize = genFuncletInfo.fiSpDelta1; + bool isFilter = (block->bbCatchTyp == BBCT_FILTER); + int frameSize = genFuncletInfo.fiSpDelta1; regMaskTP maskArgRegsLiveIn; if (isFilter) @@ -1096,25 +1067,18 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 
2040); genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(genFuncletInfo.fiSP_to_FPRA_save_delta & 0xf) && (genFuncletInfo.fiSP_to_FPRA_save_delta <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta); -// } -// else -// #endif - { - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta); compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta + 8); compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); - } maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now - genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, + 0); } else if (genFuncletInfo.fiFrameType == 2) { @@ -1125,34 +1089,25 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); - genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, + 0); } else if (genFuncletInfo.fiFrameType == 3) { // fiFrameType constraints: assert(frameSize 
< -2048); - offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; + offset = SP_delta - offset; genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(offset & 0xf) && (offset <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); -// } -// else -// #endif - { GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); compiler->unwindSaveReg(REG_FP, offset); GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); compiler->unwindSaveReg(REG_RA, offset + 8); - } maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now @@ -1166,9 +1121,9 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // fiFrameType constraints: assert(frameSize < -2048); - offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); + offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; + offset = SP_delta - offset; genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); @@ -1201,8 +1156,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block) regSet.verifyRegUsed(REG_A1); // Store the PSP value (aka CallerSP) - genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2, - false); + genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, + REG_A2, false); // re-establish the frame pointer genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_A1, @@ -1217,8 +1172,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block) -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, 
false); regSet.verifyRegUsed(REG_A3); - genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2, - false); + genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, + REG_A2, false); } } } @@ -1238,7 +1193,7 @@ void CodeGen::genFuncletEpilog() ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); bool unwindStarted = false; - int frameSize = genFuncletInfo.fiSpDelta1; + int frameSize = genFuncletInfo.fiSpDelta1; if (!unwindStarted) { @@ -1272,21 +1227,13 @@ void CodeGen::genFuncletEpilog() genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(genFuncletInfo.fiSP_to_FPRA_save_delta & 0xf) && (genFuncletInfo.fiSP_to_FPRA_save_delta <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta); -// } -// else -// #endif - { - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta + 8); compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta); compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); - } // generate daddiu SP,SP,imm genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); @@ -1307,36 +1254,26 @@ void CodeGen::genFuncletEpilog() // fiFrameType constraints: assert(frameSize < -2048); - - int offset = -frameSize - 
genFuncletInfo.fiSP_to_FPRA_save_delta; + int offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; + offset = SP_delta - offset; - //first, generate daddiu SP,SP,imm + // first, generate daddiu SP,SP,imm genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); int offset2 = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; - assert(offset2 < 2040);//can amend. + assert(offset2 < 2040); // can amend. regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset2, 0); -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(offset & 0xf) && (offset <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); -// } -// else -// #endif - { GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); compiler->unwindSaveReg(REG_RA, offset + 8); GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); compiler->unwindSaveReg(REG_FP, offset); - } - //second, generate daddiu SP,SP,imm for remaine space. + + // second, generate daddiu SP,SP,imm for remaine space. 
genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); } else if (genFuncletInfo.fiFrameType == 4) @@ -1344,9 +1281,9 @@ void CodeGen::genFuncletEpilog() // fiFrameType constraints: assert(frameSize < -2048); - int offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); + int offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; + offset = SP_delta - offset; genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); @@ -1397,9 +1334,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned saveRegsPlusPSPSize; if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize +PSPSize/* -2*8*/; + saveRegsPlusPSPSize = + roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize /* -2*8*/; else - saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize +PSPSize; + saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; if (compiler->info.compIsVarArgs) { @@ -1424,12 +1362,12 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES)); - if (maxFuncletFrameSizeAligned <= (2048-8)) + if (maxFuncletFrameSizeAligned <= (2048 - 8)) { if (!IsSaveFpRaWithAllCalleeSavedRegisters()) { genFuncletInfo.fiFrameType = 1; - saveRegsPlusPSPSize -= 2*8;// FP/RA + saveRegsPlusPSPSize -= 2 * 8; // FP/RA } else { @@ -1445,7 +1383,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() if (!IsSaveFpRaWithAllCalleeSavedRegisters()) { genFuncletInfo.fiFrameType = 3; - saveRegsPlusPSPSize -= 2*8;// FP/RA + saveRegsPlusPSPSize -= 2 * 8; // FP/RA } else { @@ 
-1454,14 +1392,13 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() } } - int CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize; - genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; - int SP_to_PSP_slot_delta = funcletFrameSizeAligned - saveRegsPlusPSPSize; + genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; + int SP_to_PSP_slot_delta = funcletFrameSizeAligned - saveRegsPlusPSPSize; /* Now save it for future use */ - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSP_to_FPRA_save_delta = SP_to_FPRA_save_delta; + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSP_to_FPRA_save_delta = SP_to_FPRA_save_delta; genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; @@ -1580,7 +1517,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) -{//maybe optimize. +{ // maybe optimize. emitter* emit = GetEmitter(); if (!compiler->opts.compReloc) @@ -1590,12 +1527,12 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, if (EA_IS_RELOC(size)) { - assert(genIsValidIntReg(reg));//TODO: maybe optimize!!! - emit->emitIns_R_AI(INS_bl, size, reg, imm);//for example: EA_PTR_DSP_RELOC + assert(genIsValidIntReg(reg)); // TODO: maybe optimize!!! + emit->emitIns_R_AI(INS_bl, size, reg, imm); // for example: EA_PTR_DSP_RELOC } else { - emit->emitIns_I_la(size, reg, imm);//TODO: maybe optimize. + emit->emitIns_I_la(size, reg, imm); // TODO: maybe optimize. 
} regSet.verifyRegUsed(reg); @@ -1618,10 +1555,10 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre GenTreeIntConCommon* con = tree->AsIntConCommon(); ssize_t cnsVal = con->IconValue(); - //if (con->ImmedValNeedsReloc(compiler)) + // if (con->ImmedValNeedsReloc(compiler)) if (con->ImmedValNeedsReloc(compiler) && compiler->opts.compReloc) { - //instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); + // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); assert(compiler->opts.compReloc); GetEmitter()->emitIns_R_AI(INS_bl, EA_HANDLE_CNS_RELOC, targetReg, cnsVal); regSet.verifyRegUsed(targetReg); @@ -1654,7 +1591,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre else { // Get a temp integer register to compute long address. - //regNumber addrReg = tree->GetSingleTempReg(); + // regNumber addrReg = tree->GetSingleTempReg(); // We must load the FP constant from the constant pool // Emit a data section constant for the float or double constant. @@ -1747,12 +1684,11 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) // This method is expected to have called genConsumeOperands() before calling it. 
void CodeGen::genCodeForBinary(GenTreeOp* treeNode) { - const genTreeOps oper = treeNode->OperGet(); - regNumber targetReg = treeNode->GetRegNum(); - emitter* emit = GetEmitter(); + const genTreeOps oper = treeNode->OperGet(); + regNumber targetReg = treeNode->GetRegNum(); + emitter* emit = GetEmitter(); - assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_AND || - oper == GT_OR || oper == GT_XOR); + assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_AND || oper == GT_OR || oper == GT_XOR); GenTree* op1 = treeNode->gtGetOp1(); GenTree* op2 = treeNode->gtGetOp2(); @@ -1788,10 +1724,10 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED)) { - var_types targetType = varDsc->GetRegisterType(tree); - //if (tree->gtFlags & GTF_UNSIGNED) + var_types targetType = varDsc->GetRegisterType(tree); + // if (tree->gtFlags & GTF_UNSIGNED) // targetType = varTypeSignedToUnsigned(targetType);//uuuuu. - emitter* emit = GetEmitter(); + emitter* emit = GetEmitter(); // targetType must be a normal scalar type and not a TYP_STRUCT assert(targetType != TYP_STRUCT); @@ -1890,13 +1826,13 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) return; } - regNumber targetReg = lclNode->GetRegNum(); - emitter* emit = GetEmitter(); + regNumber targetReg = lclNode->GetRegNum(); + emitter* emit = GetEmitter(); unsigned varNum = lclNode->GetLclNum(); assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - var_types targetType = varDsc->GetRegisterType(lclNode); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + var_types targetType = varDsc->GetRegisterType(lclNode); if (lclNode->IsMultiReg()) { @@ -2055,7 +1991,9 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) } else { - if (attr == EA_4BYTE) // && op1->OperIs(GT_LCL_VAR) && (emitActualTypeSize(compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvType) == EA_8BYTE)) + if (attr == EA_4BYTE) // && 
op1->OperIs(GT_LCL_VAR) && + // (emitActualTypeSize(compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvType) == + // EA_8BYTE)) { if (treeNode->gtFlags & GTF_UNSIGNED) GetEmitter()->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, retReg, op1->GetRegNum(), 31, 0); @@ -2085,7 +2023,7 @@ void CodeGen::genLclHeap(GenTree* tree) regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); - BasicBlock* endLabel = nullptr;//can optimize for loongarch. + BasicBlock* endLabel = nullptr; // can optimize for loongarch. unsigned stackAdjustment = 0; const target_ssize_t ILLEGAL_LAST_TOUCH_DELTA = (target_ssize_t)-1; target_ssize_t lastTouchDelta = @@ -2160,10 +2098,9 @@ void CodeGen::genLclHeap(GenTree* tree) if (compiler->lvaOutgoingArgSpaceSize > 0) { unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); - //assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain + // assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain // // aligned - genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, outgoingArgSpaceAligned, - rsGetRsvdReg()); + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, outgoingArgSpaceAligned, rsGetRsvdReg()); stackAdjustment += outgoingArgSpaceAligned; } @@ -2206,7 +2143,7 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SP, 0); lastTouchDelta = amount; - imm = -(ssize_t)amount; + imm = -(ssize_t)amount; assert(-8192 <= imm && imm < 0); if (-2048 <= imm && imm < 0) emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); @@ -2259,7 +2196,7 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_R_R_I(INS_addi_d, emitActualTypeSize(type), regCnt, regCnt, -16); - assert(imm == (-4 << 2));//goto loop. + assert(imm == (-4 << 2)); // goto loop. 
emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2)); lastTouchDelta = 0; @@ -2307,10 +2244,10 @@ void CodeGen::genLclHeap(GenTree* tree) // Overflow, set regCnt to lowest possible value emit->emitIns_R_R_R(INS_masknez, EA_PTRSIZE, regCnt, regCnt, REG_R21); - assert(compiler->eeGetPageSize() == ((compiler->eeGetPageSize()>>12)<<12)); - emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regTmp, compiler->eeGetPageSize()>>12); + assert(compiler->eeGetPageSize() == ((compiler->eeGetPageSize() >> 12) << 12)); + emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regTmp, compiler->eeGetPageSize() >> 12); - //genDefineTempLabel(loop); + // genDefineTempLabel(loop); // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SPBASE, 0); @@ -2320,7 +2257,7 @@ void CodeGen::genLclHeap(GenTree* tree) assert(regTmp != REG_R21); - ssize_t imm = 3 << 2;//goto done. + ssize_t imm = 3 << 2; // goto done. emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, REG_R21, regCnt, imm); emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, regTmp); @@ -2330,7 +2267,7 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_I(INS_b, EA_PTRSIZE, imm); // Done with stack tickle loop - //genDefineTempLabel(done); + // genDefineTempLabel(done); // Now just move the final value to SP emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0); @@ -2430,7 +2367,7 @@ void CodeGen::genCodeForBswap(GenTree* tree) // tree - the node // void CodeGen::genCodeForDivMod(GenTreeOp* tree) -{//can amend further. +{ // can amend further. 
assert(tree->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); var_types targetType = tree->TypeGet(); @@ -2444,9 +2381,10 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) assert(varTypeIsFloating(tree->gtOp1)); assert(varTypeIsFloating(tree->gtOp2)); assert(tree->gtOper == GT_DIV); - //genCodeForBinary(tree); + // genCodeForBinary(tree); instruction ins = genGetInsForOper(tree); - emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(), tree->gtOp2->GetRegNum()); + emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(), + tree->gtOp2->GetRegNum()); } else // an integer divide operation { @@ -2461,23 +2399,23 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) } else // the divisor is not the constant zero { - GenTree* src1 = tree->gtOp1; + GenTree* src1 = tree->gtOp1; unsigned TypeSize = genTypeSize(genActualType(tree->TypeGet())); - emitAttr size = EA_ATTR(TypeSize); + emitAttr size = EA_ATTR(TypeSize); - assert(TypeSize >= genTypeSize(genActualType(src1->TypeGet())) - && TypeSize >= genTypeSize(genActualType(divisorOp->TypeGet()))); + assert(TypeSize >= genTypeSize(genActualType(src1->TypeGet())) && + TypeSize >= genTypeSize(genActualType(divisorOp->TypeGet()))); - //ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; - regNumber Reg1 = src1->GetRegNum(); - regNumber divisorReg = divisorOp->GetRegNum(); + // ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; + regNumber Reg1 = src1->GetRegNum(); + regNumber divisorReg = divisorOp->GetRegNum(); instruction ins; // Check divisorOp first as we can always allow it to be a contained immediate if (divisorOp->isContainedIntOrIImmed()) { ssize_t intConst = (int)(divisorOp->AsIntCon()->gtIconVal); - divisorReg = REG_R21; + divisorReg = REG_R21; if ((-2048 <= intConst) && (intConst <= 0x7ff)) emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst); else @@ -2497,7 +2435,7 @@ void 
CodeGen::genCodeForDivMod(GenTreeOp* tree) { assert(!divisorOp->isContainedIntOrIImmed()); ssize_t intConst = (int)(src1->AsIntCon()->gtIconVal); - Reg1 = REG_R21; + Reg1 = REG_R21; if ((-2048 <= intConst) && (intConst <= 0x7ff)) emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst); else @@ -2527,7 +2465,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) if (divisorOp->IsCnsIntOrI()) { ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; - //assert(intConstValue != 0); // already checked above by IsIntegralConst(0) + // assert(intConstValue != 0); // already checked above by IsIntegralConst(0) if (intConstValue != -1) { checkDividend = false; // We statically know that the dividend is not -1 @@ -2543,7 +2481,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) { // Check if the divisor is not -1 branch to 'sdivLabel' emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, -1); - BasicBlock* sdivLabel = genCreateTempLabel();//can optimize for loongarch64. + BasicBlock* sdivLabel = genCreateTempLabel(); // can optimize for loongarch64. emit->emitIns_J_cond_la(INS_bne, sdivLabel, REG_R21, divisorReg); // If control flow continues past here the 'divisorReg' is known to be -1 @@ -2555,7 +2493,8 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) emit->emitIns_J_cond_la(INS_beq, sdivLabel, dividendReg, REG_R0); - emit->emitIns_R_R_R(size == EA_4BYTE ? INS_add_w : INS_add_d, size, REG_R21, dividendReg, dividendReg); + emit->emitIns_R_R_R(size == EA_4BYTE ? 
INS_add_w : INS_add_d, size, REG_R21, dividendReg, + dividendReg); genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, REG_R21); genDefineTempLabel(sdivLabel); } @@ -2582,7 +2521,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); } - else //if (tree->gtOper == GT_UDIV) GT_UMOD + else // if (tree->gtOper == GT_UDIV) GT_UMOD { // Only one possible exception // (AnyVal / 0) => DivideByZeroException @@ -2607,7 +2546,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) else ins = INS_mod_wu; - //TODO: temp workround, should amend for optimize. + // TODO: temp workround, should amend for optimize. emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0); emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, divisorReg, divisorReg, 0); } @@ -2846,10 +2785,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); emit->emitIns_R_R_I(INS_ld_d, attr1, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, + 2 * TARGET_POINTER_SIZE); emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); emit->emitIns_R_R_I(INS_st_d, attr1, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, + 2 * TARGET_POINTER_SIZE); i += 2; } @@ -2859,9 +2800,11 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0)); emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); - 
emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, + TARGET_POINTER_SIZE); emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); - emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, + TARGET_POINTER_SIZE); } } else @@ -2878,18 +2821,22 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, + REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE); emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, + REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE); ++i; // extra increment of i, since we are copying two items } else { emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); - emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, + REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); 
emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); - emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, + REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); } } else @@ -2931,7 +2878,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode) GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, baseReg, baseReg, 0); // add it to the absolute address of fgFirstBB - //compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;//TODO for LOONGARCH64. + // compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;//TODO for LOONGARCH64. GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, baseReg, baseReg, tmpReg); @@ -3000,17 +2947,17 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) static inline bool isImmed(GenTree* treeNode) { - if (treeNode->gtGetOp1()->isContainedIntOrIImmed()) - { + if (treeNode->gtGetOp1()->isContainedIntOrIImmed()) + { + return true; + } + else if (treeNode->OperIsBinary()) + { + if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) return true; - } - else if (treeNode->OperIsBinary()) - { - if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) - return true; - } + } - return false; + return false; } instruction CodeGen::genGetInsForOper(GenTree* treeNode) @@ -3019,8 +2966,8 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) genTreeOps oper = treeNode->OperGet(); GenTree* op1 = treeNode->gtGetOp1(); GenTree* op2; - emitAttr attr = emitActualTypeSize(treeNode); - bool isImm = false; + emitAttr attr = emitActualTypeSize(treeNode); + bool isImm = false; instruction ins = INS_break; @@ -3161,7 +3108,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) case GT_MUL: if ((attr == EA_8BYTE) || (attr == EA_BYREF)) { - op2 = treeNode->gtGetOp2(); + op2 = treeNode->gtGetOp2(); if (genActualTypeIsInt(op1) && 
genActualTypeIsInt(op2)) ins = treeNode->IsUnsigned() ? INS_mulw_d_wu : INS_mulw_d_w; else @@ -3220,7 +3167,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) isImm = isImmed(treeNode); if (isImm) { - //it's better to check sa. + // it's better to check sa. if (attr == EA_4BYTE) ins = INS_slli_w; else @@ -3239,7 +3186,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) isImm = isImmed(treeNode); if (isImm) { - //it's better to check sa. + // it's better to check sa. if (attr == EA_4BYTE) ins = INS_srli_w; else @@ -3258,7 +3205,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) isImm = isImmed(treeNode); if (isImm) { - //it's better to check sa. + // it's better to check sa. if (attr == EA_4BYTE) ins = INS_srai_w; else @@ -3277,7 +3224,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) isImm = isImmed(treeNode); if (isImm) { - //it's better to check sa. + // it's better to check sa. if (attr == EA_4BYTE) ins = INS_rotri_w; else @@ -3332,45 +3279,45 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) BasicBlock* skipLabel = genCreateTempLabel(); GetEmitter()->emitIns_J_cond_la(INS_beq, skipLabel, data->GetRegNum(), REG_R0); - void* pAddr = nullptr; - void* addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); + void* pAddr = nullptr; + void* addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); emitter::EmitCallType callType; - regNumber callTarget; + regNumber callTarget; if (addr == nullptr) { - callType = emitter::EC_INDIR_R; + callType = emitter::EC_INDIR_R; callTarget = REG_DEFAULT_HELPER_CALL_TARGET; - //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); if (compiler->opts.compReloc) { GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); } else { - //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); - 
//GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000)>>12); + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff)>>2); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); } regSet.verifyRegUsed(callTarget); - //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); } else { - callType = emitter::EC_FUNC_TOKEN; + callType = emitter::EC_FUNC_TOKEN; callTarget = REG_NA; } ////TODO: can optimize further !!! 
- GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), INDEBUG_LDISASM_COMMA(nullptr) addr, 0, - EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ + GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), + INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ ); genDefineTempLabel(skipLabel); @@ -3443,8 +3390,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) dataReg = data->GetRegNum(); } - var_types type = tree->TypeGet(); - instruction ins = ins_Store(type); + var_types type = tree->TypeGet(); + instruction ins = ins_Store(type); if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) { @@ -3491,27 +3438,27 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) assert(genIsValidFloatReg(targetReg)); GenTree* op1 = treeNode->AsOp()->gtOp1; - assert(!op1->isContained()); // Cannot be contained + assert(!op1->isContained()); // Cannot be contained assert(genIsValidIntReg(op1->GetRegNum())); // Must be a valid int reg. 
var_types dstType = treeNode->CastToType(); var_types srcType = genActualType(op1->TypeGet()); assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); - emitter *emit = GetEmitter(); + emitter* emit = GetEmitter(); emitAttr attr = emitActualTypeSize(dstType); // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE)); - bool IsUnsigned = treeNode->gtFlags & GTF_UNSIGNED; - instruction ins = INS_invalid; + bool IsUnsigned = treeNode->gtFlags & GTF_UNSIGNED; + instruction ins = INS_invalid; genConsumeOperands(treeNode->AsOp()); if (IsUnsigned) - {//should amend. + { // should amend. emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, REG_SCRATCH_FLT, op1->GetRegNum()); // save op1 if (srcSize == EA_8BYTE) @@ -3571,8 +3518,10 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) ssize_t imm = 3 << 2; emit->emitIns_R_R_I(INS_bge, EA_8BYTE, op1->GetRegNum(), REG_R0, imm); - emit->emitIns_R_R(dstType == TYP_DOUBLE ? INS_fmov_d : INS_fmov_s, attr, REG_SCRATCH_FLT, treeNode->GetRegNum()); - emit->emitIns_R_R_R(dstType == TYP_DOUBLE ? INS_fadd_d : INS_fadd_s, attr, treeNode->GetRegNum(), REG_SCRATCH_FLT, treeNode->GetRegNum()); + emit->emitIns_R_R(dstType == TYP_DOUBLE ? INS_fmov_d : INS_fmov_s, attr, REG_SCRATCH_FLT, + treeNode->GetRegNum()); + emit->emitIns_R_R_R(dstType == TYP_DOUBLE ? INS_fadd_d : INS_fadd_s, attr, treeNode->GetRegNum(), + REG_SCRATCH_FLT, treeNode->GetRegNum()); } } @@ -3604,7 +3553,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) assert(genIsValidIntReg(targetReg)); // Must be a valid int reg. GenTree* op1 = treeNode->AsOp()->gtOp1; - assert(!op1->isContained()); // Cannot be contained + assert(!op1->isContained()); // Cannot be contained assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg. 
var_types dstType = treeNode->CastToType(); @@ -3618,9 +3567,9 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE)); - instruction ins1 = INS_invalid; - instruction ins2 = INS_invalid; - bool IsUnsigned = varTypeIsUnsigned(dstType); + instruction ins1 = INS_invalid; + instruction ins2 = INS_invalid; + bool IsUnsigned = varTypeIsUnsigned(dstType); regNumber tmpReg = REG_SCRATCH_FLT; assert(tmpReg != op1->GetRegNum()); @@ -3688,7 +3637,8 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) //{ // GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, tmpReg, REG_R0); - // GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_c_olt_d : INS_c_olt_s, EA_8BYTE, op1->GetRegNum(), tmpReg, 2); + // GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_c_olt_d : INS_c_olt_s, EA_8BYTE, op1->GetRegNum(), + // tmpReg, 2); // GetEmitter()->emitIns_I_I(INS_bc1f, EA_PTRSIZE, 2, 4 << 2); // GetEmitter()->emitIns_R_R_I(INS_ori*/, EA_PTRSIZE, treeNode->GetRegNum(), REG_R0, 0); @@ -3702,15 +3652,18 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) GetEmitter()->emitIns_R_R(srcType == TYP_DOUBLE ? INS_movgr2fr_d : INS_movgr2fr_w, EA_8BYTE, tmpReg, REG_R21); - GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_fcmp_clt_d : INS_fcmp_clt_s, EA_8BYTE, op1->GetRegNum(), tmpReg, 2); + GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_fcmp_clt_d : INS_fcmp_clt_s, EA_8BYTE, op1->GetRegNum(), + tmpReg, 2); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 0); GetEmitter()->emitIns_I_I(INS_bcnez, EA_PTRSIZE, 2, 4 << 2); - GetEmitter()->emitIns_R_R_R(srcType == TYP_DOUBLE ? INS_fsub_d : INS_fsub_s, EA_8BYTE, tmpReg, op1->GetRegNum(), tmpReg); + GetEmitter()->emitIns_R_R_R(srcType == TYP_DOUBLE ? 
INS_fsub_d : INS_fsub_s, EA_8BYTE, tmpReg, op1->GetRegNum(), + tmpReg); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 1); - GetEmitter()->emitIns_R_R_I(dstSize == EA_8BYTE ? INS_slli_d : INS_slli_w, EA_PTRSIZE, REG_R21, REG_R21, dstSize == EA_8BYTE ? 63 : 31); + GetEmitter()->emitIns_R_R_I(dstSize == EA_8BYTE ? INS_slli_d : INS_slli_w, EA_PTRSIZE, REG_R21, REG_R21, + dstSize == EA_8BYTE ? 63 : 31); GetEmitter()->emitIns_R_R_R_I(INS_fsel, EA_PTRSIZE, tmpReg, tmpReg, op1->GetRegNum(), 2); @@ -3744,11 +3697,11 @@ void CodeGen::genCkfinite(GenTree* treeNode) { assert(treeNode->OperGet() == GT_CKFINITE); - GenTree* op1 = treeNode->AsOp()->gtOp1; - var_types targetType = treeNode->TypeGet(); - ssize_t expMask = (targetType == TYP_FLOAT) ? 0xFF : 0x7FF; // Bit mask to extract exponent. - ssize_t size = (targetType == TYP_FLOAT) ? 8 : 11; // Bit size to extract exponent. - ssize_t pos = (targetType == TYP_FLOAT) ? 23 : 52; // Bit pos of exponent. + GenTree* op1 = treeNode->AsOp()->gtOp1; + var_types targetType = treeNode->TypeGet(); + ssize_t expMask = (targetType == TYP_FLOAT) ? 0xFF : 0x7FF; // Bit mask to extract exponent. + ssize_t size = (targetType == TYP_FLOAT) ? 8 : 11; // Bit size to extract exponent. + ssize_t pos = (targetType == TYP_FLOAT) ? 23 : 52; // Bit pos of exponent. emitter* emit = GetEmitter(); emitAttr attr = emitActualTypeSize(treeNode); @@ -3761,7 +3714,7 @@ void CodeGen::genCkfinite(GenTree* treeNode) // Mask of exponent with all 1's and check if the exponent is all 1's instruction ins = (targetType == TYP_FLOAT) ? 
INS_bstrpick_w : INS_bstrpick_d; - emit->emitIns_R_R_I_I(ins, EA_PTRSIZE, intReg, intReg, pos+size-1, pos); + emit->emitIns_R_R_I_I(ins, EA_PTRSIZE, intReg, intReg, pos + size - 1, pos); emit->emitIns_R_R_I(INS_xori, attr, intReg, intReg, expMask); genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, intReg); @@ -3785,19 +3738,19 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) emitter* emit = GetEmitter(); GenTreeOp* tree = nullptr; - regNumber targetReg; + regNumber targetReg; if (jtree->OperIs(GT_JTRUE)) { - tree = jtree->gtGetOp1()->AsOp(); + tree = jtree->gtGetOp1()->AsOp(); targetReg = REG_RA; assert(tree->GetRegNum() == REG_NA); - jtree->gtOp2 = (GenTree*)REG_RA;//targetReg + jtree->gtOp2 = (GenTree*)REG_RA; // targetReg jtree->SetRegNum((regNumber)INS_bnez); } else { - tree = jtree; + tree = jtree; targetReg = tree->GetRegNum(); } assert(targetReg != REG_NA); @@ -3821,39 +3774,51 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; - if(IsUnordered) - { - if(tree->OperIs(GT_LT)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_LE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_EQ)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cueq_s : INS_fcmp_cueq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_NE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cune_s : INS_fcmp_cune_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_GT)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_GE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? 
INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + if (IsUnordered) + { + if (tree->OperIs(GT_LT)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_LE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_EQ)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cueq_s : INS_fcmp_cueq_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_NE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cune_s : INS_fcmp_cune_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_GT)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_GE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); } else { - if(tree->OperIs(GT_LT)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_LE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_EQ)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_ceq_s : INS_fcmp_ceq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_NE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cne_s : INS_fcmp_cne_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_GT)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? 
INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_GE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + if (tree->OperIs(GT_LT)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_LE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_EQ)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_ceq_s : INS_fcmp_ceq_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_NE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cne_s : INS_fcmp_cne_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_GT)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_GE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? 
INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); } emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); - emit->emitIns_R_I(INS_movcf2gr, EA_PTRSIZE, targetReg, 1/*cc*/); + emit->emitIns_R_I(INS_movcf2gr, EA_PTRSIZE, targetReg, 1 /*cc*/); } else if (op1->isContainedIntOrIImmed() && op2->isContainedIntOrIImmed()) { @@ -3862,72 +3827,72 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); - bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; - instruction ins = INS_beqz; + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + instruction ins = INS_beqz; switch (cmpSize) { - case EA_4BYTE: + case EA_4BYTE: { imm1 = static_cast(imm1); imm2 = static_cast(imm2); } break; - case EA_8BYTE: - break; - case EA_1BYTE: + case EA_8BYTE: + break; + case EA_1BYTE: { imm1 = static_cast(imm1); imm2 = static_cast(imm2); } break; - //case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected type in jumpCompare."); + // case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpCompare."); } switch (tree->OperGet()) { - case GT_LT: - if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_LE: - if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_EQ: - if (imm1 == imm2) - { - ins = INS_b; - } - break; - case GT_NE: - if (imm1 != imm2) - { - ins = INS_b; - } - break; - case GT_GT: - if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_GE: - if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) - { - ins = INS_b; - } - break; - default: - break; + case GT_LT: + if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && 
((unsigned)imm1 < (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_LE: + if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_EQ: + if (imm1 == imm2) + { + ins = INS_b; + } + break; + case GT_NE: + if (imm1 != imm2) + { + ins = INS_b; + } + break; + case GT_GT: + if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_GE: + if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) + { + ins = INS_b; + } + break; + default: + break; } assert(ins != INS_invalid); @@ -3936,34 +3901,34 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else { - //TODO:can optimize further. + // TODO:can optimize further. if (op1->isContainedIntOrIImmed()) { op1 = tree->gtOp2; op2 = tree->gtOp1; switch (tree->OperGet()) { - case GT_LT: - tree->SetOper(GT_GT); - break; - case GT_LE: - tree->SetOper(GT_GE); - break; - case GT_GT: - tree->SetOper(GT_LT); - break; - case GT_GE: - tree->SetOper(GT_LE); - break; - default: - break; + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; } } assert(!op1->isContainedIntOrIImmed()); assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); - bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; - regNumber regOp1 = op1->GetRegNum(); + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + regNumber regOp1 = op1->GetRegNum(); if (op2->isContainedIntOrIImmed()) { @@ -3972,71 +3937,84 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { switch (cmpSize) { - case EA_4BYTE: - imm = static_cast(imm); - break; - case EA_8BYTE: - break; - case EA_1BYTE: - imm = static_cast(imm); - break; - //case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected 
type in jumpTrue(imm)."); + case EA_4BYTE: + imm = static_cast(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast(imm); + break; + // case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); } } if (tree->OperIs(GT_LT)) { - if (!IsUnsigned && isValidSimm12(imm)) { + if (!IsUnsigned && isValidSimm12(imm)) + { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); } - else if (IsUnsigned && isValidUimm11(imm)) { + else if (IsUnsigned && isValidUimm11(imm)) + { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); } } else if (tree->OperIs(GT_LE)) { - if (!IsUnsigned && isValidSimm12(imm + 1)) { + if (!IsUnsigned && isValidSimm12(imm + 1)) + { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); } - else if (IsUnsigned && isValidUimm11(imm + 1)) { + else if (IsUnsigned && isValidUimm11(imm + 1)) + { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm + 1); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm + 1); emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); } } else if (tree->OperIs(GT_GT)) { - if (!IsUnsigned && isValidSimm12(imm + 1)) { + if (!IsUnsigned && isValidSimm12(imm + 1)) + { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_RA, regOp1, imm + 1); emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); } - else if (IsUnsigned && isValidUimm11(imm + 1)) { + else if (IsUnsigned && isValidUimm11(imm + 1)) + { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, REG_RA, regOp1, imm + 1); emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_PTRSIZE, targetReg, REG_RA, regOp1); } } else if (tree->OperIs(GT_GE)) - { if (!IsUnsigned && isValidSimm12(imm)) { + { + if (!IsUnsigned && isValidSimm12(imm)) + { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); } - else if (IsUnsigned && isValidUimm11(imm)) { + else if (IsUnsigned && isValidUimm11(imm)) + { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); } @@ -4044,14 +4022,17 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else if (tree->OperIs(GT_NE)) { - if (!imm) { + if (!imm) + { emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1); } - else if (isValidUimm12(imm)) { + else if (isValidUimm12(imm)) + { emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); @@ -4059,14 +4040,17 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else if (tree->OperIs(GT_EQ)) { - if (!imm) { + if (!imm) + { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, 1); } - else if (isValidUimm12(imm)) { + else if (isValidUimm12(imm)) + { emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); @@ -4079,7 +4063,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { regNumber tmpRegOp1 = tree->ExtractTempReg(); regNumber tmpRegOp2 = tree->ExtractTempReg(); - 
regNumber regOp2 = op2->GetRegNum(); + regNumber regOp2 = op2->GetRegNum(); if (cmpSize == EA_4BYTE) { regOp1 = tmpRegOp1; @@ -4138,26 +4122,27 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) // void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { - //assert(compiler->compCurBB->bbJumpKind == BBJ_COND);//should confirm. + // assert(compiler->compCurBB->bbJumpKind == BBJ_COND);//should confirm. ////assert(jtrue->OperIs(GT_JTRUE)); emitter* emit = GetEmitter(); - GenTreeOp* tree = jtrue->OperIs(GT_JTRUE) ? jtrue->gtGetOp1()->AsOp() : jtrue; - regNumber targetReg = tree->GetRegNum(); - instruction ins = INS_invalid; + GenTreeOp* tree = jtrue->OperIs(GT_JTRUE) ? jtrue->gtGetOp1()->AsOp() : jtrue; + regNumber targetReg = tree->GetRegNum(); + instruction ins = INS_invalid; if (jtrue->OperIs(GT_JTRUE) && jtrue->gtOp2) { - emit->emitIns_J((instruction)jtrue->GetRegNum(), compiler->compCurBB->bbJumpDest, (int)(int64_t)jtrue->gtOp2);//5-bits; + emit->emitIns_J((instruction)jtrue->GetRegNum(), compiler->compCurBB->bbJumpDest, + (int)(int64_t)jtrue->gtOp2); // 5-bits; jtrue->SetRegNum(REG_NA); jtrue->gtOp2 = nullptr; return; } else { - GenTree* op1 = tree->gtOp1; - GenTree* op2 = tree->gtOp2; + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; var_types op1Type = genActualType(op1->TypeGet()); var_types op2Type = genActualType(op2->TypeGet()); @@ -4177,7 +4162,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) if (varTypeIsFloating(op1Type)) { assert(genTypeSize(op1Type) == genTypeSize(op2Type)); - //int cc = 1; + // int cc = 1; assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; @@ -4186,57 +4171,69 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { ins = INS_bcnez; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_s : INS_fcmp_ceq_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_cueq_s : INS_fcmp_ceq_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_d : INS_fcmp_ceq_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_d : INS_fcmp_ceq_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } else if (tree->OperIs(GT_NE)) { ins = INS_bceqz; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_s : INS_fcmp_cueq_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_s : INS_fcmp_cueq_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_d : INS_fcmp_cueq_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_d : INS_fcmp_cueq_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } else if (tree->OperIs(GT_LT)) { ins = INS_bcnez; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_s : INS_fcmp_clt_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_s : INS_fcmp_clt_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_d : INS_fcmp_clt_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_d : INS_fcmp_clt_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } else if (tree->OperIs(GT_LE)) { ins = INS_bcnez; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_s : INS_fcmp_cle_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_s : INS_fcmp_cle_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_cule_d : INS_fcmp_cle_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_d : INS_fcmp_cle_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } else if (tree->OperIs(GT_GE)) { ins = INS_bceqz; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_s : INS_fcmp_cult_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_s : INS_fcmp_cult_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_d : INS_fcmp_cult_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_d : INS_fcmp_cult_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } else if (tree->OperIs(GT_GT)) { ins = INS_bceqz; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_s : INS_fcmp_cule_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_s : INS_fcmp_cule_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_d : INS_fcmp_cule_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_cle_d : INS_fcmp_cule_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } - //assert(0 <= cc && cc < 8); + // assert(0 <= cc && cc < 8); if (IsEq) - emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, (int)1/*cc*/);//5-bits; + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, (int)1 /*cc*/); // 5-bits; else { - jtrue->gtOp2 = (GenTree*)(1/*cc*/); + jtrue->gtOp2 = (GenTree*)(1 /*cc*/); jtrue->SetRegNum((regNumber)ins); } } @@ -4251,70 +4248,70 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) switch (cmpSize) { - case EA_4BYTE: + case EA_4BYTE: { imm1 = static_cast(imm1); imm2 = static_cast(imm2); } break; - case EA_8BYTE: - break; - case EA_1BYTE: + case EA_8BYTE: + break; + case EA_1BYTE: { imm1 = static_cast(imm1); imm2 = static_cast(imm2); } break; - //case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected type in jumpTrue."); + // case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue."); } switch (tree->OperGet()) { - case GT_LT: - if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_LE: - if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_EQ: - if (imm1 == imm2) - { - ins = INS_b; - } - break; - case GT_NE: - if (imm1 != imm2) - { - ins = INS_b; - } - break; - case GT_GT: - if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_GE: - if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) - { - ins = INS_b; - } - break; - default: - break; + case GT_LT: + if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_LE: + if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && 
((unsigned)imm1 <= (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_EQ: + if (imm1 == imm2) + { + ins = INS_b; + } + break; + case GT_NE: + if (imm1 != imm2) + { + ins = INS_b; + } + break; + case GT_GT: + if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_GE: + if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) + { + ins = INS_b; + } + break; + default: + break; } if (IsEq && (ins != INS_invalid)) - emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, 0);//5-bits; + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, 0); // 5-bits; else if (ins != INS_invalid) { jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; @@ -4323,27 +4320,27 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } else { - //TODO:can optimize further. + // TODO:can optimize further. if (op1->isContainedIntOrIImmed()) { op1 = tree->gtOp2; op2 = tree->gtOp1; switch (tree->OperGet()) { - case GT_LT: - tree->SetOper(GT_GT); - break; - case GT_LE: - tree->SetOper(GT_GE); - break; - case GT_GT: - tree->SetOper(GT_LT); - break; - case GT_GE: - tree->SetOper(GT_LE); - break; - default: - break; + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; } } @@ -4361,52 +4358,58 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { switch (cmpSize) { - case EA_4BYTE: - if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) - imm = static_cast(imm); - else - imm = static_cast(imm); - break; - case EA_8BYTE: - break; - case EA_1BYTE: - imm = static_cast(imm); - break; - //case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected type in jumpTrue(imm)."); + case EA_4BYTE: + if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) + imm = static_cast(imm); + else + 
imm = static_cast(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast(imm); + break; + // case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); } - emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm);//TODO: maybe optimize. + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); // TODO: maybe optimize. } else { SaveCcResultReg = 0; } - if (tree->OperIs(GT_LT)) { + if (tree->OperIs(GT_LT)) + { SaveCcResultReg |= ((int)regOp1); ins = IsUnsigned ? INS_bltu : INS_blt; } - else if (tree->OperIs(GT_LE)) { + else if (tree->OperIs(GT_LE)) + { SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); - ins = IsUnsigned ? INS_bgeu : INS_bge; + ins = IsUnsigned ? INS_bgeu : INS_bge; } - else if (tree->OperIs(GT_GT)) { + else if (tree->OperIs(GT_GT)) + { SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); - ins = IsUnsigned ? INS_bltu : INS_blt; + ins = IsUnsigned ? INS_bltu : INS_blt; } - else if (tree->OperIs(GT_GE)) { + else if (tree->OperIs(GT_GE)) + { SaveCcResultReg |= ((int)regOp1); ins = IsUnsigned ? INS_bgeu : INS_bge; } - else if (tree->OperIs(GT_NE)) { + else if (tree->OperIs(GT_NE)) + { SaveCcResultReg |= ((int)regOp1); ins = INS_bne; } - else if (tree->OperIs(GT_EQ)) { + else if (tree->OperIs(GT_EQ)) + { SaveCcResultReg |= ((int)regOp1); ins = INS_beq; } @@ -4414,29 +4417,32 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) else { regNumber regOp2 = op2->GetRegNum(); - if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - {//TODO: should amend further!!! + if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { // TODO: should amend further!!! 
regNumber tmpRegOp1 = tree->ExtractTempReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); regOp1 = tmpRegOp1; regOp2 = REG_RA; } - else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - {//TODO: should amend further!!! + else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { // TODO: should amend further!!! regNumber tmpRegOp1 = tree->ExtractTempReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); regOp1 = tmpRegOp1; regOp2 = REG_RA; } - else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - {//TODO: should amend further!!! + else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { // TODO: should amend further!!! emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, regOp2, 0); regOp2 = REG_RA; } else if (cmpSize == EA_4BYTE && ((op1->gtFlags | op2->gtFlags) & GTF_UNSIGNED)) - {//TODO: should amend further!!! + { // TODO: should amend further!!! if (!(op1->gtFlags & GTF_UNSIGNED)) { regNumber tmpRegOp1 = tree->ExtractTempReg(); @@ -4450,34 +4456,40 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } } - if (tree->OperIs(GT_LT)) { + if (tree->OperIs(GT_LT)) + { SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); - ins = IsUnsigned ? INS_bltu : INS_blt; + ins = IsUnsigned ? INS_bltu : INS_blt; } - else if (tree->OperIs(GT_LE)) { + else if (tree->OperIs(GT_LE)) + { SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; - ins = IsUnsigned ? INS_bgeu : INS_bge; + ins = IsUnsigned ? 
INS_bgeu : INS_bge; } - else if (tree->OperIs(GT_GT)) { + else if (tree->OperIs(GT_GT)) + { SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; - ins = IsUnsigned ? INS_bltu : INS_blt; + ins = IsUnsigned ? INS_bltu : INS_blt; } - else if (tree->OperIs(GT_GE)) { + else if (tree->OperIs(GT_GE)) + { SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); - ins = IsUnsigned ? INS_bgeu : INS_bge; + ins = IsUnsigned ? INS_bgeu : INS_bge; } - else if (tree->OperIs(GT_NE)) { + else if (tree->OperIs(GT_NE)) + { SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; - ins = INS_bne; + ins = INS_bne; } - else if (tree->OperIs(GT_EQ)) { + else if (tree->OperIs(GT_EQ)) + { SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; - ins = INS_beq; + ins = INS_beq; } } if (IsEq) - emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, SaveCcResultReg);//5-bits; + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, SaveCcResultReg); // 5-bits; else { jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; @@ -4533,7 +4545,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) regNumber reg = op1->GetRegNum(); emitAttr attr = emitActualTypeSize(op1->TypeGet()); - //if (tree->gtFlags & GTF_JCMP_TST) + // if (tree->gtFlags & GTF_JCMP_TST) //{ // assert(!"unimplemented on LOONGARCH yet"); // //ssize_t compareImm = op2->AsIntCon()->IconValue(); @@ -4545,10 +4557,10 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) // //GetEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm); //} - //else + // else { instruction ins; - int regs; + int regs; if (op2->AsIntCon()->gtIconVal) { assert(reg != REG_R21); @@ -4562,16 +4574,16 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) } GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); regs = (int)reg << 5; - regs |= (int)REG_R21;//REG_R21 + regs |= (int)REG_R21; // REG_R21 ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; } else { regs = (int)reg; - ins = (tree->gtFlags & GTF_JCMP_EQ) ? 
INS_beqz : INS_bnez; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; } - GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs);//5-bits; + GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs); // 5-bits; } } @@ -4586,10 +4598,10 @@ int CodeGenInterface::genSPtoFPdelta() const int delta; if (IsSaveFpRaWithAllCalleeSavedRegisters()) { - //delta = (compiler->compCalleeRegsPushed -2)* REGSIZE_BYTES + compiler->compLclFrameSize; - //assert(delta == genTotalFrameSize() - compiler->lvaArgSize - 2*8); + // delta = (compiler->compCalleeRegsPushed -2)* REGSIZE_BYTES + compiler->compLclFrameSize; + // assert(delta == genTotalFrameSize() - compiler->lvaArgSize - 2*8); delta = genTotalFrameSize() - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - - (compiler->compCalleeRegsPushed -1)* REGSIZE_BYTES; + (compiler->compCalleeRegsPushed - 1) * REGSIZE_BYTES; } else { @@ -4709,19 +4721,19 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, callTarget = callTargetReg; - //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); - //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); + // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); if (compiler->opts.compReloc) { GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); } else { - //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); - //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000)>>12); + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, 
((ssize_t)pAddr & 0xfffff000) >> 12); GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff)>>2); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); } regSet.verifyRegUsed(callTarget); @@ -4731,9 +4743,9 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ ); regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); @@ -5377,7 +5389,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) treeNode_next = treeNode_next->gtNext; }; assert(treeNode_next->OperIs(GT_JTRUE)); - //genCodeForJumpTrue(treeNode_next->AsOp()); + // genCodeForJumpTrue(treeNode_next->AsOp()); genCodeForCompare(treeNode_next->AsOp()); } break; @@ -5502,7 +5514,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_PINVOKE_PROLOG: noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); - // the runtime side requires the codegen here to be consistent +// the runtime side requires the codegen here to be consistent #ifdef PSEUDORANDOM_NOP_INSERTION emit->emitDisableRandomNops(); #endif // PSEUDORANDOM_NOP_INSERTION @@ -5600,19 +5612,24 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) } else { - //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, initReg, initReg, 0); + // 
instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, initReg, initReg, 0); if (compiler->opts.compReloc) { - GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, initReg, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); } else { - //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, initReg, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000)>>12); - GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff)>>2); + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, initReg, + // (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, initReg, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, initReg, + (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2); } regSet.verifyRegUsed(initReg); GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); @@ -5718,7 +5735,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) if (storeIns == INS_st_w) { emit->emitIns_R_R_R(INS_add_w, EA_4BYTE, source->GetRegNum(), source->GetRegNum(), REG_R0); - storeIns = INS_st_d; + storeIns = INS_st_d; storeAttr = EA_8BYTE; } 
emit->emitIns_S_R(storeIns, storeAttr, source->GetRegNum(), varNumOut, argOffsetOut); @@ -5743,7 +5760,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // Setup loReg from the internal registers that we reserved in lower. // - regNumber loReg = treeNode->ExtractTempReg(); + regNumber loReg = treeNode->ExtractTempReg(); regNumber addrReg = REG_NA; GenTreeLclVarCommon* varNode = nullptr; @@ -5787,11 +5804,11 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) ClassLayout* layout; - //unsigned gcPtrCount; // The count of GC pointers in the struct + // unsigned gcPtrCount; // The count of GC pointers in the struct unsigned srcSize; bool isHfa; - //gcPtrCount = treeNode->gtNumSlots; + // gcPtrCount = treeNode->gtNumSlots; // Setup the srcSize and layout if (source->OperGet() == GT_LCL_VAR) { @@ -5804,8 +5821,8 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) assert(varDsc->lvOnFrame && !varDsc->lvRegister); srcSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine - // as that is how much stack is allocated for this LclVar - layout = varDsc->GetLayout(); + // as that is how much stack is allocated for this LclVar + layout = varDsc->GetLayout(); } else // we must have a GT_OBJ { @@ -5946,7 +5963,7 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) #if 1 else if (emitter::isFloatReg(targetReg)) GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, op1->GetRegNum()); - else //if (!emitter::isFloatReg(targetReg)) + else // if (!emitter::isFloatReg(targetReg)) { assert(!emitter::isFloatReg(targetReg)); GetEmitter()->emitIns_R_R(INS_movfr2gr_d, EA_8BYTE, targetReg, op1->GetRegNum()); @@ -6088,7 +6105,7 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) // We don't split HFA struct assert(!compiler->IsHfa(source->AsObj()->GetLayout()->GetClassHandle())); - } + } ClassLayout* layout = source->AsObj()->GetLayout(); @@ -6195,7 +6212,7 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) genConsumeRegs(op1); 
const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); if (treeNode->GetRegNum() != REG_NA) { @@ -6251,9 +6268,9 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) else { // Stack store - int offset = 0; - var_types type = pRetTypeDesc->GetReturnRegType(0); - regNumber reg = call->GetRegNumByIdx(0); + int offset = 0; + var_types type = pRetTypeDesc->GetReturnRegType(0); + regNumber reg = call->GetRegNumByIdx(0); if (op1->IsCopyOrReload()) { // GT_COPY/GT_RELOAD will have valid reg for those positions @@ -6271,8 +6288,8 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) if (1 < regCount) { offset = genTypeSize(type); - type = pRetTypeDesc->GetReturnRegType(1); - reg = call->GetRegNumByIdx(1); + type = pRetTypeDesc->GetReturnRegType(1); + reg = call->GetRegNumByIdx(1); offset = offset < genTypeSize(type) ? genTypeSize(type) : offset; GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); } @@ -6304,7 +6321,7 @@ void CodeGen::genRangeCheck(GenTree* oper) genConsumeRegs(arrIndex); genConsumeRegs(arrLen); - emitter* emit = GetEmitter(); + emitter* emit = GetEmitter(); GenTreeIntConCommon* intConst = nullptr; if (arrIndex->isContainedIntOrIImmed()) { @@ -6313,7 +6330,7 @@ void CodeGen::genRangeCheck(GenTree* oper) reg1 = REG_R21; reg2 = src1->GetRegNum(); - intConst = src2->AsIntConCommon(); + intConst = src2->AsIntConCommon(); ssize_t imm = intConst->IconValue(); if (imm == INT64_MAX) { @@ -6333,7 +6350,7 @@ void CodeGen::genRangeCheck(GenTree* oper) if (src2->isContainedIntOrIImmed()) { - reg2 = REG_R21; + reg2 = REG_R21; ssize_t imm = src2->AsIntConCommon()->IconValue(); emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); } @@ -6421,12 +6438,12 @@ void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) // We will use a temp register to load the lower bound and dimension size values. 
- //regNumber tmpReg = arrIndex->GetSingleTempReg(); + // regNumber tmpReg = arrIndex->GetSingleTempReg(); assert(tgtReg != REG_R21); - unsigned dim = arrIndex->gtCurrDim; - unsigned rank = arrIndex->gtArrRank; - unsigned offset; + unsigned dim = arrIndex->gtCurrDim; + unsigned rank = arrIndex->gtArrRank; + unsigned offset; offset = compiler->eeGetMDArrayLowerBoundOffset(rank, dim); emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset); @@ -6473,11 +6490,11 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) noway_assert(indexReg != REG_NA); noway_assert(arrReg != REG_NA); - //regNumber tmpReg = arrOffset->GetSingleTempReg(); + // regNumber tmpReg = arrOffset->GetSingleTempReg(); - unsigned dim = arrOffset->gtCurrDim; - unsigned rank = arrOffset->gtArrRank; - unsigned offset = compiler->eeGetMDArrayLengthOffset(rank, dim); + unsigned dim = arrOffset->gtCurrDim; + unsigned rank = arrOffset->gtArrRank; + unsigned offset = compiler->eeGetMDArrayLengthOffset(rank, dim); // Load tmpReg with the dimension size and evaluate // tgtReg = offsetReg*tmpReg + indexReg. @@ -6508,10 +6525,10 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) // void CodeGen::genCodeForShift(GenTree* tree) { - //var_types targetType = tree->TypeGet(); - //genTreeOps oper = tree->OperGet(); - instruction ins = genGetInsForOper(tree); - emitAttr size = emitActualTypeSize(tree); + // var_types targetType = tree->TypeGet(); + // genTreeOps oper = tree->OperGet(); + instruction ins = genGetInsForOper(tree); + emitAttr size = emitActualTypeSize(tree); assert(tree->GetRegNum() != REG_NA); @@ -6527,7 +6544,7 @@ void CodeGen::genCodeForShift(GenTree* tree) { unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; - //should check shiftByImm for loongarch32-ins. + // should check shiftByImm for loongarch32-ins. 
unsigned immWidth = emitter::getBitWidth(size); // For LOONGARCH64, immWidth will be set to 32 or 64 shiftByImm &= (immWidth - 1); @@ -6632,7 +6649,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) // The index is never contained, even if it is a constant. assert(index->isUsedFromReg()); - //const regNumber tmpReg = node->GetSingleTempReg(); + // const regNumber tmpReg = node->GetSingleTempReg(); // Generate the bounds check if necessary. if ((node->gtFlags & GTF_INX_RNGCHK) != 0) @@ -6711,9 +6728,9 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) instruction ins = ins_Load(type); instruction ins2 = INS_none; regNumber targetReg = tree->GetRegNum(); - regNumber tmpReg = targetReg; - emitAttr attr = emitActualTypeSize(type); - int offset = 0; + regNumber tmpReg = targetReg; + emitAttr attr = emitActualTypeSize(type); + int offset = 0; genConsumeAddress(tree->Addr()); @@ -6848,7 +6865,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) if (size >= 2 * REGSIZE_BYTES) { - regNumber tempReg2 = REG_R21;//cpBlkNode->ExtractTempReg(RBM_ALLINT);//TODO:should amend. + regNumber tempReg2 = REG_R21; // cpBlkNode->ExtractTempReg(RBM_ALLINT);//TODO:should amend. 
for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; size -= regSize, srcOffset += regSize, dstOffset += regSize) @@ -7178,8 +7195,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, - target->GetRegNum(), call->IsFastTailCall()); + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, target->GetRegNum(), + call->IsFastTailCall()); } else if (call->IsR2ROrVirtualStubRelativeIndir()) { @@ -7235,10 +7252,11 @@ void CodeGen::genCallInstruction(GenTreeCall* call) assert(addr != nullptr); -// Non-virtual direct call to known addresses + // Non-virtual direct call to known addresses { genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, REG_R21, call->IsFastTailCall()); + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, REG_R21, + call->IsFastTailCall()); } } @@ -7427,7 +7445,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) { var_types loadType = TYP_UNDEF; - //NOTE for LOONGARCH: not supports the HFA. + // NOTE for LOONGARCH: not supports the HFA. assert(!varDsc->lvIsHfaRegArg()); { if (varTypeIsStruct(varDsc)) @@ -7451,13 +7469,14 @@ void CodeGen::genJmpMethod(GenTree* jmp) GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0); // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. - // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it. + // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting + // it. // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). 
regSet.AddMaskVars(genRegMask(argReg)); gcInfo.gcMarkRegPtrVal(argReg, loadType); - //if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) + // if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) if (varDsc->GetOtherArgReg() < REG_STK) { // Restore the second register. @@ -7506,7 +7525,6 @@ void CodeGen::genJmpMethod(GenTree* jmp) firstArgVarNum = varNum; } } - } // Jmp call to a vararg method - if the method has fewer than fixed arguments that can be max size of reg, @@ -7580,7 +7598,7 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d { // We need to check if the value is not greater than 0x7FFFFFFF // if the upper 33 bits are zero. - //instGen_Set_Reg_To_Imm(EA_8BYTE, REG_R21, 0xFFFFFFFF80000000LL); + // instGen_Set_Reg_To_Imm(EA_8BYTE, REG_R21, 0xFFFFFFFF80000000LL); ssize_t imm = -1; GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); @@ -7606,19 +7624,19 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d default: { assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE); - const int castMaxValue = desc.CheckSmallIntMax(); - const int castMinValue = desc.CheckSmallIntMin(); + const int castMaxValue = desc.CheckSmallIntMax(); + const int castMinValue = desc.CheckSmallIntMin(); instruction ins; if (castMaxValue > 2047) - {//should amend. should confirm !?!? + { // should amend. should confirm !?!? assert((castMaxValue == 32767) || (castMaxValue == 65535)); GetEmitter()->emitIns_I_la(EA_ATTR(desc.CheckSrcSize()), REG_R21, castMaxValue + 1); ins = castMinValue == 0 ? INS_bgeu : INS_bge; genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, reg, nullptr, REG_R21); } else - {//should amend. + { // should amend. GetEmitter()->emitIns_R_R_I(INS_addi_w, EA_ATTR(desc.CheckSrcSize()), REG_R21, REG_R0, castMaxValue); ins = castMinValue == 0 ? 
INS_bltu : INS_blt; genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, REG_R21, nullptr, reg); @@ -7658,13 +7676,13 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) { genConsumeRegs(cast->gtGetOp1()); - emitter* emit = GetEmitter(); - var_types dstType = cast->CastToType(); - var_types srcType = genActualType(cast->gtGetOp1()->TypeGet()); - const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); - const regNumber dstReg = cast->GetRegNum(); - const unsigned char pos = 0; - const unsigned char size = 32; + emitter* emit = GetEmitter(); + var_types dstType = cast->CastToType(); + var_types srcType = genActualType(cast->gtGetOp1()->TypeGet()); + const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); + const regNumber dstReg = cast->GetRegNum(); + const unsigned char pos = 0; + const unsigned char size = 32; assert(genIsValidIntReg(srcReg)); assert(genIsValidIntReg(dstReg)); @@ -7676,7 +7694,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) genIntCastOverflowCheck(cast, desc, srcReg); } - //if ((EA_ATTR(genTypeSize(srcType)) == EA_8BYTE) && (EA_ATTR(genTypeSize(dstType)) == EA_4BYTE)) + // if ((EA_ATTR(genTypeSize(srcType)) == EA_8BYTE) && (EA_ATTR(genTypeSize(dstType)) == EA_4BYTE)) //{ // if (dstType == TYP_INT) // { @@ -7689,7 +7707,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) // emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+31, pos); // } //} - //else if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) + // else if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) { instruction ins; @@ -7699,11 +7717,11 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: if (desc.ExtendSrcSize() == 1) { - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+7, pos); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 7, pos); } else { - 
emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+15, pos); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 15, pos); } break; case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: @@ -7712,7 +7730,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) break; #ifdef TARGET_64BIT case GenIntCastDesc::ZERO_EXTEND_INT: - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+31, pos); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 31, pos); break; case GenIntCastDesc::SIGN_EXTEND_INT: emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); @@ -7722,7 +7740,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) assert(desc.ExtendKind() == GenIntCastDesc::COPY); #if 1 if (srcType == TYP_INT) - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0);//should amend. + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); // should amend. else emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, dstReg, srcReg, 0); #else @@ -7759,7 +7777,7 @@ void CodeGen::genFloatToFloatCast(GenTree* treeNode) assert(genIsValidFloatReg(targetReg)); GenTree* op1 = treeNode->AsOp()->gtOp1; - assert(!op1->isContained()); // Cannot be contained + assert(!op1->isContained()); // Cannot be contained assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg. var_types dstType = treeNode->CastToType(); @@ -7843,7 +7861,7 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, { unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar; assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM); - const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber);//TODO: unused. + const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber); // TODO: unused. 
gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar->GetStackOffset()); } @@ -7908,7 +7926,7 @@ const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32] void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstReg) { /* TODO for LOONGARCH64: should redesign and delete. */ - assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); } //------------------------------------------------------------------------ @@ -7971,7 +7989,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) #if 1 //------------------------------------------------------------------------ // genScaledAdd: A helper for genLeaInstruction. -//TODO: can amend further. +// TODO: can amend further. void CodeGen::genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale) { emitter* emit = GetEmitter(); @@ -8209,7 +8227,8 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni // loop: // ldx_w rTemp, sp, rOffset, // sub_d rOffset, rOffset, REG_R21 - // bge rOffset, rLimit, loop // If rLimit is less or equal rOffset, we need to probe this rOffset. + // bge rOffset, rLimit, loop // If rLimit is less or equal rOffset, we need to probe this + // rOffset. 
noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int @@ -8252,7 +8271,8 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni } } -inline void CodeGen::genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2) +inline void CodeGen::genJumpToThrowHlpBlk_la( + SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2) { assert(INS_beq <= ins && ins <= INS_bgeu); @@ -8295,7 +8315,7 @@ inline void CodeGen::genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instructi noway_assert(excpRaisingBlock != nullptr); // Jump to the exception-throwing block on error. - emit->emitIns_J(ins, excpRaisingBlock, (int)reg1 | ((int)reg2 << 5));//5-bits; + emit->emitIns_J(ins, excpRaisingBlock, (int)reg1 | ((int)reg2 << 5)); // 5-bits; } else { @@ -8303,52 +8323,53 @@ inline void CodeGen::genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instructi // we will jump around it in the normal non-exception case. void* pAddr = nullptr; - void* addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); + void* addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); emitter::EmitCallType callType; - regNumber callTarget; + regNumber callTarget; // maybe optimize // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne))); - if(ins == INS_blt) + if (ins == INS_blt) ins = INS_bge; - else if(ins == INS_bltu) + else if (ins == INS_bltu) ins = INS_bgeu; - else if(ins == INS_bge) + else if (ins == INS_bge) ins = INS_blt; - else if(ins == INS_bgeu) + else if (ins == INS_bgeu) ins = INS_bltu; else ins = ins == INS_beq ? 
INS_bne : INS_beq; if (addr == nullptr) { - callType = emitter::EC_INDIR_R; + callType = emitter::EC_INDIR_R; callTarget = REG_DEFAULT_HELPER_CALL_TARGET; - //ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl.//TODO: maybe optimize. + // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl.//TODO: maybe optimize. - //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); - //emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); + // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + // emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); if (compiler->opts.compReloc) { - ssize_t imm = (2 + 1) << 2;// , 1=jirl. + ssize_t imm = (2 + 1) << 2; // , 1=jirl. emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); } else { - ssize_t imm = (3 + 1) << 2;// , 1=jirl. + ssize_t imm = (3 + 1) << 2; // , 1=jirl. emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); - //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); - //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000)>>12); + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff)>>2); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, + ((ssize_t)pAddr & 0xfff) >> 2); } } else - {//INS_OPTS_C - callType = emitter::EC_FUNC_TOKEN; + { // INS_OPTS_C + callType = emitter::EC_FUNC_TOKEN; 
callTarget = REG_NA; ssize_t imm = 5 << 2; @@ -8358,13 +8379,13 @@ inline void CodeGen::genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instructi emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); } - emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)), INDEBUG_LDISASM_COMMA(nullptr) addr, 0, - EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ - ); + emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)), + INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind))); regSet.verifyRegistersUsed(killMask); @@ -8465,7 +8486,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // rsPushRegs |= RBM_RA; // We must save the return address (in the RA register). - regSet.rsMaskCalleeSaved = rsPushRegs; + regSet.rsMaskCalleeSaved = rsPushRegs; regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; @@ -8480,7 +8501,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } #endif // DEBUG - // See the document "LOONGARCH64 JIT Frame Layout" and/or "LOONGARCH64 Exception Data" for more details or requirements and + // See the document "LOONGARCH64 JIT Frame Layout" and/or "LOONGARCH64 Exception Data" for more details or + // requirements and // options. Case numbers in comments here refer to this document. 
See also Compiler::lvaAssignFrameOffsets() // for pictures of the general frame layouts, and CodeGen::genFuncletProlog() implementations (per architecture) // for pictures of the funclet frame layouts. @@ -8640,32 +8662,22 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // daddiu fp, sp, offset-fp // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. - JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize); + JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); frameType = 1; offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize; -//#ifdef OPTIMIZE_LOONGSON_EXT -// if (!(offsetSpToSavedFp & 0xf) && (offsetSpToSavedFp <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offsetSpToSavedFp >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, offsetSpToSavedFp); -// } -// else -//#endif -// { GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp); compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp); GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8); compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8); -// } maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA - offset = compiler->compLclFrameSize + 2*REGSIZE_BYTES;//FP/RA + offset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // FP/RA } else { @@ -8673,8 +8685,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe offsetSpToSavedFp = genSPtoFPdelta(); - JITDUMP("Frame type 2(Top). 
#outsz=%d; #framesz=%d; LclFrameSize=%d, fpDelta:%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize, offsetSpToSavedFp); + JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, fpDelta:%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize, + offsetSpToSavedFp); offset = compiler->compLclFrameSize; } @@ -8683,27 +8696,28 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe { if (!IsSaveFpRaWithAllCalleeSavedRegisters()) { - JITDUMP("Frame type 3. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize); + JITDUMP("Frame type 3. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); frameType = 3; maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA - offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; + offset = calleeSaveSPDelta - offset; } else { frameType = 4; - JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, SPDelta-1:%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize, calleeSaveSPDelta); + JITDUMP("Frame type 4(Top). 
#outsz=%d; #framesz=%d; LclFrameSize=%d, SPDelta-1:%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize, + calleeSaveSPDelta); - offset = totalFrameSize - compiler->compLclFrameSize; + offset = totalFrameSize - compiler->compLclFrameSize; calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; + offset = calleeSaveSPDelta - offset; offsetSpToSavedFp = offset + REGSIZE_BYTES; } } @@ -8726,28 +8740,28 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta); - // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, - // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't - // need to add codes at all. - - //if (compiler->info.compIsVarArgs) - //{ - // JITDUMP(" compIsVarArgs=true\n"); - - // // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here. - // assert((offset % 16) == 0); - // for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1))) - // { - // regNumber reg2 = REG_NEXT(reg1); - // // sd REG, offset(SP) - // // sd REG + 1, (offset+8)(SP) - // GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg1, REG_SPBASE, offset); - // compiler->unwindNop(); - // GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg2, REG_SPBASE, offset + 8); - // compiler->unwindNop(); - // offset += 2 * REGSIZE_BYTES; - // } - //} +// For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, +// so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't +// need to add codes at all. 
+ +// if (compiler->info.compIsVarArgs) +//{ +// JITDUMP(" compIsVarArgs=true\n"); + +// // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here. +// assert((offset % 16) == 0); +// for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1))) +// { +// regNumber reg2 = REG_NEXT(reg1); +// // sd REG, offset(SP) +// // sd REG + 1, (offset+8)(SP) +// GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg1, REG_SPBASE, offset); +// compiler->unwindNop(); +// GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg2, REG_SPBASE, offset + 8); +// compiler->unwindNop(); +// offset += 2 * REGSIZE_BYTES; +// } +//} #ifdef DEBUG if (compiler->opts.disAsm) @@ -8755,39 +8769,29 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe #endif if (frameType == 1) { - //offsetSpToSavedFp = genSPtoFPdelta(); + // offsetSpToSavedFp = genSPtoFPdelta(); } else if (frameType == 2) { - //offsetSpToSavedFp = genSPtoFPdelta(); + // offsetSpToSavedFp = genSPtoFPdelta(); } else if (frameType == 3) { if (compiler->lvaOutgoingArgSpaceSize >= 2040) { - offset = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + offset = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; + offset = calleeSaveSPDelta - offset; genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); offsetSpToSavedFp = offset; -//#ifdef OPTIMIZE_LOONGSON_EXT -// if (!(offset & 0xf) && (offset <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); -// } -// else -//#endif -// { GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); compiler->unwindSaveReg(REG_FP, offset); GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, 
REG_RA, REG_SPBASE, offset + 8); compiler->unwindSaveReg(REG_RA, offset + 8); -// } genEstablishFramePointer(offset, /* reportUnwindData */ true); @@ -8801,14 +8805,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe offset = compiler->lvaOutgoingArgSpaceSize; -//#ifdef OPTIMIZE_LOONGSON_EXT -// if (!(offset & 0xf) && (offset <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); -// } -// else -//#endif GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); compiler->unwindSaveReg(REG_FP, offset); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 388fae2b695bb..88a5940879517 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -759,7 +759,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #elif defined(TARGET_LOONGARCH64) // Structs that are pointer sized or smaller. 
- //assert(structSize > TARGET_POINTER_SIZE); + // assert(structSize > TARGET_POINTER_SIZE); // On LOONGARCH64 structs that are 1-16 bytes are passed by value in one/multiple register(s) if (structSize <= (TARGET_POINTER_SIZE * 2)) @@ -798,7 +798,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Otherwise we pass this struct by reference to a copy // setup wbPassType and useType indicate that this is passed using one register (by reference to a copy) @@ -948,7 +948,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_STRUCT; } } -#endif //TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 // Check for cases where a small struct is returned in a register // via a primitive type. @@ -2464,7 +2464,7 @@ void Compiler::compSetProcessor() #endif #if defined(TARGET_LOONGARCH64) - //TODO: should add LOONGARCH64's features for LOONGARCH64. +// TODO: should add LOONGARCH64's features for LOONGARCH64. #endif instructionSetFlags = EnsureInstructionSetFlagsAreValid(instructionSetFlags); @@ -2652,7 +2652,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary; } #elif defined(TARGET_LOONGARCH64) - //TODO: should be adaptive on LoongArch64. +// TODO: should be adaptive on LoongArch64. #endif assert(isPow2(opts.compJitAlignLoopBoundary)); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index fde453e53d7d6..a477e24def83a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -472,11 +472,10 @@ class LclVarDsc unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. 
#elif defined(TARGET_LOONGARCH64) unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. - unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. - unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. - unsigned char lvIsSplit : 1; // Set if the argument is splited. also used the lvFldOffset. -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH) - + unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. + unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. + unsigned char lvIsSplit : 1; // Set if the argument is splited. also used the lvFldOffset. +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) unsigned char lvIsBoolean : 1; // set if variable is boolean unsigned char lvSingleDef : 1; // variable has a single def @@ -8029,7 +8028,7 @@ class Compiler } #elif defined(TARGET_LOONGARCH64) static bool varTypeNeedsPartialCalleeSave(var_types type) - {//TODO: supporting SIMD feature for LoongArch64. + { // TODO: supporting SIMD feature for LoongArch64. return false; } #else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) @@ -8723,7 +8722,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #elif defined(TARGET_ARM64) CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; #elif defined(TARGET_LOONGARCH64) - //TODO: supporting SIMD feature for LoongArch64. + // TODO: supporting SIMD feature for LoongArch64. assert(!"unimplemented yet on LA"); CORINFO_InstructionSet minimumIsa = 0; #else @@ -12127,10 +12126,10 @@ const instruction INS_SQRT = INS_fsqrt; #ifdef TARGET_LOONGARCH64 const instruction INS_BREAKPOINT = INS_break; -const instruction INS_MULADD = INS_fmadd_d;// NOTE: default is double. 
-const instruction INS_ABS = INS_fabs_d; // NOTE: default is double. -const instruction INS_SQRT = INS_fsqrt_d;// NOTE: default is double. -#endif // TARGET_LOONGARCH64 +const instruction INS_MULADD = INS_fmadd_d; // NOTE: default is double. +const instruction INS_ABS = INS_fabs_d; // NOTE: default is double. +const instruction INS_SQRT = INS_fsqrt_d; // NOTE: default is double. +#endif // TARGET_LOONGARCH64 /*****************************************************************************/ diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 23cd48bf2edb3..8e449aee28faa 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -1842,10 +1842,10 @@ inline void LclVarDsc::incRefCnts(weight_t weight, Compiler* comp, RefCountState bool doubleWeight = lvIsTemp; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64)|| defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // and, for the time being, implicit byref params doubleWeight |= lvIsImplicitByRef; -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (doubleWeight && (weight * 2 > weight)) { diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index b53608757f384..898746249b619 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -460,7 +460,7 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* // This struct is passed by reference using a single 'slot' return TARGET_POINTER_SIZE; } - // otherwise will we pass this struct by value in multiple registers +// otherwise will we pass this struct by value in multiple registers #elif !defined(TARGET_ARM) NYI("unknown target"); #endif // defined(TARGET_XXX) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 638e0cd5c7266..b09a3ac7483dd 100644 --- a/src/coreclr/jit/emit.cpp +++ 
b/src/coreclr/jit/emit.cpp @@ -743,7 +743,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) ig->igFlags |= IGF_BYREF_REGS; - // We'll allocate extra space (DWORD aligned) to record the GC regs +// We'll allocate extra space (DWORD aligned) to record the GC regs #if defined(TARGET_LOONGARCH64) gs += sizeof(regMaskTP); #else @@ -759,7 +759,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) if (ig->igFlags & IGF_BYREF_REGS) { - // Record the byref regs in front the of the instructions +// Record the byref regs in front the of the instructions #if defined(TARGET_LOONGARCH64) *castto(id, regMaskTP*)++ = emitInitByrefRegs; @@ -795,7 +795,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) } #endif - // Record how many instructions and bytes of code this group contains +// Record how many instructions and bytes of code this group contains #ifdef TARGET_LOONGARCH64 noway_assert((unsigned int)emitCurIGinsCnt == emitCurIGinsCnt); @@ -809,7 +809,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) #else ig->igInsCnt = (BYTE)emitCurIGinsCnt; #endif - ig->igSize = (unsigned short)emitCurIGsize; + ig->igSize = (unsigned short)emitCurIGsize; emitCurCodeOffset += emitCurIGsize; assert(IsCodeAligned(emitCurCodeOffset)); @@ -4157,16 +4157,19 @@ void emitter::emitJumpDistBind() int jmp_iteration = 1; #ifdef TARGET_LOONGARCH64 - //NOTE: + // NOTE: // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; - // bit1 of isLinkingEnd_LA: indicating not needed updating ths size while emitTotalCodeSize <= (0x7fff << 2) or had updated; + // bit1 of isLinkingEnd_LA: indicating not needed updating ths size while emitTotalCodeSize <= (0x7fff << 2) or had + // updated; unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. // small jump max. neg distance - NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; + NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; // small jump max. 
pos distance - NATIVE_OFFSET psd = B_DIST_SMALL_MAX_POS - emitCounts_INS_OPTS_J * (3 << 2);//the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + NATIVE_OFFSET psd = + B_DIST_SMALL_MAX_POS - + emitCounts_INS_OPTS_J * (3 << 2); // the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). #endif /*****************************************************************************/ @@ -4330,7 +4333,7 @@ void emitter::emitJumpDistBind() #ifdef DEBUG #if defined(TARGET_LOONGARCH64) #if defined(UNALIGNED_CHECK_DISABLE) - UNALIGNED_CHECK_DISABLE; + UNALIGNED_CHECK_DISABLE; #endif assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < (jmp->idjOffs + adjLJ)); #else @@ -4374,7 +4377,7 @@ void emitter::emitJumpDistBind() #else lstIG->igOffs - adjIG #endif - ); + ); } #endif // DEBUG #if defined(TARGET_LOONGARCH64) @@ -4392,7 +4395,7 @@ void emitter::emitJumpDistBind() lstIG = jmpIG; } - /* Apply any local size adjustment to the jump's relative offset */ +/* Apply any local size adjustment to the jump's relative offset */ #if defined(TARGET_LOONGARCH64) jmp->idjOffs += adjLJ; @@ -4552,11 +4555,11 @@ void emitter::emitJumpDistBind() if (jmpIG->igNum < tgtIG->igNum) { - /* Forward jump */ +/* Forward jump */ - /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between - here and the target could be shortened, causing the actual distance to shrink. - */ +/* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between + here and the target could be shortened, causing the actual distance to shrink. 
+ */ #if defined(TARGET_LOONGARCH64) dstOffs += adjIG; @@ -4598,7 +4601,7 @@ void emitter::emitJumpDistBind() #endif // DEBUG_EMIT #if defined(TARGET_LOONGARCH64) - assert(jmpDist >= 0);//Forward jump + assert(jmpDist >= 0); // Forward jump assert(!(jmpDist & 0x3)); if (isLinkingEnd_LA & 0x2) @@ -4610,38 +4613,39 @@ void emitter::emitJumpDistBind() instruction ins = jmp->idIns(); assert((INS_bceqz <= ins) && (ins <= INS_bl)); - if (ins < INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. { - if ((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000) + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) { extra = 4; } else { - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO:later will be deleted!!! + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO:later will be deleted!!! extra = 8; } } - else if (ins < INS_b)// beqz/bnez < b < bl // See instrsloongarch64.h. + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. 
{ - if (jmpDist + emitCounts_INS_OPTS_J*4 < 0x200000 ) + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) continue; extra = 4; - //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); } - else //if (ins == INS_b || ins == INS_bl) + else // if (ins == INS_b || ins == INS_bl) { assert(ins == INS_b || ins == INS_bl); - //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); continue; } jmp->idInsOpt(INS_OPTS_JIRL); jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += extra;//the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + jmpIG->igSize += extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). adjLJ += extra; adjIG += extra; emitTotalCodeSize += extra; @@ -4696,50 +4700,51 @@ void emitter::emitJumpDistBind() #endif // DEBUG_EMIT #if defined(TARGET_LOONGARCH64) - assert(jmpDist >= 0);//Backward jump + assert(jmpDist >= 0); // Backward jump assert(!(jmpDist & 0x3)); if (isLinkingEnd_LA & 0x2) { - jmp->idAddr()->iiaSetJmpOffset(-jmpDist);//Backward jump is negative! + jmp->idAddr()->iiaSetJmpOffset(-jmpDist); // Backward jump is negative! } else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) { instruction ins = jmp->idIns(); assert((INS_bceqz <= ins) && (ins <= INS_bl)); - if (ins < INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. 
{ - if ((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000) + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) { extra = 4; } else { - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO:later will be deleted!!! + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO:later will be deleted!!! extra = 8; } } - else if (ins < INS_b)// beqz/bnez < b < bl // See instrsloongarch64.h. + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. { - if (jmpDist + emitCounts_INS_OPTS_J*4 < 0x200000 ) + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) continue; extra = 4; - //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); } - else //if (ins == INS_b || ins == INS_bl) + else // if (ins == INS_b || ins == INS_bl) { assert(ins == INS_b || ins == INS_bl); - //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO + // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO continue; } jmp->idInsOpt(INS_OPTS_JIRL); jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += extra;//the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + jmpIG->igSize += extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). adjLJ += extra; adjIG += extra; emitTotalCodeSize += extra; @@ -4951,18 +4956,18 @@ void emitter::emitJumpDistBind() jmpIG->igFlags |= IGF_UPD_ISZ; -#endif // not defined(TARGET_LOONGARCH64) - } // end for each jump +#endif // not defined(TARGET_LOONGARCH64) + } // end for each jump #if defined(TARGET_LOONGARCH64) if ((isLinkingEnd_LA & 0x3) < 0x2) { - //indicating had updated the instrDescJmp's size with the type INS_OPTS_J. 
+ // indicating had updated the instrDescJmp's size with the type INS_OPTS_J. isLinkingEnd_LA = 0x2; - //emitRecomputeIGoffsets(); + // emitRecomputeIGoffsets(); /* Adjust offsets of any remaining blocks */ - for (;lstIG;) + for (; lstIG;) { lstIG = lstIG->igNext; if (!lstIG) @@ -6820,7 +6825,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #ifdef TARGET_LOONGARCH64 - //cp = cp - 4; + // cp = cp - 4; unsigned actualCodeSize = cp - codeBlock; #endif @@ -6949,7 +6954,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } } -#endif //!TARGET_LOONGARCH64 +#endif //! TARGET_LOONGARCH64 #ifdef DEBUG if (emitComp->opts.disAsm) @@ -8925,7 +8930,7 @@ cnsval_ssize_t emitter::emitGetInsSC(instrDesc* id) int adr = emitComp->lvaFrameAddress(varNum, &FPbased); int dsp = adr + offs; if (id->idIns() == INS_sub) - dsp = -dsp; + dsp = -dsp; #endif return dsp; } @@ -9538,7 +9543,7 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) result = RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI); break; #elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) - result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; + result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; break; #else assert(!"unknown arch"); diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 66a9ccaf207ee..15b84ae2b4cec 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -303,7 +303,7 @@ struct insGroup #if EMIT_TRACK_STACK_DEPTH unsigned igStkLvl; // stack level on entry #endif - regMaskSmall igGCregs; // set of registers with live GC refs + regMaskSmall igGCregs; // set of registers with live GC refs #ifdef TARGET_LOONGARCH64 unsigned int igInsCnt; // # of instructions in this group #else @@ -611,9 +611,10 @@ class emitter static_assert_no_msg(IF_COUNT <= 128); insFormat _idInsFmt : 7; #elif defined(TARGET_LOONGARCH64) - //insFormat _idInsFmt : 5;// NOTE: LOONGARCH64 does not used the _idInsFmt . 
- unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. If not enough, please use the _idInsCount. - //unsigned _idInsCount : 5; // the instruction(s) count of this instrDesc described. + // insFormat _idInsFmt : 5;// NOTE: LOONGARCH64 does not used the _idInsFmt . + unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. If not enough, please use the + // _idInsCount. + // unsigned _idInsCount : 5; // the instruction(s) count of this instrDesc described. #else static_assert_no_msg(IF_COUNT <= 256); insFormat _idInsFmt : 8; @@ -632,7 +633,7 @@ class emitter #if defined(TARGET_LOONGARCH64) insFormat idInsFmt() const - {//not used for LOONGARCH64. + { // not used for LOONGARCH64. return (insFormat)0; } void idInsFmt(insFormat insFmt) @@ -665,7 +666,7 @@ class emitter // amd64: 17 bits // arm: 16 bits // arm64: 17 bits - //loongarch64: 14 bits + // loongarch64: 14 bits private: #if defined(TARGET_XARCH) @@ -676,7 +677,7 @@ class emitter #elif defined(TARGET_ARM64) // Moved the definition of '_idOpSize' later so that we don't cross a 32-bit boundary when laying out bitfields #elif defined(TARGET_LOONGARCH64) - /* _idOpSize defined bellow. */ +/* _idOpSize defined bellow. */ #else // ARM opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 #endif // ARM @@ -729,8 +730,9 @@ class emitter #ifdef TARGET_LOONGARCH64 /* TODO: for LOONGARCH: maybe delete on future. */ - opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 - insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the accessing a local on stack. + opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 + insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the + // accessing a local on stack. unsigned _idLclVar : 1; // access a local on stack. 
#endif @@ -748,10 +750,10 @@ class emitter // For Arm64, we have used 17 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (17) #elif defined(TARGET_XARCH) - // For xarch, we have used 14 bits from the second DWORD. +// For xarch, we have used 14 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (14) #elif defined(TARGET_LOONGARCH64) -// For Loongarch64, we have used 14 bits from the second DWORD. + // For Loongarch64, we have used 14 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture @@ -763,7 +765,7 @@ class emitter // amd64: 46 bits // arm: 48 bits // arm64: 49 bits - //loongarch64: 46 bits + // loongarch64: 46 bits unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag @@ -911,14 +913,14 @@ class emitter #elif defined(TARGET_LOONGARCH64) // TARGET_XARCH struct { - unsigned int iiaEncodedInstr;//instruction's binary encoding. - regNumber _idReg3 : REGNUM_BITS; - regNumber _idReg4 : REGNUM_BITS; + unsigned int iiaEncodedInstr; // instruction's binary encoding. + regNumber _idReg3 : REGNUM_BITS; + regNumber _idReg4 : REGNUM_BITS; }; struct { - int iiaJmpOffset;//temporary saving the offset of jmp or data. + int iiaJmpOffset; // temporary saving the offset of jmp or data. emitLclVarAddr iiaLclVar; }; @@ -939,7 +941,7 @@ class emitter { return iiaJmpOffset; } -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_LOONGARCH64) } _idAddrUnion; @@ -1043,7 +1045,7 @@ class emitter #elif defined(TARGET_LOONGARCH64) unsigned idCodeSize() const { - return _idCodeSize;//_idInsCount; + return _idCodeSize; //_idInsCount; } void idCodeSize(unsigned sz) { @@ -1053,7 +1055,7 @@ class emitter #endif // TARGET_LOONGARCH64 emitAttr idOpSize() - {//NOTE: not used for LOONGARCH64. + { // NOTE: not used for LOONGARCH64. 
return emitDecodeSize(_idOpSize); } void idOpSize(emitAttr opsz) @@ -1888,8 +1890,8 @@ class emitter #endif // !defined(HOST_64BIT) #ifdef TARGET_LOONGARCH64 - unsigned int emitCounts_INS_OPTS_J;//INS_OPTS_J -#endif // defined(TARGET_LOONGARCH64) + unsigned int emitCounts_INS_OPTS_J; // INS_OPTS_J +#endif // defined(TARGET_LOONGARCH64) size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp); size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp); diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index f7c06ad5cc7f6..e58ccb61282bb 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -29,349 +29,348 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ////These are used for loongarch64 instrs's dump. ////LA_OP_2R opcode: bit31 ~ bit10 -#define LA_2R_CLO_W 0x4 -#define LA_2R_CLZ_W 0x5 -#define LA_2R_CTO_W 0x6 -#define LA_2R_CTZ_W 0x7 -#define LA_2R_CLO_D 0x8 -#define LA_2R_CLZ_D 0x9 -#define LA_2R_CTO_D 0xa -#define LA_2R_CTZ_D 0xb -#define LA_2R_REVB_2H 0xc -#define LA_2R_REVB_4H 0xd -#define LA_2R_REVB_2W 0xe -#define LA_2R_REVB_D 0xf -#define LA_2R_REVH_2W 0x10 -#define LA_2R_REVH_D 0x11 -#define LA_2R_BITREV_4B 0x12 -#define LA_2R_BITREV_8B 0x13 -#define LA_2R_BITREV_W 0x14 -#define LA_2R_BITREV_D 0x15 -#define LA_2R_EXT_W_H 0x16 -#define LA_2R_EXT_W_B 0x17 -#define LA_2R_RDTIMEL_W 0x18 -#define LA_2R_RDTIMEH_W 0x19 -#define LA_2R_RDTIME_D 0x1a -#define LA_2R_CPUCFG 0x1b -#define LA_2R_ASRTLE_D 0x2 -#define LA_2R_ASRTGT_D 0x3 -#define LA_2R_FABS_S 0x4501 -#define LA_2R_FABS_D 0x4502 -#define LA_2R_FNEG_S 0x4505 -#define LA_2R_FNEG_D 0x4506 -#define LA_2R_FLOGB_S 0x4509 -#define LA_2R_FLOGB_D 0x450a -#define LA_2R_FCLASS_S 0x450d -#define LA_2R_FCLASS_D 0x450e -#define LA_2R_FSQRT_S 0x4511 -#define LA_2R_FSQRT_D 0x4512 -#define LA_2R_FRECIP_S 0x4515 -#define LA_2R_FRECIP_D 0x4516 -#define LA_2R_FRSQRT_S 0x4519 -#define LA_2R_FRSQRT_D 0x451a 
-#define LA_2R_FMOV_S 0x4525 -#define LA_2R_FMOV_D 0x4526 -#define LA_2R_MOVGR2FR_W 0x4529 -#define LA_2R_MOVGR2FR_D 0x452a -#define LA_2R_MOVGR2FRH_W 0x452b -#define LA_2R_MOVFR2GR_S 0x452d -#define LA_2R_MOVFR2GR_D 0x452e -#define LA_2R_MOVFRH2GR_S 0x452f -#define LA_2R_MOVGR2FCSR 0x4530 -#define LA_2R_MOVFCSR2GR 0x4532 -#define LA_2R_MOVFR2CF 0x4534 -#define LA_2R_MOVCF2FR 0x4535 -#define LA_2R_MOVGR2CF 0x4536 -#define LA_2R_MOVCF2GR 0x4537 -#define LA_2R_FCVT_S_D 0x4646 -#define LA_2R_FCVT_D_S 0x4649 -#define LA_2R_FTINTRM_W_S 0x4681 -#define LA_2R_FTINTRM_W_D 0x4682 -#define LA_2R_FTINTRM_L_S 0x4689 -#define LA_2R_FTINTRM_L_D 0x468a -#define LA_2R_FTINTRP_W_S 0x4691 -#define LA_2R_FTINTRP_W_D 0x4692 -#define LA_2R_FTINTRP_L_S 0x4699 -#define LA_2R_FTINTRP_L_D 0x469a -#define LA_2R_FTINTRZ_W_S 0x46a1 -#define LA_2R_FTINTRZ_W_D 0x46a2 -#define LA_2R_FTINTRZ_L_S 0x46a9 -#define LA_2R_FTINTRZ_L_D 0x46aa -#define LA_2R_FTINTRNE_W_S 0x46b1 -#define LA_2R_FTINTRNE_W_D 0x46b2 -#define LA_2R_FTINTRNE_L_S 0x46b9 -#define LA_2R_FTINTRNE_L_D 0x46ba -#define LA_2R_FTINT_W_S 0x46c1 -#define LA_2R_FTINT_W_D 0x46c2 -#define LA_2R_FTINT_L_S 0x46c9 -#define LA_2R_FTINT_L_D 0x46ca -#define LA_2R_FFINT_S_W 0x4744 -#define LA_2R_FFINT_S_L 0x4746 -#define LA_2R_FFINT_D_W 0x4748 -#define LA_2R_FFINT_D_L 0x474a -#define LA_2R_FRINT_S 0x4791 -#define LA_2R_FRINT_D 0x4792 -#define LA_2R_IOCSRRD_B 0x19200 -#define LA_2R_IOCSRRD_H 0x19201 -#define LA_2R_IOCSRRD_W 0x19202 -#define LA_2R_IOCSRRD_D 0x19203 -#define LA_2R_IOCSRWR_B 0x19204 -#define LA_2R_IOCSRWR_H 0x19205 -#define LA_2R_IOCSRWR_W 0x19206 -#define LA_2R_IOCSRWR_D 0x19207 +#define LA_2R_CLO_W 0x4 +#define LA_2R_CLZ_W 0x5 +#define LA_2R_CTO_W 0x6 +#define LA_2R_CTZ_W 0x7 +#define LA_2R_CLO_D 0x8 +#define LA_2R_CLZ_D 0x9 +#define LA_2R_CTO_D 0xa +#define LA_2R_CTZ_D 0xb +#define LA_2R_REVB_2H 0xc +#define LA_2R_REVB_4H 0xd +#define LA_2R_REVB_2W 0xe +#define LA_2R_REVB_D 0xf +#define LA_2R_REVH_2W 0x10 +#define LA_2R_REVH_D 0x11 
+#define LA_2R_BITREV_4B 0x12 +#define LA_2R_BITREV_8B 0x13 +#define LA_2R_BITREV_W 0x14 +#define LA_2R_BITREV_D 0x15 +#define LA_2R_EXT_W_H 0x16 +#define LA_2R_EXT_W_B 0x17 +#define LA_2R_RDTIMEL_W 0x18 +#define LA_2R_RDTIMEH_W 0x19 +#define LA_2R_RDTIME_D 0x1a +#define LA_2R_CPUCFG 0x1b +#define LA_2R_ASRTLE_D 0x2 +#define LA_2R_ASRTGT_D 0x3 +#define LA_2R_FABS_S 0x4501 +#define LA_2R_FABS_D 0x4502 +#define LA_2R_FNEG_S 0x4505 +#define LA_2R_FNEG_D 0x4506 +#define LA_2R_FLOGB_S 0x4509 +#define LA_2R_FLOGB_D 0x450a +#define LA_2R_FCLASS_S 0x450d +#define LA_2R_FCLASS_D 0x450e +#define LA_2R_FSQRT_S 0x4511 +#define LA_2R_FSQRT_D 0x4512 +#define LA_2R_FRECIP_S 0x4515 +#define LA_2R_FRECIP_D 0x4516 +#define LA_2R_FRSQRT_S 0x4519 +#define LA_2R_FRSQRT_D 0x451a +#define LA_2R_FMOV_S 0x4525 +#define LA_2R_FMOV_D 0x4526 +#define LA_2R_MOVGR2FR_W 0x4529 +#define LA_2R_MOVGR2FR_D 0x452a +#define LA_2R_MOVGR2FRH_W 0x452b +#define LA_2R_MOVFR2GR_S 0x452d +#define LA_2R_MOVFR2GR_D 0x452e +#define LA_2R_MOVFRH2GR_S 0x452f +#define LA_2R_MOVGR2FCSR 0x4530 +#define LA_2R_MOVFCSR2GR 0x4532 +#define LA_2R_MOVFR2CF 0x4534 +#define LA_2R_MOVCF2FR 0x4535 +#define LA_2R_MOVGR2CF 0x4536 +#define LA_2R_MOVCF2GR 0x4537 +#define LA_2R_FCVT_S_D 0x4646 +#define LA_2R_FCVT_D_S 0x4649 +#define LA_2R_FTINTRM_W_S 0x4681 +#define LA_2R_FTINTRM_W_D 0x4682 +#define LA_2R_FTINTRM_L_S 0x4689 +#define LA_2R_FTINTRM_L_D 0x468a +#define LA_2R_FTINTRP_W_S 0x4691 +#define LA_2R_FTINTRP_W_D 0x4692 +#define LA_2R_FTINTRP_L_S 0x4699 +#define LA_2R_FTINTRP_L_D 0x469a +#define LA_2R_FTINTRZ_W_S 0x46a1 +#define LA_2R_FTINTRZ_W_D 0x46a2 +#define LA_2R_FTINTRZ_L_S 0x46a9 +#define LA_2R_FTINTRZ_L_D 0x46aa +#define LA_2R_FTINTRNE_W_S 0x46b1 +#define LA_2R_FTINTRNE_W_D 0x46b2 +#define LA_2R_FTINTRNE_L_S 0x46b9 +#define LA_2R_FTINTRNE_L_D 0x46ba +#define LA_2R_FTINT_W_S 0x46c1 +#define LA_2R_FTINT_W_D 0x46c2 +#define LA_2R_FTINT_L_S 0x46c9 +#define LA_2R_FTINT_L_D 0x46ca +#define LA_2R_FFINT_S_W 0x4744 +#define 
LA_2R_FFINT_S_L 0x4746 +#define LA_2R_FFINT_D_W 0x4748 +#define LA_2R_FFINT_D_L 0x474a +#define LA_2R_FRINT_S 0x4791 +#define LA_2R_FRINT_D 0x4792 +#define LA_2R_IOCSRRD_B 0x19200 +#define LA_2R_IOCSRRD_H 0x19201 +#define LA_2R_IOCSRRD_W 0x19202 +#define LA_2R_IOCSRRD_D 0x19203 +#define LA_2R_IOCSRWR_B 0x19204 +#define LA_2R_IOCSRWR_H 0x19205 +#define LA_2R_IOCSRWR_W 0x19206 +#define LA_2R_IOCSRWR_D 0x19207 ////LA_OP_3R opcode: bit31 ~ bit15 -#define LA_3R_ADD_W 0x20 -#define LA_3R_ADD_D 0x21 -#define LA_3R_SUB_W 0x22 -#define LA_3R_SUB_D 0x23 -#define LA_3R_SLT 0x24 -#define LA_3R_SLTU 0x25 -#define LA_3R_MASKEQZ 0x26 -#define LA_3R_MASKNEZ 0x27 -#define LA_3R_NOR 0x28 -#define LA_3R_AND 0x29 -#define LA_3R_OR 0x2a -#define LA_3R_XOR 0x2b -#define LA_3R_ORN 0x2c -#define LA_3R_ANDN 0x2d -#define LA_3R_SLL_W 0x2e -#define LA_3R_SRL_W 0x2f -#define LA_3R_SRA_W 0x30 -#define LA_3R_SLL_D 0x31 -#define LA_3R_SRL_D 0x32 -#define LA_3R_SRA_D 0x33 -#define LA_3R_ROTR_W 0x36 -#define LA_3R_ROTR_D 0x37 -#define LA_3R_MUL_W 0x38 -#define LA_3R_MULH_W 0x39 -#define LA_3R_MULH_WU 0x3a -#define LA_3R_MUL_D 0x3b -#define LA_3R_MULH_D 0x3c -#define LA_3R_MULH_DU 0x3d -#define LA_3R_MULW_D_W 0x3e -#define LA_3R_MULW_D_WU 0x3f -#define LA_3R_DIV_W 0x40 -#define LA_3R_MOD_W 0x41 -#define LA_3R_DIV_WU 0x42 -#define LA_3R_MOD_WU 0x43 -#define LA_3R_DIV_D 0x44 -#define LA_3R_MOD_D 0x45 -#define LA_3R_DIV_DU 0x46 -#define LA_3R_MOD_DU 0x47 -#define LA_3R_CRC_W_B_W 0x48 -#define LA_3R_CRC_W_H_W 0x49 -#define LA_3R_CRC_W_W_W 0x4a -#define LA_3R_CRC_W_D_W 0x4b -#define LA_3R_CRCC_W_B_W 0x4c -#define LA_3R_CRCC_W_H_W 0x4d -#define LA_3R_CRCC_W_W_W 0x4e -#define LA_3R_CRCC_W_D_W 0x4f -#define LA_3R_FADD_S 0x201 -#define LA_3R_FADD_D 0x202 -#define LA_3R_FSUB_S 0x205 -#define LA_3R_FSUB_D 0x206 -#define LA_3R_FMUL_S 0x209 -#define LA_3R_FMUL_D 0x20a -#define LA_3R_FDIV_S 0x20d -#define LA_3R_FDIV_D 0x20e -#define LA_3R_FMAX_S 0x211 -#define LA_3R_FMAX_D 0x212 -#define LA_3R_FMIN_S 0x215 
-#define LA_3R_FMIN_D 0x216 -#define LA_3R_FMAXA_S 0x219 -#define LA_3R_FMAXA_D 0x21a -#define LA_3R_FMINA_S 0x21d -#define LA_3R_FMINA_D 0x21e -#define LA_3R_FSCALEB_S 0x221 -#define LA_3R_FSCALEB_D 0x222 -#define LA_3R_FCOPYSIGN_S 0x225 -#define LA_3R_FCOPYSIGN_D 0x226 -#define LA_3R_INVTLB 0xc91 -#define LA_3R_LDX_B 0x7000 -#define LA_3R_LDX_H 0x7008 -#define LA_3R_LDX_W 0x7010 -#define LA_3R_LDX_D 0x7018 -#define LA_3R_STX_B 0x7020 -#define LA_3R_STX_H 0x7028 -#define LA_3R_STX_W 0x7030 -#define LA_3R_STX_D 0x7038 -#define LA_3R_LDX_BU 0x7040 -#define LA_3R_LDX_HU 0x7048 -#define LA_3R_LDX_WU 0x7050 -#define LA_3R_PRELDX 0x7058 -#define LA_3R_FLDX_S 0x7060 -#define LA_3R_FLDX_D 0x7068 -#define LA_3R_FSTX_S 0x7070 -#define LA_3R_FSTX_D 0x7078 -#define LA_3R_AMSWAP_W 0x70c0 -#define LA_3R_AMSWAP_D 0x70c1 -#define LA_3R_AMADD_W 0x70c2 -#define LA_3R_AMADD_D 0x70c3 -#define LA_3R_AMAND_W 0x70c4 -#define LA_3R_AMAND_D 0x70c5 -#define LA_3R_AMOR_W 0x70c6 -#define LA_3R_AMOR_D 0x70c7 -#define LA_3R_AMXOR_W 0x70c8 -#define LA_3R_AMXOR_D 0x70c9 -#define LA_3R_AMMAX_W 0x70ca -#define LA_3R_AMMAX_D 0x70cb -#define LA_3R_AMMIN_W 0x70cc -#define LA_3R_AMMIN_D 0x70cd -#define LA_3R_AMMAX_WU 0x70ce -#define LA_3R_AMMAX_DU 0x70cf -#define LA_3R_AMMIN_WU 0x70d0 -#define LA_3R_AMMIN_DU 0x70d1 -#define LA_3R_AMSWAP_DB_W 0x70d2 -#define LA_3R_AMSWAP_DB_D 0x70d3 -#define LA_3R_AMADD_DB_W 0x70d4 -#define LA_3R_AMADD_DB_D 0x70d5 -#define LA_3R_AMAND_DB_W 0x70d6 -#define LA_3R_AMAND_DB_D 0x70d7 -#define LA_3R_AMOR_DB_W 0x70d8 -#define LA_3R_AMOR_DB_D 0x70d9 -#define LA_3R_AMXOR_DB_W 0x70da -#define LA_3R_AMXOR_DB_D 0x70db -#define LA_3R_AMMAX_DB_W 0x70dc -#define LA_3R_AMMAX_DB_D 0x70dd -#define LA_3R_AMMIN_DB_W 0x70de -#define LA_3R_AMMIN_DB_D 0x70df -#define LA_3R_AMMAX_DB_WU 0x70e0 -#define LA_3R_AMMAX_DB_DU 0x70e1 -#define LA_3R_AMMIN_DB_WU 0x70e2 -#define LA_3R_AMMIN_DB_DU 0x70e3 -#define LA_3R_FLDGT_S 0x70e8 -#define LA_3R_FLDGT_D 0x70e9 -#define LA_3R_FLDLE_S 0x70ea -#define 
LA_3R_FLDLE_D 0x70eb -#define LA_3R_FSTGT_S 0x70ec -#define LA_3R_FSTGT_D 0x70ed -#define LA_3R_FSTLE_S 0x70ee -#define LA_3R_FSTLE_D 0x70ef -#define LA_3R_LDGT_B 0x70f0 -#define LA_3R_LDGT_H 0x70f1 -#define LA_3R_LDGT_W 0x70f2 -#define LA_3R_LDGT_D 0x70f3 -#define LA_3R_LDLE_B 0x70f4 -#define LA_3R_LDLE_H 0x70f5 -#define LA_3R_LDLE_W 0x70f6 -#define LA_3R_LDLE_D 0x70f7 -#define LA_3R_STGT_B 0x70f8 -#define LA_3R_STGT_H 0x70f9 -#define LA_3R_STGT_W 0x70fa -#define LA_3R_STGT_D 0x70fb -#define LA_3R_STLE_B 0x70fc -#define LA_3R_STLE_H 0x70fd -#define LA_3R_STLE_W 0x70fe -#define LA_3R_STLE_D 0x70ff +#define LA_3R_ADD_W 0x20 +#define LA_3R_ADD_D 0x21 +#define LA_3R_SUB_W 0x22 +#define LA_3R_SUB_D 0x23 +#define LA_3R_SLT 0x24 +#define LA_3R_SLTU 0x25 +#define LA_3R_MASKEQZ 0x26 +#define LA_3R_MASKNEZ 0x27 +#define LA_3R_NOR 0x28 +#define LA_3R_AND 0x29 +#define LA_3R_OR 0x2a +#define LA_3R_XOR 0x2b +#define LA_3R_ORN 0x2c +#define LA_3R_ANDN 0x2d +#define LA_3R_SLL_W 0x2e +#define LA_3R_SRL_W 0x2f +#define LA_3R_SRA_W 0x30 +#define LA_3R_SLL_D 0x31 +#define LA_3R_SRL_D 0x32 +#define LA_3R_SRA_D 0x33 +#define LA_3R_ROTR_W 0x36 +#define LA_3R_ROTR_D 0x37 +#define LA_3R_MUL_W 0x38 +#define LA_3R_MULH_W 0x39 +#define LA_3R_MULH_WU 0x3a +#define LA_3R_MUL_D 0x3b +#define LA_3R_MULH_D 0x3c +#define LA_3R_MULH_DU 0x3d +#define LA_3R_MULW_D_W 0x3e +#define LA_3R_MULW_D_WU 0x3f +#define LA_3R_DIV_W 0x40 +#define LA_3R_MOD_W 0x41 +#define LA_3R_DIV_WU 0x42 +#define LA_3R_MOD_WU 0x43 +#define LA_3R_DIV_D 0x44 +#define LA_3R_MOD_D 0x45 +#define LA_3R_DIV_DU 0x46 +#define LA_3R_MOD_DU 0x47 +#define LA_3R_CRC_W_B_W 0x48 +#define LA_3R_CRC_W_H_W 0x49 +#define LA_3R_CRC_W_W_W 0x4a +#define LA_3R_CRC_W_D_W 0x4b +#define LA_3R_CRCC_W_B_W 0x4c +#define LA_3R_CRCC_W_H_W 0x4d +#define LA_3R_CRCC_W_W_W 0x4e +#define LA_3R_CRCC_W_D_W 0x4f +#define LA_3R_FADD_S 0x201 +#define LA_3R_FADD_D 0x202 +#define LA_3R_FSUB_S 0x205 +#define LA_3R_FSUB_D 0x206 +#define LA_3R_FMUL_S 0x209 +#define 
LA_3R_FMUL_D 0x20a +#define LA_3R_FDIV_S 0x20d +#define LA_3R_FDIV_D 0x20e +#define LA_3R_FMAX_S 0x211 +#define LA_3R_FMAX_D 0x212 +#define LA_3R_FMIN_S 0x215 +#define LA_3R_FMIN_D 0x216 +#define LA_3R_FMAXA_S 0x219 +#define LA_3R_FMAXA_D 0x21a +#define LA_3R_FMINA_S 0x21d +#define LA_3R_FMINA_D 0x21e +#define LA_3R_FSCALEB_S 0x221 +#define LA_3R_FSCALEB_D 0x222 +#define LA_3R_FCOPYSIGN_S 0x225 +#define LA_3R_FCOPYSIGN_D 0x226 +#define LA_3R_INVTLB 0xc91 +#define LA_3R_LDX_B 0x7000 +#define LA_3R_LDX_H 0x7008 +#define LA_3R_LDX_W 0x7010 +#define LA_3R_LDX_D 0x7018 +#define LA_3R_STX_B 0x7020 +#define LA_3R_STX_H 0x7028 +#define LA_3R_STX_W 0x7030 +#define LA_3R_STX_D 0x7038 +#define LA_3R_LDX_BU 0x7040 +#define LA_3R_LDX_HU 0x7048 +#define LA_3R_LDX_WU 0x7050 +#define LA_3R_PRELDX 0x7058 +#define LA_3R_FLDX_S 0x7060 +#define LA_3R_FLDX_D 0x7068 +#define LA_3R_FSTX_S 0x7070 +#define LA_3R_FSTX_D 0x7078 +#define LA_3R_AMSWAP_W 0x70c0 +#define LA_3R_AMSWAP_D 0x70c1 +#define LA_3R_AMADD_W 0x70c2 +#define LA_3R_AMADD_D 0x70c3 +#define LA_3R_AMAND_W 0x70c4 +#define LA_3R_AMAND_D 0x70c5 +#define LA_3R_AMOR_W 0x70c6 +#define LA_3R_AMOR_D 0x70c7 +#define LA_3R_AMXOR_W 0x70c8 +#define LA_3R_AMXOR_D 0x70c9 +#define LA_3R_AMMAX_W 0x70ca +#define LA_3R_AMMAX_D 0x70cb +#define LA_3R_AMMIN_W 0x70cc +#define LA_3R_AMMIN_D 0x70cd +#define LA_3R_AMMAX_WU 0x70ce +#define LA_3R_AMMAX_DU 0x70cf +#define LA_3R_AMMIN_WU 0x70d0 +#define LA_3R_AMMIN_DU 0x70d1 +#define LA_3R_AMSWAP_DB_W 0x70d2 +#define LA_3R_AMSWAP_DB_D 0x70d3 +#define LA_3R_AMADD_DB_W 0x70d4 +#define LA_3R_AMADD_DB_D 0x70d5 +#define LA_3R_AMAND_DB_W 0x70d6 +#define LA_3R_AMAND_DB_D 0x70d7 +#define LA_3R_AMOR_DB_W 0x70d8 +#define LA_3R_AMOR_DB_D 0x70d9 +#define LA_3R_AMXOR_DB_W 0x70da +#define LA_3R_AMXOR_DB_D 0x70db +#define LA_3R_AMMAX_DB_W 0x70dc +#define LA_3R_AMMAX_DB_D 0x70dd +#define LA_3R_AMMIN_DB_W 0x70de +#define LA_3R_AMMIN_DB_D 0x70df +#define LA_3R_AMMAX_DB_WU 0x70e0 +#define LA_3R_AMMAX_DB_DU 0x70e1 +#define 
LA_3R_AMMIN_DB_WU 0x70e2 +#define LA_3R_AMMIN_DB_DU 0x70e3 +#define LA_3R_FLDGT_S 0x70e8 +#define LA_3R_FLDGT_D 0x70e9 +#define LA_3R_FLDLE_S 0x70ea +#define LA_3R_FLDLE_D 0x70eb +#define LA_3R_FSTGT_S 0x70ec +#define LA_3R_FSTGT_D 0x70ed +#define LA_3R_FSTLE_S 0x70ee +#define LA_3R_FSTLE_D 0x70ef +#define LA_3R_LDGT_B 0x70f0 +#define LA_3R_LDGT_H 0x70f1 +#define LA_3R_LDGT_W 0x70f2 +#define LA_3R_LDGT_D 0x70f3 +#define LA_3R_LDLE_B 0x70f4 +#define LA_3R_LDLE_H 0x70f5 +#define LA_3R_LDLE_W 0x70f6 +#define LA_3R_LDLE_D 0x70f7 +#define LA_3R_STGT_B 0x70f8 +#define LA_3R_STGT_H 0x70f9 +#define LA_3R_STGT_W 0x70fa +#define LA_3R_STGT_D 0x70fb +#define LA_3R_STLE_B 0x70fc +#define LA_3R_STLE_H 0x70fd +#define LA_3R_STLE_W 0x70fe +#define LA_3R_STLE_D 0x70ff ////LA_OP_4R opcode: bit31 ~ bit20 -#define LA_4R_FMADD_S 0x81 -#define LA_4R_FMADD_D 0x82 -#define LA_4R_FMSUB_S 0x85 -#define LA_4R_FMSUB_D 0x86 -#define LA_4R_FNMADD_S 0x89 -#define LA_4R_FNMADD_D 0x8a -#define LA_4R_FNMSUB_S 0x8d -#define LA_4R_FNMSUB_D 0x8e -#define LA_4R_FSEL 0xd0 +#define LA_4R_FMADD_S 0x81 +#define LA_4R_FMADD_D 0x82 +#define LA_4R_FMSUB_S 0x85 +#define LA_4R_FMSUB_D 0x86 +#define LA_4R_FNMADD_S 0x89 +#define LA_4R_FNMADD_D 0x8a +#define LA_4R_FNMSUB_S 0x8d +#define LA_4R_FNMSUB_D 0x8e +#define LA_4R_FSEL 0xd0 ////LA_OP_2RI8 ////LA_OP_2RI12 opcode: bit31 ~ bit22 -#define LA_2RI12_SLTI 0x8 -#define LA_2RI12_SLTUI 0x9 -#define LA_2RI12_ADDI_W 0xa -#define LA_2RI12_ADDI_D 0xb -#define LA_2RI12_LU52I_D 0xc -#define LA_2RI12_ANDI 0xd -#define LA_2RI12_ORI 0xe -#define LA_2RI12_XORI 0xf -#define LA_2RI12_CACHE 0x18 -#define LA_2RI12_LD_B 0xa0 -#define LA_2RI12_LD_H 0xa1 -#define LA_2RI12_LD_W 0xa2 -#define LA_2RI12_LD_D 0xa3 -#define LA_2RI12_ST_B 0xa4 -#define LA_2RI12_ST_H 0xa5 -#define LA_2RI12_ST_W 0xa6 -#define LA_2RI12_ST_D 0xa7 -#define LA_2RI12_LD_BU 0xa8 -#define LA_2RI12_LD_HU 0xa9 -#define LA_2RI12_LD_WU 0xaa -#define LA_2RI12_PRELD 0xab -#define LA_2RI12_FLD_S 0xac -#define 
LA_2RI12_FST_S 0xad -#define LA_2RI12_FLD_D 0xae -#define LA_2RI12_FST_D 0xaf +#define LA_2RI12_SLTI 0x8 +#define LA_2RI12_SLTUI 0x9 +#define LA_2RI12_ADDI_W 0xa +#define LA_2RI12_ADDI_D 0xb +#define LA_2RI12_LU52I_D 0xc +#define LA_2RI12_ANDI 0xd +#define LA_2RI12_ORI 0xe +#define LA_2RI12_XORI 0xf +#define LA_2RI12_CACHE 0x18 +#define LA_2RI12_LD_B 0xa0 +#define LA_2RI12_LD_H 0xa1 +#define LA_2RI12_LD_W 0xa2 +#define LA_2RI12_LD_D 0xa3 +#define LA_2RI12_ST_B 0xa4 +#define LA_2RI12_ST_H 0xa5 +#define LA_2RI12_ST_W 0xa6 +#define LA_2RI12_ST_D 0xa7 +#define LA_2RI12_LD_BU 0xa8 +#define LA_2RI12_LD_HU 0xa9 +#define LA_2RI12_LD_WU 0xaa +#define LA_2RI12_PRELD 0xab +#define LA_2RI12_FLD_S 0xac +#define LA_2RI12_FST_S 0xad +#define LA_2RI12_FLD_D 0xae +#define LA_2RI12_FST_D 0xaf ////LA_OP_2RI14i opcode: bit31 ~ bit24 -#define LA_2RI14_LL_W 0x20 -#define LA_2RI14_SC_W 0x21 -#define LA_2RI14_LL_D 0x22 -#define LA_2RI14_SC_D 0x23 -#define LA_2RI14_LDPTR_W 0x24 -#define LA_2RI14_STPTR_W 0x25 -#define LA_2RI14_LDPTR_D 0x26 -#define LA_2RI14_STPTR_D 0x27 +#define LA_2RI14_LL_W 0x20 +#define LA_2RI14_SC_W 0x21 +#define LA_2RI14_LL_D 0x22 +#define LA_2RI14_SC_D 0x23 +#define LA_2RI14_LDPTR_W 0x24 +#define LA_2RI14_STPTR_W 0x25 +#define LA_2RI14_LDPTR_D 0x26 +#define LA_2RI14_STPTR_D 0x27 ////LA_OP_2RI16 opcode: bit31 ~ bit26 -#define LA_2RI16_ADDU16I_D 0x4 -#define LA_2RI16_JIRL 0x13 -#define LA_2RI16_BEQ 0x16 -#define LA_2RI16_BNE 0x17 -#define LA_2RI16_BLT 0x18 -#define LA_2RI16_BGE 0x19 -#define LA_2RI16_BLTU 0x1a -#define LA_2RI16_BGEU 0x1b +#define LA_2RI16_ADDU16I_D 0x4 +#define LA_2RI16_JIRL 0x13 +#define LA_2RI16_BEQ 0x16 +#define LA_2RI16_BNE 0x17 +#define LA_2RI16_BLT 0x18 +#define LA_2RI16_BGE 0x19 +#define LA_2RI16_BLTU 0x1a +#define LA_2RI16_BGEU 0x1b ////LA_OP_1RI20 opcode: bit31 ~ bit25 -#define LA_1RI20_LU12I_W 0xa -#define LA_1RI20_LU32I_D 0xb -#define LA_1RI20_PCADDI 0xc -#define LA_1RI20_PCALAU12I 0xd -#define LA_1RI20_PCADDU12I 0xe -#define 
LA_1RI20_PCADDU18I 0xf +#define LA_1RI20_LU12I_W 0xa +#define LA_1RI20_LU32I_D 0xb +#define LA_1RI20_PCADDI 0xc +#define LA_1RI20_PCALAU12I 0xd +#define LA_1RI20_PCADDU12I 0xe +#define LA_1RI20_PCADDU18I 0xf ////LA_OP_I26 -#define LA_I26_B 0x14 -#define LA_I26_BL 0x15 +#define LA_I26_B 0x14 +#define LA_I26_BL 0x15 ////LA_OP_1RI21 -#define LA_1RI21_BEQZ 0x10 -#define LA_1RI21_BNEZ 0x11 -#define LA_1RI21_BCEQZ 0x12 -#define LA_1RI21_BCNEZ 0x12 +#define LA_1RI21_BEQZ 0x10 +#define LA_1RI21_BNEZ 0x11 +#define LA_1RI21_BCEQZ 0x12 +#define LA_1RI21_BCNEZ 0x12 ////other -#define LA_OP_ALSL_W 0x1 -#define LA_OP_ALSL_WU 0x1 -#define LA_OP_ALSL_D 0xb -#define LA_OP_BYTEPICK_W 0x2 -#define LA_OP_BYTEPICK_D 0x3 -#define LA_OP_BREAK 0x54 -#define LA_OP_DBGCALL 0x55 -#define LA_OP_SYSCALL 0x56 -#define LA_OP_SLLI_W 0x10 -#define LA_OP_SLLI_D 0x10 -#define LA_OP_SRLI_W 0x11 -#define LA_OP_SRLI_D 0x11 -#define LA_OP_SRAI_W 0x12 -#define LA_OP_SRAI_D 0x12 -#define LA_OP_ROTRI_W 0x13 -#define LA_OP_ROTRI_D 0x13 -#define LA_OP_FCMP_cond_S 0xc1 -#define LA_OP_FCMP_cond_D 0xc2 -#define LA_OP_BSTRINS_W 0x1 -#define LA_OP_BSTRPICK_W 0x1 -#define LA_OP_BSTRINS_D 0x2 -#define LA_OP_BSTRPICK_D 0x3 -#define LA_OP_DBAR 0x70e4 -#define LA_OP_IBAR 0x70e5 +#define LA_OP_ALSL_W 0x1 +#define LA_OP_ALSL_WU 0x1 +#define LA_OP_ALSL_D 0xb +#define LA_OP_BYTEPICK_W 0x2 +#define LA_OP_BYTEPICK_D 0x3 +#define LA_OP_BREAK 0x54 +#define LA_OP_DBGCALL 0x55 +#define LA_OP_SYSCALL 0x56 +#define LA_OP_SLLI_W 0x10 +#define LA_OP_SLLI_D 0x10 +#define LA_OP_SRLI_W 0x11 +#define LA_OP_SRLI_D 0x11 +#define LA_OP_SRAI_W 0x12 +#define LA_OP_SRAI_D 0x12 +#define LA_OP_ROTRI_W 0x13 +#define LA_OP_ROTRI_D 0x13 +#define LA_OP_FCMP_cond_S 0xc1 +#define LA_OP_FCMP_cond_D 0xc2 +#define LA_OP_BSTRINS_W 0x1 +#define LA_OP_BSTRPICK_W 0x1 +#define LA_OP_BSTRINS_D 0x2 +#define LA_OP_BSTRPICK_D 0x3 +#define LA_OP_DBAR 0x70e4 +#define LA_OP_IBAR 0x70e5 //// add other define-macro here. 
- /*****************************************************************************/ const instruction emitJumpKindInstructions[] = { @@ -392,59 +391,55 @@ const emitJumpKind emitReverseJumpKinds[] = { * The macro define for instructions. */ -#define D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd or fd or hint */ \ - op0_code |= ((code_t)(op2_reg))<<5; /* rj */ \ - op0_code |= ((op3_imm) & 0xfff)<<10 +#define D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd or fd or hint */ \ + op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ + op0_code |= ((op3_imm)&0xfff) << 10 -#define D_INST_add_d(op0_code, op1_reg, op2_reg, op3_reg) \ - op0_code |= ((code_t)(op1_reg));/* rd */ \ - op0_code |= ((code_t)(op2_reg))<<5;/* rj */ \ - op0_code |= ((code_t)(op3_reg))<<10 /* rk */ +#define D_INST_add_d(op0_code, op1_reg, op2_reg, op3_reg) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ + op0_code |= ((code_t)(op3_reg)) << 10 /* rk */ -#define D_INST_3R(op0_code, op1_reg, op2_reg, op3_reg) \ - op0_code |= ((code_t)(op1_reg));/* rd */ \ - op0_code |= ((code_t)(op2_reg))<<5;/* rj */ \ - op0_code |= ((code_t)(op3_reg))<<10 /* rk */ +#define D_INST_3R(op0_code, op1_reg, op2_reg, op3_reg) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ + op0_code |= ((code_t)(op3_reg)) << 10 /* rk */ -#define D_INST_JIRL(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= ((code_t)(op2_reg))<<5; /* rj */ \ - op0_code |= ((op3_imm) & 0xffff)<<10 /* offs */ \ +#define D_INST_JIRL(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ + op0_code |= ((op3_imm)&0xffff) << 10 /* offs */ -#define D_INST_lu12i_w(op0_code, op1_reg, op2_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= 
((op2_imm) & 0xfffff)<<5 /* si20 */ +#define D_INST_lu12i_w(op0_code, op1_reg, op2_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((op2_imm)&0xfffff) << 5 /* si20 */ -#define D_INST_lu32i_d(op0_code, op1_reg, op2_imm) \ - D_INST_lu12i_w(op0_code, op1_reg, op2_imm) +#define D_INST_lu32i_d(op0_code, op1_reg, op2_imm) D_INST_lu12i_w(op0_code, op1_reg, op2_imm) -#define D_INST_lu52i_d(op0_code, op1_reg, op2_reg, op3_imm) \ - D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) +#define D_INST_lu52i_d(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) -#define D_INST_ori(op0_code, op1_reg, op2_reg, op3_imm) \ - D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) +#define D_INST_ori(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) -//Load or Store instructions. -#define D_INST_LS(op0_code, op1_reg, op2_reg, op3_imm) \ - D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) +// Load or Store instructions. +#define D_INST_LS(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) -#define D_INST_Bcond(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg) /*& 0x1f */)<<5; /* rj */ \ - op0_code |= ((code_t)(op2_reg) /*& 0x1f */); /* rd */ \ - assert(!((code_t)(op3_imm) & 0x3)); \ - op0_code |= (((code_t)(op3_imm)<<8) & 0x3fffc00) /* offset */ +#define D_INST_Bcond(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg) /*& 0x1f */) << 5; /* rj */ \ + op0_code |= ((code_t)(op2_reg) /*& 0x1f */); /* rd */ \ + assert(!((code_t)(op3_imm)&0x3)); \ + op0_code |= (((code_t)(op3_imm) << 8) & 0x3fffc00) /* offset */ -#define D_INST_Bcond_Z(op0_code, op1_reg, op1_imm) \ - assert(!((code_t)(op1_imm) & 0x3)); \ - op0_code |= ((code_t)(op1_reg) /*& 0x1f */)<<5; /* rj */ \ - op0_code |= (((code_t)(op1_imm)<<8) & 0x3fffc00); \ - op0_code |= (((code_t)(op1_imm)>>18) & 0x1f) /* offset */ +#define D_INST_Bcond_Z(op0_code, op1_reg, op1_imm) \ + 
assert(!((code_t)(op1_imm)&0x3)); \ + op0_code |= ((code_t)(op1_reg) /*& 0x1f */) << 5; /* rj */ \ + op0_code |= (((code_t)(op1_imm) << 8) & 0x3fffc00); \ + op0_code |= (((code_t)(op1_imm) >> 18) & 0x1f) /* offset */ -#define D_INST_B(op0_code, op1_imm) \ - assert(!((code_t)(op1_imm) & 0x3)); \ - op0_code |= (((code_t)(op1_imm)>>18) & 0x3ff); \ - op0_code |= (((code_t)(op1_imm)<<8) & 0x3fffc00) /* offset */ +#define D_INST_B(op0_code, op1_imm) \ + assert(!((code_t)(op1_imm)&0x3)); \ + op0_code |= (((code_t)(op1_imm) >> 18) & 0x3ff); \ + op0_code |= (((code_t)(op1_imm) << 8) & 0x3fffc00) /* offset */ /***************************************************************************** * Look up the instruction for a jump kind @@ -463,7 +458,7 @@ const emitJumpKind emitReverseJumpKinds[] = { /*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); return EJ_NONE; #if 0 for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++) @@ -520,7 +515,7 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) assert(!id->idIsLargeCns()); return sizeof(instrDesc); } - //break; + // break; case INS_OPTS_I: case INS_OPTS_RC: @@ -567,7 +562,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_FUNCS_6C: case IF_FUNCS_6D: case IF_FUNCS_11: - //case IF_LA: + // case IF_LA: break; default: @@ -582,7 +577,7 @@ inline bool emitter::emitInsMayWriteToGCReg(instruction ins) { assert(ins != INS_invalid); ////NOTE: please reference the file "instrsloongarch64.h" for details !!! - return (INS_mov <= ins) && (ins <= INS_jirl) ? true : false; + return (INS_mov <= ins) && (ins <= INS_jirl) ? true : false; } bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) @@ -598,27 +593,27 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) switch (ins) { case INS_st_d: + case INS_st_w: + case INS_st_b: + case INS_st_h: case INS_stptr_d: -/////// not used these instrs right now !!! 
- //case INS_sc_d: - //case INS_stx_d: -//#ifdef DEBUG -// case INS_st_b: -// case INS_st_h: -// case INS_st_w: -// case INS_stx_b: -// case INS_stx_h: -// case INS_stx_w: -// //case INS_sc_w: -// //case INS_stgt_b: -// //case INS_stgt_h: -// //case INS_stgt_w: -// //case INS_stgt_d: -// //case INS_stle_b: -// //case INS_stle_h: -// //case INS_stle_w: -// //case INS_stle_d: -//#endif + case INS_stx_d: + case INS_stx_w: + case INS_stx_b: + case INS_stx_h: + // case INS_sc_d: + // case INS_sc_w: + //// not used these instrs right now !!! + //#ifdef DEBUG + // case INS_stgt_b: + // case INS_stgt_h: + // case INS_stgt_w: + // case INS_stgt_d: + // case INS_stle_b: + // case INS_stle_h: + // case INS_stle_w: + // case INS_stle_d: + //#endif return true; default: return false; @@ -662,7 +657,7 @@ bool emitter::emitInsIsLoad(instruction ins) } //------------------------------------------------------------------------ -//emitInsIsStore: Returns true if the instruction is some kind of store instruction. +// emitInsIsStore: Returns true if the instruction is some kind of store instruction. // bool emitter::emitInsIsStore(instruction ins) { @@ -674,7 +669,7 @@ bool emitter::emitInsIsStore(instruction ins) } //------------------------------------------------------------------------- -//emitInsIsLoadOrStore: Returns true if the instruction is some kind of load/store instruction. +// emitInsIsLoadOrStore: Returns true if the instruction is some kind of load/store instruction. 
// bool emitter::emitInsIsLoadOrStore(instruction ins) { @@ -695,7 +690,7 @@ bool emitter::emitInsIsLoadOrStore(instruction ins) inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) { - code_t code = BAD_CODE; + code_t code = BAD_CODE; // clang-format off const static code_t insCode[] = @@ -719,14 +714,14 @@ inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) void emitter::emitIns(instruction ins) { - //instrDesc* id = emitNewInstrSmall(EA_8BYTE); + // instrDesc* id = emitNewInstrSmall(EA_8BYTE); instrDesc* id = emitNewInstr(EA_8BYTE); id->idIns(ins); id->idAddr()->iiaSetInstrEncode(emitInsCode(ins)); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -737,10 +732,10 @@ void emitter::emitIns(instruction ins) */ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - //assert(offs >= 0); + // assert(offs >= 0); ssize_t imm; - emitAttr size = EA_SIZE(attr);//it's better confirm attr with ins. + emitAttr size = EA_SIZE(attr); // it's better confirm attr with ins. #ifdef DEBUG switch (ins) @@ -749,10 +744,10 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va case INS_st_h: case INS_st_w: case INS_fst_s: - //case INS_swl: - //case INS_swr: - //case INS_sdl: - //case INS_sdr: + // case INS_swl: + // case INS_swr: + // case INS_sdl: + // case INS_sdr: case INS_st_d: case INS_fst_d: break; @@ -769,15 +764,15 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va bool FPbased; base = emitComp->lvaFrameAddress(varx, &FPbased); - imm = offs < 0 ? -offs -8: base + offs; + imm = offs < 0 ? -offs - 8 : base + offs; regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - reg2 = offs < 0 ? REG_R21 : reg2; - offs = offs < 0 ? -offs -8: offs; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? 
-offs - 8 : offs; if ((-2048 <= imm) && (imm < 2048)) { - //regs[1] = reg2; + // regs[1] = reg2; } else { @@ -789,7 +784,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); imm2 = imm2 & 0x7ff; - imm = imm3 ? imm2 - imm3 : imm2; + imm = imm3 ? imm2 - imm3 : imm2; reg2 = REG_RA; } @@ -810,16 +805,16 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va id->idSetIsLclVar(); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - //assert(offs >= 0); + // assert(offs >= 0); ssize_t imm; - emitAttr size = EA_SIZE(attr);//it's better confirm attr with ins. + emitAttr size = EA_SIZE(attr); // it's better confirm attr with ins. #ifdef DEBUG switch (ins) @@ -837,12 +832,12 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va case INS_ld_d: case INS_fld_d: - //case INS_lwl: - //case INS_lwr: + // case INS_lwl: + // case INS_lwr: - //case INS_ldl: - //case INS_ldr: - //assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size)); + // case INS_ldl: + // case INS_ldr: + // assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size)); break; case INS_lea: @@ -861,11 +856,11 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va bool FPbased; base = emitComp->lvaFrameAddress(varx, &FPbased); - imm = offs < 0 ? -offs -8: base + offs; + imm = offs < 0 ? -offs - 8 : base + offs; regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - reg2 = offs < 0 ? REG_R21 : reg2; - offs = offs < 0 ? -offs -8: offs; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? 
-offs - 8 : offs; reg1 = (regNumber)((char)reg1 & 0x1f); code_t code; @@ -887,7 +882,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va ssize_t imm2 = imm & 0xfff; emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_RA, REG_RA, imm2); - ins = INS_add_d; + ins = INS_add_d; code = emitInsCode(ins); D_INST_add_d(code, reg1, reg2, REG_RA); } @@ -902,15 +897,15 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va imm2 = imm2 & 0x7ff; code = emitInsCode(ins); - D_INST_2RI12(code, reg1/* & 0x1f*/, REG_RA, imm3 ? imm2 - imm3 : imm2); + D_INST_2RI12(code, reg1 /* & 0x1f*/, REG_RA, imm3 ? imm2 - imm3 : imm2); } - //reg2 = REG_RA; + // reg2 = REG_RA; } instrDesc* id = emitNewInstr(attr); id->idReg1(reg1); - //id->idReg2(reg2);//not used. + // id->idReg2(reg2);//not used. id->idIns(ins); @@ -919,7 +914,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va id->idSetIsLclVar(); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -937,13 +932,13 @@ void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) case INS_b: case INS_bl: assert(!(imm & 0x3)); - code |= ((imm>>18) & 0x3ff); //offs[25:16] - code |= ((imm>>2) & 0xffff)<<10;//offs[15:0] + code |= ((imm >> 18) & 0x3ff); // offs[25:16] + code |= ((imm >> 2) & 0xffff) << 10; // offs[15:0] break; case INS_dbar: case INS_ibar: assert((0 <= imm) && (imm <= 0x7fff)); - code |= (imm & 0x7fff); //hint + code |= (imm & 0x7fff); // hint break; default: unreached(); @@ -955,7 +950,7 @@ void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -967,8 +962,8 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of case INS_bceqz: case INS_bcnez: break; - //case INS_: - //case INS_: + // case INS_: + // case INS_: // break; default: @@ -980,9 +975,9 @@ void 
emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of assert(!(offs & 0x3)); assert(!(cc >> 3)); - code |= ((cc & 0x7) << 5); //cj - code |= ((offs >> 18) & 0x1f); //offs[20:16] - code |= ((offs >> 2) & 0xffff)<<10;//offs[15:0] + code |= ((cc & 0x7) << 5); // cj + code |= ((offs >> 18) & 0x1f); // offs[20:16] + code |= ((offs >> 2) & 0xffff) << 10; // offs[15:0] instrDesc* id = emitNewInstr(attr); @@ -990,7 +985,7 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -1001,7 +996,7 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 code_t code = emitInsCode(ins); @@ -1046,7 +1041,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) { code_t code = emitInsCode(ins); -//#ifdef DEBUG + //#ifdef DEBUG switch (ins) { case INS_lu12i_w: @@ -1058,52 +1053,52 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t assert(isGeneralRegister(reg)); assert((-524288 <= imm) && (imm < 524288)); - code |= reg; //rd - code |= (imm & 0xfffff)<<5;//si20 + code |= reg; // rd + code |= (imm & 0xfffff) << 5; // si20 break; case INS_beqz: case INS_bnez: assert(isGeneralRegisterOrR0(reg)); assert(!(imm & 0x3)); - assert((-1048576 <= (imm>>2)) && ((imm>>2) <= 1048575)); + assert((-1048576 <= (imm >> 2)) && ((imm >> 2) <= 1048575)); - code |= ((imm>>18) & 0x1f); //offs[20:16] - code |= reg << 5; //rj - code |= ((imm>>2) & 0xffff)<<10;//offs[15:0] + code |= ((imm >> 18) & 0x1f); // offs[20:16] + code |= reg << 5; // rj + code |= ((imm >> 2) & 0xffff) << 10; // offs[15:0] break; case 
INS_movfr2cf: assert(isFloatReg(reg)); assert((0 <= imm) && (imm <= 7)); - code |= (reg & 0x1f)<<5;//fj - code |= imm /*& 0x7*/; //cc + code |= (reg & 0x1f) << 5; // fj + code |= imm /*& 0x7*/; // cc break; case INS_movcf2fr: assert(isFloatReg(reg)); assert((0 <= imm) && (imm <= 7)); - code |= (reg & 0x1f);//fd - code |= (imm /*& 0x7*/)<<5; //cc + code |= (reg & 0x1f); // fd + code |= (imm /*& 0x7*/) << 5; // cc break; case INS_movgr2cf: assert(isGeneralRegister(reg)); assert((0 <= imm) && (imm <= 7)); - code |= reg<<5;//rj - code |= imm /*& 0x7*/; //cc + code |= reg << 5; // rj + code |= imm /*& 0x7*/; // cc break; case INS_movcf2gr: assert(isGeneralRegister(reg)); assert((0 <= imm) && (imm <= 7)); - code |= reg;//rd - code |= (imm /*& 0x7*/)<<5; //cc + code |= reg; // rd + code |= (imm /*& 0x7*/) << 5; // cc break; default: unreached(); break; } // end switch (ins) -//#endif + //#endif instrDesc* id = emitNewInstr(attr); @@ -1112,11 +1107,11 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } -//NOTEADD:This function is new in emitarm64.cpp,so it be added to emitloongarch.cpp. +// NOTEADD:This function is new in emitarm64.cpp,so it be added to emitloongarch.cpp. // But I don't konw how to change it so that it can be used on LA. // I just add a statement "assert(!"unimplemented on LOONGARCH yet");". //------------------------------------------------------------------------ @@ -1132,7 +1127,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t // void emitter::emitIns_Mov( instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) -{//TODO: should amend for LoongArch64/LOONGARCH64. +{ // TODO: should amend for LoongArch64/LOONGARCH64. 
assert(IsMovInstruction(ins)); if (!canSkip || (dstReg != srcReg)) @@ -1149,134 +1144,140 @@ void emitter::emitIns_R_R( { code_t code = emitInsCode(ins); - if (INS_mov == ins) { + if (INS_mov == ins) + { assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); - code |= reg1; //rd - code |= reg2<<5; //rj + code |= reg1; // rd + code |= reg2 << 5; // rj } - else if ((INS_ext_w_b <= ins) && (ins <= INS_cpucfg)) { - //case INS_ext_w_b: - //case INS_ext_w_h: - //case INS_clo_w: - //case INS_clz_w: - //case INS_cto_w: - //case INS_ctz_w: - //case INS_clo_d: - //case INS_clz_d: - //case INS_cto_d: - //case INS_ctz_d: - //case INS_revb_2h: - //case INS_revb_4h: - //case INS_revb_2w: - //case INS_revb_d: - //case INS_revh_2w: - //case INS_revh_d: - //case INS_bitrev_4b: - //case INS_bitrev_8b: - //case INS_bitrev_w: - //case INS_bitrev_d: - //case INS_rdtimel_w: - //case INS_rdtimeh_w: - //case INS_rdtime_d: - //case INS_cpucfg: + else if ((INS_ext_w_b <= ins) && (ins <= INS_cpucfg)) + { + // case INS_ext_w_b: + // case INS_ext_w_h: + // case INS_clo_w: + // case INS_clz_w: + // case INS_cto_w: + // case INS_ctz_w: + // case INS_clo_d: + // case INS_clz_d: + // case INS_cto_d: + // case INS_ctz_d: + // case INS_revb_2h: + // case INS_revb_4h: + // case INS_revb_2w: + // case INS_revb_d: + // case INS_revh_2w: + // case INS_revh_d: + // case INS_bitrev_4b: + // case INS_bitrev_8b: + // case INS_bitrev_w: + // case INS_bitrev_d: + // case INS_rdtimel_w: + // case INS_rdtimeh_w: + // case INS_rdtime_d: + // case INS_cpucfg: assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); - code |= reg1; //rd - code |= reg2 << 5;//rj + code |= reg1; // rd + code |= reg2 << 5; // rj } - else if ((INS_asrtle_d == ins) || (INS_asrtgt_d == ins)) { - //case INS_asrtle_d: - //case INS_asrtgt_d: + else if ((INS_asrtle_d == ins) || (INS_asrtgt_d == ins)) + { + // case INS_asrtle_d: + // case INS_asrtgt_d: assert(isGeneralRegisterOrR0(reg1)); 
assert(isGeneralRegisterOrR0(reg2)); - code |= reg1 << 5; //rj - code |= reg2 << 10; //rk + code |= reg1 << 5; // rj + code |= reg2 << 10; // rk } - else if ((INS_fabs_s <= ins) && (ins <= INS_fmov_d)) { - //case INS_fabs_s: - //case INS_fabs_d: - //case INS_fneg_s: - //case INS_fneg_d: - //case INS_fsqrt_s: - //case INS_fsqrt_d: - //case INS_frsqrt_s: - //case INS_frsqrt_d: - //case INS_frecip_s: - //case INS_frecip_d: - //case INS_flogb_s: - //case INS_flogb_d: - //case INS_fclass_s: - //case INS_fclass_d: - //case INS_fcvt_s_d: - //case INS_fcvt_d_s: - //case INS_ffint_s_w: - //case INS_ffint_s_l: - //case INS_ffint_d_w: - //case INS_ffint_d_l: - //case INS_ftint_w_s: - //case INS_ftint_w_d: - //case INS_ftint_l_s: - //case INS_ftint_l_d: - //case INS_ftintrm_w_s: - //case INS_ftintrm_w_d: - //case INS_ftintrm_l_s: - //case INS_ftintrm_l_d: - //case INS_ftintrp_w_s: - //case INS_ftintrp_w_d: - //case INS_ftintrp_l_s: - //case INS_ftintrp_l_d: - //case INS_ftintrz_w_s: - //case INS_ftintrz_w_d: - //case INS_ftintrz_l_s: - //case INS_ftintrz_l_d: - //case INS_ftintrne_w_s: - //case INS_ftintrne_w_d: - //case INS_ftintrne_l_s: - //case INS_ftintrne_l_d: - //case INS_frint_s: - //case INS_frint_d: - //case INS_fmov_s: - //case INS_fmov_d: + else if ((INS_fabs_s <= ins) && (ins <= INS_fmov_d)) + { + // case INS_fabs_s: + // case INS_fabs_d: + // case INS_fneg_s: + // case INS_fneg_d: + // case INS_fsqrt_s: + // case INS_fsqrt_d: + // case INS_frsqrt_s: + // case INS_frsqrt_d: + // case INS_frecip_s: + // case INS_frecip_d: + // case INS_flogb_s: + // case INS_flogb_d: + // case INS_fclass_s: + // case INS_fclass_d: + // case INS_fcvt_s_d: + // case INS_fcvt_d_s: + // case INS_ffint_s_w: + // case INS_ffint_s_l: + // case INS_ffint_d_w: + // case INS_ffint_d_l: + // case INS_ftint_w_s: + // case INS_ftint_w_d: + // case INS_ftint_l_s: + // case INS_ftint_l_d: + // case INS_ftintrm_w_s: + // case INS_ftintrm_w_d: + // case INS_ftintrm_l_s: + // case INS_ftintrm_l_d: + 
// case INS_ftintrp_w_s: + // case INS_ftintrp_w_d: + // case INS_ftintrp_l_s: + // case INS_ftintrp_l_d: + // case INS_ftintrz_w_s: + // case INS_ftintrz_w_d: + // case INS_ftintrz_l_s: + // case INS_ftintrz_l_d: + // case INS_ftintrne_w_s: + // case INS_ftintrne_w_d: + // case INS_ftintrne_l_s: + // case INS_ftintrne_l_d: + // case INS_frint_s: + // case INS_frint_d: + // case INS_fmov_s: + // case INS_fmov_d: assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); - code |= (reg1 & 0x1f); //fd - code |= (reg2 & 0x1f)<<5; //fj + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj } - else if ((INS_movgr2fr_w <= ins) && (ins <= INS_movgr2frh_w)) { - //case INS_movgr2fr_w: - //case INS_movgr2fr_d: - //case INS_movgr2frh_w: + else if ((INS_movgr2fr_w <= ins) && (ins <= INS_movgr2frh_w)) + { + // case INS_movgr2fr_w: + // case INS_movgr2fr_d: + // case INS_movgr2frh_w: assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); - code |= (reg1 & 0x1f); //fd - code |= reg2 << 5; //rj + code |= (reg1 & 0x1f); // fd + code |= reg2 << 5; // rj } - else if ((INS_movfr2gr_s <= ins) && (ins <= INS_movfrh2gr_s)) { - //case INS_movfr2gr_s: - //case INS_movfr2gr_d: - //case INS_movfrh2gr_s: + else if ((INS_movfr2gr_s <= ins) && (ins <= INS_movfrh2gr_s)) + { + // case INS_movfr2gr_s: + // case INS_movfr2gr_d: + // case INS_movfrh2gr_s: assert(isGeneralRegisterOrR0(reg1)); assert(isFloatReg(reg2)); - code |= reg1; //rd - code |= (reg2 & 0x1f)<<5; //fj + code |= reg1; // rd + code |= (reg2 & 0x1f) << 5; // fj } else if ((INS_dneg == ins) || (INS_neg == ins)) { assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); - //sub_d rd, zero, rk - //sub_w rd, zero, rk - code |= reg1; //rd - code |= reg2 << 10; //rk + // sub_d rd, zero, rk + // sub_w rd, zero, rk + code |= reg1; // rd + code |= reg2 << 10; // rk } else if (INS_not == ins) { assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); - //nor rd, rj, zero - code |= reg1; //rd - 
code |= reg2 << 5; //rj + // nor rd, rj, zero + code |= reg1; // rd + code |= reg2 << 5; // rj } else { @@ -1291,14 +1292,14 @@ void emitter::emitIns_R_R( id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } void emitter::emitIns_R_I_I( instruction ins, emitAttr attr, regNumber reg, ssize_t hint, ssize_t off, insOpts opt /* = INS_OPTS_NONE */) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 #ifdef DEBUG switch (ins) @@ -1341,182 +1342,191 @@ void emitter::emitIns_R_R_I( { code_t code = emitInsCode(ins); - if ((INS_slli_w <= ins) && (ins <= INS_rotri_w)) { - //INS_slli_w - //INS_srli_w - //INS_srai_w - //INS_rotri_w + if ((INS_slli_w <= ins) && (ins <= INS_rotri_w)) + { + // INS_slli_w + // INS_srli_w + // INS_srai_w + // INS_rotri_w assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((0 <= imm) && (imm <= 0x1f)); - code |= reg1; //rd - code |= reg2<<5; //rj - code |= (imm & 0x1f)<<10;//ui5 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x1f) << 10; // ui5 } - else if ((INS_slli_d <= ins) && (ins <= INS_rotri_d)) { - //INS_slli_d - //INS_srli_d - //INS_srai_d - //INS_rotri_d + else if ((INS_slli_d <= ins) && (ins <= INS_rotri_d)) + { + // INS_slli_d + // INS_srli_d + // INS_srai_d + // INS_rotri_d assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((0 <= imm) && (imm <= 0x3f)); - code |= reg1; //rd - code |= reg2<<5; //rj - code |= (imm & 0x3f)<<10;//ui6 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x3f) << 10; // ui6 } - else if (((INS_addi_w <= ins) && (ins <= INS_xori)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || ((INS_st_b <= ins) && (ins <= INS_st_d))) { + else if (((INS_addi_w <= ins) && (ins <= INS_xori)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || + ((INS_st_b <= ins) && (ins <= INS_st_d))) + { #ifdef DEBUG assert(isGeneralRegister(reg1)); 
assert(isGeneralRegisterOrR0(reg2)); - if (((INS_addi_w <= ins) && (ins <= INS_slti)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || ((INS_st_b <= ins) && (ins <= INS_st_d))) { - //case INS_addi_w: - //case INS_addi_d: - //case INS_lu52i_d: - //case INS_slti: - //case INS_ld_b: - //case INS_ld_h: - //case INS_ld_w: - //case INS_ld_d: - //case INS_ld_bu: - //case INS_ld_hu: - //case INS_ld_wu: - //case INS_st_b: - //case INS_st_h: - //case INS_st_w: - //case INS_st_d: + if (((INS_addi_w <= ins) && (ins <= INS_slti)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || + ((INS_st_b <= ins) && (ins <= INS_st_d))) + { + // case INS_addi_w: + // case INS_addi_d: + // case INS_lu52i_d: + // case INS_slti: + // case INS_ld_b: + // case INS_ld_h: + // case INS_ld_w: + // case INS_ld_d: + // case INS_ld_bu: + // case INS_ld_hu: + // case INS_ld_wu: + // case INS_st_b: + // case INS_st_h: + // case INS_st_w: + // case INS_st_d: assert((-2048 <= imm) && (imm <= 2047)); } else if (ins == INS_sltui) { - //case INS_sltui: + // case INS_sltui: assert((0 <= imm) && (imm <= 0x7ff)); } else { - //case INS_andi: - //case INS_ori: - //case INS_xori: + // case INS_andi: + // case INS_ori: + // case INS_xori: assert((0 <= imm) && (imm <= 0xfff)); } #endif - code |= reg1; //rd - code |= reg2<<5; //rj - code |= (imm & 0xfff)<<10;//si12 or ui12 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xfff) << 10; // si12 or ui12 } - else if ((INS_fld_s <= ins) && (ins <= INS_fst_d)) { - //INS_fld_s - //INS_fld_d - //INS_fst_s - //INS_fst_d + else if ((INS_fld_s <= ins) && (ins <= INS_fst_d)) + { + // INS_fld_s + // INS_fld_d + // INS_fst_s + // INS_fst_d assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((-2048 <= imm) && (imm <= 2047)); - code |= reg1 & 0x1f; //fd - code |= reg2 << 5; //rj - code |= (imm & 0xfff)<<10;//si12 + code |= reg1 & 0x1f; // fd + code |= reg2 << 5; // rj + code |= (imm & 0xfff) << 10; // si12 } - else if (((INS_ll_d >= ins) && (ins >= 
INS_ldptr_w)) || ((INS_sc_d >= ins) && (ins >= INS_stptr_w))) { - //INS_ldptr_w - //INS_ldptr_d - //INS_ll_w - //INS_ll_d - - //INS_stptr_w - //INS_stptr_d - //INS_sc_w - //INS_sc_d + else if (((INS_ll_d >= ins) && (ins >= INS_ldptr_w)) || ((INS_sc_d >= ins) && (ins >= INS_stptr_w))) + { + // INS_ldptr_w + // INS_ldptr_d + // INS_ll_w + // INS_ll_d + + // INS_stptr_w + // INS_stptr_d + // INS_sc_w + // INS_sc_d assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((-8192 <= imm) && (imm <= 8191)); - code |= reg1; //rd - code |= reg2 << 5; //rj - code |= (imm & 0x3fff)<<10;//si14 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x3fff) << 10; // si14 } else if ((INS_beq <= ins) && (ins <= INS_bgeu)) { - //INS_beq - //INS_bne - //INS_blt - //INS_bltu - //INS_bge - //INS_bgeu + // INS_beq + // INS_bne + // INS_blt + // INS_bltu + // INS_bge + // INS_bgeu assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(!(imm & 0x3)); - assert((-32768 <= (imm>>2)) && ((imm>>2) <= 32767)); + assert((-32768 <= (imm >> 2)) && ((imm >> 2) <= 32767)); - code |= reg1 << 5; //rj - code |= reg2; //rd - code |= ((imm>>2) & 0xffff)<<10;//offs16 + code |= reg1 << 5; // rj + code |= reg2; // rd + code |= ((imm >> 2) & 0xffff) << 10; // offs16 } else if ((INS_fcmp_caf_s <= ins) && (ins <= INS_fcmp_sune_s)) { - //INS_fcmp_caf_s - //INS_fcmp_cun_s - //INS_fcmp_ceq_s - //INS_fcmp_cueq_s - //INS_fcmp_clt_s - //INS_fcmp_cult_s - //INS_fcmp_cle_s - //INS_fcmp_cule_s - //INS_fcmp_cne_s - //INS_fcmp_cor_s - //INS_fcmp_cune_s - //INS_fcmp_saf_d - //INS_fcmp_sun_d - //INS_fcmp_seq_d - //INS_fcmp_sueq_d - //INS_fcmp_slt_d - //INS_fcmp_sult_d - //INS_fcmp_sle_d - //INS_fcmp_sule_d - //INS_fcmp_sne_d - //INS_fcmp_sor_d - //INS_fcmp_sune_d - //INS_fcmp_caf_d - //INS_fcmp_cun_d - //INS_fcmp_ceq_d - //INS_fcmp_cueq_d - //INS_fcmp_clt_d - //INS_fcmp_cult_d - //INS_fcmp_cle_d - //INS_fcmp_cule_d - //INS_fcmp_cne_d - //INS_fcmp_cor_d - 
//INS_fcmp_cune_d - //INS_fcmp_saf_s - //INS_fcmp_sun_s - //INS_fcmp_seq_s - //INS_fcmp_sueq_s - //INS_fcmp_slt_s - //INS_fcmp_sult_s - //INS_fcmp_sle_s - //INS_fcmp_sule_s - //INS_fcmp_sne_s - //INS_fcmp_sor_s - //INS_fcmp_sune_s + // INS_fcmp_caf_s + // INS_fcmp_cun_s + // INS_fcmp_ceq_s + // INS_fcmp_cueq_s + // INS_fcmp_clt_s + // INS_fcmp_cult_s + // INS_fcmp_cle_s + // INS_fcmp_cule_s + // INS_fcmp_cne_s + // INS_fcmp_cor_s + // INS_fcmp_cune_s + // INS_fcmp_saf_d + // INS_fcmp_sun_d + // INS_fcmp_seq_d + // INS_fcmp_sueq_d + // INS_fcmp_slt_d + // INS_fcmp_sult_d + // INS_fcmp_sle_d + // INS_fcmp_sule_d + // INS_fcmp_sne_d + // INS_fcmp_sor_d + // INS_fcmp_sune_d + // INS_fcmp_caf_d + // INS_fcmp_cun_d + // INS_fcmp_ceq_d + // INS_fcmp_cueq_d + // INS_fcmp_clt_d + // INS_fcmp_cult_d + // INS_fcmp_cle_d + // INS_fcmp_cule_d + // INS_fcmp_cne_d + // INS_fcmp_cor_d + // INS_fcmp_cune_d + // INS_fcmp_saf_s + // INS_fcmp_sun_s + // INS_fcmp_seq_s + // INS_fcmp_sueq_s + // INS_fcmp_slt_s + // INS_fcmp_sult_s + // INS_fcmp_sle_s + // INS_fcmp_sule_s + // INS_fcmp_sne_s + // INS_fcmp_sor_s + // INS_fcmp_sune_s assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); assert((0 <= imm) && (imm <= 7)); - code |= (reg1 & 0x1f)<<5; //fj - code |= (reg2 & 0x1f)<<10; //fk - code |= imm & 0x7; //cc + code |= (reg1 & 0x1f) << 5; // fj + code |= (reg2 & 0x1f) << 10; // fk + code |= imm & 0x7; // cc } - else if (INS_addu16i_d == ins) { + else if (INS_addu16i_d == ins) + { assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((-32768 <= imm) && (imm < 32768)); - code |= reg1; //rd - code |= reg2<<5; //rj - code |= (imm & 0xffff)<<10;//si16 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xffff) << 10; // si16 } else if (INS_jirl == ins) { @@ -1524,9 +1534,9 @@ void emitter::emitIns_R_R_I( assert(isGeneralRegisterOrR0(reg2)); assert((-32768 <= imm) && (imm < 32768)); - code |= reg1; //rd - code |= reg2<<5; //rj - code |= (imm & 
0xffff)<<10;//offs16 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xffff) << 10; // offs16 } else { @@ -1541,7 +1551,7 @@ void emitter::emitIns_R_R_I( id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -1556,7 +1566,7 @@ void emitter::emitIns_R_R_I( * - Requires that reg1 != reg2 */ void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm) -{//maybe optimize. +{ // maybe optimize. assert(isGeneralRegister(reg1)); assert(reg1 != reg2); @@ -1567,20 +1577,20 @@ void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, re { case INS_addi_w: case INS_addi_d: - //case INS_lui: - //case INS_lbu: - //case INS_lhu: - //case INS_lwu: - //case INS_lb: - //case INS_lh: - //case INS_lw: + // case INS_lui: + // case INS_lbu: + // case INS_lhu: + // case INS_lwu: + // case INS_lb: + // case INS_lh: + // case INS_lw: case INS_ld_d: - //case INS_sb: - //case INS_sh: - //case INS_sw: - //case INS_sd: - ////case INS_lwc1: - ////case INS_ldc1: + // case INS_sb: + // case INS_sh: + // case INS_sw: + // case INS_sd: + ////case INS_lwc1: + ////case INS_ldc1: immFits = isValidSimm12(imm); break; @@ -1621,182 +1631,186 @@ void emitter::emitIns_R_R_R( { code_t code = emitInsCode(ins); - if (((INS_add_w <= ins) && (ins <= INS_crcc_w_d_w)) || ((INS_ldx_b <= ins) && (ins <= INS_ldle_d)) || ((INS_stx_b <= ins) && (ins <= INS_stle_d))) { - //case INS_add_w: - //case INS_add_d: - //case INS_sub_w: - //case INS_sub_d: - //case INS_and: - //case INS_or: - //case INS_nor: - //case INS_xor: - //case INS_andn: - //case INS_orn: - - //case INS_mul_w: - //case INS_mul_d: - //case INS_mulh_w: - //case INS_mulh_wu: - //case INS_mulh_d: - //case INS_mulh_du: - //case INS_mulw_d_w: - //case INS_mulw_d_wu: - //case INS_div_w: - //case INS_div_wu: - //case INS_div_d: - //case INS_div_du: - //case INS_mod_w: - //case INS_mod_wu: - //case INS_mod_d: - //case 
INS_mod_du: - - //case INS_sll_w: - //case INS_srl_w: - //case INS_sra_w: - //case INS_rotr_w: - //case INS_sll_d: - //case INS_srl_d: - //case INS_sra_d: - //case INS_rotr_d: - - //case INS_maskeqz: - //case INS_masknez: - - //case INS_slt: - //case INS_sltu: - - //case INS_ldx_b: - //case INS_ldx_h: - //case INS_ldx_w: - //case INS_ldx_d: - //case INS_ldx_bu: - //case INS_ldx_hu: - //case INS_ldx_wu: - //case INS_stx_b: - //case INS_stx_h: - //case INS_stx_w: - //case INS_stx_d: - - //case INS_ldgt_b: - //case INS_ldgt_h: - //case INS_ldgt_w: - //case INS_ldgt_d: - //case INS_ldle_b: - //case INS_ldle_h: - //case INS_ldle_w: - //case INS_ldle_d: - //case INS_stgt_b: - //case INS_stgt_h: - //case INS_stgt_w: - //case INS_stgt_d: - //case INS_stle_b: - //case INS_stle_h: - //case INS_stle_w: - //case INS_stle_d: - - //case INS_amswap_w: - //case INS_amswap_d: - //case INS_amswap_db_w: - //case INS_amswap_db_d: - //case INS_amadd_w: - //case INS_amadd_d: - //case INS_amadd_db_w: - //case INS_amadd_db_d: - //case INS_amand_w: - //case INS_amand_d: - //case INS_amand_db_w: - //case INS_amand_db_d: - //case INS_amor_w: - //case INS_amor_d: - //case INS_amor_db_w: - //case INS_amor_db_d: - //case INS_amxor_w: - //case INS_amxor_d: - //case INS_amxor_db_w: - //case INS_amxor_db_d: - //case INS_ammax_w: - //case INS_ammax_d: - //case INS_ammax_db_w: - //case INS_ammax_db_d: - //case INS_ammin_w: - //case INS_ammin_d: - //case INS_ammin_db_w: - //case INS_ammin_db_d: - //case INS_ammax_wu: - //case INS_ammax_du: - //case INS_ammax_db_wu: - //case INS_ammax_db_du: - //case INS_ammin_wu: - //case INS_ammin_du: - //case INS_ammin_db_wu: - //case INS_ammin_db_du: - - //case INS_crc_w_b_w: - //case INS_crc_w_h_w: - //case INS_crc_w_w_w: - //case INS_crc_w_d_w: - //case INS_crcc_w_b_w: - //case INS_crcc_w_h_w: - //case INS_crcc_w_w_w: - //case INS_crcc_w_d_w: + if (((INS_add_w <= ins) && (ins <= INS_crcc_w_d_w)) || ((INS_ldx_b <= ins) && (ins <= INS_ldle_d)) || + ((INS_stx_b <= 
ins) && (ins <= INS_stle_d))) + { + // case INS_add_w: + // case INS_add_d: + // case INS_sub_w: + // case INS_sub_d: + // case INS_and: + // case INS_or: + // case INS_nor: + // case INS_xor: + // case INS_andn: + // case INS_orn: + + // case INS_mul_w: + // case INS_mul_d: + // case INS_mulh_w: + // case INS_mulh_wu: + // case INS_mulh_d: + // case INS_mulh_du: + // case INS_mulw_d_w: + // case INS_mulw_d_wu: + // case INS_div_w: + // case INS_div_wu: + // case INS_div_d: + // case INS_div_du: + // case INS_mod_w: + // case INS_mod_wu: + // case INS_mod_d: + // case INS_mod_du: + + // case INS_sll_w: + // case INS_srl_w: + // case INS_sra_w: + // case INS_rotr_w: + // case INS_sll_d: + // case INS_srl_d: + // case INS_sra_d: + // case INS_rotr_d: + + // case INS_maskeqz: + // case INS_masknez: + + // case INS_slt: + // case INS_sltu: + + // case INS_ldx_b: + // case INS_ldx_h: + // case INS_ldx_w: + // case INS_ldx_d: + // case INS_ldx_bu: + // case INS_ldx_hu: + // case INS_ldx_wu: + // case INS_stx_b: + // case INS_stx_h: + // case INS_stx_w: + // case INS_stx_d: + + // case INS_ldgt_b: + // case INS_ldgt_h: + // case INS_ldgt_w: + // case INS_ldgt_d: + // case INS_ldle_b: + // case INS_ldle_h: + // case INS_ldle_w: + // case INS_ldle_d: + // case INS_stgt_b: + // case INS_stgt_h: + // case INS_stgt_w: + // case INS_stgt_d: + // case INS_stle_b: + // case INS_stle_h: + // case INS_stle_w: + // case INS_stle_d: + + // case INS_amswap_w: + // case INS_amswap_d: + // case INS_amswap_db_w: + // case INS_amswap_db_d: + // case INS_amadd_w: + // case INS_amadd_d: + // case INS_amadd_db_w: + // case INS_amadd_db_d: + // case INS_amand_w: + // case INS_amand_d: + // case INS_amand_db_w: + // case INS_amand_db_d: + // case INS_amor_w: + // case INS_amor_d: + // case INS_amor_db_w: + // case INS_amor_db_d: + // case INS_amxor_w: + // case INS_amxor_d: + // case INS_amxor_db_w: + // case INS_amxor_db_d: + // case INS_ammax_w: + // case INS_ammax_d: + // case 
INS_ammax_db_w: + // case INS_ammax_db_d: + // case INS_ammin_w: + // case INS_ammin_d: + // case INS_ammin_db_w: + // case INS_ammin_db_d: + // case INS_ammax_wu: + // case INS_ammax_du: + // case INS_ammax_db_wu: + // case INS_ammax_db_du: + // case INS_ammin_wu: + // case INS_ammin_du: + // case INS_ammin_db_wu: + // case INS_ammin_db_du: + + // case INS_crc_w_b_w: + // case INS_crc_w_h_w: + // case INS_crc_w_w_w: + // case INS_crc_w_d_w: + // case INS_crcc_w_b_w: + // case INS_crcc_w_h_w: + // case INS_crcc_w_w_w: + // case INS_crcc_w_d_w: assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); - code |= (reg1 /*& 0x1f*/); //rd - code |= (reg2 /*& 0x1f*/)<<5; //rj - code |= (reg3 /*& 0x1f*/)<<10;//rk + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + code |= (reg3 /*& 0x1f*/) << 10; // rk } - else if ((INS_fadd_s <= ins) && (ins <= INS_fcopysign_d)) { - //case INS_fadd_s: - //case INS_fadd_d: - //case INS_fsub_s: - //case INS_fsub_d: - //case INS_fmul_s: - //case INS_fmul_d: - //case INS_fdiv_s: - //case INS_fdiv_d: - //case INS_fmax_s: - //case INS_fmax_d: - //case INS_fmin_s: - //case INS_fmin_d: - //case INS_fmaxa_s: - //case INS_fmaxa_d: - //case INS_fmina_s: - //case INS_fmina_d: - //case INS_fscaleb_s: - //case INS_fscaleb_d: - //case INS_fcopysign_s: - //case INS_fcopysign_d: + else if ((INS_fadd_s <= ins) && (ins <= INS_fcopysign_d)) + { + // case INS_fadd_s: + // case INS_fadd_d: + // case INS_fsub_s: + // case INS_fsub_d: + // case INS_fmul_s: + // case INS_fmul_d: + // case INS_fdiv_s: + // case INS_fdiv_d: + // case INS_fmax_s: + // case INS_fmax_d: + // case INS_fmin_s: + // case INS_fmin_d: + // case INS_fmaxa_s: + // case INS_fmaxa_d: + // case INS_fmina_s: + // case INS_fmina_d: + // case INS_fscaleb_s: + // case INS_fscaleb_d: + // case INS_fcopysign_s: + // case INS_fcopysign_d: assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); assert(isFloatReg(reg3)); - code |= 
(reg1 & 0x1f); //fd - code |= (reg2 & 0x1f)<<5; //fj - code |= (reg3 & 0x1f)<<10;//fk + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk } - else if ((INS_fldx_s <= ins) && (ins <= INS_fstle_d)) { - //case INS_fldx_s: - //case INS_fldx_d: - //case INS_fstx_s: - //case INS_fstx_d: - - //case INS_fldgt_s: - //case INS_fldgt_d: - //case INS_fldle_s: - //case INS_fldle_d: - //case INS_fstgt_s: - //case INS_fstgt_d: - //case INS_fstle_s: - //case INS_fstle_d: + else if ((INS_fldx_s <= ins) && (ins <= INS_fstle_d)) + { + // case INS_fldx_s: + // case INS_fldx_d: + // case INS_fstx_s: + // case INS_fstx_d: + + // case INS_fldgt_s: + // case INS_fldgt_d: + // case INS_fldle_s: + // case INS_fldle_d: + // case INS_fstgt_s: + // case INS_fstgt_d: + // case INS_fstle_s: + // case INS_fstle_d: assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); - code |= reg1 & 0x1f; //fd - code |= reg2 << 5; //rj - code |= reg3 << 10; //rk + code |= reg1 & 0x1f; // fd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk } else { @@ -1812,7 +1826,7 @@ void emitter::emitIns_R_R_R( id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -1832,31 +1846,33 @@ void emitter::emitIns_R_R_R_I(instruction ins, { code_t code = emitInsCode(ins); - if ((INS_alsl_w <= ins) && (ins <= INS_bytepick_w)) { - //INS_alsl_w - //INS_alsl_wu - //INS_alsl_d - //INS_bytepick_w + if ((INS_alsl_w <= ins) && (ins <= INS_bytepick_w)) + { + // INS_alsl_w + // INS_alsl_wu + // INS_alsl_d + // INS_bytepick_w assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); assert((0 <= imm) && (imm <= 3)); - code |= reg1; //rd - code |= reg2 << 5; //rj - code |= reg3 << 10;//rk - code |= (imm /*& 0x3*/)<<15; //sa2 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= (imm /*& 0x3*/) << 15; 
// sa2 } - else if (INS_bytepick_d == ins) { + else if (INS_bytepick_d == ins) + { assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); assert((0 <= imm) && (imm <= 7)); - code |= reg1; //rd - code |= reg2 << 5; //rj - code |= reg3 << 10;//rk - code |= (imm /*& 0x7*/)<<15; //sa3 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= (imm /*& 0x7*/) << 15; // sa3 } else if (INS_fsel == ins) { @@ -1865,10 +1881,10 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(isFloatReg(reg3)); assert((0 <= imm) && (imm <= 7)); - code |= (reg1 & 0x1f); //fd - code |= (reg2 & 0x1f)<<5; //fj - code |= (reg3 & 0x1f)<<10; //fk - code |= (imm /*& 0x7*/)<<15; //ca + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + code |= (imm /*& 0x7*/) << 15; // ca } else { @@ -1884,7 +1900,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -1902,7 +1918,7 @@ void emitter::emitIns_R_R_R_Ext(instruction ins, insOpts opt, /* = INS_OPTS_NONE */ int shiftAmount) /* = -1 -- unset */ { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); } /***************************************************************************** @@ -1921,19 +1937,19 @@ void emitter::emitIns_R_R_I_I( { case INS_bstrins_w: case INS_bstrpick_w: - code |= (reg1 /*& 0x1f*/); //rd - code |= (reg2 /*& 0x1f*/)<<5; //rj - assert((0<=imm2) && (imm2<=imm1) && (imm1<32)); - code |= (imm1 & 0x1f)<<16; //msbw - code |= (imm2 & 0x1f)<<10; //lsbw + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + assert((0 <= imm2) && (imm2 <= imm1) && (imm1 < 32)); + code |= (imm1 & 0x1f) << 16; // msbw + code |= (imm2 & 0x1f) << 10; // lsbw break; case INS_bstrins_d: case INS_bstrpick_d: - code |= (reg1 /*& 0x1f*/); //rd - code |= (reg2 /*& 
0x1f*/)<<5; //rj - assert((0<=imm2) && (imm2<=imm1) && (imm1<64)); - code |= (imm1 & 0x3f)<<16; //msbd - code |= (imm2 & 0x3f)<<10; //lsbd + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + assert((0 <= imm2) && (imm2 <= imm1) && (imm1 < 64)); + code |= (imm1 & 0x3f) << 16; // msbd + code |= (imm2 & 0x3f) << 10; // lsbd break; default: unreached(); @@ -1947,7 +1963,7 @@ void emitter::emitIns_R_R_I_I( id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -1961,7 +1977,7 @@ void emitter::emitIns_R_R_R_R( { code_t code = emitInsCode(ins); -//#ifdef DEBUG + //#ifdef DEBUG switch (ins) { case INS_fmadd_s: @@ -1977,15 +1993,15 @@ void emitter::emitIns_R_R_R_R( assert(isFloatReg(reg3)); assert(isFloatReg(reg4)); - code |= (reg1 & 0x1f); //fd - code |= (reg2 & 0x1f)<<5; //fj - code |= (reg3 & 0x1f)<<10; //fk - code |= (reg4 & 0x1f)<<15; //fa + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + code |= (reg4 & 0x1f) << 15; // fa break; default: unreached(); } -//#endif + //#endif instrDesc* id = emitNewInstr(attr); @@ -1994,7 +2010,7 @@ void emitter::emitIns_R_R_R_R( id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -2007,7 +2023,7 @@ void emitter::emitIns_R_R_R_R( void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_C"); #endif @@ -2020,7 +2036,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_S"); #endif @@ -2064,7 +2080,7 @@ void emitter::emitIns_R_R_S( void emitter::emitIns_R_R_S_S( instruction ins, emitAttr attr1, emitAttr attr2, 
regNumber reg1, regNumber reg2, int varx, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); } /***************************************************************************** @@ -2074,7 +2090,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_S_S_R_R( instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); } /***************************************************************************** @@ -2083,7 +2099,7 @@ assert(!"unimplemented on LOONGARCH yet"); */ void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_S_I"); #endif @@ -2100,14 +2116,14 @@ void emitter::emitIns_R_C( instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs) { assert(offs >= 0); - assert(instrDesc::fitsInSmallCns(offs));//can optimize. - //assert(ins == INS_bl);//for special. indicating isGeneralRegister(reg). - //assert(isGeneralRegister(reg)); while load float the reg is FPR. + assert(instrDesc::fitsInSmallCns(offs)); // can optimize. + // assert(ins == INS_bl);//for special. indicating isGeneralRegister(reg). + // assert(isGeneralRegister(reg)); while load float the reg is FPR. - //when id->idIns == bl, for reloc! 4-ins. + // when id->idIns == bl, for reloc! 4-ins. // pcaddu12i reg, off-hi-20bits // addi_d reg, reg, off-lo-12bits - //when id->idIns == load-ins, for reloc! 4-ins. + // when id->idIns == load-ins, for reloc! 4-ins. // pcaddu12i reg, off-hi-20bits // load reg, offs_lo-12bits(reg) #when ins is load ins. // @@ -2124,17 +2140,18 @@ void emitter::emitIns_R_C( instrDesc* id = emitNewInstr(attr); id->idIns(ins); - assert(reg != REG_R0); //for special. reg Must not be R0. 
- id->idReg1(reg); // destination register that will get the constant value. + assert(reg != REG_R0); // for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. - id->idSmallCns(offs); //usually is 0. + id->idSmallCns(offs); // usually is 0. id->idInsOpt(INS_OPTS_RC); if (emitComp->opts.compReloc) { id->idSetIsDspReloc(); id->idCodeSize(8); - } else - id->idCodeSize(12);//TODO: maybe optimize. + } + else + id->idCodeSize(12); // TODO: maybe optimize. if (EA_IS_GCREF(attr)) { @@ -2149,15 +2166,15 @@ void emitter::emitIns_R_C( id->idOpSize(EA_PTRSIZE); } - //TODO: this maybe deleted. + // TODO: this maybe deleted. id->idSetIsBound(); // We won't patch address since we will know the exact distance // once JIT code and data are allocated together. - assert(addrReg == REG_NA);//NOTE: for LOONGARCH64, not support addrReg != REG_NA. + assert(addrReg == REG_NA); // NOTE: for LOONGARCH64, not support addrReg != REG_NA. id->idAddr()->iiaFieldHnd = fldHnd; - //dispIns(id);//loongarch dumping instr by other-fun. + // dispIns(id);//loongarch dumping instr by other-fun. 
appendToCurIG(id); } @@ -2168,7 +2185,7 @@ void emitter::emitIns_R_C( void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_C_I"); #endif @@ -2181,7 +2198,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 assert(!"emitIns_C_R not supported for RyuJIT backend"); #endif @@ -2189,7 +2206,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_R_AR"); #endif @@ -2201,8 +2218,8 @@ void emitter::emitIns_R_AI(instruction ins, regNumber reg, ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) { - assert(EA_IS_RELOC(attr));//EA_PTR_DSP_RELOC - assert(ins == INS_bl);//for special. + assert(EA_IS_RELOC(attr)); // EA_PTR_DSP_RELOC + assert(ins == INS_bl); // for special. assert(isGeneralRegister(reg)); // INS_OPTS_RELOC: placeholders. 2-ins: @@ -2216,8 +2233,8 @@ void emitter::emitIns_R_AI(instruction ins, instrDesc* id = emitNewInstr(attr); id->idIns(ins); - assert(reg != REG_R0); //for special. reg Must not be R0. - id->idReg1(reg); // destination register that will get the constant value. + assert(reg != REG_R0); // for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. id->idInsOpt(INS_OPTS_RELOC); @@ -2237,13 +2254,13 @@ void emitter::emitIns_R_AI(instruction ins, id->idAddr()->iiaAddr = (BYTE*)addr; id->idCodeSize(8); - //dispIns(id);//loongarch dumping instr by other-fun. 
+ // dispIns(id);//loongarch dumping instr by other-fun. appendToCurIG(id); } void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_AR_R"); #endif @@ -2251,7 +2268,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_R_ARR"); #endif @@ -2259,7 +2276,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_R_ARR"); #endif @@ -2268,7 +2285,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_R_ARX( instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_R_ARR"); #endif @@ -2296,7 +2313,7 @@ void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, reg */ void emitter::emitSetShortJump(instrDescJmp* id) { -/* TODO: maybe delete it on future. */ + /* TODO: maybe delete it on future. */ return; } @@ -2309,11 +2326,11 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu { assert(dst->bbFlags & BBF_HAS_LABEL); - //if for reloc! 4-ins: + // if for reloc! 
4-ins: // pcaddu12i reg, offset-hi20 // addi_d reg, reg, offset-lo12 // - //else: 3-ins: + // else: 3-ins: // lu12i_w reg, dst-hi-20bits // ori reg, reg, dst-lo-12bits // bstrins_d reg, zero, msbd, lsbd / lu32i_d reg, 0xff @@ -2328,7 +2345,8 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu { id->idSetIsDspReloc(); id->idCodeSize(8); - } else + } + else id->idCodeSize(12); id->idReg1(reg); @@ -2354,31 +2372,32 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu } #endif // DEBUG - //dispIns(id); + // dispIns(id); appendToCurIG(id); } void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) { - assert(!"unimplemented on LOONGARCH yet: emitIns_J_R.");//not used. + assert(!"unimplemented on LOONGARCH yet: emitIns_J_R."); // not used. } -//NOTE: +// NOTE: // For loongarch64, emitIns_J is just only jump, not include the condition branch! // The condition branch is the emitIns_J_cond_la(). -// If using "BasicBlock* dst" lable as target, the INS_OPTS_J is a short jump while long jump will be replace by INS_OPTS_JIRL. +// If using "BasicBlock* dst" lable as target, the INS_OPTS_J is a short jump while long jump will be replace by +// INS_OPTS_JIRL. // // The arg "instrCount" is two regs's encoding when ins is beq/bne/blt/bltu/bge/bgeu/beqz/bnez. void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) { if (dst == nullptr) - {//Now this case not used for loongarch64. + { // Now this case not used for loongarch64. assert(instrCount != 0); - assert(ins == INS_b);//when dst==nullptr, ins is INS_b by now. + assert(ins == INS_b); // when dst==nullptr, ins is INS_b by now. #if 1 - assert((-33554432 <= instrCount) && (instrCount < 33554432));//0x2000000. - emitIns_I(ins, EA_PTRSIZE, instrCount << 2);//NOTE: instrCount is the number of the instructions. + assert((-33554432 <= instrCount) && (instrCount < 33554432)); // 0x2000000. 
+ emitIns_I(ins, EA_PTRSIZE, instrCount << 2); // NOTE: instrCount is the number of the instructions. #else instrCount = instrCount << 2; if ((-33554432 <= instrCount) && (instrCount < 33554432)) @@ -2388,22 +2407,22 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) } else { - //NOTE: should not be here !!! + // NOTE: should not be here !!! assert(!"should not be here on LOONGARCH64 !!!"); - //emitIns_I(INS_bl, EA_PTRSIZE, 4); + // emitIns_I(INS_bl, EA_PTRSIZE, 4); - //ssize_t imm = ((ssize_t)instrCount>>12); - //assert(isValidSimm12(imm)); - //emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm); - //imm = (instrCount & 0xfffff); - //emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, imm); + // ssize_t imm = ((ssize_t)instrCount>>12); + // assert(isValidSimm12(imm)); + // emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm); + // imm = (instrCount & 0xfffff); + // emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, imm); - //emitIns_R_R_R(INS_add_d, EA_8BYTE, REG_R21, REG_R21, REG_RA); - //emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_R21, 0); + // emitIns_R_R_R(INS_add_d, EA_8BYTE, REG_R21, REG_R21, REG_RA); + // emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_R21, 0); } #endif - return ; + return; } // (dst != nullptr) @@ -2417,7 +2436,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) assert((INS_bceqz <= ins) && (ins <= INS_bl)); id->idIns(ins); id->idReg1((regNumber)(instrCount & 0x1f)); - id->idReg2((regNumber)((instrCount >> 5 ) & 0x1f)); + id->idReg2((regNumber)((instrCount >> 5) & 0x1f)); id->idInsOpt(INS_OPTS_J); emitCounts_INS_OPTS_J++; @@ -2442,7 +2461,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) id->idjOffs = emitCurIGsize; /* Append this jump to this IG's jump list */ - id->idjNext = emitCurIGjmpList; + id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; #if EMITTER_STATS @@ -2450,17 +2469,17 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int 
instrCount) #endif id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } -//NOTE: +// NOTE: // For loongarch64, emitIns_J_cond_la() is the condition branch. // NOTE: Only supported short branch so far !!! // void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1, regNumber reg2) { - //TODO: + // TODO: // Now the emitIns_J_cond_la() is only the short condition branch. // There is no long condition branch for loongarch64 so far. // For loongarch64, the long condition branch is like this: @@ -2499,7 +2518,7 @@ void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 id->idjOffs = emitCurIGsize; /* Append this jump to this IG's jump list */ - id->idjNext = emitCurIGjmpList; + id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; #if EMITTER_STATS @@ -2507,7 +2526,7 @@ void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 #endif id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -2515,47 +2534,56 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) { assert(!EA_IS_RELOC(size)); assert(isGeneralRegister(reg)); - //size = EA_SIZE(size); + // size = EA_SIZE(size); - if (-1 == (imm >> 11) || 0 == (imm >> 11)) { + if (-1 == (imm >> 11) || 0 == (imm >> 11)) + { emitIns_R_R_I(INS_addi_w, size, reg, REG_R0, imm); return; } - if (0 == (imm >> 12)) { + if (0 == (imm >> 12)) + { emitIns_R_R_I(INS_ori, size, reg, REG_R0, imm); return; } instrDesc* id = emitNewInstr(size); - if ((imm == INT64_MAX) || (imm == 0xffffffff)) { - //emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); - //emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); + if ((imm == INT64_MAX) || (imm == 0xffffffff)) + { + // emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); + // emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); id->idReg2((regNumber)1); // special for INT64_MAX(ui6=1) or UINT32_MAX(ui6=32); id->idCodeSize(8); - } else if (-1 == (imm >> 31) || 0 == (imm >> 31)) { - 
//emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); - //emitIns_R_R_I(INS_ori, size, reg, reg, imm); + } + else if (-1 == (imm >> 31) || 0 == (imm >> 31)) + { + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); id->idCodeSize(8); - } else if (-1 == (imm >> 51) || 0 == (imm >> 51)) { + } + else if (-1 == (imm >> 51) || 0 == (imm >> 51)) + { // low-32bits. - //emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12); - //emitIns_R_R_I(INS_ori, size, reg, reg, imm); + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); // // high-20bits. - //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); + // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); id->idCodeSize(12); - } else {// 0xffff ffff ffff ffff. + } + else + { // 0xffff ffff ffff ffff. // low-32bits. - //emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); - //emitIns_R_R_I(INS_ori, size, reg, reg, imm); + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); // // high-32bits. 
- //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); - //emitIns_R_R_I(INS_lu52i_d, size, reg, reg, (imm>>52)); + // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); + // emitIns_R_R_I(INS_lu52i_d, size, reg, reg, (imm>>52)); id->idCodeSize(16); } @@ -2568,7 +2596,7 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) id->idAddr()->iiaAddr = (BYTE*)imm; - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -2593,10 +2621,9 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) void emitter::emitIns_Call(EmitCallType callType, CORINFO_METHOD_HANDLE methHnd, INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + void* addr, + ssize_t argSize, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), VARSET_VALARG_TP ptrVars, regMaskTP gcrefRegs, regMaskTP byrefRegs, @@ -2610,8 +2637,7 @@ void emitter::emitIns_Call(EmitCallType callType, /* Sanity check the arguments depending on callType */ assert(callType < EC_COUNT); - assert((callType != EC_FUNC_TOKEN) || - (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); + assert((callType != EC_FUNC_TOKEN) || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); assert(callType < EC_INDIR_R || addr == NULL); assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); @@ -2691,7 +2717,7 @@ void emitter::emitIns_Call(EmitCallType callType, id->idIns(ins); id->idInsOpt(INS_OPTS_C); - //TODO: maybe optimize. + // TODO: maybe optimize. // INS_OPTS_C: placeholders. 
1/2/4-ins: // if (callType == EC_INDIR_R) @@ -2712,13 +2738,13 @@ void emitter::emitIns_Call(EmitCallType callType, if (callType == EC_INDIR_R) { /* This is an indirect call (either a virtual call or func ptr call) */ - //assert(callType == EC_INDIR_R); + // assert(callType == EC_INDIR_R); id->idSetIsCallRegPtr(); regNumber reg_jirl = isJump ? REG_R0 : REG_RA; id->idReg4(reg_jirl); - id->idReg3(ireg);//NOTE: for EC_INDIR_R, using idReg3. + id->idReg3(ireg); // NOTE: for EC_INDIR_R, using idReg3. assert(xreg == REG_NA); id->idCodeSize(4); @@ -2731,14 +2757,16 @@ void emitter::emitIns_Call(EmitCallType callType, assert(addr != NULL); assert(((long)addr & 3) == 0); - addr = (void*)((long)addr + (isJump ? 0 : 1));//NOTE: low-bit0 is used for jirl ra/r0,rd,0 + addr = (void*)((long)addr + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 id->idAddr()->iiaAddr = (BYTE*)addr; if (emitComp->opts.compReloc) { id->idSetIsDspReloc(); id->idCodeSize(8); - } else { + } + else + { id->idCodeSize(16); } } @@ -2764,7 +2792,7 @@ void emitter::emitIns_Call(EmitCallType callType, } #endif // LATE_DISASM - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -2776,8 +2804,8 @@ void emitter::emitIns_Call(EmitCallType callType, unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code) { unsigned char callInstrSize = sizeof(code_t); // 4 bytes - regMaskTP gcrefRegs; - regMaskTP byrefRegs; + regMaskTP gcrefRegs; + regMaskTP byrefRegs; VARSET_TP GCvars(VarSetOps::UninitVal()); @@ -2807,17 +2835,17 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t emitUpdateLiveGCvars(GCvars, dst); #ifdef DEBUG - //NOTEADD: + // NOTEADD: // Output any delta in GC variable info, corresponding to the before-call GC var updates done above. 
if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) { - emitDispGCVarDelta(); //define in emit.cpp + emitDispGCVarDelta(); // define in emit.cpp } #endif // DEBUG assert(id->idIns() == INS_jirl); if (id->idIsCallRegPtr()) - {//EC_INDIR_R + { // EC_INDIR_R code = emitInsCode(id->idIns()); D_INST_JIRL(code, id->idReg4(), id->idReg3(), 0); } @@ -2828,15 +2856,15 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t // pcaddu18i t2, addr-hi20 // jilr r0/1,t2,addr-lo18 - long addr = (long)id->idAddr()->iiaAddr;//get addr. - //should assert(addr-dst < 38bits); + long addr = (long)id->idAddr()->iiaAddr; // get addr. + // should assert(addr-dst < 38bits); int reg2 = (int)addr & 1; - addr = addr ^ 1; + addr = addr ^ 1; emitRecordRelocation(dst, (BYTE*)addr, IMAGE_REL_LOONGARCH64_PC); - *(code_t *)dst = 0x1e00000e; + *(code_t*)dst = 0x1e00000e; dst += 4; #ifdef DEBUG code = emitInsCode(INS_pcaddu18i); @@ -2845,37 +2873,37 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code = emitInsCode(INS_jirl); assert(code == 0x4c000000); #endif - *(code_t *)dst = 0x4c000000 | (14<<5) | reg2; + *(code_t*)dst = 0x4c000000 | (14 << 5) | reg2; } else { - // lu12i_w t2, dst_offset_lo32-hi //TODO: maybe optimize. - // ori t2, t2, dst_offset_lo32-lo - // lu32i_d t2, dst_offset_hi32-lo - // jirl t2 + // lu12i_w t2, dst_offset_lo32-hi //TODO: maybe optimize. + // ori t2, t2, dst_offset_lo32-lo + // lu32i_d t2, dst_offset_hi32-lo + // jirl t2 ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); - //assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff. - assert((imm >> 32) == 0xff);//for LA64 addr-is 0xff. but this is not the best !!! + // assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff. + assert((imm >> 32) == 0xff); // for LA64 addr-is 0xff. but this is not the best !!! 
int reg2 = (int)(imm & 1); imm -= reg2; code = emitInsCode(INS_lu12i_w); D_INST_lu12i_w(code, REG_T2, imm >> 12); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); D_INST_ori(code, REG_T2, REG_T2, imm); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; - //emitIns_R_I(INS_lu32i_d, size, REG_T2, imm >> 32); + // emitIns_R_I(INS_lu32i_d, size, REG_T2, imm >> 32); code = emitInsCode(INS_lu32i_d); - //D_INST_lu32i_d(code, REG_T2, imm >> 32); + // D_INST_lu32i_d(code, REG_T2, imm >> 32); D_INST_lu32i_d(code, REG_T2, 0xff); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_jirl); @@ -2958,7 +2986,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t } else { - callInstrSize = id->idIsReloc()? (2 << 2) : (4 << 2);// INS_OPTS_C: 2/4-ins. + callInstrSize = id->idIsReloc() ? (2 << 2) : (4 << 2); // INS_OPTS_C: 2/4-ins. } return callInstrSize; @@ -2972,7 +3000,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t /*static*/ unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) { assert(sizeof(code_t) == 4); - BYTE* dstRW = dst + writeableOffset; + BYTE* dstRW = dst + writeableOffset; *((code_t*)dstRW) = code; return sizeof(code_t); @@ -2989,11 +3017,11 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { - BYTE* dst = *dp; - BYTE* dst2 = dst;//addr for updating gc info if needed. - code_t code = 0; + BYTE* dst = *dp; + BYTE* dst2 = dst; // addr for updating gc info if needed. 
+ code_t code = 0; instruction ins; - size_t sz;// = emitSizeOfInsDsc(id); + size_t sz; // = emitSizeOfInsDsc(id); #ifdef DEBUG #if DUMP_GC_TABLES @@ -3022,7 +3050,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) emitRecordRelocation(dst, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC); - *(code_t *)dst = 0x1c000000 | (code_t)reg1; + *(code_t*)dst = 0x1c000000 | (code_t)reg1; dst += 4; dst2 = dst; @@ -3037,14 +3065,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (id->idIsCnsReloc()) { - ins = INS_addi_d; - *(code_t *)dst = 0x02c00000 | (code_t)reg1 | (code_t)(reg1<<5); + ins = INS_addi_d; + *(code_t*)dst = 0x02c00000 | (code_t)reg1 | (code_t)(reg1 << 5); } - else //if (id->idIsDspReloc()) + else // if (id->idIsDspReloc()) { assert(id->idIsDspReloc()); - ins = INS_ldptr_d; - *(code_t *)dst = 0x26000000 | (code_t)reg1 | (code_t)(reg1<<5); + ins = INS_ldptr_d; + *(code_t*)dst = 0x26000000 | (code_t)reg1 | (code_t)(reg1 << 5); } if (id->idGCref() != GCT_NONE) @@ -3058,114 +3086,116 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += 4; - sz = sizeof(instrDesc); + sz = sizeof(instrDesc); } - break; + break; case INS_OPTS_I: { - ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); + ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); regNumber reg1 = id->idReg1(); - dst2 += 4;//assert(dst2 == dst); + dst2 += 4; // assert(dst2 == dst); switch (id->idCodeSize()) { - case 8://if (id->idCodeSize() == 8) - { - if (id->idReg2()) { // special for INT64_MAX or UINT32_MAX; - code = emitInsCode(INS_addi_d); - //emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); - D_INST_2RI12(code, reg1, REG_R0, -1); - *(code_t *)dst = code; - dst += 4; - - ssize_t ui6 = (imm == INT64_MAX) ? 
1 : 32; - code = emitInsCode(INS_srli_d); - //emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); - code |= ((code_t)reg1 | ((code_t)reg1 << 5) | (ui6 << 10)); - *(code_t *)dst = code; + case 8: // if (id->idCodeSize() == 8) + { + if (id->idReg2()) + { // special for INT64_MAX or UINT32_MAX; + code = emitInsCode(INS_addi_d); + // emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); + D_INST_2RI12(code, reg1, REG_R0, -1); + *(code_t*)dst = code; + dst += 4; + + ssize_t ui6 = (imm == INT64_MAX) ? 1 : 32; + code = emitInsCode(INS_srli_d); + // emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); + code |= ((code_t)reg1 | ((code_t)reg1 << 5) | (ui6 << 10)); + *(code_t*)dst = code; + } + else + { + code = emitInsCode(INS_lu12i_w); + D_INST_lu12i_w(code, reg1, imm >> 12); + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + D_INST_ori(code, reg1, reg1, imm); + *(code_t*)dst = code; + } + break; } - else { + case 12: // else if (id->idCodeSize() == 12) + { code = emitInsCode(INS_lu12i_w); D_INST_lu12i_w(code, reg1, imm >> 12); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); D_INST_ori(code, reg1, reg1, imm); - *(code_t *)dst = code; - } - break; - } - case 12: //else if (id->idCodeSize() == 12) - { - code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, reg1, imm >> 12); - *(code_t *)dst = code; - dst += 4; - - code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, reg1, imm); - *(code_t *)dst = code; - dst += 4; + *(code_t*)dst = code; + dst += 4; - code = emitInsCode(INS_lu32i_d); - //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); - D_INST_lu32i_d(code, reg1, imm >> 32); - *(code_t *)dst = code; + code = emitInsCode(INS_lu32i_d); + // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); + D_INST_lu32i_d(code, reg1, imm >> 32); + *(code_t*)dst = code; - break; - } - case 16://else if (id->idCodeSize() == 16) - { - code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, reg1, imm >> 12); - *(code_t *)dst = code; - dst += 4; + 
break; + } + case 16: // else if (id->idCodeSize() == 16) + { + code = emitInsCode(INS_lu12i_w); + D_INST_lu12i_w(code, reg1, imm >> 12); + *(code_t*)dst = code; + dst += 4; - code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, reg1, imm); - *(code_t *)dst = code; - dst += 4; + code = emitInsCode(INS_ori); + D_INST_ori(code, reg1, reg1, imm); + *(code_t*)dst = code; + dst += 4; - code = emitInsCode(INS_lu32i_d); - D_INST_lu32i_d(code, reg1, imm >> 32); - *(code_t *)dst = code; - dst += 4; + code = emitInsCode(INS_lu32i_d); + D_INST_lu32i_d(code, reg1, imm >> 32); + *(code_t*)dst = code; + dst += 4; - code = emitInsCode(INS_lu52i_d); - D_INST_lu52i_d(code, reg1, reg1, imm >> 52); - *(code_t *)dst = code; + code = emitInsCode(INS_lu52i_d); + D_INST_lu52i_d(code, reg1, reg1, imm >> 52); + *(code_t*)dst = code; - break; - } - default : - unreached(); - break; + break; + } + default: + unreached(); + break; } ins = INS_ori; dst += 4; - sz = sizeof(instrDesc); + sz = sizeof(instrDesc); } - break; + break; case INS_OPTS_RC: { // Reference to JIT data - //when id->idIns == bl, for reloc! + // when id->idIns == bl, for reloc! // pcaddu12i r21, off-hi-20bits // addi_d reg, r21, off-lo-12bits - //when id->idIns == load-ins + // when id->idIns == load-ins // pcaddu12i r21, off-hi-20bits // load reg, offs_lo-12bits(r21) #when ins is load ins. // - //when id->idIns == bl + // when id->idIns == bl // lu12i_w r21, addr-hi-20bits // ori reg, r21, addr-lo-12bits // lu32i_d reg, addr_hi-32bits // - //when id->idIns == load-ins + // when id->idIns == load-ins // lu12i_w r21, offs_hi-20bits // lu32i_d r21, 0xff addr_hi-32bits // load reg, addr_lo-12bits(r21) @@ -3182,12 +3212,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) assert(dataOffs < emitDataSize()); - ins = id->idIns(); + ins = id->idIns(); regNumber reg1 = id->idReg1(); if (id->idIsReloc()) { - //get the addr-offset of the data. + // get the addr-offset of the data. 
imm = (ssize_t)emitConsBlock - (ssize_t)dst + dataOffs; assert(imm > 0); assert(!(imm & 3)); @@ -3196,14 +3226,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) imm += doff; assert(isValidSimm20(imm >> 12)); - doff = (int)(imm & 0x7ff) - doff;//addr-lo-12bit. + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. #ifdef DEBUG code = emitInsCode(INS_pcaddu12i); assert(code == 0x1c000000); #endif - code = 0x1c000000 | 21; - *(code_t *)dst = code | (((code_t)imm & 0xfffff000) >> 7); + code = 0x1c000000 | 21; + *(code_t*)dst = code | (((code_t)imm & 0xfffff000) >> 7); dst += 4; if (ins == INS_bl) @@ -3214,92 +3244,92 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = emitInsCode(INS_addi_d); assert(code == 0x02c00000); #endif - code = 0x02c00000 | (21<<5); - *(code_t *)dst = code | (code_t)reg1 | (((code_t)doff & 0xfff) << 10); + code = 0x02c00000 | (21 << 5); + *(code_t*)dst = code | (code_t)reg1 | (((code_t)doff & 0xfff) << 10); } else { code = emitInsCode(ins); - D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff);//NOTE:here must be REG_R21 !!! - *(code_t *)dst = code; + D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff); // NOTE:here must be REG_R21 !!! + *(code_t*)dst = code; } dst += 4; dst2 = dst; } else { - //get the addr of the data. + // get the addr of the data. 
imm = (ssize_t)emitConsBlock + dataOffs; code = emitInsCode(INS_lu12i_w); if (ins == INS_bl) { assert((imm >> 32) == 0xff); - //assert((imm >> 32) <= 0x7ffff); + // assert((imm >> 32) <= 0x7ffff); doff = (int)imm >> 12; D_INST_lu12i_w(code, REG_R21, doff); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); D_INST_ori(code, reg1, REG_R21, imm); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; dst2 = dst; - ins = INS_lu32i_d; + ins = INS_lu32i_d; code = emitInsCode(INS_lu32i_d); - //D_INST_lu32i_d(code, reg1, imm >> 32); + // D_INST_lu32i_d(code, reg1, imm >> 32); D_INST_lu32i_d(code, reg1, 0xff); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; } else { doff = (int)(imm & 0x800); imm += doff; - doff = (int)(imm & 0x7ff) - doff;//addr-lo-12bit. + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. assert((imm >> 32) == 0xff); - //assert((imm >> 32) <= 0x7ffff); + // assert((imm >> 32) <= 0x7ffff); - dataOffs = (unsigned)(imm >> 12); //addr-hi-20bits. + dataOffs = (unsigned)(imm >> 12); // addr-hi-20bits. D_INST_lu12i_w(code, REG_R21, dataOffs); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; - //emitIns_R_I(INS_lu32i_d, size, REG_R21, imm >> 32); + // emitIns_R_I(INS_lu32i_d, size, REG_R21, imm >> 32); code = emitInsCode(INS_lu32i_d); - //D_INST_lu32i_d(code, REG_R21, imm >> 32); + // D_INST_lu32i_d(code, REG_R21, imm >> 32); D_INST_lu32i_d(code, REG_R21, 0xff); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(ins); D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; dst2 = dst; } } - sz = sizeof(instrDesc); + sz = sizeof(instrDesc); } - break; + break; case INS_OPTS_RL: { - //if for reloc! + // if for reloc! // pcaddu12i reg, offset-hi20 // addi_d reg, reg, offset-lo12 // - //else: ////TODO:optimize. + // else: ////TODO:optimize. 
// lu12i_w reg, dst-hi-12bits // ori reg, reg, dst-lo-12bits // lu32i_d reg, dst-hi-32bits - insGroup* tgtIG = (insGroup*)emitCodeGetCookie(id->idAddr()->iiaBBlabel); + insGroup* tgtIG = (insGroup*)emitCodeGetCookie(id->idAddr()->iiaBBlabel); id->idAddr()->iiaIGlabel = tgtIG; regNumber reg1 = id->idReg1(); @@ -3308,17 +3338,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (id->idIsReloc()) { ssize_t imm = (ssize_t)tgtIG->igOffs; - imm = (ssize_t)emitCodeBlock + imm - (ssize_t)dst; + imm = (ssize_t)emitCodeBlock + imm - (ssize_t)dst; assert((imm & 3) == 0); int doff = (int)(imm & 0x800); imm += doff; assert(isValidSimm20(imm >> 12)); - doff = (int)(imm & 0x7ff) - doff;//addr-lo-12bit. + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. - code = 0x1c000000; - *(code_t *)dst = code | (code_t)reg1 | ((imm & 0xfffff000)>>7); + code = 0x1c000000; + *(code_t*)dst = code | (code_t)reg1 | ((imm & 0xfffff000) >> 7); dst += 4; dst2 = dst; #ifdef DEBUG @@ -3327,207 +3357,208 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = emitInsCode(INS_addi_d); assert(code == 0x02c00000); #endif - *(code_t *)dst = 0x02c00000 | (code_t)reg1 | ((code_t)reg1<<5) | ((doff & 0xfff)<<10); - ins = INS_addi_d; - } else + *(code_t*)dst = 0x02c00000 | (code_t)reg1 | ((code_t)reg1 << 5) | ((doff & 0xfff) << 10); + ins = INS_addi_d; + } + else { ssize_t imm = (ssize_t)tgtIG->igOffs + (ssize_t)emitCodeBlock; - //assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff + // assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff assert((imm >> 32) == 0xff); code = emitInsCode(INS_lu12i_w); D_INST_lu12i_w(code, REG_R21, imm >> 12); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); D_INST_ori(code, reg1, REG_R21, imm); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; dst2 = dst; ins = INS_lu32i_d; - //emitIns_R_I(INS_lu32i_d, size, reg1, 0xff); + // emitIns_R_I(INS_lu32i_d, size, reg1, 
0xff); code = emitInsCode(INS_lu32i_d); - //D_INST_lu32i_d(code, reg1, imm >> 32); + // D_INST_lu32i_d(code, reg1, imm >> 32); D_INST_lu32i_d(code, reg1, 0xff); - *(code_t *)dst = code; + *(code_t*)dst = code; } dst += 4; - sz = sizeof(instrDesc); + sz = sizeof(instrDesc); } - break; + break; case INS_OPTS_JIRL: - // case_1: <----------from INS_OPTS_J: - // xor r21,reg1,reg2 | bne/beq _next | bcnez/bceqz _next - // bnez/beqz dst | b dst | b dst - //_next: - // - // case_2: <---------- TODO: from INS_OPTS_J: - // bnez/beqz _next: - // pcaddi r21,off-hi - // jirl r0,r21,off-lo - //_next: - // - // case_3: <----------INS_OPTS_JIRL: //not used by now !!! - // b dst - // - // case_4: <----------INS_OPTS_JIRL: //not used by now !!! - // pcaddi r21,off-hi - // jirl r0,r21,off-lo - // - { - instrDescJmp* jmp = (instrDescJmp*) id; - - regNumber reg1 = id->idReg1(); + // case_1: <----------from INS_OPTS_J: + // xor r21,reg1,reg2 | bne/beq _next | bcnez/bceqz _next + // bnez/beqz dst | b dst | b dst + //_next: + // + // case_2: <---------- TODO: from INS_OPTS_J: + // bnez/beqz _next: + // pcaddi r21,off-hi + // jirl r0,r21,off-lo + //_next: + // + // case_3: <----------INS_OPTS_JIRL: //not used by now !!! + // b dst + // + // case_4: <----------INS_OPTS_JIRL: //not used by now !!! + // pcaddi r21,off-hi + // jirl r0,r21,off-lo + // { - ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); - imm -= 4; + instrDescJmp* jmp = (instrDescJmp*)id; - ins = jmp->idIns(); - assert(jmp->idCodeSize() > 4); //The original INS_OPTS_JIRL: not used by now!!! - switch (jmp->idCodeSize()) + regNumber reg1 = id->idReg1(); { - case 8: + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); + imm -= 4; + + ins = jmp->idIns(); + assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JIRL: not used by now!!! 
+ switch (jmp->idCodeSize()) { - regNumber reg2 = id->idReg2(); - assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); - //assert((INS_bceqz <= ins) && (ins <= INS_bl));//TODO - if ((INS_beq == ins) || (INS_bne == ins)) + case 8: { - if ((-0x400000 <= imm) && (imm < 0x400000)) + regNumber reg2 = id->idReg2(); + assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); + // assert((INS_bceqz <= ins) && (ins <= INS_bl));//TODO + if ((INS_beq == ins) || (INS_bne == ins)) { - code = emitInsCode(INS_xor); - D_INST_3R(code, REG_R21, reg1, reg2); - *(code_t *)dst = code; + if ((-0x400000 <= imm) && (imm < 0x400000)) + { + code = emitInsCode(INS_xor); + D_INST_3R(code, REG_R21, reg1, reg2); + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(ins == INS_beq ? INS_beqz : INS_bnez); + D_INST_Bcond_Z(code, REG_R21, imm); + *(code_t*)dst = code; + dst += 4; + } + else // if ((-0x8000000 <= imm) && (imm < 0x8000000)) + { + assert((-0x8000000 <= imm) && (imm < 0x8000000)); + assert((INS_bne & 0xfffe) == INS_beq); + + code = emitInsCode((instruction)((int)ins ^ 0x1)); + code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */ + code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ + code |= 0x800; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_b); + D_INST_B(code, imm); + *(code_t*)dst = code; + dst += 4; + } + // else + // unreached(); + } + else if ((INS_bceqz == ins) || (INS_bcnez == ins)) + { + assert((-0x8000000 <= imm) && (imm < 0x8000000)); + assert((INS_bcnez & 0xfffe) == INS_bceqz); + + code = emitInsCode((instruction)((int)ins ^ 0x1)); + code |= ((code_t)reg1) << 5; /* rj */ + code |= 0x800; + *(code_t*)dst = code; dst += 4; - code = emitInsCode(ins == INS_beq ? 
INS_beqz : INS_bnez); - D_INST_Bcond_Z(code, REG_R21, imm); - *(code_t *)dst = code; + code = emitInsCode(INS_b); + D_INST_B(code, imm); + *(code_t*)dst = code; dst += 4; } - else //if ((-0x8000000 <= imm) && (imm < 0x8000000)) + else if ((INS_blt <= ins) && (ins <= INS_bgeu)) { assert((-0x8000000 <= imm) && (imm < 0x8000000)); - assert((INS_bne & 0xfffe) == INS_beq); + assert((INS_bge & 0xfffe) == INS_blt); + assert((INS_bgeu & 0xfffe) == INS_bltu); code = emitInsCode((instruction)((int)ins ^ 0x1)); - code |= ((code_t)(reg1) /*& 0x1f */)<<5; /* rj */ - code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ + code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */ + code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ code |= 0x800; - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_b); D_INST_B(code, imm); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; } - //else - // unreached(); - } - else if ((INS_bceqz == ins) || (INS_bcnez == ins)) - { - assert((-0x8000000 <= imm) && (imm < 0x8000000)); - assert((INS_bcnez & 0xfffe) == INS_bceqz); - - code = emitInsCode((instruction)((int)ins ^ 0x1)); - code |= ((code_t)reg1)<<5; /* rj */ - code |= 0x800; - *(code_t *)dst = code; - dst += 4; - - code = emitInsCode(INS_b); - D_INST_B(code, imm); - *(code_t *)dst = code; - dst += 4; - } - else if ((INS_blt <= ins) && (ins <= INS_bgeu)) - { - assert((-0x8000000 <= imm) && (imm < 0x8000000)); - assert((INS_bge & 0xfffe) == INS_blt); - assert((INS_bgeu & 0xfffe) == INS_bltu); - - code = emitInsCode((instruction)((int)ins ^ 0x1)); - code |= ((code_t)(reg1) /*& 0x1f */)<<5; /* rj */ - code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ - code |= 0x800; - *(code_t *)dst = code; - dst += 4; - - code = emitInsCode(INS_b); - D_INST_B(code, imm); - *(code_t *)dst = code; - dst += 4; + break; } - break; + // case 12: + default: + unreached(); + break; } - //case 12: - default : - unreached(); - break; } + sz = sizeof(instrDescJmp); } - sz = sizeof(instrDescJmp); - } 
break; case INS_OPTS_J_cond: // b_cond dst-relative. // - //NOTE: + // NOTE: // the case "imm > 0x7fff" not supported. // More info within the emitter::emitIns_J_cond_la(); - { - ssize_t imm = (ssize_t) id->idAddr()->iiaGetJmpOffset();//get jmp's offset relative delay-slot. - assert((OFFSET_DIST_SMALL_MAX_NEG << 2) <= imm && imm <= (OFFSET_DIST_SMALL_MAX_POS << 2)); - assert(!(imm & 3)); + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. + assert((OFFSET_DIST_SMALL_MAX_NEG << 2) <= imm && imm <= (OFFSET_DIST_SMALL_MAX_POS << 2)); + assert(!(imm & 3)); - ins = id->idIns(); - code = emitInsCode(ins); - D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); - *(code_t *)dst = code; - dst += 4; + ins = id->idIns(); + code = emitInsCode(ins); + D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); + *(code_t*)dst = code; + dst += 4; - sz = sizeof(instrDescJmp); - } + sz = sizeof(instrDescJmp); + } break; case INS_OPTS_J: - // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst-relative. - { - ssize_t imm = (ssize_t) id->idAddr()->iiaGetJmpOffset();//get jmp's offset relative delay-slot. - assert(!(imm & 3)); - - ins = id->idIns(); - code = emitInsCode(ins); - if (ins == INS_b || ins == INS_bl) - { - D_INST_B(code, imm); - } - else if (ins == INS_bnez || ins == INS_beqz) - { - D_INST_Bcond_Z(code, id->idReg1(), imm); - } - else if (ins == INS_bcnez || ins == INS_bceqz) - { - assert((code_t)(id->idReg1()) < 8);//cc - D_INST_Bcond_Z(code, id->idReg1(), imm); - } - else if ((INS_beq <= ins) && (ins <= INS_bgeu)) - { - D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); - } - else + // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst-relative. { - assert(!"unimplemented on LOONGARCH yet"); - } - *(code_t *)dst = code; - dst += 4; + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. 
+ assert(!(imm & 3)); - sz = sizeof(instrDescJmp); - } + ins = id->idIns(); + code = emitInsCode(ins); + if (ins == INS_b || ins == INS_bl) + { + D_INST_B(code, imm); + } + else if (ins == INS_bnez || ins == INS_beqz) + { + D_INST_Bcond_Z(code, id->idReg1(), imm); + } + else if (ins == INS_bcnez || ins == INS_bceqz) + { + assert((code_t)(id->idReg1()) < 8); // cc + D_INST_Bcond_Z(code, id->idReg1(), imm); + } + else if ((INS_beq <= ins) && (ins <= INS_bgeu)) + { + D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); + } + else + { + assert(!"unimplemented on LOONGARCH yet"); + } + *(code_t*)dst = code; + dst += 4; + + sz = sizeof(instrDescJmp); + } break; case INS_OPTS_C: @@ -3546,14 +3577,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) ins = INS_nop; break; - //case INS_OPTS_NONE: + // case INS_OPTS_NONE: default: - //assert(id->idGCref() == GCT_NONE); - *(code_t *)dst = id->idAddr()->iiaGetInstrEncode(); + // assert(id->idGCref() == GCT_NONE); + *(code_t*)dst = id->idAddr()->iiaGetInstrEncode(); dst += 4; dst2 = dst; - ins = id->idIns(); - sz = emitSizeOfInsDsc(id); + ins = id->idIns(); + sz = emitSizeOfInsDsc(id); break; } @@ -3573,7 +3604,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) emitGCregDeadUpd(id->idReg1(), dst2); } - //if (emitInsMayWriteMultipleRegs(id)) + // if (emitInsMayWriteMultipleRegs(id)) //{ // // INS_gslq etc... 
// // "idReg2" is the secondary destination register @@ -3617,7 +3648,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (vt == TYP_REF || vt == TYP_BYREF) emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum)); } - //if (emitInsWritesToLclVarStackLocPair(id)) + // if (emitInsWritesToLclVarStackLocPair(id)) //{ // unsigned ofs2 = ofs + TARGET_POINTER_SIZE; // if (id->idGCrefReg2() != GCT_NONE) @@ -3647,18 +3678,18 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) #ifdef DEBUG /* Make sure we set the instruction descriptor size correctly */ - //size_t expected = emitSizeOfInsDsc(id); - //assert(sz == expected); + // size_t expected = emitSizeOfInsDsc(id); + // assert(sz == expected); if (emitComp->opts.disAsm || emitComp->verbose) { - code_t *cp = (code_t*) *dp; + code_t* cp = (code_t*)*dp; while ((BYTE*)cp != dst) { emitDisInsName(*cp, (BYTE*)cp, id); cp++; } - //emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); + // emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); } if (emitComp->compDebugBreak) @@ -3691,7 +3722,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) * Display the given instruction. */ -//NOTE: At least 32bytes within dst. +// NOTE: At least 32bytes within dst. 
void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { const BYTE* insstrs = dst; @@ -3700,29 +3731,28 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { printf("LOONGARCH invalid instruction: 0x%x\n", code); assert(!"invalid inscode on LOONGARCH!"); - return ; + return; } -// clang-format off + // clang-format off const char * const regName[] = {"zero", "ra", "tp", "sp", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "x0", "fp", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"}; const char * const FregName[] = {"fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "ft8", "ft9", "ft10", "ft11", "ft12", "ft13", "ft14", "ft15", "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7"}; const char * const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; -// clang-format on - + // clang-format on - unsigned int opcode = (code>>26) & 0x3f; + unsigned int opcode = (code >> 26) & 0x3f; - //bits: 31-26,MSB6 + // bits: 31-26,MSB6 switch (opcode) { case 0x0: { - goto Label_OPCODE_0; - //break; + goto Label_OPCODE_0; + // break; } - //case 0x1: + // case 0x1: //{ // assert(!"unimplemented on loongarch yet!"); // //goto Label_OPCODE_1; @@ -3731,23 +3761,23 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x2: { goto Label_OPCODE_2; - //break; + // break; } case 0x3: { goto Label_OPCODE_3; - //break; + // break; } case 0xe: { goto Label_OPCODE_E; - //break; + // break; } - case LA_2RI16_ADDU16I_D: //0x4 + case LA_2RI16_ADDU16I_D: // 0x4 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si16 = (code >> 10) & 0xffff; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si16 = (code >> 10) & 0xffff; printf(" 0x%llx addu16i.d %s, %s, %d\n", insstrs, rd, rj, si16); 
return; } @@ -3755,10 +3785,10 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x6: case 0x7: { - //bits: 31-25,MSB7 + // bits: 31-25,MSB7 unsigned int inscode = (code >> 25) & 0x7f; - const char *rd = regName[code & 0x1f]; - unsigned int si20 = (code >> 5) & 0xfffff; + const char* rd = regName[code & 0x1f]; + unsigned int si20 = (code >> 5) & 0xfffff; switch (inscode) { case LA_1RI20_LU12I_W: @@ -3781,7 +3811,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) printf(" 0x%llx pcaddu18i %s, 0x%x\n", insstrs, rd, si20); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -3790,11 +3820,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x8: case 0x9: { - //bits: 31-24,MSB8 + // bits: 31-24,MSB8 unsigned int inscode = (code >> 24) & 0xff; - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si14 = ((code >> 10) & 0x3fff)<<2; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si14 = ((code >> 10) & 0x3fff) << 2; si14 >>= 2; switch (inscode) { @@ -3822,7 +3852,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case LA_2RI14_STPTR_D: printf(" 0x%llx stptr.d %s, %s, %d\n", insstrs, rd, rj, si14); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -3830,12 +3860,12 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0xa: { - //bits: 31-24,MSB8 + // bits: 31-24,MSB8 unsigned int inscode = (code >> 22) & 0x3ff; - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *fd = FregName[code & 0x1f]; - short si12 = ((code >> 10) & 0xfff)<<4; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + short si12 = ((code >> 10) 
& 0xfff) << 4; si12 >>= 4; switch (inscode) { @@ -3887,56 +3917,59 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case LA_2RI12_FST_D: printf(" 0x%llx fst.d %s, %s, %d\n", insstrs, fd, rj, si12); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - case LA_1RI21_BEQZ: //0x10 + case LA_1RI21_BEQZ: // 0x10 { - const char *rj = regName[(code>>5) & 0x1f]; - int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16))<<11; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; printf(" 0x%llx beqz %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); return; } - case LA_1RI21_BNEZ: //0x11 + case LA_1RI21_BNEZ: // 0x11 { - const char *rj = regName[(code>>5) & 0x1f]; - int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16))<<11; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; printf(" 0x%llx bnez %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); return; } case 0x12: { - //LA_1RI21_BCEQZ - //LA_1RI21_BCNEZ - const char *cj = CFregName[(code>>5) & 0x7]; - int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; + // LA_1RI21_BCEQZ + // LA_1RI21_BCNEZ + const char* cj = CFregName[(code >> 5) & 0x7]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; - if (0 == ((code>>8) & 0x3)) { + if (0 == ((code >> 8) & 0x3)) + { printf(" 0x%llx bceqz %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); return; } - else if (1 == ((code>>8) & 0x3)) { + else if (1 == ((code >> 8) & 0x3)) + { printf(" 0x%llx bcnez %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); return; } - else { + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - case LA_2RI16_JIRL: //0x13 + case LA_2RI16_JIRL: // 0x13 { - const char *rd = regName[code & 0x1f]; - 
const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - if(id->idDebugOnlyInfo()->idMemCookie) + if (id->idDebugOnlyInfo()->idMemCookie) { assert(0 < id->idDebugOnlyInfo()->idMemCookie); const char* methodName; @@ -3949,76 +3982,76 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } return; } - case LA_I26_B: //0x14 + case LA_I26_B: // 0x14 { - int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16))<<6; + int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; offs26 >>= 4; printf(" 0x%llx b 0x%llx\n", insstrs, (int64_t)insstrs + offs26); return; } - case LA_I26_BL: //0x15 + case LA_I26_BL: // 0x15 { - int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16))<<6; + int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; offs26 >>= 4; printf(" 0x%llx bl 0x%llx\n", insstrs, (int64_t)insstrs + offs26); return; } - case LA_2RI16_BEQ: //0x16 + case LA_2RI16_BEQ: // 0x16 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx beq %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - case LA_2RI16_BNE: //0x17 + case LA_2RI16_BNE: // 0x17 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bne %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - case LA_2RI16_BLT: //0x18 + case LA_2RI16_BLT: 
// 0x18 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx blt %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - case LA_2RI16_BGE: //0x19 + case LA_2RI16_BGE: // 0x19 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bge %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - case LA_2RI16_BLTU: //0x1a + case LA_2RI16_BLTU: // 0x1a { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bltu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - case LA_2RI16_BGEU: //0x1b + case LA_2RI16_BGEU: // 0x1b { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bgeu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4026,27 +4059,27 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) Label_OPCODE_0: opcode = (code >> 22) & 0x3ff; - //bits: 31-22,MSB10 + // bits: 31-22,MSB10 switch 
(opcode) { case 0x0: { - //bits: 31-18,MSB14 + // bits: 31-18,MSB14 unsigned int inscode1 = (code >> 18) & 0x3fff; switch (inscode1) { case 0x0: { - //bits: 31-15,MSB17 + // bits: 31-15,MSB17 unsigned int inscode2 = (code >> 15) & 0x1ffff; switch (inscode2) { case 0x0: { - //bits:31-10,MSB22 + // bits:31-10,MSB22 unsigned int inscode3 = (code >> 10) & 0x3fffff; - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; switch (inscode3) { case LA_2R_CLO_W: @@ -4122,7 +4155,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) printf(" 0x%llx cpucfg %s, %s\n", insstrs, rd, rj); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4130,19 +4163,19 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2R_ASRTLE_D: { - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx asrtle.d %s, %s\n", insstrs, rj, rk); return; } case LA_2R_ASRTGT_D: { - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx asrtgt.d %s, %s\n", insstrs, rj, rk); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4150,39 +4183,44 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x1: { - //LA_OP_ALSL_W - //LA_OP_ALSL_WU - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; - unsigned int sa2 = (code>>15) & 0x3; - if (0 == ((code>>17) & 0x1)) { - printf(" 0x%llx alsl.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + 
// LA_OP_ALSL_W + // LA_OP_ALSL_WU + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; + if (0 == ((code >> 17) & 0x1)) + { + printf(" 0x%llx alsl.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); return; - } else if (1 == ((code>>17) & 0x1)) { - printf(" 0x%llx alsl.wu %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + } + else if (1 == ((code >> 17) & 0x1)) + { + printf(" 0x%llx alsl.wu %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - case LA_OP_BYTEPICK_W: //0x2 + case LA_OP_BYTEPICK_W: // 0x2 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; - unsigned int sa2 = (code>>15) & 0x3; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; printf(" 0x%llx bytepick.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa2); return; } - case LA_OP_BYTEPICK_D: //0x3 + case LA_OP_BYTEPICK_D: // 0x3 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; - unsigned int sa3 = (code>>15) & 0x7; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; + unsigned int sa3 = (code >> 15) & 0x7; printf(" 0x%llx bytepick.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa3); return; } @@ -4193,11 +4231,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x8: case 0x9: { - //bits: 31-15,MSB17 + // bits: 31-15,MSB17 unsigned int inscode2 = (code >> 15) & 0x1ffff; - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char 
*rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; switch (inscode2) { @@ -4339,15 +4377,15 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case LA_3R_CRCC_W_D_W: printf(" 0x%llx crcc.w.d.w %s, %s, %s\n", insstrs, rd, rj, rk); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } } case 0xa: { - //bits: 31-15,MSB17 - unsigned int inscode2 = (code >> 15) & 0x1ffff; + // bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; unsigned int codefield = code & 0x7fff; switch (inscode2) { @@ -4360,21 +4398,21 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case LA_OP_SYSCALL: printf(" 0x%llx syscall 0x%x\n", insstrs, codefield); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } } - case LA_OP_ALSL_D: //0xb + case LA_OP_ALSL_D: // 0xb { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; - unsigned int sa2 = (code>>15) & 0x3; - printf(" 0x%llx alsl.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; + printf(" 0x%llx alsl.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4382,44 +4420,56 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x1: { - if (code & 0x200000) { - //LA_OP_BSTRINS_W - //LA_OP_BSTRPICK_W - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + if (code & 0x200000) + { + // LA_OP_BSTRINS_W + // LA_OP_BSTRPICK_W + const char* rd = regName[code & 
0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int lsbw = (code >> 10) & 0x1f; unsigned int msbw = (code >> 16) & 0x1f; - if (!(code & 0x8000)) { + if (!(code & 0x8000)) + { printf(" 0x%llx bstrins.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); return; - } else if (code & 0x8000) { + } + else if (code & 0x8000) + { printf(" 0x%llx bstrpick.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } } - else { - //bits: 31-18,MSB14 + else + { + // bits: 31-18,MSB14 unsigned int inscode1 = (code >> 18) & 0x3fff; switch (inscode1) { case 0x10: { - //LA_OP_SLLI_W: - //LA_OP_SLLI_D: - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - if (1 == ((code>>15) & 0x7)) { - unsigned int ui5 = (code>>10) & 0x1f; + // LA_OP_SLLI_W: + // LA_OP_SLLI_D: + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; printf(" 0x%llx slli.w %s, %s, %d\n", insstrs, rd, rj, ui5); return; - } else if (1 == ((code>>16) & 0x3)) { - unsigned int ui6 = (code>>10) & 0x3f; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; printf(" 0x%llx slli.d %s, %s, %d\n", insstrs, rd, rj, ui6); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4427,19 +4477,24 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x11: { - //LA_OP_SRLI_W: - //LA_OP_SRLI_D: - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - if (1 == ((code>>15) & 0x7)) { - unsigned int ui5 = (code>>10) & 0x1f; + // LA_OP_SRLI_W: + // LA_OP_SRLI_D: + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; printf(" 0x%llx 
srli.w %s, %s, %d\n", insstrs, rd, rj, ui5); return; - } else if (1 == ((code>>16) & 0x3)) { - unsigned int ui6 = (code>>10) & 0x3f; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; printf(" 0x%llx srli.d %s, %s, %d\n", insstrs, rd, rj, ui6); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4447,19 +4502,24 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x12: { - //LA_OP_SRAI_W: - //LA_OP_SRAI_D: - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - if (1 == ((code>>15) & 0x7)) { - unsigned int ui5 = (code>>10) & 0x1f; + // LA_OP_SRAI_W: + // LA_OP_SRAI_D: + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; printf(" 0x%llx srai.w %s, %s, %d\n", insstrs, rd, rj, ui5); return; - } else if (1 == ((code>>16) & 0x3)) { - unsigned int ui6 = (code>>10) & 0x3f; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; printf(" 0x%llx srai.d %s, %s, %d\n", insstrs, rd, rj, ui6); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4467,36 +4527,41 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x13: { - //LA_OP_ROTRI_W: - //LA_OP_ROTRI_D: - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - if (1 == ((code>>15) & 0x7)) { - unsigned int ui5 = (code>>10) & 0x1f; + // LA_OP_ROTRI_W: + // LA_OP_ROTRI_D: + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; printf(" 0x%llx rotri.w %s, %s, %d\n", insstrs, rd, rj, ui5); return; - } else if (1 == ((code>>16) & 0x3)) { - unsigned int ui6 = (code>>10) & 0x3f; + } + else if (1 == ((code >> 16) 
& 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; printf(" 0x%llx rotri.d %s, %s, %d\n", insstrs, rd, rj, ui6); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; - } + } return; } case LA_OP_BSTRINS_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; printf(" 0x%llx bstrins.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); @@ -4504,8 +4569,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_OP_BSTRPICK_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; printf(" 0x%llx bstrpick.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); @@ -4513,13 +4578,13 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x4: { - //bits: 31-15,MSB17 + // bits: 31-15,MSB17 unsigned int inscode1 = (code >> 15) & 0x1ffff; - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; switch (inscode1) { @@ -4592,7 +4657,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x23a: case 0x23c: { - //bits:31-10,MSB22 + // 
bits:31-10,MSB22 unsigned int inscode2 = (code >> 10) & 0x3fffff; switch (inscode2) { @@ -4670,25 +4735,25 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) return; case LA_2R_MOVFR2CF: { - const char *cd = CFregName[code & 0x7]; + const char* cd = CFregName[code & 0x7]; printf(" 0x%llx movfr2cf %s, %s\n", insstrs, cd, fj); return; } case LA_2R_MOVCF2FR: { - const char *cj = CFregName[(code>>5) & 0x7]; + const char* cj = CFregName[(code >> 5) & 0x7]; printf(" 0x%llx movcf2fr %s, %s\n", insstrs, fd, cj); return; } case LA_2R_MOVGR2CF: { - const char *cd = CFregName[code & 0x7]; + const char* cd = CFregName[code & 0x7]; printf(" 0x%llx movgr2cf %s, %s\n", insstrs, cd, rj); return; } case LA_2R_MOVCF2GR: { - const char *cj = CFregName[(code>>5) & 0x7]; + const char* cj = CFregName[(code >> 5) & 0x7]; printf(" 0x%llx movcf2gr %s, %s\n", insstrs, rd, cj); return; } @@ -4776,177 +4841,176 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case LA_2R_FRINT_D: printf(" 0x%llx frint.d %s, %s\n", insstrs, fd, fj); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - case LA_2RI12_SLTI: //0x8 + case LA_2RI12_SLTI: // 0x8 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si12 = ((code >> 10) & 0xfff)<<4; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx slti %s, %s, %d\n", insstrs, rd, rj, si12); return; } - case LA_2RI12_SLTUI: //0x9 + case LA_2RI12_SLTUI: // 0x9 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si12 = ((code >> 10) & 0xfff)<<4; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 
0xfff) << 4; si12 >>= 4; printf(" 0x%llx sltui %s, %s, %d\n", insstrs, rd, rj, si12); return; } - case LA_2RI12_ADDI_W: //0xa + case LA_2RI12_ADDI_W: // 0xa { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si12 = ((code >> 10) & 0xfff)<<4; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx addi.w %s, %s, %d\n", insstrs, rd, rj, si12); return; } - case LA_2RI12_ADDI_D: //0xb + case LA_2RI12_ADDI_D: // 0xb { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si12 = ((code >> 10) & 0xfff)<<4; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx addi.d %s, %s, %ld\n", insstrs, rd, rj, si12); return; } - case LA_2RI12_LU52I_D: //0xc + case LA_2RI12_LU52I_D: // 0xc { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int si12 = (code >> 10) & 0xfff; printf(" 0x%llx lu52i.d %s, %s, 0x%x\n", insstrs, rd, rj, si12); return; } - case LA_2RI12_ANDI: //0xd + case LA_2RI12_ANDI: // 0xd { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx andi %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; } - case LA_2RI12_ORI: //0xe + case LA_2RI12_ORI: // 0xe { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx ori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; } - case LA_2RI12_XORI: //0xf 
+ case LA_2RI12_XORI: // 0xf { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx xori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } -//Label_OPCODE_1: +// Label_OPCODE_1: // opcode = (code >> 24) & 0xff; // //bits: 31-24,MSB8 - Label_OPCODE_2: opcode = (code >> 20) & 0xfff; - //bits: 31-20,MSB12 + // bits: 31-20,MSB12 switch (opcode) { case LA_4R_FMADD_S: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMADD_D: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMSUB_S: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fmsub.s %s, 
%s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMSUB_D: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMADD_S: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fnmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMADD_D: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fnmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMSUB_S: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fnmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMSUB_D: { - const 
char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fnmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4954,16 +5018,16 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) Label_OPCODE_3: opcode = (code >> 20) & 0xfff; - //bits: 31-20,MSB12 + // bits: 31-20,MSB12 switch (opcode) { case LA_OP_FCMP_cond_S: { - //bits:19-15,cond + // bits:19-15,cond unsigned int cond = (code >> 15) & 0x1f; - const char *cd = CFregName[code & 0x7]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; + const char* cd = CFregName[code & 0x7]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; switch (cond) { case 0x0: @@ -5032,18 +5096,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x19: printf(" 0x%llx fcmp.sune.s %s, %s, %s\n", insstrs, cd, fj, fk); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } } case LA_OP_FCMP_cond_D: { - //bits:19-15,cond + // bits:19-15,cond unsigned int cond = (code >> 15) & 0x1f; - const char *cd = CFregName[code & 0x7]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; + const char* cd = CFregName[code & 0x7]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; switch (cond) { case 0x0: @@ -5112,21 +5176,21 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x19: printf(" 0x%llx fcmp.sune.d %s, %s, %s\n", 
insstrs, cd, fj, fk); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } } case LA_4R_FSEL: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *ca = CFregName[(code>>15) & 0x7]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* ca = CFregName[(code >> 15) & 0x7]; printf(" 0x%llx fsel %s, %s, %s, %s\n", insstrs, fd, fj, fk, ca); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -5134,94 +5198,94 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) Label_OPCODE_E: opcode = (code >> 15) & 0x1ffff; - //bits: 31-15,MSB17 + // bits: 31-15,MSB17 switch (opcode) { case LA_3R_LDX_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_D: { - 
const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stx.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stx.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stx.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stx.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_BU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = 
regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.bu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_HU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.hu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_WU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } @@ -5230,321 +5294,321 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) return; case LA_3R_FLDX_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldx.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDX_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldx.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTX_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 
0x1f]; printf(" 0x%llx fstx.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTX_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fstx.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_AMSWAP_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amswap.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amswap.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amadd.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amadd.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = 
regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amand.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amand.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amor.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amor.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amxor.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* 
rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amxor.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_WU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DU: { - const char *rd = regName[code & 0x1f]; - 
const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_WU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amswap_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amswap_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = 
regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amadd_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amadd_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amand_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amand_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); 
return; } case LA_3R_AMXOR_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amxor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amxor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = 
regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_WU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_DU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_WU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_DU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } @@ -5562,197 +5626,197 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_3R_FLDGT_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const 
char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldgt.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDGT_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldgt.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDLE_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldle.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDLE_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldle.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTGT_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fstgt.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTGT_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fstgt.d %s, %s, 
%s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTLE_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fstle.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTLE_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fstle.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_LDGT_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = 
regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx 
stgt.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stle.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stle.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = 
regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stle.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stle.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -5782,7 +5846,7 @@ void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) void emitter::emitDispIns( instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) -{//not used on loongarch64. +{ // not used on loongarch64. printf("------------not implements emitDispIns() for loongarch64!!!\n"); } @@ -5845,8 +5909,8 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR { assert(addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR, GT_LEA)); - int offset = 0; - DWORD lsl = 0; + int offset = 0; + DWORD lsl = 0; if (addr->OperGet() == GT_LEA) { @@ -5891,7 +5955,8 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR else // large offset { // First load/store tmpReg with the large offset constant - emitIns_I_la(EA_PTRSIZE, tmpReg, offset);//codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + emitIns_I_la(EA_PTRSIZE, tmpReg, + offset); // codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); // Then add the base register // rd = rd + base emitIns_R_R_R(INS_add_d, addType, tmpReg, tmpReg, memBase->GetRegNum()); @@ -5910,63 +5975,65 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR // Then load/store dataReg from/to 
[memBase + index] switch (EA_SIZE(emitTypeSize(indir->TypeGet()))) { - case EA_1BYTE: - assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); - if (ins <= INS_ld_wu) - { - if (varTypeIsUnsigned(indir->TypeGet())) - ins = INS_ldx_bu; + case EA_1BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_bu; + else + ins = INS_ldx_b; + } else - ins = INS_ldx_b; - } - else - ins = INS_stx_b; - break; - case EA_2BYTE: - assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); - if (ins <= INS_ld_wu) - { - if (varTypeIsUnsigned(indir->TypeGet())) - ins = INS_ldx_hu; + ins = INS_stx_b; + break; + case EA_2BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_hu; + else + ins = INS_ldx_h; + } else - ins = INS_ldx_h; - } - else - ins = INS_stx_h; - break; - case EA_4BYTE: - assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || (ins == INS_fst_s) || (ins == INS_fld_s)); - assert(INS_fst_s > INS_st_d); - if (ins <= INS_ld_wu) - { - if (varTypeIsUnsigned(indir->TypeGet())) - ins = INS_ldx_wu; + ins = INS_stx_h; + break; + case EA_4BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || + (ins == INS_fst_s) || (ins == INS_fld_s)); + assert(INS_fst_s > INS_st_d); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_wu; + else + ins = INS_ldx_w; + } + else if (ins == INS_fld_s) + ins = INS_fldx_s; + else if (ins == INS_fst_s) + ins = INS_fstx_s; else - ins = INS_ldx_w; - } - else if (ins == INS_fld_s) - ins = INS_fldx_s; - else if (ins == INS_fst_s) - ins = INS_fstx_s; - else - ins = INS_stx_w; - break; - case 
EA_8BYTE: - assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || (ins == INS_fst_d) || (ins == INS_fld_d)); - assert(INS_fst_d > INS_st_d); - if (ins <= INS_ld_wu) - { - ins = INS_ldx_d; - } - else if (ins == INS_fld_d) - ins = INS_fldx_d; - else if (ins == INS_fst_d) - ins = INS_fstx_d; - else - ins = INS_stx_d; - break; - default: - assert(!"------------TODO for LOONGARCH64: unsupported ins."); + ins = INS_stx_w; + break; + case EA_8BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || + (ins == INS_fst_d) || (ins == INS_fld_d)); + assert(INS_fst_d > INS_st_d); + if (ins <= INS_ld_wu) + { + ins = INS_ldx_d; + } + else if (ins == INS_fld_d) + ins = INS_fldx_d; + else if (ins == INS_fst_d) + ins = INS_fstx_d; + else + ins = INS_stx_d; + break; + default: + assert(!"------------TODO for LOONGARCH64: unsupported ins."); } if (lsl > 0) @@ -6015,7 +6082,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR // First load/store tmpReg with the large offset constant emitIns_I_la(EA_PTRSIZE, tmpReg, offset); - //codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + // codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); // Then load/store dataReg from/to [memBase + tmpReg] emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), tmpReg); @@ -6026,17 +6093,17 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR else // addr is not contained, so we evaluate it into a register { #ifdef DEBUG - if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) - { - // If the local var is a gcref or byref, the local var better be untracked, because we have - // no logic here to track local variable lifetime changes, like we do in the contained case - // above. 
E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local - // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. - GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); - unsigned lclNum = varNode->GetLclNum(); - LclVarDsc* varDsc = emitComp->lvaGetDesc(lclNum); - assert(!varDsc->lvTracked); - } + if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // If the local var is a gcref or byref, the local var better be untracked, because we have + // no logic here to track local variable lifetime changes, like we do in the contained case + // above. E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local + // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. + GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + LclVarDsc* varDsc = emitComp->lvaGetDesc(lclNum); + assert(!varDsc->lvTracked); + } #endif // DEBUG // Then load/store dataReg from/to [addrReg] emitIns_R_R_I(ins, attr, dataReg, addr->GetRegNum(), 0); @@ -6111,7 +6178,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { assert(attr == EA_8BYTE); } - else if (ins == INS_add_w)// || ins == INS_add + else if (ins == INS_add_w) // || ins == INS_add { assert(attr == EA_4BYTE); } @@ -6134,13 +6201,14 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, else if ((ins == INS_mul_d) || (ins == INS_mulh_d) || (ins == INS_mulh_du)) { assert(attr == EA_8BYTE); - //NOTE: overflow format doesn't support an int constant operand directly. + // NOTE: overflow format doesn't support an int constant operand directly. 
assert(intConst == nullptr); } - else if ((ins == INS_mul_w) || (ins == INS_mulw_d_w) || (ins == INS_mulh_w) || (ins == INS_mulh_wu) || (ins == INS_mulw_d_wu)) + else if ((ins == INS_mul_w) || (ins == INS_mulw_d_w) || (ins == INS_mulh_w) || (ins == INS_mulh_wu) || + (ins == INS_mulw_d_wu)) { assert(attr == EA_4BYTE); - //NOTE: overflow format doesn't support an int constant operand directly. + // NOTE: overflow format doesn't support an int constant operand directly. assert(intConst == nullptr); } else @@ -6153,10 +6221,10 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } if (intConst != nullptr) - {//should re-design this case!!! ---2020.04.11. + { // should re-design this case!!! ---2020.04.11. ssize_t imm = intConst->IconValue(); if (ins == INS_andi || ins == INS_ori || ins == INS_xori) - //assert((0 <= imm) && (imm <= 0xfff)); + // assert((0 <= imm) && (imm <= 0xfff)); assert((-2048 <= imm) && (imm <= 0xfff)); else assert((-2049 < imm) && (imm < 2048)); @@ -6300,7 +6368,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { if (attr == EA_4BYTE) emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, dst->GetRegNum(), REG_R0, 63, 32); - //else + // else //{ // assert(!"unimplemented on LOONGARCH yet: ulong * ulong !!!"); //} @@ -6322,7 +6390,8 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, assert(REG_RA != src1->GetRegNum()); assert(REG_RA != src2->GetRegNum()); size_t imm = (EA_SIZE(attr) == EA_8BYTE) ? 63 : 31; - emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? INS_srai_d : INS_srai_w, attr, REG_RA, dst->GetRegNum(), imm); + emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? 
INS_srai_d : INS_srai_w, attr, REG_RA, dst->GetRegNum(), + imm); codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, REG_RA); } } @@ -6332,14 +6401,14 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - //NOTE: can/should amend: LOONGARCH needs to sign-extend dst when deal with 32bit data. + // NOTE: can/should amend: LOONGARCH needs to sign-extend dst when deal with 32bit data. if (EA_SIZE(attr) == EA_4BYTE) emitIns_R_R_I(INS_slli_w, attr, dst->GetRegNum(), dst->GetRegNum(), 0); } else { - regNumber regOp1 = src1->GetRegNum(); - regNumber regOp2 = src2->GetRegNum(); + regNumber regOp1 = src1->GetRegNum(); + regNumber regOp2 = src2->GetRegNum(); regNumber saveOperReg1 = REG_NA; regNumber saveOperReg2 = REG_NA; @@ -6350,14 +6419,14 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, assert(REG_R21 != regOp1); assert(REG_RA != regOp1); emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp1, /*src1->GetRegNum(),*/ 31, 0); - regOp1 = REG_RA;//dst->ExtractTempReg(); + regOp1 = REG_RA; // dst->ExtractTempReg(); } if (src2->gtType == TYP_INT) { assert(REG_R21 != regOp2); assert(REG_RA != regOp2); emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_R21, regOp2, /*src2->GetRegNum(),*/ 31, 0); - regOp2 = REG_R21;//dst->ExtractTempReg(); + regOp2 = REG_R21; // dst->ExtractTempReg(); } } if (needCheckOv) @@ -6396,7 +6465,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { if (dst->OperGet() == GT_ADD || dst->OperGet() == GT_SUB) { - ssize_t imm; + ssize_t imm; regNumber tempReg1; regNumber tempReg2; // ADD : A = B + C @@ -6439,7 +6508,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, emitIns_R_R_I(INS_andi, attr, tempReg2, tempReg2, imm); } // if (B > 0 && C < 0) || (B < 0 && C > 0), skip overflow - BasicBlock* tmpLabel = 
codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); BasicBlock* tmpLabel2 = codeGen->genCreateTempLabel(); BasicBlock* tmpLabel3 = codeGen->genCreateTempLabel(); @@ -6448,7 +6517,8 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, emitIns_J_cond_la(INS_bne, tmpLabel3, tempReg2, REG_R0); // B > 0 and C > 0, if A < B, goto overflow - emitIns_J_cond_la(INS_bge, tmpLabel, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2); + emitIns_J_cond_la(INS_bge, tmpLabel, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1, + dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2); codeGen->genDefineTempLabel(tmpLabel2); @@ -6457,7 +6527,8 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, codeGen->genDefineTempLabel(tmpLabel3); // B < 0 and C < 0, if A > B, goto overflow - emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1); + emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2, + dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1); codeGen->genDefineTempLabel(tmpLabel); } @@ -6477,7 +6548,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, return dst->GetRegNum(); } -unsigned emitter::get_curTotalCodeSize() +unsigned emitter::get_curTotalCodeSize() { return emitTotalCodeSize; } @@ -6501,9 +6572,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins { insExecutionCharacteristics result; - //TODO: support this function for LoongArch64. - result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; - result.insLatency = PERFSCORE_LATENCY_ZERO; + // TODO: support this function for LoongArch64. 
+ result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; + result.insLatency = PERFSCORE_LATENCY_ZERO; result.insMemoryAccessKind = PERFSCORE_MEMORY_NONE; return result; @@ -6544,7 +6615,7 @@ const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) // bool emitter::IsMovInstruction(instruction ins) { - switch (ins) + switch (ins) { case INS_mov: case INS_fmov_s: diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 50da1b09a0f20..e9cc1e9d831d7 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -31,7 +31,7 @@ struct CnsVal const char* emitFPregName(unsigned reg, bool varName = true); const char* emitVectorRegName(regNumber reg); -//NOTE: At least 32bytes within dst. +// NOTE: At least 32bytes within dst. void emitDisInsName(code_t code, const BYTE* dst, instrDesc* id); #endif // DEBUG @@ -47,16 +47,14 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); /************************************************************************/ /* Private helpers for instruction output */ @@ -75,12 +73,13 @@ void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTr // Emit the 32-bit LOONGARCH64 instruction 'code' into the 'dst' buffer unsigned emitOutput_Instr(BYTE* dst, code_t code); -//NOTEADD: New functions in emitarm64.h +// NOTEADD: New functions in emitarm64.h // Method to do check if mov is redundant with respect to the last instruction. 
// If yes, the caller of this method can choose to omit current mov instruction. static bool IsMovInstruction(instruction ins); bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip); -bool IsRedundantLdStr(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);//New functions end. +bool IsRedundantLdStr( + instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end. /************************************************************************ * @@ -104,9 +103,9 @@ union bitMaskImm { * representation imm(i16,hw) */ -//static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size); +// static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size); -//static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size); +// static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size); /************************************************************************ * @@ -129,9 +128,9 @@ union halfwordImm { * representation imm(i16,hw) */ -//static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size); +// static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size); -//static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size); +// static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size); /************************************************************************ * @@ -155,9 +154,9 @@ union byteShiftedImm { * representation imm(i8,by) */ -//static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL); +// static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL); -//static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size); +// static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, 
emitAttr size); /************************************************************************ * @@ -180,9 +179,9 @@ union floatImm8 { * Convert between a double and its 'float 8-bit immediate' representation */ -//static emitter::floatImm8 emitEncodeFloatImm8(double immDbl); +// static emitter::floatImm8 emitEncodeFloatImm8(double immDbl); -//static double emitDecodeFloatImm8(const emitter::floatImm8 fpImm); +// static double emitDecodeFloatImm8(const emitter::floatImm8 fpImm); /************************************************************************ * @@ -193,9 +192,9 @@ union floatImm8 { union condFlagsImm { struct { - //insCond cond : 4; // bits 0..3 - //insCflags flags : 4; // bits 4..7 - unsigned imm5 : 5; // bits 8..12 + // insCond cond : 4; // bits 0..3 + // insCflags flags : 4; // bits 4..7 + unsigned imm5 : 5; // bits 8..12 }; unsigned immCFVal; // concat imm5:flags:cond forming an 13-bit unsigned immediate }; @@ -209,19 +208,19 @@ static bool isIntegerRegister(regNumber reg) // Returns true if 'value' is a legal signed immediate 12 bit encoding. static bool isValidSimm12(ssize_t value) { - return -( ((int)1) << 11 ) <= value && value < ( ((int)1) << 11 ); + return -(((int)1) << 11) <= value && value < (((int)1) << 11); }; // Returns true if 'value' is a legal signed immediate 16 bit encoding. static bool isValidSimm16(ssize_t value) { - return -( ((int)1) << 15 ) <= value && value < ( ((int)1) << 15 ); + return -(((int)1) << 15) <= value && value < (((int)1) << 15); }; // Returns true if 'value' is a legal signed immediate 20 bit encoding. 
static bool isValidSimm20(ssize_t value) { - return -( ((int)1) << 19 ) <= value && value < ( ((int)1) << 19 ); + return -(((int)1) << 19) <= value && value < (((int)1) << 19); }; /************************************************************************/ @@ -229,7 +228,6 @@ static bool isValidSimm20(ssize_t value) /************************************************************************/ public: - // Returns the number of bits used by the given 'size'. inline static unsigned getBitWidth(emitAttr size) { @@ -273,7 +271,7 @@ void emitIns_R(instruction ins, emitAttr attr, regNumber reg); void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE); -//NOTEADD: NEW function in emitarm64. +// NOTEADD: NEW function in emitarm64. void emitIns_Mov( instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE); @@ -310,13 +308,13 @@ void emitIns_R_R_R_Ext(instruction ins, insOpts opt = INS_OPTS_NONE, int shiftAmount = -1); -//NODECHANGE: ADD an arg. +// NODECHANGE: ADD an arg. 
void emitIns_R_R_I_I( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); -//void emitIns_BARR(instruction ins, insBarrier barrier); +// void emitIns_BARR(instruction ins, insBarrier barrier); void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs); @@ -325,7 +323,7 @@ void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); void emitIns_S_S_R_R( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); -//void emitIns_R_R_S( +// void emitIns_R_R_S( // instruction ins, emitAttr attr, regNumber ireg, regNumber ireg2, int sa); void emitIns_R_R_S_S( @@ -352,13 +350,12 @@ void emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int of void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); -//NODECHANGE: ADD a description of arguments "disp" +// NODECHANGE: ADD a description of arguments "disp" void emitIns_R_AI(instruction ins, emitAttr attr, regNumber reg, ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); - void emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); void emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp); @@ -380,7 +377,7 @@ enum EmitCallType EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method - //EC_FUNC_ADDR, // Direct call to an absolute address + // EC_FUNC_ADDR, // Direct call to an absolute address // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable) EC_INDIR_R, // Indirect call via register @@ -394,28 +391,27 @@ enum EmitCallType void emitIns_Call(EmitCallType callType, 
CORINFO_METHOD_HANDLE methHnd, INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + void* addr, + ssize_t argSize, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), VARSET_VALARG_TP ptrVars, regMaskTP gcrefRegs, regMaskTP byrefRegs, const DebugInfo& di, - regNumber ireg = REG_NA, - regNumber xreg = REG_NA, - unsigned xmul = 0, - ssize_t disp = 0, - bool isJump = false); + regNumber ireg = REG_NA, + regNumber xreg = REG_NA, + unsigned xmul = 0, + ssize_t disp = 0, + bool isJump = false); unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code); -//BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i); -//BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id); -//BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id); -//BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg); -//BYTE* emitOutputShortConstant( +// BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i); +// BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id); +// BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id); +// BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg); +// BYTE* emitOutputShortConstant( // BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg, emitAttr opSize); -unsigned get_curTotalCodeSize(); // bytes of code +unsigned get_curTotalCodeSize(); // bytes of code #endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9cfd25bbfcc42..7d9446d38a2e1 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3463,10 +3463,10 @@ 
unsigned Compiler::gtSetEvalOrder(GenTree* tree) case GT_CNS_STR: case GT_CNS_LNG: case GT_CNS_INT: - // TODO: workround, should amend for LoongArch64. + // TODO: workround, should amend for LoongArch64. costEx = 4; costSz = 4; - goto COMMON_CNS; + goto COMMON_CNS; #else case GT_CNS_STR: case GT_CNS_LNG: @@ -21498,28 +21498,28 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); DWORD numFloatFields = comp->info.compCompHnd->getFieldTypeByHnd(retClsHnd); - BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); if (numFloatFields & 0x8) { assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; } else if (numFloatFields & 0x2) { assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & 0x20 ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; + m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & 0x20 ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; comp->compFloatingPointUsed = true; } else if (numFloatFields & 0x4) { assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; - m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[0] = numFloatFields & 0x10 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = numFloatFields & 0x20 ? 
TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; } else @@ -21728,10 +21728,10 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const } else { - noway_assert(idx < 2); // Up to 2 return registers for two-float-field structs + noway_assert(idx < 2); // Up to 2 return registers for two-float-field structs if (varTypeIsIntegralOrI(regType)) resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // V0 or V1 - else //if (!varTypeIsIntegralOrI(regType)) + else // if (!varTypeIsIntegralOrI(regType)) resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_FLOATRET : REG_FLOATRET_1; // F0 or F1 } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index f06e2be8c5a7d..ea0529989c056 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -11338,11 +11338,12 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr if (genActualType(op1->TypeGet()) != TYP_I_IMPL) { - // insert an explicit upcast +// insert an explicit upcast #ifdef TARGET_LOONGARCH64 if (op1->TypeGet() == TYP_INT && op1->gtOper == GT_CNS_INT) { - op1->AsIntCon()->gtIconVal = fUnsigned ? (uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; + op1->AsIntCon()->gtIconVal = + fUnsigned ? (uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; op1->gtType = TYP_LONG; } else if (op1->gtOper == GT_CNS_INT) @@ -11353,11 +11354,12 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr } else if (genActualType(op2->TypeGet()) != TYP_I_IMPL) { - // insert an explicit upcast +// insert an explicit upcast #ifdef TARGET_LOONGARCH64 if (op2->TypeGet() == TYP_INT && op2->gtOper == GT_CNS_INT) { - op2->AsIntCon()->gtIconVal = fUnsigned ? (uint32_t)op2->AsIntCon()->gtIconVal : op2->AsIntCon()->gtIconVal; + op2->AsIntCon()->gtIconVal = + fUnsigned ? 
(uint32_t)op2->AsIntCon()->gtIconVal : op2->AsIntCon()->gtIconVal; op2->gtType = TYP_LONG; } else @@ -12425,7 +12427,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) else op1 = impImplicitIorI4Cast(op1, lclTyp); #else - op1 = impImplicitIorI4Cast(op1, lclTyp); + op1 = impImplicitIorI4Cast(op1, lclTyp); #endif #ifdef TARGET_64BIT @@ -13514,12 +13516,13 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (op2->gtOper == GT_CNS_INT && op2->AsIntCon()->gtIconVal > 31) { type = TYP_LONG; - } else + } + else type = genActualType(op1->TypeGet()); #else type = genActualType(op1->TypeGet()); #endif - op1 = gtNewOperNode(oper, type, op1, op2); + op1 = gtNewOperNode(oper, type, op1, op2); impPushOnStack(op1, tiRetVal); break; @@ -13752,24 +13755,26 @@ void Compiler::impImportBlockCode(BasicBlock* block) #ifdef TARGET_LOONGARCH64 if (op2->gtOper == GT_CNS_INT) { - op2->AsIntCon()->gtIconVal = uns ? (uint32_t)op2->AsIntCon()->gtIconVal : (int32_t)op2->AsIntCon()->gtIconVal; + op2->AsIntCon()->gtIconVal = + uns ? (uint32_t)op2->AsIntCon()->gtIconVal : (int32_t)op2->AsIntCon()->gtIconVal; op2->gtType = TYP_LONG; } else #endif - op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, TYP_I_IMPL); + op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, TYP_I_IMPL); } else if (varTypeIsI(op2) && (genActualType(op1) == TYP_INT)) { #ifdef TARGET_LOONGARCH64 if (op1->gtOper == GT_CNS_INT) { - op1->AsIntCon()->gtIconVal = uns ? (uint32_t)op1->AsIntCon()->gtIconVal : (int32_t)op1->AsIntCon()->gtIconVal; + op1->AsIntCon()->gtIconVal = + uns ? 
(uint32_t)op1->AsIntCon()->gtIconVal : (int32_t)op1->AsIntCon()->gtIconVal; op1->gtType = TYP_LONG; } else #endif - op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, TYP_I_IMPL); + op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, TYP_I_IMPL); } #endif // TARGET_64BIT @@ -13863,16 +13868,16 @@ void Compiler::impImportBlockCode(BasicBlock* block) #ifdef TARGET_64BIT #ifdef TARGET_LOONGARCH64 - if ((op2->OperGet() == GT_CNS_INT)/* && (op2->AsIntCon()->IconValue() == 0)*/) + if ((op2->OperGet() == GT_CNS_INT) /* && (op2->AsIntCon()->IconValue() == 0)*/) { op2->gtType = op1->TypeGet(); } - /*if (op1->OperGet() == GT_CNS_INT) - { - //assert(op1->gtType == op2->TypeGet()); - //op2->gtType = op1->TypeGet(); - op1->gtFlags |= GTF_CONTAINED; - }*/ +/*if (op1->OperGet() == GT_CNS_INT) +{ + //assert(op1->gtType == op2->TypeGet()); + //op2->gtType = op1->TypeGet(); + op1->gtFlags |= GTF_CONTAINED; +}*/ #else if ((op1->TypeGet() == TYP_I_IMPL) && (genActualType(op2->TypeGet()) == TYP_INT)) { @@ -14180,7 +14185,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) assert(op1->gtOper == GT_CNS_INT && op1->AsIntCon()->gtIconVal == 0); op1->gtType = genActualType(lclTyp); impPushOnStack(op1, tiRetVal); - //opcode = CEE_LDC_I4_0; + // opcode = CEE_LDC_I4_0; break; } #endif @@ -14193,37 +14198,38 @@ void Compiler::impImportBlockCode(BasicBlock* block) uns = false; } - // At this point uns, ovf, callNode are all set. +// At this point uns, ovf, callNode are all set. 
#ifdef TARGET_LOONGARCH64 - if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtOper == GT_CNS_INT) + if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtOper == GT_CNS_INT) + { + switch (lclTyp) { - switch (lclTyp) - { - case TYP_BYTE: - op1->AsIntCon()->gtIconVal = (int8_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_UBYTE: - op1->AsIntCon()->gtIconVal = (uint8_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_USHORT: - op1->AsIntCon()->gtIconVal = (uint16_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_SHORT: - op1->AsIntCon()->gtIconVal = (short)op1->AsIntCon()->gtIconVal; - break; - default: - assert(!"unexpected type"); - return; - } + case TYP_BYTE: + op1->AsIntCon()->gtIconVal = (int8_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_UBYTE: + op1->AsIntCon()->gtIconVal = (uint8_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_USHORT: + op1->AsIntCon()->gtIconVal = (uint16_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_SHORT: + op1->AsIntCon()->gtIconVal = (short)op1->AsIntCon()->gtIconVal; + break; + default: + assert(!"unexpected type"); + return; + } - op1->gtType == TYP_INT; + op1->gtType == TYP_INT; - impPushOnStack(op1, tiRetVal); - break; - } else + impPushOnStack(op1, tiRetVal); + break; + } + else #endif - if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) + if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) { op2 = op1->AsOp()->gtOp2; @@ -14288,7 +14294,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } else #ifdef TARGET_LOONGARCH64 - if (type != TYP_LONG) + if (type != TYP_LONG) { if (!ovfl && op1->gtOper == GT_CNS_INT && op1->TypeGet() == TYP_LONG) { @@ -14296,12 +14302,12 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (lclTyp == TYP_INT) { op1->AsIntCon()->gtIconVal = (int32_t)op1->AsIntCon()->gtIconVal; - op1->gtType = TYP_INT; + op1->gtType = TYP_INT; } else if (lclTyp == TYP_UINT) { op1->AsIntCon()->gtIconVal = (uint32_t)op1->AsIntCon()->gtIconVal; - 
op1->gtType = TYP_UINT; + op1->gtType = TYP_UINT; } else op1 = gtNewCastNode(type, op1, uns, lclTyp); @@ -16135,7 +16141,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } else #ifdef TARGET_LOONGARCH64 - if (genActualType(op2->TypeGet()) != TYP_INT) + if (genActualType(op2->TypeGet()) != TYP_INT) #endif { bool isUnsigned = false; @@ -17728,7 +17734,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) impBashVarAddrsToI(op2); op2 = impImplicitIorI4Cast(op2, info.compRetType); op2 = impImplicitR4orR8Cast(op2, info.compRetType); - // Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. +// Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. #ifdef TARGET_LOONGARCH64 assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || (genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType)) || @@ -18608,17 +18614,17 @@ void Compiler::impImportBlock(BasicBlock* block) } else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_I_IMPL) { - // Spill clique has decided this should be "native int", but this block only pushes an "int". - // Insert a sign-extension to "native int" so we match the clique. +// Spill clique has decided this should be "native int", but this block only pushes an "int". +// Insert a sign-extension to "native int" so we match the clique. 
#ifdef TARGET_LOONGARCH64 - if (tree->gtOper == GT_CNS_INT/* && !tree->AsIntCon()->gtIconVal*/) + if (tree->gtOper == GT_CNS_INT /* && !tree->AsIntCon()->gtIconVal*/) { tree->gtType = TYP_I_IMPL; tree->SetContained(); } else #endif - verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); + verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } // Consider the case where one branch left a 'byref' on the stack and the other leaves @@ -18640,8 +18646,8 @@ void Compiler::impImportBlock(BasicBlock* block) } else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_BYREF) { - // Spill clique has decided this should be "byref", but this block only pushes an "int". - // Insert a sign-extension to "native int" so we match the clique size. +// Spill clique has decided this should be "byref", but this block only pushes an "int". +// Insert a sign-extension to "native int" so we match the clique size. #ifdef TARGET_LOONGARCH64 if (tree->gtOper == GT_CNS_INT /*&& !tree->AsIntCon()->gtIconVal*/) { @@ -18650,7 +18656,7 @@ void Compiler::impImportBlock(BasicBlock* block) } else #endif - verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); + verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } } #endif // TARGET_64BIT diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 62b2eade19cec..e80855be507dc 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -900,13 +900,13 @@ void CodeGen::inst_RV_TT(instruction ins, regSet.verifyRegUsed(regTmp); return; } -#else // !TARGET_ARM +#else // !TARGET_ARM #ifdef TARGET_LOONGARCH64 if (emitter::isFloatReg(reg)) - assert((ins==INS_fld_d) || (ins==INS_fld_s)); + assert((ins == INS_fld_d) || (ins == INS_fld_s)); else if (emitter::isGeneralRegister(reg) && (ins != INS_lea)) - {//TODO should amend for LOONGARCH64 !!! 
- //assert((ins==INS_ld_d) || (ins==INS_ld_w)); + { // TODO should amend for LOONGARCH64 !!! + // assert((ins==INS_ld_d) || (ins==INS_ld_w)); ins = size == EA_4BYTE ? INS_ld_w : INS_ld_d; } #endif @@ -1476,7 +1476,7 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) #if defined(TARGET_LOONGARCH64) bool CodeGenInterface::validImmForBAL(ssize_t addr) -{//TODO: can amend/optimize for LoongArch64. +{ // TODO: can amend/optimize for LoongArch64. return false; } #endif // TARGET_LOONGARCH64 @@ -1778,8 +1778,8 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* } else { - //assert((TYP_LONG == srcType) || (TYP_ULONG == srcType)); - ins = INS_ld_d;//default ld_d. + // assert((TYP_LONG == srcType) || (TYP_ULONG == srcType)); + ins = INS_ld_d; // default ld_d. } #else NYI("ins_Load"); @@ -2005,8 +2005,8 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = INS_st_h; else if ((TYP_INT == dstType) || (TYP_UINT == dstType)) ins = INS_st_w; - else //if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) - ins = INS_st_d;//default st_d. + else // if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) + ins = INS_st_d; // default st_d. 
#else NYI("ins_Store"); #endif diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h index 1c16d53fd453a..f171bc69a7a68 100644 --- a/src/coreclr/jit/instrsloongarch64.h +++ b/src/coreclr/jit/instrsloongarch64.h @@ -492,7 +492,6 @@ INSTS(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) INSTS(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) INSTS(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) - // clang-format on /*****************************************************************************/ #undef INSTS diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 987ea401c503a..96b9f15c279a0 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -309,7 +309,7 @@ // Arm64 Windows supports FEATURE_ARG_SPLIT, note this is different from // the official Arm64 ABI. // Case: splitting 16 byte struct between x7 and stack -#if defined(TARGET_ARM) || defined(TARGET_ARM64)/* || defined(TARGET_LOONGARCH64)*/ +#if defined(TARGET_ARM) || defined(TARGET_ARM64) /* || defined(TARGET_LOONGARCH64)*/ #define FEATURE_ARG_SPLIT 1 #else #define FEATURE_ARG_SPLIT 0 diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 2dff5054a8cc8..ce652f10e91fe 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -610,7 +610,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } #elif defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) // On System V type environment the float registers are not indexed together with the int ones. 
- varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum; + varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum; #endif // TARGET* CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args; @@ -644,12 +644,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un CORINFO_CLASS_HANDLE typeHnd = nullptr; #if defined(TARGET_LOONGARCH64) - int flags = 0; - CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType2(&info.compMethodInfo->args, argLst, &typeHnd, &flags); + int flags = 0; + CorInfoTypeWithMod corInfoType = + info.compCompHnd->getArgType2(&info.compMethodInfo->args, argLst, &typeHnd, &flags); #else CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); #endif - varDsc->lvIsParam = 1; + varDsc->lvIsParam = 1; lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args); @@ -904,19 +905,19 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un assert(varDsc->lvExactSize <= argSize); float_num = 1; - arg1_Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; + arg1_Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; canPassArgInRegisters = varDscInfo->canEnreg(arg1_Type, 1); } else if (flags & 0x8) { - arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; - arg2_Type = (flags & 0x20) ? TYP_DOUBLE : TYP_FLOAT; - float_num = 2; + arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; + arg2_Type = (flags & 0x20) ? 
TYP_DOUBLE : TYP_FLOAT; + float_num = 2; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); } else if (flags & 2) { - float_num = 1; + float_num = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); @@ -925,7 +926,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } else if (flags & 4) { - float_num = 1; + float_num = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); @@ -937,8 +938,8 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { assert(float_num > 0); canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); - arg1_Type = TYP_UNKNOWN; - arg2_Type = TYP_UNKNOWN; + arg1_Type = TYP_UNKNOWN; + arg2_Type = TYP_UNKNOWN; } } else @@ -949,12 +950,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un if (!canPassArgInRegisters && varTypeIsFloating(argType)) { canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); - argType = canPassArgInRegisters ? TYP_I_IMPL : argType; + argType = canPassArgInRegisters ? TYP_I_IMPL : argType; } if (!canPassArgInRegisters && (cSlots > 1)) { canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; + arg1_Type = canPassArgInRegisters ? 
TYP_I_IMPL : TYP_UNKNOWN; } #endif } @@ -986,7 +987,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } else -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) if (arg1_Type != TYP_UNKNOWN) { firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg1_Type, 1); @@ -1051,13 +1052,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2_Type, 1); varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2_Type)); - varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2_Type) == 4 ? 1 : 0; + varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2_Type) == 4 ? 1 : 0; varDscInfo->hasMultiSlotStruct = true; } else if (cSlots > 1) { varDsc->lvIsSplit = 1; - //varDsc->lvFldOffset = 0; + // varDsc->lvFldOffset = 0; varDsc->SetOtherArgReg(REG_STK); varDscInfo->hasMultiSlotStruct = true; varDscInfo->setAllRegArgUsed(arg1_Type); @@ -1515,9 +1516,9 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, varDsc->lvIsImplicitByRef = 0; #elif defined(TARGET_LOONGARCH64) varDsc->lvIsImplicitByRef = 0; - varDsc->lvIs4Field1 = 0; - varDsc->lvIs4Field2 = 0; - varDsc->lvIsSplit = 0; + varDsc->lvIs4Field1 = 0; + varDsc->lvIs4Field2 = 0; + varDsc->lvIsSplit = 0; #endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Set the lvType (before this point it is TYP_UNDEF). 
@@ -1849,7 +1850,7 @@ bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * FP_REGSIZE_BYTES; #endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) #else // !FEATURE_SIMD - const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); + const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); #endif // !FEATURE_SIMD assert((BYTE)MaxOffset == MaxOffset); // because lvaStructFieldInfo.fldOffset is byte-sized @@ -5418,7 +5419,7 @@ void Compiler::lvaFixVirtualFrameOffsets() JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), varDsc->GetStackOffset() + delta); varDsc->SetStackOffset(varDsc->GetStackOffset() + delta); -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) if (varDsc->GetStackOffset() >= delta) varDsc->SetStackOffset(varDsc->GetStackOffset() + (varDsc->lvIsSplit ? 8 : 0)); #endif @@ -5998,16 +5999,16 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, } #elif defined(TARGET_LOONGARCH64) - //if (compFeatureArgSplit() && this->info.compIsVarArgs) - //{//TODO: should confirm for "info.compIsVarArgs". - // if (varDsc->lvType == TYP_STRUCT && varDsc->lvOtherArgReg >= MAX_REG_ARG && varDsc->lvOtherArgReg != REG_NA) - // { - // // This is a split struct. It will account for an extra (8 bytes) - // // of alignment. - // varDsc->lvStkOffs += TARGET_POINTER_SIZE; - // argOffs += TARGET_POINTER_SIZE; - // } - //} +// if (compFeatureArgSplit() && this->info.compIsVarArgs) +//{//TODO: should confirm for "info.compIsVarArgs". +// if (varDsc->lvType == TYP_STRUCT && varDsc->lvOtherArgReg >= MAX_REG_ARG && varDsc->lvOtherArgReg != REG_NA) +// { +// // This is a split struct. It will account for an extra (8 bytes) +// // of alignment. 
+// varDsc->lvStkOffs += TARGET_POINTER_SIZE; +// argOffs += TARGET_POINTER_SIZE; +// } +//} #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -6815,9 +6816,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #ifdef TARGET_LOONGARCH64 if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) - {//TODO: add VarArgs for LOONGARCH64. + { // TODO: add VarArgs for LOONGARCH64. // Stack offset to parameters should point to home area which will be preallocated. - varDsc->SetStackOffset(-initialStkOffs + genMapIntRegNumToRegArgNum(varDsc->GetArgReg()) * REGSIZE_BYTES); + varDsc->SetStackOffset(-initialStkOffs + + genMapIntRegNumToRegArgNum(varDsc->GetArgReg()) * REGSIZE_BYTES); continue; } #endif diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 86925234b877b..6a3fb1fb91165 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2526,7 +2526,7 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) { assert(cmp->gtGetOp2()->IsIntegralConst()); -#if defined(TARGET_XARCH) || defined(TARGET_ARM64)// || defined(TARGET_LOONGARCH64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) // || defined(TARGET_LOONGARCH64) ////TODO: add optimize for LoongArch64. GenTree* op1 = cmp->gtGetOp1(); GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 3f5df4552c731..82809712de064 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -42,8 +42,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool Lowering::IsCallTargetInRange(void* addr) { ////TODO for LOONGARCH64: should amend for optimize! 
- //assert(!"unimplemented on LOONGARCH yet"); - //return comp->codeGen->validImmForBAL((ssize_t)addr); + // assert(!"unimplemented on LOONGARCH yet"); + // return comp->codeGen->validImmForBAL((ssize_t)addr); return false; } @@ -72,8 +72,8 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const switch (parentNode->OperGet()) { case GT_ADD: - return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false - : ((-2048 <= immVal) && (immVal <= 2047)); + return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false : ((-2048 <= immVal) && + (immVal <= 2047)); break; case GT_CMPXCHG: case GT_LOCKADD: @@ -127,7 +127,7 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) { assert(mul->OperIsMul()); - //if (comp->opts.OptimizationEnabled() && mul->OperIs(GT_MUL) && mul->IsValidLongMul()) + // if (comp->opts.OptimizationEnabled() && mul->OperIs(GT_MUL) && mul->IsValidLongMul()) //{ // GenTreeCast* op1 = mul->gtGetOp1()->AsCast(); // GenTree* op2 = mul->gtGetOp2(); @@ -256,7 +256,7 @@ void Lowering::LowerStoreIndir(GenTreeStoreInd* node) // void Lowering::LowerBlockStore(GenTreeBlk* blkNode) { - GenTree* dstAddr = blkNode->Addr(); + GenTree* dstAddr = blkNode->Addr(); GenTree* src = blkNode->Data(); unsigned size = blkNode->Size(); @@ -286,7 +286,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) ssize_t fill = src->AsIntCon()->IconValue() & 0xFF; if (fill == 0) { - src->SetContained();; + src->SetContained(); + ; } else if (size >= REGSIZE_BYTES) { @@ -347,7 +348,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } -//////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////// else if (blkNode->OperIs(GT_STORE_BLK) && (size <= CPBLK_UNROLL_LIMIT)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; @@ -365,7 
+366,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } - } } @@ -432,9 +432,6 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT addr->SetContained(); } - - - void Lowering::LowerCast(GenTree* tree) { assert(tree->OperGet() == GT_CAST); @@ -491,8 +488,8 @@ void Lowering::LowerRotate(GenTree* tree) if (rotateLeftIndexNode->IsCnsIntOrI()) { - ssize_t rotateLeftIndex = rotateLeftIndexNode->AsIntCon()->gtIconVal; - ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; + ssize_t rotateLeftIndex = rotateLeftIndexNode->AsIntCon()->gtIconVal; + ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; rotateLeftIndexNode->AsIntCon()->gtIconVal = rotateRightIndex; } else @@ -515,7 +512,7 @@ void Lowering::LowerRotate(GenTree* tree) // void Lowering::LowerSIMD(GenTreeSIMD* simdNode) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 assert(simdNode->gtType != TYP_SIMD32); @@ -540,7 +537,7 @@ assert(!"unimplemented on LOONGARCH yet"); // void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 auto intrinsicID = node->gtHWIntrinsicId; auto intrinsicInfo = HWIntrinsicInfo::lookup(node->gtHWIntrinsicId); @@ -1375,7 +1372,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) } #endif // FEATURE_SIMD - GenTree* addr = indirNode->Addr(); + GenTree* addr = indirNode->Addr(); if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) { MakeSrcContained(indirNode, addr); @@ -1393,7 +1390,6 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) // make this contained, it turns into a constant that goes into an addr mode MakeSrcContained(indirNode, addr); } - } //------------------------------------------------------------------------ @@ -1473,9 +1469,6 @@ void 
Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); - - - #ifdef FEATURE_SIMD if (varTypeIsSIMD(storeLoc)) { @@ -1552,7 +1545,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) // void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 switch (simdNode->gtSIMDIntrinsicID) { @@ -1619,7 +1612,7 @@ assert(!"unimplemented on LOONGARCH yet"); // void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 GenTreeArgList* argList = nullptr; GenTree* op1 = node->gtOp.gtOp1; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index b43d29ed7cfc3..c9556de7ed488 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -705,7 +705,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #elif TARGET_LOONGARCH64 availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); #endif #if ETW_EBP_FRAMED @@ -1568,12 +1568,13 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) #endif // FEATURE_SIMD case TYP_STRUCT: - // TODO-1stClassStructs: support vars with GC pointers. The issue is that such - // vars will have `lvMustInit` set, because emitter has poor support for struct liveness, - // but if the variable is tracked the prolog generator would expect it to be in liveIn set, - // so an assert in `genFnProlog` will fire. +// TODO-1stClassStructs: support vars with GC pointers. 
The issue is that such +// vars will have `lvMustInit` set, because emitter has poor support for struct liveness, +// but if the variable is tracked the prolog generator would expect it to be in liveIn set, +// so an assert in `genFnProlog` will fire. #ifdef TARGET_LOONGARCH64 - return !genIsValidFloatReg(varDsc->GetOtherArgReg()) && compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); + return !genIsValidFloatReg(varDsc->GetOtherArgReg()) && compiler->compEnregStructLocals() && + !varDsc->HasGCPtr(); #else return compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); #endif diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 5e803336e2b4d..202760e1c3bcf 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2213,15 +2213,15 @@ class RefPosition // no reg is allocated. unsigned char regOptional : 1; - // Used by RefTypeDef/Use positions of a multi-reg call node. - // Indicates the position of the register that this ref position refers to. - // The max bits needed is based on max value of MAX_RET_REG_COUNT value - // across all targets and that happens 4 on on Arm. Hence index value - // would be 0..MAX_RET_REG_COUNT-1. +// Used by RefTypeDef/Use positions of a multi-reg call node. +// Indicates the position of the register that this ref position refers to. +// The max bits needed is based on max value of MAX_RET_REG_COUNT value +// across all targets and that happens 4 on on Arm. Hence index value +// would be 0..MAX_RET_REG_COUNT-1. #ifdef TARGET_LOONGARCH64 - //TODO for LOONGARCH64: should confirm for ArgSplit? + // TODO for LOONGARCH64: should confirm for ArgSplit? 
unsigned char multiRegIdx : 3; -#else // !TARGET_LOONGARCH64 +#else // !TARGET_LOONGARCH64 unsigned char multiRegIdx : 2; #endif // !TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index ba2f6f3536f24..6d056a46d737d 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -298,8 +298,8 @@ int LinearScan::BuildNode(GenTree* tree) BuildDefsWithKills(tree, 0, RBM_NONE, killMask); break; - //case GT_MOD: - //case GT_UMOD: + // case GT_MOD: + // case GT_UMOD: // NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in LOONGARCH64"); // assert(!"Shouldn't see an integer typed GT_MOD node in LOONGARCH64"); // srcCount = 0; @@ -790,7 +790,7 @@ int LinearScan::BuildNode(GenTree* tree) // int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 int srcCount = 0; // Only SIMDIntrinsicInit can be contained @@ -997,7 +997,7 @@ assert(!"unimplemented on LOONGARCH yet"); // int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId; int numArgs = HWIntrinsicInfo::lookupNumArgs(intrinsicTree); @@ -1180,9 +1180,9 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) // int LinearScan::BuildCall(GenTreeCall* call) { - bool hasMultiRegRetVal = false; - const ReturnTypeDesc* retTypeDesc = nullptr; - regMaskTP dstCandidates = RBM_NONE; + bool hasMultiRegRetVal = false; + const ReturnTypeDesc* retTypeDesc = nullptr; + regMaskTP dstCandidates = RBM_NONE; int srcCount = 0; int dstCount = 0; @@ -1234,7 +1234,7 @@ int LinearScan::BuildCall(GenTreeCall* call) RegisterType registerType = call->TypeGet(); -// Set destination candidates for return value of the call. + // Set destination candidates for return value of the call. 
if (hasMultiRegRetVal) { @@ -1474,8 +1474,8 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Registers for split argument corresponds to source int dstCount = argNode->gtNumRegs; - regNumber argReg = argNode->GetRegNum(); - regMaskTP argMask = RBM_NONE; + regNumber argReg = argNode->GetRegNum(); + regMaskTP argMask = RBM_NONE; regMaskTP argMaskArr[MAX_REG_ARG] = {RBM_NONE}; for (unsigned i = 0; i < dstCount; i++) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 872790417791a..ba813f2779050 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -214,7 +214,8 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)// On ARM64 All non-overflow checking conversions can be optimized +#if defined(TARGET_ARM64) || \ + defined(TARGET_LOONGARCH64) // On ARM64 All non-overflow checking conversions can be optimized return nullptr; #else switch (dstType) @@ -916,7 +917,8 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, bool isVararg, const regNumber otherRegNum) { - fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, false, isVararg); + fgArgTabEntry* curArgTabEntry = + AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, false, isVararg); assert(curArgTabEntry != nullptr); curArgTabEntry->isStruct = isStruct; // is this a struct arg @@ -926,7 +928,7 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, if (numRegs == 2) { curArgTabEntry->setRegNum(1, otherRegNum); - //curArgTabEntry->isSplit = true; + // curArgTabEntry->isSplit = true; } return curArgTabEntry; @@ -2888,7 +2890,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #elif defined(TARGET_X86) - passUsingFloatRegs = false; + passUsingFloatRegs = false; #elif defined(TARGET_LOONGARCH64) assert(!callIsVararg); @@ -3015,7 +3017,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* 
call) DWORD numFloatFields = 0; if (!isStructArg) { - size = 1; + size = 1; byteSize = genTypeSize(argx); } else @@ -3059,9 +3061,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) else if (numFloatFields & 8) size = 2; } - else //if (passStructByRef) + else // if (passStructByRef) { - size = 1; + size = 1; byteSize = TARGET_POINTER_SIZE; } #else @@ -3225,7 +3227,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) if (!passUsingFloatRegs) { - size = structSize > 8 ? 2 : 1; + size = structSize > 8 ? 2 : 1; numFloatFields = 0; } else if (passUsingFloatRegs) @@ -3235,20 +3237,20 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) else if (numFloatFields & 0x4) { assert(size == 1); - size = 2; + size = 2; passUsingFloatRegs = false; - nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); + nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); } - else if (/*(size == 1) && */(numFloatFields & 0x2)) + else if (/*(size == 1) && */ (numFloatFields & 0x2)) { assert((size == 1) && (numFloatFields & 0x2)); - size = 2; + size = 2; nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum); } } } - assert(!isHfaArg);//LOONGARCH not support HFA. + assert(!isHfaArg); // LOONGARCH not support HFA. } // if run out the fp argument register, try the int argument register. @@ -3268,9 +3270,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // We also must update intArgRegNum so that we no longer try to // allocate any new general purpose registers for args // - isRegArg = intArgRegNum < maxRegArgs;//the split-struct case. + isRegArg = intArgRegNum < maxRegArgs; // the split-struct case. 
nextOtherRegNum = REG_STK; - //assert((intArgRegNum + 1) == maxRegArgs); + // assert((intArgRegNum + 1) == maxRegArgs); } } #else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 @@ -3425,10 +3427,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // This is a register argument - put it in the table newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, byteSize, argAlignBytes, isStructArg, - isFloatHfa, callIsVararg UNIX_LOONGARCH64_ONLY_ARG(nextOtherRegNum) UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) - UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) - UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) - UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); + isFloatHfa, callIsVararg UNIX_LOONGARCH64_ONLY_ARG(nextOtherRegNum) + UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) + UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) + UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) + UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); newArgEntry->SetIsBackFilled(isBackFilled); // Set up the next intArgRegNum and fltArgRegNum values. @@ -3448,7 +3451,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #endif // FEATURE_ARG_SPLIT assert(!passUsingFloatRegs); assert(size == 2); - //assert(nextOtherRegNum == REG_STK); + // assert(nextOtherRegNum == REG_STK); intArgRegNum = maxRegArgs; } else if ((numFloatFields & 0xf) == 0x0) @@ -3465,7 +3468,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } else if (numFloatFields & 0x6) { - //assert((numFloatFields & 0x2) || (numFloatFields & 0x4)); + // assert((numFloatFields & 0x2) || (numFloatFields & 0x4)); fltArgRegNum += 1; intArgRegNum += 1; } @@ -3558,9 +3561,10 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { newArgEntry->passedByRef = passStructByRef; #if defined(TARGET_LOONGARCH64) - newArgEntry->argType = (numFloatFields & 0xe) || (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; + newArgEntry->argType = + (numFloatFields & 0xe) || (structBaseType == TYP_UNKNOWN) ? 
argx->TypeGet() : structBaseType; #else - newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; + newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; #endif } else @@ -4474,8 +4478,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } #if FEATURE_MULTIREG_ARGS - // Examine 'arg' and setup argValue objClass and structSize - // +// Examine 'arg' and setup argValue objClass and structSize +// #if defined(TARGET_LOONGARCH64) const CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); if (objClass == NO_CLASS_HANDLE) @@ -4490,24 +4494,24 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry LclVarDsc* varDsc = &lvaTable[varNum]; assert(varDsc->lvExactSize == 8); - unsigned offset = arg->AsLclVarCommon()->GetLclOffs(); - GenTreeFieldList* newArg = nullptr; - var_types tmp_type = fgEntryPtr->isPassedInFloatRegisters() ? TYP_FLOAT : TYP_INT; - arg->gtType = tmp_type; + unsigned offset = arg->AsLclVarCommon()->GetLclOffs(); + GenTreeFieldList* newArg = nullptr; + var_types tmp_type = fgEntryPtr->isPassedInFloatRegisters() ? TYP_FLOAT : TYP_INT; + arg->gtType = tmp_type; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); newArg->AddField(this, arg, offset, tmp_type); - tmp_type = isValidFloatArgReg(fgEntryPtr->GetOtherRegNum()) ? TYP_FLOAT : TYP_INT; + tmp_type = isValidFloatArgReg(fgEntryPtr->GetOtherRegNum()) ? 
TYP_FLOAT : TYP_INT; GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type, offset + 4); newArg->AddField(this, nextLclFld, offset + 4, tmp_type); return newArg; } #else - const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); + const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); #endif - GenTree* argValue = arg; // normally argValue will be arg, but see right below - unsigned structSize = 0; + GenTree* argValue = arg; // normally argValue will be arg, but see right below + unsigned structSize = 0; if (arg->TypeGet() != TYP_STRUCT) { @@ -4929,12 +4933,12 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry else if (numFloatFields & 0x2) { tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - //tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT;type[0] + // tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT;type[0] tmp_type_2 = numFloatFields & 0x20 ? type[1] : TYP_INT; } else if (numFloatFields & 0x4) { - //tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; + // tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; } @@ -4943,7 +4947,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & 0x30 ? 8 : 4;; + elemSize = numFloatFields & 0x30 ? 
8 : 4; GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type_1, offset); newArg->AddField(this, nextLclFld, offset, tmp_type_1); @@ -4959,7 +4963,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry if (elemCount > 1) { assert(elemCount == 2); - elemSize = genTypeSize(type[1]); + elemSize = genTypeSize(type[1]); nextLclFld = gtNewLclFldNode(varNum, type[1], offset + elemSize); newArg->AddField(this, nextLclFld, offset + elemSize, type[1]); } @@ -5018,12 +5022,12 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry else if (numFloatFields & 0x2) { tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - //tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT; + // tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT; tmp_type_2 = numFloatFields & 0x20 ? type[1] : TYP_INT; } else if (numFloatFields & 0x4) { - //tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; + // tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; } @@ -5032,7 +5036,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & 0x30 ? 8 : 4;; + elemSize = numFloatFields & 0x30 ? 8 : 4; GenTree* curItem = gtNewIndir(tmp_type_1, baseAddr); // For safety all GT_IND should have at least GT_GLOB_REF set. 
@@ -5041,7 +5045,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); newArg->AddField(this, curItem, 0, tmp_type_1); - //GenTree* curAddr = baseAddr; + // GenTree* curAddr = baseAddr; baseAddr = gtCloneExpr(baseAddr); noway_assert(baseAddr != nullptr); baseAddr = gtNewOperNode(GT_ADD, addrType, baseAddr, gtNewIconNode(elemSize, TYP_I_IMPL)); @@ -5054,26 +5058,28 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else #endif - for (unsigned inx = 0; inx < elemCount; inx++) { - GenTree* curAddr = baseAddr; - if (offset != 0) - { - GenTree* baseAddrDup = gtCloneExpr(baseAddr); - noway_assert(baseAddrDup != nullptr); - curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); - } - else + for (unsigned inx = 0; inx < elemCount; inx++) { - curAddr = baseAddr; - } - GenTree* curItem = gtNewIndir(type[inx], curAddr); + GenTree* curAddr = baseAddr; + if (offset != 0) + { + GenTree* baseAddrDup = gtCloneExpr(baseAddr); + noway_assert(baseAddrDup != nullptr); + curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); + } + else + { + curAddr = baseAddr; + } + GenTree* curItem = gtNewIndir(type[inx], curAddr); - // For safety all GT_IND should have at least GT_GLOB_REF set. - curItem->gtFlags |= GTF_GLOB_REF; + // For safety all GT_IND should have at least GT_GLOB_REF set. 
+ curItem->gtFlags |= GTF_GLOB_REF; - newArg->AddField(this, curItem, offset, type[inx]); - offset += genTypeSize(type[inx]); + newArg->AddField(this, curItem, offset, type[inx]); + offset += genTypeSize(type[inx]); + } } } } @@ -5777,7 +5783,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) arrLen = gtNewCastNode(bndsChkType, arrLen, true, bndsChkType); } #else - GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); + GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); #endif GenTreeBoundsChk* arrBndsChk = new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(index, arrLen, SCK_RNGCHK_FAIL); @@ -18031,9 +18037,10 @@ GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) // void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZero) { - // We expect 'addr' to be an address at this point. +// We expect 'addr' to be an address at this point. #ifdef TARGET_LOONGARCH64 - assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_INT || addr->TypeGet() == TYP_REF); + assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_INT || + addr->TypeGet() == TYP_REF); #else assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); #endif diff --git a/src/coreclr/jit/register_arg_convention.cpp b/src/coreclr/jit/register_arg_convention.cpp index 16c3f051f74bf..755dd28915684 100644 --- a/src/coreclr/jit/register_arg_convention.cpp +++ b/src/coreclr/jit/register_arg_convention.cpp @@ -44,11 +44,11 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) nextReg(TYP_FLOAT, numRegs); #elif defined(TARGET_LOONGARCH64) // LA-ABI64. - if (numRegs > MAX_PASS_MULTIREG_BYTES/TARGET_POINTER_SIZE) + if (numRegs > MAX_PASS_MULTIREG_BYTES / TARGET_POINTER_SIZE) { assert(varTypeIsStruct(type)); - nextReg(TYP_INT, 1);//TYP_BYREF - }//TODO:struct-float. 
+ nextReg(TYP_INT, 1); // TYP_BYREF + } // TODO:struct-float. else { nextReg(type, numRegs); diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index a200b5686a7df..f2bc7875152b6 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -127,7 +127,7 @@ class RegSet regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog #elif defined(TARGET_LOONGARCH64) regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#endif // TARGET_ARM +#endif // TARGET_ARM public: // TODO-Cleanup: Should be private, but Compiler uses it regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty) diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 03ddbdfd7585c..2e315d2faa84b 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -1609,7 +1609,7 @@ void CodeGen::psiBegProlog() noway_assert(EA_SIZE_IN_BYTES(lclVarDsc->lvSize()) <= 16); if (emitter::isFloatReg(lclVarDsc->GetArgReg())) { - //regType = lclVarDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE; + // regType = lclVarDsc->lvIs4Field1 ? 
TYP_FLOAT : TYP_DOUBLE; regType = TYP_DOUBLE; } else diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index e78b74616ce41..97d9a69328483 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -34,7 +34,8 @@ inline bool compMacOsArm64Abi() } inline bool compFeatureArgSplit() { - return TargetArchitecture::IsLoongArch64 || TargetArchitecture::IsArm32 || (TargetOS::IsWindows && TargetArchitecture::IsArm64); + return TargetArchitecture::IsLoongArch64 || TargetArchitecture::IsArm32 || + (TargetOS::IsWindows && TargetArchitecture::IsArm64); } inline bool compUnixX86Abi() { diff --git a/src/coreclr/jit/targetloongarch64.cpp b/src/coreclr/jit/targetloongarch64.cpp index 92f076eba3388..08c2ed857231a 100644 --- a/src/coreclr/jit/targetloongarch64.cpp +++ b/src/coreclr/jit/targetloongarch64.cpp @@ -15,8 +15,8 @@ #include "target.h" -const char* Target::g_tgtCPUName = "loongarch64"; -const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const char* Target::g_tgtCPUName = "loongarch64"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; // clang-format off diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index ffb7cf71d886b..6ad60a064f35c 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -412,7 +412,8 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) else { if (TargetArchitecture::IsX64 || - (TargetOS::IsUnix && (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86 || TargetArchitecture::IsLoongArch64))) + (TargetOS::IsUnix && + (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86 || TargetArchitecture::IsLoongArch64))) { assert(func->startLoc != nullptr); offset = func->startLoc->GetFuncletPrologOffset(GetEmitter()); diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp index eae92c102e381..00ffa5482185d 100644 --- 
a/src/coreclr/jit/unwindloongarch64.cpp +++ b/src/coreclr/jit/unwindloongarch64.cpp @@ -360,7 +360,7 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) } #endif // TARGET_UNIX int z = offset / 8; - //assert(0 <= z && z <= 0xFF); + // assert(0 <= z && z <= 0xFF); UnwindInfo* pu = &funCurrentFunc()->uwi; @@ -368,8 +368,8 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) { // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 - assert(reg == REG_RA || reg == REG_FP || // first legal register: RA - (REG_S0 <= reg && reg <= REG_S8)); // last legal register: S8 + assert(reg == REG_RA || reg == REG_FP || // first legal register: RA + (REG_S0 <= reg && reg <= REG_S8)); // last legal register: S8 BYTE x = (BYTE)(reg - REG_RA); assert(0 <= x && x <= 0x1E); @@ -381,7 +381,7 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 assert(REG_F24 <= reg && // first legal register: F24 - reg <= REG_F31); // last legal register: F31 + reg <= REG_F31); // last legal register: F31 BYTE x = (BYTE)(reg - REG_F24); assert(0 <= x && x <= 0x7); @@ -392,7 +392,7 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) { - //TODO:temp not used on loongarch64. + // TODO:temp not used on loongarch64. 
assert(!"unimplemented on LOONGARCH yet"); #if 0 UnwindInfo* pu = &funCurrentFunc()->uwi; @@ -473,22 +473,22 @@ unsigned GetUnwindSizeFromUnwindHeader(BYTE b1) { static BYTE s_UnwindSize[256] = { // array of unwind sizes, in bytes (as specified in the LOONGARCH unwind specification) - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 40-4F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 80-8F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90-9F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0-AF - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0-BF - 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 2, 2, 2, // C0-CF - 3, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, // D0-DF - 4, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E0-EF - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F0-FF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 40-4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 80-8F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90-9F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0-AF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0-BF + 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 2, 2, 2, // C0-CF + 3, 
2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, // D0-DF + 4, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E0-EF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F0-FF }; unsigned size = s_UnwindSize[b1]; @@ -891,8 +891,8 @@ void DumpUnwindInfo(Compiler* comp, x = ((DWORD)(b2 & 0x1F) << 8) | (DWORD)b3; - printf(" %02X %02X %02X add_fp %u (0x%02X); addi.d %s, sp, #%u\n", b1, b2, b3, x, x, getRegName(REG_FP), - x * 8); + printf(" %02X %02X %02X add_fp %u (0x%02X); addi.d %s, sp, #%u\n", b1, b2, b3, x, x, + getRegName(REG_FP), x * 8); } else if (b1 == 0xE3) { @@ -1227,12 +1227,12 @@ int UnwindPrologCodes::Match(UnwindEpilogInfo* pEpi) return -1; } - int matchIndex = 0;//Size() - pEpi->Size(); + int matchIndex = 0; // Size() - pEpi->Size(); BYTE* pProlog = GetCodes(); BYTE* pEpilog = pEpi->GetCodes(); - //First check set_fp. + // First check set_fp. if (0 < pEpi->Size()) { if (*pProlog == 0xE1) @@ -1783,7 +1783,7 @@ void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength) } #endif -// Compute the header + // Compute the header noway_assert((functionLength & 3) == 0); DWORD headerFunctionLength = functionLength / 4; @@ -1791,7 +1791,7 @@ void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength) DWORD headerVers = 0; // Version of the unwind info is zero. No other version number is currently defined. DWORD headerXBit = 0; // We never generate "exception data", but the VM might add some. DWORD headerEBit; - DWORD headerEpilogCount; // This depends on how we set headerEBit. + DWORD headerEpilogCount; // This depends on how we set headerEBit. DWORD headerCodeWords; DWORD headerExtendedEpilogCount = 0; // This depends on how we set headerEBit. DWORD headerExtendedCodeWords = 0; @@ -2107,7 +2107,8 @@ void UnwindInfo::HotColdSplitCodes(UnwindInfo* puwi) // expand!) during issuing (although this is extremely rare in any case, and may not // actually occur on LOONGARCH), so we don't finalize actual sizes or offsets. 
// -// LOONGARCH64 has very similar limitations, except functions can be up to 1MB. TODO-LOONGARCH64-Bug?: make sure this works! +// LOONGARCH64 has very similar limitations, except functions can be up to 1MB. TODO-LOONGARCH64-Bug?: make sure this +// works! // // We don't split any prolog or epilog. Ideally, we might not split an instruction, // although that doesn't matter because the unwind at any point would still be @@ -2200,7 +2201,7 @@ void UnwindInfo::Split() // Call the emitter to do the split, and call us back for every split point it chooses. uwiComp->GetEmitter()->emitSplit(uwiFragmentLast->ufiEmitLoc, uwiEndLoc, maxFragmentSize, (void*)this, - EmitSplitCallback); + EmitSplitCallback); #ifdef DEBUG // Did the emitter split the function/funclet into as many fragments as we asked for? diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index dfad0b8d9e4dd..078de7524a3a4 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -337,10 +337,10 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) || (regNum == REG_R28)) // last register before FP #elif defined(TARGET_LOONGARCH64) else if ((regNum == REG_INT_LAST) || (regNum == REG_X0)) -#else // TARGET_LOONGARCH64 +#else // TARGET_LOONGARCH64 // We've already printed a register. Is this the end of a range? 
else if (regNum == REG_INT_LAST) -#endif // TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 { const char* nam = getRegName(regNum); printf("%s%s", sep, nam); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index a267d32fbc44e..bd03c4cbaad02 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -58,7 +58,7 @@ struct FloatTraits #elif defined(TARGET_ARMARCH) unsigned bits = 0x7FC00000u; #elif defined(TARGET_LOONGARCH64) - unsigned bits = 0xFFC00000u; + unsigned bits = 0xFFC00000u; #else #error Unsupported or unset target architecture #endif From 1e92895507798284668600bb3f0dbf2ce4a574e1 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 11 Jan 2022 19:22:36 +0800 Subject: [PATCH 10/46] [LoongArch64] update by `git apply format.patch`. --- src/coreclr/jit/codegenloongarch64.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index be858336db73d..e3400b54667a7 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1067,8 +1067,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, - genFuncletInfo.fiSP_to_FPRA_save_delta); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, @@ -1231,8 +1230,7 @@ void CodeGen::genFuncletEpilog() genFuncletInfo.fiSP_to_FPRA_save_delta + 8); compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, - 
genFuncletInfo.fiSP_to_FPRA_save_delta); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); // generate daddiu SP,SP,imm From 348a7f62a6209b046c0fa716a949a8260dadae8d Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 14 Jan 2022 12:32:41 +0800 Subject: [PATCH 11/46] [LoongArch64] Delete the interface getArgType2. And refactor the returned values of `getFieldSizeClassificationByHnd`. Keep aligned with #62893 `02ff4bfa41d7887b151d381e2697ba82ab6a0bca`. --- src/coreclr/jit/compiler.cpp | 6 ++-- src/coreclr/jit/gentree.cpp | 26 +++++++-------- src/coreclr/jit/lclvars.cpp | 36 ++++++++++---------- src/coreclr/jit/morph.cpp | 64 +++++++++++++++++------------------- 4 files changed, 65 insertions(+), 67 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 9ee7ae6ffd183..e06d37bde9912 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -935,14 +935,14 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #ifdef TARGET_LOONGARCH64 if (structSize <= (TARGET_POINTER_SIZE * 2)) { - DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(clsHnd); + uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(clsHnd); - if (numFloatFields & 0x1) + if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_ONE) { howToReturnStruct = SPK_PrimitiveType; useType = structSize > 4 ? 
TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & 0xE) + else if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { howToReturnStruct = SPK_ByValue; useType = TYP_STRUCT; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index b01ab98be43b3..bb6c3f5bb08fd 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21696,29 +21696,29 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, #elif defined(TARGET_LOONGARCH64) assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); - DWORD numFloatFields = comp->info.compCompHnd->getFieldTypeByHnd(retClsHnd); + uint32_t numFloatFields = comp->info.compCompHnd->getFieldSizeClassificationByHnd(retClsHnd); BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); - if (numFloatFields & 0x8) + if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) { - assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; } - else if (numFloatFields & 0x2) + else if (numFloatFields & STRUCT_FLOAT_FIELD_FIRST) { - assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & 0x20 ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; + assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? 
comp->getJitGCType(gcPtrs[1]) : TYP_INT; comp->compFloatingPointUsed = true; } - else if (numFloatFields & 0x4) + else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) { - assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; - m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; } else diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index ce652f10e91fe..1e7906375bc9a 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -643,13 +643,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un LclVarDsc* varDsc = varDscInfo->varDsc; CORINFO_CLASS_HANDLE typeHnd = nullptr; -#if defined(TARGET_LOONGARCH64) - int flags = 0; - CorInfoTypeWithMod corInfoType = - info.compCompHnd->getArgType2(&info.compMethodInfo->args, argLst, &typeHnd, &flags); -#else CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); -#endif varDsc->lvIsParam = 1; lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args); @@ -675,6 +669,14 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un bool isHfaArg = false; var_types hfaType = TYP_UNDEF; +#if defined(TARGET_LOONGARCH64) + uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; + if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) + { + floatFlags = info.compCompHnd->getFieldSizeClassificationByHnd(typeHnd); + } +#endif + // Methods that use VarArg or SoftFP cannot have HFA arguments except // Native 
varargs on arm64 unix use the regular calling convention. if (((TargetOS::IsUnix && TargetArchitecture::IsArm64) || !info.compIsVarArgs) && !opts.compUseSoftFP) @@ -895,11 +897,11 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un #elif defined(TARGET_LOONGARCH64) var_types arg1_Type = TYP_UNKNOWN; var_types arg2_Type = TYP_UNKNOWN; - if (flags & 0xf) + if (floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) { assert(varTypeIsStruct(argType)); int float_num = 0; - if (flags == 1) + if (floatFlags == STRUCT_FLOAT_FIELD_ONLY_ONE) { assert(argSize <= 8); assert(varDsc->lvExactSize <= argSize); @@ -908,30 +910,30 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un arg1_Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; canPassArgInRegisters = varDscInfo->canEnreg(arg1_Type, 1); } - else if (flags & 0x8) + else if (floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { - arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; - arg2_Type = (flags & 0x20) ? TYP_DOUBLE : TYP_FLOAT; + arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; float_num = 2; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); } - else if (flags & 2) + else if (floatFlags & STRUCT_FLOAT_FIELD_FIRST) { float_num = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; - arg2_Type = (flags & 0x20) ? TYP_LONG : TYP_INT; + arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? 
TYP_LONG : TYP_INT; } - else if (flags & 4) + else if (floatFlags & STRUCT_FLOAT_FIELD_SECOND) { float_num = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = (flags & 0x10) ? TYP_LONG : TYP_INT; - arg2_Type = (flags & 0x20) ? TYP_DOUBLE : TYP_FLOAT; + arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } if (!canPassArgInRegisters) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index f0e1647414878..2bed706b954ce 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3051,14 +3051,14 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { assert((howToPassStruct == SPK_ByValue) || (howToPassStruct == SPK_PrimitiveType)); - numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); + numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); - passUsingFloatRegs = (numFloatFields & 0xf) ? true : false; + passUsingFloatRegs = (numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) ? 
true : false; compFloatingPointUsed |= passUsingFloatRegs; - if (numFloatFields & 7) + if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) size = 1; - else if (numFloatFields & 8) + else if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) size = 2; } else // if (passStructByRef) @@ -4917,37 +4917,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry unsigned offset = baseOffset; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); #if defined(TARGET_LOONGARCH64) - DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); - if ((numFloatFields & 0xe) /*&& (varDsc->lvSize() == TARGET_POINTER_SIZE)*/) + uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); + if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { - assert((numFloatFields & 0xf) > 1); + assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); var_types tmp_type_1; var_types tmp_type_2; compFloatingPointUsed = true; - if (numFloatFields & 0x8) + if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) { - tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & 0x2) + else if (numFloatFields & STRUCT_FLOAT_FIELD_FIRST) { - tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - // tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT;type[0] - tmp_type_2 = numFloatFields & 0x20 ? type[1] : TYP_INT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? type[1] : TYP_INT; } - else if (numFloatFields & 0x4) + else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) { - // tmp_type_1 = numFloatFields & 0x10 ? 
TYP_LONG: TYP_INT; - tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; - tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? type[0] : TYP_INT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; } else { assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & 0x30 ? 8 : 4; + elemSize = numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK ? 8 : 4; GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type_1, offset); newArg->AddField(this, nextLclFld, offset, tmp_type_1); @@ -5006,37 +5004,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); unsigned offset = 0; #if defined(TARGET_LOONGARCH64) - DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); - if (numFloatFields & 0xe) + uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); + if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { - assert((numFloatFields & 0xf) > 1); + assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); var_types tmp_type_1; var_types tmp_type_2; compFloatingPointUsed = true; - if (numFloatFields & 0x8) + if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) { - tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & 0x2) + else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) { - tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - // tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT; - tmp_type_2 = numFloatFields & 0x20 ? 
type[1] : TYP_INT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? type[1] : TYP_INT; } - else if (numFloatFields & 0x4) + else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) { - // tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; - tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; - tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? type[0] : TYP_INT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; } else { assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & 0x30 ? 8 : 4; + elemSize = numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK ? 8 : 4; GenTree* curItem = gtNewIndir(tmp_type_1, baseAddr); // For safety all GT_IND should have at least GT_GLOB_REF set. From a39969559566cd3382b07f1986714b1d40accbfd Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 14 Jan 2022 13:27:05 +0800 Subject: [PATCH 12/46] [LoongArch64] update code by `git apply format.patch` And update `getLoongArch64PassStructInRegisterFlags`. 
--- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/gentree.cpp | 12 ++++++------ src/coreclr/jit/lclvars.cpp | 6 +++--- src/coreclr/jit/morph.cpp | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index e06d37bde9912..75e7053b94825 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -935,7 +935,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #ifdef TARGET_LOONGARCH64 if (structSize <= (TARGET_POINTER_SIZE * 2)) { - uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(clsHnd); + uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_ONE) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index bb6c3f5bb08fd..44c25eaa8f80f 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21696,8 +21696,8 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, #elif defined(TARGET_LOONGARCH64) assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); - uint32_t numFloatFields = comp->info.compCompHnd->getFieldSizeClassificationByHnd(retClsHnd); - BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + uint32_t numFloatFields = comp->info.compCompHnd->getLoongArch64PassStructInRegisterFlags(retClsHnd); + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) @@ -21710,15 +21710,15 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, else if (numFloatFields & STRUCT_FLOAT_FIELD_FIRST) { assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? 
comp->getJitGCType(gcPtrs[1]) : TYP_INT; + m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; comp->compFloatingPointUsed = true; } else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) { assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; - m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; } else diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 1e7906375bc9a..9b0607a104609 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -610,7 +610,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } #elif defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) // On System V type environment the float registers are not indexed together with the int ones. 
- varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum; + varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum; #endif // TARGET* CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args; @@ -644,7 +644,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un CORINFO_CLASS_HANDLE typeHnd = nullptr; CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); - varDsc->lvIsParam = 1; + varDsc->lvIsParam = 1; lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args); @@ -673,7 +673,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) { - floatFlags = info.compCompHnd->getFieldSizeClassificationByHnd(typeHnd); + floatFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(typeHnd); } #endif diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 2bed706b954ce..5a24f80721d30 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3051,7 +3051,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { assert((howToPassStruct == SPK_ByValue) || (howToPassStruct == SPK_PrimitiveType)); - numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); + numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); passUsingFloatRegs = (numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) ? 
true : false; compFloatingPointUsed |= passUsingFloatRegs; @@ -4917,7 +4917,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry unsigned offset = baseOffset; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); #if defined(TARGET_LOONGARCH64) - uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); + uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); @@ -5004,7 +5004,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); unsigned offset = 0; #if defined(TARGET_LOONGARCH64) - uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); + uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); From b2b53d270ee9516e9eea0531b0a8b5bafe019e65 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 14 Jan 2022 15:10:01 +0800 Subject: [PATCH 13/46] [LoongArch64] Fixed the error when passing float-arg by integer-reg. 
--- src/coreclr/jit/codegenlinear.cpp | 7 +++++++ src/coreclr/jit/codegenloongarch64.cpp | 2 +- src/coreclr/jit/importer.cpp | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 92043be1edc67..e8ae2155ed1ca 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1230,6 +1230,13 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) assert(!varTypeIsGC(varDsc)); spillType = lclActualType; } + +#if defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(spillType) && emitter::isGeneralRegister(tree->GetRegNum())) + { + spillType = spillType == TYP_FLOAT ? TYP_INT : TYP_LONG; + } +#endif #elif defined(TARGET_ARM) // No normalizing for ARM #else diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index e3400b54667a7..6291b360bbfb7 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4563,7 +4563,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) { assert(reg != REG_R21); ssize_t imm = op2->AsIntCon()->gtIconVal; - if ((tree->gtFlags & GTF_UNSIGNED) && (attr == EA_4BYTE)) + if (attr == EA_4BYTE) { assert(reg != REG_RA); imm = (int32_t)imm; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index dd89a0a063de4..08bc8cac5598d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -14227,6 +14227,8 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (!callNode && prevOpcode == CEE_LDC_I4_0) { assert(op1->gtOper == GT_CNS_INT && op1->AsIntCon()->gtIconVal == 0); + if (varTypeIsFloating(lclTyp)) + op1->gtOper = GT_CNS_DBL; op1->gtType = genActualType(lclTyp); impPushOnStack(op1, tiRetVal); // opcode = CEE_LDC_I4_0; From b5b60cb11e039dcd4d3b35e1e8d1377ce0a863f3 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 18 Jan 2022 15:21:52 +0800 Subject: [PATCH 14/46] [Loongarch64] amend patch formate by 'git 
apply format.patch' --- src/coreclr/jit/importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 08bc8cac5598d..48a2b23068260 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -14229,7 +14229,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) assert(op1->gtOper == GT_CNS_INT && op1->AsIntCon()->gtIconVal == 0); if (varTypeIsFloating(lclTyp)) op1->gtOper = GT_CNS_DBL; - op1->gtType = genActualType(lclTyp); + op1->gtType = genActualType(lclTyp); impPushOnStack(op1, tiRetVal); // opcode = CEE_LDC_I4_0; break; From 8ef00ba1535d0f71f52b84b9b5fe85788e93985c Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Wed, 19 Jan 2022 09:42:25 +0800 Subject: [PATCH 15/46] [LoongArch64] update the version of the `LICENSE description`. --- src/coreclr/jit/codegenloongarch64.cpp | 3 --- src/coreclr/jit/emitfmtsloongarch64.h | 3 --- src/coreclr/jit/emitloongarch64.cpp | 3 --- src/coreclr/jit/emitloongarch64.h | 3 --- src/coreclr/jit/instrsloongarch64.h | 3 --- src/coreclr/jit/lowerloongarch64.cpp | 3 --- src/coreclr/jit/lsraloongarch64.cpp | 3 --- src/coreclr/jit/targetloongarch64.cpp | 3 --- src/coreclr/jit/unwindloongarch64.cpp | 2 -- 9 files changed, 26 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 6291b360bbfb7..796c40b465af0 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. 
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/emitfmtsloongarch64.h b/src/coreclr/jit/emitfmtsloongarch64.h index b4232269b144f..e04d60270d567 100644 --- a/src/coreclr/jit/emitfmtsloongarch64.h +++ b/src/coreclr/jit/emitfmtsloongarch64.h @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. ////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index e58ccb61282bb..c22d729f62a60 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information.//emitarm64.cpp deletes this line. - -// Copyright (c) Loongson Technology. All rights reserved. /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index e9cc1e9d831d7..85841251de82a 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. 
#if defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h index f171bc69a7a68..3e54bce650d88 100644 --- a/src/coreclr/jit/instrsloongarch64.h +++ b/src/coreclr/jit/instrsloongarch64.h @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. /***************************************************************************** * Loongarch64 instructions for JIT compiler diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 82809712de064..692c9d0fe408e 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 6d056a46d737d..801b863758edd 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. 
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/targetloongarch64.cpp b/src/coreclr/jit/targetloongarch64.cpp index 08c2ed857231a..e0097a1b62a1c 100644 --- a/src/coreclr/jit/targetloongarch64.cpp +++ b/src/coreclr/jit/targetloongarch64.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. /*****************************************************************************/ diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp index 00ffa5482185d..110c88985a510 100644 --- a/src/coreclr/jit/unwindloongarch64.cpp +++ b/src/coreclr/jit/unwindloongarch64.cpp @@ -1,8 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// Copyright (c) Loongson Technology. All rights reserved. - /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX From cadce2c59336cfe48d36985f55f68e20f52aff69 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 10 Feb 2022 17:09:05 +0800 Subject: [PATCH 16/46] [LoongArch64] amend the CodeGen::genFnPrologCalleeRegArgs for the SC_IG_BUFFER_SIZE. 
--- src/coreclr/jit/codegencommon.cpp | 63 ++++++++++++++++++------------- src/coreclr/jit/emit.h | 4 -- src/coreclr/jit/instr.cpp | 15 ++++---- 3 files changed, 45 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 3a61fc4cbaed5..dc91f7c690795 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3448,21 +3448,20 @@ void CodeGen::genFnPrologCalleeRegArgs() } else { - if (tmp_reg == REG_NA) + assert(tmp_reg == REG_NA); + + tmp_offset = base; + tmp_reg = REG_R21; + if ((0 < base) && (base <= 0xfff)) { - regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); - GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); } else { - baseOffset = -(base - tmp_offset) - 8; - GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); } + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); } regArgMaskLive &= ~genRegMask(srcRegNum); @@ -3503,18 +3502,24 @@ void CodeGen::genFnPrologCalleeRegArgs() { if (tmp_reg == REG_NA) { - regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); - GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); + tmp_offset = base; + tmp_reg = REG_R21; + if ((0 < base) && (base <= 0xfff)) + { + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); + } + else + { + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + } + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); } else { baseOffset = -(base - tmp_offset) - 8; - GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R21, 8); + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, baseOffset); } } regArgMaskLive &= ~genRegMask(srcRegNum); // maybe do this later is better! @@ -3535,18 +3540,24 @@ void CodeGen::genFnPrologCalleeRegArgs() { if (tmp_reg == REG_NA) { - regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); - GetEmitter()->emitIns_S_R(INS_st_d, size, REG_ARG_LAST, varNum, -8); + tmp_offset = base; + tmp_reg = REG_R21; + if ((0 < base) && (base <= 0xfff)) + { + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); + } + else + { + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + } + GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, -8); } else { baseOffset = -(base - tmp_offset) - 8; - GetEmitter()->emitIns_S_R(INS_st_d, size, REG_ARG_LAST, varNum, baseOffset); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R21, 8); + GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, baseOffset); } } } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 15b84ae2b4cec..891ad9766af07 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1944,11 +1944,7 @@ class emitter #elif defined(TARGET_LOONGARCH64) -#ifdef DEBUG -#define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) -#else #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 20 * SMALL_IDSC_SIZE) -#endif #else // !TARGET_LOONGARCH64 #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index e80855be507dc..57089672ac279 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1917,6 +1917,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) * Parameters * dstType - destination type * aligned - whether destination is properly aligned if 
dstType is a SIMD type + * - for LoongArch64 aligned is used for store-index. */ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false*/) { @@ -1977,11 +1978,11 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false { if (dstType == TYP_DOUBLE) { - return INS_fst_d; + return aligned ? INS_fstx_d : INS_fst_d; } else if (dstType == TYP_FLOAT) { - return INS_fst_s; + return aligned ? INS_fstx_s : INS_fst_s; } } #else @@ -2000,13 +2001,13 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = INS_strh; #elif defined(TARGET_LOONGARCH64) if (varTypeIsByte(dstType)) - ins = INS_st_b; + ins = aligned ? INS_stx_b : INS_st_b; else if (varTypeIsShort(dstType)) - ins = INS_st_h; + ins = aligned ? INS_stx_h : INS_st_h; else if ((TYP_INT == dstType) || (TYP_UINT == dstType)) - ins = INS_st_w; - else // if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) - ins = INS_st_d; // default st_d. + ins = aligned ? INS_stx_w : INS_st_w; + else // if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) + ins = aligned ? INS_stx_d : INS_st_d; #else NYI("ins_Store"); #endif From 3c79267ec284381a3acd053fda7499944ac7598a Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 15 Feb 2022 11:37:10 +0800 Subject: [PATCH 17/46] [LoongArch64]: update the crossgen2 within the JIT. 
--- src/coreclr/jit/emitloongarch64.cpp | 39 +++++++++++++---------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index c22d729f62a60..4c90765f7609f 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2225,7 +2225,7 @@ void emitter::emitIns_R_AI(instruction ins, // addi_d reg, reg, off-lo-12bits // case:EA_PTR_DSP_RELOC // pcaddu12i reg, off-hi-20bits - // ldptr_d reg, reg, off-lo-12bits + // ld_d reg, reg, off-lo-12bits instrDesc* id = emitNewInstr(attr); @@ -2853,15 +2853,14 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t // pcaddu18i t2, addr-hi20 // jilr r0/1,t2,addr-lo18 + *(code_t*)dst = 0x1e00000e; + long addr = (long)id->idAddr()->iiaAddr; // get addr. // should assert(addr-dst < 38bits); int reg2 = (int)addr & 1; addr = addr ^ 1; - emitRecordRelocation(dst, (BYTE*)addr, IMAGE_REL_LOONGARCH64_PC); - - *(code_t*)dst = 0x1e00000e; dst += 4; #ifdef DEBUG code = emitInsCode(INS_pcaddu18i); @@ -2871,6 +2870,8 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t assert(code == 0x4c000000); #endif *(code_t*)dst = 0x4c000000 | (14 << 5) | reg2; + + emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_LOONGARCH64_JIR); } else { @@ -2907,10 +2908,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t D_INST_JIRL(code, reg2, REG_T2, 0); } - // Now output the call instruction and update the 'dst' pointer - // - unsigned outputInstrSize = emitOutput_Instr(dst, code); - dst += outputInstrSize; + dst += 4; // update volatile regs within emitThisGCrefRegs and emitThisByrefRegs. 
if (gcrefRegs != emitThisGCrefRegs) @@ -2922,10 +2920,6 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst); } - // All call instructions are 4-byte in size on LOONGARCH64 - // not including delay-slot which processed later. - assert(outputInstrSize == callInstrSize); - // If the method returns a GC ref, mark INTRET (A0) appropriately. if (id->idGCref() == GCT_GCREF) { @@ -3041,23 +3035,22 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // addi_d reg, reg, off-lo-12bits // case:EA_PTR_DSP_RELOC // pcaddu12i reg, off-hi-20bits - // ldptr_d reg, reg, off-lo-12bits + // ld_d reg, reg, off-lo-12bits regNumber reg1 = id->idReg1(); - emitRecordRelocation(dst, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC); - *(code_t*)dst = 0x1c000000 | (code_t)reg1; - dst += 4; + dst2 = dst; + dst += 4; #ifdef DEBUG code = emitInsCode(INS_pcaddu12i); assert(code == 0x1c000000); code = emitInsCode(INS_addi_d); assert(code == 0x02c00000); - code = emitInsCode(INS_ldptr_d); - assert(code == 0x26000000); + code = emitInsCode(INS_ld_d); + assert(code == 0x28c00000); #endif if (id->idIsCnsReloc()) @@ -3065,11 +3058,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) ins = INS_addi_d; *(code_t*)dst = 0x02c00000 | (code_t)reg1 | (code_t)(reg1 << 5); } - else // if (id->idIsDspReloc()) + else { assert(id->idIsDspReloc()); - ins = INS_ldptr_d; - *(code_t*)dst = 0x26000000 | (code_t)reg1 | (code_t)(reg1 << 5); + ins = INS_ld_d; + *(code_t*)dst = 0x28c00000 | (code_t)reg1 | (code_t)(reg1 << 5); } if (id->idGCref() != GCT_NONE) @@ -3083,6 +3076,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += 4; + emitRecordRelocation(dst2, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC); + + dst2 += 4; + sz = sizeof(instrDesc); } break; From 7192df162f496186e017974513141baab8bb3b2b Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 15 Feb 
2022 15:02:54 +0800 Subject: [PATCH 18/46] [LoongArch64] git-apply the `format.patch`. --- src/coreclr/jit/codegencommon.cpp | 6 +++--- src/coreclr/jit/importer.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 2e8907c4c313e..062bbd0eb1678 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3466,7 +3466,7 @@ void CodeGen::genFnPrologCalleeRegArgs() } else { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); } GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); @@ -3518,7 +3518,7 @@ void CodeGen::genFnPrologCalleeRegArgs() } else { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); } GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); @@ -3556,7 +3556,7 @@ void CodeGen::genFnPrologCalleeRegArgs() } else { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); } GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, -8); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 0b8c25447387b..d6e424baf0639 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -17948,7 +17948,7 @@ void Compiler::impImportBlock(BasicBlock* block) // Spill clique has decided this should be "byref", but this block only pushes an "int". 
// Insert a sign-extension to "native int" so we match the clique size. #ifdef TARGET_LOONGARCH64 - if (tree->gtOper == GT_CNS_INT) + if (tree->gtOper == GT_CNS_INT) { tree->gtType = TYP_I_IMPL; tree->SetContained(); From 619c8e89b9ad5b8be62fcabe172b9a7e8f467aba Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 15 Feb 2022 20:21:23 +0800 Subject: [PATCH 19/46] [LoongArch64] Fix the compiling error after merge-main. --- src/coreclr/jit/lowerloongarch64.cpp | 40 ++++++++++++++++++++++++++++ src/coreclr/jit/lsraloongarch64.cpp | 3 +-- src/coreclr/jit/targetloongarch64.h | 4 +++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 692c9d0fe408e..7468766b7a052 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -160,6 +160,46 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) return mul->gtNext; } +//------------------------------------------------------------------------ +// LowerBinaryArithmetic: lowers the given binary arithmetic node. +// +// Arguments: +// node - the arithmetic node to lower +// +// Returns: +// The next node to lower. 
+// +GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) +{ + if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) + { + GenTree* opNode = nullptr; + GenTree* notNode = nullptr; + if (binOp->gtGetOp1()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp1(); + opNode = binOp->gtGetOp2(); + } + else if (binOp->gtGetOp2()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp2(); + opNode = binOp->gtGetOp1(); + } + + if (notNode != nullptr) + { + binOp->gtOp1 = opNode; + binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); + binOp->ChangeOper(GT_AND_NOT); + BlockRange().Remove(notNode); + } + } + + ContainCheckBinary(binOp); + + return binOp->gtNext; +} + //------------------------------------------------------------------------ // LowerStoreLoc: Lower a store of a lclVar // diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 801b863758edd..66745063b96dd 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -526,7 +526,6 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_BLK: - case GT_DYN_BLK: // These should all be eliminated prior to Lowering. assert(!"Non-store block node in Lowering"); srcCount = 0; @@ -1681,7 +1680,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (blkNode->OperIs(GT_STORE_DYN_BLK)) { useCount++; - BuildUse(blkNode->AsDynBlk()->gtDynamicSize, sizeRegMask); + BuildUse(blkNode->AsStoreDynBlk()->gtDynamicSize, sizeRegMask); } buildInternalRegisterUses(); diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index cf97f4148cf16..2c6153f6579fd 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -262,6 +262,10 @@ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. 
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_A0 | RBM_A1 | RBM_A2 | RBM_A3 | RBM_A4 | RBM_A5 | RBM_A6 | RBM_A7 | RBM_T3)) + #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 + #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" From 789c16fb7ab91b574296de2c1cb04cae4717a46e Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Wed, 16 Feb 2022 17:12:58 +0800 Subject: [PATCH 20/46] [LoongArch64] amend the code for reviewing by @BruceForstall. --- src/coreclr/jit/CMakeLists.txt | 4 - src/coreclr/jit/codegen.h | 67 +----------- src/coreclr/jit/codegencommon.cpp | 16 +-- src/coreclr/jit/codegeninterface.h | 10 +- src/coreclr/jit/codegenloongarch64.cpp | 19 ---- src/coreclr/jit/compiler.cpp | 6 -- src/coreclr/jit/compiler.h | 23 ++--- src/coreclr/jit/emitjmps.h | 16 +-- src/coreclr/jit/emitloongarch64.cpp | 2 +- src/coreclr/jit/gentree.h | 7 +- src/coreclr/jit/instr.cpp | 7 -- src/coreclr/jit/jit.h | 2 +- src/coreclr/jit/lclvars.cpp | 1 - src/coreclr/jit/lowerloongarch64.cpp | 4 +- src/coreclr/jit/lsra.h | 6 ++ src/coreclr/jit/lsrabuild.cpp | 44 ++++++++ src/coreclr/jit/regalloc.cpp | 13 --- src/coreclr/jit/registerloongarch64.h | 135 ++++++++++++------------- src/coreclr/jit/regset.cpp | 8 +- src/coreclr/jit/regset.h | 6 +- src/coreclr/jit/target.h | 6 +- src/coreclr/jit/targetloongarch64.h | 28 ++--- src/coreclr/jit/unwindloongarch64.cpp | 90 ++++------------- src/coreclr/jit/utils.cpp | 19 +++- src/coreclr/jit/valuenum.cpp | 15 ++- src/coreclr/jit/valuenumfuncs.h | 2 +- 26 files changed, 194 insertions(+), 362 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 9a073ab2bfb8a..caf0726d970b3 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -242,8 +242,6 @@ set( JIT_LOONGARCH64_SOURCES lsraloongarch64.cpp targetloongarch64.cpp 
unwindloongarch64.cpp - ##hwintrinsiclistloongarch64.cpp ###TODO:Not implemented on loongarch64 yet. - ##simdashwintrinsiclistloongarch64.cpp ###TODO:Not implemented on loongarch64 yet. ) # We include the headers here for better experience in IDEs. @@ -397,8 +395,6 @@ set( JIT_LOONGARCH64_HEADERS emitfmtsloongarch64.h instrsloongarch64.h registerloongarch64.h -#hwintrinsiclistloongarch64.h ###TODO:Not implemented on loongarch64 yet. -#simdashwintrinsiclistloongarch64.h ) convert_to_absolute_path(JIT_SOURCES ${JIT_SOURCES}) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index b44ed34f09857..60cbb3714dc74 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -278,7 +278,7 @@ class CodeGen final : public CodeGenInterface void genClearStackVec3ArgUpperBits(); #endif // UNIX_AMD64_ABI && FEATURE_SIMD -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) bool genInstrWithConstant(instruction ins, emitAttr attr, regNumber reg1, @@ -339,66 +339,6 @@ class CodeGen final : public CodeGenInterface void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); -#elif defined(TARGET_LOONGARCH64) - bool genInstrWithConstant(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - ssize_t imm, - regNumber tmpReg, - bool inUnwindRegion = false); - - void genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); - - void genPrologSaveRegPair(regNumber reg1, - regNumber reg2, - int spOffset, - int spDelta, - bool useSaveNextPair, - regNumber tmpReg, - bool* pTmpRegIsZero); - - void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); - - void genEpilogRestoreRegPair(regNumber reg1, - regNumber reg2, - int spOffset, - int spDelta, - bool useSaveNextPair, - regNumber tmpReg, - bool* pTmpRegIsZero); - - void genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, 
bool* pTmpRegIsZero); - - // A simple struct to keep register pairs for prolog and epilog. - struct RegPair - { - regNumber reg1; - regNumber reg2; - bool useSaveNextPair; - - RegPair(regNumber reg1) : reg1(reg1), reg2(REG_NA), useSaveNextPair(false) - { - } - - RegPair(regNumber reg1, regNumber reg2) : reg1(reg1), reg2(reg2), useSaveNextPair(false) - { - assert(reg2 == REG_NEXT(reg1)); - } - }; - - static void genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* regStack); - static void genSetUseSaveNextPairs(ArrayStack* regStack); - - static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); - - void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); - void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); - - void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); - void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); - - void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); #else void genPushCalleeSavedRegisters(); #endif @@ -1338,11 +1278,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genStoreRegToStackArg(var_types type, regNumber reg, int offset); #endif // FEATURE_PUT_STRUCT_ARG_STK -#ifdef TARGET_LOONGARCH64 - // TODO for LOONGARCH64 : maybe delete on LA64? 
- void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset); -#endif - void genCodeForStoreBlk(GenTreeBlk* storeBlkNode); #ifndef TARGET_X86 void genCodeForInitBlkHelper(GenTreeBlk* initBlkNode); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 062bbd0eb1678..4f5ad64dce390 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -9460,21 +9460,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) switch (addrInfo.accessType) { case IAT_VALUE: - // if (validImmForBAL((ssize_t)addrInfo.addr)) - //{ - // // Simple direct call - - // //TODO for LA. - // callType = emitter::EC_FUNC_TOKEN; - // addr = addrInfo.addr; - // indCallReg = REG_NA; - // break; - //} - - //// otherwise the target address doesn't fit in an immediate - //// so we have to burn a register... - //__fallthrough; - + //TODO-LOONGARCH64-CQ: using B/BL for optimization. case IAT_PVALUE: // Load the address into a register, load indirect and call through a register // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 84d8560545894..f692193104f12 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -112,9 +112,7 @@ class CodeGenInterface private: #if defined(TARGET_XARCH) static const insFlags instInfo[INS_count]; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) - static const BYTE instInfo[INS_count]; -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) static const BYTE instInfo[INS_count]; #else #error Unsupported target architecture @@ -312,11 +310,7 @@ class CodeGenInterface bool validImmForAdd(target_ssize_t imm, insFlags flags); bool validImmForAlu(target_ssize_t imm); bool validImmForMov(target_ssize_t imm); -#ifdef TARGET_LOONGARCH64 - bool 
validImmForBAL(ssize_t addr); -#else bool validImmForBL(ssize_t addr); -#endif instruction ins_Load(var_types srcType, bool aligned = false); instruction ins_Store(var_types dstType, bool aligned = false); @@ -387,7 +381,7 @@ class CodeGenInterface bool m_cgInterruptible; #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool m_cgHasTailCalls; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 // The following will be set to true if we've determined that we need to // generate a full-blown pointer register map for the current method. diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 796c40b465af0..3d05d9299c273 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6996,25 +6996,6 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst } } -// Generate code for a store to some address + offset -// base: tree node which can be either a local address or arbitrary node -// offset: distance from the base from which to load -void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset) -{ - emitter* emit = GetEmitter(); - - if (base->OperIsLocalAddr()) - { - if (base->gtOper == GT_LCL_FLD_ADDR) - offset += base->AsLclFld()->GetLclOffs(); - emit->emitIns_S_R(ins, size, src, base->AsLclVarCommon()->GetLclNum(), offset); - } - else - { - emit->emitIns_R_R_I(ins, size, src, base->GetRegNum(), offset); - } -} - //------------------------------------------------------------------------ // genCallInstruction: Produce code for a GT_CALL node // diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index b5020f14de6e8..9c68c153e33b1 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2321,10 +2321,6 @@ void Compiler::compSetProcessor() instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); #endif // TARGET_ARM64 -#if 
defined(TARGET_LOONGARCH64) -// TODO: should add LOONGARCH64's features for LOONGARCH64. -#endif - instructionSetFlags = EnsureInstructionSetFlagsAreValid(instructionSetFlags); opts.setSupportedISAs(instructionSetFlags); @@ -2509,8 +2505,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) // For non-adaptive, padding limit is same as specified by the alignment. opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary; } -#elif defined(TARGET_LOONGARCH64) -// TODO: should be adaptive on LoongArch64. #endif assert(isPow2(opts.compJitAlignLoopBoundary)); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5c9e02769ad01..856705bf3ad33 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -469,14 +469,15 @@ class LclVarDsc unsigned char lvIsTemp : 1; // Short-lifetime compiler temp -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) - unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. -#elif defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + +#if defined(TARGET_LOONGARCH64) unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. - unsigned char lvIsSplit : 1; // Set if the argument is splited. also used the lvFldOffset. -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + unsigned char lvIsSplit : 1; // Set if the argument is splited. 
+#endif // defined(TARGET_LOONGARCH64) unsigned char lvIsBoolean : 1; // set if variable is boolean unsigned char lvSingleDef : 1; // variable has a single def @@ -671,9 +672,6 @@ class LclVarDsc { assert(lvIsHfa()); assert(varTypeIsStruct(lvType)); -#if defined(TARGET_LOONGARCH64) - assert(!"lvHfaSlots called not support on LOONGARCH64!"); -#endif unsigned slots = 0; #ifdef TARGET_ARM slots = lvExactSize / sizeof(float); @@ -8056,14 +8054,9 @@ class Compiler // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. return ((type == TYP_SIMD16) || (type == TYP_SIMD12)); } -#elif defined(TARGET_LOONGARCH64) - static bool varTypeNeedsPartialCalleeSave(var_types type) - { // TODO: supporting SIMD feature for LoongArch64. - return false; - } -#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) #error("Unknown target architecture for FEATURE_SIMD") -#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE protected: diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index 0a19c7cbf138e..97e216dccbb60 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -48,23 +48,9 @@ JMP_SMALL(le , gt , ble ) // LE #elif defined(TARGET_LOONGARCH64) -/* TODO for LOONGARCH: should redesign!!! 
*/ -// jump reverse instruction condcode -JMP_SMALL(jmp , jmp , b ) // AL always +JMP_SMALL(jmp , jmp , b ) JMP_SMALL(eq , ne , beq ) // EQ JMP_SMALL(ne , eq , bne ) // NE -//JMP_SMALL(hs , lo , bgez ) // HS also CS -//JMP_SMALL(lo , hs , bltz ) // LO also CC -//JMP_SMALL(mi , pl , bmi ) // MI -//JMP_SMALL(pl , mi , bpl ) // PL -//JMP_SMALL(vs , vc , bvs ) // VS -//JMP_SMALL(vc , vs , bvc ) // VC -//JMP_SMALL(hi , ls , bhi ) // HI -//JMP_SMALL(ls , hi , bls ) // LS -//JMP_SMALL(gez , ltz , bgez ) // GE -//JMP_SMALL(gtz , lez , bgtz ) // GT -//JMP_SMALL(ltz , gez , bltz ) // LT -//JMP_SMALL(lez , gtz , blez ) // LE #else #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 4c90765f7609f..2bb2e27f4b5ce 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -623,7 +623,7 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) // clang-format off static const char * const RegNames[] = { - #define REGDEF(name, rnum, mask, xname, wname) xname, + #define REGDEF(name, rnum, mask, sname) sname, #include "register.h" }; // clang-format on diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index bbe6b47b517b3..d718ddc00451b 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4444,6 +4444,10 @@ struct GenTreeCall final : public GenTree bool HasMultiRegRetVal() const { #ifdef FEATURE_MULTIREG_RET +#if defined(TARGET_LOONGARCH64) + return (gtType == TYP_STRUCT) && (gtReturnTypeDesc.GetReturnRegCount() > 1); +#else + #if defined(TARGET_X86) || defined(TARGET_ARM) if (varTypeIsLong(gtType)) { @@ -4451,9 +4455,6 @@ struct GenTreeCall final : public GenTree } #endif -#if defined(TARGET_LOONGARCH64) - return (gtType == TYP_STRUCT) && (gtReturnTypeDesc.GetReturnRegCount() > 1); -#else if (!varTypeIsStruct(gtType) || HasRetBufArg()) { return false; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp 
index 8edf1587348c4..8f27dd6c231d9 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1483,13 +1483,6 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) } #endif // TARGET_ARM64 -#if defined(TARGET_LOONGARCH64) -bool CodeGenInterface::validImmForBAL(ssize_t addr) -{ // TODO: can amend/optimize for LoongArch64. - return false; -} -#endif // TARGET_LOONGARCH64 - /***************************************************************************** * * Get the machine dependent instruction for performing sign/zero extension. diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 2316147f14960..46945ed7eae7f 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -309,7 +309,7 @@ // Arm64 Windows supports FEATURE_ARG_SPLIT, note this is different from // the official Arm64 ABI. // Case: splitting 16 byte struct between x7 and stack -#if defined(TARGET_ARM) || defined(TARGET_ARM64) /* || defined(TARGET_LOONGARCH64)*/ +#if defined(TARGET_ARM) || defined(TARGET_ARM64) #define FEATURE_ARG_SPLIT 1 #else #define FEATURE_ARG_SPLIT 0 diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 20fa0a5e10a56..ab262f28c5a64 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1050,7 +1050,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un else if (cSlots > 1) { varDsc->lvIsSplit = 1; - // varDsc->lvFldOffset = 0; varDsc->SetOtherArgReg(REG_STK); varDscInfo->hasMultiSlotStruct = true; varDscInfo->setAllRegArgUsed(arg1_Type); diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 7468766b7a052..6dd44bdf9b1a6 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -38,9 +38,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // bool Lowering::IsCallTargetInRange(void* addr) { - ////TODO for LOONGARCH64: should amend for optimize! 
- // assert(!"unimplemented on LOONGARCH yet"); - // return comp->codeGen->validImmForBAL((ssize_t)addr); + //TODO-LOONGARCH64-CQ: using B/BL for optimization. return false; } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 8793b619804b9..f6254ecdb79b3 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1011,6 +1011,12 @@ class LinearScan : public LinearScanInterface void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc); #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) + // For LoongArch64's ABI, a struct can be passed + // partially using registers from the 2 register files. + void LoongArch64UpdateRegStateForArg(LclVarDsc* argDsc); +#endif + // Update reg state for an incoming register argument void updateRegStateForArg(LclVarDsc* argDsc); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b1504e1f73e3d..3379a54e4be08 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2010,6 +2010,43 @@ void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) #endif // defined(UNIX_AMD64_ABI) +#ifdef TARGET_LOONGARCH64 +void LinearScan::LoongArch64UpdateRegStateForArg(LclVarDsc* argDsc) +{ + assert(varTypeIsStruct(argDsc)); + RegState* intRegState = &compiler->codeGen->intRegState; + RegState* floatRegState = &compiler->codeGen->floatRegState; + + if ((argDsc->GetArgReg() != REG_STK) && (argDsc->GetArgReg() != REG_NA)) + { + if (genRegMask(argDsc->GetArgReg()) & (RBM_ALLFLOAT)) + { + assert(genRegMask(argDsc->GetArgReg()) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetArgReg()); + } + else + { + assert(genRegMask(argDsc->GetArgReg()) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetArgReg()); + } + } + + if ((argDsc->GetOtherArgReg() != REG_STK) && (argDsc->GetOtherArgReg() != REG_NA)) + { + if (genRegMask(argDsc->GetOtherArgReg()) & (RBM_ALLFLOAT)) + { + 
assert(genRegMask(argDsc->GetOtherArgReg()) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetOtherArgReg()); + } + else + { + assert(genRegMask(argDsc->GetOtherArgReg()) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetOtherArgReg()); + } + } +} +#endif + //------------------------------------------------------------------------ // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate // regState (either compiler->intRegState or compiler->floatRegState), @@ -2040,6 +2077,13 @@ void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) } else #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) + if (varTypeIsStruct(argDsc)) + { + LoongArch64UpdateRegStateForArg(argDsc); + } + else +#endif { RegState* intRegState = &compiler->codeGen->intRegState; RegState* floatRegState = &compiler->codeGen->floatRegState; diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index 5720c4a23e3eb..532fa8fd40976 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -162,18 +162,6 @@ regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc #if FEATURE_MULTIREG_ARGS if (varTypeIsStruct(argDsc->lvType)) { -#ifdef TARGET_LOONGARCH64 - { - if (argDsc->GetOtherArgReg() != REG_NA) - { - inArgMask = genRegMask(argDsc->GetOtherArgReg()); - if (emitter::isFloatReg(argDsc->GetOtherArgReg())) - codeGen->floatRegState.rsCalleeRegArgMaskLiveIn |= inArgMask; - else - codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= inArgMask; - } - } -#else if (argDsc->lvIsHfaRegArg()) { assert(regState->rsIsFloat); @@ -198,7 +186,6 @@ regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg); } } -#endif } #endif // FEATURE_MULTIREG_ARGS diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index 
4127ce8ca4ace..0d8beac0e3aac 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -15,40 +15,39 @@ #define RMASK(x) (1ULL << (x)) /* -REGDEF(name, rnum, mask, xname, wname) */ -REGDEF(R0, 0, 0x0001, "zero" , "zero" ) -REGDEF(RA, 1, 0x0002, "ra" , "ra" ) -REGDEF(TP, 2, 0x0004, "tp" , "tp" ) -REGDEF(SP, 3, 0x0008, "sp" , "sp" ) -REGDEF(A0, 4, 0x0010, "a0" , "a0" ) -REGDEF(A1, 5, 0x0020, "a1" , "a1" ) -REGDEF(A2, 6, 0x0040, "a2" , "a2" ) -REGDEF(A3, 7, 0x0080, "a3" , "a3" ) -REGDEF(A4, 8, 0x0100, "a4" , "a4" ) -REGDEF(A5, 9, 0x0200, "a5" , "a5" ) -REGDEF(A6, 10, 0x0400, "a6" , "a6" ) -REGDEF(A7, 11, 0x0800, "a7" , "a7" ) -REGDEF(T0, 12, 0x1000, "t0" , "t0" ) -REGDEF(T1, 13, 0x2000, "t1" , "t1" ) -REGDEF(T2, 14, 0x4000, "t2" , "t2" ) -REGDEF(T3, 15, 0x8000, "t3" , "t3" ) -REGDEF(T4, 16, 0x10000, "t4" , "t4" ) -REGDEF(T5, 17, 0x20000, "t5" , "t5" ) -REGDEF(T6, 18, 0x40000, "t6" , "t6" ) -REGDEF(T7, 19, 0x80000, "t7" , "t7" ) -REGDEF(T8, 20, 0x100000, "t8" , "t8" ) -REGDEF(X0, 21, 0x200000, "x0" , "x0" ) -REGDEF(FP, 22, 0x400000, "fp" , "fp" ) -REGDEF(S0, 23, 0x800000, "s0" , "s0" ) -REGDEF(S1, 24, 0x1000000, "s1" , "s1" ) -REGDEF(S2, 25, 0x2000000, "s2" , "s2" ) -REGDEF(S3, 26, 0x4000000, "s3" , "s3" ) -REGDEF(S4, 27, 0x8000000, "s4" , "s4" ) -REGDEF(S5, 28, 0x10000000, "s5" , "s5" ) -REGDEF(S6, 29, 0x20000000, "s6" , "s6" ) -REGDEF(S7, 30, 0x40000000, "s7" , "s7" ) -REGDEF(S8, 31, 0x80000000, "s8" , "s8" ) - +REGDEF(name, rnum, mask, sname) */ +REGDEF(R0, 0, 0x0001, "zero") +REGDEF(RA, 1, 0x0002, "ra" ) +REGDEF(TP, 2, 0x0004, "tp" ) +REGDEF(SP, 3, 0x0008, "sp" ) +REGDEF(A0, 4, 0x0010, "a0" ) +REGDEF(A1, 5, 0x0020, "a1" ) +REGDEF(A2, 6, 0x0040, "a2" ) +REGDEF(A3, 7, 0x0080, "a3" ) +REGDEF(A4, 8, 0x0100, "a4" ) +REGDEF(A5, 9, 0x0200, "a5" ) +REGDEF(A6, 10, 0x0400, "a6" ) +REGDEF(A7, 11, 0x0800, "a7" ) +REGDEF(T0, 12, 0x1000, "t0" ) +REGDEF(T1, 13, 0x2000, "t1" ) +REGDEF(T2, 14, 0x4000, "t2" ) +REGDEF(T3, 15, 0x8000, "t3" ) 
+REGDEF(T4, 16, 0x10000, "t4" ) +REGDEF(T5, 17, 0x20000, "t5" ) +REGDEF(T6, 18, 0x40000, "t6" ) +REGDEF(T7, 19, 0x80000, "t7" ) +REGDEF(T8, 20, 0x100000, "t8" ) +REGDEF(X0, 21, 0x200000, "x0" ) +REGDEF(FP, 22, 0x400000, "fp" ) +REGDEF(S0, 23, 0x800000, "s0" ) +REGDEF(S1, 24, 0x1000000, "s1" ) +REGDEF(S2, 25, 0x2000000, "s2" ) +REGDEF(S3, 26, 0x4000000, "s3" ) +REGDEF(S4, 27, 0x8000000, "s4" ) +REGDEF(S5, 28, 0x10000000, "s5" ) +REGDEF(S6, 29, 0x20000000, "s6" ) +REGDEF(S7, 30, 0x40000000, "s7" ) +REGDEF(S8, 31, 0x80000000, "s8" ) REGALIAS(R21, X0) @@ -56,45 +55,45 @@ REGALIAS(R21, X0) #define FMASK(x) (1ULL << (FBASE+(x))) /* -REGDEF(name, rnum, mask, xname, wname) */ -REGDEF(F0, 0+FBASE, FMASK(0), "f0", "f0") -REGDEF(F1, 1+FBASE, FMASK(1), "f1", "f1") -REGDEF(F2, 2+FBASE, FMASK(2), "f2", "f2") -REGDEF(F3, 3+FBASE, FMASK(3), "f3", "f3") -REGDEF(F4, 4+FBASE, FMASK(4), "f4", "f4") -REGDEF(F5, 5+FBASE, FMASK(5), "f5", "f5") -REGDEF(F6, 6+FBASE, FMASK(6), "f6", "f6") -REGDEF(F7, 7+FBASE, FMASK(7), "f7", "f7") -REGDEF(F8, 8+FBASE, FMASK(8), "f8", "f8") -REGDEF(F9, 9+FBASE, FMASK(9), "f9", "f9") -REGDEF(F10, 10+FBASE, FMASK(10), "f10", "f10") -REGDEF(F11, 11+FBASE, FMASK(11), "f11", "f11") -REGDEF(F12, 12+FBASE, FMASK(12), "f12", "f12") -REGDEF(F13, 13+FBASE, FMASK(13), "f13", "f13") -REGDEF(F14, 14+FBASE, FMASK(14), "f14", "f14") -REGDEF(F15, 15+FBASE, FMASK(15), "f15", "f15") -REGDEF(F16, 16+FBASE, FMASK(16), "f16", "f16") -REGDEF(F17, 17+FBASE, FMASK(17), "f17", "f17") -REGDEF(F18, 18+FBASE, FMASK(18), "f18", "f18") -REGDEF(F19, 19+FBASE, FMASK(19), "f19", "f19") -REGDEF(F20, 20+FBASE, FMASK(20), "f20", "f20") -REGDEF(F21, 21+FBASE, FMASK(21), "f21", "f21") -REGDEF(F22, 22+FBASE, FMASK(22), "f22", "f22") -REGDEF(F23, 23+FBASE, FMASK(23), "f23", "f23") -REGDEF(F24, 24+FBASE, FMASK(24), "f24", "f24") -REGDEF(F25, 25+FBASE, FMASK(25), "f25", "f25") -REGDEF(F26, 26+FBASE, FMASK(26), "f26", "f26") -REGDEF(F27, 27+FBASE, FMASK(27), "f27", "f27") -REGDEF(F28, 28+FBASE, 
FMASK(28), "f28", "f28") -REGDEF(F29, 29+FBASE, FMASK(29), "f29", "f29") -REGDEF(F30, 30+FBASE, FMASK(30), "f30", "f30") -REGDEF(F31, 31+FBASE, FMASK(31), "f31", "f31") +REGDEF(name, rnum, mask, sname) */ +REGDEF(F0, 0+FBASE, FMASK(0), "f0") +REGDEF(F1, 1+FBASE, FMASK(1), "f1") +REGDEF(F2, 2+FBASE, FMASK(2), "f2") +REGDEF(F3, 3+FBASE, FMASK(3), "f3") +REGDEF(F4, 4+FBASE, FMASK(4), "f4") +REGDEF(F5, 5+FBASE, FMASK(5), "f5") +REGDEF(F6, 6+FBASE, FMASK(6), "f6") +REGDEF(F7, 7+FBASE, FMASK(7), "f7") +REGDEF(F8, 8+FBASE, FMASK(8), "f8") +REGDEF(F9, 9+FBASE, FMASK(9), "f9") +REGDEF(F10, 10+FBASE, FMASK(10), "f10") +REGDEF(F11, 11+FBASE, FMASK(11), "f11") +REGDEF(F12, 12+FBASE, FMASK(12), "f12") +REGDEF(F13, 13+FBASE, FMASK(13), "f13") +REGDEF(F14, 14+FBASE, FMASK(14), "f14") +REGDEF(F15, 15+FBASE, FMASK(15), "f15") +REGDEF(F16, 16+FBASE, FMASK(16), "f16") +REGDEF(F17, 17+FBASE, FMASK(17), "f17") +REGDEF(F18, 18+FBASE, FMASK(18), "f18") +REGDEF(F19, 19+FBASE, FMASK(19), "f19") +REGDEF(F20, 20+FBASE, FMASK(20), "f20") +REGDEF(F21, 21+FBASE, FMASK(21), "f21") +REGDEF(F22, 22+FBASE, FMASK(22), "f22") +REGDEF(F23, 23+FBASE, FMASK(23), "f23") +REGDEF(F24, 24+FBASE, FMASK(24), "f24") +REGDEF(F25, 25+FBASE, FMASK(25), "f25") +REGDEF(F26, 26+FBASE, FMASK(26), "f26") +REGDEF(F27, 27+FBASE, FMASK(27), "f27") +REGDEF(F28, 28+FBASE, FMASK(28), "f28") +REGDEF(F29, 29+FBASE, FMASK(29), "f29") +REGDEF(F30, 30+FBASE, FMASK(30), "f30") +REGDEF(F31, 31+FBASE, FMASK(31), "f31") // The registers with values 64 (NBASE) and above are not real register numbers #define NBASE 64 // This must be last! 
-REGDEF(STK, 0+NBASE, 0x0000, "STK", "STK") +REGDEF(STK, 0+NBASE, 0x0000, "STK") /*****************************************************************************/ #undef RMASK diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index aade930da4fd5..d28a90ec36f5d 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -23,7 +23,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) const regMaskSmall regMasks[] = { #define REGDEF(name, rnum, mask, xname, wname) mask, #include "register.h" @@ -228,11 +228,9 @@ RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) : m_rsCompiler(compiler), m_r rsMaskResvd = RBM_NONE; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) rsMaskCalleeSaved = RBM_NONE; -#elif defined(TARGET_LOONGARCH64) - rsMaskCalleeSaved = RBM_NONE; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef TARGET_ARM rsMaskPreSpillRegArg = RBM_NONE; diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index f2bc7875152b6..a816c0d607757 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -123,11 +123,9 @@ class RegSet private: regMaskTP _rsMaskVars; // backing store for rsMaskVars property -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#elif defined(TARGET_LOONGARCH64) - regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#endif // TARGET_ARM +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 public: // TODO-Cleanup: Should be private, but Compiler uses it regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty) diff --git 
a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 97d9a69328483..536ef627d6062 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -181,7 +181,7 @@ enum _regMask_enum : unsigned enum _regNumber_enum : unsigned { -#define REGDEF(name, rnum, mask, xname, wname) REG_##name = rnum, +#define REGDEF(name, rnum, mask, sname) REG_##name = rnum, #define REGALIAS(alias, realname) REG_##alias = REG_##realname, #include "register.h" @@ -193,7 +193,7 @@ enum _regNumber_enum : unsigned enum _regMask_enum : unsigned __int64 { RBM_NONE = 0, -#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, +#define REGDEF(name, rnum, mask, sname) RBM_##name = mask, #define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, #include "register.h" }; @@ -702,7 +702,7 @@ inline bool isFloatRegType(var_types type) C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RBM_NONE); C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RBM_NONE); -#if ETW_EBP_FRAMED && !defined(TARGET_LOONGARCH64) +#if ETW_EBP_FRAMED // Frame pointer isn't either if we're supporting ETW frame chaining C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RBM_NONE); C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE); diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 2c6153f6579fd..2bfaea897abef 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -8,11 +8,9 @@ // clang-format off #define CPU_LOAD_STORE_ARCH 1 - //#define CPU_LONG_USES_REGPAIR 0 #define CPU_HAS_FP_SUPPORT 1 #define ROUND_FLOAT 0 // Do not round intermed float expression results #define CPU_HAS_BYTE_REGS 0 - //#define CPU_USES_BLOCK_MOVE 0 #define CPBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll CpBlk. #define INITBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll InitBlk. 
@@ -71,7 +69,6 @@ #define RBM_INT_CALLEE_SAVED (RBM_S0|RBM_S1|RBM_S2|RBM_S3|RBM_S4|RBM_S5|RBM_S6|RBM_S7|RBM_S8) #define RBM_INT_CALLEE_TRASH (RBM_A0|RBM_A1|RBM_A2|RBM_A3|RBM_A4|RBM_A5|RBM_A6|RBM_A7|RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T5|RBM_T6|RBM_T7|RBM_T8) #define RBM_FLT_CALLEE_SAVED (RBM_F24|RBM_F25|RBM_F26|RBM_F27|RBM_F28|RBM_F29|RBM_F30|RBM_F31) - //#define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F12|RBM_F13|RBM_F14|RBM_F15|RBM_F16|RBM_F17|RBM_F18|RBM_F19|RBM_F20|RBM_F21|RBM_F22|RBM_F23) #define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7) #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) @@ -120,7 +117,6 @@ // register to hold shift amount; no special register is required on LOONGARCH64. #define REG_SHIFT REG_NA #define RBM_SHIFT RBM_ALLINT - //#define PREDICT_REG_SHIFT PREDICT_REG // This is a general scratch register that does not conflict with the argument registers #define REG_SCRATCH REG_T0 @@ -142,27 +138,23 @@ // LOONGARCH64 write barrier ABI (see vm/loongarch64/asmhelpers.S): // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): // On entry: - // v0: the destination address (LHS of the assignment) - // v1: the object reference (RHS of the assignment) + // t6: the destination address (LHS of the assignment) + // t7: the object reference (RHS of the assignment) // On exit: // t0: trashed // t1: trashed - // t2: trashed // t3: trashed - // v0: incremented by 8 - // v1: trashed - // ??: trashed if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP (currently non-Windows) + // t4: trashed + // t6: incremented by 8 + // t7: trashed // CORINFO_HELP_ASSIGN_BYREF (JIT_ByRefWriteBarrier): // On entry: // t8: the source address (points to object reference to write) - // v0: the destination address (object reference written here) + // t6: the destination address (object reference written here) // 
On exit: // t8: incremented by 8 - // v0: incremented by 8 + // t6: incremented by 8 // - // Note that while ?reg? is currently only trashed under FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP, - // currently only set for non-Windows//, it is expected to be set in the future for Windows, and for R2R. - // So simply always consider it trashed, to avoid later breaking changes. #define REG_WRITE_BARRIER_DST REG_T6 #define RBM_WRITE_BARRIER_DST RBM_T6 @@ -176,7 +168,7 @@ #define REG_WRITE_BARRIER_SRC_BYREF REG_T8 #define RBM_WRITE_BARRIER_SRC_BYREF RBM_T8 - #define RBM_CALLEE_TRASH_NOGC (RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T6|RBM_T7|RBM_DEFAULT_HELPER_CALL_TARGET) + #define RBM_CALLEE_TRASH_NOGC (RBM_T0|RBM_T1|RBM_T3|RBM_T4|RBM_T6|RBM_T7|RBM_DEFAULT_HELPER_CALL_TARGET) // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. #define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_WRITE_BARRIER_DST|RBM_CALLEE_TRASH_NOGC) @@ -209,7 +201,7 @@ #define REG_INDIRECT_CALL_TARGET_REG REG_T6 - // Registers used by PInvoke frame setup //should confirm. 
+ // Registers used by PInvoke frame setup #define REG_PINVOKE_FRAME REG_T0 #define RBM_PINVOKE_FRAME RBM_T0 #define REG_PINVOKE_TCB REG_T1 @@ -270,7 +262,7 @@ #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" #define REG_SPBASE REG_SP - #define RBM_SPBASE RBM_SP // reuse the RBM for REG_SP + #define RBM_SPBASE RBM_SP #define STR_SPBASE "sp" #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp index 110c88985a510..faae126aa5718 100644 --- a/src/coreclr/jit/unwindloongarch64.cpp +++ b/src/coreclr/jit/unwindloongarch64.cpp @@ -232,7 +232,7 @@ void Compiler::unwindPush(regNumber reg) void Compiler::unwindAllocStack(unsigned size) { -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { if (compGeneratingProlog) @@ -242,7 +242,7 @@ void Compiler::unwindAllocStack(unsigned size) return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT UnwindInfo* pu = &funCurrentFunc()->uwi; @@ -275,7 +275,7 @@ void Compiler::unwindAllocStack(unsigned size) void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset) { -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { if (compGeneratingProlog) @@ -285,7 +285,7 @@ void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset) return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT UnwindInfo* pu = &funCurrentFunc()->uwi; @@ -343,7 +343,7 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) assert(0 <= offset && offset <= 2047); assert((offset % 8) == 0); -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { if (compGeneratingProlog) @@ -356,7 +356,7 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT int z = offset / 8; // assert(0 <= z && z <= 0xFF); @@ -390,63 +390,7 @@ void 
Compiler::unwindSaveReg(regNumber reg, int offset) void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) { - // TODO:temp not used on loongarch64. - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - UnwindInfo* pu = &funCurrentFunc()->uwi; - - // stp reg1, reg2, [sp, #offset] - - // offset for store pair in prolog must be positive and a multiple of 16. - assert(0 <= offset && offset <= 0xff0); - assert((offset % 16) == 0); - - int z = offset / 8; - //assert(0 <= z && z <= 0x1FE); - -#if defined(TARGET_UNIX) - if (generateCFIUnwindCodes()) - { - if (compGeneratingProlog) - { - FuncInfoDsc* func = funCurrentFunc(); - UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); - - createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg1), offset); - createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg2), offset + 8); - } - - return; - } -#endif // TARGET_UNIX - if (reg1 == REG_FP) - { - // save_fpra: 0100zzzz | zzzzzzzz: save pair at [sp+#Z*8], offset <= 0xff0 - assert(reg2 == REG_RA); - - pu->AddCode(0x40 | (BYTE)(z >> 8), (BYTE)z); - } - else if (reg2 == REG_RA) - { - assert(!"unimplemented on LOONGARCH yet"); - } - else if (emitter::isGeneralRegister(reg1)) - { - // save_regp: 11001000 | 0xxxzzzz | zzzzzzzz: save s(0 + #X) pair at [sp + #Z * 8], offset <= 4080 - assert(REG_NEXT(reg1) == reg2); - assert(REG_S0 <= reg1 && // first legal pair: S0, S1 - reg1 <= REG_S6); // last legal pair: S6, S7 (FP is never saved without RA) - - BYTE x = (BYTE)(reg1 - REG_S0); - //assert(0 <= x && x <= 0x6); - - pu->AddCode(0xC8, (BYTE)(x << 4) | (BYTE)(z >> 8), (BYTE)z); - } - else - { - assert(!"unimplemented on LOONGARCH yet"); - } -#endif + assert(!"unused on LOONGARCH64 yet"); } void Compiler::unwindReturn(regNumber reg) @@ -948,13 +892,13 @@ void Compiler::unwindBegProlog() { assert(compGeneratingProlog); -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { unwindBegPrologCFI(); return; } 
-#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT FuncInfoDsc* func = funCurrentFunc(); @@ -980,12 +924,12 @@ void Compiler::unwindBegEpilog() { assert(compGeneratingEpilog); -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT funCurrentFunc()->uwi.AddEpilog(); } @@ -1000,12 +944,12 @@ void Compiler::unwindEndEpilog() // for them. void Compiler::unwindPadding() { -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT UnwindInfo* pu = &funCurrentFunc()->uwi; GetEmitter()->emitUnwindNopPadding(pu->GetCurrentEmitterLocation(), this); @@ -1030,7 +974,7 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func) BOOL isFunclet = (func->funKind == FUNC_ROOT) ? FALSE : TRUE; bool funcHasColdSection = false; -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { DWORD unwindCodeBytes = 0; @@ -1043,7 +987,7 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func) return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT // If there is cold code, split the unwind data between the hot section and the // cold section. 
This needs to be done before we split into fragments, as each @@ -1103,13 +1047,13 @@ void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER); static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER); -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { unwindEmitFuncCFI(func, pHotCode, pColdCode); return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT func->uwi.Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, true); diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 078de7524a3a4..9f3c50279fe5a 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -132,8 +132,6 @@ const char* getRegName(regNumber reg) static const char* const regNames[] = { #if defined(TARGET_ARM64) #define REGDEF(name, rnum, mask, xname, wname) xname, -#elif defined(TARGET_LOONGARCH64) -#define REGDEF(name, rnum, mask, xname, wname) xname, #else #define REGDEF(name, rnum, mask, sname) sname, #endif @@ -219,7 +217,7 @@ const char* getRegNameFloat(regNumber reg, var_types type) return regName; } -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) static const char* regNamesFloat[] = { #define REGDEF(name, rnum, mask, xname, wname) xname, @@ -229,6 +227,17 @@ const char* getRegNameFloat(regNumber reg, var_types type) return regNamesFloat[reg]; +#elif defined(TARGET_LOONGARCH64) + + static const char* regNamesFloat[] = { +#define REGDEF(name, rnum, mask, sname) sname, +#include "register.h" + }; + + assert((unsigned)reg < ArrLen(regNamesFloat)); + + return regNamesFloat[reg]; + #else static const char* regNamesFloat[] = { #define REGDEF(name, rnum, mask, sname) "x" sname, @@ -320,7 +329,7 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) // No register ranges #elif defined(TARGET_LOONGARCH64) - if (REG_A0 <= regNum && regNum <= REG_X0) + if (REG_A0 <= regNum && 
regNum <= REG_T8) { regHead = regNum; inRegRange = true; @@ -336,7 +345,7 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) else if ((regNum == REG_INT_LAST) || (regNum == REG_R17) // last register before TEB || (regNum == REG_R28)) // last register before FP #elif defined(TARGET_LOONGARCH64) - else if ((regNum == REG_INT_LAST) || (regNum == REG_X0)) + else if ((regNum == REG_INT_LAST) || (regNum == REG_A7) || (regNum == REG_T8)) #else // TARGET_LOONGARCH64 // We've already printed a register. Is this the end of a range? else if (regNum == REG_INT_LAST) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 0132d894c388f..52842c4375c21 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -49,16 +49,14 @@ struct FloatTraits // Notes: // "Default" NaN value returned by expression 0.0f / 0.0f on x86/x64 has // different binary representation (0xffc00000) than NaN on - // ARM32/ARM64 (0x7fc00000). + // ARM32/ARM64/LoongArch64 (0x7fc00000). static float NaN() { #if defined(TARGET_XARCH) unsigned bits = 0xFFC00000u; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned bits = 0x7FC00000u; -#elif defined(TARGET_LOONGARCH64) - unsigned bits = 0xFFC00000u; #else #error Unsupported or unset target architecture #endif @@ -77,16 +75,14 @@ struct DoubleTraits // Notes: // "Default" NaN value returned by expression 0.0 / 0.0 on x86/x64 has // different binary representation (0xfff8000000000000) than NaN on - // ARM32/ARM64 (0x7ff8000000000000). + // ARM32/ARM64/LoongArch64 (0x7ff8000000000000). 
static double NaN() { #if defined(TARGET_XARCH) unsigned long long bits = 0xFFF8000000000000ull; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned long long bits = 0x7FF8000000000000ull; -#elif defined(TARGET_LOONGARCH64) - unsigned long long bits = 0xFFF8000000000000ull; #else #error Unsupported or unset target architecture #endif @@ -2850,6 +2846,9 @@ ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, Valu else { #ifdef TARGET_LOONGARCH64 + // For LoongArch64, the int32 will signed-extend default, + // e.g. `ld_w $r4, $r5, 4` loading a int32 from the addr `$r5+4`. + // So there is no need to signed-extend. assert(typ == TYP_INT || typ == TYP_LONG); #else assert(typ == TYP_INT); diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index aac563b265e36..872c5e376aed5 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -182,7 +182,7 @@ ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of t // No Hardware Intrinsics on ARM32 #elif defined (TARGET_LOONGARCH64) - //TODO: add LoongArch64's Hardware Instructions. + //TODO-LOONGARCH64-CQ: add LoongArch64's Hardware Intrinsics Instructions if supported. #else #error Unsupported platform From 377c2fd67acec6709d959ebdf1e99341c7e79eee Mon Sep 17 00:00:00 2001 From: Qiao Pengcheng Date: Wed, 16 Feb 2022 20:37:01 +0800 Subject: [PATCH 21/46] [LoongArch64] apply the `format.patch`. 
--- src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/compiler.h | 10 +++++----- src/coreclr/jit/lowerloongarch64.cpp | 2 +- src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/regset.h | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 4f5ad64dce390..a8da6bf3c7308 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -9460,7 +9460,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) switch (addrInfo.accessType) { case IAT_VALUE: - //TODO-LOONGARCH64-CQ: using B/BL for optimization. + // TODO-LOONGARCH64-CQ: using B/BL for optimization. case IAT_PVALUE: // Load the address into a register, load indirect and call through a register // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 856705bf3ad33..09ee3a6da4ec0 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -469,15 +469,15 @@ class LclVarDsc unsigned char lvIsTemp : 1; // Short-lifetime compiler temp -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. #endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #if defined(TARGET_LOONGARCH64) - unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. - unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. - unsigned char lvIsSplit : 1; // Set if the argument is splited. -#endif // defined(TARGET_LOONGARCH64) + unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. 
+ unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. + unsigned char lvIsSplit : 1; // Set if the argument is splited. +#endif // defined(TARGET_LOONGARCH64) unsigned char lvIsBoolean : 1; // set if variable is boolean unsigned char lvSingleDef : 1; // variable has a single def diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 6dd44bdf9b1a6..4196b23578f61 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -38,7 +38,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // bool Lowering::IsCallTargetInRange(void* addr) { - //TODO-LOONGARCH64-CQ: using B/BL for optimization. + // TODO-LOONGARCH64-CQ: using B/BL for optimization. return false; } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3379a54e4be08..90473a99ea003 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2078,7 +2078,7 @@ void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) else #endif // defined(UNIX_AMD64_ABI) #if defined(TARGET_LOONGARCH64) - if (varTypeIsStruct(argDsc)) + if (varTypeIsStruct(argDsc)) { LoongArch64UpdateRegStateForArg(argDsc); } diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index a816c0d607757..9c1a1041eecf8 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -123,9 +123,9 @@ class RegSet private: regMaskTP _rsMaskVars; // backing store for rsMaskVars property -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 public: // TODO-Cleanup: Should be private, but Compiler uses it regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes 
(typically empty) From f3f9636693ad95341de049776603f27c00a5b4bb Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 17 Feb 2022 15:18:43 +0800 Subject: [PATCH 22/46] [LoongArch64] round 2 amend for reviewing by @BruceForstall. --- src/coreclr/jit/codegencommon.cpp | 453 +-------------- src/coreclr/jit/codegenloongarch64.cpp | 444 +++++++++++++- src/coreclr/jit/emitloongarch64.cpp | 60 +- src/coreclr/jit/gentree.cpp | 51 +- src/coreclr/jit/importer.cpp | 14 +- src/coreclr/jit/instr.cpp | 20 +- src/coreclr/jit/instr.h | 18 +- src/coreclr/jit/instrsloongarch64.h | 774 ++++++++++++------------- src/coreclr/jit/lsra.h | 11 +- src/coreclr/jit/lsrabuild.cpp | 64 +- src/coreclr/jit/morph.cpp | 33 +- src/coreclr/jit/optimizer.cpp | 10 + 12 files changed, 980 insertions(+), 972 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index a8da6bf3c7308..38c98960fcf92 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -140,7 +140,6 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) #endif // TARGET_ARM64 #ifdef TARGET_LOONGARCH64 - SetHasTailCalls(false); genSaveFpRaWithAllCalleeSavedRegisters = false; #endif // TARGET_LOONGARCH64 } @@ -1757,72 +1756,6 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } #endif // TARGET_ARMARCH -#ifdef TARGET_LOONGARCH64 -//------------------------------------------------------------------------ -// genEmitGSCookieCheck: Generate code to check that the GS cookie -// wasn't thrashed by a buffer overrun. -// -void CodeGen::genEmitGSCookieCheck(bool pushReg) -{ - noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); - - // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while - // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0). 
- if (!pushReg && (compiler->info.compRetType == TYP_REF)) - gcInfo.gcRegGCrefSetCur |= RBM_INTRET; - - // We need two temporary registers, to load the GS cookie values and compare them. We can't use - // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be - // callee-trash registers, which should not contain anything interesting at this point. - // We don't have any IR node representing this check, so LSRA can't communicate registers - // for us to use. - - regNumber regGSConst = REG_GSCOOKIE_TMP_0; - regNumber regGSValue = REG_GSCOOKIE_TMP_1; - - if (compiler->gsGlobalSecurityCookieAddr == nullptr) - { - // load the GS cookie constant into a reg - // - genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); - } - else - { - //// Ngen case - GS cookie constant needs to be accessed through an indirection. - // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); - if (compiler->opts.compReloc) - { - GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst, - (ssize_t)compiler->gsGlobalSecurityCookieAddr); - } - else - { ////TODO:LoongArch64 should amend for optimize! 
- // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, - // (ssize_t)compiler->gsGlobalSecurityCookieAddr); - // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst, - ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12); - GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst, - (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, - ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2); - } - regSet.verifyRegUsed(regGSConst); - } - // Load this method's GS value from the stack frame - GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); - - // Compare with the GC cookie constant - BasicBlock* gsCheckBlk = genCreateTempLabel(); - GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue); - - // regGSConst and regGSValue aren't needed anymore, we can use them for helper call - genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst); - genDefineTempLabel(gsCheckBlk); -} -#endif // TARGET_LOONGARCH64 - /***************************************************************************** * * Generate an exit sequence for a return from a method (note: when compiling @@ -3317,7 +3250,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif -#if defined(TARGET_LOONGARCH64) +#ifdef TARGET_LOONGARCH64 void CodeGen::genFnPrologCalleeRegArgs() { assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); @@ -3538,7 +3471,7 @@ void CodeGen::genFnPrologCalleeRegArgs() baseOffset = 8; base += 8; - GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size /*EA_PTRSIZE*/, REG_SCRATCH, REG_SPBASE, + GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, 
REG_SPBASE, genTotalFrameSize()); if ((-2048 <= base) && (base < 2048)) { @@ -3608,7 +3541,7 @@ void CodeGen::genFnPrologCalleeRegArgs() assert(!regArgMaskLive); } -#else //! defined(TARGET_LOONGARCH64) +#else void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) { #ifdef DEBUG @@ -4979,7 +4912,6 @@ void CodeGen::genEnregisterIncomingStackArgs() #ifdef TARGET_LOONGARCH64 { bool FPbased; - // int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; int base = compiler->lvaFrameAddress(varNum, &FPbased); if ((-2048 <= base) && (base < 2048)) @@ -5953,214 +5885,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) noway_assert(compiler->compCalleeRegsPushed == popCount); } -#elif defined(TARGET_LOONGARCH64) -void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) -{ - assert(compiler->compGeneratingEpilog); - - regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; - - if (isFramePointerUsed()) - { - rsRestoreRegs |= RBM_FPBASE; - } - - rsRestoreRegs |= RBM_RA; // We must save/restore the return address. - - regMaskTP regsToRestoreMask = rsRestoreRegs; - - int totalFrameSize = genTotalFrameSize(); - - int calleeSaveSPOffset = 0; // This will be the starting place for restoring - // the callee-saved registers, in decreasing order. - int frameType = 0; // An indicator of what type of frame we are popping. - int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored. - - if (isFramePointerUsed()) - { - if (totalFrameSize <= 2047) - { - if (compiler->compLocallocUsed) - { - int SPtoFPdelta = genSPtoFPdelta(); - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); - compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); - } - - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? 
%s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, - dspBool(compiler->compLocallocUsed)); - - frameType = 1; - - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. - - calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; - } - else - { - frameType = 2; - - calleeSaveSPOffset = compiler->compLclFrameSize; - - JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, - dspBool(compiler->compLocallocUsed)); - } - // calleeSaveSPDelta = 0; - } - else - { - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " - "localloc? %s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - frameType = 3; - - int outSzAligned; - if (compiler->lvaOutgoingArgSpaceSize >= 2040) - { - int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - offset; - - int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; - calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); - offset2 = calleeSaveSPDelta - offset2; - - if (compiler->compLocallocUsed) - { - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); - compiler->unwindSetFrameReg(REG_FPBASE, offset2); - } - else - { - outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; - // if (outSzAligned > 0) - { - genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); - } - } - - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. 
- - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); - compiler->unwindSaveReg(REG_RA, offset2 + 8); - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); - compiler->unwindSaveReg(REG_FP, offset2); - - genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - - calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); - } - else - { - int offset2 = compiler->lvaOutgoingArgSpaceSize; - if (compiler->compLocallocUsed) - { - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); - compiler->unwindSetFrameReg(REG_FPBASE, offset2); - } - - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); - compiler->unwindSaveReg(REG_RA, offset2 + 8); - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); - compiler->unwindSaveReg(REG_FP, offset2); - - calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; - - genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, - /* reportUnwindData */ true); - } - } - else - { - frameType = 4; - - JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? 
%s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; - - if (compiler->compLocallocUsed) - { - calleeSaveSPDelta = calleeSaveSPOffset + REGSIZE_BYTES; - - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -calleeSaveSPDelta); - compiler->unwindSetFrameReg(REG_FPBASE, calleeSaveSPDelta); - } - else - { - calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; - genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } - - calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); - } - } - } - else - { - // No frame pointer (no chaining). - NYI("Frame without frame pointer"); - calleeSaveSPOffset = 0; - } - - JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta); - - if (frameType == 1) - { - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset + 8); - compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset + 8); - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset); - compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset); - - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); - compiler->unwindAllocStack(totalFrameSize); - } - else if (frameType == 2) - { - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); - compiler->unwindAllocStack(totalFrameSize); - } - else if 
(frameType == 3) - { - // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } - else if (frameType == 4) - { - // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } - else - { - unreached(); - } -} - #endif // TARGET* // We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so. @@ -9374,176 +9098,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) } #elif defined(TARGET_LOONGARCH64) - -void CodeGen::genFnEpilog(BasicBlock* block) -{ -#ifdef DEBUG - if (verbose) - printf("*************** In genFnEpilog()\n"); -#endif // DEBUG - - ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); - - VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, GetEmitter()->emitInitGCrefVars); - gcInfo.gcRegGCrefSetCur = GetEmitter()->emitInitGCrefRegs; - gcInfo.gcRegByrefSetCur = GetEmitter()->emitInitByrefRegs; - -#ifdef DEBUG - if (compiler->opts.dspCode) - printf("\n__epilog:\n"); - - if (verbose) - { - printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); - dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); - printf(", gcRegGCrefSetCur="); - printRegMaskInt(gcInfo.gcRegGCrefSetCur); - GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); - printf(", gcRegByrefSetCur="); - printRegMaskInt(gcInfo.gcRegByrefSetCur); - GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); - printf("\n"); - } -#endif // DEBUG - - bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); - - GenTree* lastNode = block->lastNode(); - - // Method handle and address info used in case of jump epilog - CORINFO_METHOD_HANDLE methHnd = nullptr; - CORINFO_CONST_LOOKUP addrInfo; - addrInfo.addr = nullptr; - addrInfo.accessType = IAT_VALUE; - - if (jmpEpilog && lastNode->gtOper == GT_JMP) - { - methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; - compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); - } - - 
compiler->unwindBegEpilog(); - - if (jmpEpilog) - { - SetHasTailCalls(true); - - noway_assert(block->bbJumpKind == BBJ_RETURN); - noway_assert(block->GetFirstLIRNode() != nullptr); - - /* figure out what jump we have */ - GenTree* jmpNode = lastNode; -#if !FEATURE_FASTTAILCALL - noway_assert(jmpNode->gtOper == GT_JMP); -#else // FEATURE_FASTTAILCALL - // armarch - // If jmpNode is GT_JMP then gtNext must be null. - // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. - noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); - - // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp - noway_assert((jmpNode->gtOper == GT_JMP) || - ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); - - // The next block is associated with this "if" stmt - if (jmpNode->gtOper == GT_JMP) -#endif // FEATURE_FASTTAILCALL - { - // Simply emit a jump to the methodHnd. This is similar to a call so we can use - // the same descriptor with some minor adjustments. - assert(methHnd != nullptr); - assert(addrInfo.addr != nullptr); - - emitter::EmitCallType callType; - void* addr; - regNumber indCallReg; - switch (addrInfo.accessType) - { - case IAT_VALUE: - // TODO-LOONGARCH64-CQ: using B/BL for optimization. 
- case IAT_PVALUE: - // Load the address into a register, load indirect and call through a register - // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use - callType = emitter::EC_INDIR_R; - indCallReg = REG_INDIRECT_CALL_TARGET_REG; - addr = NULL; - instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); - if (addrInfo.accessType == IAT_PVALUE) - { - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, indCallReg, indCallReg, 0); - regSet.verifyRegUsed(indCallReg); - } - break; - - case IAT_RELPVALUE: - { - // Load the address into a register, load relative indirect and call through a register - // We have to use R12 since we assume the argument registers are in use - // LR is used as helper register right before it is restored from stack, thus, - // all relative address calculations are performed before LR is restored. - callType = emitter::EC_INDIR_R; - indCallReg = REG_T2; - addr = NULL; - - regSet.verifyRegUsed(indCallReg); - break; - } - - case IAT_PPVALUE: - default: - NO_WAY("Unsupported JMP indirection"); - } - - /* Simply emit a jump to the methodHnd. This is similar to a call so we can use - * the same descriptor with some minor adjustments. - */ - - genPopCalleeSavedRegisters(true); - - // clang-format off - GetEmitter()->emitIns_Call(callType, - methHnd, - INDEBUG_LDISASM_COMMA(nullptr) - addr, - 0, // argSize - EA_UNKNOWN // retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - DebugInfo(), - indCallReg, // ireg - REG_NA, // xreg - 0, // xmul - 0, // disp - true); // isJump - // clang-format on - CLANG_FORMAT_COMMENT_ANCHOR; - } -#if FEATURE_FASTTAILCALL - else - { - genPopCalleeSavedRegisters(true); - // Fast tail call. 
- // Call target = REG_FASTTAILCALL_TARGET - // https://github.com/dotnet/coreclr/issues/4827 - // Do we need a special encoding for stack walker like rex.w prefix for x64? - - // TODO for LA: whether the relative address is enough for optimize? - GetEmitter()->emitIns_R_R_I(INS_jirl, emitTypeSize(TYP_I_IMPL), REG_R0, REG_FASTTAILCALL_TARGET, 0); - } -#endif // FEATURE_FASTTAILCALL - } - else - { - genPopCalleeSavedRegisters(false); - - GetEmitter()->emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_RA, 0); - compiler->unwindReturn(REG_RA); - } - - compiler->unwindEndEpilog(); -} +// see the codegenloongarch64.cpp #else // TARGET* #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 3d05d9299c273..69e3886056dba 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1426,6 +1426,176 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() #endif // DEBUG } +void CodeGen::genFnEpilog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFnEpilog()\n"); +#endif // DEBUG + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, GetEmitter()->emitInitGCrefVars); + gcInfo.gcRegGCrefSetCur = GetEmitter()->emitInitGCrefRegs; + gcInfo.gcRegByrefSetCur = GetEmitter()->emitInitByrefRegs; + +#ifdef DEBUG + if (compiler->opts.dspCode) + printf("\n__epilog:\n"); + + if (verbose) + { + printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); + dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); + printf(", gcRegGCrefSetCur="); + printRegMaskInt(gcInfo.gcRegGCrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); + printf(", gcRegByrefSetCur="); + printRegMaskInt(gcInfo.gcRegByrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); + printf("\n"); + } +#endif // DEBUG + + bool jmpEpilog 
= ((block->bbFlags & BBF_HAS_JMP) != 0); + + GenTree* lastNode = block->lastNode(); + + // Method handle and address info used in case of jump epilog + CORINFO_METHOD_HANDLE methHnd = nullptr; + CORINFO_CONST_LOOKUP addrInfo; + addrInfo.addr = nullptr; + addrInfo.accessType = IAT_VALUE; + + if (jmpEpilog && lastNode->gtOper == GT_JMP) + { + methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); + } + + compiler->unwindBegEpilog(); + + if (jmpEpilog) + { + SetHasTailCalls(true); + + noway_assert(block->bbJumpKind == BBJ_RETURN); + noway_assert(block->GetFirstLIRNode() != nullptr); + + /* figure out what jump we have */ + GenTree* jmpNode = lastNode; +#if !FEATURE_FASTTAILCALL + noway_assert(jmpNode->gtOper == GT_JMP); +#else // FEATURE_FASTTAILCALL + // armarch + // If jmpNode is GT_JMP then gtNext must be null. + // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. + noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + + // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp + noway_assert((jmpNode->gtOper == GT_JMP) || + ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + + // The next block is associated with this "if" stmt + if (jmpNode->gtOper == GT_JMP) +#endif // FEATURE_FASTTAILCALL + { + // Simply emit a jump to the methodHnd. This is similar to a call so we can use + // the same descriptor with some minor adjustments. + assert(methHnd != nullptr); + assert(addrInfo.addr != nullptr); + + emitter::EmitCallType callType; + void* addr; + regNumber indCallReg; + switch (addrInfo.accessType) + { + case IAT_VALUE: + //TODO-LOONGARCH64-CQ: using B/BL for optimization. 
+ case IAT_PVALUE: + // Load the address into a register, load indirect and call through a register + // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use + callType = emitter::EC_INDIR_R; + indCallReg = REG_INDIRECT_CALL_TARGET_REG; + addr = NULL; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); + if (addrInfo.accessType == IAT_PVALUE) + { + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, indCallReg, indCallReg, 0); + regSet.verifyRegUsed(indCallReg); + } + break; + + case IAT_RELPVALUE: + { + // Load the address into a register, load relative indirect and call through a register + // We have to use R12 since we assume the argument registers are in use + // LR is used as helper register right before it is restored from stack, thus, + // all relative address calculations are performed before LR is restored. + callType = emitter::EC_INDIR_R; + indCallReg = REG_T2; + addr = NULL; + + regSet.verifyRegUsed(indCallReg); + break; + } + + case IAT_PPVALUE: + default: + NO_WAY("Unsupported JMP indirection"); + } + + /* Simply emit a jump to the methodHnd. This is similar to a call so we can use + * the same descriptor with some minor adjustments. + */ + + genPopCalleeSavedRegisters(true); + + // clang-format off + GetEmitter()->emitIns_Call(callType, + methHnd, + INDEBUG_LDISASM_COMMA(nullptr) + addr, + 0, // argSize + EA_UNKNOWN // retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + DebugInfo(), + indCallReg, // ireg + REG_NA, // xreg + 0, // xmul + 0, // disp + true); // isJump + // clang-format on + CLANG_FORMAT_COMMENT_ANCHOR; + } +#if FEATURE_FASTTAILCALL + else + { + genPopCalleeSavedRegisters(true); + // Fast tail call. 
+ // Call target = REG_FASTTAILCALL_TARGET + // https://github.com/dotnet/coreclr/issues/4827 + // Do we need a special encoding for stack walker like rex.w prefix for x64? + + // TODO for LA: whether the relative address is enough for optimize? + GetEmitter()->emitIns_R_R_I(INS_jirl, emitTypeSize(TYP_I_IMPL), REG_R0, REG_FASTTAILCALL_TARGET, 0); + } +#endif // FEATURE_FASTTAILCALL + } + else + { + genPopCalleeSavedRegisters(false); + + GetEmitter()->emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_RA, 0); + compiler->unwindReturn(REG_RA); + } + + compiler->unwindEndEpilog(); +} + /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -5633,6 +5803,70 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) *pInitRegZeroed = false; } +//------------------------------------------------------------------------ +// genEmitGSCookieCheck: Generate code to check that the GS cookie +// wasn't thrashed by a buffer overrun. +// +void CodeGen::genEmitGSCookieCheck(bool pushReg) +{ + noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); + + // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while + // executing GS cookie check will not collect the object pointed to by REG_INTRET (A0). + if (!pushReg && (compiler->info.compRetNativeType == TYP_REF)) + gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + + // We need two temporary registers, to load the GS cookie values and compare them. We can't use + // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be + // callee-trash registers, which should not contain anything interesting at this point. + // We don't have any IR node representing this check, so LSRA can't communicate registers + // for us to use. 
+ + regNumber regGSConst = REG_GSCOOKIE_TMP_0; + regNumber regGSValue = REG_GSCOOKIE_TMP_1; + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + // load the GS cookie constant into a reg + // + genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + } + else + { + //// Ngen case - GS cookie constant needs to be accessed through an indirection. + // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); + } + else + { ////TODO:LoongArch64 should amend for optimize! + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, + // (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst, + (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2); + } + regSet.verifyRegUsed(regGSConst); + } + // Load this method's GS value from the stack frame + GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); + + // Compare with the GC cookie constant + BasicBlock* gsCheckBlk = genCreateTempLabel(); + GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue); + + // regGSConst and regGSValue aren't needed anymore, we can use them for helper call + genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst); + genDefineTempLabel(gsCheckBlk); +} + 
//--------------------------------------------------------------------- // genIntrinsic - generate code for a given intrinsic // @@ -8421,7 +8655,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC /*----------------------------------------------------------------------------- * - * Push any callee-saved registers we have used + * Push/Pop any callee-saved registers we have used */ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) @@ -8817,6 +9051,213 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } } +void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) +{ + assert(compiler->compGeneratingEpilog); + + regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + + if (isFramePointerUsed()) + { + rsRestoreRegs |= RBM_FPBASE; + } + + rsRestoreRegs |= RBM_RA; // We must save/restore the return address. + + regMaskTP regsToRestoreMask = rsRestoreRegs; + + int totalFrameSize = genTotalFrameSize(); + + int calleeSaveSPOffset = 0; // This will be the starting place for restoring + // the callee-saved registers, in decreasing order. + int frameType = 0; // An indicator of what type of frame we are popping. + int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored. + + if (isFramePointerUsed()) + { + if (totalFrameSize <= 2047) + { + if (compiler->compLocallocUsed) + { + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); + compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); + } + + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? 
%s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, + dspBool(compiler->compLocallocUsed)); + + frameType = 1; + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; + } + else + { + frameType = 2; + + calleeSaveSPOffset = compiler->compLclFrameSize; + + JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, + dspBool(compiler->compLocallocUsed)); + } + // calleeSaveSPDelta = 0; + } + else + { + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + frameType = 3; + + int outSzAligned; + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - offset; + + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); + offset2 = calleeSaveSPDelta - offset2; + + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + else + { + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + // if (outSzAligned > 0) + { + genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); + } + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. 
+ + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + else + { + int offset2 = compiler->lvaOutgoingArgSpaceSize; + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, + /* reportUnwindData */ true); + } + } + else + { + frameType = 4; + + JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? 
%s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + if (compiler->compLocallocUsed) + { + calleeSaveSPDelta = calleeSaveSPOffset + REGSIZE_BYTES; + + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -calleeSaveSPDelta); + compiler->unwindSetFrameReg(REG_FPBASE, calleeSaveSPDelta); + } + else + { + calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + } + } + else + { + // No frame pointer (no chaining). + NYI("Frame without frame pointer"); + calleeSaveSPOffset = 0; + } + + JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta); + + if (frameType == 1) + { + calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset + 8); + compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset); + compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset); + + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if (frameType == 2) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if 
(frameType == 3) + { + // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (frameType == 4) + { + // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else + { + unreached(); + } +} + //----------------------------------------------------------------------------------- // genProfilingEnterCallback: Generate the profiling function enter callback. // @@ -8838,4 +9279,5 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) return; } } + #endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/emitloongarch64.cpp index 2bb2e27f4b5ce..2d260ffed00ac 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -636,7 +636,7 @@ static const char * const RegNames[] = // clang-format off /*static*/ const BYTE CodeGenInterface::instInfo[] = { - #define INSTS(id, nm, fp, info, fmt, e1) info, + #define INST(id, nm, fp, info, fmt, e1) info, #include "instrs.h" }; // clang-format on @@ -692,7 +692,7 @@ inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) // clang-format off const static code_t insCode[] = { - #define INSTS(id, nm, fp, info, fmt, e1) e1, + #define INST(id, nm, fp, info, fmt, e1) e1, #include "instrs.h" }; // clang-format on @@ -726,31 +726,57 @@ void emitter::emitIns(instruction ins) * * Add an Load/Store instruction(s): base+offset and base-addr-computing if needed. * For referencing a stack-based local variable and a register + * + * Special notes for LoongArch64: + * The parameter `offs` has special info. + * The real value of `offs` is positive. + * If the `offs` is negative, its real value is abs(offs); + * the negative `offs` is special for optimizing the large offset which >2047. 
+ * when offs >2047 we can't encode one instruction to load/store the data, + * if there are several load/store in this case, you have to repeat the similar + * large offs with redundant instructions and maybe eat up the `SC_IG_BUFFER_SIZE`. + * + * Optimize the following: + * lu12i.w x0, 0x0 + * ori x0, x0, 0x9ac + * add.d x0, x0, fp + * fst.s fa0, x0, 0 + * + * For the offs within range [0,0x7ff], using one instruction: + * ori x0, x0, offs + * For the offs within range [0x1000,0xffffffff], using two instructions + * lu12i.w x0, offs-hi-20bits + * ori x0, x0, offs-low-12bits + * + * Store/Load the data: + * fstx.s fa0, x0, fp + * + * If the store/load are repeated, + * addi_d x0,x0,sizeof(type) + * fstx.s fa0, x0, fp + * */ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - // assert(offs >= 0); ssize_t imm; - emitAttr size = EA_SIZE(attr); // it's better confirm attr with ins. + emitAttr size = EA_SIZE(attr); #ifdef DEBUG switch (ins) { case INS_st_b: case INS_st_h: + case INS_st_w: case INS_fst_s: - // case INS_swl: - // case INS_swr: - // case INS_sdl: - // case INS_sdr: + case INS_st_d: case INS_fst_d: break; default: - NYI("emitIns_S_R"); // FP locals? + NYI("emitIns_S_R"); return; } // end switch (ins) @@ -806,12 +832,14 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va appendToCurIG(id); } +/* + * Special notes for `offs`, please see the comment for `emitter::emitIns_S_R`. + */ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - // assert(offs >= 0); ssize_t imm; - emitAttr size = EA_SIZE(attr); // it's better confirm attr with ins. 
+ emitAttr size = EA_SIZE(attr); #ifdef DEBUG switch (ins) @@ -829,12 +857,6 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va case INS_ld_d: case INS_fld_d: - // case INS_lwl: - // case INS_lwr: - - // case INS_ldl: - // case INS_ldr: - // assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size)); break; case INS_lea: @@ -842,7 +864,7 @@ break; default: - NYI("emitIns_R_S"); // FP locals? + NYI("emitIns_R_S"); return; } // end switch (ins) @@ -896,13 +918,11 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va code = emitInsCode(ins); D_INST_2RI12(code, reg1 /* & 0x1f*/, REG_RA, imm3 ? imm2 - imm3 : imm2); } - // reg2 = REG_RA; } instrDesc* id = emitNewInstr(attr); id->idReg1(reg1); - // id->idReg2(reg2);//not used. id->idIns(ins); diff --git a/src/coreclr/jit/gentree.cpp index 658cb84f4cf3d..f9eb57fe549ad 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3160,7 +3160,7 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ *pCostEx += idx->GetCostEx(); *pCostSz += idx->GetCostSz(); } - // TODO: workround, should amend for LoongArch64. + // TODO-LOONGARCH64: workaround, should amend for LoongArch64. if (cns != 0) { if (cns >= (4096 * genTypeSize(type))) @@ -3587,7 +3587,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) case GT_CNS_STR: case GT_CNS_LNG: case GT_CNS_INT: - // TODO: workround, should amend for LoongArch64. + // TODO-LOONGARCH64: workaround, should amend for LoongArch64. costEx = 4; costSz = 4; goto COMMON_CNS; @@ -3653,7 +3653,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 4; } #elif defined(TARGET_LOONGARCH64) - // TODO: workround, should amend for LoongArch64. + // TODO-LOONGARCH64: workaround, should amend for LoongArch64. 
costEx = 2; costSz = 8; #else @@ -3830,7 +3830,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 6; } #elif defined(TARGET_LOONGARCH64) - // TODO: workround, should amend for LoongArch64. + // TODO-LOONGARCH64: workaround, should amend for LoongArch64. costEx = 1; costSz = 2; if (isflt || varTypeIsFloating(op1->TypeGet())) @@ -6025,6 +6025,10 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) case TYP_INT: #ifdef TARGET_LOONGARCH64 case TYP_UINT: + // For LoongArch64, the register $r0 is always const-zero with 64bits-width. + // Besides the instructions' operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // So for UINT type, LoongArch64 can't share with INT like AMD64 and ARM64. #endif zero = gtNewIconNode(0); break; @@ -13657,7 +13661,12 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) case TYP_INT: #ifdef TARGET_LOONGARCH64 - assert(tree->TypeIs(TYP_INT) || tree->TypeIs(TYP_LONG) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be sign-extended by default. + // e.g. 'ld_w $r4, $r5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be sign-extended by default. 
+ assert(tree->TypeIs(TYP_INT, TYP_LONG) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); #else assert(tree->TypeIs(TYP_INT) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); #endif @@ -21873,30 +21882,30 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, #elif defined(TARGET_LOONGARCH64) assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); - uint32_t numFloatFields = comp->info.compCompHnd->getLoongArch64PassStructInRegisterFlags(retClsHnd); - BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + uint32_t floatFieldFlags = comp->info.compCompHnd->getLoongArch64PassStructInRegisterFlags(retClsHnd); + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); - if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { - assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_FIRST) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) { - assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? 
comp->getJitGCType(gcPtrs[1]) : TYP_INT; comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { - assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; - m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } else { @@ -22100,13 +22109,13 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const var_types regType = GetReturnRegType(idx); if (idx == 0) { - resultReg = varTypeIsIntegralOrI(regType) ? REG_INTRET : REG_FLOATRET; // V0 or F0 + resultReg = varTypeIsIntegralOrI(regType) ? REG_INTRET : REG_FLOATRET; // A0 or F0 } else { noway_assert(idx < 2); // Up to 2 return registers for two-float-field structs if (varTypeIsIntegralOrI(regType)) - resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // V0 or V1 + resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // A0 or A1 else // if (!varTypeIsIntegralOrI(regType)) resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? 
REG_FLOATRET : REG_FLOATRET_1; // F0 or F1 } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index d6e424baf0639..394d219a99dd7 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -8444,7 +8444,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni { return (varTypeIsIntegral(calleeRetType) || isCalleeRetTypMBEnreg) && (callerRetTypeSize == calleeRetTypeSize); } -#endif // TARGET_AMD64 || TARGET_ARM64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 return false; } @@ -10296,7 +10296,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, return impAssignMultiRegTypeToVar(op, retClsHnd DEBUGARG(unmgdCallConv)); } -#endif // FEATURE_MULTIREG_RET && TARGET_ARM64 +#endif // FEATURE_MULTIREG_RET && (TARGET_ARM64 || TARGET_LOONGARCH64) if (!op->IsCall() || !op->AsCall()->TreatAsHasRetBufArg(this)) { @@ -11313,6 +11313,11 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr #ifdef TARGET_LOONGARCH64 if (op1->TypeGet() == TYP_INT && op1->gtOper == GT_CNS_INT) { + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. op1->AsIntCon()->gtIconVal = fUnsigned ? (uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; op1->gtType = TYP_LONG; @@ -11329,6 +11334,11 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr #ifdef TARGET_LOONGARCH64 if (op2->TypeGet() == TYP_INT && op2->gtOper == GT_CNS_INT) { + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 
'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. op2->AsIntCon()->gtIconVal = fUnsigned ? (uint32_t)op2->AsIntCon()->gtIconVal : op2->AsIntCon()->gtIconVal; op2->gtType = TYP_LONG; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 8f27dd6c231d9..83099e7018786 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -67,7 +67,7 @@ const char* CodeGen::genInsName(instruction ins) #include "instrs.h" #elif defined(TARGET_LOONGARCH64) - #define INSTS(id, nm, fp, ldst, fmt, e1) nm, + #define INST(id, nm, fp, ldst, fmt, e1) nm, #include "instrs.h" #else @@ -542,9 +542,7 @@ void CodeGen::inst_RV_RV_RV(instruction ins, { #ifdef TARGET_ARM GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3, flags); -#elif defined(TARGET_LOONGARCH64) - GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); -#elif defined(TARGET_XARCH) +#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); #else NYI("inst_RV_RV_RV"); @@ -902,11 +900,12 @@ void CodeGen::inst_RV_TT(instruction ins, } #else // !TARGET_ARM #ifdef TARGET_LOONGARCH64 + // For LoongArch64-ABI, the float arg might be passed by integer register, + // when there is no float register left but there is integer register(s) left. if (emitter::isFloatReg(reg)) assert((ins == INS_fld_d) || (ins == INS_fld_s)); else if (emitter::isGeneralRegister(reg) && (ins != INS_lea)) - { // TODO should amend for LOONGARCH64 !!! - // assert((ins==INS_ld_d) || (ins==INS_ld_w)); + { ins = size == EA_4BYTE ? 
INS_ld_w : INS_ld_d; } #endif @@ -1493,9 +1492,7 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) */ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) { -#ifdef TARGET_LOONGARCH64 - assert(!"unimplemented yet on LoongArch64 for unused."); -#endif + NYI_LOONGARCH64("ins_Move_Extend"); instruction ins = INS_invalid; @@ -1678,8 +1675,6 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* } #elif defined(TARGET_ARM64) return INS_ldr; -//#elif defined(TARGET_LOONGARCH64) -// //TODO: add SIMD for LoongArch64. #else assert(!"ins_Load with SIMD type"); #endif @@ -1780,7 +1775,6 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* } else { - // assert((TYP_LONG == srcType) || (TYP_ULONG == srcType)); ins = INS_ld_d; // default ld_d. } #else @@ -2008,7 +2002,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = aligned ? INS_stx_h : INS_st_h; else if ((TYP_INT == dstType) || (TYP_UINT == dstType)) ins = aligned ? INS_stx_w : INS_st_w; - else // if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) + else ins = aligned ? INS_stx_d : INS_st_d; #else NYI("ins_Store"); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 2021f0251278a..87b004eaf7be1 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -52,12 +52,9 @@ enum instruction : unsigned INS_lea, // Not a real instruction. It is used for load the address of stack locals #elif defined(TARGET_LOONGARCH64) - #define INSTS(id, nm, fp, ldst, fmt, e1) INS_##id, + #define INST(id, nm, fp, ldst, fmt, e1) INS_##id, #include "instrs.h" - //INS_dneg, // Not a real instruction. It will be translated to dsubu. - //INS_neg, // Not a real instruction. It will be translated to subu. - //INS_not, // Not a real instruction. It will be translated to nor. INS_lea, // Not a real instruction. 
It is used for load the address of stack locals #else #error Unsupported target architecture @@ -152,7 +149,7 @@ enum insFlags : uint32_t INS_FLAGS_DONT_CARE = 0x00, }; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // TODO-Cleanup: Move 'insFlags' under TARGET_ARM enum insFlags: unsigned { @@ -160,13 +157,6 @@ enum insFlags: unsigned INS_FLAGS_SET = 0x01, INS_FLAGS_DONT_CARE = 0x02, }; -#elif defined(TARGET_LOONGARCH64) -enum insFlags: unsigned -{ - INS_FLAGS_NOT_SET = 0x00, - INS_FLAGS_SET = 0x01, - INS_FLAGS_DONT_CARE = 0x02, -}; #else #error Unsupported target architecture #endif @@ -298,6 +288,7 @@ enum insBarrier : unsigned INS_BARRIER_OSHLD = 1, INS_BARRIER_OSHST = 2, INS_BARRIER_OSH = 3, + INS_BARRIER_NSHLD = 5, INS_BARRIER_NSHST = 6, INS_BARRIER_NSH = 7, @@ -321,11 +312,8 @@ enum insOpts : unsigned INS_OPTS_J, // see ::emitIns_J(). INS_OPTS_J_cond, // see ::emitIns_J_cond_la(). INS_OPTS_I, // see ::emitIns_I_la(). - //INS_OPTS_J2, // see ::emitIns_J(). INS_OPTS_C, // see ::emitIns_Call(). INS_OPTS_RELOC, // see ::emitIns_R_AI(). - //INS_OPTS_, // see ::(). - //INS_OPTS_, // see ::(). }; enum insBarrier : unsigned diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h index 3e54bce650d88..99cf4304a6200 100644 --- a/src/coreclr/jit/instrsloongarch64.h +++ b/src/coreclr/jit/instrsloongarch64.h @@ -21,8 +21,8 @@ #error Unexpected target type #endif -#ifndef INSTS -#error INSTS must be defined before including this file. +#ifndef INST +#error INST must be defined before including this file. #endif /*****************************************************************************/ @@ -35,30 +35,30 @@ // emitInsMayWriteMultipleRegs in emitLoongarch64.cpp. 
// clang-format off -INSTS(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE) +INST(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE) -INSTS(nop , "nop", 0, 0, IF_LA, 0x03400000) +INST(nop , "nop", 0, 0, IF_LA, 0x03400000) ////INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number. -INSTS(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000) -INSTS(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100) +INST(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000) +INST(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100) -INSTS(beq, "beq", 0, 0, IF_LA, 0x58000000) -INSTS(bne, "bne", 0, 0, IF_LA, 0x5c000000) +INST(beq, "beq", 0, 0, IF_LA, 0x58000000) +INST(bne, "bne", 0, 0, IF_LA, 0x5c000000) -INSTS(blt, "blt", 0, 0, IF_LA, 0x60000000) -INSTS(bge, "bge", 0, 0, IF_LA, 0x64000000) -INSTS(bltu, "bltu", 0, 0, IF_LA, 0x68000000) -INSTS(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000) +INST(blt, "blt", 0, 0, IF_LA, 0x60000000) +INST(bge, "bge", 0, 0, IF_LA, 0x64000000) +INST(bltu, "bltu", 0, 0, IF_LA, 0x68000000) +INST(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000) ////R_I. -INSTS(beqz, "beqz", 0, 0, IF_LA, 0x40000000) -INSTS(bnez, "bnez", 0, 0, IF_LA, 0x44000000) +INST(beqz, "beqz", 0, 0, IF_LA, 0x40000000) +INST(bnez, "bnez", 0, 0, IF_LA, 0x44000000) ////I. -INSTS(b, "b", 0, 0, IF_LA, 0x50000000) -INSTS(bl, "bl", 0, 0, IF_LA, 0x54000000) +INST(b, "b", 0, 0, IF_LA, 0x50000000) +INST(bl, "bl", 0, 0, IF_LA, 0x54000000) //////////////////////////////////////////////// ////NOTE: Begin @@ -67,217 +67,217 @@ INSTS(bl, "bl", 0, 0, IF_LA, 0x54000000) // enum name FP LD/ST FMT ENCODE ////NOTE: mov must be the first one !!! more info to see emitter::emitInsMayWriteToGCReg(). -INSTS(mov, "mov", 0, 0, IF_LA, 0x03800000) +INST(mov, "mov", 0, 0, IF_LA, 0x03800000) // mov rd,rj //NOTE: On loongarch, usually it's name is move, but here for compatible using mov. // In fact, mov is an alias commond, "ori rd,rj,0" -INSTS(dneg, "dneg", 0, 0, IF_LA, 0x00118000) +INST(dneg, "dneg", 0, 0, IF_LA, 0x00118000) //dneg is a alias instruction. 
//sub_d rd, zero, rk -INSTS(neg, "neg", 0, 0, IF_LA, 0x00110000) +INST(neg, "neg", 0, 0, IF_LA, 0x00110000) //neg is a alias instruction. //sub_w rd, zero, rk -INSTS(not, "not", 0, 0, IF_LA, 0x00140000) +INST(not, "not", 0, 0, IF_LA, 0x00140000) //not is a alias instruction. //nor rd, rj, zero // enum:id name FP LD/ST Formate ENCODE ////R_R_R. -INSTS(add_w, "add.w", 0, 0, IF_LA, 0x00100000) -INSTS(add_d, "add.d", 0, 0, IF_LA, 0x00108000) -INSTS(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) -INSTS(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) - -INSTS(and, "and", 0, 0, IF_LA, 0x00148000) -INSTS(or, "or", 0, 0, IF_LA, 0x00150000) -INSTS(nor, "nor", 0, 0, IF_LA, 0x00140000) -INSTS(xor, "xor", 0, 0, IF_LA, 0x00158000) -INSTS(andn, "andn", 0, 0, IF_LA, 0x00168000) -INSTS(orn, "orn", 0, 0, IF_LA, 0x00160000) - -INSTS(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) -INSTS(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) -INSTS(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) -INSTS(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) -INSTS(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) -INSTS(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) -INSTS(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) -INSTS(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) -INSTS(div_w, "div.w", 0, 0, IF_LA, 0x00200000) -INSTS(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) -INSTS(div_d, "div.d", 0, 0, IF_LA, 0x00220000) -INSTS(div_du, "div.du", 0, 0, IF_LA, 0x00230000) -INSTS(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) -INSTS(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) -INSTS(mod_d, "mod.d", 0, 0, IF_LA, 0x00228000) -INSTS(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) - -INSTS(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) -INSTS(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) -INSTS(sra_w, "sra.w", 0, 0, IF_LA, 0x00180000) -INSTS(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) -INSTS(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) -INSTS(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) -INSTS(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) -INSTS(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) - 
-INSTS(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) -INSTS(masknez, "masknez", 0, 0, IF_LA, 0x00138000) - -INSTS(slt, "slt", 0, 0, IF_LA, 0x00120000) -INSTS(sltu, "sltu", 0, 0, IF_LA, 0x00128000) - -INSTS(amswap_w, "amswap.w", 0, 0, IF_LA, 0x38600000) -INSTS(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) -INSTS(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) -INSTS(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) -INSTS(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) -INSTS(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) -INSTS(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) -INSTS(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) -INSTS(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) -INSTS(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) -INSTS(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) -INSTS(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) -INSTS(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) -INSTS(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) -INSTS(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) -INSTS(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) -INSTS(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) -INSTS(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) -INSTS(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) -INSTS(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) -INSTS(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) -INSTS(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) -INSTS(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) -INSTS(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) -INSTS(ammin_w, "ammin.w", 0, 0, IF_LA, 0x38660000) -INSTS(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) -INSTS(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) -INSTS(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) -INSTS(ammax_wu, "ammax.wu", 0, 0, IF_LA, 0x38670000) -INSTS(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) -INSTS(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) -INSTS(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) -INSTS(ammin_wu, "ammin.wu", 0, 
0, IF_LA, 0x38680000) -INSTS(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) -INSTS(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) -INSTS(ammin_db_du, "ammin_db.du", 0, 0, IF_LA, 0x38718000) - -INSTS(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) -INSTS(crc_w_h_w, "crc.w.h.w", 0, 0, IF_LA, 0x00248000) -INSTS(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) -INSTS(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) -INSTS(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) -INSTS(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) -INSTS(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) -INSTS(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) +INST(add_w, "add.w", 0, 0, IF_LA, 0x00100000) +INST(add_d, "add.d", 0, 0, IF_LA, 0x00108000) +INST(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) +INST(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) + +INST(and, "and", 0, 0, IF_LA, 0x00148000) +INST(or, "or", 0, 0, IF_LA, 0x00150000) +INST(nor, "nor", 0, 0, IF_LA, 0x00140000) +INST(xor, "xor", 0, 0, IF_LA, 0x00158000) +INST(andn, "andn", 0, 0, IF_LA, 0x00168000) +INST(orn, "orn", 0, 0, IF_LA, 0x00160000) + +INST(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) +INST(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) +INST(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) +INST(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) +INST(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) +INST(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) +INST(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) +INST(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) +INST(div_w, "div.w", 0, 0, IF_LA, 0x00200000) +INST(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) +INST(div_d, "div.d", 0, 0, IF_LA, 0x00220000) +INST(div_du, "div.du", 0, 0, IF_LA, 0x00230000) +INST(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) +INST(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) +INST(mod_d, "mod.d", 0, 0, IF_LA, 0x00228000) +INST(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) + +INST(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) +INST(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) +INST(sra_w, 
"sra.w", 0, 0, IF_LA, 0x00180000) +INST(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) +INST(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) +INST(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) +INST(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) +INST(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) + +INST(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) +INST(masknez, "masknez", 0, 0, IF_LA, 0x00138000) + +INST(slt, "slt", 0, 0, IF_LA, 0x00120000) +INST(sltu, "sltu", 0, 0, IF_LA, 0x00128000) + +INST(amswap_w, "amswap.w", 0, 0, IF_LA, 0x38600000) +INST(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) +INST(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) +INST(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) +INST(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) +INST(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) +INST(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) +INST(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) +INST(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) +INST(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) +INST(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) +INST(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) +INST(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) +INST(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) +INST(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) +INST(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) +INST(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) +INST(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) +INST(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) +INST(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) +INST(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) +INST(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) +INST(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) +INST(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) +INST(ammin_w, "ammin.w", 0, 0, IF_LA, 0x38660000) +INST(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) +INST(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) +INST(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) +INST(ammax_wu, 
"ammax.wu", 0, 0, IF_LA, 0x38670000) +INST(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) +INST(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) +INST(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) +INST(ammin_wu, "ammin.wu", 0, 0, IF_LA, 0x38680000) +INST(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) +INST(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) +INST(ammin_db_du, "ammin_db.du", 0, 0, IF_LA, 0x38718000) + +INST(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) +INST(crc_w_h_w, "crc.w.h.w", 0, 0, IF_LA, 0x00248000) +INST(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) +INST(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) +INST(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) +INST(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) +INST(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) +INST(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) ////R_R_R_I. -INSTS(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) -INSTS(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) -INSTS(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) +INST(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) +INST(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) +INST(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) -INSTS(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) -INSTS(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) +INST(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) +INST(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) -INSTS(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) +INST(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) ////R_I. 
-INSTS(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) -INSTS(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) +INST(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) +INST(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) -INSTS(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) -INSTS(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) -INSTS(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) -INSTS(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) +INST(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) +INST(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) +INST(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) +INST(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) ////R_R. -INSTS(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) -INSTS(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) -INSTS(clo_w, "clo.w", 0, 0, IF_LA, 0x00001000) -INSTS(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) -INSTS(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) -INSTS(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) -INSTS(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) -INSTS(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) -INSTS(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) -INSTS(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) -INSTS(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) -INSTS(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) -INSTS(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) -INSTS(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) -INSTS(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) -INSTS(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) -INSTS(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) -INSTS(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) -INSTS(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) -INSTS(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) -INSTS(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) -INSTS(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) -INSTS(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) -INSTS(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) +INST(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) +INST(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) +INST(clo_w, 
"clo.w", 0, 0, IF_LA, 0x00001000) +INST(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) +INST(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) +INST(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) +INST(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) +INST(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) +INST(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) +INST(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) +INST(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) +INST(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) +INST(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) +INST(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) +INST(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) +INST(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) +INST(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) +INST(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) +INST(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) +INST(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) +INST(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) +INST(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) +INST(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) +INST(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) ////R_R_I_I. -INSTS(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) -INSTS(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) -INSTS(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) -INSTS(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) +INST(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) +INST(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) +INST(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) +INST(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) ////Load. 
-INSTS(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) -INSTS(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) -INSTS(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) -INSTS(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) -INSTS(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) -INSTS(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) -INSTS(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) - -INSTS(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) -INSTS(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) -INSTS(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) -INSTS(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) - -INSTS(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) -INSTS(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) -INSTS(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) -INSTS(ldx_d, "ldx.d", 0, LD, IF_LA, 0x380c0000) -INSTS(ldx_bu, "ldx.bu", 0, LD, IF_LA, 0x38200000) -INSTS(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) -INSTS(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) - -INSTS(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) -INSTS(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) -INSTS(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) -INSTS(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) -INSTS(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) -INSTS(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) -INSTS(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) -INSTS(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) +INST(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) +INST(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) +INST(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) +INST(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) +INST(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) +INST(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) +INST(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) + +INST(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) +INST(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) +INST(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) +INST(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) + +INST(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) +INST(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) +INST(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) +INST(ldx_d, "ldx.d", 0, LD, IF_LA, 
0x380c0000) +INST(ldx_bu, "ldx.bu", 0, LD, IF_LA, 0x38200000) +INST(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) +INST(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) + +INST(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) +INST(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) +INST(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) +INST(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) +INST(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) +INST(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) +INST(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) +INST(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) ////R_R_I. -INSTS(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) -INSTS(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) -INSTS(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) -INSTS(slti, "slti", 0, 0, IF_LA, 0x02000000) +INST(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) +INST(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) +INST(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) +INST(slti, "slti", 0, 0, IF_LA, 0x02000000) -INSTS(sltui, "sltui", 0, 0, IF_LA, 0x02400000) -INSTS(andi, "andi", 0, 0, IF_LA, 0x03400000) -INSTS(ori, "ori", 0, 0, IF_LA, 0x03800000) -INSTS(xori, "xori", 0, 0, IF_LA, 0x03c00000) +INST(sltui, "sltui", 0, 0, IF_LA, 0x02400000) +INST(andi, "andi", 0, 0, IF_LA, 0x03400000) +INST(ori, "ori", 0, 0, IF_LA, 0x03800000) +INST(xori, "xori", 0, 0, IF_LA, 0x03c00000) -INSTS(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) -INSTS(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) -INSTS(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) -INSTS(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) -INSTS(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) -INSTS(srli_d, "srli.d", 0, 0, IF_LA, 0x00450000) -INSTS(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) -INSTS(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) +INST(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) +INST(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) +INST(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) +INST(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) +INST(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) +INST(srli_d, "srli.d", 
0, 0, IF_LA, 0x00450000) +INST(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) +INST(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) -INSTS(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) +INST(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) -INSTS(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) +INST(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) ////NOTE: jirl must be the last one !!! more info to see emitter::emitInsMayWriteToGCReg(). //////////////////////////////////////////////// @@ -285,211 +285,211 @@ INSTS(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) //// the above instructions will be used by emitter::emitInsMayWriteToGCReg(). //////////////////////////////////////////////// ////Store. -INSTS(st_b, "st.b", 0, ST, IF_LA, 0x29000000) -INSTS(st_h, "st.h", 0, ST, IF_LA, 0x29400000) -INSTS(st_w, "st.w", 0, ST, IF_LA, 0x29800000) -INSTS(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) - -INSTS(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) -INSTS(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) -INSTS(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) -INSTS(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) - -INSTS(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) -INSTS(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) -INSTS(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) -INSTS(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) -INSTS(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) -INSTS(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) -INSTS(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) -INSTS(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) -INSTS(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) -INSTS(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) -INSTS(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) -INSTS(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) - -INSTS(dbar, "dbar", 0, 0, IF_LA, 0x38720000) -INSTS(ibar, "ibar", 0, 0, IF_LA, 0x38728000) - -INSTS(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) -INSTS(break, "break", 0, 0, IF_LA, 0x002a0005) - -INSTS(asrtle_d, "asrtle.d", 0, 0, IF_LA, 0x00010000) -INSTS(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) - -INSTS(preld, "preld", 0, LD, 
IF_LA, 0x2ac00000) -INSTS(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) +INST(st_b, "st.b", 0, ST, IF_LA, 0x29000000) +INST(st_h, "st.h", 0, ST, IF_LA, 0x29400000) +INST(st_w, "st.w", 0, ST, IF_LA, 0x29800000) +INST(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) + +INST(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) +INST(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) +INST(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) +INST(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) + +INST(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) +INST(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) +INST(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) +INST(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) +INST(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) +INST(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) +INST(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) +INST(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) +INST(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) +INST(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) +INST(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) +INST(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) + +INST(dbar, "dbar", 0, 0, IF_LA, 0x38720000) +INST(ibar, "ibar", 0, 0, IF_LA, 0x38728000) + +INST(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) +INST(break, "break", 0, 0, IF_LA, 0x002a0005) + +INST(asrtle_d, "asrtle.d", 0, 0, IF_LA, 0x00010000) +INST(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) + +INST(preld, "preld", 0, LD, IF_LA, 0x2ac00000) +INST(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) ////Float instructions. ////R_R_R. 
-INSTS(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) -INSTS(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) -INSTS(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) -INSTS(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) -INSTS(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) -INSTS(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) -INSTS(fdiv_s, "fdiv.s", 0, 0, IF_LA, 0x01068000) -INSTS(fdiv_d, "fdiv.d", 0, 0, IF_LA, 0x01070000) - -INSTS(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) -INSTS(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) -INSTS(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) -INSTS(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) -INSTS(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) -INSTS(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) -INSTS(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) -INSTS(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) - -INSTS(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) -INSTS(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) - -INSTS(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) -INSTS(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) - -INSTS(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) -INSTS(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) -INSTS(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) -INSTS(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) - -INSTS(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) -INSTS(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) -INSTS(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) -INSTS(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) -INSTS(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) -INSTS(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) -INSTS(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) -INSTS(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) +INST(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) +INST(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) +INST(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) +INST(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) +INST(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) +INST(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) +INST(fdiv_s, "fdiv.s", 0, 0, 
IF_LA, 0x01068000) +INST(fdiv_d, "fdiv.d", 0, 0, IF_LA, 0x01070000) + +INST(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) +INST(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) +INST(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) +INST(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) +INST(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) +INST(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) +INST(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) +INST(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) + +INST(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) +INST(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) + +INST(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) +INST(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) + +INST(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) +INST(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) +INST(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) +INST(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) + +INST(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) +INST(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) +INST(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) +INST(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) +INST(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) +INST(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) +INST(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) +INST(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) ////R_R_R_R. 
-INSTS(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) -INSTS(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) -INSTS(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) -INSTS(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) -INSTS(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) -INSTS(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) -INSTS(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) -INSTS(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) +INST(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) +INST(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) +INST(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) +INST(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) +INST(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) +INST(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) +INST(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) +INST(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) ////R_R. -INSTS(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) -INSTS(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) -INSTS(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) -INSTS(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) - -INSTS(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) -INSTS(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) -INSTS(frsqrt_s, "frsqrt.s", 0, 0, IF_LA, 0x01146400) -INSTS(frsqrt_d, "frsqrt.d", 0, 0, IF_LA, 0x01146800) -INSTS(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) -INSTS(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) -INSTS(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) -INSTS(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) -INSTS(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) -INSTS(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) - -INSTS(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) -INSTS(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) -INSTS(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) -INSTS(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) -INSTS(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) -INSTS(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) -INSTS(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) -INSTS(ftint_w_d, 
"ftint.w.d", 0, 0, IF_LA, 0x011b0800) -INSTS(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) -INSTS(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) -INSTS(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) -INSTS(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) -INSTS(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) -INSTS(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) -INSTS(ftintrp_w_s, "ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) -INSTS(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) -INSTS(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) -INSTS(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) -INSTS(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) -INSTS(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) -INSTS(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) -INSTS(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) -INSTS(ftintrne_w_s, "ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) -INSTS(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) -INSTS(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) -INSTS(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) -INSTS(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) -INSTS(frint_d, "frint.d", 0, 0, IF_LA, 0x011e4800) - -INSTS(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) -INSTS(fmov_d, "fmov.d", 0, 0, IF_LA, 0x01149800) - -INSTS(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) -INSTS(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) -INSTS(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) -INSTS(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) -INSTS(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) -INSTS(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) +INST(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) +INST(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) +INST(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) +INST(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) + +INST(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) +INST(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) +INST(frsqrt_s, 
"frsqrt.s", 0, 0, IF_LA, 0x01146400) +INST(frsqrt_d, "frsqrt.d", 0, 0, IF_LA, 0x01146800) +INST(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) +INST(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) +INST(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) +INST(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) +INST(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) +INST(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) + +INST(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) +INST(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) +INST(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) +INST(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) +INST(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) +INST(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) +INST(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) +INST(ftint_w_d, "ftint.w.d", 0, 0, IF_LA, 0x011b0800) +INST(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) +INST(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) +INST(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) +INST(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) +INST(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) +INST(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) +INST(ftintrp_w_s, "ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) +INST(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) +INST(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) +INST(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) +INST(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) +INST(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) +INST(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) +INST(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) +INST(ftintrne_w_s, "ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) +INST(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) +INST(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) +INST(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) +INST(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) +INST(frint_d, "frint.d", 0, 0, IF_LA, 
0x011e4800) + +INST(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) +INST(fmov_d, "fmov.d", 0, 0, IF_LA, 0x01149800) + +INST(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) +INST(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) +INST(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) +INST(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) +INST(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) +INST(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) //// -INSTS(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) -INSTS(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) -INSTS(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) -INSTS(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) -INSTS(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) -INSTS(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) +INST(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) +INST(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) +INST(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) +INST(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) +INST(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) +INST(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) ////R_R_I. 
-INSTS(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) -INSTS(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) -INSTS(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) -INSTS(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) -INSTS(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) -INSTS(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) -INSTS(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) -INSTS(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) -INSTS(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) -INSTS(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) -INSTS(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) - -INSTS(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) -INSTS(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) -INSTS(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) -INSTS(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) -INSTS(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) -INSTS(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) -INSTS(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) -INSTS(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) -INSTS(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) -INSTS(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) -INSTS(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) - -INSTS(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) -INSTS(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) -INSTS(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) -INSTS(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) -INSTS(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) -INSTS(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) -INSTS(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) -INSTS(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) -INSTS(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) -INSTS(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) -INSTS(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) - -INSTS(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 
0x0c108000) -INSTS(fcmp_sun_s, "fcmp.sun.s", 0, 0, IF_LA, 0x0c148000) -INSTS(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) -INSTS(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) -INSTS(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) -INSTS(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) -INSTS(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) -INSTS(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) -INSTS(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) -INSTS(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) -INSTS(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) +INST(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) +INST(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) +INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) +INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) +INST(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) +INST(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) +INST(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) +INST(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) +INST(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) +INST(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) +INST(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) + +INST(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) +INST(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) +INST(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) +INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) +INST(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) +INST(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) +INST(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) +INST(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) +INST(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) +INST(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) +INST(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) + +INST(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) +INST(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) 
+INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) +INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) +INST(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) +INST(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) +INST(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) +INST(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) +INST(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) +INST(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) +INST(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) + +INST(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 0x0c108000) +INST(fcmp_sun_s, "fcmp.sun.s", 0, 0, IF_LA, 0x0c148000) +INST(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) +INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) +INST(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) +INST(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) +INST(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) +INST(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) +INST(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) +INST(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) +INST(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) ////R_R_I. 
-INSTS(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) -INSTS(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) -INSTS(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) -INSTS(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) +INST(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) +INST(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) +INST(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) +INST(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) // clang-format on /*****************************************************************************/ -#undef INSTS +#undef INST /*****************************************************************************/ diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index f6254ecdb79b3..279ac5292ec0d 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1003,19 +1003,16 @@ class LinearScan : public LinearScanInterface void buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, LsraLocation currentLoc, GenTree* node); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE -#if defined(UNIX_AMD64_ABI) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // For AMD64 on SystemV machines. This method // is called as replacement for raUpdateRegStateForArg // that is used on Windows. On System V systems a struct can be passed // partially using registers from the 2 register files. - void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc); -#endif // defined(UNIX_AMD64_ABI) - -#if defined(TARGET_LOONGARCH64) + // // For LoongArch64's ABI, a struct can be passed // partially using registers from the 2 register files. 
- void LoongArch64UpdateRegStateForArg(LclVarDsc* argDsc); -#endif + void UpdateRegStateForStructArg(LclVarDsc* argDsc); +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // Update reg state for an incoming register argument void updateRegStateForArg(LclVarDsc* argDsc); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 90473a99ea003..4df1f14b7a689 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1962,9 +1962,11 @@ void LinearScan::insertZeroInitRefPositions() } } -#if defined(UNIX_AMD64_ABI) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ -// unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems. +// UpdateRegStateForStructArg: +// Sets the register state for an argument of type STRUCT. +// This is shared between AMD64's SystemV systems and the LoongArch64 ABI. // // Arguments: // argDsc - the LclVarDsc for the argument of interest @@ -1973,7 +1975,7 @@ void LinearScan::insertZeroInitRefPositions() // See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp // for how state for argument is updated for unix non-structs and Windows AMD64 structs.
// -void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) +void LinearScan::UpdateRegStateForStructArg(LclVarDsc* argDsc) { assert(varTypeIsStruct(argDsc)); RegState* intRegState = &compiler->codeGen->intRegState; @@ -2008,44 +2010,7 @@ void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) } } -#endif // defined(UNIX_AMD64_ABI) - -#ifdef TARGET_LOONGARCH64 -void LinearScan::LoongArch64UpdateRegStateForArg(LclVarDsc* argDsc) -{ - assert(varTypeIsStruct(argDsc)); - RegState* intRegState = &compiler->codeGen->intRegState; - RegState* floatRegState = &compiler->codeGen->floatRegState; - - if ((argDsc->GetArgReg() != REG_STK) && (argDsc->GetArgReg() != REG_NA)) - { - if (genRegMask(argDsc->GetArgReg()) & (RBM_ALLFLOAT)) - { - assert(genRegMask(argDsc->GetArgReg()) & (RBM_FLTARG_REGS)); - floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetArgReg()); - } - else - { - assert(genRegMask(argDsc->GetArgReg()) & (RBM_ARG_REGS)); - intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetArgReg()); - } - } - - if ((argDsc->GetOtherArgReg() != REG_STK) && (argDsc->GetOtherArgReg() != REG_NA)) - { - if (genRegMask(argDsc->GetOtherArgReg()) & (RBM_ALLFLOAT)) - { - assert(genRegMask(argDsc->GetOtherArgReg()) & (RBM_FLTARG_REGS)); - floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetOtherArgReg()); - } - else - { - assert(genRegMask(argDsc->GetOtherArgReg()) & (RBM_ARG_REGS)); - intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetOtherArgReg()); - } - } -} -#endif +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate @@ -2068,22 +2033,15 @@ void LinearScan::LoongArch64UpdateRegStateForArg(LclVarDsc* argDsc) // void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) { -#if defined(UNIX_AMD64_ABI) - // For System V AMD64 calls the argDsc can have 2 
registers (for structs.) - // Handle them here. +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) + // For SystemV-AMD64 and LoongArch64 calls the argDsc + // can have 2 registers (for structs.). Handle them here. if (varTypeIsStruct(argDsc)) { - unixAmd64UpdateRegStateForArg(argDsc); + UpdateRegStateForStructArg(argDsc); } else -#endif // defined(UNIX_AMD64_ABI) -#if defined(TARGET_LOONGARCH64) - if (varTypeIsStruct(argDsc)) - { - LoongArch64UpdateRegStateForArg(argDsc); - } - else -#endif +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) { RegState* intRegState = &compiler->codeGen->intRegState; RegState* floatRegState = &compiler->codeGen->floatRegState; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index b545254743700..164bb0f0a6643 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5515,6 +5515,11 @@ void Compiler::fgMoveOpsLeft(GenTree* tree) #ifdef TARGET_LOONGARCH64 else if ((op1->TypeGet() == TYP_LONG) && (ad2->TypeGet() == TYP_INT)) { + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. new_op1->gtType = TYP_LONG; } #endif @@ -5812,7 +5817,14 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) noway_assert(index2 != nullptr); } -#ifndef TARGET_LOONGARCH64 +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. 
+ GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); +#else // Next introduce a GT_BOUNDS_CHECK node var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. @@ -5832,8 +5844,6 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) { arrLen = gtNewCastNode(bndsChkType, arrLen, true, bndsChkType); } -#else - GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); #endif GenTreeBoundsChk* arrBndsChk = new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(index, arrLen, SCK_RNGCHK_FAIL); @@ -5854,6 +5864,11 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) #ifdef TARGET_64BIT #ifndef TARGET_LOONGARCH64 // Widen 'index' on 64-bit targets + // But For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. if (index->TypeGet() != TYP_I_IMPL) { if (index->OperGet() == GT_CNS_INT) @@ -14532,7 +14547,12 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) if (!varTypeIsGC(ad1->TypeGet()) && !varTypeIsGC(op2->TypeGet())) { #ifdef TARGET_LOONGARCH64 - if ((op2->TypeGet() == TYP_LONG) /*&& (op1->TypeGet() == TYP_INT)*/) + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. + if (op2->TypeGet() == TYP_LONG) { op1->gtType = TYP_LONG; } @@ -18255,6 +18275,11 @@ void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZ { // We expect 'addr' to be an address at this point. 
#ifdef TARGET_LOONGARCH64 + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_INT || addr->TypeGet() == TYP_REF); #else diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 4038a80476c78..1a3ad619b3da4 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5124,6 +5124,11 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu noway_assert(tree); #ifndef TARGET_LOONGARCH64 + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. So `LONG != INT(but default is LONG)` noway_assert(genActualType(tree->gtType) == genActualType(srct)); #endif @@ -5294,6 +5299,11 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu { case GT_AND: #ifdef TARGET_LOONGARCH64 + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. 
So `LONG != INT(but default is LONG)` noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op1->gtType))); noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op2->gtType))); #else From 967402ec686cb3afa96275950cf8e81409d98211 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 17 Feb 2022 19:17:07 +0800 Subject: [PATCH 23/46] [LoongArch64] round 3 amend for reviewing by @BruceForstall. --- src/coreclr/jit/codegencommon.cpp | 53 ++++++++------------------ src/coreclr/jit/codegenloongarch64.cpp | 35 +++-------------- src/coreclr/jit/gentree.cpp | 14 ++++--- src/coreclr/jit/instr.h | 5 ++- src/coreclr/jit/morph.cpp | 4 +- src/coreclr/jit/registerloongarch64.h | 3 ++ src/coreclr/jit/targetloongarch64.h | 5 ++- 7 files changed, 42 insertions(+), 77 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 38c98960fcf92..3a67607bc657a 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1863,10 +1863,9 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi } else { -// The code to throw the exception will be generated inline, and -// we will jump around it in the normal non-exception case. + // The code to throw the exception will be generated inline, and + // we will jump around it in the normal non-exception case. -#ifndef TARGET_LOONGARCH64 BasicBlock* tgtBlk = nullptr; emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind); if (reverseJumpKind != jumpKind) @@ -1874,18 +1873,15 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi tgtBlk = genCreateTempLabel(); inst_JMP(reverseJumpKind, tgtBlk); } -#endif genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN); -#ifndef TARGET_LOONGARCH64 // Define the spot for the normal non-exception case to jump to. 
if (tgtBlk != nullptr) { assert(reverseJumpKind != jumpKind); genDefineTempLabel(tgtBlk); } -#endif } } @@ -3393,15 +3389,9 @@ void CodeGen::genFnPrologCalleeRegArgs() tmp_offset = base; tmp_reg = REG_R21; - if ((0 < base) && (base <= 0xfff)) - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); - } - else - { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - } + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); } @@ -3445,15 +3435,9 @@ void CodeGen::genFnPrologCalleeRegArgs() { tmp_offset = base; tmp_reg = REG_R21; - if ((0 < base) && (base <= 0xfff)) - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); - } - else - { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - } + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. 
GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); } else @@ -3471,8 +3455,7 @@ void CodeGen::genFnPrologCalleeRegArgs() baseOffset = 8; base += 8; - GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, REG_SPBASE, - genTotalFrameSize()); + GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); if ((-2048 <= base) && (base < 2048)) { GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, baseOffset); @@ -3483,15 +3466,9 @@ void CodeGen::genFnPrologCalleeRegArgs() { tmp_offset = base; tmp_reg = REG_R21; - if ((0 < base) && (base <= 0xfff)) - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); - } - else - { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - } + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, -8); } else @@ -4912,7 +4889,7 @@ void CodeGen::genEnregisterIncomingStackArgs() #ifdef TARGET_LOONGARCH64 { bool FPbased; - int base = compiler->lvaFrameAddress(varNum, &FPbased); + int base = compiler->lvaFrameAddress(varNum, &FPbased); if ((-2048 <= base) && (base < 2048)) { @@ -4925,8 +4902,8 @@ void CodeGen::genEnregisterIncomingStackArgs() regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; tmp_offset = base; tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8); } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 69e3886056dba..d778aca393385 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1511,7 +1511,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) switch (addrInfo.accessType) { case IAT_VALUE: - //TODO-LOONGARCH64-CQ: using B/BL for optimization. + // TODO-LOONGARCH64-CQ: using B/BL for optimization. case IAT_PVALUE: // Load the address into a register, load indirect and call through a register // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use @@ -2033,20 +2033,8 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) { // This is only possible for a zero-init or bitcast. const bool zeroInit = data->IsIntegralConst(0); -#if 0 - //TODO: supporting the SIMD on LoongArch64. - if (zeroInit && varTypeIsSIMD(targetType)) - { - assert(!varTypeIsSIMD(targetType)); - //assert(targetType == TYP_SIMD8);//TODO:TYP_SIMD16 - assert(targetReg == REG_NA); - GetEmitter()->emitIns_S_R(INS_st_d, EA_8BYTE, REG_R0, varNum, 0); - genUpdateLife(lclNode); - return; - } -#else + // TODO-LOONGAARCH64-CQ: not supporting SIMD. 
assert(!varTypeIsSIMD(targetType)); -#endif if (zeroInit) { @@ -2581,13 +2569,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) { ssize_t intConst = (int)(divisorOp->AsIntCon()->gtIconVal); divisorReg = REG_R21; - if ((-2048 <= intConst) && (intConst <= 0x7ff)) - emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst); - else - { - emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, intConst >> 12); - emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, intConst & 0xfff); - } + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, intConst); } // Only for commutative operations do we check src1 and allow it to be a contained immediate else if (tree->OperIsCommutative()) @@ -2601,13 +2583,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) assert(!divisorOp->isContainedIntOrIImmed()); ssize_t intConst = (int)(src1->AsIntCon()->gtIconVal); Reg1 = REG_R21; - if ((-2048 <= intConst) && (intConst <= 0x7ff)) - emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst); - else - { - emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, intConst >> 12); - emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, intConst & 0xfff); - } + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, intConst); } } else @@ -5842,7 +5818,8 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) (ssize_t)compiler->gsGlobalSecurityCookieAddr); } else - { ////TODO:LoongArch64 should amend for optimize! + { + //TODO-LOONGARCH64: should amend for optimize! 
// GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, // (ssize_t)compiler->gsGlobalSecurityCookieAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index f9eb57fe549ad..ac0ab72012a6e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -6025,10 +6025,10 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) case TYP_INT: #ifdef TARGET_LOONGARCH64 case TYP_UINT: - // For LoongArch64, the register $r0 is always const-zero with 64bits-width. - // Besides the instructions's operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // So for UINT type, LoongArch64 can't share with INT liking AMD64 and ARM64. +// For LoongArch64, the register $r0 is always const-zero with 64bits-width. +// Besides the instructions's operation of the 64bits and 32bits using the whole +// 64bits-width register which is unlike the AMD64 and ARM64. +// So for UINT type, LoongArch64 can't share with INT liking AMD64 and ARM64. #endif zero = gtNewIconNode(0); break; @@ -21898,13 +21898,15 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, comp->compFloatingPointUsed = true; assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; + m_regType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; } else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { comp->compFloatingPointUsed = true; assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? 
comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } else diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 87b004eaf7be1..92b1c6f63f065 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -308,7 +308,7 @@ enum insOpts : unsigned INS_OPTS_RC, // see ::emitIns_R_C(). INS_OPTS_RL, // see ::emitIns_R_L(). - INS_OPTS_JIRL, // see ::emitIns_J_R(). + INS_OPTS_JIRL, // see ::emitIns_J_R(). INS_OPTS_J, // see ::emitIns_J(). INS_OPTS_J_cond, // see ::emitIns_J_cond_la(). INS_OPTS_I, // see ::emitIns_I_la(). @@ -318,6 +318,9 @@ enum insBarrier : unsigned { + // TODO-LOONGARCH64-CQ: all of these are the same value right now. + // These are reserved for future extension. + // Because the LoongArch64 doesn't support these right now. INS_BARRIER_FULL = 0, INS_BARRIER_WMB = INS_BARRIER_FULL,//4, INS_BARRIER_MB = INS_BARRIER_FULL,//16, diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 164bb0f0a6643..5dda47838191d 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5333,7 +5333,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTreeCall::Use* GenTree* arg = fgMakeTmpArgNode(argEntry); // Change the expression to "(tmp=val),tmp" - arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); + arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); #endif // FEATURE_FIXED_OUT_ARGS @@ -5823,7 +5823,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) // And the INT type instruction will be signed-extend by default. // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT // will be signed-extend by default.
- GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); + GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); #else // Next introduce a GT_BOUNDS_CHECK node var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index 0d8beac0e3aac..5519b0639de4c 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -49,6 +49,9 @@ REGDEF(S6, 29, 0x20000000, "s6" ) REGDEF(S7, 30, 0x40000000, "s7" ) REGDEF(S8, 31, 0x80000000, "s8" ) +//NOTE for LoongArch64: +// The `REG_R21` which aliases `REG_X0` is specially reserved !!! +// It can be used only manually and with great care!!! REGALIAS(R21, X0) #define FBASE 32 diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 2bfaea897abef..465e34af66170 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -6,6 +6,10 @@ #error The file should not be included for this platform. #endif +//NOTE for LoongArch64: +// The `REG_R21` which alias `REG_X0` is specially reserved !!! +// It can be used only by manully and should be very careful!!! + // clang-format off #define CPU_LOAD_STORE_ARCH 1 #define CPU_HAS_FP_SUPPORT 1 @@ -39,7 +43,6 @@ #define MAX_MULTIREG_COUNT 2 // Maxiumum number of registers defined by a single instruction (including calls). // This is also the maximum number of registers for a MultiReg node. - #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers #define USER_ARGS_COME_LAST 1 #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround.
For both ARM and AMD64, the frame size is fixed, so we don't really From b9bd532d4b0536e05aa47762222251a8dd8eceb6 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 17 Feb 2022 19:46:17 +0800 Subject: [PATCH 24/46] [LoongArch64] amend the format. --- src/coreclr/jit/codegenloongarch64.cpp | 2 +- src/coreclr/jit/compiler.cpp | 8 +-- src/coreclr/jit/morph.cpp | 94 +++++++++++++------------- src/coreclr/jit/targetloongarch64.h | 2 +- 4 files changed, 54 insertions(+), 52 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index d778aca393385..d9051b85cbe0f 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -5819,7 +5819,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } else { - //TODO-LOONGARCH64: should amend for optimize! + // TODO-LOONGARCH64: should amend for optimize! // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, // (ssize_t)compiler->gsGlobalSecurityCookieAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 9c68c153e33b1..278f6466aca6a 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -935,14 +935,14 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #ifdef TARGET_LOONGARCH64 if (structSize <= (TARGET_POINTER_SIZE * 2)) { - uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); + uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); - if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_ONE) + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) { howToReturnStruct = SPK_PrimitiveType; - useType = structSize > 4 ? TYP_DOUBLE : TYP_FLOAT; + useType = (structSize > 4) ? 
TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + else if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { howToReturnStruct = SPK_ByValue; useType = TYP_STRUCT; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 5dda47838191d..6ef1c589ed297 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2946,7 +2946,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #elif defined(TARGET_X86) - passUsingFloatRegs = false; + passUsingFloatRegs = false; #elif defined(TARGET_LOONGARCH64) assert(!callIsVararg); @@ -3070,7 +3070,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) byteSize = genTypeSize(argx); } #elif defined(TARGET_LOONGARCH64) - DWORD numFloatFields = 0; + DWORD floatFieldFlags = 0; if (!isStructArg) { size = 1; @@ -3107,14 +3107,14 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { assert((howToPassStruct == SPK_ByValue) || (howToPassStruct == SPK_PrimitiveType)); - numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); + floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); - passUsingFloatRegs = (numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) ? true : false; + passUsingFloatRegs = (floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) ? 
true : false; compFloatingPointUsed |= passUsingFloatRegs; - if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) + if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) size = 1; - else if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) size = 2; } else // if (passStructByRef) @@ -3278,28 +3278,29 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) if (isStructArg) { - if ((numFloatFields & 0x6) && passUsingFloatRegs) + if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) && + passUsingFloatRegs) passUsingFloatRegs = isRegArg = intArgRegNum < maxRegArgs; if (!passUsingFloatRegs) { - size = structSize > 8 ? 2 : 1; - numFloatFields = 0; + size = structSize > 8 ? 2 : 1; + floatFieldFlags = 0; } else if (passUsingFloatRegs) { - if (numFloatFields & 0x8) + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); - else if (numFloatFields & 0x4) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { assert(size == 1); size = 2; passUsingFloatRegs = false; nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); } - else if (/*(size == 1) && */ (numFloatFields & 0x2)) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) { - assert((size == 1) && (numFloatFields & 0x2)); + assert(size == 1); size = 2; nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum); } @@ -3510,25 +3511,24 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // assert(nextOtherRegNum == REG_STK); intArgRegNum = maxRegArgs; } - else if ((numFloatFields & 0xf) == 0x0) + else if ((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) == 0x0) { if (passUsingFloatRegs) fltArgRegNum += 1; else intArgRegNum += size; } - else if (numFloatFields & 0x1) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) { structBaseType = structSize == 8 ? 
TYP_DOUBLE : TYP_FLOAT; fltArgRegNum += 1; } - else if (numFloatFields & 0x6) + else if (floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) { - // assert((numFloatFields & 0x2) || (numFloatFields & 0x4)); fltArgRegNum += 1; intArgRegNum += 1; } - else if (numFloatFields & 0x8) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { fltArgRegNum += 2; } @@ -3617,8 +3617,10 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { newArgEntry->passedByRef = passStructByRef; #if defined(TARGET_LOONGARCH64) - newArgEntry->argType = - (numFloatFields & 0xe) || (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; + newArgEntry->argType = ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) || + (structBaseType == TYP_UNKNOWN)) + ? argx->TypeGet() + : structBaseType; #else newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; #endif @@ -4973,35 +4975,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry unsigned offset = baseOffset; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); #if defined(TARGET_LOONGARCH64) - uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); - if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); + if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { - assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); + assert((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); var_types tmp_type_1; var_types tmp_type_2; compFloatingPointUsed = true; - if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? 
TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_FIRST) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? type[1] : TYP_INT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? type[1] : TYP_INT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? type[0] : TYP_INT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? type[0] : TYP_INT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } else { assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK ? 8 : 4; + elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 
8 : 4; GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type_1, offset); newArg->AddField(this, nextLclFld, offset, tmp_type_1); @@ -5060,35 +5062,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); unsigned offset = 0; #if defined(TARGET_LOONGARCH64) - uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); - if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); + if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { - assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); + assert((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); var_types tmp_type_1; var_types tmp_type_2; compFloatingPointUsed = true; - if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? type[1] : TYP_INT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ?
type[1] : TYP_INT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? type[0] : TYP_INT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? type[0] : TYP_INT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } else { assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK ? 8 : 4; + elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 8 : 4; GenTree* curItem = gtNewIndir(tmp_type_1, baseAddr); // For safety all GT_IND should have at least GT_GLOB_REF set. diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 465e34af66170..8e74ea0334a9c 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -6,7 +6,7 @@ #error The file should not be included for this platform. #endif -//NOTE for LoongArch64: +// NOTE for LoongArch64: // The `REG_R21` which alias `REG_X0` is specially reserved !!! // It can be used only by manully and should be very careful!!! From 6789232f2576c3d335318a7d5299dfaceab6c969 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Sat, 19 Feb 2022 14:21:43 +0800 Subject: [PATCH 25/46] [LoongArch64] round 4 amending for reviewing. 
--- src/coreclr/jit/codegen.h | 11 +- src/coreclr/jit/codegencommon.cpp | 30 +- src/coreclr/jit/codegenlinear.cpp | 2 +- src/coreclr/jit/codegenloongarch64.cpp | 490 ++++++++----------------- src/coreclr/jit/emit.h | 9 +- src/coreclr/jit/emitfmtsloongarch64.h | 14 +- src/coreclr/jit/emitjmps.h | 1 + src/coreclr/jit/emitloongarch64.h | 141 ------- 8 files changed, 186 insertions(+), 512 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 946470d924dff..cd144bf8e8a4e 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -946,10 +946,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genLeaInstruction(GenTreeAddrMode* lea); void genSetRegToCond(regNumber dstReg, GenTree* tree); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) void genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale); void genCodeForMulLong(GenTreeOp* mul); -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH #if !defined(TARGET_64BIT) void genLongToIntCast(GenTree* treeNode); @@ -1267,8 +1267,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCodeForStoreLclFld(GenTreeLclFld* tree); void genCodeForStoreLclVar(GenTreeLclVar* tree); void genCodeForReturnTrap(GenTreeOp* tree); - void genCodeForJcc(GenTreeCC* tree); - void genCodeForSetcc(GenTreeCC* setcc); void genCodeForStoreInd(GenTreeStoreInd* tree); void genCodeForSwap(GenTreeOp* tree); void genCodeForCpObj(GenTreeObj* cpObjNode); @@ -1575,6 +1573,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue); #endif // TARGET_XARCH +#ifndef TARGET_LOONGARCH64 // Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions // such as X86's SETcc. 
A sequence of instructions rather than just a single one is required for // certain floating point conditions. @@ -1618,6 +1617,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void inst_JCC(GenCondition condition, BasicBlock* target); void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg); + + void genCodeForJcc(GenTreeCC* tree); + void genCodeForSetcc(GenTreeCC* setcc); +#endif // !TARGET_LOONGARCH64 }; // A simple phase that just invokes a method on the codegen instance diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 054fe84564ea6..1c6cd7f1c1770 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2999,20 +2999,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // struct regArgElem { - unsigned varNum; // index into compiler->lvaTable[] for this register argument + unsigned varNum; // index into compiler->lvaTable[] for this register argument #if defined(UNIX_AMD64_ABI) - var_types type; // the Jit type of this regArgTab entry -#endif // defined(UNIX_AMD64_ABI) - unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. - // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to - // argument register number 'x'. Only used when circular = true. - char slot; // 0 means the register is not used for a register argument - // 1 means the first part of a register argument - // 2, 3 or 4 means the second,third or fourth part of a multireg argument - bool stackArg; // true if the argument gets homed to the stack - bool writeThru; // true if the argument gets homed to both stack and register - bool processed; // true after we've processed the argument (and it is in its final location) - bool circular; // true if this register participates in a circular dependency loop. 
+ var_types type; // the Jit type of this regArgTab entry +#endif // defined(UNIX_AMD64_ABI) + unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. + // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to + // argument register number 'x'. Only used when circular = true. + char slot; // 0 means the register is not used for a register argument + // 1 means the first part of a register argument + // 2, 3 or 4 means the second,third or fourth part of a multireg argument + bool stackArg; // true if the argument gets homed to the stack + bool writeThru; // true if the argument gets homed to both stack and register + bool processed; // true after we've processed the argument (and it is in its final location) + bool circular; // true if this register participates in a circular dependency loop. #ifdef UNIX_AMD64_ABI @@ -3815,10 +3815,10 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere else #endif // TARGET_XARCH { - var_types destMemType = varDscDest->TypeGet(); + var_types destMemType = varDscDest->TypeGet(); #ifdef TARGET_ARM - bool cycleAllDouble = true; // assume the best + bool cycleAllDouble = true; // assume the best unsigned iter = begReg; do diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index fe38626f81541..bda049dd37abb 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2648,7 +2648,6 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) inst_JCC(condition, compiler->compCurBB->bbJumpDest); } -#endif // !TARGET_LOONGARCH64 //------------------------------------------------------------------------ // genCodeForJcc: Generate code for a GT_JCC node. 
@@ -2706,3 +2705,4 @@ void CodeGen::genCodeForSetcc(GenTreeCC* setcc) inst_SETCC(setcc->gtCondition, setcc->TypeGet(), setcc->GetRegNum()); genProduceReg(setcc); } +#endif // !TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index d59c99c07b2ce..2d856550a5486 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -26,35 +26,23 @@ static short splitLow(int value) return (value & 0xffff); } -// Returns true if 'value' is a legal signed immediate 16 bit encoding. -static bool isValidSimm16(ssize_t value) -{ - return -(((int)1) << 15) <= value && value < (((int)1) << 15); -}; - -// Returns true if 'value' is a legal unsigned immediate 16 bit encoding. -static bool isValidUimm16(ssize_t value) -{ - return (0 == (value >> 16)); -}; - // Returns true if 'value' is a legal signed immediate 12 bit encoding. static bool isValidSimm12(ssize_t value) { return -(((int)1) << 11) <= value && value < (((int)1) << 11); -}; +} // Returns true if 'value' is a legal unsigned immediate 11 bit encoding. static bool isValidUimm11(ssize_t value) { return (0 == (value >> 11)); -}; +} // Returns true if 'value' is a legal unsigned immediate 12 bit encoding. static bool isValidUimm12(ssize_t value) { return (0 == (value >> 12)); -}; +} /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -73,10 +61,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // // However the imm might not fit as a directly encodable immediate, // when it doesn't fit we generate extra instruction(s) that sets up -// the 'regTmp' with the proper immediate value. +// the 'tmpReg' with the proper immediate value. // -// li64 regTmp, imm -// ins reg1, reg2, regTmp +// li tmpReg, imm // li is pseudo instruction here which maybe 2-4 instructions. 
+// ins reg1, reg2, tmpReg // // Arguments: // ins - instruction @@ -104,7 +92,7 @@ bool CodeGen::genInstrWithConstant(instruction ins, // reg1 is usually a dest register // reg2 is always source register - assert(tmpReg != reg2); // regTmp can not match any source register + assert(tmpReg != reg2); // tmpReg can not match any source register #ifdef DEBUG switch (ins) @@ -131,7 +119,7 @@ bool CodeGen::genInstrWithConstant(instruction ins, break; } #endif - bool immFitsInIns = (-2048 <= imm) && (imm <= 2047); + bool immFitsInIns = isValidSimm12(imm); if (immFitsInIns) { @@ -1651,7 +1639,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); assert(untrLclLo % 4 == 0); - if ((-2048 <= untrLclLo) && (untrLclLo < 2048)) + if (isValidSimm12(untrLclLo)) { GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); } @@ -1697,7 +1685,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // argument reg instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); - /* TODO for LA: maybe optimize further */ + // TODO-LOONGARCH64: maybe optimize further GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rCnt, rCnt, -1); @@ -1829,7 +1817,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) -{ // maybe optimize. +{ emitter* emit = GetEmitter(); if (!compiler->opts.compReloc) @@ -1839,12 +1827,12 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, if (EA_IS_RELOC(size)) { - assert(genIsValidIntReg(reg)); // TODO: maybe optimize!!! 
+ assert(genIsValidIntReg(reg)); emit->emitIns_R_AI(INS_bl, size, reg, imm); // for example: EA_PTR_DSP_RELOC } else { - emit->emitIns_I_la(size, reg, imm); // TODO: maybe optimize. + emit->emitIns_I_la(size, reg, imm); } regSet.verifyRegUsed(reg); @@ -1896,10 +1884,6 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // We will just zero out the entire vector register for both float and double emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, REG_R0); } - /*else if (emitter::emitIns_valid_imm_for_fmov(constValue)) - {// LOONGARCH64 doesn't need this. - assert(!"unimplemented on LOONGARCH yet"); - }*/ else { // Get a temp integer register to compute long address. @@ -1928,26 +1912,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // Produce code for a GT_INC_SATURATE node. void CodeGen::genCodeForIncSaturate(GenTree* tree) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - regNumber targetReg = tree->GetRegNum(); - - // The arithmetic node must be sitting in a register (since it's not contained) - assert(!tree->isContained()); - // The dst can only be a register. - assert(targetReg != REG_NA); - - GenTree* operand = tree->gtGetOp1(); - assert(!operand->isContained()); - // The src must be a register. 
- regNumber operandReg = genConsumeReg(operand); - - GetEmitter()->emitIns_R_R_I(INS_addi_d, emitActualTypeSize(tree), targetReg, operandReg, 1); - GetEmitter()->emitIns_R_R_I(INS_bne, emitActualTypeSize(tree), targetReg, REG_R0, 2); - GetEmitter()->emitIns_R_R_R(INS_andn, emitActualTypeSize(tree), targetReg, targetReg, REG_R0); - - genProduceReg(tree); -#endif + NYI("unimplemented on LOONGARCH64 yet"); } // Generate code to get the high N bits of a N*N=2N bit multiplication result @@ -2152,7 +2117,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) unsigned int regCount = varDsc->lvFieldCnt; for (unsigned i = 0; i < regCount; ++i) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); regNumber varReg = lclNode->GetRegByIndex(i); assert(varReg != REG_NA); unsigned fieldLclNum = varDsc->lvFieldLclStart + i; @@ -2180,7 +2145,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) { // This is only possible for a zero-init or bitcast. const bool zeroInit = data->IsIntegralConst(0); - // TODO-LOONGAARCH64-CQ: not supporting SIMD. + // TODO-LOONGARCH64-CQ: not supporting SIMD. assert(!varTypeIsSIMD(targetType)); if (zeroInit) @@ -2655,7 +2620,7 @@ void CodeGen::genCodeForNegNot(GenTree* tree) // void CodeGen::genCodeForBswap(GenTree* tree) { - assert(!"unimpleement on LOONGAARCH64 yet"); + assert(!"unimpleement on LOONGARCH64 yet"); } //------------------------------------------------------------------------ @@ -2834,7 +2799,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) else ins = INS_mod_wu; - // TODO: temp workround, should amend for optimize. + // TODO-LOONGARCH64: here is just for signed-extension ? 
emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0); emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, divisorReg, divisorReg, 0); } @@ -3141,7 +3106,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) { // issue a INS_BARRIER_RMB after a volatile CpObj operation - ////TODO: there is only BARRIER_FULL for LOONGARCH64. + // TODO-LOONGARCH64: there is only BARRIER_FULL for LOONGARCH64. instGen_MemoryBarrier(BARRIER_FULL); } @@ -3166,7 +3131,6 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode) GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, baseReg, baseReg, 0); // add it to the absolute address of fgFirstBB - // compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;//TODO for LOONGARCH64. GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, baseReg, baseReg, tmpReg); @@ -3219,7 +3183,7 @@ void CodeGen::genJumpTable(GenTree* treeNode) // void CodeGen::genLockedInstructions(GenTreeOp* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //------------------------------------------------------------------------ @@ -3230,7 +3194,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) // void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } static inline bool isImmed(GenTree* treeNode) @@ -3577,13 +3541,13 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) callType = emitter::EC_INDIR_R; callTarget = REG_DEFAULT_HELPER_CALL_TARGET; - // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); if (compiler->opts.compReloc) { GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); } else { + // TODO-LOONGARCH64: maybe optimize further. 
// GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); @@ -3591,7 +3555,6 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); } regSet.verifyRegUsed(callTarget); - // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); } else { @@ -3599,7 +3562,7 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) callTarget = REG_NA; } - ////TODO: can optimize further !!! + // TODO-LOONGARCH64: can optimize further !!! GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ @@ -3647,10 +3610,10 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) // as that is where 'addr' must go. noway_assert(data->GetRegNum() != REG_WRITE_BARRIER_DST_BYREF); - // 'addr' goes into x14 (REG_WRITE_BARRIER_DST) + // 'addr' goes into REG_T6 (REG_WRITE_BARRIER_DST) genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST); - // 'data' goes into x15 (REG_WRITE_BARRIER_SRC) + // 'data' goes into REG_T7 (REG_WRITE_BARRIER_SRC) genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC); genGCWriteBarrier(tree, writeBarrierForm); @@ -3699,7 +3662,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) // void CodeGen::genCodeForSwap(GenTreeOp* tree) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //------------------------------------------------------------------------ @@ -3746,7 +3709,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) genConsumeOperands(treeNode->AsOp()); if (IsUnsigned) - { // should amend. 
+ { emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, REG_SCRATCH_FLT, op1->GetRegNum()); // save op1 if (srcSize == EA_8BYTE) @@ -4189,7 +4152,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else { - // TODO:can optimize further. if (op1->isContainedIntOrIImmed()) { op1 = tree->gtOp2; @@ -4410,9 +4372,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) // void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { - // assert(compiler->compCurBB->bbJumpKind == BBJ_COND);//should confirm. - ////assert(jtrue->OperIs(GT_JTRUE)); - emitter* emit = GetEmitter(); GenTreeOp* tree = jtrue->OperIs(GT_JTRUE) ? jtrue->gtGetOp1()->AsOp() : jtrue; @@ -4450,11 +4409,11 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) if (varTypeIsFloating(op1Type)) { assert(genTypeSize(op1Type) == genTypeSize(op2Type)); - // int cc = 1; assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + // here default use cc = 1 for float comparing. if (tree->OperIs(GT_EQ)) { ins = INS_bcnez; @@ -4516,7 +4475,6 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) op2->GetRegNum(), 1 /*cc*/); } - // assert(0 <= cc && cc < 8); if (IsEq) emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, (int)1 /*cc*/); // 5-bits; else @@ -4550,9 +4508,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) imm2 = static_cast(imm2); } break; - // case EA_2BYTE: - // imm = static_cast(imm); - // break; + default: assert(!"Unexpected type in jumpTrue."); } @@ -4608,7 +4564,6 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } else { - // TODO:can optimize further. if (op1->isContainedIntOrIImmed()) { op1 = tree->gtOp2; @@ -4657,14 +4612,12 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) case EA_1BYTE: imm = static_cast(imm); break; - // case EA_2BYTE: - // imm = static_cast(imm); - // break; + default: assert(!"Unexpected type in jumpTrue(imm)."); } - emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); // TODO: maybe optimize. 
+ emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); } else { @@ -4707,7 +4660,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) regNumber regOp2 = op2->GetRegNum(); if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - { // TODO: should amend further!!! + { regNumber tmpRegOp1 = tree->ExtractTempReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); @@ -4716,7 +4669,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - { // TODO: should amend further!!! + { regNumber tmpRegOp1 = tree->ExtractTempReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); @@ -4725,12 +4678,12 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - { // TODO: should amend further!!! + { emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, regOp2, 0); regOp2 = REG_RA; } else if (cmpSize == EA_4BYTE && ((op1->gtFlags | op2->gtFlags) & GTF_UNSIGNED)) - { // TODO: should amend further!!! + { if (!(op1->gtFlags & GTF_UNSIGNED)) { regNumber tmpRegOp1 = tree->ExtractTempReg(); @@ -4833,46 +4786,31 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) regNumber reg = op1->GetRegNum(); emitAttr attr = emitActualTypeSize(op1->TypeGet()); - // if (tree->gtFlags & GTF_JCMP_TST) - //{ - // assert(!"unimplemented on LOONGARCH yet"); - // //ssize_t compareImm = op2->AsIntCon()->IconValue(); - - // //assert(isPow2(compareImm)); - - // //instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? 
INS_tbz : INS_tbnz; - // //int imm = genLog2((size_t)compareImm); - - // //GetEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm); - //} - // else + instruction ins; + int regs; + if (op2->AsIntCon()->gtIconVal) { - instruction ins; - int regs; - if (op2->AsIntCon()->gtIconVal) - { - assert(reg != REG_R21); - ssize_t imm = op2->AsIntCon()->gtIconVal; - if (attr == EA_4BYTE) - { - assert(reg != REG_RA); - imm = (int32_t)imm; - GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, reg, 0); - reg = REG_RA; - } - GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); - regs = (int)reg << 5; - regs |= (int)REG_R21; // REG_R21 - ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; - } - else + assert(reg != REG_R21); + ssize_t imm = op2->AsIntCon()->gtIconVal; + if (attr == EA_4BYTE) { - regs = (int)reg; - ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; + assert(reg != REG_RA); + imm = (int32_t)imm; + GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, reg, 0); + reg = REG_RA; } - - GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs); // 5-bits; + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); + regs = (int)reg << 5; + regs |= (int)REG_R21; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; } + else + { + regs = (int)reg; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; + } + + GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs); // 5-bits; } //--------------------------------------------------------------------- @@ -4886,8 +4824,6 @@ int CodeGenInterface::genSPtoFPdelta() const int delta; if (IsSaveFpRaWithAllCalleeSavedRegisters()) { - // delta = (compiler->compCalleeRegsPushed -2)* REGSIZE_BYTES + compiler->compLclFrameSize; - // assert(delta == genTotalFrameSize() - compiler->lvaArgSize - 2*8); delta = genTotalFrameSize() - (compiler->info.compIsVarArgs ? 
MAX_REG_ARG * REGSIZE_BYTES : 0) - (compiler->compCalleeRegsPushed - 1) * REGSIZE_BYTES; } @@ -4990,9 +4926,9 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, if (addr == nullptr) { // This is call to a runtime helper. - // li x, pAddr #NOTE: this maybe muti-instructions. - // ld x, [x] - // jr x + // li reg, pAddr #NOTE: this maybe muti-instructions. + // ld_d reg, reg + // jirl reg if (callTargetReg == REG_NA) { @@ -5009,10 +4945,9 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, callTarget = callTargetReg; - // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); - // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); if (compiler->opts.compReloc) { + // TODO-LOONGARCH64: here the bl is special flag rather than a real instruction. GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); } else @@ -5059,12 +4994,12 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, // TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp. 
void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); return INS_OPTS_NONE; } @@ -5081,7 +5016,7 @@ insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) // instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); return INS_invalid; } @@ -5096,7 +5031,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type // void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //------------------------------------------------------------------------------------------- @@ -5111,7 +5046,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //---------------------------------------------------------------------------------- @@ -5125,7 +5060,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //-------------------------------------------------------------------------------- @@ -5139,7 +5074,7 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //-------------------------------------------------------------------------------- @@ -5154,7 +5089,7 @@ void 
CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //-------------------------------------------------------------------------------- @@ -5169,7 +5104,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //-------------------------------------------------------------------------------- @@ -5184,7 +5119,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //-------------------------------------------------------------------------------- @@ -5198,7 +5133,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //------------------------------------------------------------------------------------ @@ -5212,7 +5147,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //------------------------------------------------------------------------------------ @@ -5226,7 +5161,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //----------------------------------------------------------------------------- @@ -5249,7 +5184,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) // void 
CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //----------------------------------------------------------------------------- @@ -5271,7 +5206,7 @@ void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //----------------------------------------------------------------------------- @@ -5288,7 +5223,7 @@ void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) // void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //----------------------------------------------------------------------------- @@ -5305,7 +5240,7 @@ void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) // void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //----------------------------------------------------------------------------- @@ -5321,7 +5256,7 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) // void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } #endif // FEATURE_SIMD @@ -5690,14 +5625,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForJumpCompare(treeNode->AsOp()); break; - case GT_JCC: - genCodeForJcc(treeNode->AsCC()); - break; - - case GT_SETCC: - genCodeForSetcc(treeNode->AsCC()); - break; - case GT_RETURNTRAP: genCodeForReturnTrap(treeNode->AsOp()); break; @@ -5865,7 +5792,7 @@ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type) // The only TYP_REF constant that can come this path is a managed 'null' since it is not // relocatable. Other ref type constants (e.g. 
string objects) go through a different // code path. - noway_assert(type != TYP_REF || val == 0); + noway_assert((type != TYP_REF) || (val == 0)); GetEmitter()->emitIns_I_la(emitActualTypeSize(type), reg, val); regSet.verifyRegUsed(reg); @@ -5966,7 +5893,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } else { - // TODO-LOONGARCH64: should amend for optimize! + // TODO-LOONGARCH64: maybe optimize further! // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, // (ssize_t)compiler->gsGlobalSecurityCookieAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); @@ -6002,7 +5929,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) // void CodeGen::genIntrinsic(GenTree* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //--------------------------------------------------------------------- @@ -6068,7 +5995,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) { if (varTypeIsSIMD(targetType)) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } instruction storeIns = ins_Store(targetType); @@ -6569,7 +6496,7 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) if (treeNode->GetRegNum() != REG_NA) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); // Right now the only enregistrable multi-reg return types supported are SIMD types. assert(varTypeIsSIMD(treeNode)); assert(regCount != 0); @@ -7002,35 +6929,45 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) // The index is never contained, even if it is a constant. assert(index->isUsedFromReg()); - // const regNumber tmpReg = node->GetSingleTempReg(); - // Generate the bounds check if necessary. 
if ((node->gtFlags & GTF_INX_RNGCHK) != 0) { GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, base->GetRegNum(), node->gtLenOffset); - // if (index >= REG_R21) - // { - // JumpToThrowHlpBlk; - // } + // if (index >= REG_R21) + // { + // JumpToThrowHlpBlk; + // } // - // sltu AT, index, REG_R21 - // bne AT, zero, RngChkExit + // sltu REG_R21, index, REG_R21 + // bne REG_21, zero, RngChkExit // IndRngFail: - // ... + // ... // RngChkExit: genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), node->gtIndRngFailBB, REG_R21); } emitAttr attr = emitActualTypeSize(node); - // Can we use a ScaledAdd instruction? + // Can we use a shift instruction for multiply ? // - if (isPow2(node->gtElemSize) && (node->gtElemSize <= 2048)) + if (isPow2(node->gtElemSize) && (node->gtElemSize < 0x10000000u)) { - DWORD scale; - BitScanForward(&scale, node->gtElemSize); + regNumber tmpReg; + if (node->gtElemSize == 0) + { + // dest = base + index + tmpReg = index->GetRegNum(); + } + else + { + DWORD scale; + BitScanForward(&scale, node->gtElemSize); - // dest = base + index * scale - genScaledAdd(emitActualTypeSize(node), node->GetRegNum(), base->GetRegNum(), index->GetRegNum(), scale); + // tmpReg = base + index << scale + // dest = base + tmpReg + GetEmitter()->emitIns_R_R_I(INS_slli_d, attr, REG_R21, index->GetRegNum(), scale); + tmpReg = REG_R21; + } + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, node->GetRegNum(), base->GetRegNum(), tmpReg); } else // we have to load the element size and use a MADD (multiply-add) instruction { @@ -7218,7 +7155,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) if (size >= 2 * REGSIZE_BYTES) { - regNumber tempReg2 = REG_R21; // cpBlkNode->ExtractTempReg(RBM_ALLINT);//TODO:should amend. 
+ regNumber tempReg2 = REG_R21; for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; size -= regSize, srcOffset += regSize, dstOffset += regSize) @@ -7390,12 +7327,6 @@ void CodeGen::genCallInstruction(GenTreeCall* call) assert(putArgRegNode->gtOper == GT_PUTARG_REG); genConsumeReg(putArgRegNode); -#if 0 - inst_Mov_Extend(putArgRegNode->TypeGet(), /* srcInReg */ true, argReg, putArgRegNode->GetRegNum(), - /* canSkip */ true, emitActualTypeSize(TYP_I_IMPL)); - - argReg = genRegArgNext(argReg); -#endif } } #if FEATURE_ARG_SPLIT @@ -7840,7 +7771,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) if (compiler->info.compIsVarArgs) { - assert(!"unimplemented on LOONGARCH yet!"); + NYI("unimplemented on LOONGARCH64 yet"); // In case of a jmp call to a vararg method ensure only integer registers are passed. assert((genRegMask(argReg) & (RBM_ARG_REGS)) != RBM_NONE); assert(!varDsc->lvIsHfaRegArg()); @@ -7963,14 +7894,14 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d instruction ins; if (castMaxValue > 2047) - { // should amend. should confirm !?!? + { assert((castMaxValue == 32767) || (castMaxValue == 65535)); GetEmitter()->emitIns_I_la(EA_ATTR(desc.CheckSrcSize()), REG_R21, castMaxValue + 1); ins = castMinValue == 0 ? INS_bgeu : INS_bge; genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, reg, nullptr, REG_R21); } else - { // should amend. + { GetEmitter()->emitIns_R_R_I(INS_addi_w, EA_ATTR(desc.CheckSrcSize()), REG_R21, REG_R0, castMaxValue); ins = castMinValue == 0 ? 
INS_bltu : INS_blt; genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, REG_R21, nullptr, reg); @@ -7978,7 +7909,7 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d if (castMinValue != 0) { - if ((-2048 <= castMinValue) && (castMinValue < 2048)) + if (isValidSimm12(castMinValue)) { GetEmitter()->emitIns_R_R_I(INS_slti, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, castMinValue); } @@ -8028,20 +7959,6 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) genIntCastOverflowCheck(cast, desc, srcReg); } - // if ((EA_ATTR(genTypeSize(srcType)) == EA_8BYTE) && (EA_ATTR(genTypeSize(dstType)) == EA_4BYTE)) - //{ - // if (dstType == TYP_INT) - // { - // // convert t0 int32 - // emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); - // } - // else - // { - // // convert t0 uint32 - // emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+31, pos); - // } - //} - // else if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) { instruction ins; @@ -8062,24 +7979,20 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) ins = (desc.ExtendSrcSize() == 1) ? INS_ext_w_b : INS_ext_w_h; emit->emitIns_R_R(ins, EA_PTRSIZE, dstReg, srcReg); break; -#ifdef TARGET_64BIT + case GenIntCastDesc::ZERO_EXTEND_INT: emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 31, pos); break; case GenIntCastDesc::SIGN_EXTEND_INT: emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); break; -#endif + default: assert(desc.ExtendKind() == GenIntCastDesc::COPY); -#if 1 if (srcType == TYP_INT) - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); // should amend. 
+ emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); else emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, dstReg, srcReg, 0); -#else - emit->emitIns_R_R(INS_mov, EA_PTRSIZE, dstReg, srcReg); -#endif break; } } @@ -8195,7 +8108,7 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, { unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar; assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM); - const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber); // TODO: unused. + const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber); gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar->GetStackOffset()); } @@ -8207,62 +8120,6 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface } -/* TODO for LOONGARCH64: not used for loongarch */ -// clang-format off -const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32] -{ - //{ }, // NONE - //{ }, // 1 - //{ EJ_lt }, // SLT - //{ EJ_le }, // SLE - //{ EJ_ge }, // SGE - //{ EJ_gt }, // SGT - //{ EJ_mi }, // S - //{ EJ_pl }, // NS - - //{ EJ_eq }, // EQ - //{ EJ_ne }, // NE - //{ EJ_lo }, // ULT - //{ EJ_ls }, // ULE - //{ EJ_hs }, // UGE - //{ EJ_hi }, // UGT - //{ EJ_hs }, // C - //{ EJ_lo }, // NC - - //{ EJ_eq }, // FEQ - //{ EJ_gt, GT_AND, EJ_lo }, // FNE - //{ EJ_lo }, // FLT - //{ EJ_ls }, // FLE - //{ EJ_ge }, // FGE - //{ EJ_gt }, // FGT - //{ EJ_vs }, // O - //{ EJ_vc }, // NO - - //{ EJ_eq, GT_OR, EJ_vs }, // FEQU - //{ EJ_ne }, // FNEU - //{ EJ_lt }, // FLTU - //{ EJ_le }, // FLEU - //{ EJ_hs }, // FGEU - //{ EJ_hi }, // FGTU - //{ }, // P - //{ }, // NP -}; -// clang-format on - -//------------------------------------------------------------------------ -// inst_SETCC: Generate code to set a register to 0 or 1 based on a condition. 
-// -// Arguments: -// condition - The condition -// type - The type of the value to be produced -// dstReg - The destination register to be set to 1 or 0 -// -void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstReg) -{ - /* TODO for LOONGARCH64: should redesign and delete. */ - assert(!"unimplemented on LOONGARCH yet"); -} - //------------------------------------------------------------------------ // genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node. // @@ -8320,26 +8177,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) GetEmitter()->emitEnableGC(); } } -#if 1 -//------------------------------------------------------------------------ -// genScaledAdd: A helper for genLeaInstruction. -// TODO: can amend further. -void CodeGen::genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale) -{ - emitter* emit = GetEmitter(); - if (scale == 0) - { - // target = base + index - emit->emitIns_R_R_R(INS_add_d, attr, targetReg, baseReg, indexReg); - } - else - { - // target = base + index<emitIns_R_R_I(INS_slli_d, attr, REG_R21, indexReg, scale); - emit->emitIns_R_R_R(INS_add_d, attr, targetReg, baseReg, REG_R21); - } -} -#endif + //------------------------------------------------------------------------ // genLeaInstruction: Produce code for a GT_LEA node. 
// @@ -8353,14 +8191,9 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) emitAttr size = emitTypeSize(lea); int offset = lea->Offset(); - // In LOONGARCH we can only load addresses of the form: - // - // [Base + index*scale] - // [Base + Offset] - // [Literal] (PC-Relative) - // // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate: - // destReg = baseReg + indexReg * scale; + // tmpReg = indexReg << scale; + // destReg = baseReg + tmpReg; // destReg = destReg + offset; // // TODO-LOONGARCH64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture @@ -8372,59 +8205,50 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) GenTree* memBase = lea->Base(); GenTree* index = lea->Index(); - DWORD scale; - assert(isPow2(lea->gtScale)); - BitScanForward(&scale, lea->gtScale); - - assert(scale <= 4); - if (offset != 0) + regNumber tmpReg; + if (lea->gtScale == 0) { - regNumber tmpReg = lea->GetSingleTempReg(); + tmpReg = index->GetRegNum(); + } + else + { + DWORD scale; + BitScanForward(&scale, lea->gtScale); + assert(scale <= 4); - // When generating fully interruptible code we have to use the "large offset" sequence - // when calculating a EA_BYREF as we can't report a byref that points outside of the object - // - bool useLargeOffsetSeq = compiler->GetInterruptible() && (size == EA_BYREF); + emit->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_R21, index->GetRegNum(), scale); + tmpReg = REG_R21; + } - if (!useLargeOffsetSeq && ((-2048 <= offset) && (offset <= 2047))) + if (offset != 0) + { + if (isValidSimm12(offset)) { - // Generate code to set tmpReg = base + index*scale - genScaledAdd(size, tmpReg, memBase->GetRegNum(), index->GetRegNum(), scale); - - // Then compute target reg from [tmpReg + offset] - emit->emitIns_R_R_I(INS_addi_d, size, lea->GetRegNum(), tmpReg, offset); + emit->emitIns_R_R_I(INS_addi_d, size, tmpReg, tmpReg, offset); } - else // large offset sequence + else { - 
noway_assert(tmpReg != index->GetRegNum()); - noway_assert(tmpReg != memBase->GetRegNum()); - - // First load/store tmpReg with the offset constant - // rTmp = imm - GetEmitter()->emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + regNumber tmpReg2 = lea->GetSingleTempReg(); - // Then add the scaled index register - // rTmp = rTmp + index*scale - genScaledAdd(EA_PTRSIZE, tmpReg, tmpReg, index->GetRegNum(), scale); + noway_assert(tmpReg2 != index->GetRegNum()); + noway_assert(tmpReg2 != memBase->GetRegNum()); + noway_assert(tmpReg2 != tmpReg); - // Then compute target reg from [base + tmpReg ] - // rDst = base + rTmp - emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); + // compute the large offset. + emit->emitIns_I_la(EA_PTRSIZE, tmpReg2, offset); + emit->emitIns_R_R_R(INS_add_d, size, tmpReg, tmpReg, tmpReg2); } } - else - { - // Then compute target reg from [base + index*scale] - genScaledAdd(size, lea->GetRegNum(), memBase->GetRegNum(), index->GetRegNum(), scale); - } + + emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); } else if (lea->Base()) { GenTree* memBase = lea->Base(); - if ((-2048 <= offset) && (offset <= 2047)) + if (isValidSimm12(offset)) { if (offset != 0) { @@ -8445,7 +8269,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) regNumber tmpReg = lea->GetSingleTempReg(); // First load tmpReg with the large offset constant - GetEmitter()->emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + emit->emitIns_I_la(EA_PTRSIZE, tmpReg, offset); // Then compute target reg from [memBase + tmpReg] emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); @@ -8481,7 +8305,7 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) } else { - assert((-2048 <= delta) && (delta < 2048)); + assert(isValidSimm12(delta)); GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); } @@ -8705,7 +8529,7 @@ inline void 
CodeGen::genJumpToThrowHlpBlk_la( callType = emitter::EC_INDIR_R; callTarget = REG_DEFAULT_HELPER_CALL_TARGET; - // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl.//TODO: maybe optimize. + // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl. // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); // emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); @@ -8771,7 +8595,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) } #endif // DEBUG - // TODO: Use the exact barrier type depending on the CPU. + // TODO-LOONGARCH64: Use the exact barrier type depending on the CPU. GetEmitter()->emitIns_I(INS_dbar, EA_4BYTE, INS_BARRIER_FULL); } @@ -8883,7 +8707,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // Notes: // 1. FP is always saved, and the first store is FP, RA. // 2. General-purpose registers are 8 bytes, floating-point registers are 8 bytes, but SIMD/FP registers 16 bytes. - // TODO: supporting SIMD feature ! + // TODO-LOONGARCH64: supporting SIMD feature ! // 3. For frames with varargs, not implemented completely and not tested ! // 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). // @@ -8903,7 +8727,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // | incoming arguments | // +=======================+ <---- Caller's SP // | Arguments Or | // if needed. 
- // | Varargs regs space | // Only for varargs functions; 64 bytes (TODO: not implement completely) + // | Varargs regs space | // Only for varargs functions; (varargs not implemented for LoongArch64) // |-----------------------| // |Callee saved registers | // not including FP/RA; multiple of 8 bytes // |-----------------------| @@ -8932,7 +8756,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // | incoming arguments | // +=======================+ <---- Caller's SP // | Arguments Or | // if needed. - // | Varargs regs space | // Only for varargs functions; 64 bytes (TODO: not implement completely) + // | Varargs regs space | // Only for varargs functions; (varargs not implemented for LoongArch64) // |-----------------------| // | Saved RA | // 8 bytes // |-----------------------| @@ -9540,7 +9364,7 @@ void CodeGen::genFnPrologCalleeRegArgs() base += baseOffset; - if ((-2048 <= base) && (base < 2048)) + if (isValidSimm12(base)) { GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); } @@ -9586,7 +9410,7 @@ void CodeGen::genFnPrologCalleeRegArgs() { base += baseOffset; - if ((-2048 <= base) && (base < 2048)) + if (isValidSimm12(base)) { GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); } @@ -9617,7 +9441,7 @@ void CodeGen::genFnPrologCalleeRegArgs() base += 8; GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); - if ((-2048 <= base) && (base < 2048)) + if (isValidSimm12(base)) { GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, baseOffset); } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 2a19eb10bacec..fae914498d6e8 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -758,11 +758,8 @@ class emitter #elif defined(TARGET_ARM64) // For Arm64, we have used 17 bits from the second DWORD. 
#define ID_EXTRA_BITFIELD_BITS (17) -#elif defined(TARGET_XARCH) -// For xarch, we have used 14 bits from the second DWORD. -#define ID_EXTRA_BITFIELD_BITS (14) -#elif defined(TARGET_LOONGARCH64) - // For Loongarch64, we have used 14 bits from the second DWORD. +#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) +// For xarch and LoongArch64, we have used 14 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture @@ -1052,7 +1049,7 @@ class emitter } #elif defined(TARGET_LOONGARCH64) - unsigned idCodeSize() const + unsigned idCodeSize() const { return _idCodeSize; //_idInsCount; } diff --git a/src/coreclr/jit/emitfmtsloongarch64.h b/src/coreclr/jit/emitfmtsloongarch64.h index e04d60270d567..2f47160ac8d39 100644 --- a/src/coreclr/jit/emitfmtsloongarch64.h +++ b/src/coreclr/jit/emitfmtsloongarch64.h @@ -11,16 +11,13 @@ #ifdef DEFINE_ID_OPS ////////////////////////////////////////////////////////////////////////////// -#undef DEFINE_ID_OPS enum ID_OPS { ID_OP_NONE, // no additional arguments - ID_OP_SCNS, // small const operand (21-bits or less, no reloc) - ID_OP_JMP, // local jump - ID_OP_CALL, // method call - ID_OP_SPEC, // special handling required }; +#undef DEFINE_ID_OPS + ////////////////////////////////////////////////////////////////////////////// #else // !DEFINE_ID_OPS ////////////////////////////////////////////////////////////////////////////// @@ -38,13 +35,6 @@ enum ID_OPS IF_DEF(NONE, IS_NONE, NONE) // - -//IF_DEF(LABEL, IS_NONE, JMP) // label -//IF_DEF(LARGEJMP, IS_NONE, JMP) // large conditional branch pseudo-op (cond branch + uncond branch) -//IF_DEF(LARGEADR, IS_NONE, JMP) // large address pseudo-op (adrp + add) -//IF_DEF(LARGELDC, IS_NONE, JMP) // large constant pseudo-op (adrp + ldr) - - IF_DEF(OPCODE, IS_NONE, NONE) IF_DEF(OPCODES_16, IS_NONE, NONE) IF_DEF(OP_FMT, IS_NONE, NONE) diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index 
97e216dccbb60..cd10727f6eec3 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -48,6 +48,7 @@ JMP_SMALL(le , gt , ble ) // LE #elif defined(TARGET_LOONGARCH64) +// TODO-LOONGARCH64: adding other condition branches. JMP_SMALL(jmp , jmp , b ) JMP_SMALL(eq , ne , beq ) // EQ JMP_SMALL(ne , eq , bne ) // NE diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 85841251de82a..0d9b5da867eb4 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -78,142 +78,12 @@ bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src bool IsRedundantLdStr( instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end. -/************************************************************************ -* -* This union is used to to encode/decode the special LOONGARCH64 immediate values -* that is listed as imm(N,r,s) and referred to as 'bitmask immediate' -*/ - -union bitMaskImm { - struct - { - unsigned immS : 6; // bits 0..5 - unsigned immR : 6; // bits 6..11 - unsigned immN : 1; // bits 12 - }; - unsigned immNRS; // concat N:R:S forming a 13-bit unsigned immediate -}; - -/************************************************************************ -* -* Convert between a 64-bit immediate and its 'bitmask immediate' -* representation imm(i16,hw) -*/ - -// static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size); - -// static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size); - -/************************************************************************ -* -* This union is used to to encode/decode the special LOONGARCH64 immediate values -* that is listed as imm(i16,hw) and referred to as 'halfword immediate' -*/ - -union halfwordImm { - struct - { - unsigned immVal : 16; // bits 0..15 - unsigned immHW : 2; // bits 16..17 - }; - unsigned immHWVal; // concat HW:Val forming a 18-bit unsigned 
immediate -}; - -/************************************************************************ -* -* Convert between a 64-bit immediate and its 'halfword immediate' -* representation imm(i16,hw) -*/ - -// static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size); - -// static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size); - -/************************************************************************ -* -* This union is used to encode/decode the special LOONGARCH64 immediate values -* that is listed as imm(i16,by) and referred to as 'byteShifted immediate' -*/ - -union byteShiftedImm { - struct - { - unsigned immVal : 8; // bits 0..7 - unsigned immBY : 2; // bits 8..9 - unsigned immOnes : 1; // bit 10 - }; - unsigned immBSVal; // concat Ones:BY:Val forming a 10-bit unsigned immediate -}; - -/************************************************************************ -* -* Convert between a 16/32-bit immediate and its 'byteShifted immediate' -* representation imm(i8,by) -*/ - -// static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL); - -// static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size); - -/************************************************************************ -* -* This union is used to to encode/decode the special LOONGARCH64 immediate values -* that are use for FMOV immediate and referred to as 'float 8-bit immediate' -*/ - -union floatImm8 { - struct - { - unsigned immMant : 4; // bits 0..3 - unsigned immExp : 3; // bits 4..6 - unsigned immSign : 1; // bits 7 - }; - unsigned immFPIVal; // concat Sign:Exp:Mant forming an 8-bit unsigned immediate -}; - -/************************************************************************ -* -* Convert between a double and its 'float 8-bit immediate' representation -*/ - -// static emitter::floatImm8 emitEncodeFloatImm8(double immDbl); - -// static double emitDecodeFloatImm8(const emitter::floatImm8 
fpImm); - -/************************************************************************ -* -* This union is used to to encode/decode the cond, nzcv and imm5 values for -* instructions that use them in the small constant immediate field -*/ - -union condFlagsImm { - struct - { - // insCond cond : 4; // bits 0..3 - // insCflags flags : 4; // bits 4..7 - unsigned imm5 : 5; // bits 8..12 - }; - unsigned immCFVal; // concat imm5:flags:cond forming an 13-bit unsigned immediate -}; - -// Returns true if 'reg' represents an integer register. -static bool isIntegerRegister(regNumber reg) -{ - return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); -} - // Returns true if 'value' is a legal signed immediate 12 bit encoding. static bool isValidSimm12(ssize_t value) { return -(((int)1) << 11) <= value && value < (((int)1) << 11); }; -// Returns true if 'value' is a legal signed immediate 16 bit encoding. -static bool isValidSimm16(ssize_t value) -{ - return -(((int)1) << 15) <= value && value < (((int)1) << 15); -}; - // Returns true if 'value' is a legal signed immediate 20 bit encoding. 
static bool isValidSimm20(ssize_t value) { @@ -311,8 +181,6 @@ void emitIns_R_R_I_I( void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); -// void emitIns_BARR(instruction ins, insBarrier barrier); - void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs); void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); @@ -320,9 +188,6 @@ void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); void emitIns_S_S_R_R( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); -// void emitIns_R_R_S( -// instruction ins, emitAttr attr, regNumber ireg, regNumber ireg2, int sa); - void emitIns_R_R_S_S( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); @@ -402,12 +267,6 @@ void emitIns_Call(EmitCallType callType, bool isJump = false); unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code); -// BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i); -// BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id); -// BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id); -// BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg); -// BYTE* emitOutputShortConstant( -// BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg, emitAttr opSize); unsigned get_curTotalCodeSize(); // bytes of code From 5f896d5b7653e789ed45180789daeb42de7a4a5f Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 21 Feb 2022 14:33:42 +0800 Subject: [PATCH 26/46] [LoongArch64] add compiling the `clrjit_unix_loongarch64_*`. 
--- src/coreclr/gcinfo/CMakeLists.txt | 5 +++-- src/coreclr/jit/CMakeLists.txt | 8 +++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/coreclr/gcinfo/CMakeLists.txt b/src/coreclr/gcinfo/CMakeLists.txt index 8c966bb3403b5..5f10c54e5d9f9 100644 --- a/src/coreclr/gcinfo/CMakeLists.txt +++ b/src/coreclr/gcinfo/CMakeLists.txt @@ -69,11 +69,12 @@ if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM) create_gcinfo_lib(TARGET gcinfo_${TARGET_OS_NAME}_${ARCH_TARGET_NAME} OS ${TARGET_OS_NAME} ARCH ${ARCH_TARGET_NAME}) endif() -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_gcinfo_lib(TARGET gcinfo_unix_loongarch64 OS unix ARCH loongarch64) create_gcinfo_lib(TARGET gcinfo_universal_arm64 OS universal ARCH arm64) create_gcinfo_lib(TARGET gcinfo_unix_x64 OS unix ARCH x64) create_gcinfo_lib(TARGET gcinfo_win_x64 OS win ARCH x64) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm) create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index caf0726d970b3..a91c645898d18 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -576,14 +576,12 @@ install_clr(TARGETS clrjit DESTINATIONS . 
sharedFramework COMPONENT jit) # Enable profile guided optimization add_pgo(clrjit) -#if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm64_${ARCH_HOST_NAME} OS universal ARCH arm64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_unix_x64_${ARCH_HOST_NAME} OS unix ARCH x64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_win_x64_${ARCH_HOST_NAME} OS win ARCH x64 DESTINATIONS .) - #create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) -#endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) +endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) From d4a47ffba185e84189eb22b08f1cf1895143080d Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Wed, 23 Feb 2022 13:06:00 +0800 Subject: [PATCH 27/46] [LoongArch64] delete unused code and amend the format. Also amend inst_Mov. 
--- src/coreclr/jit/codegen.h | 3 + src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/codegenloongarch64.cpp | 2 +- src/coreclr/jit/emit.cpp | 17 +- src/coreclr/jit/emit.h | 31 +- src/coreclr/jit/emitloongarch64.cpp | 491 +-------------- src/coreclr/jit/emitloongarch64.h | 49 -- src/coreclr/jit/instr.cpp | 9 + src/coreclr/jit/instrsloongarch64.h | 829 ++++++++++++------------- src/coreclr/jit/lclvars.cpp | 60 +- src/coreclr/jit/lower.cpp | 23 +- src/coreclr/jit/lowerloongarch64.cpp | 773 +---------------------- src/coreclr/jit/lsraloongarch64.cpp | 14 +- src/coreclr/pal/inc/rt/ntimage.h | 1 + 14 files changed, 525 insertions(+), 1779 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 61732b9f007f2..89d8ba379b124 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1526,7 +1526,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void inst_FS_ST(instruction ins, emitAttr size, TempDsc* tmp, unsigned ofs); +#ifndef TARGET_LOONGARCH64 + // Now this is only used on xarch. 
void inst_TT(instruction ins, GenTree* tree, unsigned offs = 0, int shfv = 0, emitAttr size = EA_UNKNOWN); +#endif void inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumber reg); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 08db8279bb7d1..5900d163d4c41 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -6029,7 +6029,7 @@ void CodeGen::genFnProlog() #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 if (!isOSRx64Root) { genPushCalleeSavedRegisters(); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 2d856550a5486..cfc8b6587cd3e 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -2191,7 +2191,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) if (dataReg != targetReg) { // Assign into targetReg when dataReg (from op1) is not the same register - inst_Mov(targetType, targetReg, dataReg, true); + inst_Mov(targetType, targetReg, dataReg, true, emitActualTypeSize(targetType)); } genProduceReg(lclNode); } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 574b66263beb6..c371a33f03cda 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -795,21 +795,13 @@ insGroup* emitter::emitSavIG(bool emitAdd) } #endif -// Record how many instructions and bytes of code this group contains + // Record how many instructions and bytes of code this group contains -#ifdef TARGET_LOONGARCH64 - noway_assert((unsigned int)emitCurIGinsCnt == emitCurIGinsCnt); -#else noway_assert((BYTE)emitCurIGinsCnt == emitCurIGinsCnt); -#endif noway_assert((unsigned short)emitCurIGsize == emitCurIGsize); -#ifdef TARGET_LOONGARCH64 - ig->igInsCnt = (unsigned int)emitCurIGinsCnt; -#else ig->igInsCnt = 
(BYTE)emitCurIGinsCnt; -#endif - ig->igSize = (unsigned short)emitCurIGsize; + ig->igSize = (unsigned short)emitCurIGsize; emitCurCodeOffset += emitCurIGsize; assert(IsCodeAligned(emitCurCodeOffset)); @@ -1178,11 +1170,6 @@ void emitter::emitBegFN(bool hasFramePtr ig->igNext = nullptr; -//#ifdef TARGET_LOONGARCH64 -// On future maybe use this. -// ig->igJmpCnt = 0; -//#endif - #ifdef DEBUG emitScratchSigInfo = nullptr; #endif // DEBUG diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index fae914498d6e8..5008f79e680f9 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -303,12 +303,8 @@ struct insGroup #if EMIT_TRACK_STACK_DEPTH unsigned igStkLvl; // stack level on entry #endif - regMaskSmall igGCregs; // set of registers with live GC refs -#ifdef TARGET_LOONGARCH64 - unsigned int igInsCnt; // # of instructions in this group -#else + regMaskSmall igGCregs; // set of registers with live GC refs unsigned char igInsCnt; // # of instructions in this group -#endif #else // REGMASK_BITS @@ -598,20 +594,19 @@ class emitter static_assert_no_msg(INS_count <= 512); instruction _idIns : 9; #elif defined(TARGET_LOONGARCH64) - /* TODO: not include SIMD-vector. */ + // TODO-LoongArch64: not include SIMD-vector. static_assert_no_msg(INS_count <= 512); instruction _idIns : 9; -#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) +#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) static_assert_no_msg(INS_count <= 256); instruction _idIns : 8; -#endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) +#endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) // The format for the instruction #if defined(TARGET_XARCH) static_assert_no_msg(IF_COUNT <= 128); insFormat _idInsFmt : 7; #elif defined(TARGET_LOONGARCH64) - // insFormat _idInsFmt : 5;// NOTE: LOONGARCH64 does not used the _idInsFmt . unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. 
If not enough, please use the // _idInsCount. // unsigned _idInsCount : 5; // the instruction(s) count of this instrDesc described. @@ -683,13 +678,11 @@ class emitter opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16, 5=32 // At this point we have fully consumed first DWORD so that next field // doesn't cross a byte boundary. -#elif defined(TARGET_ARM64) -// Moved the definition of '_idOpSize' later so that we don't cross a 32-bit boundary when laying out bitfields -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) /* _idOpSize defined bellow. */ -#else // ARM - opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 -#endif // ARM +#else + opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 +#endif // ARM || TARGET_LOONGARCH64 // On Amd64, this is where the second DWORD begins // On System V a call could return a struct in 2 registers. The instrDescCGCA struct below has member that @@ -738,7 +731,7 @@ class emitter #endif #ifdef TARGET_LOONGARCH64 - /* TODO: for LOONGARCH: maybe delete on future. */ + // TODO-LoongArch64: maybe delete on future. opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the // accessing a local on stack. @@ -759,7 +752,7 @@ class emitter // For Arm64, we have used 17 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (17) #elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) -// For xarch and LoongArch64, we have used 14 bits from the second DWORD. + // For xarch and LoongArch64, we have used 14 bits from the second DWORD. 
#define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture @@ -916,7 +909,7 @@ class emitter regNumber _idReg3 : REGNUM_BITS; regNumber _idReg4 : REGNUM_BITS; }; -#elif defined(TARGET_LOONGARCH64) // TARGET_XARCH +#elif defined(TARGET_LOONGARCH64) struct { unsigned int iiaEncodedInstr; // instruction's binary encoding. @@ -947,7 +940,7 @@ class emitter { return iiaJmpOffset; } -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_LOONGARCH64) } _idAddrUnion; diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 2d260ffed00ac..dbf81a1e207fe 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -455,20 +455,8 @@ const emitJumpKind emitReverseJumpKinds[] = { /*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) { - assert(!"unimplemented on LOONGARCH yet"); + NYI_LOONGARCH64("emitInsToJumpKind-----unimplemented on LOONGARCH64 yet----"); return EJ_NONE; -#if 0 - for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++) - { - if (ins == emitJumpKindInstructions[i]) - { - emitJumpKind ret = (emitJumpKind)i; - assert(EJ_NONE < ret && ret < EJ_COUNT); - return ret; - } - } - unreached(); -#endif } /***************************************************************************** @@ -1006,50 +994,6 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of appendToCurIG(id); } -/***************************************************************************** - * - * Add an instruction referencing a single register. 
- */ - -void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - code_t code = emitInsCode(ins); - -#ifdef DEBUG -#endif - switch (ins) - { - case INS_jr: - case INS_jr_hb: - case INS_mthi: - case INS_mtlo: - code |= (reg & 0x1f)<<21;//rs - break; - - case INS_mfhi://mfhi - case INS_mflo: - code |= (reg & 0x1f)<<11;//rd - assert(isGeneralRegister(reg)); - break; - - default: - unreached(); - } - - instrDesc* id = emitNewInstr(attr); - - id->idIns(ins); - id->idReg1(reg); - id->idAddr()->iiaSetInstrEncode(code); - - id->idCodeSize(4); - //dispIns(id); - appendToCurIG(id); -#endif -} - /***************************************************************************** * * Add an instruction referencing a register and a constant. @@ -1058,7 +1002,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) { code_t code = emitInsCode(ins); - //#ifdef DEBUG + switch (ins) { case INS_lu12i_w: @@ -1115,7 +1059,6 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t unreached(); break; } // end switch (ins) - //#endif instrDesc* id = emitNewInstr(attr); @@ -1128,9 +1071,6 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t appendToCurIG(id); } -// NOTEADD:This function is new in emitarm64.cpp,so it be added to emitloongarch.cpp. -// But I don't konw how to change it so that it can be used on LA. -// I just add a statement "assert(!"unimplemented on LOONGARCH yet");". 
//------------------------------------------------------------------------ // emitIns_Mov: Emits a move instruction // @@ -1144,11 +1084,16 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t // void emitter::emitIns_Mov( instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) -{ // TODO: should amend for LoongArch64/LOONGARCH64. +{ // TODO-LoongArch64: should amend for LoongArch64/LOONGARCH64. assert(IsMovInstruction(ins)); if (!canSkip || (dstReg != srcReg)) - emitIns_R_R(ins, attr, dstReg, srcReg); + { + if ((EA_4BYTE == attr) && (INS_mov == ins)) + emitIns_R_R_I(INS_slli_w, attr, dstReg, srcReg, 0); + else + emitIns_R_R(ins, attr, dstReg, srcReg); + } } /***************************************************************************** @@ -1313,42 +1258,6 @@ void emitter::emitIns_R_R( appendToCurIG(id); } -void emitter::emitIns_R_I_I( - instruction ins, emitAttr attr, regNumber reg, ssize_t hint, ssize_t off, insOpts opt /* = INS_OPTS_NONE */) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 -#ifdef DEBUG - switch (ins) - { - case INS_pref: - assert(isGeneralRegister(reg)); - assert((-32769 < off) && (off < 32768)); - break; - - default: - unreached(); - } -#endif - code_t code = emitInsCode(ins); - - code |= (hint & 0x1f)<<16; //hint - code |= (reg & 0x1f)<<21; //rs or base - code |= (off & 0xffff); //offset - - ssize_t imms[] = {hint, off}; - instrDesc* id = emitNewInstr(attr); - - id->idIns(ins); - id->idReg1(reg); - id->idAddr()->iiaSetInstrEncode(code); - - id->idCodeSize(4); - //dispIns(id); - appendToCurIG(id); -#endif -} - /***************************************************************************** * * Add an instruction referencing two registers and a constant. 
@@ -1921,23 +1830,6 @@ void emitter::emitIns_R_R_R_I(instruction ins, appendToCurIG(id); } -#if 1 -/***************************************************************************** - * - * Add an instruction referencing three registers, with an extend option - */ - -void emitter::emitIns_R_R_R_Ext(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - insOpts opt, /* = INS_OPTS_NONE */ - int shiftAmount) /* = -1 -- unset */ -{ - assert(!"unimplemented on LOONGARCH yet"); -} - /***************************************************************************** * * Add an instruction referencing two registers and two constants. @@ -2031,97 +1923,6 @@ void emitter::emitIns_R_R_R_R( appendToCurIG(id); } -/***************************************************************************** - * - * Add an instruction with a static data member operand. If 'size' is 0, the - * instruction operates on the address of the static member instead of its - * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]"). - */ - -void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_C"); -#endif -} - -/***************************************************************************** - * - * Add an instruction referencing stack-based local variable. - */ - -void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_S"); -#endif -} - -#if 0 -/***************************************************************************** - * - * Add an instruction referencing a register and a stack-based local variable. 
- */ - -void emitter::emitIns_R_R_S( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int sa) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 1 - regNumber regs[] = {reg1, reg2}; - ssize_t imm = (ssize_t)sa; - emitAllocInstrOnly(emitInsOps(ins, regs, &imm), attr); -#else - instrDesc* id = emitNewInstrCns(attr, sa); - insFormat fmt = IF_FMT_FUNC; - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); - - id->idReg1(reg1); - id->idReg2(reg2); - - //dispIns(id); - appendToCurIG(id); -#endif -} -#endif - -/***************************************************************************** - * - * Add an instruction referencing two register and consectutive stack-based local variable slots. - */ -void emitter::emitIns_R_R_S_S( - instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -/***************************************************************************** - * - * Add an instruction referencing consecutive stack-based local variable slots and two registers - */ -void emitter::emitIns_S_S_R_R( - instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -/***************************************************************************** - * - * Add an instruction referencing stack-based local variable and an immediate - */ -void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_S_I"); -#endif -} - /***************************************************************************** * * Add an instruction with a register + static member operands. @@ -2144,7 +1945,7 @@ void emitter::emitIns_R_C( // pcaddu12i reg, off-hi-20bits // load reg, offs_lo-12bits(reg) #when ins is load ins. // - // INS_OPTS_RC: ins == bl placeholders. 
3-ins: ////TODO: maybe optimize. + // INS_OPTS_RC: ins == bl placeholders. 3-ins: ////TODO-LoongArch64: maybe optimize. // lu12i_w reg, addr-hi-20bits // ori reg, reg, addr-lo-12bits // lu32i_d reg, addr_hi-32bits @@ -2168,7 +1969,7 @@ void emitter::emitIns_R_C( id->idCodeSize(8); } else - id->idCodeSize(12); // TODO: maybe optimize. + id->idCodeSize(12); // TODO-LoongArch64: maybe optimize. if (EA_IS_GCREF(attr)) { @@ -2183,7 +1984,7 @@ void emitter::emitIns_R_C( id->idOpSize(EA_PTRSIZE); } - // TODO: this maybe deleted. + // TODO-LoongArch64: this maybe deleted. id->idSetIsBound(); // We won't patch address since we will know the exact distance // once JIT code and data are allocated together. @@ -2195,38 +1996,9 @@ void emitter::emitIns_R_C( appendToCurIG(id); } -/***************************************************************************** - * - * Add an instruction with a static member + constant. - */ - -void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_C_I"); -#endif -} - -/***************************************************************************** - * - * Add an instruction with a static member + register operands. - */ - -void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - assert(!"emitIns_C_R not supported for RyuJIT backend"); -#endif -} - void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_R_AR"); -#endif + NYI_LOONGARCH64("emitIns_R_AR-----unimplemented/unused on LOONGARCH64 yet----"); } // This computes address from the immediate which is relocatable. 
@@ -2275,54 +2047,6 @@ void emitter::emitIns_R_AI(instruction ins, appendToCurIG(id); } -void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_AR_R"); -#endif -} - -void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_R_ARR"); -#endif -} - -void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_R_ARR"); -#endif -} - -void emitter::emitIns_R_ARX( - instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_R_ARR"); -#endif -} - -/***************************************************************************** - * - * Add a data label instruction. - */ -void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg) -{ - NYI("emitIns_R_D"); -} - -void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int imm) -{ - assert(!"unimplemented on LOONGARCH yet"); -} -#endif - /***************************************************************************** * * Record that a jump instruction uses the short encoding @@ -2330,8 +2054,8 @@ void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, reg */ void emitter::emitSetShortJump(instrDescJmp* id) { - /* TODO: maybe delete it on future. */ - return; + // TODO-LoongArch64: maybe delete it on future. 
+ NYI_LOONGARCH64("emitSetShortJump-----unimplemented/unused on LOONGARCH64 yet----"); } /***************************************************************************** @@ -2395,7 +2119,7 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) { - assert(!"unimplemented on LOONGARCH yet: emitIns_J_R."); // not used. + NYI_LOONGARCH64("emitIns_J_R-----unimplemented/unused on LOONGARCH64 yet----"); } // NOTE: @@ -2466,7 +2190,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) id->idjShort = false; - ////TODO: maybe deleted this for loongarch64. + // TODO-LoongArch64: maybe deleted this. id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); #ifdef DEBUG if (emitComp->opts.compLongAddress) // Force long branches @@ -2496,7 +2220,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) // void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1, regNumber reg2) { - // TODO: + // TODO-LoongArch64: // Now the emitIns_J_cond_la() is only the short condition branch. // There is no long condition branch for loongarch64 so far. // For loongarch64, the long condition branch is like this: @@ -2734,7 +2458,7 @@ void emitter::emitIns_Call(EmitCallType callType, id->idIns(ins); id->idInsOpt(INS_OPTS_C); - // TODO: maybe optimize. + // TODO-LoongArch64: maybe optimize. // INS_OPTS_C: placeholders. 1/2/4-ins: // if (callType == EC_INDIR_R) @@ -2895,7 +2619,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t } else { - // lu12i_w t2, dst_offset_lo32-hi //TODO: maybe optimize. + // lu12i_w t2, dst_offset_lo32-hi // TODO-LoongArch64: maybe optimize. 
// ori t2, t2, dst_offset_lo32-lo // lu32i_d t2, dst_offset_hi32-lo // jirl t2 @@ -3338,7 +3062,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // pcaddu12i reg, offset-hi20 // addi_d reg, reg, offset-lo12 // - // else: ////TODO:optimize. + // else: // TODO-LoongArch64:optimize. // lu12i_w reg, dst-hi-12bits // ori reg, reg, dst-lo-12bits // lu32i_d reg, dst-hi-32bits @@ -3410,7 +3134,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // bnez/beqz dst | b dst | b dst //_next: // - // case_2: <---------- TODO: from INS_OPTS_J: + // case_2: <---------- TODO-LoongArch64: from INS_OPTS_J: // bnez/beqz _next: // pcaddi r21,off-hi // jirl r0,r21,off-lo @@ -3439,7 +3163,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { regNumber reg2 = id->idReg2(); assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); - // assert((INS_bceqz <= ins) && (ins <= INS_bl));//TODO + // assert((INS_bceqz <= ins) && (ins <= INS_bl)); // TODO-LoongArch64 if ((INS_beq == ins) || (INS_bne == ins)) { if ((-0x400000 <= imm) && (imm < 0x400000)) @@ -3766,12 +3490,6 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) goto Label_OPCODE_0; // break; } - // case 0x1: - //{ - // assert(!"unimplemented on loongarch yet!"); - // //goto Label_OPCODE_1; - // break; - //} case 0x2: { goto Label_OPCODE_2; @@ -6129,7 +5847,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) { - assert(!"unimplemented on LOONGARCH yet"); + NYI_LOONGARCH64("emitInsBinary-----unimplemented on LOONGARCH64 yet----"); return REG_R0; } @@ -6382,10 +6100,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { if (attr == EA_4BYTE) emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, dst->GetRegNum(), REG_R0, 63, 32); - // else - //{ - // assert(!"unimplemented on LOONGARCH yet: ulong * ulong 
!!!"); - //} } if (needCheckOv) @@ -6586,7 +6300,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins { insExecutionCharacteristics result; - // TODO: support this function for LoongArch64. + // TODO-LoongArch64: support this function. result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; result.insLatency = PERFSCORE_LATENCY_ZERO; result.insMemoryAccessKind = PERFSCORE_MEMORY_NONE; @@ -6681,95 +6395,8 @@ bool emitter::IsMovInstruction(instruction ins) bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip) { - assert(!"unimplemented on LOONGARCH yet"); - return false; -#if 0 - assert(ins == INS_mov); - - if (canSkip && (dst == src)) - { - // These elisions used to be explicit even when optimizations were disabled - return true; - } - - if (!emitComp->opts.OptimizationEnabled()) - { - // The remaining move elisions should only happen if optimizations are enabled - return false; - } - - if (dst == src) - { - // A mov with a EA_4BYTE has the side-effect of clearing the upper bits - // So only eliminate mov instructions that are not clearing the upper bits - // - if (isGeneralRegisterOrSP(dst) && (size == EA_8BYTE)) - { - JITDUMP("\n -- suppressing mov because src and dst is same 8-byte register.\n"); - return true; - } - else if (isVectorRegister(dst) && (size == EA_16BYTE)) - { - JITDUMP("\n -- suppressing mov because src and dst is same 16-byte register.\n"); - return true; - } - } - - bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0); - - if (!isFirstInstrInBlock && // Don't optimize if instruction is not the first instruction in IG. - (emitLastIns != nullptr) && - (emitLastIns->idIns() == INS_mov) && // Don't optimize if last instruction was not 'mov'. - (emitLastIns->idOpSize() == size)) // Don't optimize if operand size is different than previous instruction. 
- { - // Check if we did same move in prev instruction except dst/src were switched. - regNumber prevDst = emitLastIns->idReg1(); - regNumber prevSrc = emitLastIns->idReg2(); - insFormat lastInsfmt = emitLastIns->idInsFmt(); - - // Sometimes emitLastIns can be a mov with single register e.g. "mov reg, #imm". So ensure to - // optimize formats that does vector-to-vector or scalar-to-scalar register movs. - // - const bool isValidLastInsFormats = - ((lastInsfmt == IF_DV_3C) || (lastInsfmt == IF_DR_2G) || (lastInsfmt == IF_DR_2E)); - - if (isValidLastInsFormats && (prevDst == dst) && (prevSrc == src)) - { - assert(emitLastIns->idOpSize() == size); - JITDUMP("\n -- suppressing mov because previous instruction already moved from src to dst register.\n"); - return true; - } - - if ((prevDst == src) && (prevSrc == dst) && isValidLastInsFormats) - { - // For mov with EA_8BYTE, ensure src/dst are both scalar or both vector. - if (size == EA_8BYTE) - { - if (isVectorRegister(src) == isVectorRegister(dst)) - { - JITDUMP("\n -- suppressing mov because previous instruction already did an opposite move from dst " - "to src register.\n"); - return true; - } - } - - // For mov with EA_16BYTE, both src/dst will be vector. - else if (size == EA_16BYTE) - { - assert(isVectorRegister(src) && isVectorRegister(dst)); - assert(lastInsfmt == IF_DV_3C); - - JITDUMP("\n -- suppressing mov because previous instruction already did an opposite move from dst to " - "src register.\n"); - return true; - } - - // For mov of other sizes, don't optimize because it has side-effect of clearing the upper bits. 
- } - } - + NYI_LOONGARCH64("IsRedundantMov-----unimplemented on LOONGARCH64 yet----"); return false; -#endif } //---------------------------------------------------------------------------------------- @@ -6798,71 +6425,7 @@ bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regN bool emitter::IsRedundantLdStr( instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt) { - assert(!"unimplemented on LOONGARCH yet"); - return false; -#if 0 - bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0); - - if (((ins != INS_ldr) && (ins != INS_str)) || (isFirstInstrInBlock) || (emitLastIns == nullptr)) - { - return false; - } - - regNumber prevReg1 = emitLastIns->idReg1(); - regNumber prevReg2 = emitLastIns->idReg2(); - insFormat lastInsfmt = emitLastIns->idInsFmt(); - emitAttr prevSize = emitLastIns->idOpSize(); - ssize_t prevImm = emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns(); - - // Only optimize if: - // 1. "base" or "base plus immediate offset" addressing modes. - // 2. Addressing mode matches with previous instruction. - // 3. The operand size matches with previous instruction - if (((fmt != IF_LS_2A) && (fmt != IF_LS_2B)) || (fmt != lastInsfmt) || (prevSize != size)) - { - return false; - } - - if ((ins == INS_ldr) && (emitLastIns->idIns() == INS_str)) - { - // If reg1 is of size less than 8-bytes, then eliminating the 'ldr' - // will not zero the upper bits of reg1. - - // Make sure operand size is 8-bytes - // str w0, [x1, #4] - // ldr w0, [x1, #4] <-- can't eliminate because upper-bits of x0 won't get set. 
- if (size != EA_8BYTE) - { - return false; - } - - if ((prevReg1 == reg1) && (prevReg2 == reg2) && (imm == prevImm)) - { - JITDUMP("\n -- suppressing 'ldr reg%u [reg%u, #%u]' as previous 'str reg%u [reg%u, #%u]' was from same " - "location.\n", - reg1, reg2, imm, prevReg1, prevReg2, prevImm); - return true; - } - } - else if ((ins == INS_str) && (emitLastIns->idIns() == INS_ldr)) - { - // Make sure src and dst registers are not same. - // ldr x0, [x0, #4] - // str x0, [x0, #4] <-- can't eliminate because [x0+3] is not same destination as previous source. - // Note, however, that we can not eliminate store in the following sequence - // ldr wzr, [x0, #4] - // str wzr, [x0, #4] - // since load operation doesn't (and can't) change the value of its destination register. - if ((reg1 != reg2) && (prevReg1 == reg1) && (prevReg2 == reg2) && (imm == prevImm) && (reg1 != REG_ZR)) - { - JITDUMP("\n -- suppressing 'str reg%u [reg%u, #%u]' as previous 'ldr reg%u [reg%u, #%u]' was from same " - "location.\n", - reg1, reg2, imm, prevReg1, prevReg2, prevImm); - return true; - } - } - + NYI_LOONGARCH64("IsRedundantLdStr-----unimplemented on LOONGARCH64 yet----"); return false; -#endif } #endif // defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 0d9b5da867eb4..2c9cbfd211bf1 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -70,7 +70,6 @@ void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTr // Emit the 32-bit LOONGARCH64 instruction 'code' into the 'dst' buffer unsigned emitOutput_Instr(BYTE* dst, code_t code); -// NOTEADD: New functions in emitarm64.h // Method to do check if mov is redundant with respect to the last instruction. // If yes, the caller of this method can choose to omit current mov instruction. 
static bool IsMovInstruction(instruction ins); @@ -104,7 +103,6 @@ inline static unsigned getBitWidth(emitAttr size) inline static bool isGeneralRegister(regNumber reg) { - // Excludes REG_R0 ?? return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); } @@ -131,14 +129,8 @@ void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int o void emitIns_I(instruction ins, emitAttr attr, ssize_t imm); void emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs); -void emitIns_R_I_I( - instruction ins, emitAttr attr, regNumber reg1, ssize_t hint, ssize_t off, insOpts opt = INS_OPTS_NONE); - -void emitIns_R(instruction ins, emitAttr attr, regNumber reg); - void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE); -// NOTEADD: NEW function in emitarm64. void emitIns_Mov( instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE); @@ -167,32 +159,11 @@ void emitIns_R_R_R_I(instruction ins, insOpts opt = INS_OPTS_NONE, emitAttr attrReg2 = EA_UNKNOWN); -void emitIns_R_R_R_Ext(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - insOpts opt = INS_OPTS_NONE, - int shiftAmount = -1); - -// NODECHANGE: ADD an arg. 
void emitIns_R_R_I_I( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); -void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs); - -void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); - -void emitIns_S_S_R_R( - instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); - -void emitIns_R_R_S_S( - instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); - -void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val); - void emitIns_R_C( instruction ins, emitAttr attr, regNumber reg, regNumber tmpReg, CORINFO_FIELD_HANDLE fldHnd, int offs); @@ -200,33 +171,13 @@ void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); -void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs); - -void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, ssize_t offs, ssize_t val); - -void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg); - -void emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int instrCount); - -void emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int offs); - void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); -// NODECHANGE: ADD a description of arguments "disp" void emitIns_R_AI(instruction ins, emitAttr attr, regNumber reg, ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); -void emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); - -void emitIns_R_ARR(instruction ins, emitAttr 
attr, regNumber ireg, regNumber reg, regNumber rg2, int disp); - -void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp); - -void emitIns_R_ARX( - instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp); - enum EmitCallType { diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index e9f127786b631..bae791f106393 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -424,7 +424,11 @@ void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr s size = emitActualTypeSize(type); } +#ifdef TARGET_LOONGARCH64 + NYI_LOONGARCH64("inst_RV-----unimplemented/unused on LOONGARCH64 yet----"); +#else GetEmitter()->emitIns_R(ins, size, reg); +#endif } /***************************************************************************** @@ -647,6 +651,8 @@ void CodeGen::inst_RV_IV( * been made addressable). */ +#ifndef TARGET_LOONGARCH64 +// Now this is only used on xarch. void CodeGen::inst_TT(instruction ins, GenTree* tree, unsigned offs, int shfv, emitAttr size) { bool sizeInferred = false; @@ -747,6 +753,7 @@ void CodeGen::inst_TT(instruction ins, GenTree* tree, unsigned offs, int shfv, e assert(!"invalid address"); } } +#endif //------------------------------------------------------------------------ // inst_TT_RV: Generate a store of a lclVar @@ -903,7 +910,9 @@ void CodeGen::inst_RV_TT(instruction ins, // For LoongArch64-ABI, the float arg might be passed by integer register, // when there is no float register left but there is integer register(s) left. if (emitter::isFloatReg(reg)) + { assert((ins == INS_fld_d) || (ins == INS_fld_s)); + } else if (emitter::isGeneralRegister(reg) && (ins != INS_lea)) { ins = size == EA_4BYTE ? 
INS_ld_w : INS_ld_d; diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h index 99cf4304a6200..e3da6728fd982 100644 --- a/src/coreclr/jit/instrsloongarch64.h +++ b/src/coreclr/jit/instrsloongarch64.h @@ -2,18 +2,14 @@ // The .NET Foundation licenses this file to you under the MIT license. /***************************************************************************** - * Loongarch64 instructions for JIT compiler + * LoongArch64 instructions for JIT compiler * - * id -- the enum name for the instruction - * nm -- textual name (for assembly dipslay) - * fp -- floating point instruction + * id -- the enum name for the instruction + * nm -- textual name (for assembly dipslay) + * fp -- floating point instruction * ld/st/cmp -- load/store/compare instruction - * fmt -- encoding format used by this instruction - * e1 -- encoding 1 - * e2 -- encoding 2 - * e3 -- encoding 3 - * e4 -- encoding 4 - * e5 -- encoding 5 + * fmt -- encoding format used by this instruction + * encode -- encoding 1 * ******************************************************************************/ @@ -35,459 +31,458 @@ // emitInsMayWriteMultipleRegs in emitLoongarch64.cpp. // clang-format off -INST(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE) +INST(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE) +INST(nop , "nop", 0, 0, IF_LA, 0x03400000) + // INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number. +INST(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000) +INST(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100) -INST(nop , "nop", 0, 0, IF_LA, 0x03400000) +INST(beq, "beq", 0, 0, IF_LA, 0x58000000) +INST(bne, "bne", 0, 0, IF_LA, 0x5c000000) -////INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number. 
-INST(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000) -INST(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100) - -INST(beq, "beq", 0, 0, IF_LA, 0x58000000) -INST(bne, "bne", 0, 0, IF_LA, 0x5c000000) - -INST(blt, "blt", 0, 0, IF_LA, 0x60000000) -INST(bge, "bge", 0, 0, IF_LA, 0x64000000) -INST(bltu, "bltu", 0, 0, IF_LA, 0x68000000) -INST(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000) +INST(blt, "blt", 0, 0, IF_LA, 0x60000000) +INST(bge, "bge", 0, 0, IF_LA, 0x64000000) +INST(bltu, "bltu", 0, 0, IF_LA, 0x68000000) +INST(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000) ////R_I. -INST(beqz, "beqz", 0, 0, IF_LA, 0x40000000) -INST(bnez, "bnez", 0, 0, IF_LA, 0x44000000) +INST(beqz, "beqz", 0, 0, IF_LA, 0x40000000) +INST(bnez, "bnez", 0, 0, IF_LA, 0x44000000) ////I. -INST(b, "b", 0, 0, IF_LA, 0x50000000) -INST(bl, "bl", 0, 0, IF_LA, 0x54000000) +INST(b, "b", 0, 0, IF_LA, 0x50000000) +INST(bl, "bl", 0, 0, IF_LA, 0x54000000) -//////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////// ////NOTE: Begin //// the fllowing instructions will be used by emitter::emitInsMayWriteToGCReg(). //////////////////////////////////////////////// // enum name FP LD/ST FMT ENCODE - +// ////NOTE: mov must be the first one !!! more info to see emitter::emitInsMayWriteToGCReg(). -INST(mov, "mov", 0, 0, IF_LA, 0x03800000) - // mov rd,rj - //NOTE: On loongarch, usually it's name is move, but here for compatible using mov. - // In fact, mov is an alias commond, "ori rd,rj,0" -INST(dneg, "dneg", 0, 0, IF_LA, 0x00118000) - //dneg is a alias instruction. - //sub_d rd, zero, rk -INST(neg, "neg", 0, 0, IF_LA, 0x00110000) - //neg is a alias instruction. - //sub_w rd, zero, rk -INST(not, "not", 0, 0, IF_LA, 0x00140000) - //not is a alias instruction. 
- //nor rd, rj, zero - -// enum:id name FP LD/ST Formate ENCODE +/////////////////////////////////////////////////////////////////////////////////////////// +// mov rd,rj +// In fact, mov is an alias instruction, "ori rd,rj,0" +INST(mov, "mov", 0, 0, IF_LA, 0x03800000) + //dneg is a alias instruction. + //sub_d rd, zero, rk +INST(dneg, "dneg", 0, 0, IF_LA, 0x00118000) + //neg is a alias instruction. + //sub_w rd, zero, rk +INST(neg, "neg", 0, 0, IF_LA, 0x00110000) + //not is a alias instruction. + //nor rd, rj, zero +INST(not, "not", 0, 0, IF_LA, 0x00140000) + +// enum:id name FP LD/ST Formate ENCODE ////R_R_R. -INST(add_w, "add.w", 0, 0, IF_LA, 0x00100000) -INST(add_d, "add.d", 0, 0, IF_LA, 0x00108000) -INST(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) -INST(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) - -INST(and, "and", 0, 0, IF_LA, 0x00148000) -INST(or, "or", 0, 0, IF_LA, 0x00150000) -INST(nor, "nor", 0, 0, IF_LA, 0x00140000) -INST(xor, "xor", 0, 0, IF_LA, 0x00158000) -INST(andn, "andn", 0, 0, IF_LA, 0x00168000) -INST(orn, "orn", 0, 0, IF_LA, 0x00160000) - -INST(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) -INST(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) -INST(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) -INST(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) -INST(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) -INST(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) -INST(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) -INST(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) -INST(div_w, "div.w", 0, 0, IF_LA, 0x00200000) -INST(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) -INST(div_d, "div.d", 0, 0, IF_LA, 0x00220000) -INST(div_du, "div.du", 0, 0, IF_LA, 0x00230000) -INST(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) -INST(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) -INST(mod_d, "mod.d", 0, 0, IF_LA, 0x00228000) -INST(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) - -INST(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) -INST(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) -INST(sra_w, "sra.w", 0, 0, IF_LA, 0x00180000) 
-INST(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) -INST(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) -INST(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) -INST(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) -INST(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) - -INST(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) -INST(masknez, "masknez", 0, 0, IF_LA, 0x00138000) - -INST(slt, "slt", 0, 0, IF_LA, 0x00120000) -INST(sltu, "sltu", 0, 0, IF_LA, 0x00128000) - -INST(amswap_w, "amswap.w", 0, 0, IF_LA, 0x38600000) -INST(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) -INST(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) -INST(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) -INST(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) -INST(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) -INST(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) -INST(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) -INST(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) -INST(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) -INST(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) -INST(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) -INST(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) -INST(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) -INST(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) -INST(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) -INST(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) -INST(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) -INST(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) -INST(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) -INST(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) -INST(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) -INST(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) -INST(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) -INST(ammin_w, "ammin.w", 0, 0, IF_LA, 0x38660000) -INST(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) -INST(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) -INST(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) -INST(ammax_wu, "ammax.wu", 0, 0, IF_LA, 0x38670000) 
-INST(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) -INST(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) -INST(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) -INST(ammin_wu, "ammin.wu", 0, 0, IF_LA, 0x38680000) -INST(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) -INST(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) -INST(ammin_db_du, "ammin_db.du", 0, 0, IF_LA, 0x38718000) - -INST(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) -INST(crc_w_h_w, "crc.w.h.w", 0, 0, IF_LA, 0x00248000) -INST(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) -INST(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) -INST(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) -INST(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) -INST(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) -INST(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) +INST(add_w, "add.w", 0, 0, IF_LA, 0x00100000) +INST(add_d, "add.d", 0, 0, IF_LA, 0x00108000) +INST(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) +INST(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) + +INST(and, "and", 0, 0, IF_LA, 0x00148000) +INST(or, "or", 0, 0, IF_LA, 0x00150000) +INST(nor, "nor", 0, 0, IF_LA, 0x00140000) +INST(xor, "xor", 0, 0, IF_LA, 0x00158000) +INST(andn, "andn", 0, 0, IF_LA, 0x00168000) +INST(orn, "orn", 0, 0, IF_LA, 0x00160000) + +INST(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) +INST(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) +INST(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) +INST(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) +INST(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) +INST(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) +INST(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) +INST(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) +INST(div_w, "div.w", 0, 0, IF_LA, 0x00200000) +INST(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) +INST(div_d, "div.d", 0, 0, IF_LA, 0x00220000) +INST(div_du, "div.du", 0, 0, IF_LA, 0x00230000) +INST(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) +INST(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) +INST(mod_d, 
"mod.d", 0, 0, IF_LA, 0x00228000) +INST(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) + +INST(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) +INST(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) +INST(sra_w, "sra.w", 0, 0, IF_LA, 0x00180000) +INST(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) +INST(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) +INST(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) +INST(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) +INST(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) + +INST(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) +INST(masknez, "masknez", 0, 0, IF_LA, 0x00138000) + +INST(slt, "slt", 0, 0, IF_LA, 0x00120000) +INST(sltu, "sltu", 0, 0, IF_LA, 0x00128000) + +INST(amswap_w, "amswap.w", 0, 0, IF_LA, 0x38600000) +INST(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) +INST(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) +INST(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) +INST(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) +INST(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) +INST(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) +INST(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) +INST(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) +INST(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) +INST(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) +INST(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) +INST(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) +INST(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) +INST(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) +INST(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) +INST(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) +INST(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) +INST(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) +INST(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) +INST(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) +INST(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) +INST(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) +INST(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) +INST(ammin_w, "ammin.w", 0, 0, IF_LA, 
0x38660000) +INST(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) +INST(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) +INST(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) +INST(ammax_wu, "ammax.wu", 0, 0, IF_LA, 0x38670000) +INST(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) +INST(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) +INST(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) +INST(ammin_wu, "ammin.wu", 0, 0, IF_LA, 0x38680000) +INST(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) +INST(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) +INST(ammin_db_du, "ammin_db.du", 0, 0, IF_LA, 0x38718000) + +INST(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) +INST(crc_w_h_w, "crc.w.h.w", 0, 0, IF_LA, 0x00248000) +INST(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) +INST(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) +INST(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) +INST(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) +INST(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) +INST(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) ////R_R_R_I. -INST(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) -INST(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) -INST(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) +INST(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) +INST(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) +INST(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) -INST(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) -INST(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) +INST(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) +INST(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) -INST(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) +INST(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) ////R_I. 
-INST(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) -INST(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) +INST(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) +INST(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) -INST(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) -INST(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) -INST(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) -INST(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) +INST(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) +INST(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) +INST(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) +INST(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) ////R_R. -INST(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) -INST(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) -INST(clo_w, "clo.w", 0, 0, IF_LA, 0x00001000) -INST(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) -INST(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) -INST(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) -INST(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) -INST(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) -INST(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) -INST(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) -INST(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) -INST(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) -INST(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) -INST(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) -INST(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) -INST(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) -INST(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) -INST(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) -INST(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) -INST(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) -INST(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) -INST(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) -INST(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) -INST(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) +INST(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) +INST(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) +INST(clo_w, "clo.w", 0, 0, IF_LA, 0x00001000) 
+INST(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) +INST(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) +INST(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) +INST(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) +INST(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) +INST(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) +INST(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) +INST(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) +INST(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) +INST(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) +INST(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) +INST(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) +INST(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) +INST(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) +INST(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) +INST(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) +INST(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) +INST(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) +INST(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) +INST(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) +INST(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) ////R_R_I_I. -INST(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) -INST(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) -INST(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) -INST(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) +INST(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) +INST(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) +INST(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) +INST(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) ////Load. 
-INST(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) -INST(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) -INST(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) -INST(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) -INST(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) -INST(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) -INST(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) - -INST(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) -INST(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) -INST(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) -INST(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) - -INST(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) -INST(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) -INST(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) -INST(ldx_d, "ldx.d", 0, LD, IF_LA, 0x380c0000) -INST(ldx_bu, "ldx.bu", 0, LD, IF_LA, 0x38200000) -INST(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) -INST(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) - -INST(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) -INST(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) -INST(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) -INST(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) -INST(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) -INST(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) -INST(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) -INST(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) +INST(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) +INST(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) +INST(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) +INST(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) +INST(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) +INST(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) +INST(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) + +INST(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) +INST(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) +INST(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) +INST(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) + +INST(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) +INST(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) +INST(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) +INST(ldx_d, "ldx.d", 0, LD, IF_LA, 0x380c0000) +INST(ldx_bu, 
"ldx.bu", 0, LD, IF_LA, 0x38200000) +INST(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) +INST(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) + +INST(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) +INST(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) +INST(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) +INST(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) +INST(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) +INST(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) +INST(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) +INST(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) ////R_R_I. -INST(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) -INST(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) -INST(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) -INST(slti, "slti", 0, 0, IF_LA, 0x02000000) - -INST(sltui, "sltui", 0, 0, IF_LA, 0x02400000) -INST(andi, "andi", 0, 0, IF_LA, 0x03400000) -INST(ori, "ori", 0, 0, IF_LA, 0x03800000) -INST(xori, "xori", 0, 0, IF_LA, 0x03c00000) - -INST(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) -INST(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) -INST(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) -INST(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) -INST(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) -INST(srli_d, "srli.d", 0, 0, IF_LA, 0x00450000) -INST(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) -INST(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) - -INST(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) - -INST(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) - +INST(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) +INST(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) +INST(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) +INST(slti, "slti", 0, 0, IF_LA, 0x02000000) + +INST(sltui, "sltui", 0, 0, IF_LA, 0x02400000) +INST(andi, "andi", 0, 0, IF_LA, 0x03400000) +INST(ori, "ori", 0, 0, IF_LA, 0x03800000) +INST(xori, "xori", 0, 0, IF_LA, 0x03c00000) + +INST(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) +INST(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) +INST(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) +INST(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) 
+INST(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) +INST(srli_d, "srli.d", 0, 0, IF_LA, 0x00450000) +INST(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) +INST(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) + +INST(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) + +INST(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) +//////////////////////////////////////////////////////////////////////////////////////////// ////NOTE: jirl must be the last one !!! more info to see emitter::emitInsMayWriteToGCReg(). -//////////////////////////////////////////////// +// ////NOTE: End //// the above instructions will be used by emitter::emitInsMayWriteToGCReg(). -//////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////// + ////Store. -INST(st_b, "st.b", 0, ST, IF_LA, 0x29000000) -INST(st_h, "st.h", 0, ST, IF_LA, 0x29400000) -INST(st_w, "st.w", 0, ST, IF_LA, 0x29800000) -INST(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) - -INST(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) -INST(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) -INST(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) -INST(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) - -INST(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) -INST(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) -INST(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) -INST(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) -INST(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) -INST(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) -INST(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) -INST(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) -INST(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) -INST(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) -INST(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) -INST(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) - -INST(dbar, "dbar", 0, 0, IF_LA, 0x38720000) -INST(ibar, "ibar", 0, 0, IF_LA, 0x38728000) - -INST(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) -INST(break, "break", 0, 0, IF_LA, 0x002a0005) - -INST(asrtle_d, "asrtle.d", 0, 0, IF_LA, 
0x00010000) -INST(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) - -INST(preld, "preld", 0, LD, IF_LA, 0x2ac00000) -INST(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) +INST(st_b, "st.b", 0, ST, IF_LA, 0x29000000) +INST(st_h, "st.h", 0, ST, IF_LA, 0x29400000) +INST(st_w, "st.w", 0, ST, IF_LA, 0x29800000) +INST(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) + +INST(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) +INST(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) +INST(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) +INST(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) + +INST(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) +INST(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) +INST(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) +INST(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) +INST(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) +INST(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) +INST(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) +INST(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) +INST(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) +INST(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) +INST(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) +INST(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) + +INST(dbar, "dbar", 0, 0, IF_LA, 0x38720000) +INST(ibar, "ibar", 0, 0, IF_LA, 0x38728000) + +INST(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) +INST(break, "break", 0, 0, IF_LA, 0x002a0005) + +INST(asrtle_d, "asrtle.d", 0, 0, IF_LA, 0x00010000) +INST(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) + +INST(preld, "preld", 0, LD, IF_LA, 0x2ac00000) +INST(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) ////Float instructions. ////R_R_R. 
-INST(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) -INST(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) -INST(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) -INST(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) -INST(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) -INST(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) -INST(fdiv_s, "fdiv.s", 0, 0, IF_LA, 0x01068000) -INST(fdiv_d, "fdiv.d", 0, 0, IF_LA, 0x01070000) - -INST(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) -INST(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) -INST(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) -INST(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) -INST(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) -INST(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) -INST(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) -INST(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) - -INST(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) -INST(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) - -INST(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) -INST(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) - -INST(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) -INST(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) -INST(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) -INST(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) - -INST(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) -INST(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) -INST(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) -INST(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) -INST(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) -INST(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) -INST(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) -INST(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) +INST(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) +INST(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) +INST(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) +INST(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) +INST(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) +INST(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) +INST(fdiv_s, "fdiv.s", 0, 0, IF_LA, 0x01068000) +INST(fdiv_d, 
"fdiv.d", 0, 0, IF_LA, 0x01070000) + +INST(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) +INST(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) +INST(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) +INST(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) +INST(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) +INST(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) +INST(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) +INST(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) + +INST(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) +INST(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) + +INST(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) +INST(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) + +INST(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) +INST(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) +INST(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) +INST(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) + +INST(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) +INST(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) +INST(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) +INST(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) +INST(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) +INST(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) +INST(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) +INST(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) ////R_R_R_R. 
-INST(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) -INST(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) -INST(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) -INST(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) -INST(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) -INST(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) -INST(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) -INST(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) +INST(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) +INST(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) +INST(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) +INST(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) +INST(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) +INST(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) +INST(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) +INST(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) ////R_R. -INST(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) -INST(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) -INST(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) -INST(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) - -INST(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) -INST(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) -INST(frsqrt_s, "frsqrt.s", 0, 0, IF_LA, 0x01146400) -INST(frsqrt_d, "frsqrt.d", 0, 0, IF_LA, 0x01146800) -INST(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) -INST(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) -INST(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) -INST(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) -INST(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) -INST(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) - -INST(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) -INST(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) -INST(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) -INST(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) -INST(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) -INST(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) -INST(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) -INST(ftint_w_d, "ftint.w.d", 0, 0, IF_LA, 0x011b0800) 
-INST(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) -INST(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) -INST(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) -INST(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) -INST(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) -INST(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) -INST(ftintrp_w_s, "ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) -INST(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) -INST(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) -INST(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) -INST(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) -INST(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) -INST(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) -INST(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) -INST(ftintrne_w_s, "ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) -INST(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) -INST(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) -INST(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) -INST(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) -INST(frint_d, "frint.d", 0, 0, IF_LA, 0x011e4800) - -INST(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) -INST(fmov_d, "fmov.d", 0, 0, IF_LA, 0x01149800) - -INST(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) -INST(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) -INST(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) -INST(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) -INST(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) -INST(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) +INST(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) +INST(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) +INST(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) +INST(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) + +INST(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) +INST(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) +INST(frsqrt_s, "frsqrt.s", 0, 0, IF_LA, 0x01146400) +INST(frsqrt_d, "frsqrt.d", 0, 0, 
IF_LA, 0x01146800) +INST(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) +INST(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) +INST(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) +INST(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) +INST(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) +INST(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) + +INST(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) +INST(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) +INST(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) +INST(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) +INST(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) +INST(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) +INST(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) +INST(ftint_w_d, "ftint.w.d", 0, 0, IF_LA, 0x011b0800) +INST(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) +INST(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) +INST(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) +INST(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) +INST(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) +INST(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) +INST(ftintrp_w_s, "ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) +INST(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) +INST(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) +INST(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) +INST(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) +INST(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) +INST(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) +INST(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) +INST(ftintrne_w_s, "ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) +INST(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) +INST(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) +INST(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) +INST(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) +INST(frint_d, "frint.d", 0, 0, IF_LA, 0x011e4800) + +INST(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) +INST(fmov_d, 
"fmov.d", 0, 0, IF_LA, 0x01149800) + +INST(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) +INST(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) +INST(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) +INST(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) +INST(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) +INST(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) //// -INST(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) -INST(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) -INST(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) -INST(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) -INST(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) -INST(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) +INST(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) +INST(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) +INST(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) +INST(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) +INST(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) +INST(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) ////R_R_I. 
-INST(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) -INST(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) -INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) -INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) -INST(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) -INST(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) -INST(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) -INST(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) -INST(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) -INST(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) -INST(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) - -INST(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) -INST(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) -INST(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) -INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) -INST(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) -INST(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) -INST(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) -INST(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) -INST(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) -INST(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) -INST(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) - -INST(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) -INST(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) -INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) -INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) -INST(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) -INST(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) -INST(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) -INST(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) -INST(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) -INST(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) -INST(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) - -INST(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 0x0c108000) -INST(fcmp_sun_s, "fcmp.sun.s", 
0, 0, IF_LA, 0x0c148000) -INST(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) -INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) -INST(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) -INST(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) -INST(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) -INST(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) -INST(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) -INST(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) -INST(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) +INST(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) +INST(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) +INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) +INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) +INST(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) +INST(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) +INST(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) +INST(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) +INST(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) +INST(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) +INST(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) + +INST(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) +INST(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) +INST(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) +INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) +INST(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) +INST(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) +INST(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) +INST(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) +INST(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) +INST(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) +INST(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) + +INST(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) +INST(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) +INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) 
+INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) +INST(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) +INST(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) +INST(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) +INST(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) +INST(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) +INST(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) +INST(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) + +INST(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 0x0c108000) +INST(fcmp_sun_s, "fcmp.sun.s", 0, 0, IF_LA, 0x0c148000) +INST(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) +INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) +INST(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) +INST(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) +INST(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) +INST(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) +INST(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) +INST(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) +INST(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) ////R_R_I. -INST(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) -INST(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) -INST(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) -INST(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) +INST(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) +INST(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) +INST(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) +INST(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) // clang-format on /*****************************************************************************/ diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 414b223ee89b2..3d47ed3322b27 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -812,7 +812,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un if (compFeatureArgSplit()) { // This does not affect the normal calling convention for LoongArch64!! 
- if (this->info.compIsVarArgs && argType == TYP_STRUCT) + if (this->info.compIsVarArgs && (argType == TYP_STRUCT)) { if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register @@ -885,53 +885,53 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } else #elif defined(TARGET_LOONGARCH64) - var_types arg1_Type = TYP_UNKNOWN; - var_types arg2_Type = TYP_UNKNOWN; + var_types arg1Type = TYP_UNKNOWN; + var_types arg2Type = TYP_UNKNOWN; if (floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) { assert(varTypeIsStruct(argType)); - int float_num = 0; + int floatNum = 0; if (floatFlags == STRUCT_FLOAT_FIELD_ONLY_ONE) { assert(argSize <= 8); assert(varDsc->lvExactSize <= argSize); - float_num = 1; + floatNum = 1; - arg1_Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; - canPassArgInRegisters = varDscInfo->canEnreg(arg1_Type, 1); + arg1Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; + canPassArgInRegisters = varDscInfo->canEnreg(arg1Type, 1); } else if (floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { - arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - float_num = 2; + arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + floatNum = 2; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); } else if (floatFlags & STRUCT_FLOAT_FIELD_FIRST) { - float_num = 1; + floatNum = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? 
TYP_LONG : TYP_INT; + arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; } else if (floatFlags & STRUCT_FLOAT_FIELD_SECOND) { - float_num = 1; + floatNum = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; - arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } if (!canPassArgInRegisters) { - assert(float_num > 0); + assert(floatNum > 0); canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); - arg1_Type = TYP_UNKNOWN; - arg2_Type = TYP_UNKNOWN; + arg1Type = TYP_UNKNOWN; + arg2Type = TYP_UNKNOWN; } } else @@ -947,7 +947,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un if (!canPassArgInRegisters && (cSlots > 1)) { canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; + arg1Type = canPassArgInRegisters ? 
TYP_I_IMPL : TYP_UNKNOWN; } #endif } @@ -980,9 +980,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } else #elif defined(TARGET_LOONGARCH64) - if (arg1_Type != TYP_UNKNOWN) + if (arg1Type != TYP_UNKNOWN) { - firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg1_Type, 1); + firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg1Type, 1); } else #endif // defined(TARGET_LOONGARCH64) @@ -1036,15 +1036,15 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un #elif defined(TARGET_LOONGARCH64) if (argType == TYP_STRUCT) { - if (arg1_Type != TYP_UNKNOWN) + if (arg1Type != TYP_UNKNOWN) { - varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg1_Type)); - varDsc->lvIs4Field1 = (int)emitActualTypeSize(arg1_Type) == 4 ? 1 : 0; - if (arg2_Type != TYP_UNKNOWN) + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg1Type)); + varDsc->lvIs4Field1 = (int)emitActualTypeSize(arg1Type) == 4 ? 1 : 0; + if (arg2Type != TYP_UNKNOWN) { - firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2_Type, 1); - varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2_Type)); - varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2_Type) == 4 ? 1 : 0; + firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2Type, 1); + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2Type)); + varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2Type) == 4 ? 
1 : 0; varDscInfo->hasMultiSlotStruct = true; } else if (cSlots > 1) @@ -1052,7 +1052,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDsc->lvIsSplit = 1; varDsc->SetOtherArgReg(REG_STK); varDscInfo->hasMultiSlotStruct = true; - varDscInfo->setAllRegArgUsed(arg1_Type); + varDscInfo->setAllRegArgUsed(arg1Type); varDscInfo->stackArgSize += TARGET_POINTER_SIZE; } } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index a39ebbcdbab7c..bc14541634ff5 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1080,7 +1080,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf bool isOnStack = (info->GetRegNum() == REG_STK); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Mark contained when we pass struct // GT_FIELD_LIST is always marked contained when it is generated if (type == TYP_STRUCT) @@ -1091,15 +1091,6 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf MakeSrcContained(arg, arg->AsObj()->Addr()); } } -#elif defined(TARGET_LOONGARCH64) - if (type == TYP_STRUCT) - { - arg->SetContained(); - if ((arg->OperGet() == GT_OBJ) && (arg->AsObj()->Addr()->OperGet() == GT_LCL_VAR_ADDR)) - { - MakeSrcContained(arg, arg->AsObj()->Addr()); - } - } #endif #if FEATURE_ARG_SPLIT @@ -1484,9 +1475,9 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) #endif // TARGET_ARMARCH #if defined(TARGET_LOONGARCH64) - if (call->IsVarargs() /*|| comp->opts.compUseSoftFP*/) + if (call->IsVarargs()) { - // For vararg call or on armel, reg args should be all integer. + // For vararg call, reg args should be all integer. // Insert copies as needed to move float value to integer register. 
GenTree* newNode = LowerFloatArg(ppArg, info); if (newNode != nullptr) @@ -1522,7 +1513,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ -// LowerFloatArg: Lower float call arguments on the arm platform. +// LowerFloatArg: Lower float call arguments on the arm/LoongArch64 platform. // // Arguments: // arg - The arg node @@ -2853,8 +2844,8 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) { assert(cmp->gtGetOp2()->IsIntegralConst()); -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) // || defined(TARGET_LOONGARCH64) - ////TODO: add optimize for LoongArch64. +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) + // TODO-LoongArch64: add optimize for LoongArch64. GenTree* op1 = cmp->gtGetOp1(); GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); ssize_t op2Value = op2->IconValue(); @@ -5784,7 +5775,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } else { -#if defined(TARGET_ARM64) //|| defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) // 64-bit MUL is more expensive than UMULL on ARM64. genTreeOps mulOper = simpleMul ? GT_MUL_LONG : GT_MULHI; #else diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 4196b23578f61..0054decb0b019 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -4,9 +4,9 @@ /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX -XX Lowering for LOONGARCH64 common code XX +XX Lowering for LOONGARCH64 common code XX XX XX -XX This encapsulates common logic for lowering trees for the LOONGARCH64 XX +XX This encapsulates common logic for lowering trees for the LOONGARCH64 XX XX architectures. 
For a more detailed view of what is lowering, please XX XX take a look at Lower.cpp XX XX XX @@ -67,13 +67,12 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const switch (parentNode->OperGet()) { case GT_ADD: - return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false : ((-2048 <= immVal) && - (immVal <= 2047)); + return ((-2048 <= immVal) && (immVal <= 2047)); break; case GT_CMPXCHG: case GT_LOCKADD: case GT_XADD: - assert(!"unimplemented on LOONGARCH yet"); + NYI_LOONGARCH64("unimplemented on LOONGARCH yet"); break; case GT_EQ: @@ -122,37 +121,6 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) { assert(mul->OperIsMul()); - // if (comp->opts.OptimizationEnabled() && mul->OperIs(GT_MUL) && mul->IsValidLongMul()) - //{ - // GenTreeCast* op1 = mul->gtGetOp1()->AsCast(); - // GenTree* op2 = mul->gtGetOp2(); - - // mul->ClearOverflow(); - // mul->ClearUnsigned(); - // if (op1->IsUnsigned()) - // { - // mul->SetUnsigned(); - // } - - // mul->gtOp1 = op1->CastOp(); - // BlockRange().Remove(op1); - - // if (op2->OperIs(GT_CAST)) - // { - // mul->gtOp2 = op2->AsCast()->CastOp(); - // BlockRange().Remove(op2); - // } - // else - // { - // assert(op2->IsIntegralConst()); - // assert(FitsIn(op2->AsIntConCommon()->IntegralValue())); - - // op2->ChangeType(TYP_INT); - // } - - // mul->ChangeOper(GT_MUL_LONG); - //} - ContainCheckMul(mul); return mul->gtNext; @@ -322,7 +290,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (fill == 0) { src->SetContained(); - ; } else if (size >= REGSIZE_BYTES) { @@ -547,19 +514,7 @@ void Lowering::LowerRotate(GenTree* tree) // void Lowering::LowerSIMD(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - assert(simdNode->gtType != TYP_SIMD32); - - if (simdNode->TypeGet() == TYP_SIMD12) - { - // GT_SIMD node requiring to produce TYP_SIMD12 in fact - // produces a TYP_SIMD16 result - simdNode->gtType = TYP_SIMD16; - } - - ContainCheckSIMD(simdNode); -#endif + 
NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); } #endif // FEATURE_SIMD @@ -572,60 +527,7 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) // void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - auto intrinsicID = node->gtHWIntrinsicId; - auto intrinsicInfo = HWIntrinsicInfo::lookup(node->gtHWIntrinsicId); - - // - // Lower unsupported Unsigned Compare Zero intrinsics to their trivial transformations - // - // LOONGARCH64 does not support most forms of compare zero for Unsigned values - // This is because some are non-sensical, and the rest are trivial transformations of other operators - // - if ((intrinsicInfo.flags & HWIntrinsicInfo::LowerCmpUZero) && varTypeIsUnsigned(node->gtSIMDBaseType)) - { - auto setAllVector = node->gtSIMDSize > 8 ? NI_LOONGARCH64_SIMD_SetAllVector128 : NI_LOONGARCH64_SIMD_SetAllVector64; - - auto origOp1 = node->gtOp.gtOp1; - - switch (intrinsicID) - { - case NI_LOONGARCH64_SIMD_GT_ZERO: - // Unsigned > 0 ==> !(Unsigned == 0) - node->gtOp.gtOp1 = - comp->gtNewSimdHWIntrinsicNode(node->TypeGet(), node->gtOp.gtOp1, NI_LOONGARCH64_SIMD_EQ_ZERO, - node->gtSIMDBaseType, node->gtSIMDSize); - node->gtHWIntrinsicId = NI_LOONGARCH64_SIMD_BitwiseNot; - BlockRange().InsertBefore(node, node->gtOp.gtOp1); - break; - case NI_LOONGARCH64_SIMD_LE_ZERO: - // Unsigned <= 0 ==> Unsigned == 0 - node->gtHWIntrinsicId = NI_LOONGARCH64_SIMD_EQ_ZERO; - break; - case NI_LOONGARCH64_SIMD_GE_ZERO: - case NI_LOONGARCH64_SIMD_LT_ZERO: - // Unsigned >= 0 ==> Always true - // Unsigned < 0 ==> Always false - node->gtHWIntrinsicId = setAllVector; - node->gtOp.gtOp1 = comp->gtNewLconNode((intrinsicID == NI_LOONGARCH64_SIMD_GE_ZERO) ? 
~0ULL : 0ULL); - BlockRange().InsertBefore(node, node->gtOp.gtOp1); - if ((origOp1->gtFlags & GTF_ALL_EFFECT) == 0) - { - BlockRange().Remove(origOp1, true); - } - else - { - origOp1->SetUnusedValue(); - } - break; - default: - assert(!"Unhandled LowerCmpUZero case"); - } - } - - ContainCheckHWIntrinsic(node); -#endif + NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); } //---------------------------------------------------------------------------------------------- @@ -641,55 +543,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // This check may end up modifying node->gtOp1 if it is a cast node that can be removed bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) { - assert((node->gtHWIntrinsicId == NI_Vector64_Create) || (node->gtHWIntrinsicId == NI_Vector128_Create) || - (node->gtHWIntrinsicId == NI_Vector64_CreateScalarUnsafe) || - (node->gtHWIntrinsicId == NI_Vector128_CreateScalarUnsafe) || - (node->gtHWIntrinsicId == NI_AdvSimd_DuplicateToVector64) || - (node->gtHWIntrinsicId == NI_AdvSimd_DuplicateToVector128) || - (node->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector64) || - (node->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector128)); - assert(HWIntrinsicInfo::lookupNumArgs(node) == 1); - - GenTree* op1 = node->gtOp1; - GenTree* castOp = nullptr; - - if (varTypeIsIntegral(node->GetSimdBaseType()) && op1->OperIs(GT_CAST)) - { - // We will sometimes get a cast around a constant value (such as for - // certain long constants) which would block the below containment. - // So we will temporarily check what the cast is from instead so we - // can catch those cases as well. 
- - castOp = op1->AsCast()->CastOp(); - op1 = castOp; - } - - if (op1->IsCnsIntOrI()) - { - const ssize_t dataValue = op1->AsIntCon()->gtIconVal; - - if (comp->GetEmitter()->emitIns_valid_imm_for_movi(dataValue, emitActualTypeSize(node->GetSimdBaseType()))) - { - if (castOp != nullptr) - { - // We found a containable immediate under - // a cast, so remove the cast from the LIR. - - BlockRange().Remove(node->gtOp1); - node->gtOp1 = op1; - } - return true; - } - } - else if (op1->IsCnsFltOrDbl()) - { - assert(varTypeIsFloating(node->GetSimdBaseType())); - assert(castOp == nullptr); - - const double dataValue = op1->AsDblCon()->gtDconVal; - return comp->GetEmitter()->emitIns_valid_imm_for_fmov(dataValue); - } - + NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); return false; } @@ -702,116 +556,7 @@ bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); - var_types simdBaseType = node->GetSimdBaseType(); - unsigned simdSize = node->GetSimdSize(); - var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - - assert((intrinsicId == NI_Vector64_op_Equality) || (intrinsicId == NI_Vector64_op_Inequality) || - (intrinsicId == NI_Vector128_op_Equality) || (intrinsicId == NI_Vector128_op_Inequality)); - - assert(varTypeIsSIMD(simdType)); - assert(varTypeIsArithmetic(simdBaseType)); - assert(simdSize != 0); - assert(node->gtType == TYP_BOOL); - assert((cmpOp == GT_EQ) || (cmpOp == GT_NE)); - - // We have the following (with the appropriate simd size and where the intrinsic could be op_Inequality): - // /--* op2 simd - // /--* op1 simd - // node = * HWINTRINSIC simd T op_Equality - - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - - NamedIntrinsic cmpIntrinsic; - - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - case TYP_SHORT: - 
case TYP_USHORT: - case TYP_INT: - case TYP_UINT: - case TYP_FLOAT: - { - cmpIntrinsic = NI_AdvSimd_CompareEqual; - break; - } - - case TYP_LONG: - case TYP_ULONG: - case TYP_DOUBLE: - { - cmpIntrinsic = NI_AdvSimd_Arm64_CompareEqual; - break; - } - - default: - { - unreached(); - } - } - - GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, cmpIntrinsic, simdBaseJitType, simdSize); - BlockRange().InsertBefore(node, cmp); - LowerNode(cmp); - - if ((simdBaseType == TYP_FLOAT) && (simdSize == 12)) - { - // For TYP_SIMD12 we don't want the upper bits to participate in the comparison. So, we will insert all ones - // into those bits of the result, "as if" the upper bits are equal. Then if all lower bits are equal, we get the - // expected all-ones result, and will get the expected 0's only where there are non-matching bits. - - GenTree* idxCns = comp->gtNewIconNode(3, TYP_INT); - BlockRange().InsertAfter(cmp, idxCns); - - GenTree* insCns = comp->gtNewIconNode(-1, TYP_INT); - BlockRange().InsertAfter(idxCns, insCns); - - GenTree* tmp = comp->gtNewSimdAsHWIntrinsicNode(simdType, cmp, idxCns, insCns, NI_AdvSimd_Insert, - CORINFO_TYPE_INT, simdSize); - BlockRange().InsertAfter(insCns, tmp); - LowerNode(tmp); - - cmp = tmp; - } - - GenTree* msk = - comp->gtNewSimdHWIntrinsicNode(simdType, cmp, NI_AdvSimd_Arm64_MinAcross, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertAfter(cmp, msk); - LowerNode(msk); - - GenTree* zroCns = comp->gtNewIconNode(0, TYP_INT); - BlockRange().InsertAfter(msk, zroCns); - - GenTree* val = - comp->gtNewSimdAsHWIntrinsicNode(TYP_UBYTE, msk, zroCns, NI_AdvSimd_Extract, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertAfter(zroCns, val); - LowerNode(val); - - zroCns = comp->gtNewIconNode(0, TYP_INT); - BlockRange().InsertAfter(val, zroCns); - - node->ChangeOper(cmpOp); - - node->gtType = TYP_INT; - node->gtOp1 = val; - node->gtOp2 = zroCns; - - // The CompareEqual will set (condition is true) or clear (condition is false) all bits 
of the respective element - // The MinAcross then ensures we get either all bits set (all conditions are true) or clear (any condition is false) - // So, we need to invert the condition from the operation since we compare against zero - - GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::NE : GenCondition::EQ; - GenTree* cc = LowerNodeCC(node, cmpCnd); - - node->gtType = TYP_VOID; - node->ClearUnusedValue(); - - LowerNode(node); + NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); } //---------------------------------------------------------------------------------------------- @@ -822,279 +567,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) // void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - var_types simdType = node->gtType; - CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); - var_types simdBaseType = node->GetSimdBaseType(); - unsigned simdSize = node->GetSimdSize(); - VectorConstant vecCns = {}; - - if ((simdSize == 8) && (simdType == TYP_DOUBLE)) - { - // TODO-Cleanup: Struct retyping means we have the wrong type here. We need to - // manually fix it up so the simdType checks below are correct. - simdType = TYP_SIMD8; - } - - assert(varTypeIsSIMD(simdType)); - assert(varTypeIsArithmetic(simdBaseType)); - assert(simdSize != 0); - - GenTreeArgList* argList = nullptr; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - - // Spare GenTrees to be used for the lowering logic below - // Defined upfront to avoid naming conflicts, etc... 
- GenTree* idx = nullptr; - GenTree* tmp1 = nullptr; - GenTree* tmp2 = nullptr; - GenTree* tmp3 = nullptr; - - assert(op1 != nullptr); - - unsigned argCnt = 0; - unsigned cnsArgCnt = 0; - - if (op1->OperIsList()) - { - assert(op2 == nullptr); - - for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) - { - if (HandleArgForHWIntrinsicCreate(argList->Current(), argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; - } - } - else - { - if (HandleArgForHWIntrinsicCreate(op1, argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; - - if (op2 != nullptr) - { - if (HandleArgForHWIntrinsicCreate(op2, argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; - } - else if (cnsArgCnt == 1) - { - // These intrinsics are meant to set the same value to every element - // so we'll just specially handle it here and copy it into the remaining - // indices. - - for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) - { - HandleArgForHWIntrinsicCreate(op1, i, vecCns, simdBaseType); - } - } - } - assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType)))); - - if ((argCnt == cnsArgCnt) && (argCnt == 1)) - { - GenTree* castOp = nullptr; - - if (varTypeIsIntegral(simdBaseType) && op1->OperIs(GT_CAST)) - { - // We will sometimes get a cast around a constant value (such as for - // certain long constants) which would block the below containment. - // So we will temporarily check what the cast is from instead so we - // can catch those cases as well. - - castOp = op1->AsCast()->CastOp(); - op1 = castOp; - } - - if (IsValidConstForMovImm(node)) - { - // Set the cnsArgCnt to zero so we get lowered to a DuplicateToVector - // intrinsic, which will itself mark the node as contained. - cnsArgCnt = 0; - - // Reacquire op1 as the above check may have removed a cast node and - // changed op1. 
- op1 = node->gtOp1; - } - } - - if (argCnt == cnsArgCnt) - { - if (op1->OperIsList()) - { - for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) - { - BlockRange().Remove(argList->Current()); - } - } - else - { - BlockRange().Remove(op1); - - if (op2 != nullptr) - { - BlockRange().Remove(op2); - } - } - - assert((simdSize == 8) || (simdSize == 16)); - - if (VectorConstantIsBroadcastedI64(vecCns, simdSize / 8)) - { - // If we are a single constant or if all parts are the same, we might be able to optimize - // this even further for certain values, such as Zero or AllBitsSet. - - if (vecCns.i64[0] == 0) - { - node->gtOp1 = nullptr; - node->gtOp2 = nullptr; - node->gtHWIntrinsicId = (simdSize == 8) ? NI_Vector64_get_Zero : NI_Vector128_get_Zero; - return; - } - else if (vecCns.i64[0] == -1) - { - node->gtOp1 = nullptr; - node->gtOp2 = nullptr; - node->gtHWIntrinsicId = (simdSize == 8) ? NI_Vector64_get_AllBitsSet : NI_Vector128_get_AllBitsSet; - return; - } - } - - unsigned cnsSize = (simdSize == 12) ? 16 : simdSize; - unsigned cnsAlign = cnsSize; - var_types dataType = Compiler::getSIMDTypeForSize(simdSize); - - UNATIVE_OFFSET cnum = comp->GetEmitter()->emitDataConst(&vecCns, cnsSize, cnsAlign, dataType); - CORINFO_FIELD_HANDLE hnd = comp->eeFindJitDataOffs(cnum); - GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(GT_CLS_VAR_ADDR, TYP_I_IMPL, hnd, nullptr); - BlockRange().InsertBefore(node, clsVarAddr); - - node->ChangeOper(GT_IND); - node->gtOp1 = clsVarAddr; - - // TODO-ARM64-CQ: We should be able to modify at least the paths that use Insert to trivially support partial - // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just - // insert the non-constant values which should still allow some gains. 
- - return; - } - else if (argCnt == 1) - { - // We have the following (where simd is simd8 or simd16): - // /--* op1 T - // node = * HWINTRINSIC simd T Create - - // We will be constructing the following parts: - // /--* op1 T - // node = * HWINTRINSIC simd T DuplicateToVector - - // This is roughly the following managed code: - // return AdvSimd.Arm64.DuplicateToVector(op1); - - if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE)) - { - node->gtHWIntrinsicId = - (simdType == TYP_SIMD8) ? NI_AdvSimd_Arm64_DuplicateToVector64 : NI_AdvSimd_Arm64_DuplicateToVector128; - } - else - { - node->gtHWIntrinsicId = - (simdType == TYP_SIMD8) ? NI_AdvSimd_DuplicateToVector64 : NI_AdvSimd_DuplicateToVector128; - } - return; - } - - // We have the following (where simd is simd8 or simd16): - // /--* op1 T - // +--* ... T - // +--* opN T - // node = * HWINTRINSIC simd T Create - - if (op1->OperIsList()) - { - argList = op1->AsArgList(); - op1 = argList->Current(); - argList = argList->Rest(); - } - - // We will be constructing the following parts: - // /--* op1 T - // tmp1 = * HWINTRINSIC simd8 T CreateScalarUnsafe - // ... - - // This is roughly the following managed code: - // var tmp1 = Vector64.CreateScalarUnsafe(op1); - // ... - - NamedIntrinsic createScalarUnsafe = - (simdType == TYP_SIMD8) ? NI_Vector64_CreateScalarUnsafe : NI_Vector128_CreateScalarUnsafe; - - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, createScalarUnsafe, simdBaseJitType, simdSize); - BlockRange().InsertAfter(op1, tmp1); - LowerNode(tmp1); - - unsigned N = 0; - GenTree* opN = nullptr; - - for (N = 1; N < argCnt - 1; N++) - { - // We will be constructing the following parts: - // ... - // idx = CNS_INT int N - // /--* tmp1 simd - // +--* idx int - // +--* opN T - // tmp1 = * HWINTRINSIC simd T Insert - // ... - - // This is roughly the following managed code: - // ... - // tmp1 = AdvSimd.Insert(tmp1, N, opN); - // ... 
- - opN = argList->Current(); - - idx = comp->gtNewIconNode(N, TYP_INT); - BlockRange().InsertBefore(opN, idx); - - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, idx, opN, NI_AdvSimd_Insert, simdBaseJitType, simdSize); - BlockRange().InsertAfter(opN, tmp1); - LowerNode(tmp1); - - argList = argList->Rest(); - } - - assert(N == (argCnt - 1)); - - // We will be constructing the following parts: - // idx = CNS_INT int N - // /--* tmp1 simd - // +--* idx int - // +--* opN T - // node = * HWINTRINSIC simd T Insert - - // This is roughly the following managed code: - // ... - // tmp1 = AdvSimd.Insert(tmp1, N, opN); - // ... - - opN = (argCnt == 2) ? op2 : argList->Current(); - - idx = comp->gtNewIconNode(N, TYP_INT); - BlockRange().InsertBefore(opN, idx); - - node->gtOp1 = comp->gtNewArgList(tmp1, idx, opN); - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_AdvSimd_Insert; + NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); } //---------------------------------------------------------------------------------------------- @@ -1105,233 +578,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); - var_types simdBaseType = node->GetSimdBaseType(); - unsigned simdSize = node->GetSimdSize(); - var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - - assert((intrinsicId == NI_Vector64_Dot) || (intrinsicId == NI_Vector128_Dot)); - assert(varTypeIsSIMD(simdType)); - assert(varTypeIsArithmetic(simdBaseType)); - assert(simdSize != 0); - - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - - assert(op1 != nullptr); - assert(op2 != nullptr); - assert(!op1->OperIsList()); - - // Spare GenTrees to be used for the lowering logic below - // Defined upfront to avoid naming conflicts, etc... 
- GenTree* idx = nullptr; - GenTree* tmp1 = nullptr; - GenTree* tmp2 = nullptr; - - if (simdSize == 12) - { - assert(simdBaseType == TYP_FLOAT); - - // For 12 byte SIMD, we need to clear the upper 4 bytes: - // idx = CNS_INT int 0x03 - // tmp1 = * CNS_DLB float 0.0 - // /--* op1 simd16 - // +--* idx int - // +--* tmp1 simd16 - // op1 = * HWINTRINSIC simd16 T Insert - // ... - - // This is roughly the following managed code: - // op1 = AdvSimd.Insert(op1, 0x03, 0.0f); - // ... - - idx = comp->gtNewIconNode(0x03, TYP_INT); - BlockRange().InsertAfter(op1, idx); - - tmp1 = comp->gtNewZeroConNode(TYP_FLOAT); - BlockRange().InsertAfter(idx, tmp1); - LowerNode(tmp1); - - op1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op1, idx, tmp1, NI_AdvSimd_Insert, simdBaseJitType, simdSize); - BlockRange().InsertAfter(tmp1, op1); - LowerNode(op1); - - idx = comp->gtNewIconNode(0x03, TYP_INT); - BlockRange().InsertAfter(op2, idx); - - tmp2 = comp->gtNewZeroConNode(TYP_FLOAT); - BlockRange().InsertAfter(idx, tmp2); - LowerNode(tmp2); - - op2 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op2, idx, tmp2, NI_AdvSimd_Insert, simdBaseJitType, simdSize); - BlockRange().InsertAfter(tmp2, op2); - LowerNode(op2); - } - - // We will be constructing the following parts: - // ... - // /--* op1 simd16 - // +--* op2 simd16 - // tmp1 = * HWINTRINSIC simd16 T Multiply - // ... - - // This is roughly the following managed code: - // ... - // var tmp1 = AdvSimd.Multiply(op1, op2); - // ... - - NamedIntrinsic multiply = (simdBaseType == TYP_DOUBLE) ? NI_AdvSimd_Arm64_Multiply : NI_AdvSimd_Multiply; - assert(!varTypeIsLong(simdBaseType)); - - tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op1, op2, multiply, simdBaseJitType, simdSize); - BlockRange().InsertBefore(node, tmp1); - LowerNode(tmp1); - - if (varTypeIsFloating(simdBaseType)) - { - // We will be constructing the following parts: - // ... 
- // /--* tmp1 simd16 - // * STORE_LCL_VAR simd16 - // tmp1 = LCL_VAR simd16 - // tmp2 = LCL_VAR simd16 - // ... - - // This is roughly the following managed code: - // ... - // var tmp2 = tmp1; - // ... - - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); - ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; - - tmp2 = comp->gtClone(tmp1); - BlockRange().InsertAfter(tmp1, tmp2); - - if (simdSize == 8) - { - assert(simdBaseType == TYP_FLOAT); - - // We will be constructing the following parts: - // ... - // /--* tmp1 simd8 - // +--* tmp2 simd8 - // tmp1 = * HWINTRINSIC simd8 T AddPairwise - // ... - - // This is roughly the following managed code: - // ... - // var tmp1 = AdvSimd.AddPairwise(tmp1, tmp2); - // ... - - tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_AddPairwise, simdBaseJitType, - simdSize); - BlockRange().InsertAfter(tmp2, tmp1); - LowerNode(tmp1); - } - else - { - assert((simdSize == 12) || (simdSize == 16)); - - // We will be constructing the following parts: - // ... - // /--* tmp1 simd16 - // +--* tmp2 simd16 - // tmp2 = * HWINTRINSIC simd16 T AddPairwise - // ... - - // This is roughly the following managed code: - // ... - // var tmp1 = AdvSimd.Arm64.AddPairwise(tmp1, tmp2); - // ... - - tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_Arm64_AddPairwise, simdBaseJitType, - simdSize); - BlockRange().InsertAfter(tmp2, tmp1); - LowerNode(tmp1); - - if (simdBaseType == TYP_FLOAT) - { - // Float needs an additional pairwise add to finish summing the parts - // The first will have summed e0 with e1 and e2 with e3 and then repeats that for the upper half - // So, we will have a vector that looks like this: - // < e0 + e1, e2 + e3, e0 + e1, e2 + e3> - // Doing a second horizontal add with itself will then give us - // e0 + e1 + e2 + e3 in all elements of the vector - - // We will be constructing the following parts: - // ... 
- // /--* tmp1 simd16 - // * STORE_LCL_VAR simd16 - // tmp1 = LCL_VAR simd16 - // tmp2 = LCL_VAR simd16 - // /--* tmp1 simd16 - // +--* tmp2 simd16 - // tmp2 = * HWINTRINSIC simd16 T AddPairwise - // ... - - // This is roughly the following managed code: - // ... - // var tmp2 = tmp1; - // var tmp1 = AdvSimd.Arm64.AddPairwise(tmp1, tmp2); - // ... - - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); - ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; - - tmp2 = comp->gtClone(tmp1); - BlockRange().InsertAfter(tmp1, tmp2); - - tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_Arm64_AddPairwise, - simdBaseJitType, simdSize); - BlockRange().InsertAfter(tmp2, tmp1); - LowerNode(tmp1); - } - } - - tmp2 = tmp1; - } - else - { - assert(varTypeIsIntegral(simdBaseType)); - - // We will be constructing the following parts: - // ... - // /--* tmp1 simd16 - // tmp2 = * HWINTRINSIC simd16 T AddAcross - // ... - - // This is roughly the following managed code: - // ... - // var tmp2 = AdvSimd.Arm64.AddAcross(tmp1); - // ... - - tmp2 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, simdSize); - BlockRange().InsertAfter(tmp1, tmp2); - LowerNode(tmp2); - } - - // We will be constructing the following parts: - // ... - // /--* tmp2 simd16 - // node = * HWINTRINSIC simd16 T ToScalar - - // This is roughly the following managed code: - // ... - // return tmp2.ToScalar(); - - node->gtOp1 = tmp2; - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = (simdSize == 8) ? 
NI_Vector64_ToScalar : NI_Vector128_ToScalar; - LowerNode(node); - - return; + NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 66745063b96dd..d70273386eacf 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -396,11 +396,13 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CMPXCHG: { + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3; assert(dstCount == 1); - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { // For LOONGARCH exclusives requires a single internal register buildInternalIntRegisterDefForNode(tree); @@ -422,7 +424,7 @@ int LinearScan::BuildNode(GenTree* tree) // For LOONGARCH exclusives the lifetime of the comparand must be extended because // it may be used used multiple during retries - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { setDelayFree(comparandUse); } @@ -441,9 +443,12 @@ int LinearScan::BuildNode(GenTree* tree) case GT_XADD: case GT_XCHG: { + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1); srcCount = tree->gtGetOp2()->isContained() ? 1 : 2; +#if 0 if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { // GT_XCHG requires a single internal register; the others require two. @@ -458,6 +463,7 @@ int LinearScan::BuildNode(GenTree* tree) // for ldclral we need an internal register. 
buildInternalIntRegisterDefForNode(tree); } +#endif assert(!tree->gtGetOp1()->isContained()); RefPosition* op1Use = BuildUse(tree->gtGetOp1()); @@ -467,9 +473,9 @@ int LinearScan::BuildNode(GenTree* tree) op2Use = BuildUse(tree->gtGetOp2()); } - // For LOONGARCH exclusives the lifetime of the addr and data must be extended because + // For LOONGARCH64 exclusives the lifetime of the addr and data must be extended because // it may be used used multiple during retries - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { // Internals may not collide with target if (dstCount == 1) diff --git a/src/coreclr/pal/inc/rt/ntimage.h b/src/coreclr/pal/inc/rt/ntimage.h index cd56b305aed48..e6970cca7cd38 100644 --- a/src/coreclr/pal/inc/rt/ntimage.h +++ b/src/coreclr/pal/inc/rt/ntimage.h @@ -240,6 +240,7 @@ typedef struct _IMAGE_FILE_HEADER { #define IMAGE_FILE_MACHINE_M32R 0x9041 // M32R little-endian #define IMAGE_FILE_MACHINE_ARM64 0xAA64 // ARM64 Little-Endian #define IMAGE_FILE_MACHINE_CEE 0xC0EE +#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 // LOONGARCH64. // // Directory format. From df5b3d356bbf0767ca9500e4e6f1755863ec1831 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 24 Feb 2022 10:30:30 +0800 Subject: [PATCH 28/46] [LoongArch64] apply the format and fix compiling warning. --- src/coreclr/inc/crosscomp.h | 11 ++++------- src/coreclr/jit/codegencommon.cpp | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/coreclr/inc/crosscomp.h b/src/coreclr/inc/crosscomp.h index 63a48d0e4ceea..1a7fdb37b9c25 100644 --- a/src/coreclr/inc/crosscomp.h +++ b/src/coreclr/inc/crosscomp.h @@ -399,7 +399,7 @@ enum #define CONTEXT_UNWOUND_TO_CALL 0x20000000 -typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { +typedef struct DECLSPEC_ALIGN(8) _T_CONTEXT { // // Control flags. 
@@ -414,8 +414,8 @@ typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { DWORD64 Ra; DWORD64 Tp; DWORD64 Sp; - DWORD64 A0;//DWORD64 V0; - DWORD64 A1;//DWORD64 V1; + DWORD64 A0; + DWORD64 A1; DWORD64 A2; DWORD64 A3; DWORD64 A4; @@ -447,7 +447,7 @@ typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { // // Floating Point Registers // - //TODO: support the SIMD. + //TODO-LoongArch64: support the SIMD. DWORD64 F[32]; DWORD Fcsr; } T_CONTEXT, *PT_CONTEXT; @@ -469,7 +469,6 @@ typedef struct _T_RUNTIME_FUNCTION { }; } T_RUNTIME_FUNCTION, *PT_RUNTIME_FUNCTION; - // // Define exception dispatch context structure. // @@ -489,8 +488,6 @@ typedef struct _T_DISPATCHER_CONTEXT { PBYTE NonVolatileRegisters; } T_DISPATCHER_CONTEXT, *PT_DISPATCHER_CONTEXT; - - // // Nonvolatile context pointer record. // diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 5900d163d4c41..dbf711d70d404 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -6029,7 +6029,7 @@ void CodeGen::genFnProlog() #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 if (!isOSRx64Root) { genPushCalleeSavedRegisters(); @@ -8128,7 +8128,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) assert(regCount == 2); int offset = genTypeSize(type); type = retTypeDesc.GetReturnRegType(1); - offset = offset < genTypeSize(type) ? genTypeSize(type) : offset; + offset = (int)((unsigned int)offset < genTypeSize(type) ? genTypeSize(type) : offset); toReg = retTypeDesc.GetABIReturnReg(1); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); } From b912e84119a34383413da034218e07ddeebf9e71 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 25 Feb 2022 18:16:31 +0800 Subject: [PATCH 29/46] [LoongArch64] round 1 amend for reviewing by @kunalspathak. 
--- src/coreclr/jit/codegencommon.cpp | 4 +- src/coreclr/jit/codegenloongarch64.cpp | 14 +- src/coreclr/jit/emit.cpp | 29 +- src/coreclr/jit/emitloongarch64.cpp | 6 +- src/coreclr/jit/emitloongarch64.h | 10 +- src/coreclr/jit/importer.cpp | 8 +- src/coreclr/jit/lsra.cpp | 23 +- src/coreclr/jit/lsra.h | 15 +- src/coreclr/jit/lsrabuild.cpp | 6 +- src/coreclr/jit/lsraloongarch64.cpp | 491 ++++--------------------- src/coreclr/jit/registerloongarch64.h | 5 + 11 files changed, 131 insertions(+), 480 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index dbf711d70d404..c1c990f627c62 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -6029,7 +6029,9 @@ void CodeGen::genFnProlog() #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 + +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 + if (!isOSRx64Root) { genPushCalleeSavedRegisters(); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index cfc8b6587cd3e..af586a60e0715 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -21,11 +21,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfo.h" #include "gcinfoencoder.h" -static short splitLow(int value) -{ - return (value & 0xffff); -} - // Returns true if 'value' is a legal signed immediate 12 bit encoding. static bool isValidSimm12(ssize_t value) { @@ -3951,8 +3946,8 @@ void CodeGen::genCkfinite(GenTree* treeNode) GenTree* op1 = treeNode->AsOp()->gtOp1; var_types targetType = treeNode->TypeGet(); ssize_t expMask = (targetType == TYP_FLOAT) ? 0xFF : 0x7FF; // Bit mask to extract exponent. - ssize_t size = (targetType == TYP_FLOAT) ? 8 : 11; // Bit size to extract exponent. - ssize_t pos = (targetType == TYP_FLOAT) ? 23 : 52; // Bit pos of exponent. 
+ int size = (targetType == TYP_FLOAT) ? 8 : 11; // Bit size to extract exponent. + int pos = (targetType == TYP_FLOAT) ? 23 : 52; // Bit pos of exponent. emitter* emit = GetEmitter(); emitAttr attr = emitActualTypeSize(treeNode); @@ -6086,7 +6081,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // unsigned gcPtrCount; // The count of GC pointers in the struct unsigned srcSize; - bool isHfa; // gcPtrCount = treeNode->gtNumSlots; // Setup the srcSize and layout @@ -6570,7 +6564,7 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) offset = genTypeSize(type); type = pRetTypeDesc->GetReturnRegType(1); reg = call->GetRegNumByIdx(1); - offset = offset < genTypeSize(type) ? genTypeSize(type) : offset; + offset = (offset < (int)genTypeSize(type)) ? genTypeSize(type) : offset; GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); } @@ -9489,7 +9483,7 @@ void CodeGen::genFnPrologCalleeRegArgs() } else { - for (int i = 0; i < regArgNum; i++) + for (unsigned i = 0; i < regArgNum; i++) { LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; var_types destMemType = varDsc2->GetRegisterType(); diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index c371a33f03cda..30c28972ac01d 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -4217,8 +4217,10 @@ void emitter::emitJumpDistBind() // offset of the jump UNATIVE_OFFSET dstOffs; NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded +#ifndef TARGET_LOONGARCH64 UNATIVE_OFFSET oldSize; UNATIVE_OFFSET sizeDif; +#endif #ifdef TARGET_XARCH assert(jmp->idInsFmt() == IF_LABEL || jmp->idInsFmt() == IF_RWR_LABEL || jmp->idInsFmt() == IF_SWR_LABEL); @@ -4635,10 +4637,10 @@ void emitter::emitJumpDistBind() jmp->idInsOpt(INS_OPTS_JIRL); jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
- adjLJ += extra; - adjIG += extra; - emitTotalCodeSize += extra; + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + adjLJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; jmpIG->igFlags |= IGF_UPD_ISZ; isLinkingEnd_LA |= 0x1; } @@ -4711,7 +4713,7 @@ void emitter::emitJumpDistBind() } else { - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO:later will be deleted!!! + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); extra = 8; } } @@ -4724,20 +4726,20 @@ void emitter::emitJumpDistBind() // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); } - else // if (ins == INS_b || ins == INS_bl) + else { assert(ins == INS_b || ins == INS_bl); // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); continue; } jmp->idInsOpt(INS_OPTS_JIRL); jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). - adjLJ += extra; - adjIG += extra; - emitTotalCodeSize += extra; + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + adjLJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; jmpIG->igFlags |= IGF_UPD_ISZ; isLinkingEnd_LA |= 0x1; } @@ -4884,8 +4886,6 @@ void emitter::emitJumpDistBind() // The size of IF_LARGEJMP/IF_LARGEADR/IF_LARGELDC are 8 or 12. // All other code size is 4. 
assert((sizeDif == 4) || (sizeDif == 8)); -#elif defined(TARGET_LOONGARCH64) - assert(sizeDif == 0); #else #error Unsupported or unset target architecture #endif @@ -6815,8 +6815,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #ifdef TARGET_LOONGARCH64 - // cp = cp - 4; - unsigned actualCodeSize = cp - codeBlock; + unsigned actualCodeSize = (unsigned)(cp - codeBlock); #endif #if EMIT_TRACK_STACK_DEPTH diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index dbf81a1e207fe..eb0b8659632bb 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2496,9 +2496,9 @@ void emitter::emitIns_Call(EmitCallType callType, assert(callType == EC_FUNC_TOKEN); assert(addr != NULL); - assert(((long)addr & 3) == 0); + assert((((size_t)addr) & 3) == 0); - addr = (void*)((long)addr + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 + addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 id->idAddr()->iiaAddr = (BYTE*)addr; if (emitComp->opts.compReloc) @@ -2599,7 +2599,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t *(code_t*)dst = 0x1e00000e; - long addr = (long)id->idAddr()->iiaAddr; // get addr. + size_t addr = (size_t)(id->idAddr()->iiaAddr); // get addr. // should assert(addr-dst < 38bits); int reg2 = (int)addr & 1; diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 2c9cbfd211bf1..cbeb66f7ded82 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -77,6 +77,11 @@ bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src bool IsRedundantLdStr( instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end. 
+/************************************************************************/ +/* Public inline informational methods */ +/************************************************************************/ + +public: // Returns true if 'value' is a legal signed immediate 12 bit encoding. static bool isValidSimm12(ssize_t value) { @@ -89,11 +94,6 @@ static bool isValidSimm20(ssize_t value) return -(((int)1) << 19) <= value && value < (((int)1) << 19); }; -/************************************************************************/ -/* Public inline informational methods */ -/************************************************************************/ - -public: // Returns the number of bits used by the given 'size'. inline static unsigned getBitWidth(emitAttr size) { diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 1ace0c67a39e3..e8658621055bb 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -13742,9 +13742,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) #ifdef TARGET_LOONGARCH64 assertImp((genActualType(op1->TypeGet()) == TYP_LONG || genActualType(op1->TypeGet()) == TYP_INT) || (genActualType(op2->TypeGet()) == TYP_LONG || genActualType(op2->TypeGet()) == TYP_INT) || - genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) || - varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet()) || - varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType)); + (genActualType(op1->TypeGet()) == genActualType(op2->TypeGet())) || + (varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet())) || + (varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType))); #else assertImp(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) || (varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet())) || @@ -14035,7 +14035,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) return; } - op1->gtType == TYP_INT; + op1->gtType = TYP_INT; impPushOnStack(op1, tiRetVal); break; diff --git 
a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index c0a2feaef75ff..bcb834ab3e0f9 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1573,16 +1573,19 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) #endif // FEATURE_SIMD case TYP_STRUCT: -// TODO-1stClassStructs: support vars with GC pointers. The issue is that such -// vars will have `lvMustInit` set, because emitter has poor support for struct liveness, -// but if the variable is tracked the prolog generator would expect it to be in liveIn set, -// so an assert in `genFnProlog` will fire. + { + // TODO-1stClassStructs: support vars with GC pointers. The issue is that such + // vars will have `lvMustInit` set, because emitter has poor support for struct liveness, + // but if the variable is tracked the prolog generator would expect it to be in liveIn set, + // so an assert in `genFnProlog` will fire. + bool isRegCandidate = compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); #ifdef TARGET_LOONGARCH64 - return !genIsValidFloatReg(varDsc->GetOtherArgReg()) && compiler->compEnregStructLocals() && - !varDsc->HasGCPtr(); -#else - return compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); + // The LoongArch64's ABI which the float args within a struct maybe passed by integer register + // when no float register left but free integer register. 
+ isRegCandidate &= !genIsValidFloatReg(varDsc->GetOtherArgReg()); #endif + return isRegCandidate; + } case TYP_UNDEF: case TYP_UNKNOWN: @@ -7728,7 +7731,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } } -#endif +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap; regMaskTP sameWriteRegs = RBM_NONE; @@ -7808,7 +7811,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) { sameToReg = REG_NA; } -#endif +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // If the var is live only at those blocks connected by a split edge and not live-in at some of the // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 279ac5292ec0d..a916d10abaf44 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2216,17 +2216,12 @@ class RefPosition // no reg is allocated. unsigned char regOptional : 1; -// Used by RefTypeDef/Use positions of a multi-reg call node. -// Indicates the position of the register that this ref position refers to. -// The max bits needed is based on max value of MAX_RET_REG_COUNT value -// across all targets and that happens 4 on on Arm. Hence index value -// would be 0..MAX_RET_REG_COUNT-1. -#ifdef TARGET_LOONGARCH64 - // TODO for LOONGARCH64: should confirm for ArgSplit? - unsigned char multiRegIdx : 3; -#else // !TARGET_LOONGARCH64 + // Used by RefTypeDef/Use positions of a multi-reg call node. + // Indicates the position of the register that this ref position refers to. + // The max bits needed is based on max value of MAX_RET_REG_COUNT value + // across all targets and that happens 4 on on Arm. Hence index value + // would be 0..MAX_RET_REG_COUNT-1. 
unsigned char multiRegIdx : 2; -#endif // !TARGET_LOONGARCH64 // Last Use - this may be true for multiple RefPositions in the same Interval unsigned char lastUse : 1; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b6bdb51884cc4..bba2b26ed5bba 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -594,6 +594,8 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); #ifndef TARGET_LOONGARCH64 + // The LoongArch64's ABI which the float args maybe passed by integer register + // when no float register left but free integer register. assert((allRegs(theInterval->registerType) & mask) != 0); #endif } @@ -3946,8 +3948,8 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) srcCandidates = RBM_WRITE_BARRIER_SRC; #elif defined(TARGET_LOONGARCH64) - // the 'addr' goes into (REG_WRITE_BARRIER_DST) - // the 'src' goes into (REG_WRITE_BARRIER_SRC) + // the 'addr' goes into t6 (REG_WRITE_BARRIER_DST) + // the 'src' goes into t7 (REG_WRITE_BARRIER_SRC) // addrCandidates = RBM_WRITE_BARRIER_DST; srcCandidates = RBM_WRITE_BARRIER_SRC; diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index d70273386eacf..0c5d995953bdb 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -105,7 +105,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_STORE_LCL_VAR: if (tree->IsMultiRegLclVar() && isCandidateMultiRegLclVar(tree->AsLclVar())) { - dstCount = compiler->lvaGetDesc(tree->AsLclVar()->GetLclNum())->lvFieldCnt; + dstCount = compiler->lvaGetDesc(tree->AsLclVar())->lvFieldCnt; } FALLTHROUGH; @@ -143,19 +143,10 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CNS_DBL: { - GenTreeDblCon* dblConst = tree->AsDblCon(); - double constValue = dblConst->AsDblCon()->gtDconVal; - - if ((constValue == (double)(int)constValue) && 
(-2048 <= constValue) && (constValue <= 2047)) - { - // Directly encode constant to instructions. - } - else - { - // Reserve int to load constant from memory (IF_LARGELDC) - buildInternalIntRegisterDefForNode(tree); - buildInternalRegisterUses(); - } + // There is no instruction for loading float/double imm directly into FPR. + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); } FALLTHROUGH; @@ -263,13 +254,6 @@ int LinearScan::BuildNode(GenTree* tree) // everything is made explicit by adding casts. assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); } - - if (tree->gtOverflow()) - { - // Need a register different from target reg to check for overflow. - buildInternalIntRegisterDefForNode(tree); - setInternalRegsDelayFree = true; - } FALLTHROUGH; case GT_AND: @@ -295,27 +279,21 @@ int LinearScan::BuildNode(GenTree* tree) BuildDefsWithKills(tree, 0, RBM_NONE, killMask); break; - // case GT_MOD: - // case GT_UMOD: - // NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in LOONGARCH64"); - // assert(!"Shouldn't see an integer typed GT_MOD node in LOONGARCH64"); - // srcCount = 0; - // break; - case GT_MUL: + if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. 
+ buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } + FALLTHROUGH; + case GT_MOD: case GT_UMOD: case GT_DIV: case GT_MULHI: case GT_UDIV: { - if (emitActualTypeSize(tree) == EA_4BYTE) - { - // We need two registers: tmpRegOp1 and tmpRegOp2 - buildInternalIntRegisterDefForNode(tree); - buildInternalIntRegisterDefForNode(tree); - } - srcCount = BuildBinaryUses(tree->AsOp()); buildInternalRegisterUses(); assert(dstCount == 1); @@ -351,7 +329,7 @@ int LinearScan::BuildNode(GenTree* tree) #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: - srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic()); + srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount); break; #endif // FEATURE_HW_INTRINSICS @@ -375,13 +353,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_GE: case GT_GT: case GT_JCMP: - if (!varTypeIsFloating(tree->gtGetOp1())) - { - // We need two registers: tmpRegOp1 and tmpRegOp2 - buildInternalIntRegisterDefForNode(tree); - buildInternalIntRegisterDefForNode(tree); - buildInternalRegisterUses(); - } srcCount = BuildCmp(tree); break; @@ -444,55 +415,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_XCHG: { NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); - - assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1); - srcCount = tree->gtGetOp2()->isContained() ? 1 : 2; - -#if 0 - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // GT_XCHG requires a single internal register; the others require two. - buildInternalIntRegisterDefForNode(tree); - if (tree->OperGet() != GT_XCHG) - { - buildInternalIntRegisterDefForNode(tree); - } - } - else if (tree->OperIs(GT_XAND)) - { - // for ldclral we need an internal register. 
- buildInternalIntRegisterDefForNode(tree); - } -#endif - - assert(!tree->gtGetOp1()->isContained()); - RefPosition* op1Use = BuildUse(tree->gtGetOp1()); - RefPosition* op2Use = nullptr; - if (!tree->gtGetOp2()->isContained()) - { - op2Use = BuildUse(tree->gtGetOp2()); - } - - // For LOONGARCH64 exclusives the lifetime of the addr and data must be extended because - // it may be used used multiple during retries - // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // Internals may not collide with target - if (dstCount == 1) - { - setDelayFree(op1Use); - if (op2Use != nullptr) - { - setDelayFree(op2Use); - } - setInternalRegsDelayFree = true; - } - buildInternalRegisterUses(); - } - if (dstCount == 1) - { - BuildDef(tree); - } + srcCount = 1; } break; @@ -501,7 +424,7 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = BuildPutArgSplit(tree->AsPutArgSplit()); dstCount = tree->AsPutArgSplit()->gtNumRegs; break; -#endif // FEATURE _SPLIT_ARG +#endif // FEATURE_ARG_SPLIT case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); @@ -553,14 +476,14 @@ int LinearScan::BuildNode(GenTree* tree) { assert(dstCount == 1); - // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): + // Need a variable number of temp regs (see genLclHeap() in codegenloongarch64.cpp): // Here '-' means don't care. // // Size? Init Memory? # temp regs // 0 - 0 - // const and <=6 ptr words - 0 + // const and <=UnrollLimit - 0 // const and 6 ptr words Yes 0 + // >UnrollLimit Yes 0 // Non-const Yes 0 // Non-const No 2 // @@ -580,11 +503,11 @@ int LinearScan::BuildNode(GenTree* tree) // This should also help in debugging as we can examine the original size specified with // localloc. sizeVal = AlignUp(sizeVal, STACK_ALIGN); - size_t stpCount = sizeVal / (REGSIZE_BYTES * 2); + size_t insCount = sizeVal / (REGSIZE_BYTES * 2); - // For small allocations up to 4 'stp' instructions (i.e. 
16 to 64 bytes of localloc) - // - if (stpCount <= 4) + // For small allocations up to 4 'st' instructions (i.e. 16 to 64 bytes of localloc) + // TODO-LoongArch64: maybe use paird-load/store or SIMD in future. + if (sizeVal <= (REGSIZE_BYTES * 2 * 4)) { // Need no internal registers } @@ -703,7 +626,7 @@ int LinearScan::BuildNode(GenTree* tree) // LOONGARCH64 does not support both Index and offset so we need an internal register buildInternalIntRegisterDefForNode(tree); } - else if (!((-2048 <= cns) && (cns <= 2047))) + else if (!emitter::isValidSimm12(cns)) { // This offset can't be contained in the add instruction, so we need an internal register buildInternalIntRegisterDefForNode(tree); @@ -792,197 +715,8 @@ int LinearScan::BuildNode(GenTree* tree) // int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - int srcCount = 0; - // Only SIMDIntrinsicInit can be contained - if (simdTree->isContained()) - { - assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); - } - int dstCount = simdTree->IsValue() ? 1 : 0; - assert(dstCount == 1); - - bool buildUses = true; - - GenTree* op1 = simdTree->gtGetOp1(); - GenTree* op2 = simdTree->gtGetOp2(); - - switch (simdTree->gtSIMDIntrinsicID) - { - case SIMDIntrinsicInit: - case SIMDIntrinsicCast: - case SIMDIntrinsicSqrt: - case SIMDIntrinsicAbs: - case SIMDIntrinsicConvertToSingle: - case SIMDIntrinsicConvertToInt32: - case SIMDIntrinsicConvertToDouble: - case SIMDIntrinsicConvertToInt64: - case SIMDIntrinsicWidenLo: - case SIMDIntrinsicWidenHi: - // No special handling required. - break; - - case SIMDIntrinsicGetItem: - { - op1 = simdTree->gtGetOp1(); - op2 = simdTree->gtGetOp2(); - - // We have an object and an index, either of which may be contained. 
- bool setOp2DelayFree = false; - if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal())) - { - // If the index is not a constant and the object is not contained or is a local - // we will need a general purpose register to calculate the address - // internal register must not clobber input index - // TODO-Cleanup: An internal register will never clobber a source; this code actually - // ensures that the index (op2) doesn't interfere with the target. - buildInternalIntRegisterDefForNode(simdTree); - setOp2DelayFree = true; - } - srcCount += BuildOperandUses(op1); - if (!op2->isContained()) - { - RefPosition* op2Use = BuildUse(op2); - if (setOp2DelayFree) - { - setDelayFree(op2Use); - } - srcCount++; - } - - if (!op2->IsCnsIntOrI() && (!op1->isContained())) - { - // If vector is not already in memory (contained) and the index is not a constant, - // we will use the SIMD temp location to store the vector. - compiler->getSIMDInitTempVarNum(); - } - buildUses = false; - } - break; - - case SIMDIntrinsicAdd: - case SIMDIntrinsicSub: - case SIMDIntrinsicMul: - case SIMDIntrinsicDiv: - case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: - case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: - case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThanOrEqual: - // No special handling required. 
- break; - - case SIMDIntrinsicSetX: - case SIMDIntrinsicSetY: - case SIMDIntrinsicSetZ: - case SIMDIntrinsicSetW: - case SIMDIntrinsicNarrow: - { - // Op1 will write to dst before Op2 is free - BuildUse(op1); - RefPosition* op2Use = BuildUse(op2); - setDelayFree(op2Use); - srcCount = 2; - buildUses = false; - break; - } - - case SIMDIntrinsicInitN: - { - var_types baseType = simdTree->gtSIMDBaseType; - srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType)); - if (varTypeIsFloating(simdTree->gtSIMDBaseType)) - { - // Need an internal register to stitch together all the values into a single vector in a SIMD reg. - buildInternalFloatRegisterDefForNode(simdTree); - } - - for (GenTree* operand : simdTree->Operands()) - { - assert(operand->TypeIs(baseType)); - assert(!operand->isContained()); - - BuildUse(operand); - } - - buildUses = false; - break; - } - - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. - break; - - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - buildInternalFloatRegisterDefForNode(simdTree); - break; - - case SIMDIntrinsicDotProduct: - buildInternalFloatRegisterDefForNode(simdTree); - break; - - case SIMDIntrinsicSelect: - // TODO-LOONGARCH64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB - // bsl target register must be VC. Reserve a temp in case we need to shuffle things. - // This will require a different approach, as GenTreeSIMD has only two operands. 
- assert(!"SIMDIntrinsicSelect not yet supported"); - buildInternalFloatRegisterDefForNode(simdTree); - break; - - case SIMDIntrinsicInitArrayX: - case SIMDIntrinsicInitFixed: - case SIMDIntrinsicCopyToArray: - case SIMDIntrinsicCopyToArrayX: - case SIMDIntrinsicNone: - case SIMDIntrinsicGetCount: - case SIMDIntrinsicGetOne: - case SIMDIntrinsicGetZero: - case SIMDIntrinsicGetAllOnes: - case SIMDIntrinsicGetX: - case SIMDIntrinsicGetY: - case SIMDIntrinsicGetZ: - case SIMDIntrinsicGetW: - case SIMDIntrinsicInstEquals: - case SIMDIntrinsicHWAccel: - case SIMDIntrinsicWiden: - case SIMDIntrinsicInvalid: - assert(!"These intrinsics should not be seen during register allocation"); - __fallthrough; - - default: - noway_assert(!"Unimplemented SIMD node type."); - unreached(); - } - if (buildUses) - { - assert(!op1->OperIs(GT_LIST)); - assert(srcCount == 0); - srcCount = BuildOperandUses(op1); - if ((op2 != nullptr) && !op2->isContained()) - { - srcCount += BuildOperandUses(op2); - } - } - assert(internalCount <= MaxInternalCount); - buildInternalRegisterUses(); - if (dstCount == 1) - { - BuildDef(simdTree); - } - else - { - assert(dstCount == 0); - } - return srcCount; -#endif + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + return 0; } #endif // FEATURE_SIMD @@ -999,110 +733,8 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) // int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId; - int numArgs = HWIntrinsicInfo::lookupNumArgs(intrinsicTree); - - GenTree* op1 = intrinsicTree->gtGetOp1(); - GenTree* op2 = intrinsicTree->gtGetOp2(); - GenTree* op3 = nullptr; - int srcCount = 0; - - if ((op1 != nullptr) && op1->OperIsList()) - { - // op2 must be null, and there must be at least two more arguments. 
- assert(op2 == nullptr); - noway_assert(op1->AsArgList()->Rest() != nullptr); - noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr); - assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr); - op2 = op1->AsArgList()->Rest()->Current(); - op3 = op1->AsArgList()->Rest()->Rest()->Current(); - op1 = op1->AsArgList()->Current(); - } - - bool op2IsDelayFree = false; - bool op3IsDelayFree = false; - - // Create internal temps, and handle any other special requirements. - switch (HWIntrinsicInfo::lookup(intrinsicID).form) - { - case HWIntrinsicInfo::Sha1HashOp: - assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr)); - if (!op2->isContained()) - { - assert(!op3->isContained()); - op2IsDelayFree = true; - op3IsDelayFree = true; - setInternalRegsDelayFree = true; - } - buildInternalFloatRegisterDefForNode(intrinsicTree); - break; - case HWIntrinsicInfo::SimdTernaryRMWOp: - assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr)); - if (!op2->isContained()) - { - assert(!op3->isContained()); - op2IsDelayFree = true; - op3IsDelayFree = true; - } - break; - case HWIntrinsicInfo::Sha1RotateOp: - buildInternalFloatRegisterDefForNode(intrinsicTree); - break; - - case HWIntrinsicInfo::SimdExtractOp: - case HWIntrinsicInfo::SimdInsertOp: - if (!op2->isContained()) - { - // We need a temp to create a switch table - buildInternalIntRegisterDefForNode(intrinsicTree); - } - break; - - default: - break; - } - - // Next, build uses - if (numArgs > 3) - { - srcCount = 0; - assert(!op2IsDelayFree && !op3IsDelayFree); - assert(op1->OperIs(GT_LIST)); - { - for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest()) - { - srcCount += BuildOperandUses(list->Current()); - } - } - assert(srcCount == numArgs); - } - else - { - if (op1 != nullptr) - { - srcCount += BuildOperandUses(op1); - if (op2 != nullptr) - { - srcCount += (op2IsDelayFree) ? 
BuildDelayFreeUses(op2) : BuildOperandUses(op2); - if (op3 != nullptr) - { - srcCount += (op3IsDelayFree) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3); - } - } - } - } - buildInternalRegisterUses(); - - // Now defs - if (intrinsicTree->IsValue()) - { - BuildDef(intrinsicTree); - } - - return srcCount; -#endif + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + return 0; } #endif @@ -1141,12 +773,17 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) // LOONGARCH does not support both Index and offset so we need an internal register buildInternalIntRegisterDefForNode(indirTree); } - else if (!((-2048 <= cns) && (cns <= 2047))) + else if (!emitter::isValidSimm12(cns)) { // This offset can't be contained in the ldr/str instruction, so we need an internal register buildInternalIntRegisterDefForNode(indirTree); } } + else if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(indirTree); + } } #ifdef FEATURE_SIMD @@ -1224,14 +861,23 @@ int LinearScan::BuildCall(GenTreeCall* call) // computed into a register. if (call->IsFastTailCall()) { - // Fast tail call - make sure that call target is always computed in T9(LOONGARCH64) - // so that epilog sequence can generate "jr t9" to achieve fast tail call. + // Fast tail call - make sure that call target is always computed in T4(LOONGARCH64) + // so that epilog sequence can generate "jirl t4" to achieve fast tail call. ctrlExprCandidates = RBM_FASTTAILCALL_TARGET; } } else if (call->IsR2ROrVirtualStubRelativeIndir()) { - buildInternalIntRegisterDefForNode(call); + // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM + // and will load call address into the temp register from this register. 
+ regMaskTP candidates = RBM_NONE; + if (call->IsFastTailCall()) + { + candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(candidates != RBM_NONE); + } + + buildInternalIntRegisterDefForNode(call, candidates); } RegisterType registerType = call->TypeGet(); @@ -1291,6 +937,9 @@ int LinearScan::BuildCall(GenTreeCall* call) { #ifdef DEBUG assert(use.GetNode()->OperIs(GT_PUTARG_REG)); + assert(use.GetNode()->GetRegNum() == argReg); + // Update argReg for the next putarg_reg (if any) + argReg = genRegArgNext(argReg); #endif BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); srcCount++; @@ -1320,6 +969,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } } +#ifdef DEBUG // Now, count stack args // Note that these need to be computed into a register, but then // they're just stored to the stack - so the reg doesn't @@ -1334,10 +984,8 @@ int LinearScan::BuildCall(GenTreeCall* call) // Skip arguments that have been moved to the Late Arg list if ((arg->gtFlags & GTF_LATE_ARG) == 0) { -#ifdef DEBUG fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); assert(curArgTabEntry != nullptr); -#endif #if FEATURE_ARG_SPLIT // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they // define registers used by the call. @@ -1353,6 +1001,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } } } +#endif // DEBUG // If it is a fast tail call, it is already preferenced to use IP0. // Therefore, no need set src candidates on call tgt again. @@ -1415,7 +1064,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) } else { - // We can use a ldp/stp sequence so we need two internal registers for LOONGARCH64; one for ARM. + // We can use a ld/st sequence so we need two internal registers for LOONGARCH64. 
buildInternalIntRegisterDefForNode(argNode); if (putArgChild->OperGet() == GT_OBJ) @@ -1476,14 +1125,13 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Registers for split argument corresponds to source int dstCount = argNode->gtNumRegs; - regNumber argReg = argNode->GetRegNum(); - regMaskTP argMask = RBM_NONE; - regMaskTP argMaskArr[MAX_REG_ARG] = {RBM_NONE}; - - for (unsigned i = 0; i < dstCount; i++) + regNumber argReg = argNode->GetRegNum(); + regMaskTP argMask = RBM_NONE; + for (unsigned i = 0; i < argNode->gtNumRegs; i++) { - argMaskArr[i] = genRegMask(argNode->GetRegNumByIdx(i)); - argMask |= argMaskArr[i]; + regNumber thisArgReg = (regNumber)((unsigned)argReg + i); + argMask |= genRegMask(thisArgReg); + argNode->SetRegNumByIdx(thisArgReg, i); } if (putArgChild->OperGet() == GT_FIELD_LIST) @@ -1506,9 +1154,16 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Consume all the registers, setting the appropriate register mask for the ones that // go into registers. 
- // (sourceRegCount < argNode->gtNumRegs) - BuildUse(node, argMaskArr[sourceRegCount], 0); - sourceRegCount++; + for (unsigned regIndex = 0; regIndex < 1; regIndex++) + { + regMaskTP sourceMask = RBM_NONE; + if (sourceRegCount < argNode->gtNumRegs) + { + sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); + } + sourceRegCount++; + BuildUse(node, sourceMask, regIndex); + } } srcCount += sourceRegCount; assert(putArgChild->isContained()); @@ -1518,7 +1173,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) assert(putArgChild->TypeGet() == TYP_STRUCT); assert(putArgChild->OperGet() == GT_OBJ); - // We can use a ldr/str sequence so we need an internal register + // We can use a ld/st sequence so we need an internal register buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); GenTree* objChild = putArgChild->gtGetOp1(); @@ -1536,11 +1191,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) assert(putArgChild->isContained()); } buildInternalRegisterUses(); - assert((argMask != RBM_NONE) && ((int)genCountBits(argMask) == dstCount)); - for (int i = 0; i < dstCount; i++) - { - BuildDef(argNode, argMaskArr[i], i); - } + BuildDefs(argNode, dstCount, argMask); return srcCount; } #endif // FEATURE_ARG_SPLIT @@ -1611,7 +1262,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (size >= 2 * REGSIZE_BYTES) { - // We will use ldp/stp to reduce code size and improve performance + // TODO-LoongArch64: We will use ld/st paired to reduce code size and improve performance // so we need to reserve an extra internal register buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); } diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index 5519b0639de4c..2962798473df0 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -52,6 +52,11 @@ REGDEF(S8, 31, 0x80000000, "s8" ) //NOTE for LoongArch64: // The `REG_R21` which 
alias `REG_X0` is specially reserved !!! // It can be used only by manully and should be very careful!!! +// e.g. right now LoongArch64's backend-codegen/emit, there is usually +// needed an extra register for some cases, like +// constructing a large imm or offset, saving some intermediate result +// of the overflowing check and integer-comparing result. +// Using the specially reserved register may be more efficient. REGALIAS(R21, X0) #define FBASE 32 From c40d0b89a198d555e354bbc2707a5538612f970c Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 28 Feb 2022 16:10:24 +0800 Subject: [PATCH 30/46] [LoongArch64] merge fast-tail-call from main. --- src/coreclr/jit/codegenloongarch64.cpp | 429 +++++++++++++++---------- src/coreclr/jit/lsraloongarch64.cpp | 7 +- src/coreclr/jit/targetloongarch64.h | 3 - 3 files changed, 255 insertions(+), 184 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index af586a60e0715..6ab13a7c9198b 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1450,7 +1450,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) addrInfo.addr = nullptr; addrInfo.accessType = IAT_VALUE; - if (jmpEpilog && lastNode->gtOper == GT_JMP) + if (jmpEpilog && (lastNode->gtOper == GT_JMP)) { methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); @@ -1558,13 +1558,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) else { genPopCalleeSavedRegisters(true); - // Fast tail call. - // Call target = REG_FASTTAILCALL_TARGET - // https://github.com/dotnet/coreclr/issues/4827 - // Do we need a special encoding for stack walker like rex.w prefix for x64? - - // TODO-LOONGARCH64: whether the relative address is enough for optimize?
- GetEmitter()->emitIns_R_R_I(INS_jirl, emitTypeSize(TYP_I_IMPL), REG_R0, REG_FASTTAILCALL_TARGET, 0); + genCallInstruction(jmpNode->AsCall()); } #endif // FEATURE_FASTTAILCALL } @@ -5652,7 +5646,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) #endif // FEATURE_ARG_SPLIT case GT_CALL: - genCallInstruction(treeNode->AsCall()); + genCall(treeNode->AsCall()); break; case GT_MEMORYBARRIER: @@ -6799,8 +6793,6 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) // void CodeGen::genCodeForShift(GenTree* tree) { - // var_types targetType = tree->TypeGet(); - // genTreeOps oper = tree->OperGet(); instruction ins = genGetInsForOper(tree); emitAttr size = emitActualTypeSize(tree); @@ -7286,17 +7278,10 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst } //------------------------------------------------------------------------ -// genCallInstruction: Produce code for a GT_CALL node +// genCall: Produce code for a GT_CALL node // -void CodeGen::genCallInstruction(GenTreeCall* call) +void CodeGen::genCall(GenTreeCall* call) { - gtCallTypes callType = (gtCallTypes)call->gtCallType; - - DebugInfo di; - - // all virtuals should have been expanded into a control expression - assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); - // Consume all the arg regs for (GenTreeCall::Use& use : call->LateArgs()) { @@ -7321,23 +7306,22 @@ void CodeGen::genCallInstruction(GenTreeCall* call) assert(putArgRegNode->gtOper == GT_PUTARG_REG); genConsumeReg(putArgRegNode); + inst_Mov_Extend(putArgRegNode->TypeGet(), /* srcInReg */ true, argReg, putArgRegNode->GetRegNum(), + /* canSkip */ true, emitActualTypeSize(TYP_I_IMPL)); + + argReg = genRegArgNext(argReg); } } -#if FEATURE_ARG_SPLIT else if (curArgTabEntry->IsSplit()) { - assert(curArgTabEntry->numRegs >= 1); - genConsumeArgSplitStruct(argNode->AsPutArgSplit()); + NYI("unimplemented on LOONGARCH64 yet"); } -#endif // FEATURE_ARG_SPLIT else { regNumber argReg = 
curArgTabEntry->GetRegNum(); genConsumeReg(argNode); - if (argNode->GetRegNum() != argReg) - { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, argNode->GetRegNum()); - } + inst_Mov_Extend(argNode->TypeGet(), /* srcInReg */ true, argReg, argNode->GetRegNum(), /* canSkip */ true, + emitActualTypeSize(TYP_I_IMPL)); } } @@ -7346,54 +7330,39 @@ void CodeGen::genCallInstruction(GenTreeCall* call) { const regNumber regThis = genGetThisArgReg(call); - // Ditto as genCodeForNullCheck GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, regThis, 0); } - // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper - // method. - CORINFO_METHOD_HANDLE methHnd; - GenTree* target = call->gtControlExpr; - if (callType == CT_INDIRECT) - { - assert(target == nullptr); - target = call->gtCallAddr; - methHnd = nullptr; - } - else - { - methHnd = call->gtCallMethHnd; - } - - CORINFO_SIG_INFO* sigInfo = nullptr; -#ifdef DEBUG - // Pass the call signature information down into the emitter so the emitter can associate - // native call sites with the signatures they were generated from. - if (callType != CT_HELPER) - { - sigInfo = call->callSig; - } -#endif // DEBUG - - // If fast tail call, then we are done. In this case we setup the args (both reg args - // and stack args in incoming arg area) and call target. Epilog sequence would - // generate "br ". + // If fast tail call, then we are done here, we just have to load the call + // target into the right registers. We ensure in RA that target is loaded + // into a volatile register that won't be restored by epilog sequence. if (call->IsFastTailCall()) { - // Don't support fast tail calling JIT helpers - assert(callType != CT_HELPER); + GenTree* target = getCallTarget(call, nullptr); if (target != nullptr) { // Indirect fast tail calls materialize call target either in gtControlExpr or in gtCallAddr. 
genConsumeReg(target); + } +#ifdef FEATURE_READYTORUN + else if (call->IsR2ROrVirtualStubRelativeIndir()) + { + assert(((call->IsR2RRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_PVALUE)) || + ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE))); + assert(call->gtControlExpr == nullptr); - // Use REG_FASTTAILCALL_TARGET on LOONGARCH64 as the call target register. - if (target->GetRegNum() != REG_FASTTAILCALL_TARGET) - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_4BYTE, REG_FASTTAILCALL_TARGET, target->GetRegNum(), 0); - } + regNumber tmpReg = call->GetSingleTempReg(); + // Register where we save call address in should not be overridden by epilog. + assert((tmpReg & (RBM_INT_CALLEE_TRASH & ~RBM_RA)) == tmpReg); + + regNumber callAddrReg = + call->IsVirtualStubRelativeIndir() ? compiler->virtualStubParamInfo->GetReg() : REG_R2R_INDIRECT_PARAM; + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, callAddrReg); + // We will use this again when emitting the jump in genCallInstruction in the epilog + call->gtRsvdRegs |= genRegMask(tmpReg); } +#endif return; } @@ -7407,6 +7376,94 @@ void CodeGen::genCallInstruction(GenTreeCall* call) genDefineTempLabel(genCreateTempLabel()); } + genCallInstruction(call); + + // for pinvoke/intrinsic/tailcalls we may have needed to get the address of + // a label. In case it is indirect with CFG enabled make sure we do not get + // the address after the validation but only after the actual call that + // comes after. + if (genPendingCallLabel && !call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL)) + { + genDefineInlineTempLabel(genPendingCallLabel); + genPendingCallLabel = nullptr; + } + +#ifdef DEBUG + // We should not have GC pointers in killed registers live around the call. + // GC info for arg registers were cleared when consuming arg nodes above + // and LSRA should ensure it for other trashed registers. 
+ regMaskTP killMask = RBM_CALLEE_TRASH; + if (call->IsHelperCall()) + { + CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); + killMask = compiler->compHelperCallKillSet(helpFunc); + } + + assert((gcInfo.gcRegGCrefSetCur & killMask) == 0); + assert((gcInfo.gcRegByrefSetCur & killMask) == 0); +#endif + + var_types returnType = call->TypeGet(); + if (returnType != TYP_VOID) + { + regNumber returnReg; + + if (call->HasMultiRegRetVal()) + { + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + assert(pRetTypeDesc != nullptr); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. + for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = pRetTypeDesc->GetReturnRegType(i); + returnReg = pRetTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true); + } + } + else + { + if (varTypeUsesFloatArgReg(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->GetRegNum() != returnReg) + { + inst_Mov(returnType, call->GetRegNum(), returnReg, /* canSkip */ false); + } + } + + genProduceReg(call); + } + + // If there is nothing next, that means the result is thrown away, so this value is not live. + // However, for minopts or debuggable code, we keep it live to support managed return value debugging. + if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) + { + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + } +} + +//------------------------------------------------------------------------ +// genCallInstruction - Generate instructions necessary to transfer control to the call. +// +// Arguments: +// call - the GT_CALL node +// +// Remaks: +// For tailcalls this function will generate a jump. 
+// +void CodeGen::genCallInstruction(GenTreeCall* call) +{ // Determine return value size(s). const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); emitAttr retSize = EA_PTRSIZE; @@ -7431,7 +7488,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } - // We need to propagate the IL offset information to the call instruction, so we can emit + DebugInfo di; + // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. // We don't want tail call helper calls that were converted from normal calls to get a record, // so we skip this hash table lookup logic in that case. @@ -7440,153 +7498,168 @@ void CodeGen::genCallInstruction(GenTreeCall* call) (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di); } + CORINFO_SIG_INFO* sigInfo = nullptr; +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (call->gtCallType != CT_HELPER) + { + sigInfo = call->callSig; + } + + if (call->IsFastTailCall()) + { + regMaskTP trashedByEpilog = RBM_CALLEE_SAVED; + + // The epilog may use and trash REG_GSCOOKIE_TMP_0/1. Make sure we have no + // non-standard args that may be trash if this is a tailcall. 
+ if (compiler->getNeedsGSSecurityCookie()) + { + trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_0); + trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_1); + } + + for (unsigned i = 0; i < call->fgArgInfo->ArgCount(); i++) + { + fgArgTabEntry* entry = call->fgArgInfo->GetArgEntry(i); + for (unsigned j = 0; j < entry->numRegs; j++) + { + regNumber reg = entry->GetRegNum(j); + if ((trashedByEpilog & genRegMask(reg)) != 0) + { + JITDUMP("Tail call node:\n"); + DISPTREE(call); + JITDUMP("Register used: %s\n", getRegName(reg)); + assert(!"Argument to tailcall may be trashed by epilog"); + } + } + } + } +#endif // DEBUG + CORINFO_METHOD_HANDLE methHnd; + GenTree* target = getCallTarget(call, &methHnd); + if (target != nullptr) { // A call target can not be a contained indirection assert(!target->isContainedIndir()); - genConsumeReg(target); + // For fast tailcall we have already consumed the target. We ensure in + // RA that the target was allocated into a volatile register that will + // not be messed up by epilog sequence. + if (!call->IsFastTailCall()) + { + genConsumeReg(target); + } // We have already generated code for gtControlExpr evaluating it into a register. // We just need to emit "call reg" in this case. 
// assert(genIsValidIntReg(target->GetRegNum())); - genEmitCall(emitter::EC_INDIR_R, methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, target->GetRegNum(), + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, // addr + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + target->GetRegNum(), call->IsFastTailCall()); - } - else if (call->IsR2ROrVirtualStubRelativeIndir()) - { - // Generate a direct call to a non-virtual user defined or helper method - assert(callType == CT_HELPER || callType == CT_USER_FUNC); -#ifdef FEATURE_READYTORUN_COMPILER - assert(((call->IsR2RRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_PVALUE)) || - ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE))); -#endif // FEATURE_READYTORUN_COMPILER - assert(call->gtControlExpr == nullptr); - assert(!call->IsTailCall()); - - regNumber tmpReg = call->GetSingleTempReg(); - GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, REG_R2R_INDIRECT_PARAM); - - // We have now generated code for gtControlExpr evaluating it into `tmpReg`. - // We just need to emit "call tmpReg" in this case. 
- // - assert(genIsValidIntReg(tmpReg)); - - genEmitCall(emitter::EC_INDIR_R, methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, tmpReg, call->IsFastTailCall()); + // clang-format on } else { - // Generate a direct call to a non-virtual user defined or helper method - assert(callType == CT_HELPER || callType == CT_USER_FUNC); - - void* addr = nullptr; -#ifdef FEATURE_READYTORUN_COMPILER - if (call->gtEntryPoint.addr != NULL) + // If we have no target and this is a call with indirection cell then + // we do an optimization where we load the call address directly from + // the indirection cell instead of duplicating the tree. In BuildCall + // we ensure that get an extra register for the purpose. Note that for + // CFG the call might have changed to + // CORINFO_HELP_DISPATCH_INDIRECT_CALL in which case we still have the + // indirection cell but we should not try to optimize. + regNumber callThroughIndirReg = REG_NA; + if (!call->IsHelperCall(compiler, CORINFO_HELP_DISPATCH_INDIRECT_CALL)) { - assert(call->gtEntryPoint.accessType == IAT_VALUE); - addr = call->gtEntryPoint.addr; + callThroughIndirReg = getCallIndirectionCellReg(call); } - else -#endif // FEATURE_READYTORUN_COMPILER - if (callType == CT_HELPER) - { - CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); - noway_assert(helperNum != CORINFO_HELP_UNDEF); - void* pAddr = nullptr; - addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); - assert(pAddr == nullptr); - } - else + if (callThroughIndirReg != REG_NA) { - // Direct call to a non-virtual user function. - addr = call->gtDirectCallAddress; - } + assert(call->IsR2ROrVirtualStubRelativeIndir()); + regNumber targetAddrReg = call->GetSingleTempReg(); + // For fast tailcalls we have already loaded the call target when processing the call node. 
+ if (!call->IsFastTailCall()) + { + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), targetAddrReg, + callThroughIndirReg); + } + else + { + // Register where we save call address in should not be overridden by epilog. + assert((targetAddrReg & (RBM_INT_CALLEE_TRASH & ~RBM_RA)) == targetAddrReg); + } - assert(addr != nullptr); + // We have now generated code loading the target address from the indirection cell into `targetAddrReg`. + // We just need to emit "bl targetAddrReg" in this case. + // + assert(genIsValidIntReg(targetAddrReg)); - // Non-virtual direct call to known addresses - { - genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, REG_R21, + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, // addr + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + targetAddrReg, call->IsFastTailCall()); - } - } - - // if it was a pinvoke we may have needed to get the address of a label - if (genPendingCallLabel) - { - genDefineInlineTempLabel(genPendingCallLabel); - genPendingCallLabel = nullptr; - } - - // Update GC info: - // All Callee arg registers are trashed and no longer contain any GC pointers. - // TODO-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? 
- // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other - // registers from RBM_CALLEE_TRASH - assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; - gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; - - var_types returnType = call->TypeGet(); - if (returnType != TYP_VOID) - { - regNumber returnReg; - - if (call->HasMultiRegRetVal()) - { - assert(pRetTypeDesc != nullptr); - unsigned regCount = pRetTypeDesc->GetReturnRegCount(); - - // If regs allocated to call node are different from ABI return - // regs in which the call has returned its result, move the result - // to regs allocated to call node. - for (unsigned i = 0; i < regCount; ++i) - { - var_types regType = pRetTypeDesc->GetReturnRegType(i); - returnReg = pRetTypeDesc->GetABIReturnReg(i); - regNumber allocatedReg = call->GetRegNumByIdx(i); - if (returnReg != allocatedReg) - { - inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); - } - } + // clang-format on } else { - if (varTypeUsesFloatArgReg(returnType)) + // Generate a direct call to a non-virtual user defined or helper method + assert(call->gtCallType == CT_HELPER || call->gtCallType == CT_USER_FUNC); + + void* addr = nullptr; +#ifdef FEATURE_READYTORUN + if (call->gtEntryPoint.addr != NULL) { - returnReg = REG_FLOATRET; + assert(call->gtEntryPoint.accessType == IAT_VALUE); + addr = call->gtEntryPoint.addr; } else +#endif // FEATURE_READYTORUN + if (call->gtCallType == CT_HELPER) { - returnReg = REG_INTRET; - } + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + noway_assert(helperNum != CORINFO_HELP_UNDEF); - if (call->GetRegNum() != returnReg) + void* pAddr = nullptr; + addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + assert(pAddr == nullptr); + } + else { - { - inst_RV_RV(ins_Copy(returnType), call->GetRegNum(), returnReg, 
returnType); - } + // Direct call to a non-virtual user function. + addr = call->gtDirectCallAddress; } - } - genProduceReg(call); - } + assert(addr != nullptr); - // If there is nothing next, that means the result is thrown away, so this value is not live. - // However, for minopts or debuggable code, we keep it live to support managed return value debugging. - if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) - { - gcInfo.gcMarkRegSetNpt(RBM_INTRET); + // clang-format off + genEmitCall(emitter::EC_FUNC_TOKEN, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + addr, + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + REG_NA, + call->IsFastTailCall()); + // clang-format on + } } } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 0c5d995953bdb..76416c597d82b 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -861,9 +861,10 @@ int LinearScan::BuildCall(GenTreeCall* call) // computed into a register. if (call->IsFastTailCall()) { - // Fast tail call - make sure that call target is always computed in T4(LOONGARCH64) - // so that epilog sequence can generate "jirl t4" to achieve fast tail call. - ctrlExprCandidates = RBM_FASTTAILCALL_TARGET; + // Fast tail call - make sure that call target is always computed in volatile registers + // that will not be overridden by epilog sequence. 
+ ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(ctrlExprCandidates != RBM_NONE); } } else if (call->IsR2ROrVirtualStubRelativeIndir()) diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 8e74ea0334a9c..2106d3f2bc0b1 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -80,9 +80,6 @@ #define REG_DEFAULT_HELPER_CALL_TARGET REG_T2 #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_T2 - #define REG_FASTTAILCALL_TARGET REG_T4 // Target register for fast tail call - #define RBM_FASTTAILCALL_TARGET RBM_T4 - #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH) #define RBM_ALLDOUBLE RBM_ALLFLOAT From 20de75f0a7624ba3169d1764f6379a50898c69f1 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Sat, 26 Feb 2022 20:30:37 +0800 Subject: [PATCH 31/46] [LoongArch64] temp commit for windows compiling error. --- src/coreclr/inc/palclr.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/inc/palclr.h b/src/coreclr/inc/palclr.h index 2ab9c62c3e844..40fe2d1d3a2d1 100644 --- a/src/coreclr/inc/palclr.h +++ b/src/coreclr/inc/palclr.h @@ -606,4 +606,8 @@ #include "palclr_win.h" +#ifndef IMAGE_FILE_MACHINE_LOONGARCH64 +#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 // LOONGARCH64. +#endif + #endif // defined(HOST_WINDOWS) From d86e6b466b37dc44a6fca39e1038fca71b525d8d Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 1 Mar 2022 10:30:27 +0800 Subject: [PATCH 32/46] [LoongArch64] amend format for reviewing. 
--- src/coreclr/jit/emit.h | 18 ++-- src/coreclr/jit/lowerloongarch64.cpp | 114 +------------------------- src/coreclr/jit/lsra.cpp | 8 ++ src/coreclr/jit/lsrabuild.cpp | 5 +- src/coreclr/jit/lsraloongarch64.cpp | 58 ++----------- src/coreclr/jit/registerloongarch64.h | 2 +- 6 files changed, 32 insertions(+), 173 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 5008f79e680f9..126c74bdbefa7 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -607,9 +607,7 @@ class emitter static_assert_no_msg(IF_COUNT <= 128); insFormat _idInsFmt : 7; #elif defined(TARGET_LOONGARCH64) - unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. If not enough, please use the - // _idInsCount. - // unsigned _idInsCount : 5; // the instruction(s) count of this instrDesc described. + unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. #else static_assert_no_msg(IF_COUNT <= 256); insFormat _idInsFmt : 8; @@ -644,7 +642,7 @@ class emitter { } #else - insFormat idInsFmt() const + insFormat idInsFmt() const { return _idInsFmt; } @@ -1044,17 +1042,19 @@ class emitter #elif defined(TARGET_LOONGARCH64) unsigned idCodeSize() const { - return _idCodeSize; //_idInsCount; + return _idCodeSize; } void idCodeSize(unsigned sz) { - assert(sz < 32); + // LoongArch64's instrDesc is not always meaning only one instruction. + // e.g. the `emitter::emitIns_I_la` for emitting the immediates. + assert(sz <= 16); _idCodeSize = sz; } #endif // TARGET_LOONGARCH64 emitAttr idOpSize() - { // NOTE: not used for LOONGARCH64. 
+ { return emitDecodeSize(_idOpSize); } void idOpSize(emitAttr opsz) @@ -1889,8 +1889,8 @@ class emitter #endif // !defined(HOST_64BIT) #ifdef TARGET_LOONGARCH64 - unsigned int emitCounts_INS_OPTS_J; // INS_OPTS_J -#endif // defined(TARGET_LOONGARCH64) + unsigned int emitCounts_INS_OPTS_J; +#endif // TARGET_LOONGARCH64 size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp); size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp); diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 0054decb0b019..13d6cb3469dd9 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -827,61 +827,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) // void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - switch (simdNode->gtSIMDIntrinsicID) - { - GenTree* op1; - GenTree* op2; - - case SIMDIntrinsicInit: - op1 = simdNode->gtOp.gtOp1; - if (op1->IsIntegralConst(0)) - { - MakeSrcContained(simdNode, op1); - } - break; - - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. - CheckImmedAndMakeContained(simdNode, simdNode->gtGetOp2()); - break; - - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - // TODO-LOONGARCH64-CQ Support containing 0 - break; - - case SIMDIntrinsicGetItem: - { - // This implements get_Item method. The sources are: - // - the source SIMD struct - // - index (which element to get) - // The result is baseType of SIMD struct. - op1 = simdNode->gtOp.gtOp1; - op2 = simdNode->gtOp.gtOp2; - - // If the index is a constant, mark it as contained. 
- if (op2->IsCnsIntOrI()) - { - MakeSrcContained(simdNode, op2); - } - - if (IsContainableMemoryOp(op1)) - { - MakeSrcContained(simdNode, op1); - if (op1->OperGet() == GT_IND) - { - op1->AsIndir()->Addr()->ClearContained(); - } - } - break; - } - - default: - break; - } -#endif + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); } #endif // FEATURE_SIMD @@ -894,63 +840,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) // void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - GenTreeArgList* argList = nullptr; - GenTree* op1 = node->gtOp.gtOp1; - GenTree* op2 = node->gtOp.gtOp2; - - if (op1->OperIs(GT_LIST)) - { - argList = op1->AsArgList(); - op1 = argList->Current(); - op2 = argList->Rest()->Current(); - } - - switch (HWIntrinsicInfo::lookup(node->gtHWIntrinsicId).form) - { - case HWIntrinsicInfo::SimdExtractOp: - if (op2->IsCnsIntOrI()) - { - MakeSrcContained(node, op2); - } - break; - - case HWIntrinsicInfo::SimdInsertOp: - if (op2->IsCnsIntOrI()) - { - MakeSrcContained(node, op2); - -#if 0 - // This is currently not supported downstream. The following (at least) need to be modifed: - // GenTree::isContainableHWIntrinsic() needs to handle this. 
- // CodeGen::genConsumRegs() - // - GenTree* op3 = argList->Rest()->Rest()->Current(); - - // In the HW intrinsics C# API there is no direct way to specify a vector element to element mov - // VX[a] = VY[b] - // In C# this would naturally be expressed by - // Insert(VX, a, Extract(VY, b)) - // If both a & b are immediate constants contain the extract/getItem so that we can emit - // the single instruction mov Vx[a], Vy[b] - if (op3->OperIs(GT_HWIntrinsic) && (op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_LOONGARCH64_SIMD_GetItem)) - { - ContainCheckHWIntrinsic(op3->AsHWIntrinsic()); - - if (op3->gtOp.gtOp2->isContained()) - { - MakeSrcContained(node, op3); - } - } -#endif - } - break; - - default: - break; - } -#endif + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index bcb834ab3e0f9..6a42d72203b33 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2663,10 +2663,18 @@ RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* RegisterType regType = currentInterval->registerType; regMaskTP candidates = refPosition->registerAssignment; #ifdef TARGET_LOONGARCH64 + // The LoongArch64's ABI which the float args maybe passed by integer register + // when no float register left but free integer register. 
if ((candidates & allRegs(regType)) != RBM_NONE) + { return regType; + } else + { + assert((regType == TYP_DOUBLE) || (regType == TYP_FLOAT)); + assert((candidates & allRegs(TYP_I_IMPL)) != RBM_NONE); return TYP_I_IMPL; + } #else assert((candidates & allRegs(regType)) != RBM_NONE); return regType; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index bba2b26ed5bba..0ae460498883d 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -593,9 +593,12 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, regNumber physicalReg = genRegNumFromMask(mask); RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); -#ifndef TARGET_LOONGARCH64 +#ifdef TARGET_LOONGARCH64 // The LoongArch64's ABI which the float args maybe passed by integer register // when no float register left but free integer register. + assert((regType(theInterval->registerType) == FloatRegisterType) || + (allRegs(theInterval->registerType) & mask) != 0); +#else assert((allRegs(theInterval->registerType) & mask) != 0); #endif } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 76416c597d82b..207c79864470e 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -45,7 +45,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX int LinearScan::BuildNode(GenTree* tree) { assert(!tree->isContained()); - int srcCount; + int srcCount = 0; int dstCount = 0; regMaskTP dstCandidates = RBM_NONE; regMaskTP killMask = RBM_NONE; @@ -264,7 +264,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RSZ: case GT_ROR: srcCount = BuildBinaryUses(tree->AsOp()); - buildInternalRegisterUses(); assert(dstCount == 1); BuildDef(tree); break; @@ -368,43 +367,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CMPXCHG: { NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); - - 
GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); - srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3; - assert(dstCount == 1); - - // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // For LOONGARCH exclusives requires a single internal register - buildInternalIntRegisterDefForNode(tree); - } - - // For LOONGARCH exclusives the lifetime of the addr and data must be extended because - // it may be used multiple during retries - - // For LOONGARCH atomic cas the lifetime of the addr and data must be extended to prevent - // them being reused as the target register which must be destroyed early - - RefPosition* locationUse = BuildUse(tree->AsCmpXchg()->gtOpLocation); - setDelayFree(locationUse); - RefPosition* valueUse = BuildUse(tree->AsCmpXchg()->gtOpValue); - setDelayFree(valueUse); - if (!cmpXchgNode->gtOpComparand->isContained()) - { - RefPosition* comparandUse = BuildUse(tree->AsCmpXchg()->gtOpComparand); - - // For LOONGARCH exclusives the lifetime of the comparand must be extended because - // it may be used used multiple during retries - // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - setDelayFree(comparandUse); - } - } - - // Internals may not collide with target - setInternalRegsDelayFree = true; - buildInternalRegisterUses(); - BuildDef(tree); } break; @@ -415,7 +377,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_XCHG: { NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); - srcCount = 1; } break; @@ -502,8 +463,7 @@ int LinearScan::BuildNode(GenTree* tree) // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size. // This should also help in debugging as we can examine the original size specified with // localloc. - sizeVal = AlignUp(sizeVal, STACK_ALIGN); - size_t insCount = sizeVal / (REGSIZE_BYTES * 2); + sizeVal = AlignUp(sizeVal, STACK_ALIGN); // For small allocations up to 4 'st' instructions (i.e. 
16 to 64 bytes of localloc) // TODO-LoongArch64: maybe use paird-load/store or SIMD in future. @@ -1067,6 +1027,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) { // We can use a ld/st sequence so we need two internal registers for LOONGARCH64. buildInternalIntRegisterDefForNode(argNode); + buildInternalIntRegisterDefForNode(argNode); if (putArgChild->OperGet() == GT_OBJ) { @@ -1155,16 +1116,13 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Consume all the registers, setting the appropriate register mask for the ones that // go into registers. - for (unsigned regIndex = 0; regIndex < 1; regIndex++) + regMaskTP sourceMask = RBM_NONE; + if (sourceRegCount < argNode->gtNumRegs) { - regMaskTP sourceMask = RBM_NONE; - if (sourceRegCount < argNode->gtNumRegs) - { - sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); - } - sourceRegCount++; - BuildUse(node, sourceMask, regIndex); + sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); } + sourceRegCount++; + BuildUse(node, sourceMask, 0); } srcCount += sourceRegCount; assert(putArgChild->isContained()); diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index 2962798473df0..b58b7757b41d2 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -53,7 +53,7 @@ REGDEF(S8, 31, 0x80000000, "s8" ) // The `REG_R21` which alias `REG_X0` is specially reserved !!! // It can be used only by manully and should be very careful!!! // e.g. right now LoongArch64's backend-codegen/emit, there is usually -// needed a extra regitster for some case liking +// a need for an extra register for cases like // constructing a large imm or offset, saving some intermediate result // of the overflowing check and integer-comparing result. // Using the a specially reserved register maybe more efficient. 
From a235523cda2b7f5e5f43ce4995442dd1acaffe86 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 4 Mar 2022 17:47:21 +0800 Subject: [PATCH 33/46] [LoongArch64] amend the coding for LA-ABI's flags. --- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/importer.cpp | 2 +- src/coreclr/jit/lclvars.cpp | 2 +- src/coreclr/jit/morph.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 278f6466aca6a..15df62dadb9b6 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -937,7 +937,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, { uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); - if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) { howToReturnStruct = SPK_PrimitiveType; useType = (structSize > 4) ? TYP_DOUBLE : TYP_FLOAT; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index e8658621055bb..32d821142acbc 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -11389,7 +11389,7 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr fUnsigned ? (uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; op1->gtType = TYP_LONG; } - else if (op1->gtOper == GT_CNS_INT) + else *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); #else op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? 
TYP_U_IMPL : TYP_I_IMPL); diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 7cb1109f4afef..b15be80826c44 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -891,7 +891,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { assert(varTypeIsStruct(argType)); int floatNum = 0; - if (floatFlags == STRUCT_FLOAT_FIELD_ONLY_ONE) + if ((floatFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) { assert(argSize <= 8); assert(varDsc->lvExactSize <= argSize); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 0e348b24befb5..e5c2a413aadb5 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3518,7 +3518,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) else intArgRegNum += size; } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) { structBaseType = structSize == 8 ? TYP_DOUBLE : TYP_FLOAT; fltArgRegNum += 1; From 9cc28ade034c215123b08f8eb0876e3d49338465 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 15 Mar 2022 15:51:53 +0800 Subject: [PATCH 34/46] [LoongArch64] amend some missed CRs. 
--- src/coreclr/jit/codegenloongarch64.cpp | 276 ++++++++++++++++++++----- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/lclvars.cpp | 5 +- src/coreclr/jit/lsraloongarch64.cpp | 42 +++- src/coreclr/jit/morph.cpp | 111 ++-------- src/coreclr/jit/targetloongarch64.h | 2 +- 6 files changed, 281 insertions(+), 157 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 6ab13a7c9198b..07c876496da57 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -234,7 +234,9 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, instruction ins = INS_st_d; if (genIsValidFloatReg(reg1)) + { ins = INS_fst_d; + } if (spDelta != 0) { @@ -281,7 +283,9 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum instruction ins = INS_st_d; if (genIsValidFloatReg(reg1)) + { ins = INS_fst_d; + } if (spDelta != 0) { @@ -330,7 +334,9 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, instruction ins = INS_ld_d; if (genIsValidFloatReg(reg1)) + { ins = INS_fld_d; + } if (spDelta != 0) { @@ -378,7 +384,9 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, reg instruction ins = INS_ld_d; if (genIsValidFloatReg(reg1)) + { ins = INS_fld_d; + } if (spDelta != 0) { @@ -452,6 +460,7 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* reg } } } + if (!isPairSave) { regStack->Push(RegPair(reg1)); @@ -1034,7 +1043,9 @@ void CodeGen::genFuncletProlog(BasicBlock* block) #ifdef DEBUG if (compiler->opts.disAsm) + { printf("DEBUG: CodeGen::genFuncletProlog, frameType:%d\n\n", genFuncletInfo.fiFrameType); + } #endif int offset = 0; @@ -1166,7 +1177,9 @@ void CodeGen::genFuncletEpilog() { #ifdef DEBUG if (verbose) + { printf("*************** In genFuncletEpilog()\n"); + } #endif ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); @@ -1190,7 +1203,9 @@ void CodeGen::genFuncletEpilog() #ifdef DEBUG if 
(compiler->opts.disAsm) + { printf("DEBUG: CodeGen::genFuncletEpilog, frameType:%d\n\n", genFuncletInfo.fiFrameType); + } #endif regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat; @@ -1292,7 +1307,9 @@ void CodeGen::genFuncletEpilog() void CodeGen::genCaptureFuncletPrologEpilogInfo() { if (!compiler->ehAnyFunclets()) + { return; + } assert(isFramePointerUsed()); @@ -1312,10 +1329,14 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned saveRegsPlusPSPSize; if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize /* -2*8*/; + } else + { saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; + } if (compiler->info.compIsVarArgs) { @@ -1413,7 +1434,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) { #ifdef DEBUG if (verbose) + { printf("*************** In genFnEpilog()\n"); + } #endif // DEBUG ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); @@ -1424,7 +1447,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) #ifdef DEBUG if (compiler->opts.dspCode) + { printf("\n__epilog:\n"); + } if (verbose) { @@ -2239,20 +2264,26 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) if (varTypeUsesFloatArgReg(treeNode)) { if (attr == EA_4BYTE) + { GetEmitter()->emitIns_R_R(INS_fmov_s, attr, retReg, op1->GetRegNum()); + } else + { GetEmitter()->emitIns_R_R(INS_fmov_d, attr, retReg, op1->GetRegNum()); + } } else { - if (attr == EA_4BYTE) // && op1->OperIs(GT_LCL_VAR) && - // (emitActualTypeSize(compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvType) == - // EA_8BYTE)) + if (attr == EA_4BYTE) { if (treeNode->gtFlags & GTF_UNSIGNED) + { GetEmitter()->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, retReg, op1->GetRegNum(), 31, 0); + } else + { GetEmitter()->emitIns_R_R_I(INS_slli_w, attr, retReg, op1->GetRegNum(), 0); + } } else GetEmitter()->emitIns_R_R_I(INS_ori, attr, retReg, 
op1->GetRegNum(), 0); @@ -2368,26 +2399,29 @@ void CodeGen::genLclHeap(GenTree* tree) static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time size_t stpCount = amount / (REGSIZE_BYTES * 2); - if (stpCount <= 4) + if (compiler->info.compInitMem) { - imm = -16 * stpCount; - emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); - - imm = -imm; - while (stpCount != 0) + if (stpCount <= 4) { - imm -= 8; - emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); - imm -= 8; - emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); - stpCount -= 1; - } + imm = -16 * stpCount; + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); - lastTouchDelta = 0; + imm = -imm; + while (stpCount != 0) + { + imm -= 8; + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); + imm -= 8; + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); + stpCount -= 1; + } - goto ALLOC_DONE; + lastTouchDelta = 0; + + goto ALLOC_DONE; + } } - else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <= + else if (amount < compiler->eeGetPageSize()) // must be < not <= { // Since the size is less than a page, simply adjust the SP value. 
// The SP might already be in the guard page, so we must touch it BEFORE @@ -2398,14 +2432,14 @@ void CodeGen::genLclHeap(GenTree* tree) lastTouchDelta = amount; imm = -(ssize_t)amount; - assert(-8192 <= imm && imm < 0); - if (-2048 <= imm && imm < 0) + if (isValidSimm12(imm)) + { emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + } else { - emit->emitIns_R_R_I(INS_srai_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); - emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm >> 3); - emit->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); + emit->emitIns_I_la(EA_PTRSIZE, rsGetRsvdReg(), amount); + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rsGetRsvdReg()); } goto ALLOC_DONE; @@ -2749,7 +2783,9 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) ins = INS_div_w; } else + { ins = INS_mod_w; + } } else { @@ -2758,7 +2794,9 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) ins = INS_div_d; } else + { ins = INS_mod_d; + } } emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); @@ -2786,7 +2824,9 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) ins = INS_div_wu; } else + { ins = INS_mod_wu; + } // TODO-LOONGARCH64: here is just for signed-extension ? 
emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0); @@ -2799,7 +2839,9 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) ins = INS_div_du; } else + { ins = INS_mod_du; + } } emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); @@ -3188,15 +3230,12 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) static inline bool isImmed(GenTree* treeNode) { - if (treeNode->gtGetOp1()->isContainedIntOrIImmed()) + assert(treeNode->OperIsBinary()); + + if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) { return true; } - else if (treeNode->OperIsBinary()) - { - if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) - return true; - } return false; } @@ -3218,33 +3257,53 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) { case GT_ADD: if (attr == EA_4BYTE) + { ins = INS_fadd_s; + } else + { ins = INS_fadd_d; + } break; case GT_SUB: if (attr == EA_4BYTE) + { ins = INS_fsub_s; + } else + { ins = INS_fsub_d; + } break; case GT_MUL: if (attr == EA_4BYTE) + { ins = INS_fmul_s; + } else + { ins = INS_fmul_d; + } break; case GT_DIV: if (attr == EA_4BYTE) + { ins = INS_fdiv_s; + } else + { ins = INS_fdiv_d; + } break; case GT_NEG: if (attr == EA_4BYTE) + { ins = INS_fneg_s; + } else + { ins = INS_fneg_d; + } break; default: @@ -3286,7 +3345,6 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) break; case GT_SUB: - isImm = isImmed(treeNode); if ((attr == EA_8BYTE) || (attr == EA_BYREF)) { ins = INS_sub_d; @@ -3410,16 +3468,24 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) { // it's better to check sa. if (attr == EA_4BYTE) + { ins = INS_slli_w; + } else + { ins = INS_slli_d; + } } else { if (attr == EA_4BYTE) + { ins = INS_sll_w; + } else + { ins = INS_sll_d; + } } break; @@ -3429,16 +3495,24 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) { // it's better to check sa. 
if (attr == EA_4BYTE) + { ins = INS_srli_w; + } else + { ins = INS_srli_d; + } } else { if (attr == EA_4BYTE) + { ins = INS_srl_w; + } else + { ins = INS_srl_d; + } } break; @@ -3448,16 +3522,24 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) { // it's better to check sa. if (attr == EA_4BYTE) + { ins = INS_srai_w; + } else + { ins = INS_srai_d; + } } else { if (attr == EA_4BYTE) + { ins = INS_sra_w; + } else + { ins = INS_sra_d; + } } break; @@ -3467,16 +3549,24 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) { // it's better to check sa. if (attr == EA_4BYTE) + { ins = INS_rotri_w; + } else + { ins = INS_rotri_d; + } } else { if (attr == EA_4BYTE) + { ins = INS_rotr_w; + } else + { ins = INS_rotr_d; + } } break; @@ -3874,17 +3964,6 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) } } - //{ - // GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, tmpReg, REG_R0); - - // GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_c_olt_d : INS_c_olt_s, EA_8BYTE, op1->GetRegNum(), - // tmpReg, 2); - // GetEmitter()->emitIns_I_I(INS_bc1f, EA_PTRSIZE, 2, 4 << 2); - - // GetEmitter()->emitIns_R_R_I(INS_ori*/, EA_PTRSIZE, treeNode->GetRegNum(), REG_R0, 0); - // GetEmitter()->emitIns_I(INS_b, EA_PTRSIZE, srcType == TYP_DOUBLE ? 14 << 2 : 13 << 2); - //} - if (srcType == TYP_DOUBLE) GetEmitter()->emitIns_R_R_I(INS_lu52i_d, EA_8BYTE, REG_R21, REG_R0, imm >> 8); else @@ -4017,44 +4096,68 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) if (IsUnordered) { if (tree->OperIs(GT_LT)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_LE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_EQ)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? 
INS_fcmp_cueq_s : INS_fcmp_cueq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_NE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cune_s : INS_fcmp_cune_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_GT)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_GE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1 /*cc*/); + } } else { if (tree->OperIs(GT_LT)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_LE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_EQ)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_ceq_s : INS_fcmp_ceq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_NE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cne_s : INS_fcmp_cne_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_GT)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_GE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? 
INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1 /*cc*/); + } } emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); @@ -4544,7 +4647,9 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } if (IsEq && (ins != INS_invalid)) + { emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, 0); // 5-bits; + } else if (ins != INS_invalid) { jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; @@ -4592,9 +4697,13 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { case EA_4BYTE: if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) + { imm = static_cast(imm); + } else + { imm = static_cast(imm); + } break; case EA_8BYTE: break; @@ -4719,7 +4828,9 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } if (IsEq) + { emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, SaveCcResultReg); // 5-bits; + } else { jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; @@ -5303,13 +5414,14 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm // function that does a probe, which will in turn call this function. 
assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); - if (-2048 <= spDelta && spDelta < 0) + if (isValidSimm12(spDelta)) + { GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta); + } else { - GetEmitter()->emitIns_R_R_I(INS_srai_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta >> 3); - GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, spDelta); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, REG_R21); } } @@ -5450,7 +5562,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_CNS_INT: if ((targetType == TYP_DOUBLE) || (targetType == TYP_FLOAT)) + { treeNode->gtOper = GT_CNS_DBL; + } FALLTHROUGH; case GT_CNS_DBL: genSetRegToConst(targetReg, targetType, treeNode); @@ -5590,14 +5704,18 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForCompare(treeNode->AsOp()); } else if (!treeNode->gtNext) + { genCodeForJumpTrue(treeNode->AsOp()); + } else if (!treeNode->gtNext->OperIs(GT_JTRUE)) { GenTree* treeNode_next = treeNode->gtNext; while (treeNode_next) { if (treeNode_next->OperIs(GT_JTRUE)) + { break; + } treeNode_next = treeNode_next->gtNext; }; assert(treeNode_next->OperIs(GT_JTRUE)); @@ -5853,7 +5971,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while // executing GS cookie check will not collect the object pointed to by REG_INTRET (A0). if (!pushReg && (compiler->info.compRetNativeType == TYP_REF)) + { gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + } // We need two temporary registers, to load the GS cookie values and compare them. We can't use // any argument registers if 'pushReg' is true (meaning we have a JMP call). 
They should be @@ -6227,16 +6347,18 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) if (targetReg != op1->GetRegNum()) { if (emitter::isFloatReg(targetReg) == emitter::isFloatReg(op1->GetRegNum())) + { inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); -#if 1 + } else if (emitter::isFloatReg(targetReg)) + { GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, op1->GetRegNum()); - else // if (!emitter::isFloatReg(targetReg)) + } + else { assert(!emitter::isFloatReg(targetReg)); GetEmitter()->emitIns_R_R(INS_movfr2gr_d, EA_8BYTE, targetReg, op1->GetRegNum()); } -#endif } genProduceReg(tree); } @@ -6925,7 +7047,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) // } // // sltu REG_R21, index, REG_R21 - // bne REG_21, zero, RngChkExit + // bne REG_R21, zero, RngChkExit // IndRngFail: // ... // RngChkExit: @@ -7268,7 +7390,9 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst if (base->OperIsLocalAddr()) { if (base->gtOper == GT_LCL_FLD_ADDR) + { offset += base->AsLclFld()->GetLclOffs(); + } emit->emitIns_R_S(ins, size, dst, base->AsLclVarCommon()->GetLclNum(), offset); } else @@ -7294,7 +7418,9 @@ void CodeGen::genCall(GenTreeCall* call) argNode = argNode->gtSkipReloadOrCopy(); if (curArgTabEntry->GetRegNum() == REG_STK) + { continue; + } // Deal with multi register passed struct args. if (argNode->OperGet() == GT_FIELD_LIST) @@ -7711,7 +7837,9 @@ void CodeGen::genJmpMethod(GenTree* jmp) // If we need to generate a tail call profiler hook, then spill all // arg regs to free them up for the callback. if (!compiler->compIsProfilerHookNeeded() && (varDsc->GetRegNum() == varDsc->GetArgReg())) + { continue; + } } else if (varDsc->GetRegNum() == REG_STK) { @@ -7763,7 +7891,9 @@ void CodeGen::genJmpMethod(GenTree* jmp) // Skip if arg not passed in a register. 
if (!varDsc->lvIsRegArg) + { continue; + } // Register argument noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); @@ -8057,9 +8187,13 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) default: assert(desc.ExtendKind() == GenIntCastDesc::COPY); if (srcType == TYP_INT) + { emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); + } else + { emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, dstReg, srcReg, 0); + } break; } } @@ -8161,7 +8295,9 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, if (compiler->info.compFlags & CORINFO_FLG_SYNCH) { if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) + { preservedAreaSize += REGSIZE_BYTES; + } preservedAreaSize += 1; // bool for synchronized methods } @@ -8582,15 +8718,26 @@ inline void CodeGen::genJumpToThrowHlpBlk_la( // maybe optimize // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne))); if (ins == INS_blt) + { ins = INS_bge; + } else if (ins == INS_bltu) + { ins = INS_bgeu; + } else if (ins == INS_bge) + { ins = INS_blt; + } else if (ins == INS_bgeu) + { ins = INS_bltu; + } else + { ins = ins == INS_beq ? INS_bne : INS_beq; + } + if (addr == nullptr) { callType = emitter::EC_INDIR_R; @@ -8626,7 +8773,9 @@ inline void CodeGen::genJumpToThrowHlpBlk_la( ssize_t imm = 5 << 2; if (compiler->opts.compReloc) + { imm = 3 << 2; + } emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); } @@ -8714,7 +8863,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // On LA we push the FP (frame-pointer) here along with all other callee saved registers if (isFramePointerUsed()) + { rsPushRegs |= RBM_FPBASE; + } // // It may be possible to skip pushing/popping ra for leaf methods. 
However, such optimization would require @@ -9016,7 +9167,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe #ifdef DEBUG if (compiler->opts.disAsm) + { printf("DEBUG: LOONGARCH64, frameType:%d\n\n", frameType); + } #endif if (frameType == 1) { @@ -9181,10 +9334,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) else { outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; - // if (outSzAligned > 0) - { - genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); - } + genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); } regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. @@ -9360,7 +9510,9 @@ void CodeGen::genFnPrologCalleeRegArgs() psiMoveToReg(varNum); #endif // USING_SCOPE_INFO if (!varDsc->lvLiveInOutOfHndlr) + { continue; + } } // When we have a promoted struct we have two possible LclVars that can represent the incoming argument @@ -9382,13 +9534,17 @@ void CodeGen::genFnPrologCalleeRegArgs() { storeType = varDsc->lvIs4Field1 ? 
TYP_FLOAT : TYP_DOUBLE; } - else // if (emitter::isGeneralRegister(varDsc->GetArgReg())) + else { assert(emitter::isGeneralRegister(varDsc->GetArgReg())); if (varDsc->lvIs4Field1) + { storeType = TYP_INT; + } else + { storeType = varDsc->GetLayout()->GetGCPtrType(0); + } } slotSize = (unsigned)emitActualTypeSize(storeType); @@ -9416,7 +9572,9 @@ void CodeGen::genFnPrologCalleeRegArgs() noway_assert(varDsc->lvRefCnt() == 0); regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); if (varDsc->GetOtherArgReg() < REG_STK) + { regArgMaskLive &= ~genRegMask(varDsc->GetOtherArgReg()); + } } else { @@ -9464,13 +9622,19 @@ void CodeGen::genFnPrologCalleeRegArgs() { baseOffset = (int)EA_SIZE(slotSize); if (varDsc->lvIs4Field2) + { storeType = TYP_INT; + } else + { storeType = varDsc->GetLayout()->GetGCPtrType(1); - size = emitActualTypeSize(storeType); + } + size = emitActualTypeSize(storeType); if (baseOffset < (int)EA_SIZE(size)) + { baseOffset = (int)EA_SIZE(size); - srcRegNum = varDsc->GetOtherArgReg(); + } + srcRegNum = varDsc->GetOtherArgReg(); } if (srcRegNum == varDsc->GetOtherArgReg()) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 126c74bdbefa7..28d52d5e0bd38 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -597,7 +597,7 @@ class emitter // TODO-LoongArch64: not include SIMD-vector. 
static_assert_no_msg(INS_count <= 512); instruction _idIns : 9; -#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) +#else static_assert_no_msg(INS_count <= 256); instruction _idIns : 8; #endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index c0b9c1e0cce58..c4d14d2f2006d 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1048,14 +1048,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2Type, 1); varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2Type)); - varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2Type) == 4 ? 1 : 0; - varDscInfo->hasMultiSlotStruct = true; + varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2Type) == 4 ? 1 : 0; } else if (cSlots > 1) { varDsc->lvIsSplit = 1; varDsc->SetOtherArgReg(REG_STK); - varDscInfo->hasMultiSlotStruct = true; varDscInfo->setAllRegArgUsed(arg1Type); varDscInfo->stackArgSize += TARGET_POINTER_SIZE; } @@ -1066,7 +1064,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un if (cSlots == 2) { varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL)); - varDscInfo->hasMultiSlotStruct = true; } } } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 207c79864470e..fa2a28179dd48 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1189,7 +1189,24 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) switch (blkNode->gtBlkOpKind) { case GenTreeBlk::BlkOpKindUnroll: - break; + { + if (dstAddr->isContained()) + { + // Since the dstAddr is contained the address will be computed in CodeGen. + // This might require an integer register to store the value. 
+ buildInternalIntRegisterDefForNode(blkNode); + } + + const bool isDstRegAddrAlignmentKnown = dstAddr->OperIsLocalAddr(); + + if (isDstRegAddrAlignmentKnown && (size > FP_REGSIZE_BYTES)) + { + // TODO-LoongArch64: For larger block sizes CodeGen can choose to use 16-byte SIMD instructions. + // here just used a temp register. + buildInternalFloatRegisterDefForNode(blkNode); + } + } + break; case GenTreeBlk::BlkOpKindHelper: assert(!src->isContained()); @@ -1222,7 +1239,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (size >= 2 * REGSIZE_BYTES) { // TODO-LoongArch64: We will use ld/st paired to reduce code size and improve performance - // so we need to reserve an extra internal register + // so we need to reserve an extra internal register. buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); } @@ -1243,8 +1260,27 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) switch (blkNode->gtBlkOpKind) { case GenTreeBlk::BlkOpKindUnroll: + { buildInternalIntRegisterDefForNode(blkNode); - break; + + const bool isSrcAddrLocal = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) || + ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr()); + const bool isDstAddrLocal = dstAddr->OperIsLocalAddr(); + + // TODO-LoongArch64: using 16-byte SIMD instructions. + const bool srcAddrMayNeedReg = + isSrcAddrLocal || ((srcAddrOrFill != nullptr) && srcAddrOrFill->isContained()); + const bool dstAddrMayNeedReg = isDstAddrLocal || dstAddr->isContained(); + + // The following allocates an additional integer register in a case + // when a load instruction and a store instruction cannot be encoded using offset + // from a corresponding base register. 
+ if (srcAddrMayNeedReg && dstAddrMayNeedReg) + { + buildInternalIntRegisterDefForNode(blkNode); + } + } + break; case GenTreeBlk::BlkOpKindHelper: dstAddrRegMask = RBM_ARG_0; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 6b89ad3c91c38..b95293dcacf7c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -214,8 +214,8 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -#if defined(TARGET_ARM64) || \ - defined(TARGET_LOONGARCH64) // On ARM64 All non-overflow checking conversions can be optimized +// ARM64 and LoongArch64 optimize all non-overflow checking conversions +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return nullptr; #else switch (dstType) @@ -243,7 +243,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) default: unreached(); } -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } else { @@ -938,7 +938,6 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, if (numRegs == 2) { curArgTabEntry->setRegNum(1, otherRegNum); - // curArgTabEntry->isSplit = true; } return curArgTabEntry; @@ -2038,7 +2037,12 @@ void fgArgInfo::EvalArgsToTemps() { setupArg = compiler->fgMorphCopyBlock(setupArg); #if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) + // For LoongArch64, the struct {float a; float b;} passed by float-registers. + if ((lclVarType == TYP_STRUCT) && (curArgTabEntry->numRegs == 1)) +#else if (lclVarType == TYP_STRUCT) +#endif { // This scalar LclVar widening step is only performed for ARM architectures. 
// @@ -3016,7 +3020,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); } #else // !UNIX_AMD64_ABI - size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' + size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' if (!isStructArg) { byteSize = genTypeSize(argx); @@ -3307,10 +3311,10 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } } - assert(!isHfaArg); // LOONGARCH not support HFA. + assert(!isHfaArg); // LoongArch64 does not support HFA. } - // if run out the fp argument register, try the int argument register. + // if we run out of floating-point argument registers, try the int argument registers. if (!isRegArg) { // Check if the last register needed is still in the int argument register range. @@ -3320,7 +3324,6 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // Did we run out of registers when we had a 16-byte struct (size===2) ? // (i.e we only have one register remaining but we needed two registers to pass this arg) - // This prevents us from backfilling a subsequent arg into x7 // if (!isRegArg && (size > 1)) { @@ -3329,7 +3332,6 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // isRegArg = intArgRegNum < maxRegArgs; // the split-struct case. 
nextOtherRegNum = REG_STK; - // assert((intArgRegNum + 1) == maxRegArgs); } } #else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 @@ -3915,12 +3917,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } } #endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) - if ((passingSize != structSize) && (lclVar == nullptr)) - { - copyBlkClass = objClass; - } -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if ((passingSize != structSize) && (lclVar == nullptr)) { copyBlkClass = objClass; @@ -4136,12 +4133,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #if FEATURE_MULTIREG_ARGS if (isStructArg) { -#if defined(TARGET_LOONGARCH64) - if ((argEntry->numRegs + argEntry->GetStackSlotsNumber()) > 1) -#else if (((argEntry->numRegs + argEntry->GetStackSlotsNumber()) > 1) || (isHfaArg && argx->TypeGet() == TYP_STRUCT)) -#endif { hasMultiregStructArgs = true; } @@ -4373,28 +4366,6 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) if ((size > 1) || (fgEntryPtr->IsHfaArg() && argx->TypeGet() == TYP_STRUCT)) { foundStructArg = true; -#if defined(TARGET_LOONGARCH64) - if (!argx->OperIs(GT_FIELD_LIST)) - { - GenTree* newArgx = fgMorphMultiregStructArg(argx, fgEntryPtr); - - // Did we replace 'argx' with a new tree? - if (newArgx != argx) - { - // link the new arg node into either the late arg list or the gtCallArgs list - if (isLateArg) - { - lateUse->SetNode(newArgx); - } - else - { - use.SetNode(newArgx); - } - - assert(fgEntryPtr->GetNode() == newArgx); - } - } -#else if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST)) { if (fgEntryPtr->IsHfaRegArg()) @@ -4444,7 +4415,6 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) assert(fgEntryPtr->GetNode() == newArgx); } } -#endif } } @@ -4479,13 +4449,10 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) // this also forces the struct to be stack allocated into the local frame. 
// For the GT_OBJ case will clone the address expression and generate two (or more) // indirections. -// Currently the implementation handles ARM64/ARM and will NYI for other architectures. // GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr) { -#if !defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(arg->TypeGet())); -#endif #if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) NYI("fgMorphMultiregStructArg requires implementation for this target"); @@ -4536,40 +4503,11 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } #if FEATURE_MULTIREG_ARGS -// Examine 'arg' and setup argValue objClass and structSize -// -#if defined(TARGET_LOONGARCH64) - const CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); - if (objClass == NO_CLASS_HANDLE) - { - assert(arg->TypeGet() != TYP_STRUCT); - assert(arg->OperGet() == GT_LCL_FLD); - assert(fgEntryPtr->numRegs == 2); - - GenTreeLclVarCommon* varNode = arg->AsLclVarCommon(); - unsigned varNum = varNode->GetLclNum(); - assert(varNum < lvaCount); - LclVarDsc* varDsc = &lvaTable[varNum]; - assert(varDsc->lvExactSize == 8); - - unsigned offset = arg->AsLclVarCommon()->GetLclOffs(); - GenTreeFieldList* newArg = nullptr; - var_types tmp_type = fgEntryPtr->isPassedInFloatRegisters() ? TYP_FLOAT : TYP_INT; - arg->gtType = tmp_type; - - newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); - newArg->AddField(this, arg, offset, tmp_type); - tmp_type = isValidFloatArgReg(fgEntryPtr->GetOtherRegNum()) ? 
TYP_FLOAT : TYP_INT; - GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type, offset + 4); - newArg->AddField(this, nextLclFld, offset + 4, tmp_type); - - return newArg; - } -#else - const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); -#endif - GenTree* argValue = arg; // normally argValue will be arg, but see right below - unsigned structSize = 0; + // Examine 'arg' and setup argValue objClass and structSize + // + const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); + GenTree* argValue = arg; // normally argValue will be arg, but see right below + unsigned structSize = 0; if (arg->TypeGet() != TYP_STRUCT) { @@ -4724,7 +4662,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry #endif // DEBUG #ifndef UNIX_AMD64_ABI -#if !defined(TARGET_LOONGARCH64) // This local variable must match the layout of the 'objClass' type exactly if (varDsc->lvIsHfa() && fgEntryPtr->isPassedInFloatRegisters()) { @@ -4740,7 +4677,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } } else -#endif { #if defined(TARGET_ARM64) // We must have a 16-byte struct (non-HFA) @@ -4791,7 +4727,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else #endif // !UNIX_AMD64_ABI -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // Is this LclVar a promoted struct with exactly 2 fields? if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa()) { @@ -4899,16 +4835,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DoNotEnregisterReason::LocalField)); } -#elif defined(TARGET_LOONGARCH64) - // Is this LclVar a promoted struct with exactly same size? 
- assert(!varDsc->lvPromoted); - - assert(structSize >= TARGET_POINTER_SIZE); - { - // We will create a list of GT_LCL_FLDs nodes to pass this struct - lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DoNotEnregisterReason::LocalField)); - } -#endif // TARGET_LOONGARCH64 +#endif // TARGET_ARM } // If we didn't set newarg to a new List Node tree diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 2106d3f2bc0b1..daf251b33477d 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -98,7 +98,7 @@ #define REG_CALLEE_SAVED_ORDER REG_S0,REG_S1,REG_S2,REG_S3,REG_S4,REG_S5,REG_S6,REG_S7,REG_S8 #define RBM_CALLEE_SAVED_ORDER RBM_S0,RBM_S1,RBM_S2,RBM_S3,RBM_S4,RBM_S5,RBM_S6,RBM_S7,RBM_S8 - #define CNT_CALLEE_SAVED (9) //s0-s8, not including fp,ra. + #define CNT_CALLEE_SAVED (10) //s0-s8,fp. #define CNT_CALLEE_TRASH (17) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) From 38b91f2f47db57f161e24312247afcc527b8c268 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 17 Mar 2022 10:06:58 +0800 Subject: [PATCH 35/46] [LoongArch64] amend some code for CR. 
--- src/coreclr/gcinfo/CMakeLists.txt | 9 +- src/coreclr/jit/CMakeLists.txt | 7 +- src/coreclr/jit/codegenloongarch64.cpp | 60 +- src/coreclr/jit/emit.cpp | 245 +-- src/coreclr/jit/emit.h | 43 +- src/coreclr/jit/emitfmtsloongarch64.h | 29 +- src/coreclr/jit/emitloongarch64.cpp | 2472 ++++++++++++++---------- src/coreclr/jit/emitloongarch64.h | 18 + src/coreclr/jit/lsraloongarch64.cpp | 23 +- src/coreclr/jit/registerloongarch64.h | 2 +- 10 files changed, 1510 insertions(+), 1398 deletions(-) diff --git a/src/coreclr/gcinfo/CMakeLists.txt b/src/coreclr/gcinfo/CMakeLists.txt index 5f10c54e5d9f9..34b3843d6893e 100644 --- a/src/coreclr/gcinfo/CMakeLists.txt +++ b/src/coreclr/gcinfo/CMakeLists.txt @@ -69,12 +69,15 @@ if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM) create_gcinfo_lib(TARGET gcinfo_${TARGET_OS_NAME}_${ARCH_TARGET_NAME} OS ${TARGET_OS_NAME} ARCH ${ARCH_TARGET_NAME}) endif() -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) - create_gcinfo_lib(TARGET gcinfo_unix_loongarch64 OS unix ARCH loongarch64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) create_gcinfo_lib(TARGET gcinfo_universal_arm64 OS universal ARCH arm64) create_gcinfo_lib(TARGET gcinfo_unix_x64 OS unix ARCH x64) create_gcinfo_lib(TARGET gcinfo_win_x64 OS win ARCH x64) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) + +if (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_gcinfo_lib(TARGET gcinfo_unix_loongarch64 OS unix ARCH loongarch64) +endif (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm) create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 613cba4265138..927bf7a238ac5 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ 
-576,12 +576,15 @@ install_clr(TARGETS clrjit DESTINATIONS . sharedFramework COMPONENT jit) # Enable profile guided optimization add_pgo(clrjit) -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) create_standalone_jit(TARGET clrjit_universal_arm64_${ARCH_HOST_NAME} OS universal ARCH arm64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_unix_x64_${ARCH_HOST_NAME} OS unix ARCH x64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_win_x64_${ARCH_HOST_NAME} OS win ARCH x64 DESTINATIONS .) +endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) + +if (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +endif (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 07c876496da57..260b54ee15000 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -21,24 +21,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfo.h" #include "gcinfoencoder.h" -// Returns true if 'value' is a legal signed immediate 12 bit encoding. -static bool isValidSimm12(ssize_t value) -{ - return -(((int)1) << 11) <= value && value < (((int)1) << 11); -} - -// Returns true if 'value' is a legal unsigned immediate 11 bit encoding. -static bool isValidUimm11(ssize_t value) -{ - return (0 == (value >> 11)); -} - -// Returns true if 'value' is a legal unsigned immediate 12 bit encoding. 
-static bool isValidUimm12(ssize_t value) -{ - return (0 == (value >> 12)); -} - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -114,7 +96,7 @@ bool CodeGen::genInstrWithConstant(instruction ins, break; } #endif - bool immFitsInIns = isValidSimm12(imm); + bool immFitsInIns = emitter::isValidSimm12(imm); if (immFitsInIns) { @@ -1653,7 +1635,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); assert(untrLclLo % 4 == 0); - if (isValidSimm12(untrLclLo)) + if (emitter::isValidSimm12(untrLclLo)) { GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); } @@ -2432,7 +2414,7 @@ void CodeGen::genLclHeap(GenTree* tree) lastTouchDelta = amount; imm = -(ssize_t)amount; - if (isValidSimm12(imm)) + if (emitter::isValidSimm12(imm)) { emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); } @@ -4297,11 +4279,11 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) if (tree->OperIs(GT_LT)) { - if (!IsUnsigned && isValidSimm12(imm)) + if (!IsUnsigned && emitter::isValidSimm12(imm)) { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); } - else if (IsUnsigned && isValidUimm11(imm)) + else if (IsUnsigned && emitter::isValidUimm11(imm)) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); } @@ -4313,11 +4295,11 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else if (tree->OperIs(GT_LE)) { - if (!IsUnsigned && isValidSimm12(imm + 1)) + if (!IsUnsigned && emitter::isValidSimm12(imm + 1)) { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); } - else if (IsUnsigned && isValidUimm11(imm + 1)) + else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm + 1); } @@ -4329,12 +4311,12 @@ void 
CodeGen::genCodeForCompare(GenTreeOp* jtree) } else if (tree->OperIs(GT_GT)) { - if (!IsUnsigned && isValidSimm12(imm + 1)) + if (!IsUnsigned && emitter::isValidSimm12(imm + 1)) { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_RA, regOp1, imm + 1); emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); } - else if (IsUnsigned && isValidUimm11(imm + 1)) + else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, REG_RA, regOp1, imm + 1); emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); @@ -4347,11 +4329,11 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else if (tree->OperIs(GT_GE)) { - if (!IsUnsigned && isValidSimm12(imm)) + if (!IsUnsigned && emitter::isValidSimm12(imm)) { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); } - else if (IsUnsigned && isValidUimm11(imm)) + else if (IsUnsigned && emitter::isValidUimm11(imm)) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); } @@ -4368,7 +4350,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1); } - else if (isValidUimm12(imm)) + else if (emitter::isValidUimm12(imm)) { emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); @@ -4386,7 +4368,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, 1); } - else if (isValidUimm12(imm)) + else if (emitter::isValidUimm12(imm)) { emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); @@ -5414,7 +5396,7 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm // function that does a probe, which will in turn call this function. 
assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); - if (isValidSimm12(spDelta)) + if (emitter::isValidSimm12(spDelta)) { GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta); } @@ -8106,7 +8088,7 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d if (castMinValue != 0) { - if (isValidSimm12(castMinValue)) + if (emitter::isValidSimm12(castMinValue)) { GetEmitter()->emitIns_R_R_I(INS_slti, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, castMinValue); } @@ -8427,7 +8409,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) if (offset != 0) { - if (isValidSimm12(offset)) + if (emitter::isValidSimm12(offset)) { emit->emitIns_R_R_I(INS_addi_d, size, tmpReg, tmpReg, offset); } @@ -8451,7 +8433,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) { GenTree* memBase = lea->Base(); - if (isValidSimm12(offset)) + if (emitter::isValidSimm12(offset)) { if (offset != 0) { @@ -8508,7 +8490,7 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) } else { - assert(isValidSimm12(delta)); + assert(emitter::isValidSimm12(delta)); GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); } @@ -9589,7 +9571,7 @@ void CodeGen::genFnPrologCalleeRegArgs() base += baseOffset; - if (isValidSimm12(base)) + if (emitter::isValidSimm12(base)) { GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); } @@ -9641,7 +9623,7 @@ void CodeGen::genFnPrologCalleeRegArgs() { base += baseOffset; - if (isValidSimm12(base)) + if (emitter::isValidSimm12(base)) { GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); } @@ -9672,7 +9654,7 @@ void CodeGen::genFnPrologCalleeRegArgs() base += 8; GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); - if (isValidSimm12(base)) + if (emitter::isValidSimm12(base)) { GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, 
baseOffset); } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 30c28972ac01d..55d517f996d1d 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -743,12 +743,8 @@ insGroup* emitter::emitSavIG(bool emitAdd) ig->igFlags |= IGF_BYREF_REGS; -// We'll allocate extra space (DWORD aligned) to record the GC regs -#if defined(TARGET_LOONGARCH64) - gs += sizeof(regMaskTP); -#else + // We'll allocate extra space (DWORD aligned) to record the GC regs gs += sizeof(int); -#endif } // Allocate space for the instructions and optional liveset @@ -759,13 +755,8 @@ insGroup* emitter::emitSavIG(bool emitAdd) if (ig->igFlags & IGF_BYREF_REGS) { -// Record the byref regs in front the of the instructions - -#if defined(TARGET_LOONGARCH64) - *castto(id, regMaskTP*)++ = emitInitByrefRegs; -#else + // Record the byref regs in front the of the instructions *castto(id, unsigned*)++ = (unsigned)emitInitByrefRegs; -#endif } // Do we need to store the liveset? @@ -1310,7 +1301,8 @@ weight_t emitter::getCurrentBlockWeight() #if defined(TARGET_LOONGARCH64) void emitter::dispIns(instrDesc* id) { - assert(!"Not used on LOONGARCH64."); + // For LoongArch64 using the emitDisInsName(). 
+ NYI_LOONGARCH64("Not used on LOONGARCH64."); } #else void emitter::dispIns(instrDesc* id) @@ -3022,11 +3014,9 @@ void emitter::emitGenerateUnwindNop(instrDesc* id, void* context) Compiler* comp = (Compiler*)context; #if defined(TARGET_ARM) comp->unwindNop(id->idCodeSize()); -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) comp->unwindNop(); -#elif defined(TARGET_LOONGARCH64) - comp->unwindNop(); -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) } /***************************************************************************** @@ -4111,8 +4101,10 @@ void emitter::emitDispCommentForHandle(size_t handle, GenTreeFlags flag) * ARM64 has a small and large encoding for both conditional branch and loading label addresses. * The large encodings are pseudo-ops that represent a multiple instruction sequence, similar to ARM. (Currently * NYI). + * LoongArch64 has an individual implementation for emitJumpDistBind(). */ +#ifndef TARGET_LOONGARCH64 void emitter::emitJumpDistBind() { #ifdef DEBUG @@ -4146,22 +4138,6 @@ void emitter::emitJumpDistBind() int jmp_iteration = 1; -#ifdef TARGET_LOONGARCH64 - // NOTE: - // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; - // bit1 of isLinkingEnd_LA: indicating not needed updating ths size while emitTotalCodeSize <= (0x7fff << 2) or had - // updated; - unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; - - UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. - // small jump max. neg distance - NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; - // small jump max. pos distance - NATIVE_OFFSET psd = - B_DIST_SMALL_MAX_POS - - emitCounts_INS_OPTS_J * (3 << 2); // the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
-#endif - /*****************************************************************************/ /* If we iterate to look for more jumps to shorten, we start again here. */ /*****************************************************************************/ @@ -4198,11 +4174,9 @@ void emitter::emitJumpDistBind() UNATIVE_OFFSET jsz; // size of the jump instruction in bytes -#ifndef TARGET_LOONGARCH64 UNATIVE_OFFSET ssz = 0; // small jump size NATIVE_OFFSET nsd = 0; // small jump max. neg distance NATIVE_OFFSET psd = 0; // small jump max. pos distance -#endif #if defined(TARGET_ARM) UNATIVE_OFFSET msz = 0; // medium jump size @@ -4217,10 +4191,8 @@ void emitter::emitJumpDistBind() // offset of the jump UNATIVE_OFFSET dstOffs; NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded -#ifndef TARGET_LOONGARCH64 UNATIVE_OFFSET oldSize; UNATIVE_OFFSET sizeDif; -#endif #ifdef TARGET_XARCH assert(jmp->idInsFmt() == IF_LABEL || jmp->idInsFmt() == IF_RWR_LABEL || jmp->idInsFmt() == IF_SWR_LABEL); @@ -4323,14 +4295,7 @@ void emitter::emitJumpDistBind() /* Make sure the jumps are properly ordered */ #ifdef DEBUG -#if defined(TARGET_LOONGARCH64) -#if defined(UNALIGNED_CHECK_DISABLE) - UNALIGNED_CHECK_DISABLE; -#endif - assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < (jmp->idjOffs + adjLJ)); -#else assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < jmp->idjOffs); -#endif lastLJ = (lastIG == jmp->idjIG) ? 
jmp : nullptr; assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG || @@ -4364,19 +4329,11 @@ void emitter::emitJumpDistBind() if (EMITVERBOSE) { printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, -#if defined(TARGET_LOONGARCH64) - lstIG->igOffs + adjIG -#else - lstIG->igOffs - adjIG -#endif - ); + lstIG->igOffs - adjIG); } #endif // DEBUG -#if defined(TARGET_LOONGARCH64) - lstIG->igOffs += adjIG; -#else + lstIG->igOffs -= adjIG; -#endif assert(IsCodeAligned(lstIG->igOffs)); } while (lstIG != jmpIG); } @@ -4387,13 +4344,9 @@ void emitter::emitJumpDistBind() lstIG = jmpIG; } -/* Apply any local size adjustment to the jump's relative offset */ + /* Apply any local size adjustment to the jump's relative offset */ -#if defined(TARGET_LOONGARCH64) - jmp->idjOffs += adjLJ; -#else jmp->idjOffs -= adjLJ; -#endif // If this is a jump via register, the instruction size does not change, so we are done. CLANG_FORMAT_COMMENT_ANCHOR; @@ -4441,9 +4394,8 @@ void emitter::emitJumpDistBind() if (jmp->idjShort) { -#ifndef TARGET_LOONGARCH64 assert(jmp->idCodeSize() == ssz); -#endif + // We should not be jumping/branching across funclets/functions emitCheckFuncletBranch(jmp, jmpIG); @@ -4547,17 +4499,13 @@ void emitter::emitJumpDistBind() if (jmpIG->igNum < tgtIG->igNum) { -/* Forward jump */ + /* Forward jump */ -/* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between - here and the target could be shortened, causing the actual distance to shrink. - */ + /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between + here and the target could be shortened, causing the actual distance to shrink. 
+ */ -#if defined(TARGET_LOONGARCH64) - dstOffs += adjIG; -#else dstOffs -= adjIG; -#endif /* Compute the distance estimate */ @@ -4592,67 +4540,11 @@ void emitter::emitJumpDistBind() } #endif // DEBUG_EMIT -#if defined(TARGET_LOONGARCH64) - assert(jmpDist >= 0); // Forward jump - assert(!(jmpDist & 0x3)); - - if (isLinkingEnd_LA & 0x2) - { - jmp->idAddr()->iiaSetJmpOffset(jmpDist); - } - else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) - { - instruction ins = jmp->idIns(); - assert((INS_bceqz <= ins) && (ins <= INS_bl)); - - if (ins < - INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. - { - if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) - { - extra = 4; - } - else - { - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO:later will be deleted!!! - extra = 8; - } - } - else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. - { - if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) - continue; - - extra = 4; - // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); - } - else // if (ins == INS_b || ins == INS_bl) - { - assert(ins == INS_b || ins == INS_bl); - // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); - continue; - } - - jmp->idInsOpt(INS_OPTS_JIRL); - jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
- adjLJ += (UNATIVE_OFFSET)extra; - adjIG += (UNATIVE_OFFSET)extra; - emitTotalCodeSize += (UNATIVE_OFFSET)extra; - jmpIG->igFlags |= IGF_UPD_ISZ; - isLinkingEnd_LA |= 0x1; - } - continue; - -#else // not defined(TARGET_LOONGARCH64) if (extra <= 0) { /* This jump will be a short one */ goto SHORT_JMP; } -#endif } else { @@ -4691,70 +4583,13 @@ void emitter::emitJumpDistBind() } #endif // DEBUG_EMIT -#if defined(TARGET_LOONGARCH64) - assert(jmpDist >= 0); // Backward jump - assert(!(jmpDist & 0x3)); - - if (isLinkingEnd_LA & 0x2) - { - jmp->idAddr()->iiaSetJmpOffset(-jmpDist); // Backward jump is negative! - } - else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) - { - instruction ins = jmp->idIns(); - assert((INS_bceqz <= ins) && (ins <= INS_bl)); - - if (ins < - INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. - { - if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) - { - extra = 4; - } - else - { - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); - extra = 8; - } - } - else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. - { - if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) - continue; - - extra = 4; - // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); - } - else - { - assert(ins == INS_b || ins == INS_bl); - // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); - continue; - } - - jmp->idInsOpt(INS_OPTS_JIRL); - jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
- adjLJ += (UNATIVE_OFFSET)extra; - adjIG += (UNATIVE_OFFSET)extra; - emitTotalCodeSize += (UNATIVE_OFFSET)extra; - jmpIG->igFlags |= IGF_UPD_ISZ; - isLinkingEnd_LA |= 0x1; - } - continue; - -#else // not defined(TARGET_LOONGARCH64) if (extra <= 0) { /* This jump will be a short one */ goto SHORT_JMP; } -#endif } -#ifndef TARGET_LOONGARCH64 /* We arrive here if the jump couldn't be made short, at least for now */ /* We had better not have eagerly marked the jump as short @@ -4946,40 +4781,8 @@ void emitter::emitJumpDistBind() jmpIG->igFlags |= IGF_UPD_ISZ; -#endif // not defined(TARGET_LOONGARCH64) - } // end for each jump - -#if defined(TARGET_LOONGARCH64) - if ((isLinkingEnd_LA & 0x3) < 0x2) - { - // indicating had updated the instrDescJmp's size with the type INS_OPTS_J. - isLinkingEnd_LA = 0x2; - // emitRecomputeIGoffsets(); - /* Adjust offsets of any remaining blocks */ - - for (; lstIG;) - { - lstIG = lstIG->igNext; - if (!lstIG) - { - break; - } -#ifdef DEBUG - if (EMITVERBOSE) - { - printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, - lstIG->igOffs + adjIG); - } -#endif // DEBUG + } // end for each jump - lstIG->igOffs += adjIG; - - assert(IsCodeAligned(lstIG->igOffs)); - } - goto AGAIN; - } - -#else /* Did we shorten any jumps? */ if (adjIG) @@ -5043,7 +4846,6 @@ void emitter::emitJumpDistBind() goto AGAIN; } } -#endif #ifdef DEBUG if (EMIT_INSTLIST_VERBOSE) @@ -5055,6 +4857,7 @@ void emitter::emitJumpDistBind() emitCheckIGoffsets(); #endif // DEBUG } +#endif #if FEATURE_LOOP_ALIGN @@ -5866,7 +5669,7 @@ emitter::instrDescAlign* emitter::emitAlignInNextIG(instrDescAlign* alignInstr) void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) { #ifdef TARGET_LOONGARCH64 - /* TODO: for LOONGARCH64: not support idDebugOnlyInfo.*/ + // TODO-LoongArch64: support idDebugOnlyInfo. 
return; #else @@ -7045,13 +6848,7 @@ void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp) * instruction number for this instruction */ -#if defined(TARGET_LOONGARCH64) -unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) -{ - assert(!"unimplemented yet on LOONGARCH"); - return -1; -} -#else +#ifndef TARGET_LOONGARCH64 unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) { instrDesc* id = (instrDesc*)ig->igData; @@ -9532,7 +9329,7 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) result = RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI); break; #elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) - result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; + result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; break; #else assert(!"unknown arch"); diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 28d52d5e0bd38..c64a67192b645 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -344,11 +344,8 @@ struct insGroup ptr -= sizeof(VARSET_TP); } -#if defined(TARGET_LOONGARCH64) - ptr -= sizeof(VARSET_TP); -#else ptr -= sizeof(unsigned); -#endif + return *(unsigned*)ptr; } @@ -677,7 +674,7 @@ class emitter // At this point we have fully consumed first DWORD so that next field // doesn't cross a byte boundary. #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) -/* _idOpSize defined bellow. */ +/* _idOpSize defined below. 
*/ #else opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 #endif // ARM || TARGET_LOONGARCH64 @@ -1793,11 +1790,13 @@ class emitter #endif // FEATURE_EH_FUNCLETS - /************************************************************************/ - /* Methods to record a code position and later convert to offset */ - /************************************************************************/ +/************************************************************************/ +/* Methods to record a code position and later convert to offset */ +/************************************************************************/ +#ifndef TARGET_LOONGARCH64 unsigned emitFindInsNum(insGroup* ig, instrDesc* id); +#endif UNATIVE_OFFSET emitFindOffset(insGroup* ig, unsigned insNum); /************************************************************************/ @@ -1957,7 +1956,7 @@ class emitter // CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // ARM32 and ARM64 both can require a bigger prolog instruction group. One scenario is where // a function uses all the incoming integer and single-precision floating-point arguments, // and must store them all to the frame on entry. If the frame is very large, we generate @@ -1972,13 +1971,9 @@ class emitter // which eats up our insGroup buffer. 
#define SC_IG_BUFFER_SIZE (200 * sizeof(emitter::instrDesc)) -#elif defined(TARGET_LOONGARCH64) - -#define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 20 * SMALL_IDSC_SIZE) - -#else // !TARGET_LOONGARCH64 +#else #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) -#endif // !TARGET_LOONGARCH64 +#endif // !(TARGET_ARMARCH || TARGET_LOONGARCH64) size_t emitIGbuffSize; @@ -2160,7 +2155,7 @@ class emitter const char* emitLabelString(insGroup* ig); #endif -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt); @@ -2174,19 +2169,7 @@ class emitter static void emitGenerateUnwindNop(instrDesc* id, void* context); -#elif defined(TARGET_LOONGARCH64) - void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt); - bool emitGetLocationInfo(emitLocation* emitLoc, insGroup** pig, instrDesc** pid, int* pinsRemaining = NULL); - - bool emitNextID(insGroup*& ig, instrDesc*& id, int& insRemaining); - - typedef void (*emitProcessInstrFunc_t)(instrDesc* id, void* context); - - void emitWalkIDs(emitLocation* locFrom, emitProcessInstrFunc_t processFunc, void* context); - - static void emitGenerateUnwindNop(instrDesc* id, void* context); - -#endif // TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef TARGET_X86 void emitMarkStackLvl(unsigned stackLevel); @@ -2356,8 +2339,10 @@ class emitter static emitJumpKind emitReverseJumpKind(emitJumpKind jumpKind); #ifdef DEBUG +#ifndef TARGET_LOONGARCH64 void emitInsSanityCheck(instrDesc* id); #endif +#endif #ifdef TARGET_ARMARCH // Returns true if instruction "id->idIns()" writes to a register that might be used to contain a GC diff --git a/src/coreclr/jit/emitfmtsloongarch64.h b/src/coreclr/jit/emitfmtsloongarch64.h index 2f47160ac8d39..3dab2b7dc2704 100644 --- a/src/coreclr/jit/emitfmtsloongarch64.h +++ b/src/coreclr/jit/emitfmtsloongarch64.h @@ -2,6 +2,8 @@ // The .NET Foundation 
licenses this file to you under the MIT license. ////////////////////////////////////////////////////////////////////////////// +// define this file for LoongArch64 just for avoiding compiling errors. +// This is moot right now. // clang-format off #if !defined(TARGET_LOONGARCH64) @@ -33,32 +35,7 @@ enum ID_OPS // (unused) ////////////////////////////////////////////////////////////////////////////// -IF_DEF(NONE, IS_NONE, NONE) // - -IF_DEF(OPCODE, IS_NONE, NONE) -IF_DEF(OPCODES_16, IS_NONE, NONE) -IF_DEF(OP_FMT, IS_NONE, NONE) -IF_DEF(OP_FMT_16, IS_NONE, NONE) -IF_DEF(OP_FMTS_16, IS_NONE, NONE) -IF_DEF(FMT_FUNC, IS_NONE, NONE) -IF_DEF(FMT_FUNC_6, IS_NONE, NONE) -IF_DEF(FMT_FUNC_16, IS_NONE, NONE) -IF_DEF(FMT_FUNCS_6, IS_NONE, NONE) -IF_DEF(FMT_FUNCS_16, IS_NONE, NONE) -IF_DEF(FMT_FUNCS_6A, IS_NONE, NONE) -IF_DEF(FMT_FUNCS_11A, IS_NONE, NONE) -IF_DEF(FUNC, IS_NONE, NONE) -IF_DEF(FUNC_6, IS_NONE, NONE) -IF_DEF(FUNC_16, IS_NONE, NONE) -IF_DEF(FUNC_21, IS_NONE, NONE) -IF_DEF(FUNCS_6, IS_NONE, NONE) -IF_DEF(FUNCS_6A, IS_NONE, NONE) -IF_DEF(FUNCS_6B, IS_NONE, NONE) -IF_DEF(FUNCS_6C, IS_NONE, NONE) -IF_DEF(FUNCS_6D, IS_NONE, NONE) -IF_DEF(FUNCS_6E, IS_NONE, NONE) -IF_DEF(FUNCS_11, IS_NONE, NONE) - +IF_DEF(NONE, IS_NONE, NONE) ////////////////////////////////////////////////////////////////////////////// #undef IF_DEF diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index eb0b8659632bb..c7ba9f4a55ffe 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -384,60 +384,6 @@ const emitJumpKind emitReverseJumpKinds[] = { #include "emitjmps.h" }; -/***************************************************************************** - * The macro define for instructions. 
- */ - -#define D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd or fd or hint */ \ - op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ - op0_code |= ((op3_imm)&0xfff) << 10 - -#define D_INST_add_d(op0_code, op1_reg, op2_reg, op3_reg) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ - op0_code |= ((code_t)(op3_reg)) << 10 /* rk */ - -#define D_INST_3R(op0_code, op1_reg, op2_reg, op3_reg) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ - op0_code |= ((code_t)(op3_reg)) << 10 /* rk */ - -#define D_INST_JIRL(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ - op0_code |= ((op3_imm)&0xffff) << 10 /* offs */ - -#define D_INST_lu12i_w(op0_code, op1_reg, op2_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= ((op2_imm)&0xfffff) << 5 /* si20 */ - -#define D_INST_lu32i_d(op0_code, op1_reg, op2_imm) D_INST_lu12i_w(op0_code, op1_reg, op2_imm) - -#define D_INST_lu52i_d(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) - -#define D_INST_ori(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) - -// Load or Store instructions. 
-#define D_INST_LS(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) - -#define D_INST_Bcond(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg) /*& 0x1f */) << 5; /* rj */ \ - op0_code |= ((code_t)(op2_reg) /*& 0x1f */); /* rd */ \ - assert(!((code_t)(op3_imm)&0x3)); \ - op0_code |= (((code_t)(op3_imm) << 8) & 0x3fffc00) /* offset */ - -#define D_INST_Bcond_Z(op0_code, op1_reg, op1_imm) \ - assert(!((code_t)(op1_imm)&0x3)); \ - op0_code |= ((code_t)(op1_reg) /*& 0x1f */) << 5; /* rj */ \ - op0_code |= (((code_t)(op1_imm) << 8) & 0x3fffc00); \ - op0_code |= (((code_t)(op1_imm) >> 18) & 0x1f) /* offset */ - -#define D_INST_B(op0_code, op1_imm) \ - assert(!((code_t)(op1_imm)&0x3)); \ - op0_code |= (((code_t)(op1_imm) >> 18) & 0x3ff); \ - op0_code |= (((code_t)(op1_imm) << 8) & 0x3fffc00) /* offset */ - /***************************************************************************** * Look up the instruction for a jump kind */ @@ -500,7 +446,6 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) assert(!id->idIsLargeCns()); return sizeof(instrDesc); } - // break; case INS_OPTS_I: case INS_OPTS_RC: @@ -514,54 +459,10 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) } } -#ifdef DEBUG -/***************************************************************************** - * - * The following called for each recorded instruction -- use for debugging. - */ -void emitter::emitInsSanityCheck(instrDesc* id) -{ - /* What instruction format have we got? 
*/ - - switch (id->idInsFmt()) - { - case IF_OPCODE: - case IF_OPCODES_16: - case IF_OP_FMT: - case IF_OP_FMT_16: - case IF_OP_FMTS_16: - case IF_FMT_FUNC: - case IF_FMT_FUNC_6: - case IF_FMT_FUNC_16: - case IF_FMT_FUNCS_6: - case IF_FMT_FUNCS_16: - case IF_FMT_FUNCS_6A: - case IF_FMT_FUNCS_11A: - case IF_FUNC: - case IF_FUNC_6: - case IF_FUNC_16: - case IF_FUNC_21: - case IF_FUNCS_6: - case IF_FUNCS_6A: - case IF_FUNCS_6B: - case IF_FUNCS_6C: - case IF_FUNCS_6D: - case IF_FUNCS_11: - // case IF_LA: - break; - - default: - printf("unexpected format %s\n", emitIfName(id->idInsFmt())); - assert(!"Unexpected format"); - break; - } -} -#endif // DEBUG - inline bool emitter::emitInsMayWriteToGCReg(instruction ins) { assert(ins != INS_invalid); - ////NOTE: please reference the file "instrsloongarch64.h" for details !!! + // NOTE: please reference the file "instrsloongarch64.h" for details !!! return (INS_mov <= ins) && (ins <= INS_jirl) ? true : false; } @@ -586,38 +487,13 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) case INS_stx_w: case INS_stx_b: case INS_stx_h: - // case INS_sc_d: - // case INS_sc_w: - //// not used these instrs right now !!! 
- //#ifdef DEBUG - // case INS_stgt_b: - // case INS_stgt_h: - // case INS_stgt_w: - // case INS_stgt_d: - // case INS_stle_b: - // case INS_stle_h: - // case INS_stle_w: - // case INS_stle_d: - //#endif return true; + default: return false; } } -/*****************************************************************************/ -#ifdef DEBUG - -// clang-format off -static const char * const RegNames[] = -{ - #define REGDEF(name, rnum, mask, sname) sname, - #include "register.h" -}; -// clang-format on - -#endif // DEBUG - #define LD 1 #define ST 2 @@ -704,9 +580,8 @@ void emitter::emitIns(instruction ins) id->idIns(ins); id->idAddr()->iiaSetInstrEncode(emitInsCode(ins)); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -809,14 +684,15 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va id->idIns(ins); code_t code = emitInsCode(ins); - D_INST_2RI12(code, (reg1 & 0x1f), reg2, imm); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)reg2 << 5; + code |= (code_t)(imm & 0xfff) << 10; id->idAddr()->iiaSetInstrEncode(code); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idSetIsLclVar(); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -878,7 +754,9 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va ins = INS_addi_d; } code = emitInsCode(ins); - D_INST_2RI12(code, reg1, reg2, imm); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)reg2 << 5; + code |= (imm & 0xfff) << 10; } else { @@ -891,7 +769,9 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va ins = INS_add_d; code = emitInsCode(ins); - D_INST_add_d(code, reg1, reg2, REG_RA); + code |= (code_t)reg1; + code |= (code_t)reg2 << 5; + code |= (code_t)REG_RA << 10; } else { @@ -903,8 +783,11 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); imm2 = imm2 & 0x7ff; + imm3 = imm3 ? 
imm2 - imm3 : imm2; code = emitInsCode(ins); - D_INST_2RI12(code, reg1 /* & 0x1f*/, REG_RA, imm3 ? imm2 - imm3 : imm2); + code |= (code_t)reg1; + code |= (code_t)REG_RA; + code |= (code_t)(imm3 & 0xfff) << 10; } } @@ -917,9 +800,8 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va id->idAddr()->iiaSetInstrEncode(code); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idSetIsLclVar(); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -953,9 +835,8 @@ void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) id->idIns(ins); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -967,9 +848,6 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of case INS_bceqz: case INS_bcnez: break; - // case INS_: - // case INS_: - // break; default: unreached(); @@ -988,9 +866,8 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of id->idIns(ins); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1032,28 +909,28 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t assert((0 <= imm) && (imm <= 7)); code |= (reg & 0x1f) << 5; // fj - code |= imm /*& 0x7*/; // cc + code |= imm; // cc break; case INS_movcf2fr: assert(isFloatReg(reg)); assert((0 <= imm) && (imm <= 7)); - code |= (reg & 0x1f); // fd - code |= (imm /*& 0x7*/) << 5; // cc + code |= (reg & 0x1f); // fd + code |= imm << 5; // cc break; case INS_movgr2cf: assert(isGeneralRegister(reg)); assert((0 <= imm) && (imm <= 7)); - code |= reg << 5; // rj - code |= imm /*& 0x7*/; // cc + code |= reg << 5; // rj + code |= imm; // cc break; case INS_movcf2gr: assert(isGeneralRegister(reg)); assert((0 <= imm) && (imm <= 7)); - code |= reg; // rd - code |= (imm /*& 0x7*/) << 5; // cc + code |= reg; // rd + code |= imm << 5; // cc break; default: unreached(); @@ -1065,9 +942,8 @@ void 
emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t id->idIns(ins); id->idReg1(reg); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1084,7 +960,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t // void emitter::emitIns_Mov( instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) -{ // TODO-LoongArch64: should amend for LoongArch64/LOONGARCH64. +{ assert(IsMovInstruction(ins)); if (!canSkip || (dstReg != srcReg)) @@ -1115,30 +991,38 @@ void emitter::emitIns_R_R( } else if ((INS_ext_w_b <= ins) && (ins <= INS_cpucfg)) { - // case INS_ext_w_b: - // case INS_ext_w_h: - // case INS_clo_w: - // case INS_clz_w: - // case INS_cto_w: - // case INS_ctz_w: - // case INS_clo_d: - // case INS_clz_d: - // case INS_cto_d: - // case INS_ctz_d: - // case INS_revb_2h: - // case INS_revb_4h: - // case INS_revb_2w: - // case INS_revb_d: - // case INS_revh_2w: - // case INS_revh_d: - // case INS_bitrev_4b: - // case INS_bitrev_8b: - // case INS_bitrev_w: - // case INS_bitrev_d: - // case INS_rdtimel_w: - // case INS_rdtimeh_w: - // case INS_rdtime_d: - // case INS_cpucfg: +#ifdef DEBUG + switch (ins) + { + case INS_ext_w_b: + case INS_ext_w_h: + case INS_clo_w: + case INS_clz_w: + case INS_cto_w: + case INS_ctz_w: + case INS_clo_d: + case INS_clz_d: + case INS_cto_d: + case INS_ctz_d: + case INS_revb_2h: + case INS_revb_4h: + case INS_revb_2w: + case INS_revb_d: + case INS_revh_2w: + case INS_revh_d: + case INS_bitrev_4b: + case INS_bitrev_8b: + case INS_bitrev_w: + case INS_bitrev_d: + case INS_rdtimel_w: + case INS_rdtimeh_w: + case INS_rdtime_d: + case INS_cpucfg: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --1!"); + } +#endif assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); code |= reg1; // rd @@ -1146,8 +1030,6 @@ void emitter::emitIns_R_R( } else if 
((INS_asrtle_d == ins) || (INS_asrtgt_d == ins)) { - // case INS_asrtle_d: - // case INS_asrtgt_d: assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); code |= reg1 << 5; // rj @@ -1155,50 +1037,58 @@ void emitter::emitIns_R_R( } else if ((INS_fabs_s <= ins) && (ins <= INS_fmov_d)) { - // case INS_fabs_s: - // case INS_fabs_d: - // case INS_fneg_s: - // case INS_fneg_d: - // case INS_fsqrt_s: - // case INS_fsqrt_d: - // case INS_frsqrt_s: - // case INS_frsqrt_d: - // case INS_frecip_s: - // case INS_frecip_d: - // case INS_flogb_s: - // case INS_flogb_d: - // case INS_fclass_s: - // case INS_fclass_d: - // case INS_fcvt_s_d: - // case INS_fcvt_d_s: - // case INS_ffint_s_w: - // case INS_ffint_s_l: - // case INS_ffint_d_w: - // case INS_ffint_d_l: - // case INS_ftint_w_s: - // case INS_ftint_w_d: - // case INS_ftint_l_s: - // case INS_ftint_l_d: - // case INS_ftintrm_w_s: - // case INS_ftintrm_w_d: - // case INS_ftintrm_l_s: - // case INS_ftintrm_l_d: - // case INS_ftintrp_w_s: - // case INS_ftintrp_w_d: - // case INS_ftintrp_l_s: - // case INS_ftintrp_l_d: - // case INS_ftintrz_w_s: - // case INS_ftintrz_w_d: - // case INS_ftintrz_l_s: - // case INS_ftintrz_l_d: - // case INS_ftintrne_w_s: - // case INS_ftintrne_w_d: - // case INS_ftintrne_l_s: - // case INS_ftintrne_l_d: - // case INS_frint_s: - // case INS_frint_d: - // case INS_fmov_s: - // case INS_fmov_d: +#ifdef DEBUG + switch (ins) + { + case INS_fabs_s: + case INS_fabs_d: + case INS_fneg_s: + case INS_fneg_d: + case INS_fsqrt_s: + case INS_fsqrt_d: + case INS_frsqrt_s: + case INS_frsqrt_d: + case INS_frecip_s: + case INS_frecip_d: + case INS_flogb_s: + case INS_flogb_d: + case INS_fclass_s: + case INS_fclass_d: + case INS_fcvt_s_d: + case INS_fcvt_d_s: + case INS_ffint_s_w: + case INS_ffint_s_l: + case INS_ffint_d_w: + case INS_ffint_d_l: + case INS_ftint_w_s: + case INS_ftint_w_d: + case INS_ftint_l_s: + case INS_ftint_l_d: + case INS_ftintrm_w_s: + case INS_ftintrm_w_d: + case 
INS_ftintrm_l_s: + case INS_ftintrm_l_d: + case INS_ftintrp_w_s: + case INS_ftintrp_w_d: + case INS_ftintrp_l_s: + case INS_ftintrp_l_d: + case INS_ftintrz_w_s: + case INS_ftintrz_w_d: + case INS_ftintrz_l_s: + case INS_ftintrz_l_d: + case INS_ftintrne_w_s: + case INS_ftintrne_w_d: + case INS_ftintrne_l_s: + case INS_ftintrne_l_d: + case INS_frint_s: + case INS_frint_d: + case INS_fmov_s: + case INS_fmov_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --2!"); + } +#endif assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); code |= (reg1 & 0x1f); // fd @@ -1206,9 +1096,17 @@ void emitter::emitIns_R_R( } else if ((INS_movgr2fr_w <= ins) && (ins <= INS_movgr2frh_w)) { - // case INS_movgr2fr_w: - // case INS_movgr2fr_d: - // case INS_movgr2frh_w: +#ifdef DEBUG + switch (ins) + { + case INS_movgr2fr_w: + case INS_movgr2fr_d: + case INS_movgr2frh_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --3!"); + } +#endif assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); code |= (reg1 & 0x1f); // fd @@ -1216,9 +1114,17 @@ void emitter::emitIns_R_R( } else if ((INS_movfr2gr_s <= ins) && (ins <= INS_movfrh2gr_s)) { - // case INS_movfr2gr_s: - // case INS_movfr2gr_d: - // case INS_movfrh2gr_s: +#ifdef DEBUG + switch (ins) + { + case INS_movfr2gr_s: + case INS_movfr2gr_d: + case INS_movfrh2gr_s: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --4!"); + } +#endif assert(isGeneralRegisterOrR0(reg1)); assert(isFloatReg(reg2)); code |= reg1; // rd @@ -1252,9 +1158,8 @@ void emitter::emitIns_R_R( id->idReg1(reg1); id->idReg2(reg2); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1270,10 +1175,19 @@ void emitter::emitIns_R_R_I( if ((INS_slli_w <= ins) && (ins <= INS_rotri_w)) { - // INS_slli_w - // INS_srli_w - // INS_srai_w - // INS_rotri_w +#ifdef DEBUG + switch (ins) + { + case INS_slli_w: + case INS_srli_w: + case INS_srai_w: + case INS_rotri_w: + 
break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --1!"); + } +#endif + assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((0 <= imm) && (imm <= 0x1f)); @@ -1284,10 +1198,18 @@ void emitter::emitIns_R_R_I( } else if ((INS_slli_d <= ins) && (ins <= INS_rotri_d)) { - // INS_slli_d - // INS_srli_d - // INS_srai_d - // INS_rotri_d +#ifdef DEBUG + switch (ins) + { + case INS_slli_d: + case INS_srli_d: + case INS_srai_d: + case INS_rotri_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --2!"); + } +#endif assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((0 <= imm) && (imm <= 0x3f)); @@ -1305,34 +1227,45 @@ void emitter::emitIns_R_R_I( if (((INS_addi_w <= ins) && (ins <= INS_slti)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || ((INS_st_b <= ins) && (ins <= INS_st_d))) { - // case INS_addi_w: - // case INS_addi_d: - // case INS_lu52i_d: - // case INS_slti: - // case INS_ld_b: - // case INS_ld_h: - // case INS_ld_w: - // case INS_ld_d: - // case INS_ld_bu: - // case INS_ld_hu: - // case INS_ld_wu: - // case INS_st_b: - // case INS_st_h: - // case INS_st_w: - // case INS_st_d: + switch (ins) + { + case INS_addi_w: + case INS_addi_d: + case INS_lu52i_d: + case INS_slti: + case INS_ld_b: + case INS_ld_h: + case INS_ld_w: + case INS_ld_d: + case INS_ld_bu: + case INS_ld_hu: + case INS_ld_wu: + case INS_st_b: + case INS_st_h: + case INS_st_w: + case INS_st_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --3!"); + } assert((-2048 <= imm) && (imm <= 2047)); } else if (ins == INS_sltui) { - // case INS_sltui: assert((0 <= imm) && (imm <= 0x7ff)); } else { - // case INS_andi: - // case INS_ori: - // case INS_xori: + switch (ins) + { + case INS_andi: + case INS_ori: + case INS_xori: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --4!"); + } assert((0 <= imm) && (imm <= 0xfff)); } #endif @@ -1342,10 +1275,18 @@ void 
emitter::emitIns_R_R_I( } else if ((INS_fld_s <= ins) && (ins <= INS_fst_d)) { - // INS_fld_s - // INS_fld_d - // INS_fst_s - // INS_fst_d +#ifdef DEBUG + switch (ins) + { + case INS_fld_s: + case INS_fld_d: + case INS_fst_s: + case INS_fst_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --5!"); + } +#endif assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((-2048 <= imm) && (imm <= 2047)); @@ -1356,15 +1297,22 @@ void emitter::emitIns_R_R_I( } else if (((INS_ll_d >= ins) && (ins >= INS_ldptr_w)) || ((INS_sc_d >= ins) && (ins >= INS_stptr_w))) { - // INS_ldptr_w - // INS_ldptr_d - // INS_ll_w - // INS_ll_d - - // INS_stptr_w - // INS_stptr_d - // INS_sc_w - // INS_sc_d +#ifdef DEBUG + switch (ins) + { + case INS_ldptr_w: + case INS_ldptr_d: + case INS_ll_w: + case INS_ll_d: + case INS_stptr_w: + case INS_stptr_d: + case INS_sc_w: + case INS_sc_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --6!"); + } +#endif assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((-8192 <= imm) && (imm <= 8191)); @@ -1375,12 +1323,20 @@ void emitter::emitIns_R_R_I( } else if ((INS_beq <= ins) && (ins <= INS_bgeu)) { - // INS_beq - // INS_bne - // INS_blt - // INS_bltu - // INS_bge - // INS_bgeu +#ifdef DEBUG + switch (ins) + { + case INS_beq: + case INS_bne: + case INS_blt: + case INS_bltu: + case INS_bge: + case INS_bgeu: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --7!"); + } +#endif assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(!(imm & 0x3)); @@ -1392,50 +1348,58 @@ void emitter::emitIns_R_R_I( } else if ((INS_fcmp_caf_s <= ins) && (ins <= INS_fcmp_sune_s)) { - // INS_fcmp_caf_s - // INS_fcmp_cun_s - // INS_fcmp_ceq_s - // INS_fcmp_cueq_s - // INS_fcmp_clt_s - // INS_fcmp_cult_s - // INS_fcmp_cle_s - // INS_fcmp_cule_s - // INS_fcmp_cne_s - // INS_fcmp_cor_s - // INS_fcmp_cune_s - // INS_fcmp_saf_d - // INS_fcmp_sun_d - // 
INS_fcmp_seq_d - // INS_fcmp_sueq_d - // INS_fcmp_slt_d - // INS_fcmp_sult_d - // INS_fcmp_sle_d - // INS_fcmp_sule_d - // INS_fcmp_sne_d - // INS_fcmp_sor_d - // INS_fcmp_sune_d - // INS_fcmp_caf_d - // INS_fcmp_cun_d - // INS_fcmp_ceq_d - // INS_fcmp_cueq_d - // INS_fcmp_clt_d - // INS_fcmp_cult_d - // INS_fcmp_cle_d - // INS_fcmp_cule_d - // INS_fcmp_cne_d - // INS_fcmp_cor_d - // INS_fcmp_cune_d - // INS_fcmp_saf_s - // INS_fcmp_sun_s - // INS_fcmp_seq_s - // INS_fcmp_sueq_s - // INS_fcmp_slt_s - // INS_fcmp_sult_s - // INS_fcmp_sle_s - // INS_fcmp_sule_s - // INS_fcmp_sne_s - // INS_fcmp_sor_s - // INS_fcmp_sune_s +#ifdef DEBUG + switch (ins) + { + case INS_fcmp_caf_s: + case INS_fcmp_cun_s: + case INS_fcmp_ceq_s: + case INS_fcmp_cueq_s: + case INS_fcmp_clt_s: + case INS_fcmp_cult_s: + case INS_fcmp_cle_s: + case INS_fcmp_cule_s: + case INS_fcmp_cne_s: + case INS_fcmp_cor_s: + case INS_fcmp_cune_s: + case INS_fcmp_saf_d: + case INS_fcmp_sun_d: + case INS_fcmp_seq_d: + case INS_fcmp_sueq_d: + case INS_fcmp_slt_d: + case INS_fcmp_sult_d: + case INS_fcmp_sle_d: + case INS_fcmp_sule_d: + case INS_fcmp_sne_d: + case INS_fcmp_sor_d: + case INS_fcmp_sune_d: + case INS_fcmp_caf_d: + case INS_fcmp_cun_d: + case INS_fcmp_ceq_d: + case INS_fcmp_cueq_d: + case INS_fcmp_clt_d: + case INS_fcmp_cult_d: + case INS_fcmp_cle_d: + case INS_fcmp_cule_d: + case INS_fcmp_cne_d: + case INS_fcmp_cor_d: + case INS_fcmp_cune_d: + case INS_fcmp_saf_s: + case INS_fcmp_sun_s: + case INS_fcmp_seq_s: + case INS_fcmp_sueq_s: + case INS_fcmp_slt_s: + case INS_fcmp_sult_s: + case INS_fcmp_sle_s: + case INS_fcmp_sule_s: + case INS_fcmp_sne_s: + case INS_fcmp_sor_s: + case INS_fcmp_sune_s: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --8!"); + } +#endif assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); assert((0 <= imm) && (imm <= 7)); @@ -1475,9 +1439,8 @@ void emitter::emitIns_R_R_I( id->idReg1(reg1); id->idReg2(reg2); id->idAddr()->iiaSetInstrEncode(code); - 
id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1487,12 +1450,9 @@ void emitter::emitIns_R_R_I( * Also checks for a large immediate that needs a second instruction * and will load it in reg1 * -* - Supports instructions: add, adds, sub, subs, and, ands, eor and orr -* - Requires that reg1 is a general register and not SP or ZR -* - Requires that reg1 != reg2 */ void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm) -{ // maybe optimize. +{ assert(isGeneralRegister(reg1)); assert(reg1 != reg2); @@ -1503,20 +1463,7 @@ void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, re { case INS_addi_w: case INS_addi_d: - // case INS_lui: - // case INS_lbu: - // case INS_lhu: - // case INS_lwu: - // case INS_lb: - // case INS_lh: - // case INS_lw: case INS_ld_d: - // case INS_sb: - // case INS_sh: - // case INS_sw: - // case INS_sd: - ////case INS_lwc1: - ////case INS_ldc1: immFits = isValidSimm12(imm); break; @@ -1560,123 +1507,131 @@ void emitter::emitIns_R_R_R( if (((INS_add_w <= ins) && (ins <= INS_crcc_w_d_w)) || ((INS_ldx_b <= ins) && (ins <= INS_ldle_d)) || ((INS_stx_b <= ins) && (ins <= INS_stle_d))) { - // case INS_add_w: - // case INS_add_d: - // case INS_sub_w: - // case INS_sub_d: - // case INS_and: - // case INS_or: - // case INS_nor: - // case INS_xor: - // case INS_andn: - // case INS_orn: - - // case INS_mul_w: - // case INS_mul_d: - // case INS_mulh_w: - // case INS_mulh_wu: - // case INS_mulh_d: - // case INS_mulh_du: - // case INS_mulw_d_w: - // case INS_mulw_d_wu: - // case INS_div_w: - // case INS_div_wu: - // case INS_div_d: - // case INS_div_du: - // case INS_mod_w: - // case INS_mod_wu: - // case INS_mod_d: - // case INS_mod_du: - - // case INS_sll_w: - // case INS_srl_w: - // case INS_sra_w: - // case INS_rotr_w: - // case INS_sll_d: - // case INS_srl_d: - // case INS_sra_d: - // case INS_rotr_d: - - // case INS_maskeqz: - // case INS_masknez: - - // case 
INS_slt: - // case INS_sltu: - - // case INS_ldx_b: - // case INS_ldx_h: - // case INS_ldx_w: - // case INS_ldx_d: - // case INS_ldx_bu: - // case INS_ldx_hu: - // case INS_ldx_wu: - // case INS_stx_b: - // case INS_stx_h: - // case INS_stx_w: - // case INS_stx_d: - - // case INS_ldgt_b: - // case INS_ldgt_h: - // case INS_ldgt_w: - // case INS_ldgt_d: - // case INS_ldle_b: - // case INS_ldle_h: - // case INS_ldle_w: - // case INS_ldle_d: - // case INS_stgt_b: - // case INS_stgt_h: - // case INS_stgt_w: - // case INS_stgt_d: - // case INS_stle_b: - // case INS_stle_h: - // case INS_stle_w: - // case INS_stle_d: - - // case INS_amswap_w: - // case INS_amswap_d: - // case INS_amswap_db_w: - // case INS_amswap_db_d: - // case INS_amadd_w: - // case INS_amadd_d: - // case INS_amadd_db_w: - // case INS_amadd_db_d: - // case INS_amand_w: - // case INS_amand_d: - // case INS_amand_db_w: - // case INS_amand_db_d: - // case INS_amor_w: - // case INS_amor_d: - // case INS_amor_db_w: - // case INS_amor_db_d: - // case INS_amxor_w: - // case INS_amxor_d: - // case INS_amxor_db_w: - // case INS_amxor_db_d: - // case INS_ammax_w: - // case INS_ammax_d: - // case INS_ammax_db_w: - // case INS_ammax_db_d: - // case INS_ammin_w: - // case INS_ammin_d: - // case INS_ammin_db_w: - // case INS_ammin_db_d: - // case INS_ammax_wu: - // case INS_ammax_du: - // case INS_ammax_db_wu: - // case INS_ammax_db_du: - // case INS_ammin_wu: - // case INS_ammin_du: - // case INS_ammin_db_wu: - // case INS_ammin_db_du: - - // case INS_crc_w_b_w: - // case INS_crc_w_h_w: - // case INS_crc_w_w_w: - // case INS_crc_w_d_w: - // case INS_crcc_w_b_w: - // case INS_crcc_w_h_w: - // case INS_crcc_w_w_w: - // case INS_crcc_w_d_w: +#ifdef DEBUG + switch (ins) + { + case INS_add_w: + case INS_add_d: + case INS_sub_w: + case INS_sub_d: + case INS_and: + case INS_or: + case INS_nor: + case INS_xor: + case INS_andn: + case INS_orn: + + case INS_mul_w: + case INS_mul_d: + case INS_mulh_w: + case INS_mulh_wu: + 
case INS_mulh_d: + case INS_mulh_du: + case INS_mulw_d_w: + case INS_mulw_d_wu: + case INS_div_w: + case INS_div_wu: + case INS_div_d: + case INS_div_du: + case INS_mod_w: + case INS_mod_wu: + case INS_mod_d: + case INS_mod_du: + + case INS_sll_w: + case INS_srl_w: + case INS_sra_w: + case INS_rotr_w: + case INS_sll_d: + case INS_srl_d: + case INS_sra_d: + case INS_rotr_d: + + case INS_maskeqz: + case INS_masknez: + + case INS_slt: + case INS_sltu: + + case INS_ldx_b: + case INS_ldx_h: + case INS_ldx_w: + case INS_ldx_d: + case INS_ldx_bu: + case INS_ldx_hu: + case INS_ldx_wu: + case INS_stx_b: + case INS_stx_h: + case INS_stx_w: + case INS_stx_d: + + case INS_ldgt_b: + case INS_ldgt_h: + case INS_ldgt_w: + case INS_ldgt_d: + case INS_ldle_b: + case INS_ldle_h: + case INS_ldle_w: + case INS_ldle_d: + case INS_stgt_b: + case INS_stgt_h: + case INS_stgt_w: + case INS_stgt_d: + case INS_stle_b: + case INS_stle_h: + case INS_stle_w: + case INS_stle_d: + + case INS_amswap_w: + case INS_amswap_d: + case INS_amswap_db_w: + case INS_amswap_db_d: + case INS_amadd_w: + case INS_amadd_d: + case INS_amadd_db_w: + case INS_amadd_db_d: + case INS_amand_w: + case INS_amand_d: + case INS_amand_db_w: + case INS_amand_db_d: + case INS_amor_w: + case INS_amor_d: + case INS_amor_db_w: + case INS_amor_db_d: + case INS_amxor_w: + case INS_amxor_d: + case INS_amxor_db_w: + case INS_amxor_db_d: + case INS_ammax_w: + case INS_ammax_d: + case INS_ammax_db_w: + case INS_ammax_db_d: + case INS_ammin_w: + case INS_ammin_d: + case INS_ammin_db_w: + case INS_ammin_db_d: + case INS_ammax_wu: + case INS_ammax_du: + case INS_ammax_db_wu: + case INS_ammax_db_du: + case INS_ammin_wu: + case INS_ammin_du: + case INS_ammin_db_wu: + case INS_ammin_db_du: + + case INS_crc_w_b_w: + case INS_crc_w_h_w: + case INS_crc_w_w_w: + case INS_crc_w_d_w: + case INS_crcc_w_b_w: + case INS_crcc_w_h_w: + case INS_crcc_w_w_w: + case INS_crcc_w_d_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_R 
--1!"); + } +#endif assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); @@ -1687,26 +1642,34 @@ void emitter::emitIns_R_R_R( } else if ((INS_fadd_s <= ins) && (ins <= INS_fcopysign_d)) { - // case INS_fadd_s: - // case INS_fadd_d: - // case INS_fsub_s: - // case INS_fsub_d: - // case INS_fmul_s: - // case INS_fmul_d: - // case INS_fdiv_s: - // case INS_fdiv_d: - // case INS_fmax_s: - // case INS_fmax_d: - // case INS_fmin_s: - // case INS_fmin_d: - // case INS_fmaxa_s: - // case INS_fmaxa_d: - // case INS_fmina_s: - // case INS_fmina_d: - // case INS_fscaleb_s: - // case INS_fscaleb_d: - // case INS_fcopysign_s: - // case INS_fcopysign_d: +#ifdef DEBUG + switch (ins) + { + case INS_fadd_s: + case INS_fadd_d: + case INS_fsub_s: + case INS_fsub_d: + case INS_fmul_s: + case INS_fmul_d: + case INS_fdiv_s: + case INS_fdiv_d: + case INS_fmax_s: + case INS_fmax_d: + case INS_fmin_s: + case INS_fmin_d: + case INS_fmaxa_s: + case INS_fmaxa_d: + case INS_fmina_s: + case INS_fmina_d: + case INS_fscaleb_s: + case INS_fscaleb_d: + case INS_fcopysign_s: + case INS_fcopysign_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_R --2!"); + } +#endif assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); assert(isFloatReg(reg3)); @@ -1717,19 +1680,27 @@ void emitter::emitIns_R_R_R( } else if ((INS_fldx_s <= ins) && (ins <= INS_fstle_d)) { - // case INS_fldx_s: - // case INS_fldx_d: - // case INS_fstx_s: - // case INS_fstx_d: - - // case INS_fldgt_s: - // case INS_fldgt_d: - // case INS_fldle_s: - // case INS_fldle_d: - // case INS_fstgt_s: - // case INS_fstgt_d: - // case INS_fstle_s: - // case INS_fstle_d: +#ifdef DEBUG + switch (ins) + { + case INS_fldx_s: + case INS_fldx_d: + case INS_fstx_s: + case INS_fstx_d: + + case INS_fldgt_s: + case INS_fldgt_d: + case INS_fldle_s: + case INS_fldle_d: + case INS_fstgt_s: + case INS_fstgt_d: + case INS_fstle_s: + case INS_fstle_d: + break; + default: + 
NYI_LOONGARCH64("illegal ins within emitIns_R_R_R --3!"); + } +#endif assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); @@ -1740,7 +1711,7 @@ void emitter::emitIns_R_R_R( } else { - assert(!"Unsupported instruction in emitIns_R_R_R"); + NYI_LOONGARCH64("Unsupported instruction in emitIns_R_R_R"); } instrDesc* id = emitNewInstr(attr); @@ -1750,9 +1721,8 @@ void emitter::emitIns_R_R_R( id->idReg2(reg2); id->idReg3(reg3); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1774,19 +1744,27 @@ void emitter::emitIns_R_R_R_I(instruction ins, if ((INS_alsl_w <= ins) && (ins <= INS_bytepick_w)) { - // INS_alsl_w - // INS_alsl_wu - // INS_alsl_d - // INS_bytepick_w +#ifdef DEBUG + switch (ins) + { + case INS_alsl_w: + case INS_alsl_wu: + case INS_alsl_d: + case INS_bytepick_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --4!"); + } +#endif assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); assert((0 <= imm) && (imm <= 3)); - code |= reg1; // rd - code |= reg2 << 5; // rj - code |= reg3 << 10; // rk - code |= (imm /*& 0x3*/) << 15; // sa2 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= imm << 15; // sa2 } else if (INS_bytepick_d == ins) { @@ -1795,10 +1773,10 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(isGeneralRegisterOrR0(reg3)); assert((0 <= imm) && (imm <= 7)); - code |= reg1; // rd - code |= reg2 << 5; // rj - code |= reg3 << 10; // rk - code |= (imm /*& 0x7*/) << 15; // sa3 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= imm << 15; // sa3 } else if (INS_fsel == ins) { @@ -1807,10 +1785,10 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(isFloatReg(reg3)); assert((0 <= imm) && (imm <= 7)); - code |= (reg1 & 0x1f); // fd - code |= (reg2 & 0x1f) << 5; // fj - code |= (reg3 & 0x1f) << 10; // fk 
- code |= (imm /*& 0x7*/) << 15; // ca + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + code |= imm << 15; // ca } else { @@ -1824,9 +1802,8 @@ void emitter::emitIns_R_R_R_I(instruction ins, id->idReg2(reg2); id->idReg3(reg3); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1870,9 +1847,8 @@ void emitter::emitIns_R_R_I_I( id->idReg1(reg1); id->idReg2(reg2); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1886,7 +1862,6 @@ void emitter::emitIns_R_R_R_R( { code_t code = emitInsCode(ins); - //#ifdef DEBUG switch (ins) { case INS_fmadd_s: @@ -1910,16 +1885,14 @@ void emitter::emitIns_R_R_R_R( default: unreached(); } - //#endif instrDesc* id = emitNewInstr(attr); id->idIns(ins); id->idReg1(reg1); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1945,7 +1918,7 @@ void emitter::emitIns_R_C( // pcaddu12i reg, off-hi-20bits // load reg, offs_lo-12bits(reg) #when ins is load ins. // - // INS_OPTS_RC: ins == bl placeholders. 3-ins: ////TODO-LoongArch64: maybe optimize. + // INS_OPTS_RC: ins == bl placeholders. 3-ins: // TODO-LoongArch64: maybe optimize. // lu12i_w reg, addr-hi-20bits // ori reg, reg, addr-lo-12bits // lu32i_d reg, addr_hi-32bits @@ -1992,7 +1965,6 @@ void emitter::emitIns_R_C( id->idAddr()->iiaFieldHnd = fldHnd; - // dispIns(id);//loongarch dumping instr by other-fun. appendToCurIG(id); } @@ -2041,9 +2013,8 @@ void emitter::emitIns_R_AI(instruction ins, } id->idAddr()->iiaAddr = (BYTE*)addr; - id->idCodeSize(8); - // dispIns(id);//loongarch dumping instr by other-fun. 
+ appendToCurIG(id); } @@ -2113,7 +2084,6 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu } #endif // DEBUG - // dispIns(id); appendToCurIG(id); } @@ -2136,37 +2106,12 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) assert(instrCount != 0); assert(ins == INS_b); // when dst==nullptr, ins is INS_b by now. -#if 1 assert((-33554432 <= instrCount) && (instrCount < 33554432)); // 0x2000000. emitIns_I(ins, EA_PTRSIZE, instrCount << 2); // NOTE: instrCount is the number of the instructions. -#else - instrCount = instrCount << 2; - if ((-33554432 <= instrCount) && (instrCount < 33554432)) - { - /* This jump is really short */ - emitIns_I(ins, EA_PTRSIZE, instrCount); - } - else - { - // NOTE: should not be here !!! - assert(!"should not be here on LOONGARCH64 !!!"); - - // emitIns_I(INS_bl, EA_PTRSIZE, 4); - // ssize_t imm = ((ssize_t)instrCount>>12); - // assert(isValidSimm12(imm)); - // emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm); - // imm = (instrCount & 0xfffff); - // emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, imm); - - // emitIns_R_R_R(INS_add_d, EA_8BYTE, REG_R21, REG_R21, REG_RA); - // emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_R21, 0); - } -#endif return; } - // (dst != nullptr) // // INS_OPTS_J: placeholders. 1-ins: if the dst outof-range will be replaced by INS_OPTS_JIRL. 
// bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst @@ -2210,7 +2155,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) #endif id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -2267,7 +2212,7 @@ void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 #endif id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -2337,7 +2282,6 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) id->idAddr()->iiaAddr = (BYTE*)imm; - // dispIns(id); appendToCurIG(id); } @@ -2382,7 +2326,7 @@ void emitter::emitIns_Call(EmitCallType callType, assert(callType < EC_INDIR_R || addr == NULL); assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); - // ARM never uses these + // LoongArch64 never uses these assert(xreg == REG_NA && xmul == 0 && disp == 0); // Our stack level should be always greater than the bytes of arguments we push. Just @@ -2533,7 +2477,6 @@ void emitter::emitIns_Call(EmitCallType callType, } #endif // LATE_DISASM - // dispIns(id); appendToCurIG(id); } @@ -2588,7 +2531,9 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t if (id->idIsCallRegPtr()) { // EC_INDIR_R code = emitInsCode(id->idIns()); - D_INST_JIRL(code, id->idReg4(), id->idReg3(), 0); + code |= (code_t)id->idReg4(); + code |= (code_t)id->idReg3() << 5; + // the offset default is 0; } else if (id->idIsReloc()) { @@ -2600,11 +2545,13 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t *(code_t*)dst = 0x1e00000e; size_t addr = (size_t)(id->idAddr()->iiaAddr); // get addr. 
- // should assert(addr-dst < 38bits); int reg2 = (int)addr & 1; addr = addr ^ 1; + assert(isValidSimm38(addr - (ssize_t)dst)); + assert((addr & 3) == 0); + dst += 4; #ifdef DEBUG code = emitInsCode(INS_pcaddu18i); @@ -2619,37 +2566,42 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t } else { - // lu12i_w t2, dst_offset_lo32-hi // TODO-LoongArch64: maybe optimize. - // ori t2, t2, dst_offset_lo32-lo - // lu32i_d t2, dst_offset_hi32-lo - // jirl t2 + // lu12i_w t2, dst_offset_lo32-hi // TODO-LoongArch64: maybe optimize. + // ori t2, t2, dst_offset_lo32-lo + // lu32i_d t2, dst_offset_hi32-lo + // jirl t2 ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); - // assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff. - assert((imm >> 32) == 0xff); // for LA64 addr-is 0xff. but this is not the best !!! + assert((imm >> 32) == 0xff); int reg2 = (int)(imm & 1); imm -= reg2; code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, REG_T2, imm >> 12); + code |= (code_t)REG_T2; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, REG_T2, REG_T2, imm); + code |= (code_t)REG_T2; + code |= (code_t)REG_T2 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; dst += 4; - // emitIns_R_I(INS_lu32i_d, size, REG_T2, imm >> 32); code = emitInsCode(INS_lu32i_d); - // D_INST_lu32i_d(code, REG_T2, imm >> 32); - D_INST_lu32i_d(code, REG_T2, 0xff); + code |= (code_t)REG_T2; + code |= 0xff << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_jirl); - D_INST_JIRL(code, reg2, REG_T2, 0); + code |= (code_t)reg2; + code |= (code_t)REG_T2 << 5; + // the offset default is 0; } dst += 4; @@ -2727,6 +2679,442 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t return callInstrSize; } +/***************************************************************************** + * LoongArch64 has an individual implementation for emitJumpDistBind(). 
+ * + * Bind targets of relative jumps/branch to choose the smallest possible encoding. + * LoongArch64 has a small medium, and large encoding. + * + * Even though the small encoding is offset-18bits which lowest 2bits is always 0. + * The small encoding as the default is fit for most cases. + */ + +void emitter::emitJumpDistBind() +{ +#ifdef DEBUG + if (emitComp->verbose) + { + printf("*************** In emitJumpDistBind()\n"); + } + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nInstruction list before jump distance binding:\n\n"); + emitDispIGlist(true); + } +#endif + + instrDescJmp* jmp; + + UNATIVE_OFFSET adjIG; + UNATIVE_OFFSET adjSJ; + insGroup* lstIG; +#ifdef DEBUG + insGroup* prologIG = emitPrologIG; +#endif // DEBUG + + // NOTE: + // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; + // bit1 of isLinkingEnd_LA: indicating not needed updating the size while emitTotalCodeSize <= (0x7fff << 2) or had + // updated; + unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; + + UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. + // small jump max. neg distance + NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; + // small jump max. pos distance + NATIVE_OFFSET psd = + B_DIST_SMALL_MAX_POS - + emitCounts_INS_OPTS_J * (3 << 2); // the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + +/*****************************************************************************/ +/* If the default small encoding is not enough, we start again here. 
*/ +/*****************************************************************************/ + +AGAIN: + +#ifdef DEBUG + emitCheckIGoffsets(); +#endif + +#ifdef DEBUG + insGroup* lastIG = nullptr; + instrDescJmp* lastSJ = nullptr; +#endif + + lstIG = nullptr; + adjSJ = 0; + adjIG = 0; + + for (jmp = emitJumpList; jmp; jmp = jmp->idjNext) + { + insGroup* jmpIG; + insGroup* tgtIG; + + UNATIVE_OFFSET jsz; // size of the jump instruction in bytes + + NATIVE_OFFSET extra; // How far beyond the short jump range is this jump offset? + UNATIVE_OFFSET srcInstrOffs; // offset of the source instruction of the jump + UNATIVE_OFFSET srcEncodingOffs; // offset of the source used by the instruction set to calculate the relative + // offset of the jump + UNATIVE_OFFSET dstOffs; + NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded + +/* Make sure the jumps are properly ordered */ + +#ifdef DEBUG + assert(lastSJ == nullptr || lastIG != jmp->idjIG || lastSJ->idjOffs < (jmp->idjOffs + adjSJ)); + lastSJ = (lastIG == jmp->idjIG) ? jmp : nullptr; + + assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG || + emitNxtIGnum > unsigned(0xFFFF)); // igNum might overflow + lastIG = jmp->idjIG; +#endif // DEBUG + + /* Get hold of the current jump size */ + + jsz = jmp->idCodeSize(); + + /* Get the group the jump is in */ + + jmpIG = jmp->idjIG; + + /* Are we in a group different from the previous jump? */ + + if (lstIG != jmpIG) + { + /* Were there any jumps before this one? 
*/ + + if (lstIG) + { + /* Adjust the offsets of the intervening blocks */ + + do + { + lstIG = lstIG->igNext; + assert(lstIG); +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + lstIG->igOffs += adjIG; + assert(IsCodeAligned(lstIG->igOffs)); + } while (lstIG != jmpIG); + } + + /* We've got the first jump in a new group */ + adjSJ = 0; + lstIG = jmpIG; + } + + /* Apply any local size adjustment to the jump's relative offset */ + jmp->idjOffs += adjSJ; + + // If this is a jump via register, the instruction size does not change, so we are done. + CLANG_FORMAT_COMMENT_ANCHOR; + + /* Have we bound this jump's target already? */ + + if (jmp->idIsBound()) + { + /* Does the jump already have the smallest size? */ + + if (jmp->idjShort) + { + // We should not be jumping/branching across funclets/functions + emitCheckFuncletBranch(jmp, jmpIG); + + continue; + } + + tgtIG = jmp->idAddr()->iiaIGlabel; + } + else + { + /* First time we've seen this label, convert its target */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Binding: "); + emitDispIns(jmp, false, false, false); + printf("Binding L_M%03u_" FMT_BB, emitComp->compMethodID, jmp->idAddr()->iiaBBlabel->bbNum); + } +#endif // DEBUG + + tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel); + +#ifdef DEBUG + if (EMITVERBOSE) + { + if (tgtIG) + { + printf(" to %s\n", emitLabelString(tgtIG)); + } + else + { + printf("-- ERROR, no emitter cookie for " FMT_BB "; it is probably missing BBF_HAS_LABEL.\n", + jmp->idAddr()->iiaBBlabel->bbNum); + } + } + assert(tgtIG); +#endif // DEBUG + + /* Record the bound target */ + + jmp->idAddr()->iiaIGlabel = tgtIG; + jmp->idSetIsBound(); + } + + // We should not be jumping/branching across funclets/functions + emitCheckFuncletBranch(jmp, jmpIG); + + /* + In the following distance calculations, if we're not actually + 
scheduling the code (i.e. reordering instructions), we can + use the actual offset of the jump (rather than the beg/end of + the instruction group) since the jump will not be moved around + and thus its offset is accurate. + + First we need to figure out whether this jump is a forward or + backward one; to do this we simply look at the ordinals of the + group that contains the jump and the target. + */ + + srcInstrOffs = jmpIG->igOffs + jmp->idjOffs; + + /* Note that the destination is always the beginning of an IG, so no need for an offset inside it */ + dstOffs = tgtIG->igOffs; + + srcEncodingOffs = srcInstrOffs + ssz; // Encoding offset of relative offset for small branch + + if (jmpIG->igNum < tgtIG->igNum) + { + /* Forward jump */ + + /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between + here and the target could be shortened, causing the actual distance to shrink. + */ + + dstOffs += adjIG; + + /* Compute the distance estimate */ + + jmpDist = dstOffs - srcEncodingOffs; + + /* How much beyond the max. short distance does the jump go? */ + + extra = jmpDist - psd; + +#if DEBUG_EMIT + assert(jmp->idDebugOnlyInfo() != nullptr); + if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + if (INTERESTING_JUMP_NUM == 0) + { + printf("[1] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum); + } + printf("[1] Jump block is at %08X\n", jmpIG->igOffs); + printf("[1] Jump reloffset is %04X\n", jmp->idjOffs); + printf("[1] Jump source is at %08X\n", srcEncodingOffs); + printf("[1] Label block is at %08X\n", dstOffs); + printf("[1] Jump dist. 
is %04X\n", jmpDist); + if (extra > 0) + { + printf("[1] Dist excess [S] = %d \n", extra); + } + } + if (EMITVERBOSE) + { + printf("Estimate of fwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp), + jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist); + } +#endif // DEBUG_EMIT + + assert(jmpDist >= 0); // Forward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(jmpDist); + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + extra = 8; + } + } + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. + { + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) + continue; + + extra = 4; + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + } + else + { + assert(ins == INS_b || ins == INS_bl); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + adjSJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + } + else + { + /* Backward jump */ + + /* Compute the distance estimate */ + + jmpDist = srcEncodingOffs - dstOffs; + + /* How much beyond the max. short distance does the jump go? 
*/ + + extra = jmpDist + nsd; + +#if DEBUG_EMIT + assert(jmp->idDebugOnlyInfo() != nullptr); + if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + if (INTERESTING_JUMP_NUM == 0) + { + printf("[2] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum); + } + printf("[2] Jump block is at %08X\n", jmpIG->igOffs); + printf("[2] Jump reloffset is %04X\n", jmp->idjOffs); + printf("[2] Jump source is at %08X\n", srcEncodingOffs); + printf("[2] Label block is at %08X\n", dstOffs); + printf("[2] Jump dist. is %04X\n", jmpDist); + if (extra > 0) + { + printf("[2] Dist excess [S] = %d \n", extra); + } + } + if (EMITVERBOSE) + { + printf("Estimate of bwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp), + jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist); + } +#endif // DEBUG_EMIT + + assert(jmpDist >= 0); // Backward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(-jmpDist); // Backward jump is negative! + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + extra = 8; + } + } + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. + { + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) + continue; + + extra = 4; + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + } + else + { + assert(ins == INS_b || ins == INS_bl); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
+ adjSJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + } + } // end for each jump + + if ((isLinkingEnd_LA & 0x3) < 0x2) + { + // indicating the instrDescJmp's size of the type INS_OPTS_J had updated + // after the first round and should iterate again to update. + isLinkingEnd_LA = 0x2; + + // Adjust offsets of any remaining blocks. + for (; lstIG;) + { + lstIG = lstIG->igNext; + if (!lstIG) + { + break; + } +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + + lstIG->igOffs += adjIG; + + assert(IsCodeAligned(lstIG->igOffs)); + } + goto AGAIN; + } + +#ifdef DEBUG + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nLabels list after the jump dist binding:\n\n"); + emitDispIGlist(false); + } + + emitCheckIGoffsets(); +#endif // DEBUG +} + /***************************************************************************** * * Emit a 32-bit LOONGARCH64 instruction @@ -2831,77 +3219,96 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); regNumber reg1 = id->idReg1(); - dst2 += 4; // assert(dst2 == dst); + dst2 += 4; switch (id->idCodeSize()) { - case 8: // if (id->idCodeSize() == 8) + case 8: { if (id->idReg2()) { // special for INT64_MAX or UINT32_MAX; code = emitInsCode(INS_addi_d); - // emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); - D_INST_2RI12(code, reg1, REG_R0, -1); + code |= (code_t)reg1; + code |= (code_t)REG_R0; + code |= 0xfff << 10; + *(code_t*)dst = code; dst += 4; ssize_t ui6 = (imm == INT64_MAX) ? 
1 : 32; code = emitInsCode(INS_srli_d); - // emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); code |= ((code_t)reg1 | ((code_t)reg1 << 5) | (ui6 << 10)); *(code_t*)dst = code; } else { code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, reg1, imm >> 12); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, reg1, imm); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; } break; } - case 12: // else if (id->idCodeSize() == 12) + case 12: { code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, reg1, imm >> 12); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, reg1, imm); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_lu32i_d); - // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); - D_INST_lu32i_d(code, reg1, imm >> 32); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 32) & 0xfffff) << 5; + *(code_t*)dst = code; break; } - case 16: // else if (id->idCodeSize() == 16) + case 16: { code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, reg1, imm >> 12); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, reg1, imm); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_lu32i_d); - D_INST_lu32i_d(code, reg1, imm >> 32); + code |= (code_t)reg1; + code |= (code_t)(imm >> 32) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_lu52i_d); - D_INST_lu52i_d(code, reg1, reg1, imm >> 52); + code |= (code_t)reg1; + code |= (code_t)(reg1) << 5; + code |= ((code_t)(imm >> 52) & 0xfff) << 10; + 
*(code_t*)dst = code; break; @@ -2988,7 +3395,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) else { code = emitInsCode(ins); - D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff); // NOTE:here must be REG_R21 !!! + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)REG_R21 << 5; // NOTE:here must be REG_R21 !!! + code |= (code_t)(doff & 0xfff) << 10; *(code_t*)dst = code; } dst += 4; @@ -3003,23 +3412,27 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (ins == INS_bl) { assert((imm >> 32) == 0xff); - // assert((imm >> 32) <= 0x7ffff); doff = (int)imm >> 12; - D_INST_lu12i_w(code, REG_R21, doff); + code |= (code_t)REG_R21; + code |= ((code_t)doff & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, REG_R21, imm); + code |= (code_t)reg1; + code |= (code_t)REG_R21 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; dst += 4; dst2 = dst; ins = INS_lu32i_d; code = emitInsCode(INS_lu32i_d); - // D_INST_lu32i_d(code, reg1, imm >> 32); - D_INST_lu32i_d(code, reg1, 0xff); + code |= (code_t)reg1; + code |= 0xff << 5; + *(code_t*)dst = code; dst += 4; } @@ -3030,22 +3443,26 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. assert((imm >> 32) == 0xff); - // assert((imm >> 32) <= 0x7ffff); dataOffs = (unsigned)(imm >> 12); // addr-hi-20bits. 
- D_INST_lu12i_w(code, REG_R21, dataOffs); + code |= (code_t)REG_R21; + code |= ((code_t)dataOffs & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; - // emitIns_R_I(INS_lu32i_d, size, REG_R21, imm >> 32); code = emitInsCode(INS_lu32i_d); - // D_INST_lu32i_d(code, REG_R21, imm >> 32); - D_INST_lu32i_d(code, REG_R21, 0xff); + code |= (code_t)REG_R21; + code |= 0xff << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(ins); - D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)REG_R21 << 5; + code |= (code_t)(doff & 0xfff) << 10; + *(code_t*)dst = code; dst += 4; dst2 = dst; @@ -3101,25 +3518,28 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) else { ssize_t imm = (ssize_t)tgtIG->igOffs + (ssize_t)emitCodeBlock; - // assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff assert((imm >> 32) == 0xff); code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, REG_R21, imm >> 12); + code |= (code_t)REG_R21; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, REG_R21, imm); + code |= (code_t)reg1; + code |= (code_t)REG_R21 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; dst += 4; dst2 = dst; - ins = INS_lu32i_d; - // emitIns_R_I(INS_lu32i_d, size, reg1, 0xff); + ins = INS_lu32i_d; code = emitInsCode(INS_lu32i_d); - // D_INST_lu32i_d(code, reg1, imm >> 32); - D_INST_lu32i_d(code, reg1, 0xff); + code |= (code_t)reg1; + code |= 0xff << 5; + *(code_t*)dst = code; } @@ -3155,6 +3575,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); imm -= 4; + assert((imm & 0x3) == 0); + ins = jmp->idIns(); assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JIRL: not used by now!!! 
switch (jmp->idCodeSize()) @@ -3163,22 +3585,28 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { regNumber reg2 = id->idReg2(); assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); - // assert((INS_bceqz <= ins) && (ins <= INS_bl)); // TODO-LoongArch64 + if ((INS_beq == ins) || (INS_bne == ins)) { if ((-0x400000 <= imm) && (imm < 0x400000)) { code = emitInsCode(INS_xor); - D_INST_3R(code, REG_R21, reg1, reg2); + code |= (code_t)REG_R21; + code |= (code_t)reg1 << 5; + code |= (code_t)reg2 << 10; + *(code_t*)dst = code; dst += 4; code = emitInsCode(ins == INS_beq ? INS_beqz : INS_bnez); - D_INST_Bcond_Z(code, REG_R21, imm); + code |= (code_t)REG_R21 << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); + *(code_t*)dst = code; dst += 4; } - else // if ((-0x8000000 <= imm) && (imm < 0x8000000)) + else { assert((-0x8000000 <= imm) && (imm < 0x8000000)); assert((INS_bne & 0xfffe) == INS_beq); @@ -3191,12 +3619,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += 4; code = emitInsCode(INS_b); - D_INST_B(code, imm); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + *(code_t*)dst = code; dst += 4; } - // else - // unreached(); } else if ((INS_bceqz == ins) || (INS_bcnez == ins)) { @@ -3204,13 +3632,15 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) assert((INS_bcnez & 0xfffe) == INS_bceqz); code = emitInsCode((instruction)((int)ins ^ 0x1)); - code |= ((code_t)reg1) << 5; /* rj */ + code |= ((code_t)reg1) << 5; code |= 0x800; *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_b); - D_INST_B(code, imm); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + *(code_t*)dst = code; dst += 4; } @@ -3228,13 +3658,15 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += 4; code = emitInsCode(INS_b); - D_INST_B(code, imm); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= 
((code_t)imm << 8) & 0x3fffc00; + *(code_t*)dst = code; dst += 4; } break; } - // case 12: + default: unreached(); break; @@ -3256,7 +3688,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) ins = id->idIns(); code = emitInsCode(ins); - D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); + code |= ((code_t)id->idReg1()) << 5; + code |= ((code_t)id->idReg2()); + code |= (((code_t)imm << 8) & 0x3fffc00); + *(code_t*)dst = code; dst += 4; @@ -3267,31 +3702,39 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst-relative. { ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. - assert(!(imm & 3)); + assert((imm & 3) == 0); ins = id->idIns(); code = emitInsCode(ins); if (ins == INS_b || ins == INS_bl) { - D_INST_B(code, imm); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; } else if (ins == INS_bnez || ins == INS_beqz) { - D_INST_Bcond_Z(code, id->idReg1(), imm); + code |= (code_t)id->idReg1() << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); } else if (ins == INS_bcnez || ins == INS_bceqz) { assert((code_t)(id->idReg1()) < 8); // cc - D_INST_Bcond_Z(code, id->idReg1(), imm); + code |= (code_t)id->idReg1() << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); } else if ((INS_beq <= ins) && (ins <= INS_bgeu)) { - D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); + code |= ((code_t)id->idReg1()) << 5; + code |= ((code_t)id->idReg2()); + code |= (((code_t)imm << 8) & 0x3fffc00); } else { assert(!"unimplemented on LOONGARCH yet"); } + *(code_t*)dst = code; dst += 4; @@ -3341,20 +3784,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { emitGCregDeadUpd(id->idReg1(), dst2); } - - // if (emitInsMayWriteMultipleRegs(id)) - //{ - // // INS_gslq etc... 
- // // "idReg2" is the secondary destination register - // if (id->idGCrefReg2() != GCT_NONE) - // { - // emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), *dp); - // } - // else - // { - // emitGCregDeadUpd(id->idReg2(), *dp); - // } - //} } // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC @@ -3455,6 +3884,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) #ifdef DEBUG +// clang-format off +static const char* const RegNames[] = +{ + #define REGDEF(name, rnum, mask, sname) sname, + #include "register.h" +}; +// clang-format on + /**************************************************************************** * * Display the given instruction. @@ -3472,13 +3909,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) return; } - // clang-format off - const char * const regName[] = {"zero", "ra", "tp", "sp", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "x0", "fp", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"}; - - const char * const FregName[] = {"fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "ft8", "ft9", "ft10", "ft11", "ft12", "ft13", "ft14", "ft15", "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7"}; - - const char * const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; - // clang-format on + const char* const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; unsigned int opcode = (code >> 26) & 0x3f; @@ -3507,8 +3938,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_ADDU16I_D: // 0x4 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si16 = (code >> 10) & 0xffff; printf(" 0x%llx 
addu16i.d %s, %s, %d\n", insstrs, rd, rj, si16); return; @@ -3519,7 +3950,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits: 31-25,MSB7 unsigned int inscode = (code >> 25) & 0x7f; - const char* rd = regName[code & 0x1f]; + const char* rd = RegNames[code & 0x1f]; unsigned int si20 = (code >> 5) & 0xfffff; switch (inscode) { @@ -3554,8 +3985,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits: 31-24,MSB8 unsigned int inscode = (code >> 24) & 0xff; - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si14 = ((code >> 10) & 0x3fff) << 2; si14 >>= 2; switch (inscode) @@ -3594,9 +4025,9 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits: 31-24,MSB8 unsigned int inscode = (code >> 22) & 0x3ff; - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* fd = FregName[code & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; switch (inscode) @@ -3657,7 +4088,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_1RI21_BEQZ: // 0x10 { - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; printf(" 0x%llx beqz %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); @@ -3665,7 +4096,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_1RI21_BNEZ: // 0x11 { - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; printf(" 0x%llx bnez 
%s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); @@ -3697,8 +4128,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_JIRL: // 0x13 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; if (id->idDebugOnlyInfo()->idMemCookie) @@ -3730,8 +4161,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BEQ: // 0x16 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx beq %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3739,8 +4170,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BNE: // 0x17 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bne %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3748,8 +4179,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BLT: // 0x18 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx blt %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3757,8 +4188,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BGE: // 0x19 { - const char* rd = regName[code & 0x1f]; - const char* rj = 
regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bge %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3766,8 +4197,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BLTU: // 0x1a { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bltu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3775,8 +4206,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BGEU: // 0x1b { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bgeu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3810,8 +4241,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits:31-10,MSB22 unsigned int inscode3 = (code >> 10) & 0x3fffff; - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; switch (inscode3) { case LA_2R_CLO_W: @@ -3895,15 +4326,15 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2R_ASRTLE_D: { - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx asrtle.d %s, %s\n", insstrs, rj, rk); return; } case LA_2R_ASRTGT_D: { - const char* rj = regName[(code >> 5) & 
0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx asrtgt.d %s, %s\n", insstrs, rj, rk); return; } @@ -3917,9 +4348,9 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_ALSL_W // LA_OP_ALSL_WU - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa2 = (code >> 15) & 0x3; if (0 == ((code >> 17) & 0x1)) { @@ -3940,18 +4371,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_OP_BYTEPICK_W: // 0x2 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa2 = (code >> 15) & 0x3; printf(" 0x%llx bytepick.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa2); return; } case LA_OP_BYTEPICK_D: // 0x3 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa3 = (code >> 15) & 0x7; printf(" 0x%llx bytepick.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa3); return; @@ -3965,9 +4396,9 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits: 31-15,MSB17 unsigned int inscode2 = (code >> 15) & 0x1ffff; - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = 
RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; switch (inscode2) { @@ -4137,9 +4568,9 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_OP_ALSL_D: // 0xb { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa2 = (code >> 15) & 0x3; printf(" 0x%llx alsl.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); return; @@ -4156,8 +4587,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_BSTRINS_W // LA_OP_BSTRPICK_W - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int lsbw = (code >> 10) & 0x1f; unsigned int msbw = (code >> 16) & 0x1f; if (!(code & 0x8000)) @@ -4186,8 +4617,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_SLLI_W: // LA_OP_SLLI_D: - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; @@ -4211,8 +4642,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_SRLI_W: // LA_OP_SRLI_D: - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; @@ -4236,8 +4667,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_SRAI_W: // LA_OP_SRAI_D: - const 
char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; @@ -4261,8 +4692,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_ROTRI_W: // LA_OP_ROTRI_D: - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; @@ -4292,8 +4723,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_OP_BSTRINS_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; printf(" 0x%llx bstrins.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); @@ -4301,8 +4732,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_OP_BSTRPICK_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; printf(" 0x%llx bstrpick.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); @@ -4312,11 +4743,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits: 31-15,MSB17 unsigned int inscode1 = (code >> 15) & 0x1ffff; - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; 
+ const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; switch (inscode1) { @@ -4588,8 +5019,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI12_SLTI: // 0x8 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx slti %s, %s, %d\n", insstrs, rd, rj, si12); @@ -4597,8 +5028,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI12_SLTUI: // 0x9 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx sltui %s, %s, %d\n", insstrs, rd, rj, si12); @@ -4606,8 +5037,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI12_ADDI_W: // 0xa { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx addi.w %s, %s, %d\n", insstrs, rd, rj, si12); @@ -4615,8 +5046,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI12_ADDI_D: // 0xb { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx addi.d %s, %s, %ld\n", insstrs, rd, rj, si12); @@ -4624,32 +5055,32 @@ void emitter::emitDisInsName(code_t code, const BYTE* 
dst, instrDesc* id) } case LA_2RI12_LU52I_D: // 0xc { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int si12 = (code >> 10) & 0xfff; printf(" 0x%llx lu52i.d %s, %s, 0x%x\n", insstrs, rd, rj, si12); return; } case LA_2RI12_ANDI: // 0xd { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx andi %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; } case LA_2RI12_ORI: // 0xe { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx ori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; } case LA_2RI12_XORI: // 0xf { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx xori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; @@ -4672,73 +5103,73 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { case LA_4R_FMADD_S: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMADD_D: { - const char* fd = FregName[code & 0x1f]; - 
const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMSUB_S: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMSUB_D: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMADD_S: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fnmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case 
LA_4R_FNMADD_D: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fnmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMSUB_S: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fnmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMSUB_D: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fnmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } @@ -4758,8 +5189,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) // bits:19-15,cond unsigned int cond = (code >> 15) & 0x1f; const char* cd = CFregName[code & 0x7]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; switch (cond) { case 0x0: @@ -4838,8 +5269,8 @@ void 
emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) // bits:19-15,cond unsigned int cond = (code >> 15) & 0x1f; const char* cd = CFregName[code & 0x7]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; switch (cond) { case 0x0: @@ -4915,9 +5346,9 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_4R_FSEL: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* ca = CFregName[(code >> 15) & 0x7]; printf(" 0x%llx fsel %s, %s, %s, %s\n", insstrs, fd, fj, fk, ca); return; @@ -4935,89 +5366,89 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { case LA_3R_LDX_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* 
rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stx.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stx.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stx.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stx.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case 
LA_3R_LDX_BU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.bu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_HU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.hu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_WU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } @@ -5026,321 +5457,321 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) return; case LA_3R_FLDX_S: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldx.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDX_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldx.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTX_S: { - const 
char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstx.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTX_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstx.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_AMSWAP_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amswap.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amswap.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amadd.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 
0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amadd.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amand.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amand.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amor.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amor.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 
0x1f]; printf(" 0x%llx amxor.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amxor.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_WU: { - const char* rd = regName[code & 
0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_WU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amswap_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd 
= RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amswap_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amadd_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amadd_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amand_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amand_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code 
>> 10) & 0x1f]; printf(" 0x%llx amor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amxor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amxor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_W: 
{ - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_WU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_DU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_WU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_DU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const 
char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } @@ -5358,193 +5789,193 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_3R_FLDGT_S: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldgt.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDGT_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldgt.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDLE_S: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldle.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDLE_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldle.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTGT_S: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const 
char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstgt.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTGT_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstgt.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTLE_S: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstle.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTLE_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstle.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_LDGT_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = 
RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } 
case LA_3R_LDLE_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) 
& 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stle.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stle.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stle.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stle.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } @@ -6314,13 +6745,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins //------------------------------------------------------------------------ // emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. // +// TODO-LoongArch64: supporting SIMD. // Arguments: -// reg - A general-purpose register or SIMD and floating-point register. -// size - A register size. +// reg - A general-purpose register orfloating-point register. +// size - unused parameter. // varName - unused parameter. 
// // Return value: -// A string that represents a general-purpose register name or SIMD and floating-point scalar register name. +// A string that represents a general-purpose register name or floating-point scalar register name. // const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) { @@ -6362,70 +6794,4 @@ bool emitter::IsMovInstruction(instruction ins) } } } - -//---------------------------------------------------------------------------------------- -// IsRedundantMov: -// Check if the current `mov` instruction is redundant and can be omitted. -// A `mov` is redundant in following 3 cases: -// -// 1. Move to same register -// (Except 4-byte movement like "mov w1, w1" which zeros out upper bits of x1 register) -// -// mov Rx, Rx -// -// 2. Move that is identical to last instruction emitted. -// -// mov Rx, Ry # <-- last instruction -// mov Rx, Ry # <-- current instruction can be omitted. -// -// 3. Opposite Move as that of last instruction emitted. -// -// mov Rx, Ry # <-- last instruction -// mov Ry, Rx # <-- current instruction can be omitted. -// -// Arguments: -// ins - The current instruction -// size - Operand size of current instruction -// dst - The current destination -// src - The current source -// canSkip - The move can be skipped as it doesn't represent special semantics -// -// Return Value: -// true if previous instruction moved from current dst to src. - -bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip) -{ - NYI_LOONGARCH64("IsRedundantMov-----unimplemented on LOONGARCH64 yet----"); - return false; -} - -//---------------------------------------------------------------------------------------- -// IsRedundantLdStr: -// For ldr/str pair next to each other, check if the current load or store is needed or is -// the value already present as of previous instruction. 
-// -// ldr x1, [x2, #56] -// str x1, [x2, #56] <-- redundant -// -// OR -// -// str x1, [x2, #56] -// ldr x1, [x2, #56] <-- redundant - -// Arguments: -// ins - The current instruction -// dst - The current destination -// src - The current source -// imm - Immediate offset -// size - Operand size -// fmt - Format of instruction -// Return Value: -// true if previous instruction already has desired value in register/memory location. - -bool emitter::IsRedundantLdStr( - instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt) -{ - NYI_LOONGARCH64("IsRedundantLdStr-----unimplemented on LOONGARCH64 yet----"); - return false; -} #endif // defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index cbeb66f7ded82..9b79d544a9a3a 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -88,12 +88,30 @@ static bool isValidSimm12(ssize_t value) return -(((int)1) << 11) <= value && value < (((int)1) << 11); }; +// Returns true if 'value' is a legal unsigned immediate 12 bit encoding. +static bool isValidUimm12(ssize_t value) +{ + return (0 == (value >> 12)); +} + +// Returns true if 'value' is a legal unsigned immediate 11 bit encoding. +static bool isValidUimm11(ssize_t value) +{ + return (0 == (value >> 11)); +} + // Returns true if 'value' is a legal signed immediate 20 bit encoding. static bool isValidSimm20(ssize_t value) { return -(((int)1) << 19) <= value && value < (((int)1) << 19); }; +// Returns true if 'value' is a legal signed immediate 38 bit encoding. +static bool isValidSimm38(ssize_t value) +{ + return -(((ssize_t)1) << 37) <= value && value < (((ssize_t)1) << 37); +}; + // Returns the number of bits used by the given 'size'. 
inline static unsigned getBitWidth(emitAttr size) { diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index fa2a28179dd48..826d89dd2a491 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1203,7 +1203,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { // TODO-LoongArch64: For larger block sizes CodeGen can choose to use 16-byte SIMD instructions. // here just used a temp register. - buildInternalFloatRegisterDefForNode(blkNode); + buildInternalIntRegisterDefForNode(blkNode); } } break; @@ -1260,27 +1260,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) switch (blkNode->gtBlkOpKind) { case GenTreeBlk::BlkOpKindUnroll: - { buildInternalIntRegisterDefForNode(blkNode); - - const bool isSrcAddrLocal = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) || - ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr()); - const bool isDstAddrLocal = dstAddr->OperIsLocalAddr(); - - // TODO-LoongArch64: using 16-byte SIMD instructions. - const bool srcAddrMayNeedReg = - isSrcAddrLocal || ((srcAddrOrFill != nullptr) && srcAddrOrFill->isContained()); - const bool dstAddrMayNeedReg = isDstAddrLocal || dstAddr->isContained(); - - // The following allocates an additional integer register in a case - // when a load instruction and a store instruction cannot be encoded using offset - // from a corresponding base register. - if (srcAddrMayNeedReg && dstAddrMayNeedReg) - { - buildInternalIntRegisterDefForNode(blkNode); - } - } - break; + break; case GenTreeBlk::BlkOpKindHelper: dstAddrRegMask = RBM_ARG_0; diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index b58b7757b41d2..8f3cd157016bb 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -51,7 +51,7 @@ REGDEF(S8, 31, 0x80000000, "s8" ) //NOTE for LoongArch64: // The `REG_R21` which alias `REG_X0` is specially reserved !!! 
-// It can be used only by manully and should be very careful!!! +// It should be only used with hand written assembly code and should be very careful!!! // e.g. right now LoongArch64's backend-codegen/emit, there is usually // a need for an extra register for cases like // constructing a large imm or offset, saving some intermediate result From 5e84a3ecc85a1358e670c699e126ca33286ea7b0 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 18 Mar 2022 09:13:43 +0800 Subject: [PATCH 36/46] [LoongArch64] amend some code for CR round2. --- src/coreclr/jit/codegen.h | 6 - src/coreclr/jit/codegencommon.cpp | 10 +- src/coreclr/jit/codegeninterface.h | 5 - src/coreclr/jit/codegenloongarch64.cpp | 415 +++-------- src/coreclr/jit/compiler.cpp | 12 - src/coreclr/jit/compiler.h | 7 - src/coreclr/jit/emit.cpp | 16 +- src/coreclr/jit/emit.h | 8 +- src/coreclr/jit/emitloongarch64.cpp | 4 +- src/coreclr/jit/gentree.cpp | 7 - src/coreclr/jit/instr.cpp | 5 +- src/coreclr/jit/instr.h | 2 +- src/coreclr/jit/instrsloongarch64.h | 780 ++++++++++---------- src/coreclr/jit/jitconfigvalues.h | 7 - src/coreclr/jit/lclvars.cpp | 213 +----- src/coreclr/jit/lower.cpp | 29 +- src/coreclr/jit/lowerloongarch64.cpp | 49 +- src/coreclr/jit/morph.cpp | 83 +-- src/coreclr/jit/register_arg_convention.cpp | 2 +- src/coreclr/jit/target.h | 23 +- src/coreclr/jit/targetloongarch64.h | 2 +- src/coreclr/jit/unwind.h | 9 +- 22 files changed, 588 insertions(+), 1106 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 89d8ba379b124..baa48b722454b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -644,12 +644,6 @@ class CodeGen final : public CodeGenInterface bool genSaveFpLrWithAllCalleeSavedRegisters; #endif // TARGET_ARM64 -#ifdef TARGET_LOONGARCH64 - virtual void SetSaveFpRaWithAllCalleeSavedRegisters(bool value); - virtual bool IsSaveFpRaWithAllCalleeSavedRegisters() const; - bool genSaveFpRaWithAllCalleeSavedRegisters; -#endif // 
TARGET_LOONGARCH64 - //------------------------------------------------------------------------- // // End prolog/epilog generation diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index e69b2cc38aa6d..0a773b0f1ad47 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -127,9 +127,9 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) /* Assume that we not fully interruptible */ SetInterruptible(false); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) SetHasTailCalls(false); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef DEBUG genInterruptibleUsed = false; genCurDispOffset = (unsigned)-1; @@ -138,10 +138,6 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) #ifdef TARGET_ARM64 genSaveFpLrWithAllCalleeSavedRegisters = false; #endif // TARGET_ARM64 - -#ifdef TARGET_LOONGARCH64 - genSaveFpRaWithAllCalleeSavedRegisters = false; -#endif // TARGET_LOONGARCH64 } void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg) @@ -4298,7 +4294,7 @@ void CodeGen::genEnregisterIncomingStackArgs() bool FPbased; int base = compiler->lvaFrameAddress(varNum, &FPbased); - if ((-2048 <= base) && (base < 2048)) + if (emitter::isValidSimm12(base)) { GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); } diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index f692193104f12..dbd53ffbad46f 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -196,11 +196,6 @@ class CodeGenInterface virtual bool IsSaveFpLrWithAllCalleeSavedRegisters() const = 0; #endif // TARGET_ARM64 -#ifdef TARGET_LOONGARCH64 - virtual void SetSaveFpRaWithAllCalleeSavedRegisters(bool value) = 0; - virtual bool IsSaveFpRaWithAllCalleeSavedRegisters() const = 0; -#endif // TARGET_LOONGARCH64 - regNumber 
genGetThisArgReg(GenTreeCall* call) const; #ifdef TARGET_XARCH diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 260b54ee15000..b6a7442c19dce 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1053,18 +1053,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) 0); } else if (genFuncletInfo.fiFrameType == 2) - { - // fiFrameType constraints: - assert(frameSize < 0); - assert(frameSize >= -2048); - - assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); - genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); - - genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, - 0); - } - else if (genFuncletInfo.fiFrameType == 3) { // fiFrameType constraints: assert(frameSize < -2048); @@ -1088,21 +1076,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) genStackPointerAdjustment(frameSize + SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); } - else if (genFuncletInfo.fiFrameType == 4) - { - // fiFrameType constraints: - assert(frameSize < -2048); - - offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); - int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; - - genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); - - genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, 0); - - genStackPointerAdjustment(frameSize + SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); - } else { unreached(); @@ -1214,17 +1187,6 @@ void CodeGen::genFuncletEpilog() genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); } else if (genFuncletInfo.fiFrameType == 2) - { - // fiFrameType constraints: - assert(frameSize >= -2048); - assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); - - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, 
genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); - - // generate daddiu SP,SP,imm - genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); - } - else if (genFuncletInfo.fiFrameType == 3) { // fiFrameType constraints: assert(frameSize < -2048); @@ -1251,21 +1213,6 @@ void CodeGen::genFuncletEpilog() // second, generate daddiu SP,SP,imm for remaine space. genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); } - else if (genFuncletInfo.fiFrameType == 4) - { - // fiFrameType constraints: - assert(frameSize < -2048); - - int offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); - int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; - - genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); - - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset, 0); - - genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); - } else { unreached(); @@ -1309,16 +1256,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); assert((saveRegsCount == compiler->compCalleeRegsPushed) || (saveRegsCount == compiler->compCalleeRegsPushed - 1)); - unsigned saveRegsPlusPSPSize; - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - saveRegsPlusPSPSize = - roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize /* -2*8*/; - } - else - { - saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; - } + unsigned saveRegsPlusPSPSize = + roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; if (compiler->info.compIsVarArgs) { @@ -1345,32 +1284,16 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() if (maxFuncletFrameSizeAligned <= (2048 - 8)) { - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - genFuncletInfo.fiFrameType = 1; - saveRegsPlusPSPSize -= 2 * 
8; // FP/RA - } - else - { - genFuncletInfo.fiFrameType = 2; - SP_to_FPRA_save_delta += REGSIZE_BYTES + PSPSize; - } + genFuncletInfo.fiFrameType = 1; + saveRegsPlusPSPSize -= 2 * 8; // FP/RA } else { unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - genFuncletInfo.fiFrameType = 3; - saveRegsPlusPSPSize -= 2 * 8; // FP/RA - } - else - { - genFuncletInfo.fiFrameType = 4; - SP_to_FPRA_save_delta += REGSIZE_BYTES + PSPSize; - } + genFuncletInfo.fiFrameType = 2; + saveRegsPlusPSPSize -= 2 * 8; // FP/RA } int CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize; @@ -4903,16 +4826,7 @@ int CodeGenInterface::genSPtoFPdelta() const { assert(isFramePointerUsed()); - int delta; - if (IsSaveFpRaWithAllCalleeSavedRegisters()) - { - delta = genTotalFrameSize() - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - - (compiler->compCalleeRegsPushed - 1) * REGSIZE_BYTES; - } - else - { - delta = compiler->lvaOutgoingArgSpaceSize; - } + int delta = compiler->lvaOutgoingArgSpaceSize; assert(delta >= 0); return delta; @@ -4975,23 +4889,6 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() const return callerSPtoSPdelta; } -//--------------------------------------------------------------------- -// SetSaveFpRaWithAllCalleeSavedRegisters - Set the variable that indicates if FP/RA registers -// are stored with the rest of the callee-saved registers. -void CodeGen::SetSaveFpRaWithAllCalleeSavedRegisters(bool value) -{ - JITDUMP("Setting genSaveFpRaWithAllCalleeSavedRegisters to %s\n", dspBool(value)); - genSaveFpRaWithAllCalleeSavedRegisters = value; -} - -//--------------------------------------------------------------------- -// IsSaveFpRaWithAllCalleeSavedRegisters - Return the value that indicates where FP/RA registers -// are stored in the prolog. 
-bool CodeGen::IsSaveFpRaWithAllCalleeSavedRegisters() const -{ - return genSaveFpRaWithAllCalleeSavedRegisters; -} - /***************************************************************************** * Emit a call to a helper function. */ @@ -9031,79 +8928,48 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize); compiler->unwindAllocStack(totalFrameSize); - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - // Case #1. - // - // Generate: - // daddiu sp, sp, -framesz - // sd fp, outsz(sp) - // sd ra, outsz+8(sp) - // - // The (totalFrameSize <= 2047) condition ensures the offsets of sd/ld. - // - // After saving callee-saved registers, we establish the frame pointer with: - // daddiu fp, sp, offset-fp - // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. - - JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); - - frameType = 1; - - offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize; + // Case #1. + // + // Generate: + // daddiu sp, sp, -framesz + // sd fp, outsz(sp) + // sd ra, outsz+8(sp) + // + // The (totalFrameSize <= 2047) condition ensures the offsets of sd/ld. + // + // After saving callee-saved registers, we establish the frame pointer with: + // daddiu fp, sp, offset-fp + // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp); - compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp); + JITDUMP("Frame type 1. 
#outsz=%d; #framesz=%d; LclFrameSize=%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8); - compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8); + frameType = 1; - maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA + offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize; - offset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // FP/RA - } - else - { - frameType = 2; + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp); + compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp); - offsetSpToSavedFp = genSPtoFPdelta(); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8); + compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8); - JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, fpDelta:%d\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize, - offsetSpToSavedFp); + maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA - offset = compiler->compLclFrameSize; - } + offset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // FP/RA } else { - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - JITDUMP("Frame type 3. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); - - frameType = 3; + JITDUMP("Frame type 2. 
#outsz=%d; #framesz=%d; LclFrameSize=%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); - maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA + frameType = 2; - offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; - } - else - { - frameType = 4; + maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA - JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, SPDelta-1:%d\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize, - calleeSaveSPDelta); - - offset = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; - offsetSpToSavedFp = offset + REGSIZE_BYTES; - } + offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; } } else @@ -9124,28 +8990,14 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta); -// For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, -// so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't -// need to add codes at all. - -// if (compiler->info.compIsVarArgs) -//{ -// JITDUMP(" compIsVarArgs=true\n"); - -// // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here. 
-// assert((offset % 16) == 0); -// for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1))) -// { -// regNumber reg2 = REG_NEXT(reg1); -// // sd REG, offset(SP) -// // sd REG + 1, (offset+8)(SP) -// GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg1, REG_SPBASE, offset); -// compiler->unwindNop(); -// GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg2, REG_SPBASE, offset + 8); -// compiler->unwindNop(); -// offset += 2 * REGSIZE_BYTES; -// } -//} + // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, + // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't + // need to add codes at all. + if (compiler->info.compIsVarArgs) + { + JITDUMP(" compIsVarArgs=true\n"); + NYI_LOONGARCH64("genPushCalleeSavedRegisters - compIsVarArgs"); + } #ifdef DEBUG if (compiler->opts.disAsm) @@ -9158,10 +9010,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // offsetSpToSavedFp = genSPtoFPdelta(); } else if (frameType == 2) - { - // offsetSpToSavedFp = genSPtoFPdelta(); - } - else if (frameType == 3) { if (compiler->lvaOutgoingArgSpaceSize >= 2040) { @@ -9202,19 +9050,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe establishFramePointer = false; } - else if (frameType == 4) - { - genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); - - establishFramePointer = false; - - int remainingFrameSz = totalFrameSize - calleeSaveSPDelta; - - if (remainingFrameSz > 0) - { - genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegZeroed, /* reportUnwindData */ true); - } - } else { unreached(); @@ -9261,131 +9096,84 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); } - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? 
%s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, - dspBool(compiler->compLocallocUsed)); + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed)); - frameType = 1; + frameType = 1; - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. - calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; - } - else - { - frameType = 2; - - calleeSaveSPOffset = compiler->compLclFrameSize; - - JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, - dspBool(compiler->compLocallocUsed)); - } - // calleeSaveSPDelta = 0; + calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; } else { - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " - "localloc? %s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - frameType = 3; - - int outSzAligned; - if (compiler->lvaOutgoingArgSpaceSize >= 2040) - { - int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - offset; + JITDUMP("Frame type 2(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? 
%s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); - int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; - calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); - offset2 = calleeSaveSPDelta - offset2; + frameType = 2; - if (compiler->compLocallocUsed) - { - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); - compiler->unwindSetFrameReg(REG_FPBASE, offset2); - } - else - { - outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; - genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); - } - - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); - compiler->unwindSaveReg(REG_RA, offset2 + 8); - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); - compiler->unwindSaveReg(REG_FP, offset2); + int outSzAligned; + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - offset; - genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); + offset2 = calleeSaveSPDelta - offset2; - calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); } else { - int offset2 = 
compiler->lvaOutgoingArgSpaceSize; - if (compiler->compLocallocUsed) - { - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); - compiler->unwindSetFrameReg(REG_FPBASE, offset2); - } + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); + } - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); - compiler->unwindSaveReg(REG_RA, offset2 + 8); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); - compiler->unwindSaveReg(REG_FP, offset2); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); - calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, - /* reportUnwindData */ true); - } + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); } else { - frameType = 4; - - JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? 
%s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; - + int offset2 = compiler->lvaOutgoingArgSpaceSize; if (compiler->compLocallocUsed) { - calleeSaveSPDelta = calleeSaveSPOffset + REGSIZE_BYTES; - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -calleeSaveSPDelta); - compiler->unwindSetFrameReg(REG_FPBASE, calleeSaveSPDelta); - } - else - { - calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; - genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); } - calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. 
+ + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, + /* reportUnwindData */ true); } } } @@ -9414,16 +9202,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } else if (frameType == 2) { - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); - compiler->unwindAllocStack(totalFrameSize); - } - else if (frameType == 3) - { - // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } - else if (frameType == 4) - { - // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + // The stack pointer has already been restored above; nothing more to do here. } else { diff --git a/src/coreclr/jit/compiler.cpp index 49a0005a810e7..be5c3e5a41c43 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2858,11 +2858,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitSaveFpLrWithCalleeSavedRegisters = 0; #endif // defined(TARGET_ARM64)
- opts.compJitSaveFpRaWithCalleeSavedRegisters = 0; -#endif // defined(TARGET_LOONGARCH64) - #ifdef DEBUG opts.dspInstrs = false; opts.dspLines = false; @@ -3361,13 +3356,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters(); } #endif // defined(DEBUG) && defined(TARGET_ARM64) - -#if defined(DEBUG) && defined(TARGET_LOONGARCH64) - if ((s_pJitMethodSet == nullptr) || s_pJitMethodSet->IsActiveMethod(info.compFullName, info.compMethodHash())) - { - opts.compJitSaveFpRaWithCalleeSavedRegisters = JitConfig.JitSaveFpRaWithCalleeSavedRegisters(); - } -#endif // defined(DEBUG) && defined(TARGET_LOONGARCH64) } #ifdef DEBUG diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e0aac6909a6c0..328b4fff82553 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10007,13 +10007,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX int compJitSaveFpLrWithCalleeSavedRegisters; #endif // defined(TARGET_ARM64) -#if defined(TARGET_LOONGARCH64) - // Decision about whether to save FP/RA registers with callee-saved registers (see - // COMPlus_JitSaveFpRaWithCalleSavedRegisters). - // TODO: will delete this in future. 
- int compJitSaveFpRaWithCalleeSavedRegisters; -#endif // defined(TARGET_LOONGARCH64) - #ifdef CONFIGURABLE_ARM_ABI bool compUseSoftFP = false; #else diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 55d517f996d1d..c8f0b0701a70a 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -6617,10 +6617,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, ig->igSize = (unsigned short)(cp - bp); } -#ifdef TARGET_LOONGARCH64 - unsigned actualCodeSize = (unsigned)(cp - codeBlock); -#endif - #if EMIT_TRACK_STACK_DEPTH assert(emitCurStackLvl == 0); #endif @@ -6661,7 +6657,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, emitUpdateLiveGCregs(GCT_GCREF, RBM_NONE, cp); } -#ifndef TARGET_LOONGARCH64 /* Patch any forward jumps */ if (emitFwdJumps) @@ -6726,6 +6721,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_ARM64) assert(!jmp->idAddr()->iiaHasInstrCount()); emitOutputLJ(NULL, adr, jmp); +#elif defined(TARGET_LOONGARCH64) + // For LoongArch64 `emitFwdJumps` is always false. + unreached(); #else #error Unsupported or unset target architecture #endif @@ -6739,6 +6737,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_ARMARCH) assert(!jmp->idAddr()->iiaHasInstrCount()); emitOutputLJ(NULL, adr, jmp); +#elif defined(TARGET_LOONGARCH64) + // For LoongArch64 `emitFwdJumps` is always false. + unreached(); #else #error Unsupported or unset target architecture #endif @@ -6746,7 +6747,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } } -#endif //! 
TARGET_LOONGARCH64 #ifdef DEBUG if (emitComp->opts.disAsm) @@ -6755,9 +6755,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #endif -#ifndef TARGET_LOONGARCH64 unsigned actualCodeSize = emitCurCodeOffs(cp); -#endif #if defined(TARGET_ARM64) assert(emitTotalCodeSize == actualCodeSize); @@ -6848,7 +6846,6 @@ void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp) * instruction number for this instruction */ -#ifndef TARGET_LOONGARCH64 unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) { instrDesc* id = (instrDesc*)ig->igData; @@ -6877,7 +6874,6 @@ unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) assert(!"emitFindInsNum failed"); return -1; } -#endif /***************************************************************************** * diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index c64a67192b645..60971839bc507 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1790,13 +1790,11 @@ class emitter #endif // FEATURE_EH_FUNCLETS -/************************************************************************/ -/* Methods to record a code position and later convert to offset */ -/************************************************************************/ + /************************************************************************/ + /* Methods to record a code position and later convert to offset */ + /************************************************************************/ -#ifndef TARGET_LOONGARCH64 unsigned emitFindInsNum(insGroup* ig, instrDesc* id); -#endif UNATIVE_OFFSET emitFindOffset(insGroup* ig, unsigned insNum); /************************************************************************/ diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index c7ba9f4a55ffe..098227a7ce230 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -500,7 +500,7 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) // 
clang-format off /*static*/ const BYTE CodeGenInterface::instInfo[] = { - #define INST(id, nm, fp, info, fmt, e1) info, + #define INST(id, nm, info, e1) info, #include "instrs.h" }; // clang-format on @@ -556,7 +556,7 @@ inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) // clang-format off const static code_t insCode[] = { - #define INST(id, nm, fp, info, fmt, e1) e1, + #define INST(id, nm, info, e1) e1, #include "instrs.h" }; // clang-format on diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2b305d018d86e..18b5fee55e7cc 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -6108,13 +6108,6 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) switch (type) { case TYP_INT: -#ifdef TARGET_LOONGARCH64 - case TYP_UINT: -// For LoongArch64, the register $r0 is always const-zero with 64bits-width. -// Besides the instructions's operation of the 64bits and 32bits using the whole -// 64bits-width register which is unlike the AMD64 and ARM64. -// So for UINT type, LoongArch64 can't share with INT liking AMD64 and ARM64. -#endif zero = gtNewIconNode(0); break; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index bae791f106393..6406ceb3715da 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -67,7 +67,7 @@ const char* CodeGen::genInsName(instruction ins) #include "instrs.h" #elif defined(TARGET_LOONGARCH64) - #define INST(id, nm, fp, ldst, fmt, e1) nm, + #define INST(id, nm, ldst, e1) nm, #include "instrs.h" #else @@ -425,7 +425,8 @@ void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr s } #ifdef TARGET_LOONGARCH64 - NYI_LOONGARCH64("inst_RV-----unimplemented/unused on LOONGARCH64 yet----"); + // inst_RV is not used for LoongArch64, so there is no need to define `emitIns_R`. 
+ NYI_LOONGARCH64("inst_RV-----unused on LOONGARCH64----"); #else GetEmitter()->emitIns_R(ins, size, reg); #endif diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 92b1c6f63f065..a01492d08b8a9 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -52,7 +52,7 @@ enum instruction : unsigned INS_lea, // Not a real instruction. It is used for load the address of stack locals #elif defined(TARGET_LOONGARCH64) - #define INST(id, nm, fp, ldst, fmt, e1) INS_##id, + #define INST(id, nm, ldst, e1) INS_##id, #include "instrs.h" INS_lea, // Not a real instruction. It is used for load the address of stack locals diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h index e3da6728fd982..ada87672e397a 100644 --- a/src/coreclr/jit/instrsloongarch64.h +++ b/src/coreclr/jit/instrsloongarch64.h @@ -6,9 +6,7 @@ * * id -- the enum name for the instruction * nm -- textual name (for assembly dipslay) - * fp -- floating point instruction * ld/st/cmp -- load/store/compare instruction - * fmt -- encoding format used by this instruction * encode -- encoding 1 * ******************************************************************************/ @@ -31,32 +29,32 @@ // emitInsMayWriteMultipleRegs in emitLoongarch64.cpp. // clang-format off -INST(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE) -INST(nop , "nop", 0, 0, IF_LA, 0x03400000) +INST(invalid, "INVALID", 0, BAD_CODE) +INST(nop , "nop", 0, 0x03400000) // INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number. 
-INST(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000) -INST(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100) +INST(bceqz, "bceqz", 0, 0x48000000) +INST(bcnez, "bcnez", 0, 0x48000100) -INST(beq, "beq", 0, 0, IF_LA, 0x58000000) -INST(bne, "bne", 0, 0, IF_LA, 0x5c000000) +INST(beq, "beq", 0, 0x58000000) +INST(bne, "bne", 0, 0x5c000000) -INST(blt, "blt", 0, 0, IF_LA, 0x60000000) -INST(bge, "bge", 0, 0, IF_LA, 0x64000000) -INST(bltu, "bltu", 0, 0, IF_LA, 0x68000000) -INST(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000) +INST(blt, "blt", 0, 0x60000000) +INST(bge, "bge", 0, 0x64000000) +INST(bltu, "bltu", 0, 0x68000000) +INST(bgeu, "bgeu", 0, 0x6c000000) ////R_I. -INST(beqz, "beqz", 0, 0, IF_LA, 0x40000000) -INST(bnez, "bnez", 0, 0, IF_LA, 0x44000000) +INST(beqz, "beqz", 0, 0x40000000) +INST(bnez, "bnez", 0, 0x44000000) ////I. -INST(b, "b", 0, 0, IF_LA, 0x50000000) -INST(bl, "bl", 0, 0, IF_LA, 0x54000000) +INST(b, "b", 0, 0x50000000) +INST(bl, "bl", 0, 0x54000000) /////////////////////////////////////////////////////////////////////////////////////////// ////NOTE: Begin -//// the fllowing instructions will be used by emitter::emitInsMayWriteToGCReg(). +//// the following instructions will be used by emitter::emitInsMayWriteToGCReg(). //////////////////////////////////////////////// // enum name FP LD/ST FMT ENCODE // @@ -64,214 +62,214 @@ INST(bl, "bl", 0, 0, IF_LA, 0x54000000) /////////////////////////////////////////////////////////////////////////////////////////// // mov rd,rj // In fact, mov is an alias instruction, "ori rd,rj,0" -INST(mov, "mov", 0, 0, IF_LA, 0x03800000) +INST(mov, "mov", 0, 0x03800000) //dneg is a alias instruction. //sub_d rd, zero, rk -INST(dneg, "dneg", 0, 0, IF_LA, 0x00118000) +INST(dneg, "dneg", 0, 0x00118000) //neg is a alias instruction. //sub_w rd, zero, rk -INST(neg, "neg", 0, 0, IF_LA, 0x00110000) +INST(neg, "neg", 0, 0x00110000) //not is a alias instruction. 
//nor rd, rj, zero -INST(not, "not", 0, 0, IF_LA, 0x00140000) +INST(not, "not", 0, 0x00140000) // enum:id name FP LD/ST Formate ENCODE ////R_R_R. -INST(add_w, "add.w", 0, 0, IF_LA, 0x00100000) -INST(add_d, "add.d", 0, 0, IF_LA, 0x00108000) -INST(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) -INST(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) - -INST(and, "and", 0, 0, IF_LA, 0x00148000) -INST(or, "or", 0, 0, IF_LA, 0x00150000) -INST(nor, "nor", 0, 0, IF_LA, 0x00140000) -INST(xor, "xor", 0, 0, IF_LA, 0x00158000) -INST(andn, "andn", 0, 0, IF_LA, 0x00168000) -INST(orn, "orn", 0, 0, IF_LA, 0x00160000) - -INST(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) -INST(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) -INST(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) -INST(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) -INST(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) -INST(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) -INST(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) -INST(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) -INST(div_w, "div.w", 0, 0, IF_LA, 0x00200000) -INST(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) -INST(div_d, "div.d", 0, 0, IF_LA, 0x00220000) -INST(div_du, "div.du", 0, 0, IF_LA, 0x00230000) -INST(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) -INST(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) -INST(mod_d, "mod.d", 0, 0, IF_LA, 0x00228000) -INST(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) - -INST(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) -INST(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) -INST(sra_w, "sra.w", 0, 0, IF_LA, 0x00180000) -INST(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) -INST(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) -INST(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) -INST(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) -INST(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) - -INST(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) -INST(masknez, "masknez", 0, 0, IF_LA, 0x00138000) - -INST(slt, "slt", 0, 0, IF_LA, 0x00120000) -INST(sltu, "sltu", 0, 0, IF_LA, 0x00128000) - -INST(amswap_w, "amswap.w", 0, 0, 
IF_LA, 0x38600000) -INST(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) -INST(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) -INST(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) -INST(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) -INST(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) -INST(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) -INST(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) -INST(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) -INST(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) -INST(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) -INST(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) -INST(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) -INST(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) -INST(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) -INST(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) -INST(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) -INST(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) -INST(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) -INST(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) -INST(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) -INST(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) -INST(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) -INST(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) -INST(ammin_w, "ammin.w", 0, 0, IF_LA, 0x38660000) -INST(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) -INST(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) -INST(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) -INST(ammax_wu, "ammax.wu", 0, 0, IF_LA, 0x38670000) -INST(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) -INST(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) -INST(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) -INST(ammin_wu, "ammin.wu", 0, 0, IF_LA, 0x38680000) -INST(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) -INST(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) -INST(ammin_db_du, "ammin_db.du", 0, 0, IF_LA, 0x38718000) - -INST(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) -INST(crc_w_h_w, 
"crc.w.h.w", 0, 0, IF_LA, 0x00248000) -INST(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) -INST(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) -INST(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) -INST(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) -INST(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) -INST(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) +INST(add_w, "add.w", 0, 0x00100000) +INST(add_d, "add.d", 0, 0x00108000) +INST(sub_w, "sub.w", 0, 0x00110000) +INST(sub_d, "sub.d", 0, 0x00118000) + +INST(and, "and", 0, 0x00148000) +INST(or, "or", 0, 0x00150000) +INST(nor, "nor", 0, 0x00140000) +INST(xor, "xor", 0, 0x00158000) +INST(andn, "andn", 0, 0x00168000) +INST(orn, "orn", 0, 0x00160000) + +INST(mul_w, "mul.w", 0, 0x001c0000) +INST(mul_d, "mul.d", 0, 0x001d8000) +INST(mulh_w, "mulh.w", 0, 0x001c8000) +INST(mulh_wu, "mulh.wu", 0, 0x001d0000) +INST(mulh_d, "mulh.d", 0, 0x001e0000) +INST(mulh_du, "mulh.du", 0, 0x001e8000) +INST(mulw_d_w, "mulw.d.w", 0, 0x001f0000) +INST(mulw_d_wu, "mulw.d.wu", 0, 0x001f8000) +INST(div_w, "div.w", 0, 0x00200000) +INST(div_wu, "div.wu", 0, 0x00210000) +INST(div_d, "div.d", 0, 0x00220000) +INST(div_du, "div.du", 0, 0x00230000) +INST(mod_w, "mod.w", 0, 0x00208000) +INST(mod_wu, "mod.wu", 0, 0x00218000) +INST(mod_d, "mod.d", 0, 0x00228000) +INST(mod_du, "mod.du", 0, 0x00238000) + +INST(sll_w, "sll.w", 0, 0x00170000) +INST(srl_w, "srl.w", 0, 0x00178000) +INST(sra_w, "sra.w", 0, 0x00180000) +INST(rotr_w, "rotr_w", 0, 0x001b0000) +INST(sll_d, "sll.d", 0, 0x00188000) +INST(srl_d, "srl.d", 0, 0x00190000) +INST(sra_d, "sra.d", 0, 0x00198000) +INST(rotr_d, "rotr.d", 0, 0x001b8000) + +INST(maskeqz, "maskeqz", 0, 0x00130000) +INST(masknez, "masknez", 0, 0x00138000) + +INST(slt, "slt", 0, 0x00120000) +INST(sltu, "sltu", 0, 0x00128000) + +INST(amswap_w, "amswap.w", 0, 0x38600000) +INST(amswap_d, "amswap.d", 0, 0x38608000) +INST(amswap_db_w, "amswap_db.w", 0, 0x38690000) +INST(amswap_db_d, "amswap_db.d", 0, 0x38698000) 
+INST(amadd_w, "amadd.w", 0, 0x38610000) +INST(amadd_d, "amadd.d", 0, 0x38618000) +INST(amadd_db_w, "amadd_db.w", 0, 0x386a0000) +INST(amadd_db_d, "amadd_db.d", 0, 0x386a8000) +INST(amand_w, "amand.w", 0, 0x38620000) +INST(amand_d, "amand.d", 0, 0x38628000) +INST(amand_db_w, "amand_db.w", 0, 0x386b0000) +INST(amand_db_d, "amand_db.d", 0, 0x386b8000) +INST(amor_w, "amor.w", 0, 0x38630000) +INST(amor_d, "amor.d", 0, 0x38638000) +INST(amor_db_w, "amor_db.w", 0, 0x386c0000) +INST(amor_db_d, "amor_db.d", 0, 0x386c8000) +INST(amxor_w, "amxor.w", 0, 0x38640000) +INST(amxor_d, "amxor.d", 0, 0x38648000) +INST(amxor_db_w, "amxor_db.w", 0, 0x386d0000) +INST(amxor_db_d, "amxor_db.d", 0, 0x386d8000) +INST(ammax_w, "ammax.w", 0, 0x38650000) +INST(ammax_d, "ammax.d", 0, 0x38658000) +INST(ammax_db_w, "ammax_db.w", 0, 0x386e0000) +INST(ammax_db_d, "ammax_db.d", 0, 0x386e8000) +INST(ammin_w, "ammin.w", 0, 0x38660000) +INST(ammin_d, "ammin.d", 0, 0x38668000) +INST(ammin_db_w, "ammin_db.w", 0, 0x386f0000) +INST(ammin_db_d, "ammin_db.d", 0, 0x386f8000) +INST(ammax_wu, "ammax.wu", 0, 0x38670000) +INST(ammax_du, "ammax.du", 0, 0x38678000) +INST(ammax_db_wu, "ammax_db.wu", 0, 0x38700000) +INST(ammax_db_du, "ammax_db.du", 0, 0x38708000) +INST(ammin_wu, "ammin.wu", 0, 0x38680000) +INST(ammin_du, "ammin.du", 0, 0x38688000) +INST(ammin_db_wu, "ammin_db.wu", 0, 0x38710000) +INST(ammin_db_du, "ammin_db.du", 0, 0x38718000) + +INST(crc_w_b_w, "crc.w.b.w", 0, 0x00240000) +INST(crc_w_h_w, "crc.w.h.w", 0, 0x00248000) +INST(crc_w_w_w, "crc.w.w.w", 0, 0x00250000) +INST(crc_w_d_w, "crc.w.d.w", 0, 0x00258000) +INST(crcc_w_b_w, "crcc.w.b.w", 0, 0x00260000) +INST(crcc_w_h_w, "crcc.w.h.w", 0, 0x00268000) +INST(crcc_w_w_w, "crcc.w.w.w", 0, 0x00270000) +INST(crcc_w_d_w, "crcc.w.d.w", 0, 0x00278000) ////R_R_R_I. 
-INST(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) -INST(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) -INST(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) +INST(alsl_w, "alsl.w", 0, 0x00040000) +INST(alsl_wu, "alsl.wu", 0, 0x00060000) +INST(alsl_d, "alsl.d", 0, 0x002c0000) -INST(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) -INST(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) +INST(bytepick_w, "bytepick.w", 0, 0x00080000) +INST(bytepick_d, "bytepick.d", 0, 0x000c0000) -INST(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) +INST(fsel, "fsel", 0, 0x0d000000) ////R_I. -INST(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) -INST(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) +INST(lu12i_w, "lu12i.w", 0, 0x14000000) +INST(lu32i_d, "lu32i.d", 0, 0x16000000) -INST(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) -INST(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) -INST(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) -INST(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) +INST(pcaddi, "pcaddi", 0, 0x18000000) +INST(pcaddu12i, "pcaddu12i", 0, 0x1c000000) +INST(pcalau12i, "pcalau12i", 0, 0x1a000000) +INST(pcaddu18i, "pcaddu18i", 0, 0x1e000000) ////R_R. 
-INST(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) -INST(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) -INST(clo_w, "clo.w", 0, 0, IF_LA, 0x00001000) -INST(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) -INST(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) -INST(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) -INST(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) -INST(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) -INST(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) -INST(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) -INST(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) -INST(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) -INST(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) -INST(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) -INST(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) -INST(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) -INST(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) -INST(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) -INST(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) -INST(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) -INST(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) -INST(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) -INST(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) -INST(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) +INST(ext_w_b, "ext.w.b", 0, 0x00005c00) +INST(ext_w_h, "ext.w.h", 0, 0x00005800) +INST(clo_w, "clo.w", 0, 0x00001000) +INST(clz_w, "clz.w", 0, 0x00001400) +INST(cto_w, "cto.w", 0, 0x00001800) +INST(ctz_w, "ctz.w", 0, 0x00001c00) +INST(clo_d, "clo.d", 0, 0x00002000) +INST(clz_d, "clz.d", 0, 0x00002400) +INST(cto_d, "cto.d", 0, 0x00002800) +INST(ctz_d, "ctz.d", 0, 0x00002c00) +INST(revb_2h, "revb.2h", 0, 0x00003000) +INST(revb_4h, "revb.4h", 0, 0x00003400) +INST(revb_2w, "revb.2w", 0, 0x00003800) +INST(revb_d, "revb.d", 0, 0x00003c00) +INST(revh_2w, "revh.2w", 0, 0x00004000) +INST(revh_d, "revh.d", 0, 0x00004400) +INST(bitrev_4b, "bitrev.4b", 0, 0x00004800) +INST(bitrev_8b, "bitrev.8b", 0, 0x00004c00) +INST(bitrev_w, "bitrev.w", 0, 0x00005000) +INST(bitrev_d, "bitrev.d", 0, 
0x00005400) +INST(rdtimel_w, "rdtimel.w", 0, 0x00006000) +INST(rdtimeh_w, "rdtimeh.w", 0, 0x00006400) +INST(rdtime_d, "rdtime.d", 0, 0x00006800) +INST(cpucfg, "cpucfg", 0, 0x00006c00) ////R_R_I_I. -INST(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) -INST(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) -INST(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) -INST(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) +INST(bstrins_w, "bstrins.w", 0, 0x00600000) +INST(bstrins_d, "bstrins.d", 0, 0x00800000) +INST(bstrpick_w, "bstrpick.w", 0, 0x00608000) +INST(bstrpick_d, "bstrpick.d", 0, 0x00c00000) ////Load. -INST(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) -INST(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) -INST(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) -INST(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) -INST(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) -INST(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) -INST(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) - -INST(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) -INST(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) -INST(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) -INST(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) - -INST(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) -INST(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) -INST(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) -INST(ldx_d, "ldx.d", 0, LD, IF_LA, 0x380c0000) -INST(ldx_bu, "ldx.bu", 0, LD, IF_LA, 0x38200000) -INST(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) -INST(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) - -INST(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) -INST(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) -INST(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) -INST(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) -INST(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) -INST(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) -INST(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) -INST(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) +INST(ld_b, "ld.b", LD, 0x28000000) +INST(ld_h, "ld.h", LD, 0x28400000) +INST(ld_w, "ld.w", LD, 0x28800000) +INST(ld_d, 
"ld.d", LD, 0x28c00000) +INST(ld_bu, "ld.bu", LD, 0x2a000000) +INST(ld_hu, "ld.hu", LD, 0x2a400000) +INST(ld_wu, "ld.wu", LD, 0x2a800000) + +INST(ldptr_w, "ldptr.w", LD, 0x24000000) +INST(ldptr_d, "ldptr.d", LD, 0x26000000) +INST(ll_w, "ll.w", 0, 0x20000000) +INST(ll_d, "ll.d", 0, 0x22000000) + +INST(ldx_b, "ldx.b", LD, 0x38000000) +INST(ldx_h, "ldx.h", LD, 0x38040000) +INST(ldx_w, "ldx.w", LD, 0x38080000) +INST(ldx_d, "ldx.d", LD, 0x380c0000) +INST(ldx_bu, "ldx.bu", LD, 0x38200000) +INST(ldx_hu, "ldx.hu", LD, 0x38240000) +INST(ldx_wu, "ldx.wu", LD, 0x38280000) + +INST(ldgt_b, "ldgt.b", 0, 0x38780000) +INST(ldgt_h, "ldgt.h", 0, 0x38788000) +INST(ldgt_w, "ldgt.w", 0, 0x38790000) +INST(ldgt_d, "ldgt.d", 0, 0x38798000) +INST(ldle_b, "ldle.b", 0, 0x387a0000) +INST(ldle_h, "ldle.h", 0, 0x387a8000) +INST(ldle_w, "ldle.w", 0, 0x387b0000) +INST(ldle_d, "ldle.d", 0, 0x387b8000) ////R_R_I. -INST(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) -INST(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) -INST(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) -INST(slti, "slti", 0, 0, IF_LA, 0x02000000) - -INST(sltui, "sltui", 0, 0, IF_LA, 0x02400000) -INST(andi, "andi", 0, 0, IF_LA, 0x03400000) -INST(ori, "ori", 0, 0, IF_LA, 0x03800000) -INST(xori, "xori", 0, 0, IF_LA, 0x03c00000) - -INST(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) -INST(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) -INST(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) -INST(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) -INST(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) -INST(srli_d, "srli.d", 0, 0, IF_LA, 0x00450000) -INST(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) -INST(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) - -INST(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) - -INST(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) +INST(addi_w, "addi.w", 0, 0x02800000) +INST(addi_d, "addi.d", 0, 0x02c00000) +INST(lu52i_d, "lu52i.d", 0, 0x03000000) +INST(slti, "slti", 0, 0x02000000) + +INST(sltui, "sltui", 0, 0x02400000) +INST(andi, "andi", 0, 0x03400000) 
+INST(ori, "ori", 0, 0x03800000) +INST(xori, "xori", 0, 0x03c00000) + +INST(slli_w, "slli.w", 0, 0x00408000) +INST(srli_w, "srli.w", 0, 0x00448000) +INST(srai_w, "srai.w", 0, 0x00488000) +INST(rotri_w, "rotri.w", 0, 0x004c8000) +INST(slli_d, "slli.d", 0, 0x00410000) +INST(srli_d, "srli.d", 0, 0x00450000) +INST(srai_d, "srai.d", 0, 0x00490000) +INST(rotri_d, "rotri.d", 0, 0x004d0000) + +INST(addu16i_d, "addu16i.d", 0, 0x10000000) + +INST(jirl, "jirl", 0, 0x4c000000) //////////////////////////////////////////////////////////////////////////////////////////// ////NOTE: jirl must be the last one !!! more info to see emitter::emitInsMayWriteToGCReg(). // @@ -280,209 +278,209 @@ INST(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) //////////////////////////////////////////////////////////////////////////////////////////// ////Store. -INST(st_b, "st.b", 0, ST, IF_LA, 0x29000000) -INST(st_h, "st.h", 0, ST, IF_LA, 0x29400000) -INST(st_w, "st.w", 0, ST, IF_LA, 0x29800000) -INST(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) - -INST(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) -INST(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) -INST(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) -INST(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) - -INST(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) -INST(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) -INST(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) -INST(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) -INST(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) -INST(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) -INST(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) -INST(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) -INST(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) -INST(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) -INST(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) -INST(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) - -INST(dbar, "dbar", 0, 0, IF_LA, 0x38720000) -INST(ibar, "ibar", 0, 0, IF_LA, 0x38728000) - -INST(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) -INST(break, "break", 0, 0, IF_LA, 0x002a0005) - 
-INST(asrtle_d, "asrtle.d", 0, 0, IF_LA, 0x00010000) -INST(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) - -INST(preld, "preld", 0, LD, IF_LA, 0x2ac00000) -INST(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) +INST(st_b, "st.b", ST, 0x29000000) +INST(st_h, "st.h", ST, 0x29400000) +INST(st_w, "st.w", ST, 0x29800000) +INST(st_d, "st.d", ST, 0x29c00000) + +INST(stptr_w, "stptr.w", ST, 0x25000000) +INST(stptr_d, "stptr.d", ST, 0x27000000) +INST(sc_w, "sc.w", 0, 0x21000000) +INST(sc_d, "sc.d", 0, 0x23000000) + +INST(stx_b, "stx.b", ST, 0x38100000) +INST(stx_h, "stx.h", ST, 0x38140000) +INST(stx_w, "stx.w", ST, 0x38180000) +INST(stx_d, "stx.d", ST, 0x381c0000) +INST(stgt_b, "stgt.b", 0, 0x387c0000) +INST(stgt_h, "stgt.h", 0, 0x387c8000) +INST(stgt_w, "stgt.w", 0, 0x387d0000) +INST(stgt_d, "stgt.d", 0, 0x387d8000) +INST(stle_b, "stle.b", 0, 0x387e0000) +INST(stle_h, "stle.h", 0, 0x387e8000) +INST(stle_w, "stle.w", 0, 0x387f0000) +INST(stle_d, "stle.d", 0, 0x387f8000) + +INST(dbar, "dbar", 0, 0x38720000) +INST(ibar, "ibar", 0, 0x38728000) + +INST(syscall, "syscall", 0, 0x002b0000) +INST(break, "break", 0, 0x002a0005) + +INST(asrtle_d, "asrtle.d", 0, 0x00010000) +INST(asrtgt_d, "asrtgt.d", 0, 0x00018000) + +INST(preld, "preld", LD, 0x2ac00000) +INST(preldx, "preldx", LD, 0x382c0000) ////Float instructions. ////R_R_R. 
-INST(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) -INST(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) -INST(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) -INST(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) -INST(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) -INST(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) -INST(fdiv_s, "fdiv.s", 0, 0, IF_LA, 0x01068000) -INST(fdiv_d, "fdiv.d", 0, 0, IF_LA, 0x01070000) - -INST(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) -INST(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) -INST(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) -INST(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) -INST(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) -INST(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) -INST(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) -INST(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) - -INST(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) -INST(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) - -INST(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) -INST(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) - -INST(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) -INST(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) -INST(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) -INST(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) - -INST(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) -INST(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) -INST(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) -INST(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) -INST(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) -INST(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) -INST(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) -INST(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) +INST(fadd_s, "fadd.s", 0, 0x01008000) +INST(fadd_d, "fadd.d", 0, 0x01010000) +INST(fsub_s, "fsub.s", 0, 0x01028000) +INST(fsub_d, "fsub.d", 0, 0x01030000) +INST(fmul_s, "fmul.s", 0, 0x01048000) +INST(fmul_d, "fmul.d", 0, 0x01050000) +INST(fdiv_s, "fdiv.s", 0, 0x01068000) +INST(fdiv_d, "fdiv.d", 0, 0x01070000) + +INST(fmax_s, "fmax.s", 0, 0x01088000) 
+INST(fmax_d, "fmax.d", 0, 0x01090000) +INST(fmin_s, "fmin.s", 0, 0x010a8000) +INST(fmin_d, "fmin.d", 0, 0x010b0000) +INST(fmaxa_s, "fmaxa.s", 0, 0x010c8000) +INST(fmaxa_d, "fmaxa.d", 0, 0x010d0000) +INST(fmina_s, "fmina.s", 0, 0x010e8000) +INST(fmina_d, "fmina.d", 0, 0x010f0000) + +INST(fscaleb_s, "fscaleb.s", 0, 0x01108000) +INST(fscaleb_d, "fscaleb.d", 0, 0x01110000) + +INST(fcopysign_s, "fcopysign.s", 0, 0x01128000) +INST(fcopysign_d, "fcopysign.d", 0, 0x01130000) + +INST(fldx_s, "fldx.s", LD, 0x38300000) +INST(fldx_d, "fldx.d", LD, 0x38340000) +INST(fstx_s, "fstx.s", ST, 0x38380000) +INST(fstx_d, "fstx.d", ST, 0x383c0000) + +INST(fldgt_s, "fldgt.s", 0, 0x38740000) +INST(fldgt_d, "fldgt.d", 0, 0x38748000) +INST(fldle_s, "fldle.s", 0, 0x38750000) +INST(fldle_d, "fldle.d", 0, 0x38758000) +INST(fstgt_s, "fstgt.s", 0, 0x38760000) +INST(fstgt_d, "fstgt.d", 0, 0x38768000) +INST(fstle_s, "fstle.s", 0, 0x38770000) +INST(fstle_d, "fstle.d", 0, 0x38778000) ////R_R_R_R. -INST(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) -INST(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) -INST(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) -INST(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) -INST(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) -INST(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) -INST(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) -INST(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) +INST(fmadd_s, "fmadd.s", 0, 0x08100000) +INST(fmadd_d, "fmadd.d", 0, 0x08200000) +INST(fmsub_s, "fmsub.s", 0, 0x08500000) +INST(fmsub_d, "fmsub.d", 0, 0x08600000) +INST(fnmadd_s, "fnmadd.s", 0, 0x08900000) +INST(fnmadd_d, "fnmadd.d", 0, 0x08a00000) +INST(fnmsub_s, "fnmsub.s", 0, 0x08d00000) +INST(fnmsub_d, "fnmsub.d", 0, 0x08e00000) ////R_R. 
-INST(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) -INST(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) -INST(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) -INST(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) - -INST(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) -INST(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) -INST(frsqrt_s, "frsqrt.s", 0, 0, IF_LA, 0x01146400) -INST(frsqrt_d, "frsqrt.d", 0, 0, IF_LA, 0x01146800) -INST(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) -INST(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) -INST(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) -INST(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) -INST(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) -INST(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) - -INST(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) -INST(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) -INST(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) -INST(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) -INST(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) -INST(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) -INST(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) -INST(ftint_w_d, "ftint.w.d", 0, 0, IF_LA, 0x011b0800) -INST(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) -INST(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) -INST(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) -INST(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) -INST(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) -INST(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) -INST(ftintrp_w_s, "ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) -INST(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) -INST(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) -INST(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) -INST(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) -INST(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) -INST(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) -INST(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) -INST(ftintrne_w_s, 
"ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) -INST(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) -INST(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) -INST(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) -INST(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) -INST(frint_d, "frint.d", 0, 0, IF_LA, 0x011e4800) - -INST(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) -INST(fmov_d, "fmov.d", 0, 0, IF_LA, 0x01149800) - -INST(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) -INST(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) -INST(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) -INST(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) -INST(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) -INST(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) +INST(fabs_s, "fabs.s", 0, 0x01140400) +INST(fabs_d, "fabs.d", 0, 0x01140800) +INST(fneg_s, "fneg.s", 0, 0x01141400) +INST(fneg_d, "fneg.d", 0, 0x01141800) + +INST(fsqrt_s, "fsqrt.s", 0, 0x01144400) +INST(fsqrt_d, "fsqrt.d", 0, 0x01144800) +INST(frsqrt_s, "frsqrt.s", 0, 0x01146400) +INST(frsqrt_d, "frsqrt.d", 0, 0x01146800) +INST(frecip_s, "frecip.s", 0, 0x01145400) +INST(frecip_d, "frecip.d", 0, 0x01145800) +INST(flogb_s, "flogb.s", 0, 0x01142400) +INST(flogb_d, "flogb.d", 0, 0x01142800) +INST(fclass_s, "fclass.s", 0, 0x01143400) +INST(fclass_d, "fclass.d", 0, 0x01143800) + +INST(fcvt_s_d, "fcvt.s.d", 0, 0x01191800) +INST(fcvt_d_s, "fcvt.d.s", 0, 0x01192400) +INST(ffint_s_w, "ffint.s.w", 0, 0x011d1000) +INST(ffint_s_l, "ffint.s.l", 0, 0x011d1800) +INST(ffint_d_w, "ffint.d.w", 0, 0x011d2000) +INST(ffint_d_l, "ffint.d.l", 0, 0x011d2800) +INST(ftint_w_s, "ftint.w.s", 0, 0x011b0400) +INST(ftint_w_d, "ftint.w.d", 0, 0x011b0800) +INST(ftint_l_s, "ftint.l.s", 0, 0x011b2400) +INST(ftint_l_d, "ftint.l.d", 0, 0x011b2800) +INST(ftintrm_w_s, "ftintrm.w.s", 0, 0x011a0400) +INST(ftintrm_w_d, "ftintrm.w.d", 0, 0x011a0800) +INST(ftintrm_l_s, "ftintrm.l.s", 0, 0x011a2400) +INST(ftintrm_l_d, "ftintrm.l.d", 0, 0x011a2800) 
+INST(ftintrp_w_s, "ftintrp.w.s", 0, 0x011a4400) +INST(ftintrp_w_d, "ftintrp.w.d", 0, 0x011a4800) +INST(ftintrp_l_s, "ftintrp.l.s", 0, 0x011a6400) +INST(ftintrp_l_d, "ftintrp.l.d", 0, 0x011a6800) +INST(ftintrz_w_s, "ftintrz.w.s", 0, 0x011a8400) +INST(ftintrz_w_d, "ftintrz.w.d", 0, 0x011a8800) +INST(ftintrz_l_s, "ftintrz.l.s", 0, 0x011aa400) +INST(ftintrz_l_d, "ftintrz.l.d", 0, 0x011aa800) +INST(ftintrne_w_s, "ftintrne.w.s", 0, 0x011ac400) +INST(ftintrne_w_d, "ftintrne.w.d", 0, 0x011ac800) +INST(ftintrne_l_s, "ftintrne.l.s", 0, 0x011ae400) +INST(ftintrne_l_d, "ftintrne.l.d", 0, 0x011ae800) +INST(frint_s, "frint.s", 0, 0x011e4400) +INST(frint_d, "frint.d", 0, 0x011e4800) + +INST(fmov_s, "fmov.s", 0, 0x01149400) +INST(fmov_d, "fmov.d", 0, 0x01149800) + +INST(movgr2fr_w, "movgr2fr.w", 0, 0x0114a400) +INST(movgr2fr_d, "movgr2fr.d", 0, 0x0114a800) +INST(movgr2frh_w, "movgr2frh.w", 0, 0x0114ac00) +INST(movfr2gr_s, "movfr2gr.s", 0, 0x0114b400) +INST(movfr2gr_d, "movfr2gr.d", 0, 0x0114b800) +INST(movfrh2gr_s, "movfrh2gr.s", 0, 0x0114bc00) //// -INST(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) -INST(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) -INST(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) -INST(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) -INST(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) -INST(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) +INST(movgr2fcsr, "movgr2fcsr", 0, 0x0114c000) +INST(movfcsr2gr, "movfcsr2gr", 0, 0x0114c800) +INST(movfr2cf, "movfr2cf", 0, 0x0114d000) +INST(movcf2fr, "movcf2fr", 0, 0x0114d400) +INST(movgr2cf, "movgr2cf", 0, 0x0114d800) +INST(movcf2gr, "movcf2gr", 0, 0x0114dc00) ////R_R_I. 
-INST(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) -INST(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) -INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) -INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) -INST(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) -INST(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) -INST(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) -INST(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) -INST(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) -INST(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) -INST(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) - -INST(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) -INST(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) -INST(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) -INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) -INST(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) -INST(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) -INST(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) -INST(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) -INST(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) -INST(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) -INST(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) - -INST(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) -INST(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) -INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) -INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) -INST(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) -INST(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) -INST(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) -INST(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) -INST(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) -INST(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) -INST(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) - -INST(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 0x0c108000) -INST(fcmp_sun_s, "fcmp.sun.s", 
0, 0, IF_LA, 0x0c148000) -INST(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) -INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) -INST(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) -INST(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) -INST(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) -INST(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) -INST(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) -INST(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) -INST(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) +INST(fcmp_caf_s, "fcmp.caf.s", 0, 0x0c100000) +INST(fcmp_cun_s, "fcmp.cun.s", 0, 0x0c140000) +INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0x0c120000) +INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0x0c160000) +INST(fcmp_clt_s, "fcmp.clt.s", 0, 0x0c110000) +INST(fcmp_cult_s, "fcmp.cult.s", 0, 0x0c150000) +INST(fcmp_cle_s, "fcmp.cle.s", 0, 0x0c130000) +INST(fcmp_cule_s, "fcmp.cule.s", 0, 0x0c170000) +INST(fcmp_cne_s, "fcmp.cne.s", 0, 0x0c180000) +INST(fcmp_cor_s, "fcmp.cor.s", 0, 0x0c1a0000) +INST(fcmp_cune_s, "fcmp.cune.s", 0, 0x0c1c0000) + +INST(fcmp_saf_d, "fcmp.saf.d", 0, 0x0c208000) +INST(fcmp_sun_d, "fcmp.sun.d", 0, 0x0c248000) +INST(fcmp_seq_d, "fcmp.seq.d", 0, 0x0c228000) +INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0x0c268000) +INST(fcmp_slt_d, "fcmp.slt.d", 0, 0x0c218000) +INST(fcmp_sult_d, "fcmp.sult.d", 0, 0x0c258000) +INST(fcmp_sle_d, "fcmp.sle.d", 0, 0x0c238000) +INST(fcmp_sule_d, "fcmp.sule.d", 0, 0x0c278000) +INST(fcmp_sne_d, "fcmp.sne.d", 0, 0x0c288000) +INST(fcmp_sor_d, "fcmp.sor.d", 0, 0x0c2a8000) +INST(fcmp_sune_d, "fcmp.sune.d", 0, 0x0c2c8000) + +INST(fcmp_caf_d, "fcmp.caf.d", 0, 0x0c200000) +INST(fcmp_cun_d, "fcmp.cun.d", 0, 0x0c240000) +INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0x0c220000) +INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0x0c260000) +INST(fcmp_clt_d, "fcmp.clt.d", 0, 0x0c210000) +INST(fcmp_cult_d, "fcmp.cult.d", 0, 0x0c250000) +INST(fcmp_cle_d, "fcmp.cle.d", 0, 0x0c230000) +INST(fcmp_cule_d, "fcmp.cule.d", 0, 0x0c270000) +INST(fcmp_cne_d, 
"fcmp.cne.d", 0, 0x0c280000) +INST(fcmp_cor_d, "fcmp.cor.d", 0, 0x0c2a0000) +INST(fcmp_cune_d, "fcmp.cune.d", 0, 0x0c2c0000) + +INST(fcmp_saf_s, "fcmp.saf.s", 0, 0x0c108000) +INST(fcmp_sun_s, "fcmp.sun.s", 0, 0x0c148000) +INST(fcmp_seq_s, "fcmp.seq.s", 0, 0x0c128000) +INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0x0c168000) +INST(fcmp_slt_s, "fcmp.slt.s", 0, 0x0c118000) +INST(fcmp_sult_s, "fcmp.sult.s", 0, 0x0c158000) +INST(fcmp_sle_s, "fcmp.sle.s", 0, 0x0c138000) +INST(fcmp_sule_s, "fcmp.sule.s", 0, 0x0c178000) +INST(fcmp_sne_s, "fcmp.sne.s", 0, 0x0c188000) +INST(fcmp_sor_s, "fcmp.sor.s", 0, 0x0c1a8000) +INST(fcmp_sune_s, "fcmp.sune.s", 0, 0x0c1c8000) ////R_R_I. -INST(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) -INST(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) -INST(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) -INST(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) +INST(fld_s, "fld.s", LD, 0x2b000000) +INST(fld_d, "fld.d", LD, 0x2b800000) +INST(fst_s, "fst.s", ST, 0x2b400000) +INST(fst_d, "fst.d", ST, 0x2bc00000) // clang-format on /*****************************************************************************/ diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index b6a490b89742b..769e5fcbdcaa2 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -548,13 +548,6 @@ CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSave #endif // defined(TARGET_ARM64) #if defined(TARGET_LOONGARCH64) -// JitSaveFpRaWithCalleeSavedRegisters: -// 0: use default frame type decision -// 1: disable frames that save FP/RA registers with the callee-saved registers (at the top of the frame) -// 2: force all frames to use the frame types that save FP/RA registers with the callee-saved registers (at the top -// of the frame) -CONFIG_INTEGER(JitSaveFpRaWithCalleeSavedRegisters, W("JitSaveFpRaWithCalleeSavedRegisters"), 0) - // Disable emitDispIns by default CONFIG_INTEGER(JitDispIns, W("JitDispIns"), 0) #endif // 
defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index c4d14d2f2006d..c522b12f21683 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5359,21 +5359,14 @@ void Compiler::lvaFixVirtualFrameOffsets() // We set FP to be after LR, FP delta += 2 * REGSIZE_BYTES; } -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) else { // FP is used. JITDUMP("--- delta bump %d for FP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } -#elif defined(TARGET_LOONGARCH64) - else - { - // FP is used. - JITDUMP("--- delta bump %d for RBP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); - delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); - } -#endif // TARGET_LOONGARCH64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 if (opts.IsOSR()) { @@ -5487,23 +5480,11 @@ void Compiler::lvaFixVirtualFrameOffsets() #endif // FEATURE_FIXED_OUT_ARGS -#ifdef TARGET_ARM64 - // We normally add alignment below the locals between them and the outgoing - // arg space area. When we store fp/lr at the bottom, however, this will be - // below the alignment. So we should not apply the alignment adjustment to - // them. On ARM64 it turns out we always store these at +0 and +8 of the FP, - // so instead of dealing with skipping adjustment just for them we just set - // them here always. - assert(codeGen->isFramePointerUsed()); - if (lvaRetAddrVar != BAD_VAR_NUM) - { - lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); - } -#elif defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // We normally add alignment below the locals between them and the outgoing - // arg space area. When we store fp/ra at the bottom, however, this will be - // below the alignment. 
So we should not apply the alignment adjustment to - // them. On LOONGARCH64 it turns out we always store these at +0 and +8 of the FP, + // arg space area. When we store fp/lr(ra) at the bottom, however, this will + // be below the alignment. So we should not apply the alignment adjustment to + // them. It turns out we always store these at +0 and +8 of the FP, // so instead of dealing with skipping adjustment just for them we just set // them here always. assert(codeGen->isFramePointerUsed()); @@ -5511,7 +5492,7 @@ void Compiler::lvaFixVirtualFrameOffsets() { lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); } -#endif +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } #ifdef TARGET_ARM @@ -6018,16 +5999,8 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, } #elif defined(TARGET_LOONGARCH64) -// if (compFeatureArgSplit() && this->info.compIsVarArgs) -//{//TODO: should confirm for "info.compIsVarArgs". -// if (varDsc->lvType == TYP_STRUCT && varDsc->lvOtherArgReg >= MAX_REG_ARG && varDsc->lvOtherArgReg != REG_NA) -// { -// // This is a split struct. It will account for an extra (8 bytes) -// // of alignment. -// varDsc->lvStkOffs += TARGET_POINTER_SIZE; -// argOffs += TARGET_POINTER_SIZE; -// } -//} +// empty for LoongArch64. + #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -6267,30 +6240,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif -#ifdef TARGET_LOONGARCH64 - // Decide where to save FP and RA registers. We store FP/RA registers at the bottom of the frame if there is - // a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we - // need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value, - // and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the - // frame types. 
Since saving FP/RA at high addresses is a relatively rare case, force using it during stress. - // (It should be legal to use these frame types for every frame). - - if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 0) - { - // Default configuration - codeGen->SetSaveFpRaWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) || - compStressCompile(STRESS_GENERIC_VARN, 20)); - } - else if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 1) - { - codeGen->SetSaveFpRaWithAllCalleeSavedRegisters(false); // Disable using new frames - } - else if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 2) - { - codeGen->SetSaveFpRaWithAllCalleeSavedRegisters(true); // Force using new frames - } -#endif // TARGET_LOONGARCH64 - int preSpillSize = 0; bool mustDoubleAlign = false; @@ -6342,17 +6291,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() initialStkOffs = MAX_REG_ARG * REGSIZE_BYTES; stkOffs -= initialStkOffs; } - if (codeGen->IsSaveFpRaWithAllCalleeSavedRegisters() || - !isFramePointerUsed()) // Note that currently we always have a frame pointer - { - stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; - } - else - { - // Subtract off FP and RA. - assert(compCalleeRegsPushed >= 2); - stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; - } + + // Subtract off FP and RA. 
+ assert(compCalleeRegsPushed >= 2); + stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; #else // !TARGET_LOONGARCH64 #ifdef TARGET_ARM @@ -6451,7 +6393,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_AMD64 -#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARMARCH) +#if defined(FEATURE_EH_FUNCLETS) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)) if (lvaPSPSym != BAD_VAR_NUM) { // On ARM/ARM64, if we need a PSPSym, allocate it first, before anything else, including @@ -6460,18 +6402,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); } -#endif // FEATURE_EH_FUNCLETS && defined(TARGET_ARMARCH) - -#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_LOONGARCH64) - if (lvaPSPSym != BAD_VAR_NUM) - { - // If we need a PSPSym, allocate it first, before anything else, including - // padding (so we can avoid computing the same padding in the funclet - // frame). Note that there is no special padding requirement for the PSPSym. - noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); - } -#endif // FEATURE_EH_FUNCLETS || TARGET_LOONGARCH64 +#endif // FEATURE_EH_FUNCLETS && (TARGET_ARMARCH || TARGET_LOONGARCH64) if (mustDoubleAlign) { @@ -6866,7 +6797,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() continue; } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) { // Stack offset to varargs (parameters) should point to home area which will be preallocated. 
@@ -6888,16 +6819,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif -#ifdef TARGET_LOONGARCH64 - if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) - { // TODO: add VarArgs for LOONGARCH64. - // Stack offset to parameters should point to home area which will be preallocated. - varDsc->SetStackOffset(-initialStkOffs + - genMapIntRegNumToRegArgNum(varDsc->GetArgReg()) * REGSIZE_BYTES); - continue; - } -#endif - #endif // !TARGET_AMD64 } @@ -6980,7 +6901,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // Reserve the stack space for this variable stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If we have an incoming register argument that has a struct promoted field // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar // @@ -7006,20 +6927,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() lvaTable[fieldVarNum + 1].SetStackOffset(varDsc->GetStackOffset() + 4); } #endif // TARGET_ARM -#endif // TARGET_ARM64 - -#ifdef TARGET_LOONGARCH64 - // If we have an incoming register argument that has a struct promoted field - // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar - // - if (varDsc->lvIsRegArg && varDsc->lvPromotedStruct()) - { - noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here - - unsigned fieldVarNum = varDsc->lvFieldLclStart; - lvaTable[fieldVarNum].SetStackOffset(varDsc->GetStackOffset()); - } -#endif // TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } } @@ -7124,13 +7032,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_ARM64 -#ifdef TARGET_LOONGARCH64 - if (!codeGen->IsSaveFpRaWithAllCalleeSavedRegisters() && - isFramePointerUsed()) // Note that currently we always have a frame pointer - { - // Create space for saving FP and RA. 
- stkOffs -= 2 * REGSIZE_BYTES; - } +#if defined(TARGET_LOONGARCH64) + assert(isFramePointerUsed()); // Note that currently we always have a frame pointer + stkOffs -= 2 * REGSIZE_BYTES; #endif // TARGET_LOONGARCH64 #if FEATURE_FIXED_OUT_ARGS @@ -7153,7 +7057,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // and the pushed frame pointer register which for some strange reason isn't part of 'compCalleeRegsPushed'. int pushedCount = compCalleeRegsPushed; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (info.compIsVarArgs) { pushedCount += MAX_REG_ARG; @@ -7168,13 +7072,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() pushedCount += 1; // pushed PC (return address) #endif -#ifdef TARGET_LOONGARCH64 - if (info.compIsVarArgs) - { - pushedCount += MAX_REG_ARG; - } -#endif - noway_assert(compLclFrameSize + originalFrameSize == (unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE))); } @@ -7343,9 +7240,9 @@ void Compiler::lvaAlignFrame() lvaIncrementFrameSize(REGSIZE_BYTES); } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) - // The stack on ARM64 must be 16 byte aligned. + // The stack on ARM64/LoongArch64 must be 16 byte aligned. // First, align up to 8. if ((compLclFrameSize % 8) != 0) @@ -7428,34 +7325,6 @@ void Compiler::lvaAlignFrame() } } -#elif defined(TARGET_LOONGARCH64) - - // First, align up to 8. - if ((compLclFrameSize % 8) != 0) - { - lvaIncrementFrameSize(8 - (compLclFrameSize % 8)); - } - else if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) - { - // If we are not doing final layout, we don't know the exact value of compLclFrameSize - // and thus do not know how much we will need to add in order to be aligned. - // We add 8 so compLclFrameSize is still a multiple of 8. - lvaIncrementFrameSize(8); - } - assert((compLclFrameSize % 8) == 0); - - // Ensure that the stack is always 16-byte aligned by grabbing an unused 16-byte - // if needed. 
- bool regPushedCountAligned = (compCalleeRegsPushed % (16 / REGSIZE_BYTES)) != 0; - bool lclFrameSizeAligned = (compLclFrameSize % 16) != 0; - - // If this isn't the final frame layout, assume we have to push an extra QWORD - // Just so the offsets are true upper limits. - if ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) || (regPushedCountAligned != lclFrameSizeAligned)) - { - lvaIncrementFrameSize(REGSIZE_BYTES); - } - #else NYI("TARGET specific lvaAlignFrame"); #endif // !TARGET_AMD64 @@ -8030,11 +7899,11 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) compCalleeRegsPushed = CNT_CALLEE_SAVED; -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (compFloatingPointUsed) compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT; - compCalleeRegsPushed++; // we always push LR. See genPushCalleeSavedRegisters + compCalleeRegsPushed++; // we always push LR/RA. See genPushCalleeSavedRegisters #elif defined(TARGET_AMD64) if (compFloatingPointUsed) { @@ -8044,11 +7913,6 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) { compCalleeFPRegsSavedMask = RBM_NONE; } -#elif defined(TARGET_LOONGARCH64) - if (compFloatingPointUsed) - compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT; - - compCalleeRegsPushed++; // we always push RA. See genPushCalleeSavedRegisters #endif #if DOUBLE_ALIGN @@ -8071,20 +7935,12 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) lvaAssignFrameOffsets(curState); unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ; -#if defined(TARGET_ARMARCH) - if (compFloatingPointUsed) - { - calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ; - } - calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR. See genPushCalleeSavedRegisters -#endif - -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (compFloatingPointUsed) { calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ; } - calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push RA. 
See genPushCalleeSavedRegisters + calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR/RA. See genPushCalleeSavedRegisters #endif result = compLclFrameSize + calleeSavedRegMaxSz; @@ -8396,20 +8252,13 @@ Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTree** pTree, fgWalkData* // Calculate padding unsigned padding = LCL_FLD_PADDING(lclNum); -#ifdef TARGET_ARMARCH - // We need to support alignment requirements to access memory on ARM ARCH - unsigned alignment = 1; - pComp->codeGen->InferOpSizeAlign(lcl, &alignment); - alignment = roundUp(alignment, TARGET_POINTER_SIZE); - padding = roundUp(padding, alignment); -#endif // TARGET_ARMARCH - -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) + // We need to support alignment requirements to access memory. unsigned alignment = 1; pComp->codeGen->InferOpSizeAlign(lcl, &alignment); alignment = roundUp(alignment, TARGET_POINTER_SIZE); padding = roundUp(padding, alignment); -#endif // TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 // Change the variable to a TYP_BLK if (varType != TYP_BLK) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 859a9d3100676..8ea2160a5454a 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1461,7 +1461,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) #endif // !defined(TARGET_64BIT) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (call->IsVarargs() || comp->opts.compUseSoftFP) { // For vararg call or on armel, reg args should be all integer. @@ -1472,32 +1472,8 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) type = newNode->TypeGet(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 -#if defined(TARGET_LOONGARCH64) - if (call->IsVarargs()) - { - // For vararg call, reg args should be all integer. - // Insert copies as needed to move float value to integer register. 
- GenTree* newNode = LowerFloatArg(ppArg, info); - if (newNode != nullptr) - { - type = newNode->TypeGet(); - } - } - else - { - GenTree* putArg = NewPutArg(call, arg, info, type); - - // In the case of register passable struct (in one or two registers) - // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_FIELD_LIST with two GT_PUTARG_REGs.) - // If an extra node is returned, splice it in the right place in the tree. - if (arg != putArg) - { - ReplaceArgWithPutArgOrBitcast(ppArg, putArg); - } - } -#else GenTree* putArg = NewPutArg(call, arg, info, type); // In the case of register passable struct (in one or two registers) @@ -1507,7 +1483,6 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) { ReplaceArgWithPutArgOrBitcast(ppArg, putArg); } -#endif } } diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 13d6cb3469dd9..9bd8beca15157 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -66,15 +66,13 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const switch (parentNode->OperGet()) { - case GT_ADD: - return ((-2048 <= immVal) && (immVal <= 2047)); - break; case GT_CMPXCHG: case GT_LOCKADD: case GT_XADD: - NYI_LOONGARCH64("unimplemented on LOONGARCH yet"); + NYI_LOONGARCH64("GT_CMPXCHG,GT_LOCKADD,GT_XADD"); break; + case GT_ADD: case GT_EQ: case GT_NE: case GT_LT: @@ -82,11 +80,11 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const case GT_GE: case GT_GT: case GT_BOUNDS_CHECK: - return ((-32768 <= immVal) && (immVal <= 32767)); + return emitter::isValidSimm12(immVal); case GT_AND: case GT_OR: case GT_XOR: - return ((-2048 <= immVal) && (immVal <= 2047)); + return emitter::isValidUimm11(immVal); case GT_JCMP: assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? 
(immVal == 0) : isPow2(immVal)); return true; @@ -411,11 +409,8 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT GenTreeIntCon* offsetNode = addr->AsOp()->gtGetOp2()->AsIntCon(); ssize_t offset = offsetNode->IconValue(); - // All integer load/store instructions on both ARM32 and ARM64 support - // offsets in range -255..255. Of course, this is a rather conservative - // check. For example, if the offset and size are a multiple of 8 we - // could allow a combined offset of up to 32760 on ARM64. - if ((offset < -255) || (offset > 255) || (offset + static_cast(size) > 256)) + // TODO-LoongArch64: not including the ldptr and SIMD offset which not used right now. + if (!emitter::isValidSimm12(offset) || !emitter::isValidSimm12(offset + static_cast(size))) { return; } @@ -514,7 +509,7 @@ void Lowering::LowerRotate(GenTree* tree) // void Lowering::LowerSIMD(GenTreeSIMD* simdNode) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("LowerSIMD"); } #endif // FEATURE_SIMD @@ -527,7 +522,7 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) // void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("LowerHWIntrinsic"); } //---------------------------------------------------------------------------------------------- @@ -543,7 +538,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // This check may end up modifying node->gtOp1 if it is a cast node that can be removed bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("IsValidConstForMovImm"); return false; } @@ -556,7 +551,7 @@ bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("LowerHWIntrinsicCmpOp"); } 
//---------------------------------------------------------------------------------------------- @@ -567,7 +562,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) // void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("LowerHWIntrinsicCreate"); } //---------------------------------------------------------------------------------------------- @@ -578,7 +573,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("LowerHWIntrinsicDot"); } #endif // FEATURE_HW_INTRINSICS @@ -598,7 +593,7 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // void Lowering::ContainCheckCallOperands(GenTreeCall* call) { - // There are no contained operands for LOONGARCH. + // There are no contained operands for LoongArch64. } //------------------------------------------------------------------------ @@ -640,18 +635,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) } #ifdef FEATURE_SIMD - assert(!"unimplemented on LOONGARCH yet"); - // If indirTree is of TYP_SIMD12, don't mark addr as contained - // so that it always get computed to a register. This would - // mean codegen side logic doesn't need to handle all possible - // addr expressions that could be contained. - // - // TODO-LOONGARCH64-CQ: handle other addr mode expressions that could be marked - // as contained. - if (indirNode->TypeGet() == TYP_SIMD12) - { - return; - } + NYI_LOONGARCH64("ContainCheckIndir-SIMD"); #endif // FEATURE_SIMD GenTree* addr = indirNode->Addr(); @@ -790,6 +774,7 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const // void Lowering::ContainCheckCast(GenTreeCast* node) { + // There are no contained operands for LoongArch64. 
} //------------------------------------------------------------------------ @@ -827,7 +812,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) // void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) { - NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + NYI_LOONGARCH64("ContainCheckSIMD"); } #endif // FEATURE_SIMD @@ -840,7 +825,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) // void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { - NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + NYI_LOONGARCH64("ContainCheckHWIntrinsic"); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index b95293dcacf7c..39096cec86af8 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1869,23 +1869,10 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) addrNode = arg; #if FEATURE_MULTIREG_ARGS -#ifdef TARGET_ARM64 - assert(varTypeIsStruct(type)); - if (lvaIsMultiregStruct(varDsc, curArgTabEntry->IsVararg())) - { - // We will create a GT_OBJ for the argument below. - // This will be passed by value in two registers. - assert(addrNode != nullptr); - - // Create an Obj of the temp to use it as a call argument. - arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); - } -#elif defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(type)); if (lvaIsMultiregStruct(varDsc, curArgTabEntry->IsVararg())) { - // ToDo-LOONGARCH64: Consider using: arg->ChangeOper(GT_LCL_FLD); - // as that is how UNIX_AMD64_ABI works. // We will create a GT_OBJ for the argument below. // This will be passed by value in two registers. assert(addrNode != nullptr); @@ -1896,7 +1883,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) #else // Always create an Obj of the temp to use it as a call argument. 
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); -#endif // !TARGET_ARM64 +#endif // !(TARGET_ARM64 || TARGET_LOONGARCH64) #endif // FEATURE_MULTIREG_ARGS } @@ -2939,7 +2926,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #endif } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) assert(!callIsVararg || !isHfaArg); passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeUsesFloatReg(argx)); @@ -2950,17 +2937,15 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #elif defined(TARGET_X86) - passUsingFloatRegs = false; - -#elif defined(TARGET_LOONGARCH64) - assert(!callIsVararg); - assert(!isHfaArg); - passUsingFloatRegs = !callIsVararg && varTypeIsFloating(argx); + passUsingFloatRegs = false; #else #error Unsupported or unset target architecture #endif // TARGET* +#if defined(TARGET_LOONGARCH64) + DWORD floatFieldFlags = 0; +#endif bool isBackFilled = false; unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use var_types structBaseType = TYP_STRUCT; @@ -3027,7 +3012,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } #endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (isStructArg) { if (isHfaArg) @@ -3073,27 +3058,6 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) size = genTypeStSz(argx->gtType); byteSize = genTypeSize(argx); } -#elif defined(TARGET_LOONGARCH64) - DWORD floatFieldFlags = 0; - if (!isStructArg) - { - size = 1; - byteSize = genTypeSize(argx); - } - else - { - // Structs are either passed in 1 or 2 (64-bit) slots. - // Structs that are the size of 2 pointers are passed by value in multiple registers, - // if sufficient registers are available. - // Structs that are larger than 2 pointers are passed by reference (to a copy). 
- size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; - - if (size > 2) - { - size = 1; - } - byteSize = structSize; - } #else #error Unsupported or unset target architecture #endif // TARGET_XXX @@ -3117,9 +3081,13 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) compFloatingPointUsed |= passUsingFloatRegs; if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) + { size = 1; + } else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + { size = 2; + } } else // if (passStructByRef) { @@ -3284,7 +3252,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) && passUsingFloatRegs) + { passUsingFloatRegs = isRegArg = intArgRegNum < maxRegArgs; + } if (!passUsingFloatRegs) { @@ -3294,7 +3264,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) else if (passUsingFloatRegs) { if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + { nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); + } else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { assert(size == 1); @@ -3320,7 +3292,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // Check if the last register needed is still in the int argument register range. isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; if (!passUsingFloatRegs && isRegArg && (size > 1)) + { nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum + 1); + } // Did we run out of registers when we had a 16-byte struct (size===2) ? 
// (i.e we only have one register remaining but we needed two registers to pass this arg) @@ -3502,23 +3476,20 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { if ((size > 1) && ((intArgRegNum + 1) == maxRegArgs) && (nextOtherRegNum == REG_STK)) { -#if FEATURE_ARG_SPLIT - // This indicates a partial enregistration of a struct type - assert((isStructArg) || argx->OperIs(GT_FIELD_LIST) || argx->OperIsCopyBlkOp() || - (argx->gtOper == GT_COMMA && (argx->gtFlags & GTF_ASG))); - call->fgArgInfo->SplitArg(argIndex, 1, 1); -#endif // FEATURE_ARG_SPLIT assert(!passUsingFloatRegs); assert(size == 2); - // assert(nextOtherRegNum == REG_STK); intArgRegNum = maxRegArgs; } else if ((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) == 0x0) { if (passUsingFloatRegs) + { fltArgRegNum += 1; + } else + { intArgRegNum += size; + } } else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) { @@ -4927,8 +4898,9 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else { - assert(!"----------------unimplemented type-case... on LOONGARCH"); - unreached(); + NYI_LOONGARCH64("fgMorphMultiregStructArg -- GT_LCL_FLD,GT_LCL_VAR"); + tmp_type_1 = TYP_UNDEF; + tmp_type_2 = TYP_UNDEF; } elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 8 : 4; @@ -5014,8 +4986,9 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else { - assert(!"----------------unimplemented type-case... on LOONGARCH"); - unreached(); + NYI_LOONGARCH64("fgMorphMultiregStructArg -- GT_OBJ struct"); + tmp_type_1 = TYP_UNDEF; + tmp_type_2 = TYP_UNDEF; } elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 
8 : 4; diff --git a/src/coreclr/jit/register_arg_convention.cpp b/src/coreclr/jit/register_arg_convention.cpp index 755dd28915684..1b5d1839b5e4c 100644 --- a/src/coreclr/jit/register_arg_convention.cpp +++ b/src/coreclr/jit/register_arg_convention.cpp @@ -48,7 +48,7 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) { assert(varTypeIsStruct(type)); nextReg(TYP_INT, 1); // TYP_BYREF - } // TODO:struct-float. + } else { nextReg(type, numRegs); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 536ef627d6062..e5bf31e7f66c2 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -92,7 +92,7 @@ inline bool compUnixX86Abi() // be assigned during register allocation. // REG_NA - Used to indicate that a register is either not yet assigned or not required. // -#if defined(TARGET_ARM) +#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) enum _regNumber_enum : unsigned { #define REGDEF(name, rnum, mask, sname) REG_##name = rnum, @@ -177,27 +177,6 @@ enum _regMask_enum : unsigned #include "register.h" }; -#elif defined(TARGET_LOONGARCH64) - -enum _regNumber_enum : unsigned -{ -#define REGDEF(name, rnum, mask, sname) REG_##name = rnum, -#define REGALIAS(alias, realname) REG_##alias = REG_##realname, -#include "register.h" - - REG_COUNT, - REG_NA = REG_COUNT, - ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs) -}; - -enum _regMask_enum : unsigned __int64 -{ - RBM_NONE = 0, -#define REGDEF(name, rnum, mask, sname) RBM_##name = mask, -#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, -#include "register.h" -}; - #else #error Unsupported target architecture #endif diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index daf251b33477d..25355994d385b 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -8,7 +8,7 @@ // NOTE for LoongArch64: // The `REG_R21` which alias `REG_X0` is specially reserved 
!!! -// It can be used only by manully and should be very careful!!! +// It can be used only manully and very carefully!!! // clang-format off #define CPU_LOAD_STORE_ARCH 1 diff --git a/src/coreclr/jit/unwind.h b/src/coreclr/jit/unwind.h index bb93348cc2fdd..ae9a19a4b37f3 100644 --- a/src/coreclr/jit/unwind.h +++ b/src/coreclr/jit/unwind.h @@ -10,7 +10,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -////TODO for LOONGARCH64: should seperately define for loongarch64. #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Windows no longer imposes a maximum prolog size. However, we still have an @@ -138,11 +137,9 @@ class UnwindCodesBase { #if defined(TARGET_ARM) return b >= 0xFD; -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return (b == UWC_END); // TODO-ARM64-Bug?: what about the "end_c" code? -#elif defined(TARGET_LOONGARCH64) - return (b == UWC_END); -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } #ifdef DEBUG @@ -875,4 +872,4 @@ void DumpUnwindInfo(Compiler* comp, #endif // DEBUG -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 From 66e495356b20e39c78908f9ba45365aa2994a45c Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Sat, 19 Mar 2022 21:46:47 +0800 Subject: [PATCH 37/46] [LoongArch64] amend the output format of `emitDisInsName`. 
--- src/coreclr/jit/emitloongarch64.cpp | 849 ++++++++++++++-------------- src/coreclr/jit/emitloongarch64.h | 3 +- 2 files changed, 431 insertions(+), 421 deletions(-) diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 098227a7ce230..a3f2a226f8923 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2679,15 +2679,15 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t return callInstrSize; } -/***************************************************************************** - * LoongArch64 has an individual implementation for emitJumpDistBind(). - * - * Bind targets of relative jumps/branch to choose the smallest possible encoding. - * LoongArch64 has a small medium, and large encoding. - * - * Even though the small encoding is offset-18bits which lowest 2bits is always 0. - * The small encoding as the default is fit for most cases. - */ +//---------------------------------------------------------------------------------- +// LoongArch64 has an individual implementation for emitJumpDistBind(). +// +// Bind targets of relative jumps/branch to choose the smallest possible encoding. +// LoongArch64 has a small medium, and large encoding. +// +// Even though the small encoding is offset-18bits which lowest 2bits is always 0. +// The small encoding as the default is fit for most cases. 
+// void emitter::emitJumpDistBind() { @@ -2836,15 +2836,6 @@ void emitter::emitJumpDistBind() /* First time we've seen this label, convert its target */ CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef DEBUG - if (EMITVERBOSE) - { - printf("Binding: "); - emitDispIns(jmp, false, false, false); - printf("Binding L_M%03u_" FMT_BB, emitComp->compMethodID, jmp->idAddr()->iiaBBlabel->bbNum); - } -#endif // DEBUG - tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel); #ifdef DEBUG @@ -3856,7 +3847,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) emitDisInsName(*cp, (BYTE*)cp, id); cp++; } - // emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); } if (emitComp->compDebugBreak) @@ -3892,26 +3882,41 @@ static const char* const RegNames[] = }; // clang-format on -/**************************************************************************** - * - * Display the given instruction. - */ +//---------------------------------------------------------------------------------------- +// Disassemble the given instruction. +// The `emitter::emitDisInsName` is focused on the most important for debugging. +// So it implemented as far as simply and independently which is very useful for +// porting easily to the release mode. +// +// Arguments: +// code - The instruction's encoding. +// addr - The address of the code. +// id - The instrDesc of the code if needed. +// +// Note: +// The length of the instruction's name include aligned space is 13. +// -// NOTE: At least 32bytes within dst. 
-void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) +void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id) { - const BYTE* insstrs = dst; + const BYTE* insAdr = addr; + const char* const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; - if (!code) + unsigned int opcode = (code >> 26) & 0x3f; + + bool disOpcode = !emitComp->opts.disDiffable; + bool disAddr = emitComp->opts.disAddr; + if (disAddr) { - printf("LOONGARCH invalid instruction: 0x%x\n", code); - assert(!"invalid inscode on LOONGARCH!"); - return; + printf(" 0x%llx", insAdr); } - const char* const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; + printf(" "); - unsigned int opcode = (code >> 26) & 0x3f; + if (disOpcode) + { + printf("%08X ", code); + } // bits: 31-26,MSB6 switch (opcode) @@ -3919,29 +3924,25 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x0: { goto Label_OPCODE_0; - // break; } case 0x2: { goto Label_OPCODE_2; - // break; } case 0x3: { goto Label_OPCODE_3; - // break; } case 0xe: { goto Label_OPCODE_E; - // break; } case LA_2RI16_ADDU16I_D: // 0x4 { const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; short si16 = (code >> 10) & 0xffff; - printf(" 0x%llx addu16i.d %s, %s, %d\n", insstrs, rd, rj, si16); + printf("addu16i.d %s, %s, %d\n", rd, rj, si16); return; } case 0x5: @@ -3955,27 +3956,27 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode) { case LA_1RI20_LU12I_W: - printf(" 0x%llx lu12i.w %s, 0x%x\n", insstrs, rd, si20); + printf("lu12i.w %s, 0x%x\n", rd, si20); return; case LA_1RI20_LU32I_D: - printf(" 0x%llx lu32i.d %s, 0x%x\n", insstrs, rd, si20); + printf("lu32i.d %s, 0x%x\n", rd, si20); return; case LA_1RI20_PCADDI: - printf(" 0x%llx pcaddi %s, 0x%x\n", insstrs, rd, si20); + printf("pcaddi %s, 0x%x\n", rd, si20); return; case LA_1RI20_PCALAU12I: - printf(" 0x%llx 
pcalau12i %s, 0x%x\n", insstrs, rd, si20); + printf("pcalau12i %s, 0x%x\n", rd, si20); return; case LA_1RI20_PCADDU12I: - printf(" 0x%llx pcaddu12i %s, 0x%x\n", insstrs, rd, si20); + printf("pcaddu12i %s, 0x%x\n", rd, si20); return; case LA_1RI20_PCADDU18I: { - printf(" 0x%llx pcaddu18i %s, 0x%x\n", insstrs, rd, si20); + printf("pcaddu18i %s, 0x%x\n", rd, si20); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -3992,31 +3993,31 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode) { case LA_2RI14_LL_W: - printf(" 0x%llx ll.w %s, %s, %d\n", insstrs, rd, rj, si14); + printf("ll.w %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_SC_W: - printf(" 0x%llx sc.w %s, %s, %d\n", insstrs, rd, rj, si14); + printf("sc.w %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_LL_D: - printf(" 0x%llx ll.d %s, %s, %d\n", insstrs, rd, rj, si14); + printf("ll.d %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_SC_D: - printf(" 0x%llx sc.d %s, %s, %d\n", insstrs, rd, rj, si14); + printf("sc.d %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_LDPTR_W: - printf(" 0x%llx ldptr.w %s, %s, %d\n", insstrs, rd, rj, si14); + printf("ldptr.w %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_STPTR_W: - printf(" 0x%llx stptr.w %s, %s, %d\n", insstrs, rd, rj, si14); + printf("stptr.w %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_LDPTR_D: - printf(" 0x%llx ldptr.d %s, %s, %d\n", insstrs, rd, rj, si14); + printf("ldptr.d %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_STPTR_D: - printf(" 0x%llx stptr.d %s, %s, %d\n", insstrs, rd, rj, si14); + printf("stptr.d %s, %s, %d\n", rd, rj, si14); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4033,55 +4034,55 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch 
(inscode) { case LA_2RI12_LD_B: - printf(" 0x%llx ld.b %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.b %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_H: - printf(" 0x%llx ld.h %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.h %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_W: - printf(" 0x%llx ld.w %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.w %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_D: - printf(" 0x%llx ld.d %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.d %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_ST_B: - printf(" 0x%llx st.b %s, %s, %d\n", insstrs, rd, rj, si12); + printf("st.b %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_ST_H: - printf(" 0x%llx st.h %s, %s, %d\n", insstrs, rd, rj, si12); + printf("st.h %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_ST_W: - printf(" 0x%llx st.w %s, %s, %d\n", insstrs, rd, rj, si12); + printf("st.w %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_ST_D: - printf(" 0x%llx st.d %s, %s, %d\n", insstrs, rd, rj, si12); + printf("st.d %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_BU: - printf(" 0x%llx ld.bu %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.bu %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_HU: - printf(" 0x%llx ld.hu %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.hu %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_WU: - printf(" 0x%llx ld.wu %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.wu %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_PRELD: - assert(!"unimplemented on loongarch yet!"); + NYI_LOONGARCH64("unused instr LA_2RI12_PRELD"); return; case LA_2RI12_FLD_S: - printf(" 0x%llx fld.s %s, %s, %d\n", insstrs, fd, rj, si12); + printf("fld.s %s, %s, %d\n", fd, rj, si12); return; case LA_2RI12_FST_S: - printf(" 0x%llx fst.s %s, %s, %d\n", insstrs, fd, rj, si12); + printf("fst.s %s, %s, %d\n", fd, rj, si12); return; case LA_2RI12_FLD_D: - printf(" 0x%llx fld.d %s, %s, %d\n", insstrs, fd, rj, si12); + 
printf("fld.d %s, %s, %d\n", fd, rj, si12); return; case LA_2RI12_FST_D: - printf(" 0x%llx fst.d %s, %s, %d\n", insstrs, fd, rj, si12); + printf("fst.d %s, %s, %d\n", fd, rj, si12); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4091,7 +4092,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; - printf(" 0x%llx beqz %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); + printf("beqz %s, 0x%llx\n", rj, (int64_t)insAdr + offs21); return; } case LA_1RI21_BNEZ: // 0x11 @@ -4099,7 +4100,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; - printf(" 0x%llx bnez %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); + printf("bnez %s, 0x%llx\n", rj, (int64_t)insAdr + offs21); return; } case 0x12: @@ -4111,17 +4112,17 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) offs21 >>= 9; if (0 == ((code >> 8) & 0x3)) { - printf(" 0x%llx bceqz %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); + printf("bceqz %s, 0x%llx\n", cj, (int64_t)insAdr + offs21); return; } else if (1 == ((code >> 8) & 0x3)) { - printf(" 0x%llx bcnez %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); + printf("bcnez %s, 0x%llx\n", cj, (int64_t)insAdr + offs21); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4137,11 +4138,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) assert(0 < id->idDebugOnlyInfo()->idMemCookie); const char* methodName; methodName = 
emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); - printf(" 0x%llx jirl %s, %s, %d #%s\n", insstrs, rd, rj, offs16, methodName); + printf("jirl %s, %s, %d #%s\n", rd, rj, offs16, methodName); } else { - printf(" 0x%llx jirl %s, %s, %d\n", insstrs, rd, rj, offs16); + printf("jirl %s, %s, %d\n", rd, rj, offs16); } return; } @@ -4149,14 +4150,14 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; offs26 >>= 4; - printf(" 0x%llx b 0x%llx\n", insstrs, (int64_t)insstrs + offs26); + printf("b 0x%llx\n", (int64_t)insAdr + offs26); return; } case LA_I26_BL: // 0x15 { int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; offs26 >>= 4; - printf(" 0x%llx bl 0x%llx\n", insstrs, (int64_t)insstrs + offs26); + printf("bl 0x%llx\n", (int64_t)insAdr + offs26); return; } case LA_2RI16_BEQ: // 0x16 @@ -4165,7 +4166,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx beq %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + printf("beq %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } case LA_2RI16_BNE: // 0x17 @@ -4174,7 +4175,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx bne %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + printf("bne %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } case LA_2RI16_BLT: // 0x18 @@ -4183,7 +4184,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx blt %s, %s, 0x%llx\n", insstrs, rj, rd, 
(int64_t)insstrs + offs16); + printf("blt %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } case LA_2RI16_BGE: // 0x19 @@ -4192,7 +4193,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx bge %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + printf("bge %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } case LA_2RI16_BLTU: // 0x1a @@ -4201,7 +4202,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx bltu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + printf("bltu %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } case LA_2RI16_BGEU: // 0x1b @@ -4210,12 +4211,12 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx bgeu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + printf("bgeu %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } @@ -4246,80 +4247,80 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode3) { case LA_2R_CLO_W: - printf(" 0x%llx clo.w %s, %s\n", insstrs, rd, rj); + printf("clo.w %s, %s\n", rd, rj); return; case LA_2R_CLZ_W: - printf(" 0x%llx clz.w %s, %s\n", insstrs, rd, rj); + printf("clz.w %s, %s\n", rd, rj); return; case LA_2R_CTO_W: - printf(" 0x%llx cto.w %s, %s\n", insstrs, rd, rj); + printf("cto.w %s, %s\n", rd, rj); return; case LA_2R_CTZ_W: - printf(" 0x%llx ctz.w %s, %s\n", insstrs, rd, rj); + printf("ctz.w %s, %s\n", rd, rj); 
return; case LA_2R_CLO_D: - printf(" 0x%llx clo.d %s, %s\n", insstrs, rd, rj); + printf("clo.d %s, %s\n", rd, rj); return; case LA_2R_CLZ_D: - printf(" 0x%llx clz.d %s, %s\n", insstrs, rd, rj); + printf("clz.d %s, %s\n", rd, rj); return; case LA_2R_CTO_D: - printf(" 0x%llx cto.d %s, %s\n", insstrs, rd, rj); + printf("cto.d %s, %s\n", rd, rj); return; case LA_2R_CTZ_D: - printf(" 0x%llx ctz.d %s, %s\n", insstrs, rd, rj); + printf("ctz.d %s, %s\n", rd, rj); return; case LA_2R_REVB_2H: - printf(" 0x%llx revb.2h %s, %s\n", insstrs, rd, rj); + printf("revb.2h %s, %s\n", rd, rj); return; case LA_2R_REVB_4H: - printf(" 0x%llx revb.4h %s, %s\n", insstrs, rd, rj); + printf("revb.4h %s, %s\n", rd, rj); return; case LA_2R_REVB_2W: - printf(" 0x%llx revb.2w %s, %s\n", insstrs, rd, rj); + printf("revb.2w %s, %s\n", rd, rj); return; case LA_2R_REVB_D: - printf(" 0x%llx revb.d %s, %s\n", insstrs, rd, rj); + printf("revb.d %s, %s\n", rd, rj); return; case LA_2R_REVH_2W: - printf(" 0x%llx revh.2w %s, %s\n", insstrs, rd, rj); + printf("revh.2w %s, %s\n", rd, rj); return; case LA_2R_REVH_D: - printf(" 0x%llx revh.d %s, %s\n", insstrs, rd, rj); + printf("revh.d %s, %s\n", rd, rj); return; case LA_2R_BITREV_4B: - printf(" 0x%llx bitrev.4b %s, %s\n", insstrs, rd, rj); + printf("bitrev.4b %s, %s\n", rd, rj); return; case LA_2R_BITREV_8B: - printf(" 0x%llx bitrev.8b %s, %s\n", insstrs, rd, rj); + printf("bitrev.8b %s, %s\n", rd, rj); return; case LA_2R_BITREV_W: - printf(" 0x%llx bitrev.w %s, %s\n", insstrs, rd, rj); + printf("bitrev.w %s, %s\n", rd, rj); return; case LA_2R_BITREV_D: - printf(" 0x%llx bitrev.d %s, %s\n", insstrs, rd, rj); + printf("bitrev.d %s, %s\n", rd, rj); return; case LA_2R_EXT_W_H: - printf(" 0x%llx ext.w.h %s, %s\n", insstrs, rd, rj); + printf("ext.w.h %s, %s\n", rd, rj); return; case LA_2R_EXT_W_B: - printf(" 0x%llx ext.w.b %s, %s\n", insstrs, rd, rj); + printf("ext.w.b %s, %s\n", rd, rj); return; case LA_2R_RDTIMEL_W: - printf(" 0x%llx rdtimel.w %s, %s\n", 
insstrs, rd, rj); + printf("rdtimel.w %s, %s\n", rd, rj); return; case LA_2R_RDTIMEH_W: - printf(" 0x%llx rdtimeh.w %s, %s\n", insstrs, rd, rj); + printf("rdtimeh.w %s, %s\n", rd, rj); return; case LA_2R_RDTIME_D: - printf(" 0x%llx rdtime.d %s, %s\n", insstrs, rd, rj); + printf("rdtime.d %s, %s\n", rd, rj); return; case LA_2R_CPUCFG: - printf(" 0x%llx cpucfg %s, %s\n", insstrs, rd, rj); + printf("cpucfg %s, %s\n", rd, rj); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4328,18 +4329,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx asrtle.d %s, %s\n", insstrs, rj, rk); + printf("asrtle.d %s, %s\n", rj, rk); return; } case LA_2R_ASRTGT_D: { const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx asrtgt.d %s, %s\n", insstrs, rj, rk); + printf("asrtgt.d %s, %s\n", rj, rk); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4354,17 +4355,17 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) unsigned int sa2 = (code >> 15) & 0x3; if (0 == ((code >> 17) & 0x1)) { - printf(" 0x%llx alsl.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); + printf("alsl.w %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); return; } else if (1 == ((code >> 17) & 0x1)) { - printf(" 0x%llx alsl.wu %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); + printf("alsl.wu %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4375,7 +4376,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj 
= RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa2 = (code >> 15) & 0x3; - printf(" 0x%llx bytepick.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa2); + printf("bytepick.w %s, %s, %s, %d\n", rd, rj, rk, sa2); return; } case LA_OP_BYTEPICK_D: // 0x3 @@ -4384,7 +4385,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa3 = (code >> 15) & 0x7; - printf(" 0x%llx bytepick.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa3); + printf("bytepick.d %s, %s, %s, %d\n", rd, rj, rk, sa3); return; } case 0x4: @@ -4403,145 +4404,145 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode2) { case LA_3R_ADD_W: - printf(" 0x%llx add.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("add.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_ADD_D: - printf(" 0x%llx add.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("add.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SUB_W: - printf(" 0x%llx sub.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sub.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SUB_D: - printf(" 0x%llx sub.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sub.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SLT: - printf(" 0x%llx slt %s, %s, %s\n", insstrs, rd, rj, rk); + printf("slt %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SLTU: - printf(" 0x%llx sltu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sltu %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MASKEQZ: - printf(" 0x%llx maskeqz %s, %s, %s\n", insstrs, rd, rj, rk); + printf("maskeqz %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MASKNEZ: - printf(" 0x%llx masknez %s, %s, %s\n", insstrs, rd, rj, rk); + printf("masknez %s, %s, %s\n", rd, rj, rk); return; case LA_3R_NOR: - printf(" 0x%llx nor %s, %s, %s\n", insstrs, rd, rj, rk); + printf("nor %s, %s, %s\n", rd, rj, rk); return; case LA_3R_AND: - printf(" 
0x%llx and %s, %s, %s\n", insstrs, rd, rj, rk); + printf("and %s, %s, %s\n", rd, rj, rk); return; case LA_3R_OR: - printf(" 0x%llx or %s, %s, %s\n", insstrs, rd, rj, rk); + printf("or %s, %s, %s\n", rd, rj, rk); return; case LA_3R_XOR: - printf(" 0x%llx xor %s, %s, %s\n", insstrs, rd, rj, rk); + printf("xor %s, %s, %s\n", rd, rj, rk); return; case LA_3R_ORN: - printf(" 0x%llx orn %s, %s, %s\n", insstrs, rd, rj, rk); + printf("orn %s, %s, %s\n", rd, rj, rk); return; case LA_3R_ANDN: - printf(" 0x%llx andn %s, %s, %s\n", insstrs, rd, rj, rk); + printf("andn %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SLL_W: - printf(" 0x%llx sll.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sll.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SRL_W: - printf(" 0x%llx srl.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("srl.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SRA_W: - printf(" 0x%llx sra.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sra.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SLL_D: - printf(" 0x%llx sll.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sll.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SRL_D: - printf(" 0x%llx srl.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("srl.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SRA_D: - printf(" 0x%llx sra.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sra.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_ROTR_W: - printf(" 0x%llx rotr.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("rotr.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_ROTR_D: - printf(" 0x%llx rotr.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("rotr.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MUL_W: - printf(" 0x%llx mul.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mul.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULH_W: - printf(" 0x%llx mulh.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mulh.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULH_WU: - printf(" 0x%llx mulh.wu %s, %s, %s\n", insstrs, rd, rj, rk); 
+ printf("mulh.wu %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MUL_D: - printf(" 0x%llx mul.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mul.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULH_D: - printf(" 0x%llx mulh.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mulh.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULH_DU: - printf(" 0x%llx mulh.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mulh.du %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULW_D_W: - printf(" 0x%llx mulw.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mulw.d.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULW_D_WU: - printf(" 0x%llx mulw.d.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mulw.d.wu %s, %s, %s\n", rd, rj, rk); return; case LA_3R_DIV_W: - printf(" 0x%llx div.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("div.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MOD_W: - printf(" 0x%llx mod.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mod.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_DIV_WU: - printf(" 0x%llx div.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("div.wu %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MOD_WU: - printf(" 0x%llx mod.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mod.wu %s, %s, %s\n", rd, rj, rk); return; case LA_3R_DIV_D: - printf(" 0x%llx div.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("div.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MOD_D: - printf(" 0x%llx mod.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mod.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_DIV_DU: - printf(" 0x%llx div.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("div.du %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MOD_DU: - printf(" 0x%llx mod.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mod.du %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRC_W_B_W: - printf(" 0x%llx crc.w.b.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crc.w.b.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRC_W_H_W: - printf(" 0x%llx crc.w.h.w %s, %s, 
%s\n", insstrs, rd, rj, rk); + printf("crc.w.h.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRC_W_W_W: - printf(" 0x%llx crc.w.w.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crc.w.w.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRC_W_D_W: - printf(" 0x%llx crc.w.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crc.w.d.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRCC_W_B_W: - printf(" 0x%llx crcc.w.b.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crcc.w.b.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRCC_W_H_W: - printf(" 0x%llx crcc.w.h.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crcc.w.h.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRCC_W_W_W: - printf(" 0x%llx crcc.w.w.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crcc.w.w.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRCC_W_D_W: - printf(" 0x%llx crcc.w.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crcc.w.d.w %s, %s, %s\n", rd, rj, rk); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -4553,16 +4554,16 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode2) { case LA_OP_BREAK: - printf(" 0x%llx break 0x%x\n", insstrs, codefield); + printf("break 0x%x\n", codefield); return; case LA_OP_DBGCALL: - printf(" 0x%llx dbgcall 0x%x\n", insstrs, codefield); + printf("dbgcall 0x%x\n", codefield); return; case LA_OP_SYSCALL: - printf(" 0x%llx syscall 0x%x\n", insstrs, codefield); + printf("syscall 0x%x\n", codefield); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -4572,11 +4573,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa2 = (code >> 15) & 0x3; - printf(" 0x%llx alsl.d %s, %s, %s, %d\n", insstrs, rd, 
rj, rk, (sa2 + 1)); + printf("alsl.d %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4593,17 +4594,17 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) unsigned int msbw = (code >> 16) & 0x1f; if (!(code & 0x8000)) { - printf(" 0x%llx bstrins.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); + printf("bstrins.w %s, %s, %d, %d\n", rd, rj, msbw, lsbw); return; } else if (code & 0x8000) { - printf(" 0x%llx bstrpick.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); + printf("bstrpick.w %s, %s, %d, %d\n", rd, rj, msbw, lsbw); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -4622,18 +4623,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; - printf(" 0x%llx slli.w %s, %s, %d\n", insstrs, rd, rj, ui5); + printf("slli.w %s, %s, %d\n", rd, rj, ui5); return; } else if (1 == ((code >> 16) & 0x3)) { unsigned int ui6 = (code >> 10) & 0x3f; - printf(" 0x%llx slli.d %s, %s, %d\n", insstrs, rd, rj, ui6); + printf("slli.d %s, %s, %d\n", rd, rj, ui6); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4647,18 +4648,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; - printf(" 0x%llx srli.w %s, %s, %d\n", insstrs, rd, rj, ui5); + printf("srli.w %s, %s, %d\n", rd, rj, ui5); return; } else if (1 == ((code >> 16) & 0x3)) { unsigned int ui6 = (code >> 10) & 0x3f; - printf(" 0x%llx srli.d %s, %s, %d\n", insstrs, rd, rj, ui6); + printf("srli.d %s, %s, %d\n", rd, rj, ui6); return; } else { - printf("LOONGARCH 
illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4672,18 +4673,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; - printf(" 0x%llx srai.w %s, %s, %d\n", insstrs, rd, rj, ui5); + printf("srai.w %s, %s, %d\n", rd, rj, ui5); return; } else if (1 == ((code >> 16) & 0x3)) { unsigned int ui6 = (code >> 10) & 0x3f; - printf(" 0x%llx srai.d %s, %s, %d\n", insstrs, rd, rj, ui6); + printf("srai.d %s, %s, %d\n", rd, rj, ui6); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4697,24 +4698,24 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; - printf(" 0x%llx rotri.w %s, %s, %d\n", insstrs, rd, rj, ui5); + printf("rotri.w %s, %s, %d\n", rd, rj, ui5); return; } else if (1 == ((code >> 16) & 0x3)) { unsigned int ui6 = (code >> 10) & 0x3f; - printf(" 0x%llx rotri.d %s, %s, %d\n", insstrs, rd, rj, ui6); + printf("rotri.d %s, %s, %d\n", rd, rj, ui6); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4727,7 +4728,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; - printf(" 0x%llx bstrins.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); + printf("bstrins.d %s, %s, %d, %d\n", rd, rj, msbd, lsbd); return; } case LA_OP_BSTRPICK_D: @@ -4736,7 +4737,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, 
instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; - printf(" 0x%llx bstrpick.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); + printf("bstrpick.d %s, %s, %d, %d\n", rd, rj, msbd, lsbd); return; } case 0x4: @@ -4752,64 +4753,64 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode1) { case LA_3R_FADD_S: - printf(" 0x%llx fadd.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fadd.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FADD_D: - printf(" 0x%llx fadd.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fadd.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FSUB_S: - printf(" 0x%llx fsub.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fsub.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FSUB_D: - printf(" 0x%llx fsub.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fsub.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMUL_S: - printf(" 0x%llx fmul.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmul.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMUL_D: - printf(" 0x%llx fmul.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmul.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FDIV_S: - printf(" 0x%llx fdiv.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fdiv.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FDIV_D: - printf(" 0x%llx fdiv.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fdiv.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMAX_S: - printf(" 0x%llx fmax.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmax.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMAX_D: - printf(" 0x%llx fmax.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmax.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMIN_S: - printf(" 0x%llx fmin.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmin.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMIN_D: - printf(" 0x%llx fmin.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmin.d %s, 
%s, %s\n", fd, fj, fk); return; case LA_3R_FMAXA_S: - printf(" 0x%llx fmaxa.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmaxa.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMAXA_D: - printf(" 0x%llx fmaxa.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmaxa.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMINA_S: - printf(" 0x%llx fmina.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmina.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMINA_D: - printf(" 0x%llx fmina.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmina.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FSCALEB_S: - printf(" 0x%llx fscaleb.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fscaleb.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FSCALEB_D: - printf(" 0x%llx fscaleb.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fscaleb.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FCOPYSIGN_S: - printf(" 0x%llx fcopysign.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fcopysign.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FCOPYSIGN_D: - printf(" 0x%llx fcopysign.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fcopysign.d %s, %s, %s\n", fd, fj, fk); return; case 0x228: case 0x229: @@ -4825,194 +4826,194 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode2) { case LA_2R_FABS_S: - printf(" 0x%llx fabs.s %s, %s\n", insstrs, fd, fj); + printf("fabs.s %s, %s\n", fd, fj); return; case LA_2R_FABS_D: - printf(" 0x%llx fabs.d %s, %s\n", insstrs, fd, fj); + printf("fabs.d %s, %s\n", fd, fj); return; case LA_2R_FNEG_S: - printf(" 0x%llx fneg.s %s, %s\n", insstrs, fd, fj); + printf("fneg.s %s, %s\n", fd, fj); return; case LA_2R_FNEG_D: - printf(" 0x%llx fneg.d %s, %s\n", insstrs, fd, fj); + printf("fneg.d %s, %s\n", fd, fj); return; case LA_2R_FLOGB_S: - printf(" 0x%llx flogb.s %s, %s\n", insstrs, fd, fj); + printf("flogb.s %s, %s\n", fd, fj); return; case LA_2R_FLOGB_D: - printf(" 0x%llx flogb.d %s, %s\n", insstrs, fd, fj); + printf("flogb.d %s, 
%s\n", fd, fj); return; case LA_2R_FCLASS_S: - printf(" 0x%llx fclass.s %s, %s\n", insstrs, fd, fj); + printf("fclass.s %s, %s\n", fd, fj); return; case LA_2R_FCLASS_D: - printf(" 0x%llx fclass.d %s, %s\n", insstrs, fd, fj); + printf("fclass.d %s, %s\n", fd, fj); return; case LA_2R_FSQRT_S: - printf(" 0x%llx fsqrt.s %s, %s\n", insstrs, fd, fj); + printf("fsqrt.s %s, %s\n", fd, fj); return; case LA_2R_FSQRT_D: - printf(" 0x%llx fsqrt.d %s, %s\n", insstrs, fd, fj); + printf("fsqrt.d %s, %s\n", fd, fj); return; case LA_2R_FRECIP_S: - printf(" 0x%llx frecip.s %s, %s\n", insstrs, fd, fj); + printf("frecip.s %s, %s\n", fd, fj); return; case LA_2R_FRECIP_D: - printf(" 0x%llx frecip.d %s, %s\n", insstrs, fd, fj); + printf("frecip.d %s, %s\n", fd, fj); return; case LA_2R_FRSQRT_S: - printf(" 0x%llx frsqrt.s %s, %s\n", insstrs, fd, fj); + printf("frsqrt.s %s, %s\n", fd, fj); return; case LA_2R_FRSQRT_D: - printf(" 0x%llx frsqrt.d %s, %s\n", insstrs, fd, fj); + printf("frsqrt.d %s, %s\n", fd, fj); return; case LA_2R_FMOV_S: - printf(" 0x%llx fmov.s %s, %s\n", insstrs, fd, fj); + printf("fmov.s %s, %s\n", fd, fj); return; case LA_2R_FMOV_D: - printf(" 0x%llx fmov.d %s, %s\n", insstrs, fd, fj); + printf("fmov.d %s, %s\n", fd, fj); return; case LA_2R_MOVGR2FR_W: - printf(" 0x%llx movgr2fr.w %s, %s\n", insstrs, fd, rj); + printf("movgr2fr.w %s, %s\n", fd, rj); return; case LA_2R_MOVGR2FR_D: - printf(" 0x%llx movgr2fr.d %s, %s\n", insstrs, fd, rj); + printf("movgr2fr.d %s, %s\n", fd, rj); return; case LA_2R_MOVGR2FRH_W: - printf(" 0x%llx movgr2frh.w %s, %s\n", insstrs, fd, rj); + printf("movgr2frh.w %s, %s\n", fd, rj); return; case LA_2R_MOVFR2GR_S: - printf(" 0x%llx movfr2gr.s %s, %s\n", insstrs, rd, fj); + printf("movfr2gr.s %s, %s\n", rd, fj); return; case LA_2R_MOVFR2GR_D: - printf(" 0x%llx movfr2gr.d %s, %s\n", insstrs, rd, fj); + printf("movfr2gr.d %s, %s\n", rd, fj); return; case LA_2R_MOVFRH2GR_S: - printf(" 0x%llx movfrh2gr.s %s, %s\n", insstrs, rd, fj); + 
printf("movfrh2gr.s %s, %s\n", rd, fj); return; case LA_2R_MOVGR2FCSR: - assert(!"unimplemented on loongarch yet!"); + NYI_LOONGARCH64("unused instr LA_2R_MOVGR2FCSR"); return; case LA_2R_MOVFCSR2GR: - assert(!"unimplemented on loongarch yet!"); + NYI_LOONGARCH64("unused instr LA_2R_MOVFCSR2GR"); return; case LA_2R_MOVFR2CF: { const char* cd = CFregName[code & 0x7]; - printf(" 0x%llx movfr2cf %s, %s\n", insstrs, cd, fj); + printf("movfr2cf %s, %s\n", cd, fj); return; } case LA_2R_MOVCF2FR: { const char* cj = CFregName[(code >> 5) & 0x7]; - printf(" 0x%llx movcf2fr %s, %s\n", insstrs, fd, cj); + printf("movcf2fr %s, %s\n", fd, cj); return; } case LA_2R_MOVGR2CF: { const char* cd = CFregName[code & 0x7]; - printf(" 0x%llx movgr2cf %s, %s\n", insstrs, cd, rj); + printf("movgr2cf %s, %s\n", cd, rj); return; } case LA_2R_MOVCF2GR: { const char* cj = CFregName[(code >> 5) & 0x7]; - printf(" 0x%llx movcf2gr %s, %s\n", insstrs, rd, cj); + printf("movcf2gr %s, %s\n", rd, cj); return; } case LA_2R_FCVT_S_D: - printf(" 0x%llx fcvt.s.d %s, %s\n", insstrs, fd, fj); + printf("fcvt.s.d %s, %s\n", fd, fj); return; case LA_2R_FCVT_D_S: - printf(" 0x%llx fcvt.d.s %s, %s\n", insstrs, fd, fj); + printf("fcvt.d.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRM_W_S: - printf(" 0x%llx ftintrm.w.s %s, %s\n", insstrs, fd, fj); + printf("ftintrm.w.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRM_W_D: - printf(" 0x%llx ftintrm.w.d %s, %s\n", insstrs, fd, fj); + printf("ftintrm.w.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRM_L_S: - printf(" 0x%llx ftintrm.l.s %s, %s\n", insstrs, fd, fj); + printf("ftintrm.l.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRM_L_D: - printf(" 0x%llx ftintrm.l.d %s, %s\n", insstrs, fd, fj); + printf("ftintrm.l.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRP_W_S: - printf(" 0x%llx ftintrp.w.s %s, %s\n", insstrs, fd, fj); + printf("ftintrp.w.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRP_W_D: - printf(" 0x%llx ftintrp.w.d %s, %s\n", insstrs, fd, fj); + 
printf("ftintrp.w.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRP_L_S: - printf(" 0x%llx ftintrp.l.s %s, %s\n", insstrs, fd, fj); + printf("ftintrp.l.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRP_L_D: - printf(" 0x%llx ftintrp.l.d %s, %s\n", insstrs, fd, fj); + printf("ftintrp.l.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRZ_W_S: - printf(" 0x%llx ftintrz.w.s %s, %s\n", insstrs, fd, fj); + printf("ftintrz.w.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRZ_W_D: - printf(" 0x%llx ftintrz.w.d %s, %s\n", insstrs, fd, fj); + printf("ftintrz.w.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRZ_L_S: - printf(" 0x%llx ftintrz.l.s %s, %s\n", insstrs, fd, fj); + printf("ftintrz.l.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRZ_L_D: - printf(" 0x%llx ftintrz.l.d %s, %s\n", insstrs, fd, fj); + printf("ftintrz.l.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRNE_W_S: - printf(" 0x%llx ftintrne.w.s %s, %s\n", insstrs, fd, fj); + printf("ftintrne.w.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRNE_W_D: - printf(" 0x%llx ftintrne.w.d %s, %s\n", insstrs, fd, fj); + printf("ftintrne.w.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRNE_L_S: - printf(" 0x%llx ftintrne.l.s %s, %s\n", insstrs, fd, fj); + printf("ftintrne.l.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRNE_L_D: - printf(" 0x%llx ftintrne.l.d %s, %s\n", insstrs, fd, fj); + printf("ftintrne.l.d %s, %s\n", fd, fj); return; case LA_2R_FTINT_W_S: - printf(" 0x%llx ftint.w.s %s, %s\n", insstrs, fd, fj); + printf("ftint.w.s %s, %s\n", fd, fj); return; case LA_2R_FTINT_W_D: - printf(" 0x%llx ftint.w.d %s, %s\n", insstrs, fd, fj); + printf("ftint.w.d %s, %s\n", fd, fj); return; case LA_2R_FTINT_L_S: - printf(" 0x%llx ftint.l.s %s, %s\n", insstrs, fd, fj); + printf("ftint.l.s %s, %s\n", fd, fj); return; case LA_2R_FTINT_L_D: - printf(" 0x%llx ftint.l.d %s, %s\n", insstrs, fd, fj); + printf("ftint.l.d %s, %s\n", fd, fj); return; case LA_2R_FFINT_S_W: - printf(" 0x%llx ffint.s.w %s, %s\n", insstrs, fd, fj); + printf("ffint.s.w 
%s, %s\n", fd, fj); return; case LA_2R_FFINT_S_L: - printf(" 0x%llx ffint.s.l %s, %s\n", insstrs, fd, fj); + printf("ffint.s.l %s, %s\n", fd, fj); return; case LA_2R_FFINT_D_W: - printf(" 0x%llx ffint.d.w %s, %s\n", insstrs, fd, fj); + printf("ffint.d.w %s, %s\n", fd, fj); return; case LA_2R_FFINT_D_L: - printf(" 0x%llx ffint.d.l %s, %s\n", insstrs, fd, fj); + printf("ffint.d.l %s, %s\n", fd, fj); return; case LA_2R_FRINT_S: - printf(" 0x%llx frint.s %s, %s\n", insstrs, fd, fj); + printf("frint.s %s, %s\n", fd, fj); return; case LA_2R_FRINT_D: - printf(" 0x%llx frint.d %s, %s\n", insstrs, fd, fj); + printf("frint.d %s, %s\n", fd, fj); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -5023,7 +5024,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; - printf(" 0x%llx slti %s, %s, %d\n", insstrs, rd, rj, si12); + printf("slti %s, %s, %d\n", rd, rj, si12); return; } case LA_2RI12_SLTUI: // 0x9 @@ -5032,7 +5033,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; - printf(" 0x%llx sltui %s, %s, %d\n", insstrs, rd, rj, si12); + printf("sltui %s, %s, %d\n", rd, rj, si12); return; } case LA_2RI12_ADDI_W: // 0xa @@ -5041,7 +5042,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; - printf(" 0x%llx addi.w %s, %s, %d\n", insstrs, rd, rj, si12); + printf("addi.w %s, %s, %d\n", rd, rj, si12); return; } case LA_2RI12_ADDI_D: // 0xb @@ -5050,7 +5051,7 @@ void 
emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; - printf(" 0x%llx addi.d %s, %s, %ld\n", insstrs, rd, rj, si12); + printf("addi.d %s, %s, %ld\n", rd, rj, si12); return; } case LA_2RI12_LU52I_D: // 0xc @@ -5058,15 +5059,22 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int si12 = (code >> 10) & 0xfff; - printf(" 0x%llx lu52i.d %s, %s, 0x%x\n", insstrs, rd, rj, si12); + printf("lu52i.d %s, %s, 0x%x\n", rd, rj, si12); return; } case LA_2RI12_ANDI: // 0xd { - const char* rd = RegNames[code & 0x1f]; - const char* rj = RegNames[(code >> 5) & 0x1f]; - unsigned int ui12 = ((code >> 10) & 0xfff); - printf(" 0x%llx andi %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + if (code == 0x03400000) + { + printf("nop\n"); + } + else + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf("andi %s, %s, 0x%x\n", rd, rj, ui12); + } return; } case LA_2RI12_ORI: // 0xe @@ -5074,7 +5082,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); - printf(" 0x%llx ori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + printf("ori %s, %s, 0x%x\n", rd, rj, ui12); return; } case LA_2RI12_XORI: // 0xf @@ -5082,12 +5090,12 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); - printf(" 0x%llx xori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + printf("xori %s, %s, 0x%x\n", rd, rj, ui12); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + 
printf("LOONGARCH illegal instruction: %08X\n", code); return; } @@ -5107,7 +5115,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fmadd.s %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FMADD_D: @@ -5116,7 +5124,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fmadd.d %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FMSUB_S: @@ -5125,7 +5133,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fmsub.s %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FMSUB_D: @@ -5134,7 +5142,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fmsub.d %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FNMADD_S: @@ -5143,7 +5151,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 
0x%llx fnmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fnmadd.s %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FNMADD_D: @@ -5152,7 +5160,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fnmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fnmadd.d %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FNMSUB_S: @@ -5161,7 +5169,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fnmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fnmsub.s %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FNMSUB_D: @@ -5170,11 +5178,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fnmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fnmsub.d %s, %s, %s, %s\n", fd, fj, fk, fa); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } @@ -5194,73 +5202,73 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (cond) { case 0x0: - printf(" 0x%llx fcmp.caf.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.caf.s %s, %s, %s\n", cd, fj, fk); return; case 0x1: - printf(" 0x%llx fcmp.saf.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.saf.s %s, %s, %s\n", cd, fj, fk); return; case 0x2: - printf(" 0x%llx fcmp.clt.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.clt.s %s, 
%s, %s\n", cd, fj, fk); return; case 0x3: - printf(" 0x%llx fcmp.slt.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.slt.s %s, %s, %s\n", cd, fj, fk); return; case 0x4: - printf(" 0x%llx fcmp.ceq.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.ceq.s %s, %s, %s\n", cd, fj, fk); return; case 0x5: - printf(" 0x%llx fcmp.seq.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.seq.s %s, %s, %s\n", cd, fj, fk); return; case 0x6: - printf(" 0x%llx fcmp.cle.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cle.s %s, %s, %s\n", cd, fj, fk); return; case 0x7: - printf(" 0x%llx fcmp.sle.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sle.s %s, %s, %s\n", cd, fj, fk); return; case 0x8: - printf(" 0x%llx fcmp.cun.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cun.s %s, %s, %s\n", cd, fj, fk); return; case 0x9: - printf(" 0x%llx fcmp.sun.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sun.s %s, %s, %s\n", cd, fj, fk); return; case 0xA: - printf(" 0x%llx fcmp.cult.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cult.s %s, %s, %s\n", cd, fj, fk); return; case 0xB: - printf(" 0x%llx fcmp.sult.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sult.s %s, %s, %s\n", cd, fj, fk); return; case 0xC: - printf(" 0x%llx fcmp.cueq.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cueq.s %s, %s, %s\n", cd, fj, fk); return; case 0xD: - printf(" 0x%llx fcmp.sueq.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sueq.s %s, %s, %s\n", cd, fj, fk); return; case 0xE: - printf(" 0x%llx fcmp.cule.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cule.s %s, %s, %s\n", cd, fj, fk); return; case 0xF: - printf(" 0x%llx fcmp.sule.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sule.s %s, %s, %s\n", cd, fj, fk); return; case 0x10: - printf(" 0x%llx fcmp.cne.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cne.s %s, %s, %s\n", cd, fj, fk); return; case 0x11: - printf(" 0x%llx fcmp.sne.s %s, %s, %s\n", insstrs, cd, fj, fk); + 
printf("fcmp.sne.s %s, %s, %s\n", cd, fj, fk); return; case 0x14: - printf(" 0x%llx fcmp.cor.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cor.s %s, %s, %s\n", cd, fj, fk); return; case 0x15: - printf(" 0x%llx fcmp.sor.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sor.s %s, %s, %s\n", cd, fj, fk); return; case 0x18: - printf(" 0x%llx fcmp.cune.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cune.s %s, %s, %s\n", cd, fj, fk); return; case 0x19: - printf(" 0x%llx fcmp.sune.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sune.s %s, %s, %s\n", cd, fj, fk); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -5274,73 +5282,73 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (cond) { case 0x0: - printf(" 0x%llx fcmp.caf.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.caf.d %s, %s, %s\n", cd, fj, fk); return; case 0x1: - printf(" 0x%llx fcmp.saf.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.saf.d %s, %s, %s\n", cd, fj, fk); return; case 0x2: - printf(" 0x%llx fcmp.clt.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.clt.d %s, %s, %s\n", cd, fj, fk); return; case 0x3: - printf(" 0x%llx fcmp.slt.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.slt.d %s, %s, %s\n", cd, fj, fk); return; case 0x4: - printf(" 0x%llx fcmp.ceq.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.ceq.d %s, %s, %s\n", cd, fj, fk); return; case 0x5: - printf(" 0x%llx fcmp.seq.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.seq.d %s, %s, %s\n", cd, fj, fk); return; case 0x6: - printf(" 0x%llx fcmp.cle.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cle.d %s, %s, %s\n", cd, fj, fk); return; case 0x7: - printf(" 0x%llx fcmp.sle.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sle.d %s, %s, %s\n", cd, fj, fk); return; case 0x8: - printf(" 0x%llx fcmp.cun.d %s, %s, %s\n", insstrs, cd, fj, fk); + 
printf("fcmp.cun.d %s, %s, %s\n", cd, fj, fk); return; case 0x9: - printf(" 0x%llx fcmp.sun.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sun.d %s, %s, %s\n", cd, fj, fk); return; case 0xA: - printf(" 0x%llx fcmp.cult.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cult.d %s, %s, %s\n", cd, fj, fk); return; case 0xB: - printf(" 0x%llx fcmp.sult.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sult.d %s, %s, %s\n", cd, fj, fk); return; case 0xC: - printf(" 0x%llx fcmp.cueq.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cueq.d %s, %s, %s\n", cd, fj, fk); return; case 0xD: - printf(" 0x%llx fcmp.sueq.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sueq.d %s, %s, %s\n", cd, fj, fk); return; case 0xE: - printf(" 0x%llx fcmp.cule.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cule.d %s, %s, %s\n", cd, fj, fk); return; case 0xF: - printf(" 0x%llx fcmp.sule.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sule.d %s, %s, %s\n", cd, fj, fk); return; case 0x10: - printf(" 0x%llx fcmp.cne.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cne.d %s, %s, %s\n", cd, fj, fk); return; case 0x11: - printf(" 0x%llx fcmp.sne.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sne.d %s, %s, %s\n", cd, fj, fk); return; case 0x14: - printf(" 0x%llx fcmp.cor.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cor.d %s, %s, %s\n", cd, fj, fk); return; case 0x15: - printf(" 0x%llx fcmp.sor.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sor.d %s, %s, %s\n", cd, fj, fk); return; case 0x18: - printf(" 0x%llx fcmp.cune.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cune.d %s, %s, %s\n", cd, fj, fk); return; case 0x19: - printf(" 0x%llx fcmp.sune.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sune.d %s, %s, %s\n", cd, fj, fk); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -5350,11 +5358,11 @@ void 
emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* ca = CFregName[(code >> 15) & 0x7]; - printf(" 0x%llx fsel %s, %s, %s, %s\n", insstrs, fd, fj, fk, ca); + printf("fsel %s, %s, %s, %s\n", fd, fj, fk, ca); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } @@ -5369,7 +5377,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_H: @@ -5377,7 +5385,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.h %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_W: @@ -5385,7 +5393,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_D: @@ -5393,7 +5401,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STX_B: @@ -5401,7 +5409,7 @@ void 
emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stx.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stx.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STX_H: @@ -5409,7 +5417,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stx.h %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stx.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STX_W: @@ -5417,7 +5425,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stx.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stx.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STX_D: @@ -5425,7 +5433,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stx.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stx.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_BU: @@ -5433,7 +5441,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.bu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.bu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_HU: @@ -5441,7 +5449,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = 
RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.hu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.hu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_WU: @@ -5449,18 +5457,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.wu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_PRELDX: - assert(!"unimplemented on loongarch yet!"); + NYI_LOONGARCH64("unused instr LA_3R_PRELDX"); return; case LA_3R_FLDX_S: { const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldx.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldx.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FLDX_D: @@ -5468,7 +5476,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldx.d %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldx.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTX_S: @@ -5476,7 +5484,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstx.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fstx.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTX_D: @@ -5484,7 +5492,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstx.d %s, %s, %s\n", insstrs, fd, rj, rk); + 
printf("fstx.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_AMSWAP_W: @@ -5492,7 +5500,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amswap.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amswap.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMSWAP_D: @@ -5500,7 +5508,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amswap.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amswap.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMADD_W: @@ -5508,7 +5516,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amadd.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amadd.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMADD_D: @@ -5516,7 +5524,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amadd.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amadd.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMAND_W: @@ -5524,7 +5532,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amand.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amand.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMAND_D: @@ -5532,7 +5540,7 @@ void emitter::emitDisInsName(code_t code, const 
BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amand.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amand.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMOR_W: @@ -5540,7 +5548,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amor.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amor.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMOR_D: @@ -5548,7 +5556,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amor.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amor.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMXOR_W: @@ -5556,7 +5564,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amxor.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amxor.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMXOR_D: @@ -5564,7 +5572,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amxor.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amxor.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_W: @@ -5572,7 +5580,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 
0x1f]; - printf(" 0x%llx ammax.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_D: @@ -5580,7 +5588,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_W: @@ -5588,7 +5596,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_D: @@ -5596,7 +5604,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_WU: @@ -5604,7 +5612,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax.wu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_DU: @@ -5612,7 +5620,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax.du %s, %s, %s\n", rd, rj, rk); return; } case 
LA_3R_AMMIN_WU: @@ -5620,7 +5628,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin.wu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_DU: @@ -5628,7 +5636,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin.du %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMSWAP_DB_W: @@ -5636,7 +5644,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amswap_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amswap_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMSWAP_DB_D: @@ -5644,7 +5652,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amswap_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amswap_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMADD_DB_W: @@ -5652,7 +5660,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amadd_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amadd_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMADD_DB_D: @@ -5660,7 +5668,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* 
id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amadd_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amadd_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMAND_DB_W: @@ -5668,7 +5676,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amand_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amand_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMAND_DB_D: @@ -5676,7 +5684,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amand_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amand_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMOR_DB_W: @@ -5684,7 +5692,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amor_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMOR_DB_D: @@ -5692,7 +5700,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amor_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMXOR_DB_W: @@ -5700,7 +5708,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = 
RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amxor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amxor_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMXOR_DB_D: @@ -5708,7 +5716,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amxor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amxor_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_DB_W: @@ -5716,7 +5724,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_DB_D: @@ -5724,7 +5732,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_DB_W: @@ -5732,7 +5740,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_DB_D: @@ -5740,7 +5748,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + 
printf("ammin_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_DB_WU: @@ -5748,7 +5756,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax_db.wu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_DB_DU: @@ -5756,7 +5764,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax_db.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax_db.du %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_DB_WU: @@ -5764,7 +5772,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin_db.wu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_DB_DU: @@ -5772,19 +5780,19 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin_db.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin_db.du %s, %s, %s\n", rd, rj, rk); return; } case LA_OP_DBAR: { unsigned int hint = code & 0x7fff; - printf(" 0x%llx dbar 0x%x\n", insstrs, hint); + printf("dbar 0x%x\n", hint); return; } case LA_OP_IBAR: { unsigned int hint = code & 0x7fff; - printf(" 0x%llx ibar 0x%x\n", insstrs, hint); + printf("ibar 0x%x\n", hint); return; } case LA_3R_FLDGT_S: @@ -5792,7 +5800,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const 
char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldgt.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldgt.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FLDGT_D: @@ -5800,7 +5808,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldgt.d %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldgt.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FLDLE_S: @@ -5808,7 +5816,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldle.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldle.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FLDLE_D: @@ -5816,7 +5824,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldle.d %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldle.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTGT_S: @@ -5824,7 +5832,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstgt.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fstgt.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTGT_D: @@ -5832,7 +5840,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = 
RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstgt.d %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fstgt.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTLE_S: @@ -5840,7 +5848,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstle.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fstle.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTLE_D: @@ -5848,7 +5856,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstle.d %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fstle.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_LDGT_B: @@ -5856,7 +5864,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldgt.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldgt.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDGT_H: @@ -5864,7 +5872,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldgt.h %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldgt.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDGT_W: @@ -5872,7 +5880,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldgt.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldgt.w %s, %s, %s\n", rd, rj, rk); 
return; } case LA_3R_LDGT_D: @@ -5880,7 +5888,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldgt.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldgt.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDLE_B: @@ -5888,7 +5896,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldle.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldle.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDLE_H: @@ -5896,7 +5904,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldle.h %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldle.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDLE_W: @@ -5904,7 +5912,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldle.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldle.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDLE_D: @@ -5912,7 +5920,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldle.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldle.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STGT_B: @@ -5920,7 +5928,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; 
const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stgt.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stgt.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STGT_H: @@ -5928,7 +5936,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stgt.h %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stgt.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STGT_W: @@ -5936,7 +5944,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stgt.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stgt.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STGT_D: @@ -5944,7 +5952,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stgt.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stgt.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STLE_B: @@ -5952,7 +5960,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stle.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stle.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STLE_H: @@ -5960,7 +5968,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stle.h %s, %s, %s\n", insstrs, rd, rj, rk); + 
printf("stle.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STLE_W: @@ -5968,7 +5976,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stle.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stle.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STLE_D: @@ -5976,11 +5984,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stle.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stle.d %s, %s, %s\n", rd, rj, rk); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -6009,8 +6017,11 @@ void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) void emitter::emitDispIns( instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) -{ // not used on loongarch64. - printf("------------not implements emitDispIns() for loongarch64!!!\n"); +{ + // LA implements this similarly via `emitter::emitDisInsName`. + // For LA, the full `emitDispIns` would be overly complicated. + // `emitter::emitDisInsName` is focused on the information most important for debugging.
+ NYI_LOONGARCH64("LA not used the emitter::emitDispIns"); } /***************************************************************************** @@ -6278,7 +6289,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) { - NYI_LOONGARCH64("emitInsBinary-----unimplemented on LOONGARCH64 yet----"); + NYI_LOONGARCH64("emitInsBinary-----unused"); return REG_R0; } diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 9b79d544a9a3a..d7e7cc5450acb 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -28,8 +28,7 @@ struct CnsVal const char* emitFPregName(unsigned reg, bool varName = true); const char* emitVectorRegName(regNumber reg); -// NOTE: At least 32bytes within dst. -void emitDisInsName(code_t code, const BYTE* dst, instrDesc* id); +void emitDisInsName(code_t code, const BYTE* addr, instrDesc* id); #endif // DEBUG void emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 = REG_R0, regNumber reg2 = REG_R0); From 7280c46b4b707d13145810578785243da15f53e6 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Wed, 23 Mar 2022 15:24:55 +0800 Subject: [PATCH 38/46] [LoongArch64] remove the optimization for type-cast which depends on LoongArch64. 
--- src/coreclr/jit/emit.cpp | 4 +- src/coreclr/jit/gentree.cpp | 9 -- src/coreclr/jit/importer.cpp | 226 +++------------------------------- src/coreclr/jit/morph.cpp | 52 +------- src/coreclr/jit/optimizer.cpp | 17 --- 5 files changed, 21 insertions(+), 287 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index c8f0b0701a70a..79d63b659c633 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -744,6 +744,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) ig->igFlags |= IGF_BYREF_REGS; // We'll allocate extra space (DWORD aligned) to record the GC regs + gs += sizeof(int); } @@ -756,6 +757,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) if (ig->igFlags & IGF_BYREF_REGS) { // Record the byref regs in front the of the instructions + *castto(id, unsigned*)++ = (unsigned)emitInitByrefRegs; } @@ -4332,7 +4334,6 @@ void emitter::emitJumpDistBind() lstIG->igOffs - adjIG); } #endif // DEBUG - lstIG->igOffs -= adjIG; assert(IsCodeAligned(lstIG->igOffs)); } while (lstIG != jmpIG); @@ -4846,7 +4847,6 @@ void emitter::emitJumpDistBind() goto AGAIN; } } - #ifdef DEBUG if (EMIT_INSTLIST_VERBOSE) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index c678219c21db5..539d58688c969 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -13806,16 +13806,7 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) case TYP_INT: -#ifdef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. 
- assert(tree->TypeIs(TYP_INT, TYP_LONG) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); -#else assert(tree->TypeIs(TYP_INT) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); -#endif // No GC pointer types should be folded here... assert(!varTypeIsGC(op1->TypeGet()) && !varTypeIsGC(op2->TypeGet())); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index b2e9fced636f5..7a27813cdff6b 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -11430,45 +11430,13 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr if (genActualType(op1->TypeGet()) != TYP_I_IMPL) { -// insert an explicit upcast -#ifdef TARGET_LOONGARCH64 - if (op1->TypeGet() == TYP_INT && op1->gtOper == GT_CNS_INT) - { - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. - op1->AsIntCon()->gtIconVal = - fUnsigned ? (uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; - op1->gtType = TYP_LONG; - } - else - *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); -#else + // insert an explicit upcast op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); -#endif } else if (genActualType(op2->TypeGet()) != TYP_I_IMPL) { -// insert an explicit upcast -#ifdef TARGET_LOONGARCH64 - if (op2->TypeGet() == TYP_INT && op2->gtOper == GT_CNS_INT) - { - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. 
- op2->AsIntCon()->gtIconVal = - fUnsigned ? (uint32_t)op2->AsIntCon()->gtIconVal : op2->AsIntCon()->gtIconVal; - op2->gtType = TYP_LONG; - } - else - *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); -#else + // insert an explicit upcast op2 = *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); -#endif } type = TYP_I_IMPL; @@ -12670,17 +12638,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } #endif // FEATURE_SIMD -#ifdef TARGET_LOONGARCH64 - if (prevOpcode == CEE_LDC_I4_0 || prevOpcode == CEE_LDNULL) - { - op1->gtType = lclTyp; - op1->gtFlags |= GTF_CONTAINED; - } - else - op1 = impImplicitIorI4Cast(op1, lclTyp); -#else - op1 = impImplicitIorI4Cast(op1, lclTyp); -#endif + op1 = impImplicitIorI4Cast(op1, lclTyp); #ifdef TARGET_64BIT // Downcast the TYP_I_IMPL into a 32-bit Int for x86 JIT compatiblity @@ -13560,17 +13518,8 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = impPopStack().val; // operand to be shifted impBashVarAddrsToI(op1, op2); -#ifdef TARGET_LOONGARCH64 - if (op2->gtOper == GT_CNS_INT && op2->AsIntCon()->gtIconVal > 31) - { - type = TYP_LONG; - } - else - type = genActualType(op1->TypeGet()); -#else type = genActualType(op1->TypeGet()); -#endif - op1 = gtNewOperNode(oper, type, op1, op2); + op1 = gtNewOperNode(oper, type, op1, op2); impPushOnStack(op1, tiRetVal); break; @@ -13777,29 +13726,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) // See also identical code in impGetByRefResultType and STSFLD import. if (varTypeIsI(op1) && (genActualType(op2) == TYP_INT)) { -#ifdef TARGET_LOONGARCH64 - if (op2->gtOper == GT_CNS_INT) - { - op2->AsIntCon()->gtIconVal = - uns ? 
(uint32_t)op2->AsIntCon()->gtIconVal : (int32_t)op2->AsIntCon()->gtIconVal; - op2->gtType = TYP_LONG; - } - else -#endif - op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, TYP_I_IMPL); + op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, TYP_I_IMPL); } else if (varTypeIsI(op2) && (genActualType(op1) == TYP_INT)) { -#ifdef TARGET_LOONGARCH64 - if (op1->gtOper == GT_CNS_INT) - { - op1->AsIntCon()->gtIconVal = - uns ? (uint32_t)op1->AsIntCon()->gtIconVal : (int32_t)op1->AsIntCon()->gtIconVal; - op1->gtType = TYP_LONG; - } - else -#endif - op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, TYP_I_IMPL); + op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, TYP_I_IMPL); } #endif // TARGET_64BIT @@ -13886,18 +13817,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = impPopStack().val; #ifdef TARGET_64BIT -#ifdef TARGET_LOONGARCH64 - if ((op2->OperGet() == GT_CNS_INT) /* && (op2->AsIntCon()->IconValue() == 0)*/) - { - op2->gtType = op1->TypeGet(); - } -/*if (op1->OperGet() == GT_CNS_INT) -{ - //assert(op1->gtType == op2->TypeGet()); - //op2->gtType = op1->TypeGet(); - op1->gtFlags |= GTF_CONTAINED; -}*/ -#else if ((op1->TypeGet() == TYP_I_IMPL) && (genActualType(op2->TypeGet()) == TYP_INT)) { op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, uns ? TYP_U_IMPL : TYP_I_IMPL); @@ -13906,20 +13825,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) { op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, uns ? 
TYP_U_IMPL : TYP_I_IMPL); } -#endif #endif // TARGET_64BIT -#ifdef TARGET_LOONGARCH64 - assertImp((genActualType(op1->TypeGet()) == TYP_LONG || genActualType(op1->TypeGet()) == TYP_INT) || - (genActualType(op2->TypeGet()) == TYP_LONG || genActualType(op2->TypeGet()) == TYP_INT) || - (genActualType(op1->TypeGet()) == genActualType(op2->TypeGet())) || - (varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet())) || - (varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType))); -#else assertImp(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) || (varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet())) || (varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType))); -#endif if (opts.OptimizationEnabled() && (block->bbJumpDest == block->bbNext)) { @@ -14160,18 +14070,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) } op1 = impPopStack().val; -#ifdef TARGET_LOONGARCH64 - if (!callNode && prevOpcode == CEE_LDC_I4_0) - { - assert(op1->gtOper == GT_CNS_INT && op1->AsIntCon()->gtIconVal == 0); - if (varTypeIsFloating(lclTyp)) - op1->gtOper = GT_CNS_DBL; - op1->gtType = genActualType(lclTyp); - impPushOnStack(op1, tiRetVal); - // opcode = CEE_LDC_I4_0; - break; - } -#endif impBashVarAddrsToI(op1); @@ -14181,38 +14079,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) uns = false; } -// At this point uns, ovf, callNode are all set. 
- -#ifdef TARGET_LOONGARCH64 - if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtOper == GT_CNS_INT) - { - switch (lclTyp) - { - case TYP_BYTE: - op1->AsIntCon()->gtIconVal = (int8_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_UBYTE: - op1->AsIntCon()->gtIconVal = (uint8_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_USHORT: - op1->AsIntCon()->gtIconVal = (uint16_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_SHORT: - op1->AsIntCon()->gtIconVal = (short)op1->AsIntCon()->gtIconVal; - break; - default: - assert(!"unexpected type"); - return; - } - - op1->gtType = TYP_INT; + // At this point uns, ovf, callNode are all set. - impPushOnStack(op1, tiRetVal); - break; - } - else -#endif - if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) + if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) { op2 = op1->AsOp()->gtOp2; @@ -14276,32 +14145,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = gtNewCastNodeL(type, op1, uns, lclTyp); } else -#ifdef TARGET_LOONGARCH64 - if (type != TYP_LONG) - { - if (!ovfl && op1->gtOper == GT_CNS_INT && op1->TypeGet() == TYP_LONG) - { - assert(lclTyp == TYP_INT || lclTyp == TYP_UINT); - if (lclTyp == TYP_INT) - { - op1->AsIntCon()->gtIconVal = (int32_t)op1->AsIntCon()->gtIconVal; - op1->gtType = TYP_INT; - } - else if (lclTyp == TYP_UINT) - { - op1->AsIntCon()->gtIconVal = (uint32_t)op1->AsIntCon()->gtIconVal; - op1->gtType = TYP_UINT; - } - else - op1 = gtNewCastNode(type, op1, uns, lclTyp); - } - else - { - op1 = gtNewCastNode(type, op1, uns, lclTyp); - } - } - else -#endif { op1 = gtNewCastNode(type, op1, uns, lclTyp); } @@ -14311,13 +14154,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1->gtFlags |= (GTF_OVERFLOW | GTF_EXCEPT); } -#ifndef TARGET_LOONGARCH64 if (op1->gtGetOp1()->OperIsConst() && opts.OptimizationEnabled()) { // Try and fold the introduced cast op1 = gtFoldExprConst(op1); } -#endif } impPushOnStack(op1, tiRetVal); @@ 
-15943,9 +15784,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) op2->gtType = TYP_I_IMPL; } else -#ifdef TARGET_LOONGARCH64 - if (genActualType(op2->TypeGet()) != TYP_INT) -#endif { bool isUnsigned = false; op2 = gtNewCastNode(TYP_I_IMPL, op2, isUnsigned, TYP_I_IMPL); @@ -17310,21 +17148,12 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) impBashVarAddrsToI(op2); op2 = impImplicitIorI4Cast(op2, info.compRetType); op2 = impImplicitR4orR8Cast(op2, info.compRetType); -// Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. -#ifdef TARGET_LOONGARCH64 + // Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || - (genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType)) || ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType))); -#else - assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || - ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || - (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || - (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || - (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType))); -#endif #ifdef DEBUG if (!isTailCall && opts.compGcChecks && (info.compRetType == TYP_REF)) @@ -18169,17 +17998,9 @@ void Compiler::impImportBlock(BasicBlock* block) } else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_I_IMPL) { -// Spill clique has decided this should be "native int", but this block only pushes an "int". -// Insert a sign-extension to "native int" so we match the clique. 
-#ifdef TARGET_LOONGARCH64 - if (tree->gtOper == GT_CNS_INT) - { - tree->gtType = TYP_I_IMPL; - tree->SetContained(); - } - else -#endif - verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); + // Spill clique has decided this should be "native int", but this block only pushes an "int". + // Insert a sign-extension to "native int" so we match the clique. + verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } // Consider the case where one branch left a 'byref' on the stack and the other leaves @@ -18199,17 +18020,9 @@ void Compiler::impImportBlock(BasicBlock* block) } else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_BYREF) { -// Spill clique has decided this should be "byref", but this block only pushes an "int". -// Insert a sign-extension to "native int" so we match the clique size. -#ifdef TARGET_LOONGARCH64 - if (tree->gtOper == GT_CNS_INT) - { - tree->gtType = TYP_I_IMPL; - tree->SetContained(); - } - else -#endif - verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); + // Spill clique has decided this should be "byref", but this block only pushes an "int". + // Insert a sign-extension to "native int" so we match the clique size. + verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } #endif // TARGET_64BIT @@ -20934,13 +20747,8 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) return false; } #elif defined(TARGET_LOONGARCH64) - switch (intrinsicName) - { - // LOONGARCH64: will amend in the future - - default: - return false; - } + // TODO-LoongArch64: add some intrinsics. + return false; #else // TODO: This portion of logic is not implemented for other arch.
// The reason for returning true is that on all other arch the only intrinsic diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 9932390b5f48b..84a75fb85c879 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5235,7 +5235,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTreeCall::Use* GenTree* arg = fgMakeTmpArgNode(argEntry); // Change the expression to "(tmp=val),tmp" - arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); + arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); #endif // FEATURE_FIXED_OUT_ARGS @@ -5414,17 +5414,6 @@ void Compiler::fgMoveOpsLeft(GenTree* tree) noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL); new_op1->gtType = TYP_I_IMPL; } -#ifdef TARGET_LOONGARCH64 - else if ((op1->TypeGet() == TYP_LONG) && (ad2->TypeGet() == TYP_INT)) - { - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. - new_op1->gtType = TYP_LONG; - } -#endif // If new_op1 is a new expression. Assign it a new unique value number. // vnStore is null before the ValueNumber phase has run @@ -5719,14 +5708,6 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) noway_assert(index2 != nullptr); } -#ifdef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. 
- GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); -#else // Next introduce a GT_BOUNDS_CHECK node var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. @@ -5746,7 +5727,6 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) { arrLen = gtNewCastNode(bndsChkType, arrLen, true, bndsChkType); } -#endif GenTreeBoundsChk* arrBndsChk = new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(index, arrLen, SCK_RNGCHK_FAIL); @@ -5764,13 +5744,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) GenTree* addr; #ifdef TARGET_64BIT -#ifndef TARGET_LOONGARCH64 // Widen 'index' on 64-bit targets - // But For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. if (index->TypeGet() != TYP_I_IMPL) { if (index->OperGet() == GT_CNS_INT) @@ -5782,7 +5756,6 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) index = gtNewCastNode(TYP_I_IMPL, index, true, TYP_I_IMPL); } } -#endif #endif // TARGET_64BIT /* Scale the index value if necessary */ @@ -14710,17 +14683,6 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) // be in a fully-interruptible code region. if (!varTypeIsGC(ad1->TypeGet()) && !varTypeIsGC(op2->TypeGet())) { -#ifdef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. 
- if (op2->TypeGet() == TYP_LONG) - { - op1->gtType = TYP_LONG; - } -#endif tree->gtOp2 = ad2; op1->AsOp()->gtOp2 = op2; @@ -18479,18 +18441,8 @@ GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) // void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZero) { -// We expect 'addr' to be an address at this point. -#ifdef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. - assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_INT || - addr->TypeGet() == TYP_REF); -#else + // We expect 'addr' to be an address at this point. assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); -#endif // Tunnel through any commas. const bool commaOnly = true; diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 1ad459299dc76..fe8a092a191ce 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5092,14 +5092,7 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu unsigned kind; noway_assert(tree); -#ifndef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. 
So `LONG != INT(but default is LONG)` noway_assert(genActualType(tree->gtType) == genActualType(srct)); -#endif /* Assume we're only handling integer types */ noway_assert(varTypeIsIntegral(srct)); @@ -5267,18 +5260,8 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu switch (tree->gtOper) { case GT_AND: -#ifdef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. So `LONG != INT(but default is LONG)` - noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op1->gtType))); - noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op2->gtType))); -#else noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); -#endif GenTree* opToNarrow; opToNarrow = nullptr; From e1b5f9da81bae7cc2f9296ea5658f49ad1be8449 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 25 Mar 2022 10:24:15 +0800 Subject: [PATCH 39/46] [LoongArch64] amend the code for CR.
--- src/coreclr/jit/codegenlinear.cpp | 5 ++-- src/coreclr/jit/compiler.cpp | 45 ++++++------------------------- src/coreclr/jit/gentree.cpp | 33 +++++++++++++---------- src/coreclr/jit/instr.cpp | 20 ++------------ src/coreclr/jit/lower.cpp | 4 --- src/coreclr/jit/morph.cpp | 13 ++++----- src/coreclr/jit/valuenum.cpp | 7 ----- 7 files changed, 39 insertions(+), 88 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 6b7c1f1fa2c45..e03e83d49c553 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2517,8 +2517,9 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) } #ifdef TARGET_LOONGARCH64 - m_extendKind = castUnsigned ? ZERO_EXTEND_INT : SIGN_EXTEND_INT; - cast->gtFlags |= castUnsigned ? GTF_UNSIGNED : GTF_EMPTY; + // For LoongArch64's ISA which is same with the MIPS64 ISA, even the instructions of 32bits operation need + // the upper 32bits be sign-extended to 64 bits. + m_extendKind = SIGN_EXTEND_INT; #else m_extendKind = COPY; #endif diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1b6fc4255b1eb..5d6fff9b367a7 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -749,34 +749,14 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_UNKNOWN; } -#elif defined(TARGET_X86) || defined(TARGET_ARM) +#elif defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // Otherwise we pass this struct by value on the stack // setup wbPassType and useType indicate that this is passed by value according to the X86/ARM32 ABI + // On LOONGARCH64 struct that is 1-16 bytes is passed by value in one/two register(s) howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; -#elif defined(TARGET_LOONGARCH64) - // Structs that are pointer sized or smaller. 
- // assert(structSize > TARGET_POINTER_SIZE); - - // On LOONGARCH64 structs that are 1-16 bytes are passed by value in one/multiple register(s) - if (structSize <= (TARGET_POINTER_SIZE * 2)) - { - // setup wbPassType and useType indicate that this is passed by value in multiple registers - // (when all of the parameters registers are used, then the stack will be used) - howToPassStruct = SPK_ByValue; - useType = TYP_STRUCT; - } - else // a structSize that is 17-32 bytes in size - { - // Otherwise we pass this struct by reference to a copy - // setup wbPassType and useType indicate that this is passed using one register - // (by reference to a copy) - howToPassStruct = SPK_ByReference; - useType = TYP_UNKNOWN; - } - #else // TARGET_XXX noway_assert(!"Unhandled TARGET in getArgTypeForStruct (with FEATURE_MULTIREG_ARGS=1)"); @@ -1084,21 +1064,9 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #elif defined(TARGET_LOONGARCH64) - // On LOONGARCH64 structs that are 1-16 bytes are returned by value in one/multiple register(s) - if (structSize <= (TARGET_POINTER_SIZE * 2)) - { - // setup wbPassType and useType indicate that this is return by value in multiple registers - howToReturnStruct = SPK_ByValue; - useType = TYP_STRUCT; - } - else // a structSize that is 17-32 bytes in size - { - // Otherwise we return this struct using a return buffer/byreference. 
- // setup wbPassType and useType indicate that this is returned using a return buffer register - // (reference to a return buffer) - howToReturnStruct = SPK_ByReference; - useType = TYP_UNKNOWN; - } + // On LOONGARCH64 struct that is 1-16 bytes is returned by value in one/two register(s) + howToReturnStruct = SPK_ByValue; + useType = TYP_STRUCT; #else // TARGET_XXX @@ -2276,8 +2244,11 @@ void Compiler::compSetProcessor() info.genCPU = CPU_X86_PENTIUM_4; else info.genCPU = CPU_X86; + #elif defined(TARGET_LOONGARCH64) + info.genCPU = CPU_LOONGARCH64; + #endif // diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 539d58688c969..26049b26f4fe2 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3208,11 +3208,11 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ *pCostEx += idx->GetCostEx(); *pCostSz += idx->GetCostSz(); } - // TODO-LOONGARCH64: workround, should amend for LoongArch64. if (cns != 0) { - if (cns >= (4096 * genTypeSize(type))) + if (!emitter::isValidSimm12(cns)) { + // TODO-LoongArch64-CQ: tune for LoongArch64. *pCostEx += 1; *pCostSz += 4; } @@ -3632,11 +3632,15 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) goto COMMON_CNS; #elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64-CQ: tune the costs. case GT_CNS_STR: + costEx = IND_COST_EX + 2; + costSz = 4; + goto COMMON_CNS; + case GT_CNS_LNG: case GT_CNS_INT: - // TODO-LOONGARCH64: workround, should amend for LoongArch64. - costEx = 4; + costEx = 1; costSz = 4; goto COMMON_CNS; #else @@ -3701,7 +3705,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 4; } #elif defined(TARGET_LOONGARCH64) - // TODO-LOONGARCH64: workround, should amend for LoongArch64. + // TODO-LoongArch64-CQ: tune the costs. 
costEx = 2; costSz = 8; #else @@ -3878,14 +3882,9 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 6; } #elif defined(TARGET_LOONGARCH64) - // TODO-LOONGARCH64: workround, should amend for LoongArch64. + // TODO-LoongArch64-CQ: tune the costs. costEx = 1; - costSz = 2; - if (isflt || varTypeIsFloating(op1->TypeGet())) - { - costEx = 2; - costSz = 4; - } + costSz = 4; #else #error "Unknown TARGET" #endif @@ -22273,11 +22272,17 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const } else { - noway_assert(idx < 2); // Up to 2 return registers for two-float-field structs + noway_assert(idx == 1); // Up to 2 return registers for two-float-field structs + + // If the first return register is from the same register file, return the one next to it. if (varTypeIsIntegralOrI(regType)) + { resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // A0 or A1 - else // if (!varTypeIsIntegralOrI(regType)) + } + else // varTypeUsesFloatReg(regType) + { resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_FLOATRET : REG_FLOATRET_1; // F0 or F1 + } } #endif // TARGET_XXX diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 6406ceb3715da..2d797889323ce 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -906,19 +906,7 @@ void CodeGen::inst_RV_TT(instruction ins, regSet.verifyRegUsed(regTmp); return; } -#else // !TARGET_ARM -#ifdef TARGET_LOONGARCH64 - // For LoongArch64-ABI, the float arg might be passed by integer register, - // when there is no float register left but there is integer register(s) left. - if (emitter::isFloatReg(reg)) - { - assert((ins == INS_fld_d) || (ins == INS_fld_s)); - } - else if (emitter::isGeneralRegister(reg) && (ins != INS_lea)) - { - ins = size == EA_4BYTE ? 
INS_ld_w : INS_ld_d; - } -#endif +#else // !TARGET_ARM GetEmitter()->emitIns_R_S(ins, size, reg, varNum, offs); return; #endif // !TARGET_ARM @@ -1725,10 +1713,6 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* { ins = INS_ld_w; } - else if (TYP_UINT == srcType) - { - ins = INS_ld_wu; - } else { ins = INS_ld_d; // default ld_d. @@ -1956,7 +1940,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = aligned ? INS_stx_b : INS_st_b; else if (varTypeIsShort(dstType)) ins = aligned ? INS_stx_h : INS_st_h; - else if ((TYP_INT == dstType) || (TYP_UINT == dstType)) + else if (TYP_INT == dstType) ins = aligned ? INS_stx_w : INS_st_w; else ins = aligned ? INS_stx_d : INS_st_d; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 79ca6fb190527..77deae14efc45 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5874,11 +5874,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) GenTree* dividend = divMod->gtGetOp1(); GenTree* divisor = divMod->gtGetOp2(); -#if defined(TARGET_LOONGARCH64) - const var_types type = genActualType(divMod->TypeGet()); -#else const var_types type = divMod->TypeGet(); -#endif assert((type == TYP_INT) || (type == TYP_LONG)); #if defined(USE_HELPERS_FOR_INT_DIV) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 84a75fb85c879..2902c35b9c496 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3815,17 +3815,18 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) assert(!"Structs are not passed by reference on x64/ux"); #endif // UNIX_AMD64_ABI } -#if defined(DEBUG) && defined(TARGET_LOONGARCH64) - else if ((structBaseType == TYP_STRUCT) && (originalSize == TARGET_POINTER_SIZE) && (size == 2)) - { - DEBUG_ARG_SLOTS_ASSERT(size == argEntry->numRegs); - } -#endif else // This is passed by value. 
{ +#if defined(TARGET_LOONGARCH64) + // For LoongArch64 the struct {float a; float b;} can be passed by two float registers. + DEBUG_ARG_SLOTS_ASSERT((size == roundupSize / TARGET_POINTER_SIZE) || + ((structBaseType == TYP_STRUCT) && (originalSize == TARGET_POINTER_SIZE) && + (size == 2) && (size == argEntry->numRegs))); +#else // Check to see if we can transform this into load of a primitive type. // 'size' must be the number of pointer sized items DEBUG_ARG_SLOTS_ASSERT(size == roundupSize / TARGET_POINTER_SIZE); +#endif structSize = originalSize; unsigned passingSize = originalSize; diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 3f2033446ed08..779813c72eabc 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -2845,14 +2845,7 @@ ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, Valu } else { -#ifdef TARGET_LOONGARCH64 - // For LoongArch64, the int32 will signed-extend default, - // e.g. `ld_w $r4, $r5, 4` loading a int32 from the addr `$r5+4`. - // So there is no need to signed-extend. - assert(typ == TYP_INT || typ == TYP_LONG); -#else assert(typ == TYP_INT); -#endif int resultVal = EvalOp(func, arg0Val, arg1Val); // Bin op on a handle results in a handle. ValueNum handleVN = IsVNHandle(arg0VN) ? arg0VN : IsVNHandle(arg1VN) ? arg1VN : NoVN; From 81120de98afd286e277f3bee2c46d48abf8c35ab Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Wed, 30 Mar 2022 14:23:27 +0800 Subject: [PATCH 40/46] [LoongArch64] amend some code for CR. 
--- src/coreclr/jit/compiler.cpp | 20 +++++++++----------- src/coreclr/jit/morph.cpp | 23 +++++++---------------- src/coreclr/jit/scopeinfo.cpp | 6 +++++- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5d6fff9b367a7..4849d73ae1f7a 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -902,16 +902,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToReturnStruct = SPK_ByReference; useType = TYP_UNKNOWN; } -#endif - if (TargetOS::IsWindows && !TargetArchitecture::IsArm32 && callConvIsInstanceMethodCallConv(callConv) && - !isNativePrimitiveStructType(clsHnd)) - { - canReturnInRegister = false; - howToReturnStruct = SPK_ByReference; - useType = TYP_UNKNOWN; - } - -#ifdef TARGET_LOONGARCH64 +#elif defined(TARGET_LOONGARCH64) if (structSize <= (TARGET_POINTER_SIZE * 2)) { uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); @@ -927,7 +918,14 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_STRUCT; } } -#endif // TARGET_LOONGARCH64 +#endif + if (TargetOS::IsWindows && !TargetArchitecture::IsArm32 && callConvIsInstanceMethodCallConv(callConv) && + !isNativePrimitiveStructType(clsHnd)) + { + canReturnInRegister = false; + howToReturnStruct = SPK_ByReference; + useType = TYP_UNKNOWN; + } // Check for cases where a small struct is returned in a register // via a primitive type.
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 2902c35b9c496..7ccbea685939b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12632,24 +12632,11 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) break; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_DIV: - if (!varTypeIsFloating(tree->gtType)) - { - // Codegen for this instruction needs to be able to throw two exceptions: - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); - } - break; - case GT_UDIV: - // Codegen for this instruction needs to be able to throw one exception: - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); - break; -#endif - #ifdef TARGET_LOONGARCH64 - case GT_DIV: case GT_MOD: +#endif if (!varTypeIsFloating(tree->gtType)) { // Codegen for this instruction needs to be able to throw two exceptions: @@ -12658,11 +12645,15 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) } break; case GT_UDIV: +#ifdef TARGET_LOONGARCH64 case GT_UMOD: +#endif // Codegen for this instruction needs to be able to throw one exception: fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); break; -#endif + +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + case GT_ADD: CM_OVF_OP: diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 2e315d2faa84b..1c0d6679cb6a7 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -1609,17 +1609,21 @@ void CodeGen::psiBegProlog() noway_assert(EA_SIZE_IN_BYTES(lclVarDsc->lvSize()) <= 16); if (emitter::isFloatReg(lclVarDsc->GetArgReg())) { - // regType = lclVarDsc->lvIs4Field1 ? 
TYP_FLOAT : TYP_DOUBLE; regType = TYP_DOUBLE; } else + { regType = lclVarDsc->GetLayout()->GetGCPtrType(0); + } } else { regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); if (emitter::isGeneralRegisterOrR0(lclVarDsc->GetArgReg()) && isFloatRegType(regType)) + { + // For LoongArch64's ABI, the float args may be passed by integer register. regType = TYP_LONG; + } } #else var_types regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); From f369343267462878a7d6053e782ba8141ea53521 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 31 Mar 2022 11:16:54 +0800 Subject: [PATCH 41/46] [LoongArch64] amend some code for CR round2. --- src/coreclr/jit/codegenloongarch64.cpp | 18 ++-- src/coreclr/jit/emitloongarch64.cpp | 46 ++-------- src/coreclr/jit/lclvars.cpp | 122 ++++++++++++------------- src/coreclr/jit/lower.cpp | 7 +- src/coreclr/jit/lowerloongarch64.cpp | 63 ++++++------- src/coreclr/jit/lsraloongarch64.cpp | 1 + 6 files changed, 108 insertions(+), 149 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index b6a7442c19dce..e5b4be91ba713 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1876,7 +1876,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) genProduceReg(treeNode); } -// Generate code for ADD, SUB, MUL, AND, OR and XOR +// Generate code for ADD, SUB, MUL, AND, AND_NOT, OR and XOR // This method is expected to have called genConsumeOperands() before calling it. 
void CodeGen::genCodeForBinary(GenTreeOp* treeNode) { @@ -1884,7 +1884,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) { regNumber targetReg = treeNode->GetRegNum(); emitter* emit = GetEmitter(); - assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_AND || oper == GT_OR || oper == GT_XOR); + assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_AND_NOT, GT_OR, GT_XOR)); GenTree* op1 = treeNode->gtGetOp1(); GenTree* op2 = treeNode->gtGetOp2(); @@ -2548,19 +2548,18 @@ void CodeGen::genCodeForNegNot(GenTree* tree) // void CodeGen::genCodeForBswap(GenTree* tree) { - assert(!"unimpleement on LOONGARCH64 yet"); + NYI_LOONGARCH64("genCodeForBswap unimplemented yet"); } //------------------------------------------------------------------------ -// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD: -// (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph; -// (2) float/double MOD is morphed into a helper call by front-end. +// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. +// (1) float/double MOD is morphed into a helper call by front-end. // // Arguments: // tree - the node // void CodeGen::genCodeForDivMod(GenTreeOp* tree) -{ // can amend further. +{ assert(tree->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); var_types targetType = tree->TypeGet(); @@ -3355,6 +3354,11 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) } break; + case GT_AND_NOT: + assert(!isImmed(treeNode)); + ins = INS_andn; + break; + case GT_OR: isImm = isImmed(treeNode); if (isImm) diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index a3f2a226f8923..0905579406548 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -6395,13 +6395,16 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } if (intConst != nullptr) - { // should re-design this case!!! ---2020.04.11. 
+ { ssize_t imm = intConst->IconValue(); if (ins == INS_andi || ins == INS_ori || ins == INS_xori) - // assert((0 <= imm) && (imm <= 0xfff)); - assert((-2048 <= imm) && (imm <= 0xfff)); + { + assert(isValidUimm12(imm)); + } else - assert((-2049 < imm) && (imm < 2048)); + { + assert(isValidSimm12(imm)); + } if (ins == INS_sub_d) { @@ -6420,35 +6423,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, assert(ins == INS_addi_d || ins == INS_addi_w || ins == INS_andi || ins == INS_ori || ins == INS_xori); - if ((imm < 0) && (ins == INS_andi || ins == INS_ori || ins == INS_xori)) - { - assert(attr == EA_8BYTE || attr == EA_4BYTE); - assert(nonIntReg->GetRegNum() != REG_R21); - - emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); - - if (ins == INS_andi) - { - ins = INS_and; - } - else if (ins == INS_ori) - { - ins = INS_or; - } - else if (ins == INS_xori) - { - ins = INS_xor; - } - else - { - unreached(); - } - - emitIns_R_R_R(ins, attr, dst->GetRegNum(), REG_R21, nonIntReg->GetRegNum()); - - goto L_Done; - } - if (needCheckOv) { emitIns_R_R_R(INS_or, attr, REG_R21, nonIntReg->GetRegNum(), REG_R0); @@ -6567,11 +6541,11 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } } } - else if (dst->OperGet() == GT_AND || dst->OperGet() == GT_OR || dst->OperGet() == GT_XOR) + else if (dst->OperIs(GT_AND, GT_AND_NOT, GT_OR, GT_XOR)) { emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - // NOTE: can/should amend: LOONGARCH needs to sign-extend dst when deal with 32bit data. + // TODO-LOONGARCH64-CQ: here sign-extend dst when deal with 32bit data is too conservative. 
if (EA_SIZE(attr) == EA_4BYTE) emitIns_R_R_I(INS_slli_w, attr, dst->GetRegNum(), dst->GetRegNum(), 0); } @@ -6713,8 +6687,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } } -L_Done: - return dst->GetRegNum(); } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 33adb5285ff1f..f661b0c8f1e69 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -660,14 +660,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un bool isHfaArg = false; var_types hfaType = TYP_UNDEF; -#if defined(TARGET_LOONGARCH64) - uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; - if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) - { - floatFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(typeHnd); - } -#endif - // Methods that use VarArg or SoftFP cannot have HFA arguments except // Native varargs on arm64 unix use the regular calling convention. if (((TargetOS::IsUnix && TargetArchitecture::IsArm64) || !info.compIsVarArgs) && !opts.compUseSoftFP) @@ -808,23 +800,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } #else // !TARGET_ARM -#if defined(TARGET_LOONGARCH64) - - if (compFeatureArgSplit()) - { - // This does not affect the normal calling convention for LoongArch64!! 
- if (this->info.compIsVarArgs && (argType == TYP_STRUCT)) - { - if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register - !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register - { - cSlotsToEnregister = 1; // Force the split - } - } - } - -#endif // defined(TARGET_LOONGARCH64) - #if defined(UNIX_AMD64_ABI) SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; if (varTypeIsStruct(argType)) @@ -886,9 +861,16 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } else #elif defined(TARGET_LOONGARCH64) - var_types arg1Type = TYP_UNKNOWN; - var_types arg2Type = TYP_UNKNOWN; - if (floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) + uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; + var_types argRegTypeInStruct1 = TYP_UNKNOWN; + var_types argRegTypeInStruct2 = TYP_UNKNOWN; + + if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) + { + floatFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(typeHnd); + } + + if ((floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) != 0) { assert(varTypeIsStruct(argType)); int floatNum = 0; @@ -896,50 +878,56 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { assert(argSize <= 8); assert(varDsc->lvExactSize <= argSize); - floatNum = 1; - arg1Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; - canPassArgInRegisters = varDscInfo->canEnreg(arg1Type, 1); + floatNum = 1; + canPassArgInRegisters = varDscInfo->canEnreg(argRegTypeInStruct1, 1); + + argRegTypeInStruct1 = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; } - else if (floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + else if ((floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) { - arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? 
TYP_DOUBLE : TYP_FLOAT; floatNum = 2; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); + + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } - else if (floatFlags & STRUCT_FLOAT_FIELD_FIRST) + else if ((floatFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) { floatNum = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; } - else if (floatFlags & STRUCT_FLOAT_FIELD_SECOND) + else if ((floatFlags & STRUCT_FLOAT_FIELD_SECOND) != 0) { floatNum = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; - arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } if (!canPassArgInRegisters) { - assert(floatNum > 0); + assert((floatNum == 1) || (floatNum == 2)); // `if ((floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) != 0)` canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); - arg1Type = TYP_UNKNOWN; - arg2Type = TYP_UNKNOWN; + + // On LoongArch64, there aren't even any remaining integer registers to pass the arguments. 
+ argRegTypeInStruct1 = TYP_UNKNOWN; + argRegTypeInStruct2 = TYP_UNKNOWN; } } else -#endif // defined(UNIX_AMD64_ABI) +#endif // defined(TARGET_LOONGARCH64) { canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); #if defined(TARGET_LOONGARCH64) + // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, + // integer registers (if any) are used instead. if (!canPassArgInRegisters && varTypeIsFloating(argType)) { canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); @@ -947,8 +935,10 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } if (!canPassArgInRegisters && (cSlots > 1)) { + // If a struct-arg which needs two registers but only one integer register available, + // it has to be split. canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1Type = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; + argRegTypeInStruct1 = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; } #endif } @@ -981,9 +971,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } else #elif defined(TARGET_LOONGARCH64) - if (arg1Type != TYP_UNKNOWN) + if (argRegTypeInStruct1 != TYP_UNKNOWN) { - firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg1Type, 1); + firstAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct1, 1); } else #endif // defined(TARGET_LOONGARCH64) @@ -1037,18 +1027,20 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un #elif defined(TARGET_LOONGARCH64) if (argType == TYP_STRUCT) { - if (arg1Type != TYP_UNKNOWN) + if (argRegTypeInStruct1 != TYP_UNKNOWN) { - varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg1Type)); - varDsc->lvIs4Field1 = (int)emitActualTypeSize(arg1Type) == 4 ? 
1 : 0; - if (arg2Type != TYP_UNKNOWN) + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1)); + varDsc->lvIs4Field1 = (genTypeSize(argRegTypeInStruct1) == 4) ? 1 : 0; + if (argRegTypeInStruct2 != TYP_UNKNOWN) { - firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2Type, 1); - varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2Type)); - varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2Type) == 4 ? 1 : 0; + unsigned secondAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct2, 1); + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2)); + varDsc->lvIs4Field2 = (genTypeSize(argRegTypeInStruct2) == 4) ? 1 : 0; } else if (cSlots > 1) { + // Here a struct-arg which needs two registers but only one integer register available, + // it has to be split. But we reserved extra 8-bytes for the whole struct. varDsc->lvIsSplit = 1; varDsc->SetOtherArgReg(REG_STK); varDscInfo->setAllRegArgUsed(arg1Type); @@ -1190,18 +1182,14 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDscInfo->setAnyFloatStackArgs(); } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // If we needed to use the stack in order to pass this argument then // record the fact that we have used up any remaining registers of this 'type' - // This prevents any 'backfilling' from occuring on ARM64 + // This prevents any 'backfilling' from occurring on ARM64/LoongArch64. 
// varDscInfo->setAllRegArgUsed(argType); -#elif defined(TARGET_LOONGARCH64) - - varDscInfo->setAllRegArgUsed(argType); - #endif // TARGET_XXX #if FEATURE_FASTTAILCALL @@ -5433,11 +5421,6 @@ void Compiler::lvaFixVirtualFrameOffsets() JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), varDsc->GetStackOffset() + delta); varDsc->SetStackOffset(varDsc->GetStackOffset() + delta); -#if defined(TARGET_LOONGARCH64) - if (varDsc->GetStackOffset() >= delta) - varDsc->SetStackOffset(varDsc->GetStackOffset() + (varDsc->lvIsSplit ? 8 : 0)); -#endif - #if DOUBLE_ALIGN if (genDoubleAlign() && !codeGen->isFramePointerUsed()) { @@ -6001,7 +5984,14 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, } #elif defined(TARGET_LOONGARCH64) -// empty for LoongArch64. + + if (varDsc->lvIsSplit) + { + assert((varDsc->lvType == TYP_STRUCT) && (varDsc->GetOtherArgReg() == REG_STK)); + // This is a split struct. It will account for an extra (8 bytes) for the whole struct. + varDsc->SetStackOffset(varDsc->GetStackOffset() + TARGET_POINTER_SIZE); + argOffs += TARGET_POINTER_SIZE; + } #else // TARGET* #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 77deae14efc45..ddd36057acd6b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3232,12 +3232,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); - if (relopOp1->IsCnsIntOrI() && relopOp2->IsCnsIntOrI()) - { - relopOp1->SetContained(); - relopOp2->SetContained(); - } - else if (relop->gtNext == jtrue) + if (relop->gtNext == jtrue) { if (relopOp2->IsCnsIntOrI()) { diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 9bd8beca15157..78ac528ba4c64 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -61,8 +61,6 @@ bool 
Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. target_ssize_t immVal = (target_ssize_t)childNode->AsIntCon()->gtIconVal; - emitAttr attr = emitActualTypeSize(childNode->TypeGet()); - emitAttr size = EA_SIZE(attr); switch (parentNode->OperGet()) { @@ -84,7 +82,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const case GT_AND: case GT_OR: case GT_XOR: - return emitter::isValidUimm11(immVal); + return emitter::isValidUimm12(immVal); case GT_JCMP: assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); return true; @@ -106,8 +104,10 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const //------------------------------------------------------------------------ // LowerMul: Lower a GT_MUL/GT_MULHI/GT_MUL_LONG node. // -// TODO: For LoongArch64 recognized GT_MULs that can be turned into GT_MUL_LONGs, as -// those are cheaper. Performs contaiment checks. +// Performs containment checks. +// +// TODO-LoongArch64-CQ: recognize GT_MULs that can be turned into MUL_LONGs, +// as those are cheaper. // // Arguments: // mul - The node to lower @@ -370,26 +370,13 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } //------------------------------------------------------------------------ -// LowerCast: Lower GT_CAST(srcType, DstType) nodes. +// ContainBlockStoreAddress: Attempt to contain an address used by an unrolled block store. // // Arguments: -// tree - GT_CAST node to be lowered -// -// Return Value: -// None. 
-// -// Notes: -// Casts from float/double to a smaller int type are transformed as follows: -// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) -// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) -// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) -// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) -// -// Note that for the overflow conversions we still depend on helper calls and -// don't expect to see them here. -// i) GT_CAST(float/double, int type with overflow detection) +// blkNode - the block store node +// size - the block size +// addr - the address node to try to contain // - void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr) { assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)); @@ -429,6 +416,27 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT addr->SetContained(); } +//------------------------------------------------------------------------ +// LowerCast: Lower GT_CAST(srcType, DstType) nodes. +// +// Arguments: +// tree - GT_CAST node to be lowered +// +// Return Value: +// None. +// +// Notes: +// Casts from float/double to a smaller int type are transformed as follows: +// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) +// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) +// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) +// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) +// +// Note that for the overflow conversions we still depend on helper calls and +// don't expect to see them here. 
+// i) GT_CAST(float/double, int type with overflow detection) +// + void Lowering::LowerCast(GenTree* tree) { assert(tree->OperGet() == GT_CAST); @@ -440,7 +448,6 @@ void Lowering::LowerCast(GenTree* tree) GenTree* op1 = tree->AsOp()->gtOp1; var_types dstType = tree->CastToType(); var_types srcType = genActualType(op1->TypeGet()); - var_types tmpType = TYP_UNDEF; if (varTypeIsFloating(srcType)) { @@ -451,16 +458,6 @@ void Lowering::LowerCast(GenTree* tree) assert(!varTypeIsSmall(srcType)); - if (tmpType != TYP_UNDEF) - { - GenTree* tmp = comp->gtNewCastNode(tmpType, op1, tree->IsUnsigned(), tmpType); - tmp->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - - tree->gtFlags &= ~GTF_UNSIGNED; - tree->AsOp()->gtOp1 = tmp; - BlockRange().InsertAfter(op1, tmp); - } - // Now determine if we have operands that should be contained. ContainCheckCast(tree->AsCast()); } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 826d89dd2a491..52d7191e528dc 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -257,6 +257,7 @@ int LinearScan::BuildNode(GenTree* tree) FALLTHROUGH; case GT_AND: + case GT_AND_NOT: case GT_OR: case GT_XOR: case GT_LSH: From 93e27c02ee37e088abe992bf2d19fd9f5c3f062c Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 1 Apr 2022 10:57:49 +0800 Subject: [PATCH 42/46] [LoongArch64] amend some code for CR round3. 
--- src/coreclr/jit/codegencommon.cpp | 10 +- src/coreclr/jit/codegenloongarch64.cpp | 85 ++------ src/coreclr/jit/compiler.h | 6 + src/coreclr/jit/lclvars.cpp | 21 +- src/coreclr/jit/lower.cpp | 63 ++---- src/coreclr/jit/lsraloongarch64.cpp | 35 +--- src/coreclr/jit/morph.cpp | 211 ++++++++------------ src/coreclr/jit/register_arg_convention.cpp | 11 +- 8 files changed, 134 insertions(+), 308 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 4eab2fbd6cffd..46f429f81b4f2 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4341,9 +4341,9 @@ void CodeGen::genEnregisterIncomingStackArgs() } } } -#else +#else // !TARGET_LOONGARCH64 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); -#endif // TARGET_LOONGARCH64 +#endif // !TARGET_LOONGARCH64 regSet.verifyRegUsed(regNum); #ifdef USING_SCOPE_INFO @@ -8142,6 +8142,8 @@ void CodeGen::genStructReturn(GenTree* treeNode) LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); assert(varDsc->lvIsMultiRegRet); #ifdef TARGET_LOONGARCH64 + // On LoongArch64, for a struct like "{ int, double }", "retTypeDesc" will be "{ TYP_INT, TYP_DOUBLE }", + // i. e. not include the padding for the first field, and so the general loop below won't work. 
var_types type = retTypeDesc.GetReturnRegType(0); regNumber toReg = retTypeDesc.GetABIReturnReg(0); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), 0); @@ -8154,7 +8156,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) toReg = retTypeDesc.GetABIReturnReg(1); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); } -#else +#else // !TARGET_LOONGARCH64 int offset = 0; for (unsigned i = 0; i < regCount; ++i) { @@ -8163,7 +8165,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); offset += genTypeSize(type); } -#endif +#endif // !TARGET_LOONGARCH64 } else { diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index e5b4be91ba713..40a19300095e9 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1259,12 +1259,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; - if (compiler->info.compIsVarArgs) - { - // For varargs we always save all of the integer register arguments - // so that they are contiguous with the incoming stack arguments. - saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES; - } unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN); assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); @@ -4853,8 +4847,7 @@ int CodeGenInterface::genTotalFrameSize() const assert(!IsUninitialized(compiler->compCalleeRegsPushed)); - int totalFrameSize = (compiler->info.compIsVarArgs ? 
MAX_REG_ARG * REGSIZE_BYTES : 0) + - compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; + int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; assert(totalFrameSize > 0); return totalFrameSize; @@ -5502,7 +5495,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_LCL_FLD_ADDR: case GT_LCL_VAR_ADDR: - genCodeForLclAddr(treeNode); + genCodeForLclAddr(treeNode->AsLclVarCommon()); break; case GT_LCL_FLD: @@ -6852,20 +6845,20 @@ void CodeGen::genCodeForShift(GenTree* tree) // Arguments: // tree - the node. // -void CodeGen::genCodeForLclAddr(GenTree* tree) +void CodeGen::genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode) { - assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); + assert(lclAddrNode->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); - var_types targetType = tree->TypeGet(); - regNumber targetReg = tree->GetRegNum(); + var_types targetType = lclAddrNode->TypeGet(); + emitAttr size = emitTypeSize(targetType); + regNumber targetReg = lclAddrNode->GetRegNum(); // Address of a local var. noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); - emitAttr size = emitTypeSize(targetType); + GetEmitter()->emitIns_R_S(INS_lea, size, targetReg, lclAddrNode->GetLclNum(), lclAddrNode->GetLclOffs()); - inst_RV_TT(INS_lea, targetReg, tree, 0, size); - genProduceReg(tree); + genProduceReg(lclAddrNode); } //------------------------------------------------------------------------ @@ -7758,8 +7751,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) #endif // Next move any un-enregistered register arguments back to their register. - regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method. - unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. + unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. 
for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) { varDsc = compiler->lvaTable + varNum; @@ -7821,7 +7813,6 @@ void CodeGen::genJmpMethod(GenTree* jmp) regSet.AddMaskVars(genRegMask(argReg)); gcInfo.gcMarkRegPtrVal(argReg, loadType); - // if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) if (varDsc->GetOtherArgReg() < REG_STK) { // Restore the second register. @@ -7851,59 +7842,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) if (compiler->info.compIsVarArgs) { - NYI("unimplemented on LOONGARCH64 yet"); - // In case of a jmp call to a vararg method ensure only integer registers are passed. - assert((genRegMask(argReg) & (RBM_ARG_REGS)) != RBM_NONE); - assert(!varDsc->lvIsHfaRegArg()); - - fixedIntArgMask |= genRegMask(argReg); - - if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) - { - assert(argRegNext != REG_NA); - fixedIntArgMask |= genRegMask(argRegNext); - } - - if (argReg == REG_ARG_0) - { - assert(firstArgVarNum == BAD_VAR_NUM); - firstArgVarNum = varNum; - } - } - } - - // Jmp call to a vararg method - if the method has fewer than fixed arguments that can be max size of reg, - // load the remaining integer arg registers from the corresponding - // shadow stack slots. This is for the reason that we don't know the number and type - // of non-fixed params passed by the caller, therefore we have to assume the worst case - // of caller passing all integer arg regs that can be max size of reg. - // - // The caller could have passed gc-ref/byref type var args. Since these are var args - // the callee no way of knowing their gc-ness. Therefore, mark the region that loads - // remaining arg registers from shadow stack slots as non-gc interruptible. 
- if (fixedIntArgMask != RBM_NONE) - { - assert(compiler->info.compIsVarArgs); - assert(firstArgVarNum != BAD_VAR_NUM); - - regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask; - if (remainingIntArgMask != RBM_NONE) - { - GetEmitter()->emitDisableGC(); - for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum) - { - regNumber argReg = intArgRegs[argNum]; - regMaskTP argRegMask = genRegMask(argReg); - - if ((remainingIntArgMask & argRegMask) != 0) - { - remainingIntArgMask &= ~argRegMask; - GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, argReg, firstArgVarNum, argOffset); - } - - argOffset += REGSIZE_BYTES; - } - GetEmitter()->emitEnableGC(); + NYI_LOONGARCH64("genJmpMethod unsupports compIsVarArgs"); } } } @@ -9000,7 +8939,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe if (compiler->info.compIsVarArgs) { JITDUMP(" compIsVarArgs=true\n"); - NYI_LOONGARCH64("genPushCalleeSavedRegisters - compIsVarArgs"); + NYI_LOONGARCH64("genPushCalleeSavedRegisters unsupports compIsVarArgs"); } #ifdef DEBUG diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2a0f894c8c1a8..8a1ab5809e679 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1696,6 +1696,12 @@ struct fgArgTabEntry unsigned numRegs; // Count of number of registers that this argument uses. // Note that on ARM, if we have a double hfa, this reflects the number // of DOUBLE registers. +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which has float field(s) and no more than two fields + // may be passed by float register(s). + // e.g `struct {int a; float b;}` passed by an integer register and a float register. 
+ var_types structFloatFieldType[2]; +#endif #if defined(UNIX_AMD64_ABI) // Unix amd64 will split floating point types and integer types in structs diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 61fabffaff304..115cc7953f46e 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -925,12 +925,14 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } + assert((floatNum == 1) || (floatNum == 2)); + if (!canPassArgInRegisters) { - assert((floatNum == 1) || (floatNum == 2)); // `if ((floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) != 0)` + // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, + // integer registers (if any) are used instead. canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); - // On LoongArch64, there aren't even any remaining integer registers to pass the arguments. argRegTypeInStruct1 = TYP_UNKNOWN; argRegTypeInStruct2 = TYP_UNKNOWN; } @@ -1057,8 +1059,10 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un // it has to be split. But we reserved extra 8-bytes for the whole struct. varDsc->lvIsSplit = 1; varDsc->SetOtherArgReg(REG_STK); - varDscInfo->setAllRegArgUsed(arg1Type); + varDscInfo->setAllRegArgUsed(argRegTypeInStruct1); +#if FEATURE_FASTTAILCALL varDscInfo->stackArgSize += TARGET_POINTER_SIZE; +#endif } } else @@ -6314,13 +6318,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #elif defined(TARGET_LOONGARCH64) int initialStkOffs = 0; - if (info.compIsVarArgs) - { - // For varargs we always save all of the integer register arguments - // so that they are contiguous with the incoming stack arguments. - initialStkOffs = MAX_REG_ARG * REGSIZE_BYTES; - stkOffs -= initialStkOffs; - } // Subtract off FP and RA. 
assert(compCalleeRegsPushed >= 2); @@ -6827,7 +6824,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() continue; } -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) { // Stack offset to varargs (parameters) should point to home area which will be preallocated. @@ -7087,7 +7084,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // and the pushed frame pointer register which for some strange reason isn't part of 'compCalleeRegsPushed'. int pushedCount = compCalleeRegsPushed; -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) if (info.compIsVarArgs) { pushedCount += MAX_REG_ARG; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 1460715b788aa..61a655ddc2cf9 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2833,7 +2833,6 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) assert(cmp->gtGetOp2()->IsIntegralConst()); #if defined(TARGET_XARCH) || defined(TARGET_ARM64) - // TODO-LoongArch64: add optimize for LoongArch64. GenTree* op1 = cmp->gtGetOp1(); GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); ssize_t op2Value = op2->IconValue(); @@ -3190,7 +3189,7 @@ GenTree* Lowering::LowerCompare(GenTree* cmp) // GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) { -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) GenTree* relop = jtrue->gtGetOp1(); GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); @@ -3199,6 +3198,14 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) bool useJCMP = false; GenTreeFlags flags = GTF_EMPTY; +#if defined(TARGET_LOONGARCH64) + if (relop->OperIs(GT_EQ, GT_NE)) + { + // Codegen will use beq or bne. + flags = relop->OperIs(GT_EQ) ? 
GTF_JCMP_EQ : GTF_EMPTY; + useJCMP = true; + } +#else // TARGET_ARM64 if (relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0)) { // Codegen will use cbz or cbnz in codegen which do not affect the flag register @@ -3211,6 +3218,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) flags = GTF_JCMP_TST | (relop->OperIs(GT_TEST_EQ) ? GTF_JCMP_EQ : GTF_EMPTY); useJCMP = true; } +#endif // TARGET_ARM64 if (useJCMP) { @@ -3227,48 +3235,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) return nullptr; } } -#elif defined(TARGET_LOONGARCH64) - GenTree* relop = jtrue->gtGetOp1(); - GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); - GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); - - if (relop->gtNext == jtrue) - { - if (relopOp2->IsCnsIntOrI()) - { - if (relop->OperIs(GT_EQ, GT_NE)) - { - - // Codegen will use beq or bne in codegen. - GenTreeFlags flags = relop->OperIs(GT_EQ) ? GTF_JCMP_EQ : GTF_EMPTY; - - relop->SetOper(GT_JCMP); - relop->gtFlags &= ~(GTF_JCMP_TST | GTF_JCMP_EQ); - relop->gtFlags |= flags; - relop->gtType = TYP_VOID; - - relopOp2->SetContained(); - - BlockRange().Remove(jtrue); - - assert(relop->gtNext == nullptr); - return nullptr; - } - } - else if (relopOp1->IsCnsIntOrI()) - { - relopOp1->SetContained(); - } - } - else if (relopOp1->IsCnsIntOrI()) - { - relopOp1->SetContained(); - } - else if (relopOp2->IsCnsIntOrI()) - { - relopOp2->SetContained(); - } -#endif // TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 ContainCheckJTrue(jtrue); @@ -3981,9 +3948,9 @@ void Lowering::LowerStoreSingleRegCallStruct(GenTreeBlk* store) { #if defined(TARGET_LOONGARCH64) if (varTypeIsFloating(call->TypeGet())) + { regType = call->TypeGet(); - assert(regType != TYP_UNDEF); - assert(regType != TYP_STRUCT); + } #endif store->ChangeType(regType); store->SetOper(GT_STOREIND); @@ -5735,7 +5702,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) GenTree* firstNode = nullptr; GenTree* adjustedDividend = dividend; -#if defined(TARGET_ARM64) || 
defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) // On ARM64 we will use a 32x32->64 bit multiply instead of a 64x64->64 one. bool widenToNativeIntForMul = (type != TYP_I_IMPL) && !simpleMul; #else @@ -5789,7 +5756,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } else { -#if defined(TARGET_ARM64) +#ifdef TARGET_ARM64 // 64-bit MUL is more expensive than UMULL on ARM64. genTreeOps mulOper = simpleMul ? GT_MUL_LONG : GT_MULHI; #else diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 52d7191e528dc..aee2f9791b898 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -163,6 +163,8 @@ int LinearScan::BuildNode(GenTree* tree) case GT_COMMA: case GT_QMARK: case GT_COLON: + case GT_CLS_VAR: + case GT_ADDR: srcCount = 0; assert(dstCount == 0); unreached(); @@ -404,18 +406,6 @@ int LinearScan::BuildNode(GenTree* tree) } break; - case GT_ADDR: - { - // For a GT_ADDR, the child node should not be evaluated into a register - GenTree* child = tree->gtGetOp1(); - assert(!isCandidateLocalRef(child)); - assert(child->isContained()); - assert(dstCount == 1); - srcCount = 0; - BuildDef(tree); - } - break; - case GT_BLK: // These should all be eliminated prior to Lowering. assert(!"Non-store block node in Lowering"); @@ -628,20 +618,6 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT); break; - case GT_CLS_VAR: - srcCount = 0; - // GT_CLS_VAR, by the time we reach the backend, must always - // be a pure use. - // It will produce a result of the type of the - // node, and use an internal register for the address. 
- - assert(dstCount == 1); - assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0); - buildInternalIntRegisterDefForNode(tree); - buildInternalRegisterUses(); - BuildDef(tree); - break; - case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); @@ -1339,13 +1315,6 @@ int LinearScan::BuildCast(GenTreeCast* cast) const var_types srcType = genActualType(src->TypeGet()); const var_types castType = cast->gtCastType; - // Overflow checking cast from TYP_LONG to TYP_INT requires a temporary register to - // store the min and max immediate values that cannot be encoded in the CMP instruction. - if (cast->gtOverflow() && varTypeIsLong(srcType) && !cast->IsUnsigned() && (castType == TYP_INT)) - { - buildInternalIntRegisterDefForNode(cast); - } - int srcCount = BuildOperandUses(src); buildInternalRegisterUses(); BuildDef(cast); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 4b9911ca03559..cc6d4371f4ee2 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -932,8 +932,6 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, false, isVararg); assert(curArgTabEntry != nullptr); - curArgTabEntry->isStruct = isStruct; // is this a struct arg - INDEBUG(curArgTabEntry->checkIsStruct();) assert(numRegs <= 2); if (numRegs == 2) @@ -2932,7 +2930,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #endif } -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) assert(!callIsVararg || !isHfaArg); passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeUsesFloatReg(argx)); @@ -2945,13 +2943,16 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) passUsingFloatRegs = false; +#elif defined(TARGET_LOONGARCH64) + + assert(!callIsVararg && !isHfaArg); + passUsingFloatRegs = varTypeUsesFloatReg(argx); + DWORD floatFieldFlags = STRUCT_NO_FLOAT_FIELD; + #else #error Unsupported or unset target 
architecture #endif // TARGET* -#if defined(TARGET_LOONGARCH64) - DWORD floatFieldFlags = 0; -#endif bool isBackFilled = false; unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use var_types structBaseType = TYP_STRUCT; @@ -3086,11 +3087,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) passUsingFloatRegs = (floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) ? true : false; compFloatingPointUsed |= passUsingFloatRegs; - if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) + if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) != 0) { size = 1; } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) { size = 2; } @@ -3269,18 +3270,18 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } else if (passUsingFloatRegs) { - if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) { nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) != 0) { assert(size == 1); size = 2; passUsingFloatRegs = false; nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) { assert(size == 1); size = 2; @@ -3501,15 +3502,34 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { structBaseType = structSize == 8 ? 
TYP_DOUBLE : TYP_FLOAT; fltArgRegNum += 1; + newArgEntry->structFloatFieldType[0] = structBaseType; } - else if (floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) + else if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) != 0) { fltArgRegNum += 1; intArgRegNum += 1; + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) + { + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + } + else + { + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) { fltArgRegNum += 2; + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } } #else @@ -4549,19 +4569,34 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry { assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE); BYTE gcPtrs[MAX_ARG_REG_COUNT]; - elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE + // may be passed by two registers. + // e.g `struct {int a; float b;}` passed by an integer register and a float register. 
+ if (fgEntryPtr->numRegs == 2) + { + elemCount = 2; + } +#endif for (unsigned inx = 0; inx < elemCount; inx++) { -#ifdef UNIX_AMD64_ABI +#if defined(UNIX_AMD64_ABI) if (gcPtrs[inx] == TYPE_GC_NONE) { type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx], fgEntryPtr->structDesc.eightByteSizes[inx]); } else -#endif // UNIX_AMD64_ABI +#elif defined(TARGET_LOONGARCH64) + if (fgEntryPtr->structFloatFieldType[inx] != TYP_UNDEF) + { + type[inx] = fgEntryPtr->structFloatFieldType[inx]; + } + else +#endif // TARGET_LOONGARCH64 { type[inx] = getJitGCType(gcPtrs[inx]); } @@ -4575,7 +4610,12 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and // lives in the stack frame or will be a promoted field. // +#ifndef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE + // may be passed by two registers. + // e.g `struct {int a; float b;}` passed by an integer register and a float register. structSize = elemCount * TARGET_POINTER_SIZE; +#endif } else // we must have a GT_OBJ { @@ -4879,65 +4919,23 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // unsigned offset = baseOffset; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); -#if defined(TARGET_LOONGARCH64) - uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); - if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + for (unsigned inx = 0; inx < elemCount; inx++) { - assert((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); - var_types tmp_type_1; - var_types tmp_type_2; - - compFloatingPointUsed = true; - if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) - { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? 
TYP_DOUBLE : TYP_FLOAT; - } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) - { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? type[1] : TYP_INT; - } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) + GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); + newArg->AddField(this, nextLclFld, offset, type[inx]); +#ifdef TARGET_LOONGARCH64 + if (structSize > TARGET_POINTER_SIZE) { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? type[0] : TYP_INT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + // For LoongArch64's ABI, maybe there is a padding. + // e.g. `struct {float a; long b;}` + offset += TARGET_POINTER_SIZE; } else +#endif { - NYI_LOONGARCH64("fgMorphMultiregStructArg -- GT_LCL_FLD,GT_LCL_VAR"); - tmp_type_1 = TYP_UNDEF; - tmp_type_2 = TYP_UNDEF; - } - elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 8 : 4; - - GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type_1, offset); - newArg->AddField(this, nextLclFld, offset, tmp_type_1); - offset += elemSize; - nextLclFld = gtNewLclFldNode(varNum, tmp_type_2, offset); - newArg->AddField(this, nextLclFld, offset, tmp_type_2); - } - else - { - GenTree* nextLclFld = gtNewLclFldNode(varNum, type[0], offset); - newArg->AddField(this, nextLclFld, offset, type[0]); - - if (elemCount > 1) - { - assert(elemCount == 2); - elemSize = genTypeSize(type[1]); - nextLclFld = gtNewLclFldNode(varNum, type[1], offset + elemSize); - newArg->AddField(this, nextLclFld, offset + elemSize, type[1]); + offset += genTypeSize(type[inx]); } } -#else - for (unsigned inx = 0; inx < elemCount; inx++) - { - GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); - newArg->AddField(this, nextLclFld, offset, type[inx]); - offset += genTypeSize(type[inx]); - } -#endif } // Are we passing a GT_OBJ struct? 
// @@ -4967,78 +4965,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); unsigned offset = 0; -#if defined(TARGET_LOONGARCH64) - uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); - if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + for (unsigned inx = 0; inx < elemCount; inx++) { - assert((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); - var_types tmp_type_1; - var_types tmp_type_2; - - compFloatingPointUsed = true; - if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) - { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) - { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? type[1] : TYP_INT; - } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) + GenTree* curAddr = baseAddr; + if (offset != 0) { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? type[0] : TYP_INT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + GenTree* baseAddrDup = gtCloneExpr(baseAddr); + noway_assert(baseAddrDup != nullptr); + curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); } else { - NYI_LOONGARCH64("fgMorphMultiregStructArg -- GT_OBJ struct"); - tmp_type_1 = TYP_UNDEF; - tmp_type_2 = TYP_UNDEF; + curAddr = baseAddr; } - elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 8 : 4; + GenTree* curItem = gtNewIndir(type[inx], curAddr); - GenTree* curItem = gtNewIndir(tmp_type_1, baseAddr); // For safety all GT_IND should have at least GT_GLOB_REF set. 
curItem->gtFlags |= GTF_GLOB_REF; - newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); - newArg->AddField(this, curItem, 0, tmp_type_1); - - // GenTree* curAddr = baseAddr; - baseAddr = gtCloneExpr(baseAddr); - noway_assert(baseAddr != nullptr); - baseAddr = gtNewOperNode(GT_ADD, addrType, baseAddr, gtNewIconNode(elemSize, TYP_I_IMPL)); - - curItem = gtNewIndir(tmp_type_2, baseAddr); - // For safety all GT_IND should have at least GT_GLOB_REF set. - curItem->gtFlags |= GTF_GLOB_REF; - - newArg->AddField(this, curItem, elemSize, tmp_type_2); - } - else + newArg->AddField(this, curItem, offset, type[inx]); +#ifdef TARGET_LOONGARCH64 + if (structSize > TARGET_POINTER_SIZE) + { + // For LoongArch64's ABI, maybe there is a padding. + // e.g. `struct {float a; long b;}` + offset += TARGET_POINTER_SIZE; + } + else #endif - { - for (unsigned inx = 0; inx < elemCount; inx++) { - GenTree* curAddr = baseAddr; - if (offset != 0) - { - GenTree* baseAddrDup = gtCloneExpr(baseAddr); - noway_assert(baseAddrDup != nullptr); - curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); - } - else - { - curAddr = baseAddr; - } - GenTree* curItem = gtNewIndir(type[inx], curAddr); - - // For safety all GT_IND should have at least GT_GLOB_REF set. - curItem->gtFlags |= GTF_GLOB_REF; - - newArg->AddField(this, curItem, offset, type[inx]); offset += genTypeSize(type[inx]); } } diff --git a/src/coreclr/jit/register_arg_convention.cpp b/src/coreclr/jit/register_arg_convention.cpp index 1b5d1839b5e4c..bcc3dbc87e471 100644 --- a/src/coreclr/jit/register_arg_convention.cpp +++ b/src/coreclr/jit/register_arg_convention.cpp @@ -43,16 +43,7 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) nextReg(TYP_INT, numRegs); nextReg(TYP_FLOAT, numRegs); #elif defined(TARGET_LOONGARCH64) - // LA-ABI64. 
- if (numRegs > MAX_PASS_MULTIREG_BYTES / TARGET_POINTER_SIZE) - { - assert(varTypeIsStruct(type)); - nextReg(TYP_INT, 1); // TYP_BYREF - } - else - { - nextReg(type, numRegs); - } + nextReg(type, numRegs); #else // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated. nextReg(type, numRegs); From c0bbc8a0edc7822c7170c50a4fe05d4ada6ae646 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 1 Apr 2022 17:09:08 +0800 Subject: [PATCH 43/46] [LoongArch64] amend some code for CR round4. --- src/coreclr/jit/lclvars.cpp | 18 ++++++++++++++---- src/coreclr/jit/lower.cpp | 4 ++-- src/coreclr/jit/lsraloongarch64.cpp | 9 ++------- src/coreclr/jit/morph.cpp | 11 ++++++----- src/coreclr/jit/register_arg_convention.cpp | 2 -- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 115cc7953f46e..7cb32eac9f9ff 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -931,10 +931,22 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, // integer registers (if any) are used instead. + varDscInfo->setAllRegArgUsed(TYP_DOUBLE); canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); argRegTypeInStruct1 = TYP_UNKNOWN; argRegTypeInStruct2 = TYP_UNKNOWN; + + if (cSlotsToEnregister == 2) + { + if (!canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1)) + { + // Here a struct-arg which needs two registers but only one integer register available, + // it has to be split. + argRegTypeInStruct1 = TYP_I_IMPL; + canPassArgInRegisters = true; + } + } } } else @@ -6317,8 +6329,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #elif defined(TARGET_LOONGARCH64) - int initialStkOffs = 0; - // Subtract off FP and RA. 
assert(compCalleeRegsPushed >= 2); stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; @@ -6824,7 +6834,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() continue; } -#if defined(TARGET_ARM64) +#ifdef TARGET_ARM64 if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) { // Stack offset to varargs (parameters) should point to home area which will be preallocated. @@ -7084,7 +7094,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // and the pushed frame pointer register which for some strange reason isn't part of 'compCalleeRegsPushed'. int pushedCount = compCalleeRegsPushed; -#if defined(TARGET_ARM64) +#ifdef TARGET_ARM64 if (info.compIsVarArgs) { pushedCount += MAX_REG_ARG; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 61a655ddc2cf9..9e96abc04d727 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5494,7 +5494,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) return next; } -#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) +#ifdef TARGET_XARCH if (BlockRange().TryGetUse(node, &use)) { // If this is a child of an indir, let the parent handle it. 
@@ -5505,7 +5505,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) TryCreateAddrMode(node, false, parent); } } -#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 +#endif // TARGET_XARCH } if (node->OperIs(GT_ADD)) diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index aee2f9791b898..e51e1d8ca0e56 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1310,14 +1310,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // int LinearScan::BuildCast(GenTreeCast* cast) { - GenTree* src = cast->gtGetOp1(); - - const var_types srcType = genActualType(src->TypeGet()); - const var_types castType = cast->gtCastType; - - int srcCount = BuildOperandUses(src); - buildInternalRegisterUses(); + int srcCount = BuildOperandUses(cast->CastOp()); BuildDef(cast); + return srcCount; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index cc6d4371f4ee2..d1234696f77af 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -4591,7 +4591,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else #elif defined(TARGET_LOONGARCH64) - if (fgEntryPtr->structFloatFieldType[inx] != TYP_UNDEF) + if (varTypeIsFloating(fgEntryPtr->structFloatFieldType[inx]) || + (genTypeSize(fgEntryPtr->structFloatFieldType[inx]) == 4)) { type[inx] = fgEntryPtr->structFloatFieldType[inx]; } @@ -4606,10 +4607,10 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) { elemSize = TARGET_POINTER_SIZE; - // We can safely widen this to aligned bytes since we are loading from - // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and - // lives in the stack frame or will be a promoted field. 
- // +// We can safely widen this to aligned bytes since we are loading from +// a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and +// lives in the stack frame or will be a promoted field. +// #ifndef TARGET_LOONGARCH64 // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE // may be passed by two registers. diff --git a/src/coreclr/jit/register_arg_convention.cpp b/src/coreclr/jit/register_arg_convention.cpp index bcc3dbc87e471..a90e61c3a59fd 100644 --- a/src/coreclr/jit/register_arg_convention.cpp +++ b/src/coreclr/jit/register_arg_convention.cpp @@ -42,8 +42,6 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) // For System V the reg type counters should be independent. nextReg(TYP_INT, numRegs); nextReg(TYP_FLOAT, numRegs); -#elif defined(TARGET_LOONGARCH64) - nextReg(type, numRegs); #else // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated. nextReg(type, numRegs); From d57ddb537cd0ee1a04d112724296921679bb3aac Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 1 Apr 2022 21:35:22 +0800 Subject: [PATCH 44/46] [LoongArch64] amend some code for CR round5. 
--- src/coreclr/jit/codegenloongarch64.cpp | 79 -------------------------- src/coreclr/jit/morph.cpp | 36 +++++++----- 2 files changed, 23 insertions(+), 92 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 40a19300095e9..15d4f78de4704 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4066,85 +4066,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); emit->emitIns_R_I(INS_movcf2gr, EA_PTRSIZE, targetReg, 1 /*cc*/); } - else if (op1->isContainedIntOrIImmed() && op2->isContainedIntOrIImmed()) - { - ssize_t imm1 = op1->AsIntCon()->gtIconVal; - ssize_t imm2 = op2->AsIntCon()->gtIconVal; - - assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); - - bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; - instruction ins = INS_beqz; - - switch (cmpSize) - { - case EA_4BYTE: - { - imm1 = static_cast(imm1); - imm2 = static_cast(imm2); - } - break; - case EA_8BYTE: - break; - case EA_1BYTE: - { - imm1 = static_cast(imm1); - imm2 = static_cast(imm2); - } - break; - // case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected type in jumpCompare."); - } - - switch (tree->OperGet()) - { - case GT_LT: - if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_LE: - if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_EQ: - if (imm1 == imm2) - { - ins = INS_b; - } - break; - case GT_NE: - if (imm1 != imm2) - { - ins = INS_b; - } - break; - case GT_GT: - if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_GE: - if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) - { - ins = INS_b; - } - 
break; - default: - break; - } - - assert(ins != INS_invalid); - jtree->gtOp2 = (GenTree*)REG_SP; - jtree->SetRegNum((regNumber)ins); - } else { if (op1->isContainedIntOrIImmed()) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index d1234696f77af..2b9f7ebf80c8c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2024,7 +2024,12 @@ void fgArgInfo::EvalArgsToTemps() setupArg = compiler->fgMorphCopyBlock(setupArg); #if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) #if defined(TARGET_LOONGARCH64) - // For LoongArch64, the struct {float a; float b;} passed by float-registers. + // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" + // for "struct { float, float }", and retyping to a primitive here will cause the + // multi-reg morphing to not kick in (the struct in question needs to be passed in + // two FP registers). + // TODO-LoongArch64: fix "getPrimitiveTypeForStruct" or use the ABI information in + // the arg entry instead of calling it here. if ((lclVarType == TYP_STRUCT) && (curArgTabEntry->numRegs == 1)) #else if (lclVarType == TYP_STRUCT) @@ -3087,6 +3092,17 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) passUsingFloatRegs = (floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) ? true : false; compFloatingPointUsed |= passUsingFloatRegs; + if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) != 0) + { + // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" + // for "struct { float, float }", and retyping to a primitive here will cause the + // multi-reg morphing to not kick in (the struct in question needs to be passed in + // two FP registers). Here is just keep "structBaseType" as "TYP_STRUCT". + // TODO-LoongArch64: fix "getPrimitiveTypeForStruct" or use the ABI information in + // the arg entry instead of calling it here. 
+ structBaseType = TYP_STRUCT; + } + if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) != 0) { size = 1; @@ -3615,14 +3631,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) if (newArgEntry->isStruct) { newArgEntry->passedByRef = passStructByRef; -#if defined(TARGET_LOONGARCH64) - newArgEntry->argType = ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) || - (structBaseType == TYP_UNKNOWN)) - ? argx->TypeGet() - : structBaseType; -#else - newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; -#endif + newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; } else { @@ -4607,10 +4616,11 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) { elemSize = TARGET_POINTER_SIZE; -// We can safely widen this to aligned bytes since we are loading from -// a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and -// lives in the stack frame or will be a promoted field. -// + // We can safely widen this to aligned bytes since we are loading from + // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and + // lives in the stack frame or will be a promoted field. + CLANG_FORMAT_COMMENT_ANCHOR; + #ifndef TARGET_LOONGARCH64 // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE // may be passed by two registers. From ae3fbc02d2406006749b5c4e9c5c1e0b6bf9fa35 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Sun, 3 Apr 2022 01:08:54 +0800 Subject: [PATCH 45/46] [LoongArch64] amend some code after refacting. 
--- src/coreclr/jit/codegenloongarch64.cpp | 124 ++++--------------------- src/coreclr/jit/emitloongarch64.cpp | 5 +- src/coreclr/jit/instr.cpp | 14 ++- src/coreclr/jit/lsraloongarch64.cpp | 6 ++ 4 files changed, 40 insertions(+), 109 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 15d4f78de4704..9f74cef09f096 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4100,23 +4100,21 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { ssize_t imm = op2->AsIntCon()->gtIconVal; + switch (cmpSize) { - switch (cmpSize) - { - case EA_4BYTE: - imm = static_cast(imm); - break; - case EA_8BYTE: - break; - case EA_1BYTE: - imm = static_cast(imm); - break; - // case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected type in jumpTrue(imm)."); - } + case EA_4BYTE: + imm = static_cast(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast(imm); + break; + // case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); } if (tree->OperIs(GT_LT)) @@ -4222,8 +4220,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); } } - - genProduceReg(tree); } else { @@ -4274,9 +4270,8 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); } - - genProduceReg(tree); } + genProduceReg(tree); } } @@ -4399,87 +4394,6 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) jtrue->SetRegNum((regNumber)ins); } } - else if (op1->isContainedIntOrIImmed() && op2->isContainedIntOrIImmed()) - { - ssize_t imm1 = op1->AsIntCon()->gtIconVal; - ssize_t imm2 = op2->AsIntCon()->gtIconVal; - - assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); - - bool IsUnsigned = (tree->gtFlags & 
GTF_UNSIGNED) != 0; - - switch (cmpSize) - { - case EA_4BYTE: - { - imm1 = static_cast(imm1); - imm2 = static_cast(imm2); - } - break; - case EA_8BYTE: - break; - case EA_1BYTE: - { - imm1 = static_cast(imm1); - imm2 = static_cast(imm2); - } - break; - - default: - assert(!"Unexpected type in jumpTrue."); - } - switch (tree->OperGet()) - { - case GT_LT: - if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_LE: - if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_EQ: - if (imm1 == imm2) - { - ins = INS_b; - } - break; - case GT_NE: - if (imm1 != imm2) - { - ins = INS_b; - } - break; - case GT_GT: - if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_GE: - if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) - { - ins = INS_b; - } - break; - default: - break; - } - - if (IsEq && (ins != INS_invalid)) - { - emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, 0); // 5-bits; - } - else if (ins != INS_invalid) - { - jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; - jtrue->SetRegNum((regNumber)ins); - } - } else { if (op1->isContainedIntOrIImmed()) @@ -7229,8 +7143,8 @@ void CodeGen::genCall(GenTreeCall* call) assert(putArgRegNode->gtOper == GT_PUTARG_REG); genConsumeReg(putArgRegNode); - inst_Mov_Extend(putArgRegNode->TypeGet(), /* srcInReg */ true, argReg, putArgRegNode->GetRegNum(), - /* canSkip */ true, emitActualTypeSize(TYP_I_IMPL)); + var_types dstType = emitter::isFloatReg(argReg) ? 
TYP_DOUBLE : TYP_I_IMPL; + inst_Mov(dstType, argReg, putArgRegNode->GetRegNum(), /* canSkip */ true); argReg = genRegArgNext(argReg); } @@ -7243,8 +7157,8 @@ void CodeGen::genCall(GenTreeCall* call) { regNumber argReg = curArgTabEntry->GetRegNum(); genConsumeReg(argNode); - inst_Mov_Extend(argNode->TypeGet(), /* srcInReg */ true, argReg, argNode->GetRegNum(), /* canSkip */ true, - emitActualTypeSize(TYP_I_IMPL)); + var_types dstType = emitter::isFloatReg(argReg) ? TYP_DOUBLE : TYP_I_IMPL; + inst_Mov(dstType, argReg, argNode->GetRegNum(), /* canSkip */ true); } } diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 0905579406548..1cdd8b7fd8b89 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2534,6 +2534,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code |= (code_t)id->idReg4(); code |= (code_t)id->idReg3() << 5; // the offset default is 0; + *(code_t*)dst = code; } else if (id->idIsReloc()) { @@ -2602,6 +2603,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code |= (code_t)reg2; code |= (code_t)REG_T2 << 5; // the offset default is 0; + *(code_t*)dst = code; } dst += 4; @@ -3290,7 +3292,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = emitInsCode(INS_lu32i_d); code |= (code_t)reg1; - code |= (code_t)(imm >> 32) << 5; + code |= (code_t)((imm >> 32) & 0xfffff) << 5; *(code_t*)dst = code; dst += 4; @@ -3751,7 +3753,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // case INS_OPTS_NONE: default: - // assert(id->idGCref() == GCT_NONE); *(code_t*)dst = id->idAddr()->iiaGetInstrEncode(); dst += 4; dst2 = dst; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index b545df3649765..67ae437f03b75 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -447,15 +447,25 @@ void CodeGen::inst_Mov(var_types dstType, if 
(isFloatRegType(dstType) != genIsValidFloatReg(dstReg)) { if (dstType == TYP_FLOAT) + { dstType = TYP_INT; + } else if (dstType == TYP_DOUBLE) + { dstType = TYP_LONG; + } else if (dstType == TYP_INT) + { dstType = TYP_FLOAT; + } else if (dstType == TYP_LONG) + { dstType = TYP_DOUBLE; + } else - assert(!"unimplemented on LOONGARCH yet"); + { + NYI_LOONGARCH64("CodeGen::inst_Mov dstType"); + } } #endif instruction ins = ins_Copy(srcReg, dstType); @@ -1640,7 +1650,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) return INS_vmov_f2i; } #elif defined(TARGET_LOONGARCH64) - // No SIMD support yet. + // TODO-LoongArch64-CQ: supporting SIMD. assert(!varTypeIsSIMD(dstType)); if (dstIsFloatReg) { diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index e51e1d8ca0e56..ca0efc8bea169 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -355,6 +355,12 @@ int LinearScan::BuildNode(GenTree* tree) case GT_GE: case GT_GT: case GT_JCMP: + if (!varTypeIsFloating(tree)) + { + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + } srcCount = BuildCmp(tree); break; From 4b8a5968f107508d15013c4551ddd3fc9f1ede19 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 8 Apr 2022 10:24:08 +0800 Subject: [PATCH 46/46] [LoongArch64] amend the compare and fix the error when running hello-world within debug-mode. 
--- src/coreclr/jit/codegencommon.cpp | 4 ++-- src/coreclr/jit/codegenloongarch64.cpp | 30 +++++++++++--------------- src/coreclr/jit/emit.cpp | 6 +++++- src/coreclr/jit/emitloongarch64.cpp | 2 +- src/coreclr/jit/lsraloongarch64.cpp | 12 +++++------ src/coreclr/jit/morph.cpp | 5 +++++ 6 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 36c62a7c23643..0eccb2abfc8e5 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1313,7 +1313,7 @@ bool CodeGen::genCreateAddrMode( noway_assert(op2); switch (op2->gtOper) { -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because // arm doesn't support both scale and offset at the same. Offset is handled // at the emitter as a peephole optimization. @@ -1370,7 +1370,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 case GT_NOP: diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 60f4f405fc582..afe5b0b95d5bd 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4223,23 +4223,18 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else { - regNumber tmpRegOp1 = tree->ExtractTempReg(); - regNumber tmpRegOp2 = tree->ExtractTempReg(); - regNumber regOp2 = op2->GetRegNum(); - if (cmpSize == EA_4BYTE) + regNumber regOp2 = op2->GetRegNum(); + + if ((cmpSize == EA_4BYTE) && IsUnsigned) { + regNumber tmpRegOp1 = REG_RA; + regNumber tmpRegOp2 = rsGetRsvdReg(); + + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp1, regOp1, 0); + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp2, regOp2, 0); + regOp1 = tmpRegOp1; regOp2 = tmpRegOp2; - if (IsUnsigned) - { - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, tmpRegOp1, 
op1->GetRegNum(), 31, 0); - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, tmpRegOp2, op2->GetRegNum(), 31, 0); - } - else - { - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp1, op1->GetRegNum(), 0); - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp2, op2->GetRegNum(), 0); - } } if (tree->OperIs(GT_LT)) @@ -4271,7 +4266,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); } } - genProduceReg(tree); } } @@ -4497,7 +4491,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) { - regNumber tmpRegOp1 = tree->ExtractTempReg(); + regNumber tmpRegOp1 = rsGetRsvdReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); regOp1 = tmpRegOp1; @@ -4506,7 +4500,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) { - regNumber tmpRegOp1 = tree->ExtractTempReg(); + regNumber tmpRegOp1 = rsGetRsvdReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); regOp1 = tmpRegOp1; @@ -4522,7 +4516,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { if (!(op1->gtFlags & GTF_UNSIGNED)) { - regNumber tmpRegOp1 = tree->ExtractTempReg(); + regNumber tmpRegOp1 = rsGetRsvdReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); regOp1 = tmpRegOp1; } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index c372e44db0414..ba73a2f8e09f3 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -6552,7 +6552,11 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } -#endif // TARGET_XARCH +#elif 
defined(TARGET_LOONGARCH64) + + isJccAffectedIns = true; + +#endif // TARGET_LOONGARCH64 // Jcc affected instruction boundaries were printed above; handle other cases here. if (!isJccAffectedIns) diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 1cdd8b7fd8b89..9fb3e1f9cac1c 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -786,7 +786,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va imm3 = imm3 ? imm2 - imm3 : imm2; code = emitInsCode(ins); code |= (code_t)reg1; - code |= (code_t)REG_RA; + code |= (code_t)REG_RA << 5; code |= (code_t)(imm3 & 0xfff) << 10; } } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index ca0efc8bea169..2f259f7efffbd 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -256,6 +256,12 @@ int LinearScan::BuildNode(GenTree* tree) // everything is made explicit by adding casts. assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); } + else if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. 
+ buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } FALLTHROUGH; case GT_AND: @@ -355,12 +361,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_GE: case GT_GT: case GT_JCMP: - if (!varTypeIsFloating(tree)) - { - buildInternalIntRegisterDefForNode(tree); - buildInternalIntRegisterDefForNode(tree); - buildInternalRegisterUses(); - } srcCount = BuildCmp(tree); break; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 3d4bf56ceb391..19deba41f7657 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -862,6 +862,11 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, curArgTabEntry->SetByteSize(byteSize, isStruct, isFloatHfa); curArgTabEntry->SetByteOffset(0); +#ifdef TARGET_LOONGARCH64 + curArgTabEntry->structFloatFieldType[0] = TYP_UNDEF; + curArgTabEntry->structFloatFieldType[1] = TYP_UNDEF; +#endif + hasRegArgs = true; if (argCount >= argTableSize) {